00001
00002
00003
00004
00005
00006
00007 #include <vcl_iostream.h>
00008 #include <vcl_iterator.h>
00009 #include <vcl_fstream.h>
00010 #include <vcl_string.h>
00011 #include <vcl_exception.h>
00012 #include <vcl_map.h>
00013 #include <vcl_typeinfo.h>
00014 #include <vul/vul_arg.h>
00015 #include <vul/vul_sprintf.h>
00016 #include <mbl/mbl_log.h>
00017 #include <mbl/mbl_exception.h>
00018 #include <mbl/mbl_sample_stats_1d.h>
00019
00020
00021
00022
00023
00024 static mbl_logger& logger()
00025 {
00026 static mbl_logger l("mul.mbl.tools.sample_stats");
00027 return l;
00028 }
00029
00030
00031
00032
00033
00034 static void do_error(const vcl_string& msg)
00035 {
00036 MBL_LOG(ERR, logger(), msg);
00037 throw mbl_exception_abort(msg);
00038 }
00039
00040
00041
00042
00043
00044 int main2(int argc, char *argv[])
00045 {
00046
00047 vul_arg<vcl_string> in_file("-i", "input file containing scalar values (whitespace-separated); otherwise uses stdin", "");
00048 vul_arg<vcl_string> out_file("-o", "output file to append statistics; logger will be used otherwise", "");
00049 vul_arg<vcl_string> sep("-sep", "String to use as a separator between output values, e.g. \", \" or \" \" (default=TAB)", "\t");
00050 vul_arg<bool> nohead("-h","Specify this to SUPPRESS column headers", false);
00051 vul_arg<bool> n("-n", "Specify this to record the number of samples", false);
00052 vul_arg<bool> mean("-mean", "Specify this to record the mean", false);
00053 vul_arg<bool> variance("-var", "Specify this to record the variance", false);
00054 vul_arg<bool> sd("-sd", "Specify this to record the standard deviation", false);
00055 vul_arg<bool> se("-se", "Specify this to record the standard error", false);
00056 vul_arg<bool> med("-med", "Specify this to record the median", false);
00057 vul_arg<bool> min("-min", "Specify this to record the minimum", false);
00058 vul_arg<bool> max("-max", "Specify this to record the maximum", false);
00059 vul_arg<vcl_vector<int> > pc("-pc", "Specify this switch with 1 or more comma-separated integers (e.g. 5,50,95) to record percentile(s)");
00060 vul_arg<vcl_vector<double> > quant("-q", "Specify this switch with 1 or more comma-separated floats (e.g. 0.05,0.50,0.95) to record quantile(s)");
00061 vul_arg<bool> sum("-sum", "Specify this to record the sum", false);
00062 vul_arg<bool> sum_squares("-ssq", "Specify this to record the sum of squares", false);
00063 vul_arg<bool> rms("-rms", "Specify this to record the rms", false);
00064 vul_arg<bool> mean_of_absolutes("-moa", "Specify this to record the mean_of_absolutes", false);
00065 vul_arg<bool> skewness("-skew", "Specify this to record the skewness", false);
00066 vul_arg<bool> kurtosis("-kurt", "Specify this to record the kurtosis", false);
00067 vul_arg<bool> absolute("-absolute", "Calculate statistics of absolute sample values", false);
00068 vul_arg<vcl_string> label("-label","Adds this label to each line outputting a statistic - useful for later grep");
00069 vul_arg_parse(argc, argv);
00070
00071
00072 vcl_istream* is=0;
00073 if (!in_file().empty())
00074 {
00075 is = new vcl_ifstream(in_file().c_str());
00076 if (!is || !is->good())
00077 do_error(vcl_string("Failed to open input file ") + in_file().c_str());
00078 MBL_LOG(DEBUG, logger(), "Opened input file: " << in_file().c_str());
00079 }
00080 else
00081 {
00082 is = &vcl_cin;
00083 }
00084
00085
00086 vcl_vector<double> data_vec;
00087 data_vec.assign(vcl_istream_iterator<double>(*is), vcl_istream_iterator<double>());
00088 if (data_vec.empty())
00089 do_error("Could not parse data file.");
00090 MBL_LOG(DEBUG, logger(), "data file contained " << data_vec.size() << " values.");
00091
00092 if (absolute())
00093 {
00094 for (unsigned i=0;i<data_vec.size();++i) data_vec[i]=vcl_abs(data_vec[i]);
00095 }
00096
00097
00098
00099 if (!in_file().empty()) (dynamic_cast<vcl_ifstream*>(is))->close();
00100
00101
00102 mbl_sample_stats_1d data(data_vec);
00103 vcl_map<vcl_string, double> stats;
00104 if (n.set()) stats["n"]=data.n_samples();
00105 if (mean.set()) stats["mean"]=data.mean();
00106 if (variance.set()) stats["var"]=data.variance();
00107 if (sd.set()) stats["sd"]=data.sd();
00108 if (se.set()) stats["se"]=data.stdError();
00109 if (med.set()) stats["med"]=data.median();
00110 if (min.set()) stats["min"]=data.min();
00111 if (max.set()) stats["max"]=data.max();
00112 if (sum.set()) stats["sum"]=data.sum();
00113 if (sum_squares.set()) stats["ssq"]=data.sum_squares();
00114 if (rms.set()) stats["rms"]=data.rms();
00115 if (mean_of_absolutes.set()) stats["moa"]=data.mean_of_absolutes();
00116 if (skewness.set()) stats["skew"]=data.skewness();
00117 if (kurtosis.set()) stats["kurt"]=data.kurtosis();
00118 if (pc.set())
00119 {
00120 for (vcl_vector<int>::const_iterator it=pc().begin(); it!=pc().end(); ++it)
00121 {
00122 vcl_string name = vul_sprintf("pc%02u", *it);
00123 stats[name] = data.nth_percentile(*it);
00124 }
00125 }
00126 if (quant.set())
00127 {
00128 for (vcl_vector<double>::const_iterator it=quant().begin(); it!=quant().end(); ++it)
00129 {
00130 vcl_string name = vul_sprintf("q%f", *it);
00131 stats[name] = data.quantile(*it);
00132 }
00133 }
00134
00135
00136 vcl_ofstream* ofs=0;
00137 if (!out_file().empty())
00138 {
00139 ofs = new vcl_ofstream(out_file().c_str(), vcl_ios::app);
00140 if (!ofs || !ofs->good())
00141 do_error(vcl_string("Failed to open outout file ") + out_file().c_str());
00142 MBL_LOG(DEBUG, logger(), "Opened output file: " << out_file().c_str());
00143 }
00144
00145
00146 MBL_LOG(INFO, logger(), "in_file: " << in_file());
00147
00148
00149 if (!nohead())
00150 {
00151 if (ofs && ofs->good()) *ofs << '#' << sep();
00152 for (vcl_map<vcl_string,double>::const_iterator it=stats.begin(); it!=stats.end(); ++it)
00153 {
00154 if (ofs && ofs->good()) *ofs << it->first << sep();
00155 }
00156 if (ofs && ofs->good()) *ofs << '\n';
00157 }
00158 vcl_string my_label = in_file();
00159 if (label.set()) my_label = label();
00160
00161 if (ofs && ofs->good()) *ofs << my_label << sep();
00162 for (vcl_map<vcl_string,double>::const_iterator it=stats.begin(); it!=stats.end(); ++it)
00163 {
00164 vcl_cout << my_label << " " << it->first << ": " << it->second << vcl_endl;
00165 if (ofs && ofs->good()) *ofs << it->second << sep();
00166 }
00167 if (ofs && ofs->good()) *ofs << vcl_endl;
00168
00169
00170 if (ofs) ofs->close();
00171
00172 return 0;
00173 }
00174
00175
00176
00177
00178
00179 int main(int argc, char *argv[])
00180 {
00181 int errcode = 0;
00182
00183
00184 mbl_logger::root().load_log_config_file();
00185
00186 try
00187 {
00188 errcode = main2(argc, argv);
00189 }
00190 catch (const vcl_exception & e)
00191 {
00192 vcl_cerr << "ERROR: " << typeid(e).name() << '\n' <<
00193 e.what() << vcl_endl;
00194 errcode = 4;
00195 }
00196
00197 return errcode;
00198 }
00199