contrib/mul/mbl/tools/sample_stats.cxx
Go to the documentation of this file.
00001 //////////////////////////////////////////////////////////////////////////
00002 //
00003 // Calculate various statistics on a sample of 1D data.
00004 //
00005 //////////////////////////////////////////////////////////////////////////
00006 
00007 #include <vcl_iostream.h>
00008 #include <vcl_iterator.h>
00009 #include <vcl_fstream.h>
00010 #include <vcl_string.h>
00011 #include <vcl_exception.h>
00012 #include <vcl_map.h>
00013 #include <vcl_typeinfo.h>
00014 #include <vul/vul_arg.h>
00015 #include <vul/vul_sprintf.h>
00016 #include <mbl/mbl_log.h>
00017 #include <mbl/mbl_exception.h>
00018 #include <mbl/mbl_sample_stats_1d.h>
00019 
00020 
00021 //=========================================================================
00022 // Static function to create a static logger when first required
00023 //=========================================================================
00024 static mbl_logger& logger()
00025 {
00026   static mbl_logger l("mul.mbl.tools.sample_stats");
00027   return l;
00028 }
00029 
00030 
00031 //=========================================================================
00032 // Report an error message in several ways, then throw an exception.
00033 //=========================================================================
00034 static void do_error(const vcl_string& msg)
00035 {
00036   MBL_LOG(ERR, logger(), msg);
00037   throw mbl_exception_abort(msg);
00038 }
00039 
00040 
00041 //=========================================================================
00042 // Actual main function (apart from exception-handling wrapper)
00043 //=========================================================================
00044 int main2(int argc, char *argv[])
00045 {
00046   // Parse the program arguments
00047   vul_arg<vcl_string> in_file("-i", "input file containing scalar values (whitespace-separated); otherwise uses stdin", "");
00048   vul_arg<vcl_string> out_file("-o", "output file to append statistics; logger will be used otherwise", "");
00049   vul_arg<vcl_string> sep("-sep", "String to use as a separator between output values, e.g. \", \" or \"  \" (default=TAB)", "\t");
00050   vul_arg<bool> nohead("-h","Specify this to SUPPRESS column headers", false);
00051   vul_arg<bool> n("-n", "Specify this to record the number of samples", false);
00052   vul_arg<bool> mean("-mean", "Specify this to record the mean", false);
00053   vul_arg<bool> variance("-var", "Specify this to record the variance", false);
00054   vul_arg<bool> sd("-sd", "Specify this to record the standard deviation", false);
00055   vul_arg<bool> se("-se", "Specify this to record the standard error", false);
00056   vul_arg<bool> med("-med", "Specify this to record the median", false);
00057   vul_arg<bool> min("-min", "Specify this to record the minimum", false);
00058   vul_arg<bool> max("-max", "Specify this to record the maximum", false);
00059   vul_arg<vcl_vector<int> > pc("-pc", "Specify this switch with 1 or more comma-separated integers (e.g. 5,50,95) to record percentile(s)");
00060   vul_arg<vcl_vector<double> > quant("-q", "Specify this switch with 1 or more comma-separated floats (e.g. 0.05,0.50,0.95) to record quantile(s)");
00061   vul_arg<bool> sum("-sum", "Specify this to record the sum", false);
00062   vul_arg<bool> sum_squares("-ssq", "Specify this to record the sum of squares", false);
00063   vul_arg<bool> rms("-rms", "Specify this to record the rms", false);
00064   vul_arg<bool> mean_of_absolutes("-moa", "Specify this to record the mean_of_absolutes", false);
00065   vul_arg<bool> skewness("-skew", "Specify this to record the skewness", false);
00066   vul_arg<bool> kurtosis("-kurt", "Specify this to record the kurtosis", false);
00067   vul_arg<bool> absolute("-absolute", "Calculate statistics of absolute sample values", false);
00068   vul_arg<vcl_string> label("-label","Adds this label to each line outputting a statistic - useful for later grep");
00069   vul_arg_parse(argc, argv);
00070 
00071   // Try to open the input file if specified or use stdin
00072   vcl_istream* is=0;
00073   if (!in_file().empty())
00074   {
00075     is = new vcl_ifstream(in_file().c_str());
00076     if (!is || !is->good())
00077       do_error(vcl_string("Failed to open input file ") + in_file().c_str());
00078     MBL_LOG(DEBUG, logger(), "Opened input file: " << in_file().c_str());
00079   }
00080   else
00081   {
00082     is = &vcl_cin;
00083   }
00084 
00085   // Load the data from stream until end
00086   vcl_vector<double> data_vec;
00087   data_vec.assign(vcl_istream_iterator<double>(*is), vcl_istream_iterator<double>());
00088   if (data_vec.empty())
00089     do_error("Could not parse data file.");
00090   MBL_LOG(DEBUG, logger(), "data file contained " << data_vec.size() << " values.");
00091   
00092   if (absolute())
00093   {
00094     for (unsigned i=0;i<data_vec.size();++i) data_vec[i]=vcl_abs(data_vec[i]);
00095   }
00096 
00097 
00098   // Close the input filestream
00099   if (!in_file().empty()) (dynamic_cast<vcl_ifstream*>(is))->close();
00100 
00101   // Calculate the requested statistics
00102   mbl_sample_stats_1d data(data_vec);
00103   vcl_map<vcl_string, double> stats;
00104   if (n.set())                 stats["n"]=data.n_samples();
00105   if (mean.set())              stats["mean"]=data.mean();
00106   if (variance.set())          stats["var"]=data.variance();
00107   if (sd.set())                stats["sd"]=data.sd();
00108   if (se.set())                stats["se"]=data.stdError();
00109   if (med.set())               stats["med"]=data.median();
00110   if (min.set())               stats["min"]=data.min();
00111   if (max.set())               stats["max"]=data.max();
00112   if (sum.set())               stats["sum"]=data.sum();
00113   if (sum_squares.set())       stats["ssq"]=data.sum_squares();
00114   if (rms.set())               stats["rms"]=data.rms();
00115   if (mean_of_absolutes.set()) stats["moa"]=data.mean_of_absolutes();
00116   if (skewness.set())          stats["skew"]=data.skewness();
00117   if (kurtosis.set())          stats["kurt"]=data.kurtosis();
00118   if (pc.set())
00119   {
00120     for (vcl_vector<int>::const_iterator it=pc().begin(); it!=pc().end(); ++it)
00121     {
00122       vcl_string name = vul_sprintf("pc%02u", *it);
00123       stats[name] = data.nth_percentile(*it);
00124     }
00125   }
00126   if (quant.set())
00127   {
00128     for (vcl_vector<double>::const_iterator it=quant().begin(); it!=quant().end(); ++it)
00129     {
00130       vcl_string name = vul_sprintf("q%f", *it);
00131       stats[name] = data.quantile(*it);
00132     }
00133   }
00134 
00135   // Open output file if requested
00136   vcl_ofstream* ofs=0;
00137   if (!out_file().empty())
00138   {
00139     ofs = new vcl_ofstream(out_file().c_str(), vcl_ios::app);
00140     if (!ofs || !ofs->good())
00141       do_error(vcl_string("Failed to open outout file ") + out_file().c_str());
00142     MBL_LOG(DEBUG, logger(), "Opened output file: " << out_file().c_str());
00143   }
00144 
00145   // Output requested statistics
00146   MBL_LOG(INFO, logger(), "in_file: " << in_file());
00147 
00148   // Write a line of column headers unless suppressed
00149   if (!nohead())
00150   {
00151     if (ofs && ofs->good()) *ofs << '#' << sep();
00152     for (vcl_map<vcl_string,double>::const_iterator it=stats.begin(); it!=stats.end(); ++it)
00153     {
00154       if (ofs && ofs->good()) *ofs << it->first << sep();
00155     }
00156     if (ofs && ofs->good()) *ofs << '\n';
00157   }
00158   vcl_string my_label = in_file();
00159   if (label.set()) my_label = label();
00160   // Write statistics in a single line
00161   if (ofs && ofs->good()) *ofs << my_label << sep();
00162   for (vcl_map<vcl_string,double>::const_iterator it=stats.begin(); it!=stats.end(); ++it)
00163   {
00164     vcl_cout <<  my_label << " " << it->first << ": " << it->second << vcl_endl;
00165     if (ofs && ofs->good()) *ofs << it->second << sep();
00166   }
00167   if (ofs && ofs->good()) *ofs << vcl_endl;
00168 
00169   // Close file
00170   if (ofs) ofs->close();
00171 
00172   return 0;
00173 }
00174 
00175 
00176 //=========================================================================
00177 // main() function with exception-handling wrapper
00178 //=========================================================================
00179 int main(int argc, char *argv[])
00180 {
00181   int errcode = 0;
00182 
00183   // Initialize the logger
00184   mbl_logger::root().load_log_config_file();
00185 
00186   try
00187   {
00188     errcode = main2(argc, argv);
00189   }
00190   catch (const vcl_exception & e)
00191   {
00192     vcl_cerr << "ERROR: " << typeid(e).name() << '\n' <<
00193       e.what() << vcl_endl;
00194     errcode = 4;
00195   }
00196 
00197   return errcode;
00198 }
00199