contrib/mul/mbl/tools/sample_stats.cxx
Go to the documentation of this file.
00001 //////////////////////////////////////////////////////////////////////////
00002 //
00003 // Calculate various statistics on a sample of 1D data.
00004 //
00005 //////////////////////////////////////////////////////////////////////////
00006 
00007 #include <vcl_iostream.h>
00008 #include <vcl_iterator.h>
00009 #include <vcl_fstream.h>
00010 #include <vcl_string.h>
00011 #include <vcl_exception.h>
00012 #include <vcl_map.h>
00013 #include <vcl_typeinfo.h>
00014 #include <vul/vul_arg.h>
00015 #include <vul/vul_sprintf.h>
00016 #include <mbl/mbl_log.h>
00017 #include <mbl/mbl_exception.h>
00018 #include <mbl/mbl_sample_stats_1d.h>
00019 
00020 
00021 //=========================================================================
00022 // Static function to create a static logger when first required
00023 //=========================================================================
00024 static mbl_logger& logger()
00025 {
00026   static mbl_logger l("vxl.contrib.mul.mbl.tools.sample_stats");
00027   return l;
00028 }
00029 
00030 
00031 //=========================================================================
00032 // Report an error message in several ways, then throw an exception.
00033 //=========================================================================
00034 static void do_error(const vcl_string& msg)
00035 {
00036   MBL_LOG(ERR, logger(), msg);
00037   throw mbl_exception_abort(msg);
00038 }
00039 
00040 
00041 //=========================================================================
00042 // Actual main function (apart from exception-handling wrapper)
00043 //=========================================================================
00044 int main2(int argc, char *argv[])
00045 {
00046   // Parse the program arguments
00047   vul_arg<vcl_string> in_file("-i", "input file containing scalar values (whitespace-separated); otherwise uses stdin", "");
00048   vul_arg<vcl_string> out_file("-o", "output file to append statistics; logger will be used otherwise", "");
00049   vul_arg<bool> n("-n", "Specify this to record number of samples", false);
00050   vul_arg<bool> min("-min", "Specify this to record minimum", false);
00051   vul_arg<bool> max("-max", "Specify this to record maximum", false);
00052   vul_arg<bool> mean("-mean", "Specify this to record mean", false);
00053   vul_arg<bool> sd("-sd", "Specify this to record standard deviation", false);
00054   vul_arg<bool> se("-se", "Specify this to record standard error", false);
00055   vul_arg<bool> med("-med", "Specify this to record median", false);
00056   vul_arg<vcl_vector<int> > pc("-pc", "Specify this switch with 1 or more comma-separated integers (e.g. 5,50,95) to record percentile(s)");
00057   vul_arg<vcl_vector<double> > quant("-q", "Specify this switch with 1 or more comma-separated floats (e.g. 0.05,0.50,0.95) to record quantile(s)");
00058   vul_arg<vcl_string> sep("-sep", "String to use as a separator between output values, e.g. \", \" or \"  \" (default=TAB)", "\t");
00059   vul_arg<bool> nohead("-h","Specify this to SUPPRESS column headers", false);
00060   vul_arg_parse(argc, argv);
00061 
00062   // Try to open the input file if specified or use stdin
00063   vcl_istream* is=0;
00064   if (!in_file().empty())
00065   {
00066     is = new vcl_ifstream(in_file().c_str());
00067     if (!is || !is->good())
00068       do_error(vcl_string("Failed to open input file ") + in_file().c_str());
00069     MBL_LOG(DEBUG, logger(), "Opened input file: " << in_file().c_str());
00070   }
00071   else
00072   {
00073     is = &vcl_cin;
00074     vcl_cout << "Please provide some input data (or use -? for help)." << vcl_endl;
00075   }
00076 
00077   // Load the data from stream until end
00078   vcl_vector<double> data_vec;
00079   data_vec.assign(vcl_istream_iterator<double>(*is), vcl_istream_iterator<double>());
00080   if (data_vec.empty())
00081     do_error("Could not parse data file.");
00082   MBL_LOG(DEBUG, logger(), "data file contained " << data_vec.size() << " values.");
00083 
00084   // Close the input filestream
00085   if (!in_file().empty()) (dynamic_cast<vcl_ifstream*>(is))->close();
00086 
00087   // Calculate the requested statistics
00088   mbl_sample_stats_1d data(data_vec);
00089   vcl_map<vcl_string, double> stats;
00090   if (n.set())    stats["n"]=data.n_samples();
00091   if (min.set())  stats["min"]=data.min();
00092   if (max.set())  stats["max"]=data.max();
00093   if (mean.set()) stats["mean"]=data.mean();
00094   if (sd.set())   stats["sd"]=data.sd();
00095   if (se.set())   stats["se"]=data.stdError();
00096   if (med.set())  stats["med"]=data.median();
00097   if (pc.set())
00098   {
00099     for (vcl_vector<int>::const_iterator it=pc().begin(); it!=pc().end(); ++it)
00100     {
00101       vcl_string name = vul_sprintf("pc%02u", *it);
00102       stats[name] = data.nth_percentile(*it);
00103     }
00104   }
00105   if (quant.set())
00106   {
00107     for (vcl_vector<double>::const_iterator it=quant().begin(); it!=quant().end(); ++it)
00108     {
00109       vcl_string name = vul_sprintf("q%f", *it);
00110       stats[name] = data.quantile(*it);
00111     }
00112   }
00113 
00114   // Open output file if requested
00115   vcl_ofstream* ofs=0;
00116   if (!out_file().empty())
00117   {
00118     ofs = new vcl_ofstream(out_file().c_str(), vcl_ios::app);
00119     if (!ofs || !ofs->good())
00120       do_error(vcl_string("Failed to open outout file ") + out_file().c_str());
00121     MBL_LOG(DEBUG, logger(), "Opened output file: " << out_file().c_str());
00122   }
00123 
00124   // Output requested statistics
00125   MBL_LOG(NOTICE, logger(), "in_file: " << in_file());
00126 
00127   // Write a line of column headers unless suppressed
00128   if (!nohead())
00129   {
00130     if (ofs && ofs->good()) *ofs << '#' << sep();
00131     for (vcl_map<vcl_string,double>::const_iterator it=stats.begin(); it!=stats.end(); ++it)
00132     {
00133       if (ofs && ofs->good()) *ofs << it->first << sep();
00134     }
00135     if (ofs && ofs->good()) *ofs << '\n';
00136   }
00137 
00138   // Write statistics in a single line
00139   if (ofs && ofs->good()) *ofs << in_file() << sep();
00140   for (vcl_map<vcl_string,double>::const_iterator it=stats.begin(); it!=stats.end(); ++it)
00141   {
00142     MBL_LOG(NOTICE, logger(), it->first << ": " << it->second);
00143     if (ofs && ofs->good()) *ofs << it->second << sep();
00144   }
00145   if (ofs && ofs->good()) *ofs << vcl_endl;
00146 
00147   // Close file
00148   if (ofs) ofs->close();
00149 
00150   return 0;
00151 }
00152 
00153 
00154 //=========================================================================
00155 // main() function with exception-handling wrapper
00156 //=========================================================================
00157 int main(int argc, char *argv[])
00158 {
00159   int errcode = 0;
00160 
00161   // Initialize the logger
00162   mbl_logger::root().load_log_config_file();
00163 
00164   try
00165   {
00166     errcode = main2(argc, argv);
00167   }
00168   catch (const vcl_exception & e)
00169   {
00170     vcl_cerr << "ERROR: " << typeid(e).name() << '\n' <<
00171       e.what() << vcl_endl;
00172     errcode = 4;
00173   }
00174 
00175   return errcode;
00176 }
00177