contrib/mul/vpdfl/vpdfl_pc_gaussian_builder.cxx
Go to the documentation of this file.
00001 // This is mul/vpdfl/vpdfl_pc_gaussian_builder.cxx
00002 #ifdef VCL_NEEDS_PRAGMA_INTERFACE
00003 #pragma implementation
00004 #endif
00005 //:
00006 // \file
00007 // \brief Interface for Multi-variate Principle Component gaussian PDF Builder.
00008 // \author Ian Scott
00009 // \date 21-Jul-2000
00010 //
00011 // Modifications
00012 // \verbatim
00013 // 23 April 2001 IMS - Ported to VXL
00014 // \endverbatim
00015 
00016 #include "vpdfl_pc_gaussian_builder.h"
00017 
00018 #include <vcl_string.h>
00019 #include <vcl_cassert.h>
00020 #include <vcl_cstdlib.h> // for vcl_abort()
00021 #include <mbl/mbl_data_wrapper.h>
00022 #include <vnl/vnl_math.h>
00023 #include <vnl/vnl_c_vector.h>
00024 #include <vnl/algo/vnl_symmetric_eigensystem.h>
00025 #include <vpdfl/vpdfl_gaussian_builder.h>
00026 #include <vpdfl/vpdfl_pdf_base.h>
00027 #include <vpdfl/vpdfl_pc_gaussian.h>
00028 #include <mbl/mbl_parse_block.h>
00029 #include <mbl/mbl_read_props.h>
00030 #include <mbl/mbl_exception.h>
00031 #include <vul/vul_string.h>
00032 
00033 //=======================================================================
00034 
00035 vpdfl_pc_gaussian_builder::vpdfl_pc_gaussian_builder() :
00036   partitionMethod_(vpdfl_pc_gaussian_builder::fixed),
00037   proportionOfVariance_(0),
00038   fixed_partition_(1)
00039 {
00040 }
00041 
00042 //=======================================================================
00043 
00044 vpdfl_pc_gaussian_builder::~vpdfl_pc_gaussian_builder()
00045 {
00046 }
00047 
00048 //=======================================================================
00049 
00050 //: Use proportion of variance to decide on the number of principle components.
00051 // Specify the proportion (between 0 and 1).
00052 // The default setting uses a fixed number of principle components.
00053 void vpdfl_pc_gaussian_builder::set_proportion_partition( double proportion)
00054 {
00055   assert(proportion >= 0.0);
00056   assert(proportion <= 1.0);
00057 
00058   proportionOfVariance_ = proportion;
00059   partitionMethod_ = proportionate;
00060 }
00061 
00062 //=======================================================================
00063 
00064 //: Set the number of principle components when using fixed partition.
00065 void vpdfl_pc_gaussian_builder::set_fixed_partition(int n_principle_components)
00066 {
00067   assert(n_principle_components >=0);
00068   fixed_partition_ = n_principle_components;
00069   partitionMethod_ = vpdfl_pc_gaussian_builder::fixed;
00070 }
00071 
00072 //=======================================================================
00073 
00074 vpdfl_pc_gaussian& vpdfl_pc_gaussian_builder::gaussian(vpdfl_pdf_base& model) const
00075 {
00076     // need a vpdfl_gaussian
00077   assert(model.is_class("vpdfl_pc_gaussian"));
00078   return static_cast<vpdfl_pc_gaussian&>( model);
00079 }
00080 
00081 //=======================================================================
00082 
00083 vpdfl_pdf_base* vpdfl_pc_gaussian_builder::new_model() const
00084 {
00085   return new vpdfl_pc_gaussian();
00086 }
00087 
00088 //=======================================================================
00089 
00090 void vpdfl_pc_gaussian_builder::build(vpdfl_pdf_base& model,
00091                                       const vnl_vector<double>& mean) const
00092 {
00093   vpdfl_pc_gaussian& g = gaussian(model);
00094   int n = mean.size();
00095 
00096   // Generate an identity matrix for eigenvectors
00097   vnl_matrix<double> P(n,n);
00098   P.fill(0);
00099   P.fill_diagonal(1.0);
00100 
00101   g.set(mean,P,vnl_vector<double>(0), min_var());
00102 }
00103 
00104 #if 0 // this doesn't work
00105     //: Build model from mean and covariance
00106 void vpdfl_pc_gaussian_builder::buildFromCovar(vpdfl_pc_gaussian& g,
00107                                                const vnl_vector<double>& mean,
00108                                                const vnl_matrix<double>& S,
00109                                                unsigned nPrinComps) const
00110 {
00111   int n = mean.size();
00112   vnl_matrix<double> evecs;
00113   vnl_vector<double> evals;
00114 
00115   NR_CalcSymEigens(S,evecs,evals,0);
00116   vnl_vector<double> principleEVals(nPrinComps);
00117 
00118   // Apply threshold to variance
00119   for (int i=1;i<=nPrinComps;++i)
00120     if (evals(i)<min_var())
00121       principleEVals(i)=min_var();
00122     else
00123       principleEVals(i)=evals(i);
00124 
00125   double sum = 0.0; // The sum of the complementary space eigenvalues.
00126   for (int i=nPrinComps+1; i <= n; i++)
00127     sum += evals(i);
00128 
00129     // The Eigenvalue of the complementary space basis vectors
00130   double complementaryEVals = sum / (n - nPrinComps);
00131 
00132   if (complementaryEVals < min_var()) complementaryEVals = min_var();
00133 
00134   g.set(mean, evecs, principleEVals, complementaryEVals);
00135 }
00136 #endif
00137 
00138 
00139 //: replace any eigenvalues that are less than zero, with zero.
00140 // Small negative eigenvalues can be generated due to rounding errors.
00141 // This function assumes that the eigenvalues are stored in descending order.
00142 static void eValsFloorZero(vnl_vector<double> &v)
00143 {
00144   int n = v.size();
00145   double *v_data = v.data_block();
00146   int i=n-1;
00147   while (i && v_data[i] < 0.0)
00148   {
00149     v_data[i]=0.0;
00150     i--;
00151   }
00152 }
00153 
00154 
00155 void vpdfl_pc_gaussian_builder::build(vpdfl_pdf_base& model,
00156                                       mbl_data_wrapper<vnl_vector<double> >& data) const
00157 {
00158   vpdfl_pc_gaussian& g = gaussian(model);
00159 
00160   unsigned long n_samples = data.size();
00161   assert (n_samples>=2L);
00162 
00163   int n = data.current().size();
00164 
00165   vnl_vector<double> mean;
00166 //vnl_matrix<double> evecs;
00167 //vnl_vector<double> evals;
00168   vnl_matrix<double> evecs(n,n);
00169   vnl_vector<double> evals(n);
00170   vnl_matrix<double> S;
00171 
00172   meanCovar(mean,S,data);
00173 
00174   vnl_symmetric_eigensystem_compute(S, evecs, evals);
00175   // eigenvalues are lowest first here
00176   evals.flip();
00177   evecs.fliplr();
00178   // eigenvalues are highest first now
00179 
00180   int n_principle_components = decide_partition(evals, n_samples, 0);
00181 
00182   vnl_vector<double> principleEVals(n_principle_components);
00183 
00184   // Apply threshold to variance
00185   for (int i=0;i<n_principle_components;++i)
00186     if (evals(i)<min_var())
00187       principleEVals(i)=min_var();
00188     else
00189       principleEVals(i)=evals(i);
00190 
00191   double eVsum = 0.0; // The sum of the complementary space eigenvalues.
00192   for (int i=n_principle_components; i < n; i++)
00193     eVsum += evals(i);
00194 
00195     // The Eigenvalue of the complementary space basis vectors
00196   double complementaryEVals = eVsum / (n - n_principle_components);
00197 
00198   if (complementaryEVals < min_var()) complementaryEVals = min_var();
00199 
00200   g.set(mean, evecs, principleEVals, complementaryEVals);
00201 }
00202 
00203 //: Computes mean and covariance of given data
00204 void vpdfl_pc_gaussian_builder::mean_covar(vnl_vector<double>& mean, vnl_matrix<double>& S,
00205                                            mbl_data_wrapper<vnl_vector<double> >& data) const
00206 {
00207   unsigned long n_samples = data.size();
00208 
00209   assert (n_samples!=0L);
00210 
00211   int n_dims = data.current().size();
00212   vnl_vector<double> sum(n_dims);
00213   sum.fill(0);
00214 
00215   S.set_size(0,0);
00216 
00217   data.reset();
00218   for (unsigned long i=0;i<n_samples;i++)
00219   {
00220     sum += data.current();
00221     updateCovar(S,data.current(),1.0);
00222 
00223     data.next();
00224   }
00225 
00226   mean = sum;
00227   mean/=n_samples;
00228   S/=n_samples;
00229   updateCovar(S,mean,-1.0);
00230 }
00231 
00232 
00233 void vpdfl_pc_gaussian_builder::weighted_build(vpdfl_pdf_base& model,
00234                                                mbl_data_wrapper<vnl_vector<double> >& data,
00235                                                const vcl_vector<double>& wts) const
00236 {
00237   vpdfl_pc_gaussian& g = gaussian(model);
00238 
00239   unsigned long n_samples = data.size();
00240 
00241   if (n_samples<2L)
00242   {
00243     vcl_cerr<<"vpdfl_gaussian_builder::weighted_build() Too few examples available.\n";
00244     vcl_abort();
00245   }
00246 
00247   data.reset();
00248   const int n = data.current().size();
00249   vnl_vector<double> sum(n);
00250   sum.fill(0.0);
00251   vnl_matrix<double> evecs(n,n);
00252   vnl_vector<double> evals(n);
00253   vnl_matrix<double> S;
00254   double w_sum = 0.0;
00255   double w;
00256   unsigned actual_samples = 0;
00257 
00258   for (unsigned long i=0;i<n_samples;i++)
00259   {
00260     w = wts[i];
00261     if (w != 0.0) // Common case - save time.
00262     {
00263       actual_samples ++;
00264       w_sum += w;
00265       data.current().assert_finite();
00266       sum += w*data.current();
00267       updateCovar(S,data.current(),w);
00268     }
00269     data.next();
00270   }
00271 
00272   updateCovar(S,sum,-1.0/w_sum);
00273   S*=actual_samples/((actual_samples - 1) *w_sum);
00274   sum/=w_sum;
00275   // now sum = weighted mean
00276   // and S = weighted covariance corrected for unbiased rather than ML result.
00277 
00278 
00279   vnl_symmetric_eigensystem_compute(S, evecs, evals);
00280   // eigenvalues are lowest first here
00281   evals.flip();
00282   evecs.fliplr();
00283   // eigenvalues are highest first now
00284 
00285 #if 0
00286   vcl_cerr << 'S' << S <<vcl_endl
00287            << "evals" << evals <<vcl_endl
00288            << "evecs" << evecs <<vcl_endl;
00289 #endif
00290 
00291   eValsFloorZero(evals);
00292 
00293   int n_principle_components = decide_partition(evals, n);
00294 
00295   vnl_vector<double> principleEVals(n_principle_components);
00296 
00297   // Apply threshold to variance
00298   for (int i=0;i<n_principle_components;++i)
00299     if (evals(i)<min_var())
00300       principleEVals(i)=min_var();
00301     else
00302       principleEVals(i)=evals(i);
00303   double eVsum = 0.0; // The sum of the complementary space eigenvalues.
00304   for (int i=n_principle_components; i < n; i++)
00305     eVsum += evals(i);
00306 
00307     // The Eigenvalue of the complementary space basis vectors
00308   double complementaryEVals;
00309   if (n_principle_components != n) // avoid divide by 0
00310     complementaryEVals = eVsum / (n - n_principle_components);
00311   else
00312     complementaryEVals = 0.0; // actual could be any value.
00313 
00314   if (complementaryEVals < min_var()) complementaryEVals = min_var();
00315 
00316   g.set(sum, evecs, principleEVals, complementaryEVals);
00317 }
00318 
00319 
00320 //: Decide where to partition an Eigenvector space
00321 // Returns the number of principle components to be used.
00322 // Pass in the Eigenvalues (eVals), the number of samples
00323 // that went to make up this Gaussian (nSamples), and the noise floor
00324 // for the dataset. The method may use simplified algorithms if
00325 // you indicate that the number of samples or noise floor is unknown
00326 // (by setting the latter parameters to 0.)
00327 unsigned vpdfl_pc_gaussian_builder::decide_partition(const vnl_vector<double>& eVals,
00328                                                      unsigned /*nSamples =0*/,
00329                                                      double   /*noise =0.0*/) const
00330 {
00331   assert (eVals.size() > 0);
00332   if (partitionMethod_ == vpdfl_pc_gaussian_builder::fixed)
00333   {
00334     return vnl_math_min(eVals.size(), (unsigned)fixed_partition()+1);;
00335   }
00336   else if (partitionMethod_ == proportionate)
00337   {
00338     double sum = vnl_c_vector<double>::sum(eVals.data_block(), eVals.size());
00339     assert (proportionOfVariance_ < 1.0 && proportionOfVariance_ > 0.0);
00340     double stopWhen = sum * proportionOfVariance_;
00341     sum = eVals(0);
00342     unsigned i=0;
00343     while (sum <= stopWhen)
00344     {
00345       i++;
00346       sum += eVals(i);
00347     }
00348     return i;
00349   }
00350   else
00351   {
00352     vcl_cerr << "vpdfl_pc_gaussian_builder::decide_partition(): Unexpected partition method: "
00353              << (short)partitionMethod_ << '\n';
00354     vcl_abort();
00355     return 0;
00356   }
00357 }
00358 
00359 //: Read initialisation settings from a stream.
00360 // Parameters:
00361 // \verbatim
00362 // {
00363 //   mode_choice: fixed  // Alternative: proportionate
00364 //   var_prop: 0.95
00365 //   n_modes: 3
00366 //   min_var: 1.0e-6
00367 // }
00368 // \endverbatim
00369 // \throw mbl_exception_parse_error if the parse fails.
00370 void vpdfl_pc_gaussian_builder::config_from_stream(vcl_istream & is)
00371 {
00372   vcl_string s = mbl_parse_block(is);
00373 
00374   vcl_istringstream ss(s);
00375   mbl_read_props_type props = mbl_read_props_ws(ss);
00376 
00377   if (props.find("mode_choice")!=props.end())
00378   {
00379     if (props["mode_choice"]=="fixed")
00380       partitionMethod_=fixed;
00381     else
00382     if (props["mode_choice"]=="proportionate")
00383       partitionMethod_=proportionate;
00384     else
00385     {
00386       vcl_string err_msg = "Unknown mode_choice: "+props["mode_choice"];
00387       throw mbl_exception_parse_error(err_msg);
00388     }
00389 
00390     props.erase("mode_choice");
00391   }
00392 
00393   if (props.find("var_prop")!=props.end())
00394   {
00395     proportionOfVariance_=vul_string_atof(props["var_prop"]);
00396     props.erase("var_prop");
00397   }
00398 
00399   if (props.find("n_modes")!=props.end())
00400   {
00401     fixed_partition_=vul_string_atoi(props["n_modes"]);
00402     props.erase("n_modes");
00403   }
00404 
00405   double mv=1.0e-6;
00406   if (props.find("min_var")!=props.end())
00407   {
00408     mv=vul_string_atof(props["min_var"]);
00409     props.erase("min_var");
00410   }
00411   set_min_var(mv);
00412 
00413   try
00414   {
00415     mbl_read_props_look_for_unused_props(
00416         "vpdfl_axis_gaussian_builder::config_from_stream", props);
00417   }
00418   catch(mbl_exception_unused_props &e)
00419   {
00420     throw mbl_exception_parse_error(e.what());
00421   }
00422 }
00423 
00424 
00425 //=======================================================================
00426 
00427 vcl_string vpdfl_pc_gaussian_builder::is_a() const
00428 {
00429   static vcl_string class_name_ = "vpdfl_pc_gaussian_builder";
00430   return class_name_;
00431 }
00432 
00433 //=======================================================================
00434 // Method: is_class
00435 //=======================================================================
00436 
00437 bool vpdfl_pc_gaussian_builder::is_class(vcl_string const& s) const
00438 {
00439   return vpdfl_gaussian_builder::is_class(s) || s==vpdfl_pc_gaussian_builder::is_a();
00440 }
00441 
00442 //=======================================================================
00443 // Method: version_no
00444 //=======================================================================
00445 
00446 short vpdfl_pc_gaussian_builder::version_no() const
00447 {
00448   return 2;
00449 }
00450 
00451 //=======================================================================
00452 // Method: clone
00453 //=======================================================================
00454 
00455 vpdfl_builder_base* vpdfl_pc_gaussian_builder::clone() const
00456 {
00457   return new vpdfl_pc_gaussian_builder(*this);
00458 }
00459 
00460 //=======================================================================
00461 // Method: print
00462 //=======================================================================
00463 
00464 void vpdfl_pc_gaussian_builder::print_summary(vcl_ostream& os) const
00465 {
00466   vpdfl_gaussian_builder::print_summary(os);
00467   if (partitionMethod_==fixed) os<<" mode_choice: fixed ";
00468   if (partitionMethod_==proportionate)
00469     os<<" mode_choice: proportionate ";
00470   os<<" var_prop: "<<proportionOfVariance_
00471     <<" n_fixed: "<<fixed_partition_<<' ';
00472 }
00473 
00474 //=======================================================================
00475 // Method: save
00476 //=======================================================================
00477 
00478 void vpdfl_pc_gaussian_builder::b_write(vsl_b_ostream& bfs) const
00479 {
00480   vsl_b_write(bfs, is_a());
00481   vsl_b_write(bfs, version_no());
00482   vpdfl_gaussian_builder::b_write(bfs);
00483   vsl_b_write(bfs,(short)partitionMethod_);
00484   vsl_b_write(bfs, proportionOfVariance_);
00485   vsl_b_write(bfs, fixed_partition_);
00486 }
00487 
00488 //=======================================================================
00489 // Method: load
00490 //=======================================================================
00491 
00492 void vpdfl_pc_gaussian_builder::b_read(vsl_b_istream& bfs)
00493 {
00494   if (!bfs) return;
00495 
00496   vcl_string name;
00497   vsl_b_read(bfs,name);
00498   if (name != is_a())
00499   {
00500     vcl_cerr << "I/O ERROR: vsl_b_read(vsl_b_istream&, vpdfl_pc_gaussian_builder &)\n"
00501              << "           Attempted to load object of type "
00502              << name <<" into object of type " << is_a() << '\n';
00503     bfs.is().clear(vcl_ios::badbit); // Set an unrecoverable IO error on stream
00504     return;
00505   }
00506 
00507   short temp;
00508   short version;
00509   vsl_b_read(bfs,version);
00510   switch (version)
00511   {
00512     case 1:
00513       vpdfl_gaussian_builder::b_read(bfs);
00514       vsl_b_read(bfs, temp);
00515       partitionMethod_ = partitionMethods(temp);
00516       vsl_b_read(bfs, proportionOfVariance_);
00517       fixed_partition_ = 75;
00518       break;
00519     case 2:
00520       vpdfl_gaussian_builder::b_read(bfs);
00521       vsl_b_read(bfs, temp);
00522       partitionMethod_ = partitionMethods(temp);
00523       vsl_b_read(bfs, proportionOfVariance_);
00524       vsl_b_read(bfs, fixed_partition_);
00525       break;
00526     default:
00527       vcl_cerr << "I/O ERROR: vsl_b_read(vsl_b_istream&, vpdfl_pc_gaussian_builder &)\n"
00528                << "           Unknown version number "<< version << '\n';
00529       bfs.is().clear(vcl_ios::badbit); // Set an unrecoverable IO error on stream
00530       return;
00531   }
00532 }