Main Page | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Class Members | File Members

vpdfl_kernel_pdf_builder.cxx

Go to the documentation of this file.
00001 // This is mul/vpdfl/vpdfl_kernel_pdf_builder.cxx
00002 #ifdef VCL_NEEDS_PRAGMA_INTERFACE
00003 #pragma implementation
00004 #endif
00005 //:
00006 // \file
00007 // \author Tim Cootes
00008 // \brief Initialises kernel pdfs
00009 
00010 #include "vpdfl_kernel_pdf_builder.h"
00011 
00012 #include <vcl_cassert.h>
00013 #include <vcl_string.h>
00014 #include <vcl_cstdlib.h> // vcl_abort()
00015 #include <vcl_cmath.h>
00016 #include <vcl_vector.h>
00017 
00018 #include <mbl/mbl_data_wrapper.h>
00019 #include <mbl/mbl_data_array_wrapper.h>
00020 #include <vpdfl/vpdfl_kernel_pdf.h>
00021 #include <vnl/vnl_vector.h>
00022 #include <vpdfl/vpdfl_calc_mean_var.h>
00023 #if 0
00024 #include <mbl/mbl_priority_bounded_queue.h>
00025 #endif
00026 
00027 #include <mbl/mbl_parse_block.h>
00028 #include <mbl/mbl_read_props.h>
00029 #include <vul/vul_string.h>
00030 #include <mbl/mbl_exception.h>
00031 
00032 
00033 //=======================================================================
00034 // Dflt ctor
00035 //=======================================================================
00036 
00037 vpdfl_kernel_pdf_builder::vpdfl_kernel_pdf_builder()
00038     : min_var_(1.0e-6), build_type_(select_equal), fixed_width_(1.0)
00039 {
00040 }
00041 
00042 //=======================================================================
00043 // Destructor
00044 //=======================================================================
00045 
00046 vpdfl_kernel_pdf_builder::~vpdfl_kernel_pdf_builder()
00047 {
00048 }
00049 
00050 //=======================================================================
00051 
00052 vpdfl_kernel_pdf& vpdfl_kernel_pdf_builder::kernel_pdf(vpdfl_pdf_base& model) const
00053 {
00054   // require a vpdfl_kernel_pdf
00055   assert(model.is_class("vpdfl_kernel_pdf"));
00056   return static_cast<vpdfl_kernel_pdf&>(model);
00057 }
00058 
00059 //: Use fixed width kernels of given width when building.
00060 void vpdfl_kernel_pdf_builder::set_use_fixed_width(double width)
00061 {
00062   build_type_ = fixed_width;
00063   fixed_width_ = width;
00064 }
00065 
00066 //: Use equal width kernels of width depending on number of samples.
00067 void vpdfl_kernel_pdf_builder::set_use_equal_width()
00068 {
00069   build_type_ = select_equal;
00070 }
00071 
00072 //: Kernel width proportional to distance to nearby samples.
00073 void vpdfl_kernel_pdf_builder::set_use_width_from_separation()
00074 {
00075   build_type_ = width_from_sep;
00076 }
00077 
00078 //: Build adaptive kernel estimate.
00079 void vpdfl_kernel_pdf_builder::set_use_adaptive()
00080 {
00081   build_type_ = adaptive;
00082 }
00083 
00084 //=======================================================================
00085 //: Define lower threshold on variance for built models
00086 //=======================================================================
00087 void vpdfl_kernel_pdf_builder::set_min_var(double min_var)
00088 {
00089   min_var_ = min_var;
00090 }
00091 
00092 //=======================================================================
00093 //: Get lower threshold on variance for built models
00094 //=======================================================================
00095 double vpdfl_kernel_pdf_builder::min_var() const
00096 {
00097   return min_var_;
00098 }
00099 
00100 void vpdfl_kernel_pdf_builder::build(vpdfl_pdf_base& model,
00101                                      const vnl_vector<double>& mean) const
00102 {
00103   vpdfl_kernel_pdf& kpdf = kernel_pdf(model);
00104 
00105   vcl_vector<vnl_vector<double> >m(1);
00106   m[0] = mean;
00107   kpdf.set_centres(&m[0],1,vcl_sqrt(min_var_));
00108 }
00109 
00110 //: Build kernel_pdf from n elements in data[i]
00111 void vpdfl_kernel_pdf_builder::build_from_array(vpdfl_pdf_base& model,
00112                                                 const vnl_vector<double>* data, int n) const
00113 {
00114   vpdfl_kernel_pdf& kpdf = kernel_pdf(model);
00115 
00116   if (n<1)
00117   {
00118     vcl_cerr<<"vpdfl_kernel_pdf_builder::build() No examples available.\n";
00119     vcl_abort();
00120   }
00121 
00122   switch (build_type_)
00123   {
00124     case fixed_width:
00125     build_fixed_width(kpdf,data,n,fixed_width_);
00126     break;
00127     case select_equal:
00128     build_select_equal_width(kpdf,data,n);
00129     break;
00130     case width_from_sep:
00131     build_width_from_separation(kpdf,data,n);
00132     break;
00133     case adaptive:
00134     build_adaptive(kpdf,data,n);
00135     break;
00136     default:
00137     vcl_cerr<<"vpdfl_kernel_pdf_builder::build_from_array() Unknown build type.\n";
00138     vcl_abort();
00139   }
00140 }
00141 
00142 void vpdfl_kernel_pdf_builder::build(vpdfl_pdf_base& model, mbl_data_wrapper<vnl_vector<double> >& data) const
00143 {
00144   /* vpdfl_kernel_pdf& kpdf = */ kernel_pdf(model);
00145 
00146   unsigned long n = data.size();
00147 
00148   if (n<1L)
00149   {
00150     vcl_cerr<<"vpdfl_kernel_pdf_builder::build() No examples available.\n";
00151     vcl_abort();
00152   }
00153 
00154   if (data.is_class("mbl_data_array_wrapper<T>"))
00155   {
00156     mbl_data_array_wrapper<vnl_vector<double> >& data_array =
00157                    static_cast<mbl_data_array_wrapper<vnl_vector<double> >&>( data);
00158     build_from_array(model,data_array.data(),n);
00159     return;
00160   }
00161 
00162   // Fill array with data
00163   vcl_vector<vnl_vector<double> >x(n);
00164   data.reset();
00165   for (unsigned long i=0;i<n;++i)
00166   {
00167     x[i]=data.current();
00168     data.next();
00169   }
00170 
00171   build_from_array(model,&x[0],n);
00172 }
00173 
00174 void vpdfl_kernel_pdf_builder::weighted_build(vpdfl_pdf_base& model,
00175                                             mbl_data_wrapper<vnl_vector<double> >& data,
00176                                             const vcl_vector<double>& /*wts*/) const
00177 {
00178   vcl_cerr<<"vpdfl_kernel_pdf_builder::weighted_build() Ignoring weights.\n";
00179   build(model,data);
00180 }
00181 
00182 //: Build from n elements in data[i]
00183 void vpdfl_kernel_pdf_builder::build_fixed_width(vpdfl_kernel_pdf& kpdf,
00184                              const vnl_vector<double>* data, int n, double width) const
00185 {
00186   kpdf.set_centres(data,n,width);
00187 }
00188 
00189 //: Build from n elements in data[i].  Chooses width.
00190 //  Same width selected for all points, using
00191 //  $w=(4/(2n+d.n)^{1/(d+4)}\sigma$, as suggested by Silverman
00192 //  Note: This value only suitable for gaussian kernels!
00193 void vpdfl_kernel_pdf_builder::build_select_equal_width(vpdfl_kernel_pdf& kpdf,
00194                               const vnl_vector<double>* data, int n) const
00195 {
00196   vnl_vector<double> m,var;
00197   vpdfl_calc_mean_var(m,var,data,n);
00198 
00199   double mean_var = var.mean();
00200   if (mean_var<min_var_) var=min_var_;
00201 
00202   double d = data[0].size();
00203 
00204   // See Silverman, p88-89 : This is suitable for Gaussian kernels
00205   double k_var = mean_var*vcl_pow(4.0/(n*(d+2)),2.0/(d+4));
00206   double w = vcl_sqrt(k_var);
00207 
00208   build_fixed_width(kpdf,data,n,w);
00209 }
00210 
00211 //: Kernel width proportional to distance to nearby samples.
00212 void vpdfl_kernel_pdf_builder::build_width_from_separation(vpdfl_kernel_pdf& kpdf,
00213                                const vnl_vector<double>* data, int n) const
00214 {
00215   vnl_vector<double> width(n);
00216   double* w=width.data_block();
00217   const double min_diff = 1e-6;
00218 
00219   //const unsigned k = 2;  // Second nearest neighbour
00220   for (int i=0;i<n;++i)
00221   {
00222 #if 0
00223     mbl_priority_bounded_queue<double,vcl_vector<double>,vcl_less<double> > d_sq(k);
00224 #endif
00225 
00226     // Number of repeats of the point
00227     // If resampling used, some points will be present several times
00228     int n_repeats=0;
00229     double min_d2= -1.0;
00230     for (int j=0;j<n;++j)
00231     {
00232       if (j!=i)
00233       {
00234         double d2 = vnl_vector_ssd(data[i],data[j]);
00235 
00236         if (d2<min_diff)
00237           n_repeats++;
00238         else
00239           if (d2<min_d2 || min_d2<0) min_d2=d2;
00240 #if 0
00241         d_sq.push(d2);
00242 #endif
00243       }
00244     }
00245 
00246     // Width set to distance to k-th nearest neighbour
00247 #if 0
00248     w[i] = vcl_sqrt(d_sq.top());
00249 #endif
00250 
00251     //: Width to nearest neighbour, allowing for repeats
00252     if (min_d2<min_var_) min_d2=min_var_;
00253     w[i] = vcl_sqrt(min_d2)/(n_repeats+1);
00254   }
00255 
00256   kpdf.set_centres(data,n,width);
00257 }
00258 
00259 //: Build adaptive kernel estimate.
00260 //  Use equal widths to create a pilot estimate, then use the prob at each
00261 //  data point to modify the widths
00262 void vpdfl_kernel_pdf_builder::build_adaptive(vpdfl_kernel_pdf& kpdf,
00263                                const vnl_vector<double>* data, int n) const
00264 {
00265   // First build the pilot estimate
00266   build_select_equal_width(kpdf,data,n);
00267 
00268   // Evaluate the pdf at each point
00269   vnl_vector<double> log_p(n);
00270   for (int i=0;i<n;++i)
00271   {
00272     log_p[i]=kpdf.log_p(data[i]);
00273   }
00274 
00275   double log_mean = log_p.mean();
00276 
00277   vnl_vector<double> new_width = kpdf.width();
00278 
00279   for (int i=0;i<n;++i)
00280   {
00281     // Scale each inversely by sqrt(prob)
00282     // Check: Should there be a power of d in there?
00283     new_width[i] *= vcl_exp(-0.5*(log_p[i]-log_mean));
00284   }
00285 
00286   kpdf.set_centres(data,n,new_width);
00287 }
00288 
00289 //=======================================================================
00290 // Method: is_a
00291 //=======================================================================
00292 
00293 vcl_string vpdfl_kernel_pdf_builder::is_a() const
00294 {
00295   return vcl_string("vpdfl_kernel_pdf_builder");
00296 }
00297 
00298 //=======================================================================
00299 // Method: is_class
00300 //=======================================================================
00301 
00302 bool vpdfl_kernel_pdf_builder::is_class(vcl_string const& s) const
00303 {
00304   return vpdfl_builder_base::is_class(s) || s==vpdfl_kernel_pdf_builder::is_a();
00305 }
00306 
00307 //=======================================================================
00308 // Method: version_no
00309 //=======================================================================
00310 
00311 short vpdfl_kernel_pdf_builder::version_no() const
00312 {
00313   return 1;
00314 }
00315 
00316 //=======================================================================
00317 // Method: print
00318 //=======================================================================
00319 
00320 void vpdfl_kernel_pdf_builder::print_summary(vcl_ostream& os) const
00321 {
00322   os << "Min. var.: "<< min_var_;
00323 }
00324 
00325 //=======================================================================
00326 // Method: save
00327 //=======================================================================
00328 
00329 void vpdfl_kernel_pdf_builder::b_write(vsl_b_ostream& bfs) const
00330 {
00331   vsl_b_write(bfs,version_no());
00332   vsl_b_write(bfs,min_var_);
00333 }
00334 
00335 //=======================================================================
00336 // Method: load
00337 //=======================================================================
00338 
00339 void vpdfl_kernel_pdf_builder::b_read(vsl_b_istream& bfs)
00340 {
00341   if (!bfs) return;
00342 
00343   short version;
00344   vsl_b_read(bfs,version);
00345   switch (version)
00346   {
00347     case (1):
00348       vsl_b_read(bfs,min_var_);
00349       break;
00350     default:
00351       vcl_cerr << "I/O ERROR: vsl_b_read(vsl_b_istream&, vpdfl_kernel_pdf_builder &)\n"
00352                << "           Unknown version number "<< version << vcl_endl;
00353       bfs.is().clear(vcl_ios::badbit); // Set an unrecoverable IO error on stream
00354       return;
00355   }
00356 }
00357 
00358 //: Read initialisation settings from a stream.
00359 // Parameters:
00360 // \verbatim
00361 // {
00362 //   min_var: 1.0e-6
00363 //   // kernel_widths can be fixed_width,select_equal,width_from_sep,adaptive
00364 //   kernel_widths: fixed_width
00365 //   // Width to be used when it is fixed_width
00366 //   fixed_width: 1.0
00367 // }
00368 // \endverbatim
00369 // \throw mbl_exception_parse_error if the parse fails.
00370 void vpdfl_kernel_pdf_builder::config_from_stream(vcl_istream & is)
00371 {
00372   vcl_string s = mbl_parse_block(is);
00373 
00374   vcl_istringstream ss(s);
00375   mbl_read_props_type props = mbl_read_props_ws(ss);
00376 
00377   double mv=1.0e-6;
00378   if (!props["min_var"].empty())
00379   {
00380     mv=vul_string_atof(props["min_var"]);
00381     props.erase("min_var");
00382   }
00383   set_min_var(mv);
00384 
00385   build_type bt=select_equal;
00386   if (!props["kernel_widths"].empty())
00387   {
00388     if (props["kernel_widths"]=="fixed_width") bt=fixed_width;
00389     else
00390     if (props["kernel_widths"]=="select_equal") bt=select_equal;
00391     else
00392     if (props["kernel_widths"]=="width_from_sep") bt=width_from_sep;
00393     else
00394     if (props["kernel_widths"]=="adaptive") bt=adaptive;
00395     else
00396     {
00397       vcl_string msg="Unknown kernel_width type : "+props["kernel_widths"];
00398       throw mbl_exception_parse_error(msg);
00399     }
00400     props.erase("kernel_widths");
00401   }
00402   build_type_ = bt;
00403 
00404   fixed_width_=1.0;
00405   if (!props["fixed_width"].empty())
00406   {
00407     fixed_width_=vul_string_atof(props["fixed_width"]);
00408     props.erase("fixed_width");
00409   }
00410 
00411   try
00412   {
00413     mbl_read_props_look_for_unused_props(
00414         "vpdfl_kernel_pdf_builder::config_from_stream", props);
00415   }
00416 
00417   catch(mbl_exception_unused_props &e)
00418   {
00419     throw mbl_exception_parse_error(e.what());
00420   }
00421 
00422 }
00423 

Generated on Thu Jan 10 14:43:32 2008 for contrib/mul/vpdfl by  doxygen 1.4.4