[vtkusers] kMeansStatistics: cluster center 0 in run 0 is degenerate. Attempting to perturb
Liam Kurmos
quantum.leaf at gmail.com
Sun Sep 4 06:58:56 EDT 2011
I'm getting lots of warnings like
kMeansStatistics: cluster center 0 in run 0 is degenerate. Attempting to perturb
when I increase the number of clusters above 3 (clustering a number of
~40-dimensional points). Essentially this occurs when the population of
a cluster is 0. I'm not sure why this is happening; the data should be
well distributed.
When I increase the number of clusters further, new clusters are
populated (as well as new degenerate centres).
E.g. with 4 clusters I only see 3, as one is degenerate (at least I get the
warning and 0 population in one cluster).
At first I thought it might mean k is too high for the data, causing
some means to converge, but if I increase k to, say, 10 I might have 6
populated clusters, which proves that clustering beyond 3 is possible.
Does this make sense?
Can anyone suggest what I might be doing wrong?
Should I be doing something with initialization? I realise now that
when I first looked at k-means I was confused between the
implementation in vtkKMeansStatistics and vtkKMeansClustering, which I
think are separate implementations of k-means. In which case, possibly
vtkKMeansStatistics does not use k-means++ and I should switch to using
vtkKMeansClustering?
best regards,
Liam
code for ref:
vector<int*>* ViewPathClusterFinder::getPathClusterMeans(int numberOfClusters) {
vector<int*>* meanPaths = new vector<int*>();
vtkSmartPointer<vtkTable> inputData =
vtkSmartPointer<vtkTable>::New();
for (int c = 0; c < pathLength; ++c) {
std::stringstream colName;
colName << "coord " << c*3;
vtkSmartPointer<vtkDoubleArray> doubleArrayX =
vtkSmartPointer<vtkDoubleArray>::New();
doubleArrayX->SetNumberOfComponents(1);
doubleArrayX->SetName(colName.str().c_str());
doubleArrayX->SetNumberOfTuples(viewPaths->size());
colName << "coord " << c*3+1;
vtkSmartPointer<vtkDoubleArray> doubleArrayY =
vtkSmartPointer<vtkDoubleArray>::New();
doubleArrayY->SetNumberOfComponents(1);
doubleArrayY->SetName(colName.str().c_str());
doubleArrayY->SetNumberOfTuples(viewPaths->size());
colName << "coord " << c*3+2;
vtkSmartPointer<vtkDoubleArray> doubleArrayZ =
vtkSmartPointer<vtkDoubleArray>::New();
doubleArrayZ->SetNumberOfComponents(1);
doubleArrayZ->SetName(colName.str().c_str());
doubleArrayZ->SetNumberOfTuples(viewPaths->size());
for (int r = 0; r < viewPaths->size(); ++r) {
vector<int*>::iterator it;
int* path = viewPaths->at(r); // int* path=viewPaths->
float vx = oc->geoSphere->getView(path[c])->getx();
//cout<<setprecision(16)<<" using x "<<vx<<endl;
float vy = oc->geoSphere->getView(path[c])->gety();
float vz = oc->geoSphere->getView(path[c])->getz();
//cout<<r<<" adding "<<val<<" to data"<<endl;
//cout<<"rt "<<randtest<<endl;
doubleArrayX->SetValue(r, vx);
doubleArrayY->SetValue(r, vy);
doubleArrayZ->SetValue(r, vz);
}
inputData->AddColumn(doubleArrayX);
inputData->AddColumn(doubleArrayY);
inputData->AddColumn(doubleArrayZ);
}
vtkSmartPointer<vtkKMeansStatistics> kMeansStatistics =
vtkSmartPointer<vtkKMeansStatistics>::New();
kMeansStatistics->SetInput(vtkStatisticsAlgorithm::INPUT_DATA, inputData);
for (int c = 0; c < pathLength*3; ++c) {
kMeansStatistics->SetColumnStatus(inputData->GetColumnName(c), 1);
}
//
// kMeansStatistics->SetLearnOption( 1 ); // This is on by default.
// kMeansStatistics->SetMaxNumIterations( 1 );
kMeansStatistics->RequestSelectedColumns();
kMeansStatistics->SetAssessOption(true);
kMeansStatistics->SetDefaultNumberOfClusters(numberOfClusters);
kMeansStatistics->Update();
//kMeansStatistics->GetOutput()->Dump();
float* minDistances=new float[numberOfClusters];
int* closestToMin=new int[numberOfClusters];
//find closest point to mean and add path to vector meanPaths.
pathClusters=new int[viewPaths->size()]();
for (int i = 0; i < numberOfClusters; i++) {
minDistances[i]=100;
closestToMin[i]=0;
int* pathPoints = new int[pathLength*3];
int clusterCount = 0;
for (unsigned int r = 0; r <
kMeansStatistics->GetOutput()->GetNumberOfRows(); r++) {
vtkVariant cluster =
kMeansStatistics->GetOutput()->GetValue(r,
kMeansStatistics->GetOutput()->GetNumberOfColumns() - 1);
vtkVariant distance =
kMeansStatistics->GetOutput()->GetValue(r,
kMeansStatistics->GetOutput()->GetNumberOfColumns() - 2);
pathClusters[r]=cluster.ToInt();
if (cluster.ToInt() == i) {
if(distance.ToFloat()<minDistances[i]){
minDistances[i]=distance.ToFloat();
closestToMin[i]=r;
}
}
}
int* path = viewPaths->at(closestToMin[i]);
meanPaths->push_back(path);
}
// vtkMultiBlockDataSet* outputData =
vtkMultiBlockDataSet::SafeDownCast(kMeansStatistics->GetOutputDataObject(vtkStatisticsAlgorithm::OUTPUT_MODEL));
// vtkTable* outputMeta =
vtkTable::SafeDownCast(outputData->GetBlock(0));
// outputMeta->Dump();
// cout << " num tabs " << outputData->GetNumberOfBlocks() << endl;
return meanPaths;
}
More information about the vtkusers
mailing list