// Pre-allocates storage for n items in both the hash and link vectors;
// rebuilds the index up-front when the map would otherwise need to grow.
void HashBase::Reserve(int n)
{
	hash.Reserve(n);
	link.Reserve(n);
	int needed = (int)HashBound(n);
	if(needed > mcount)
		Reindex(n);
}
// Copy-variant constructor (the unnamed int selects this overload).
// Copies the stored hashes from b, resets all index bookkeeping, then
// rebuilds the map from scratch.
HashBase::HashBase(const HashBase& b, int)
:	hash(b.hash, 0)
{
	unlinked = -1;
	mcount = 0;
	map = NULL;
	Reindex();
}
// Streams the hash values in/out. A format-version marker is serialized
// first; on load the stale index is cleared before the hashes are read,
// and the index is rebuilt afterwards in both directions.
void HashBase::Serialize(Stream& s)
{
	int version = 0;
	s / version;
	if(s.IsLoading())
		ClearIndex();
	hash.Serialize(s);
	Reindex();
}
// Sometimes deleting a cluster will improve the score, when you take into accout // the BIC. This function sees if this is the case. It will not delete more than // one cluster at a time. void KK::ConsiderDeletion() { int c, p, CandidateClass; float Loss, DeltaPen; Array<float> DeletionLoss(MaxPossibleClusters); // the increase in log P by deleting the cluster for(c=1; c<MaxPossibleClusters; c++) { if (ClassAlive[c]) DeletionLoss[c] = 0; else DeletionLoss[c] = HugeScore; // don't delete classes that are already there } // compute losses by deleting clusters for(p=0; p<nPoints; p++) { DeletionLoss[Class[p]] += LogP[p*MaxPossibleClusters + Class2[p]] - LogP[p*MaxPossibleClusters + Class[p]]; } // find class with least to lose Loss = HugeScore; for(c=1; c<MaxPossibleClusters; c++) { if (DeletionLoss[c]<Loss) { Loss = DeletionLoss[c]; CandidateClass = c; } } // what is the change in penalty? DeltaPen = Penalty(nClustersAlive) - Penalty(nClustersAlive-1); //Output("cand Class %d would lose %f gain is %f\n", CandidateClass, Loss, DeltaPen); // is it worth it? if (Loss<DeltaPen) { Output("Deleting Class %d. Lose %f but Gain %f\n", CandidateClass, Loss, DeltaPen); // set it to dead ClassAlive[CandidateClass] = 0; // re-allocate all of its points for(p=0;p<nPoints; p++) if(Class[p]==CandidateClass) Class[p] = Class2[p]; } Reindex(); }
// CEM(StartFile) - Does a whole CEM algorithm from a random start
// optional start file loads this cluster file to start iteration
// if Recurse is 0, it will not try and split.
// if InitRand is 0, use cluster assignments already in structure
// Returns the final penalized score for the clustering.
float KK::CEM(const mxArray *InputClass/*= NULL*/, int Recurse /*=1*/, int InitRand /*=1*/) {
	int p, c;
	int nChanged;      // points whose class changed this iteration
	int Iter;
	Array<int> OldClass(nPoints);
	float Score = HugeScore, OldScore;
	int LastStepFull; // stores whether the last step was a full one
	int DidSplit;

	// Initial assignment: explicit start file wins; otherwise random start
	// (unless InitRand==0, which keeps whatever is already in Class[]).
	if (InputClass!= NULL) LoadClu(InputClass);
	else if (InitRand) {
		// initialize data to random
		if (nStartingClusters>1) for(p=0; p<nPoints; p++) Class[p] = irand(1, nStartingClusters-1);
		else for(p=0; p<nPoints; p++) Class[p] = 0;
		for(c=0; c<MaxPossibleClusters; c++) ClassAlive[c] = (c<nStartingClusters);
	}

	// set all classes to alive
	Reindex();

	// main loop
	Iter = 0;
	FullStep = 1;
	do {
		// Store old classifications so nChanged can be computed afterwards
		for(p=0; p<nPoints; p++) OldClass[p] = Class[p];

		// M-step - calculate class weights, means, and covariance matrices for each class
		MStep();

		// E-step - calculate scores for each point to belong to each class
		EStep();

		// dump distances if required
		//if (DistDump) MatPrint(Distfp, LogP.m_Data, DistDump, MaxPossibleClusters);

		// C-step - choose best class for each
		CStep();

		// Would deleting any classes improve things?
		if(Recurse) ConsiderDeletion();

		// Calculate number changed
		nChanged = 0;
		for(p=0; p<nPoints; p++) nChanged += (OldClass[p] != Class[p]);

		// Calculate score
		OldScore = Score;
		Score = ComputeScore();

		if(Verbose>=1) {
			if(Recurse==0) Output("\t");
			// 'F' marks a full step, 'Q' a quick step
			Output("Iteration %d%c: %d clusters Score %.7g nChanged %d\n", Iter, FullStep ? 'F' : 'Q', nClustersAlive, Score, nChanged);
		}

		Iter++;

		/* if (Debug) { for(p=0;p<nPoints;p++) BestClass[p] = Class[p]; SaveOutput(BestClass); Output("Press return"); getchar(); }*/

		// Next step a full step?
		LastStepFull = FullStep;
		FullStep = (
			nChanged>ChangedThresh*nPoints
			|| nChanged == 0
			|| Iter%FullStepEvery==0
		//	|| Score > OldScore Doesn't help!  // Score decreases are not because of quick steps!
		) ;
		if (Iter>MaxIter) {
			Output("Maximum iterations exceeded\n");
			break;
		}

		// try splitting — on a SplitEvery schedule, or once converged after a full step
		if ((Recurse && SplitEvery>0) && (Iter%SplitEvery==SplitEvery-1 || (nChanged==0 && LastStepFull))) {
			DidSplit = TrySplits();
		} else DidSplit = 0;

	// loop until converged: no changes, after a full step, with no split made
	} while (nChanged > 0 || !LastStepFull || DidSplit);

	//if (DistDump) fprintf(Distfp, "\n");

	return Score;
}
// M-step: Calculate mean, cov, and weight for each living class // also deletes any classes with less points than nDim void KK::MStep() { int p, c, cc, i, j; Array<int> nClassMembers(MaxPossibleClusters); Array<float> Vec2Mean(nDims); // clear arrays for(c=0; c<MaxPossibleClusters; c++) { nClassMembers[c] = 0; for(i=0; i<nDims; i++) Mean[c*nDims + i] = 0; for(i=0; i<nDims; i++) for(j=i; j<nDims; j++) { Cov[c*nDims2 + i*nDims + j] = 0; } } // Accumulate total number of points in each class for (p=0; p<nPoints; p++) nClassMembers[Class[p]]++; // check for any dead classes for (cc=0; cc<nClustersAlive; cc++) { c = AliveIndex[cc]; if (c>0 && nClassMembers[c]<=nDims) { ClassAlive[c]=0; Output("Deleted class %d: not enough members\n", c); } } Reindex(); // Normalize by total number of points to give class weight // Also check for dead classes for (cc=0; cc<nClustersAlive; cc++) { c = AliveIndex[cc]; // add "noise point" to make sure Weight for noise cluster never gets to zero if(c==0) { Weight[c] = ((float)nClassMembers[c]+NoisePoint) / (nPoints+NoisePoint); } else { Weight[c] = ((float)nClassMembers[c]) / (nPoints+NoisePoint); } } Reindex(); // Accumulate sums for mean caculation for (p=0; p<nPoints; p++) { c = Class[p]; for(i=0; i<nDims; i++) { Mean[c*nDims + i] += Data[p*nDims + i]; } } // and normalize for (cc=0; cc<nClustersAlive; cc++) { c = AliveIndex[cc]; for (i=0; i<nDims; i++) Mean[c*nDims + i] /= nClassMembers[c]; } // Accumulate sums for covariance calculation for (p=0; p<nPoints; p++) { c = Class[p]; // calculate distance from mean for(i=0; i<nDims; i++) Vec2Mean[i] = Data[p*nDims + i] - Mean[c*nDims + i]; for(i=0; i<nDims; i++) for(j=i; j<nDims; j++) { Cov[c*nDims2 + i*nDims + j] += Vec2Mean[i] * Vec2Mean[j]; } } // and normalize for (cc=0; cc<nClustersAlive; cc++) { c = AliveIndex[cc]; for(i=0; i<nDims; i++) for(j=i; j<nDims; j++) { Cov[c*nDims2 + i*nDims + j] /= (nClassMembers[c]-1); } } // That's it! 
// Diagnostics /* if (Debug) { for (cc=0; cc<nClustersAlive; cc++) { c = AliveIndex[cc]; Output("Class %d - Weight %.2g\n", c, Weight[c]); Output("Mean: "); MatPrint(stdout, Mean.m_Data + c*nDims, 1, nDims); Output("\nCov:\n"); MatPrint(stdout, Cov.m_Data + c*nDims2, nDims, nDims); Output("\n"); } } */ }
// Deep-copy assignment: throws away the current index, copies the stored
// hashes from b, and rebuilds the index to match the new contents.
void HashBase::operator<<=(const HashBase& b)
{
	ClearIndex();
	hash <<= b.hash;
	Reindex();
}
// Inserts _hash at position i and rebuilds the index.
// NOTE(review): the top bit is masked off before storing — presumably
// reserved for internal marking; confirm against UNSIGNED_HIBIT usage.
void HashBase::Insert(int i, unsigned _hash)
{
	hash.Insert(i, _hash & ~UNSIGNED_HIBIT);
	ClearIndex();
	Reindex();
}
// Removes the count positions given by sorted_list (ascending), then
// rebuilds the index from scratch.
void HashBase::Remove(const int *sorted_list, int count)
{
	hash.Remove(sorted_list, count);
	ClearIndex();
	Reindex();
}
// Removes count consecutive entries starting at i and rebuilds the index.
void HashBase::Remove(int i, int count)
{
	hash.Remove(i, count);
	ClearIndex();
	Reindex();
}
// Removes the single entry at position i and rebuilds the index.
void HashBase::Remove(int i)
{
	hash.Remove(i);
	ClearIndex();
	Reindex();
}
// Convenience overload: rebuild the index sized for the current item count.
void HashBase::Reindex()
{
	Reindex(hash.GetCount());
}