// Replaces the current mask with a freshly allocated ExplicitBitVect
// (constructed from d_dims) in which every index listed in maskBits is set.
//
// \param maskBits  indices of the bits to switch on in the new mask
//
// The previously held mask, if any, is deleted.
void InfoBitRanker::setMaskBits(RDKit::INT_VECT &maskBits) {
  // Allocate the replacement before releasing the old mask: if the allocation
  // throws, dp_maskBits still points at a live object instead of dangling.
  ExplicitBitVect *freshMask = new ExplicitBitVect(d_dims);
  delete dp_maskBits;
  dp_maskBits = freshMask;

  for (RDKit::INT_VECT_CI bi = maskBits.begin(); bi != maskBits.end(); ++bi) {
    dp_maskBits->setBit(*bi);
  }
}
// Clusters the items of a pool and returns the clusters that remain once
// enough merges have been applied to leave pickSize of them.
//
// \param distMat   distance matrix handed straight to distdriver_; must not
//                  be null
// \param poolSize  number of items in the pool
// \param pickSize  number of clusters wanted; must not exceed poolSize
//
// \return one vector of item indices (0-based) per surviving cluster, in
//         increasing order of the cluster's original id
//
// Violated preconditions and allocation failures are reported through
// PRECONDITION / CHECK_INVARIANT.
RDKit::VECT_INT_VECT HierarchicalClusterPicker::cluster(
    const double *distMat, unsigned int poolSize,
    unsigned int pickSize) const {
  PRECONDITION(distMat, "Invalid Distance Matrix");
  PRECONDITION((poolSize >= pickSize),
               "pickSize cannot be larger than the poolSize");

  // Do the clustering
  long int method = static_cast<long int>(d_method);
  // Multiply in unsigned long rather than unsigned int so the product does
  // not wrap at 32 bits on platforms where long is wider than int.
  long int len = static_cast<long int>(static_cast<unsigned long>(poolSize) *
                                       (poolSize - 1));
  long int *ia = static_cast<long int *>(calloc(poolSize, sizeof(long int)));
  long int *ib = static_cast<long int *>(calloc(poolSize, sizeof(long int)));
  real *crit = static_cast<real *>(calloc(poolSize, sizeof(real)));
  const bool allocated = ia && ib && crit;
  if (!allocated) {
    // Release whichever buffers were obtained before reporting the failure;
    // free() on a null pointer is a no-op.
    free(ia);
    free(ib);
    free(crit);
  }
  CHECK_INVARIANT(allocated, "failed to allocate memory");

  long int poolSize2 = static_cast<long int>(poolSize);
  distdriver_(&poolSize2,        // number of items in the pool
              &len,              // number of entries in the distance matrix
              (real *)distMat,   // distance matrix
              &method,           // the clustering method (ward, slink etc.)
              ia,                // int vector with clustering history
              ib,                // one more clustering history matrix
              crit               // I believe this is a vector the difference
                                 // in heights of two clusters
  );

  // We have the clusters; now merge them until the number of clusters is the
  // same as the number of picks we need.
  // Before we do that, a bit of explanation on the vectors "ia" and "ib":
  //  - We start with each item in the pool as an individual cluster
  //  - then we use the vectors ia and ib to merge them.
  //    ia and ib provide the (1-based) ids of the clusters that need to be
  //    merged. It is assumed that when a cluster ia[j] is merged with ib[j],
  //    ia[j] is replaced by the new cluster in the cluster list.
  RDKit::VECT_INT_VECT clusters;
  clusters.reserve(poolSize);
  for (unsigned int i = 0; i < poolSize; i++) {
    RDKit::INT_VECT cls;
    cls.push_back(i);
    clusters.push_back(cls);
  }

  // do the merging, each round of this loop eliminates one cluster
  RDKit::INT_VECT removed;
  removed.reserve(poolSize - pickSize);
  for (unsigned int i = 0; i < (poolSize - pickSize); i++) {
    int cx1 = ia[i] - 1;
    int cx2 = ib[i] - 1;

    // add the items from cluster cx2 to cx1
    clusters[cx1].insert(clusters[cx1].end(), clusters[cx2].begin(),
                         clusters[cx2].end());

    // mark the second cluster as removed
    removed.push_back(cx2);
  }
  free(ia);
  free(ib);
  free(crit);

  // sort removed so that looping will be easier later
  std::sort(removed.begin(), removed.end());

  // some error checking here: uniquify removed, the vector should not change
  // REVIEW can we put this inside a #ifdef DEBUG?
  RDKit::INT_VECT_CI nEnd = std::unique(removed.begin(), removed.end());
  CHECK_INVARIANT(
      nEnd == removed.end(),
      "Somehow there are duplicates in the list of removed clusters");

  // Copy out every cluster that was not absorbed into another one. "removed"
  // is sorted, so a single cursor (j) walks it in step with i.
  RDKit::VECT_INT_VECT res;
  res.reserve(pickSize);
  unsigned int j = 0;
  for (unsigned int i = 0; i < poolSize; i++) {
    // The bound on j matters: removed is empty when pickSize == poolSize, and
    // j reaches removed.size() once the last removed id has been skipped.
    if (j < removed.size() && static_cast<int>(i) == removed[j]) {
      j++;
      continue;
    }
    res.push_back(clusters[i]);
  }
  return res;
}