void C_NNcluster2d::mergeClusters() { C_cluster2d_elements::iterator it,jt; int N0 = cluster.size(); int NM=0; for ( it=cluster.begin() ; it != cluster.end(); it++ ) { if (it != cluster.begin()) { jt = it; jt--; int i = (*it).first; int j = (*jt).first; bool morethan1 = cluster[j].N>1; bool thin = cluster[j].std[0]<dx[0]; bool close = (cluster[i].low[0]-cluster[j].high[0])<2*dx[0]; close = close & (fabs(cluster[i].mean[1]-cluster[j].mean[1])<dx[0]); if (thin&close&morethan1) { // int ix=cluster[i].inp[0]; why inp? // int jx=cluster[j].inp[0]; // connect(ix,jx); connect(i,j); NM++; cerr << "merge " << NM << endl; } } } cluster.clear(); makeClusters(); NC = cluster.size(); printf(" mergeCluster merged %d of %d clusters leaving NC %d\n",NM,N0,NC); }
// full constructor C_NNcluster2d::C_NNcluster2d(const vector<vector<double> > & x1,const vector<vector<double> > & wx1 , const vector<int> & ip1 , const double fx1[2] , const string & tn1, const string & sn1, const string & cn1) { typeName = tn1; setName = sn1; contigName = cn1; Nmin=1; Smin[0]=-1; Smin[1]=-1; NC=0; cluster.clear(); fx[0]=fx1[0]; // neighborhood scale fx[1]=fx1[1]; x=x1; wx=wx1; ip=ip1; N=x.size(); dx[0]=0; dx[1]=0; for (int i=0; i<N; i++) { wx[i][0]*=fx[0]; wx[i][1]*=fx[1]; if ( wx[i][0]>dx[0] ) { dx[0]=wx[i][0]; } if ( wx[i][1]>dx[1] ) { dx[1]=wx[i][1]; } } init(); makeConnections(); makeClusters(); }
//------------------------------------------------------------------------------ // merge near clusters //------------------------------------------------------------------------------ void C_NNcluster1d::mergeClusters() { C_cluster1d_elements::iterator it,jt; int N0 = cluster.size(); int NM=0; for ( it=cluster.begin() ; it != cluster.end(); it++ ) { if (it != cluster.begin()) { jt = it; jt--; int i = (*it).first; int j = (*jt).first; bool morethan1 = cluster[j].N>1; bool thin = cluster[j].std<dx; bool close = (cluster[i].low-cluster[j].high)<(dx); close = close & (fabs(cluster[i].mean-cluster[j].mean)<(2*dx)); if (thin&close&morethan1) { int ix=cluster[i].inp[0]; int jx=cluster[j].inp[0]; connect(ix,jx); NM++; } } } cluster.clear(); makeClusters(); NC = cluster.size(); printf(" mergeCluster merged %d of %d clusters leaving NC %d\n",NM,N0,NC); }
// constructor w/o label C_NNcluster1d::C_NNcluster1d(const vector<double> & x1, const double DX) { NC=0; cluster.clear(); dx=DX; // neighborhood scale x=x1; N=x.size(); init(); Nmin=1; Smin=0; makeConnections(); makeClusters(); }
// full constructor C_NNcluster1d::C_NNcluster1d(const vector<double> & x1, const double DX , const string & tn1, const string & sn1, const string & cn1) { typeName = tn1; setName = sn1; contigName = cn1; NC=0; cluster.clear(); dx=DX; // neighborhood scale x=x1; N=x.size(); init(); Nmin=1; Smin=0; makeConnections(); makeClusters(); }
void SeqSwapper::swapSequences() { int numNormal = m_cd->GetNumRows(); LOG_POST("Clustering"); vector< vector<int> * > clusters; makeClusters(m_clusteringThreshold, clusters); vector< pair<int, int> > replacementPairs; set<int> structures; LOG_POST("Clustering is done (made " << clusters.size() << " clusters)"); LOG_POST("Find replacements by BLAST in each cluster"); for (int i = 0; i < clusters.size(); i++) { vector<int>* cluster = clusters[i]; if (cluster) { findReplacements(*cluster, replacementPairs, structures); delete cluster; } } LOG_POST("Done with BLAST in each cluster"); set<int> usedPendings; vector<int> selectedNormalRows; int newMaster = -1; for (int p = 0; p < replacementPairs.size(); p++) { //debug CRef< CSeq_id > seqId; m_ac.GetSeqIDForRow(replacementPairs[p].first, seqId); string nid = seqId->AsFastaString(); m_ac.GetSeqIDForRow(replacementPairs[p].second, seqId); string pid = seqId->AsFastaString(); LOG_POST("replacing "<<nid<<" with "<<pid); //take care of master replacement if (replacementPairs[p].first == 0) newMaster = replacementPairs[p].second - numNormal; else { selectedNormalRows.push_back(replacementPairs[p].first); usedPendings.insert(replacementPairs[p].second - numNormal); } } m_cd->EraseTheseRows(selectedNormalRows); if (structures.size() > 0) { LOG_POST("Adding "<<structures.size()<<" structures"); for (set<int>::iterator sit = structures.begin(); sit != structures.end(); sit++) usedPendings.insert(*sit - numNormal); } if (newMaster >= 0) promotePendingRows(usedPendings, &newMaster); else promotePendingRows(usedPendings); //findStructuralPendings(structures); if (newMaster > 0) { ReMasterCdWithoutUnifiedBlocks(m_cd, newMaster, true); vector<int> rows; rows.push_back(newMaster); m_cd->EraseTheseRows(rows); } m_cd->ResetPending(); m_cd->EraseSequences(); }