int eseqclusterData::getOTU(float dist,earray<eintarray>& otus,int size) { eintarray otuarr; int i,j; otuarr.init(size); for (i=0; i<size; ++i) otuarr[i]=i; // point at the seq we merged with for (i=0; i<mergearr.size() && mergearr[i].dist>=dist; ++i) otuarr[mergearr[i].y]=mergearr[i].x; eintarray otuind; otuind.init(otuarr.size(),-1); // translate all the merged seq ids to the last seq id of the otu for (i=0; i<otuarr.size(); ++i){ for (j=otuarr[i]; j!=otuarr[j]; j=otuarr[j]); otuarr[i]=j; if (otuind[j]==-1){ otuind[j]=otus.size(); otus.add(eintarray()); } } for (i=0; i<otuarr.size(); ++i) otus[otuind[otuarr[i]]].add(i); return(otus.size()); }
// Prepares the clusterer for `count` sequences and opens the merge log.
//
//   count     : number of sequences; each one starts as its own cluster.
//   ofilename : path of the output file, opened with mode "w".
//   seqsfile  : name written into the header line of the output file.
//   dupslist  : groups of duplicate sequences; within each group every entry
//               after the first is recorded as merged with the first entry
//               at distance 1.0.
void eseqclustersingle::init(INDTYPE count,const estr& ofilename,const estr& seqsfile,const earray<ebasicarray<INDTYPE> >& dupslist)
{
  // Output file header.
  ofile.open(ofilename,"w");
  ofile.write("# seqsfile: "+seqsfile+"\n");
  ofile.write("# OTU_count Merge_distance Merged_OTU_id1 Merged_OTU_id2\n");

  mergecount=0;

  scount.reserve(count);
  scluster.reserve(count);
  smerge.reserve(count);
  incluster.reserve(count);

  // Every sequence begins as a cluster of one that contains only itself.
  long seq=0;
  while (seq<count){
    scount.add(1);
    scluster.add(seq);
    smerge.add(-1);
    incluster.add(list<INDTYPE>());
    incluster[seq].push_back(seq);
    ++seq;
  }

  // Log each duplicate as a merge with the first member of its group and
  // store the same merge in clusterData.
  long g=0;
  while (g<dupslist.size()){
    const ebasicarray<INDTYPE>& group=dupslist[g];
    long k=1;
    while (k<group.size()){
      ++mergecount;
      ofile.write(estr(scluster.size()-mergecount)+" 1.0 "+group[0]+" "+group[k]+"\n");
      clusterData.mergearr.add(eseqdist(group[0],group[k],1.0));
      ++k;
    }
    ++g;
  }

  cout << "# initializing cluster with: "<< count<< " seqs" << endl;
}
void eseqclusteravg::init(INDTYPE count,const estr& filename,const estr& seqsfile,const earray<ebasicarray<INDTYPE> >& dupslist,float _thres,float (_fdist)(const estr&,const estr&,int),estrarray& _seqarr,int _seqlen) { thres=_thres; seqarr=&_seqarr; seqlen=_seqlen; fdist=_fdist; ofile.open(filename,"w"); ofile.write("# seqsfile: "+seqsfile+"\n"); ofile.write("# OTU_count Merge_distance Merged_OTU_id1 Merged_OTU_id2\n"); long i,j; incmaxdist=1.0; incmaxit=smatrix.end(); cf=0; lastdist=0.0; scount.reserve(count); scluster.reserve(count); smerge.reserve(count); inter.reserve(count); incluster.reserve(count); mergecount=0; for (i=0; i<count; ++i){ scount.add(1); scluster.add(i); smerge.add(-1); incluster.add(list<INDTYPE>()); incluster[i].push_back(i); inter.add(list<INDTYPE>()); } for (i=0; i<dupslist.size(); ++i){ for (j=1; j<dupslist[i].size(); ++j){ ++mergecount; ofile.write(estr(scluster.size()-mergecount)+" 1.0 "+dupslist[i][0]+" "+dupslist[i][j]+"\n"); clusterData.mergearr.add(eseqdist(dupslist[i][0],dupslist[i][j],1.0)); } } cout << "# initializing cluster with: "<< count<< " seqs" << endl; cout << "# initializing smatrix with: " << (long)(count)*(long)(count)/20000l/2l<< " elements" << endl; smatrix.reserve((long)(count)*(long)(count)/20000l/2l); // cout << "# smatrix._hashitems = " << smatrix._hashitems << endl; }