int eseqclusterData::getOTU(float dist,earray<eintarray>& otus,int size) { eintarray otuarr; int i,j; otuarr.init(size); for (i=0; i<size; ++i) otuarr[i]=i; // point at the seq we merged with for (i=0; i<mergearr.size() && mergearr[i].dist>=dist; ++i) otuarr[mergearr[i].y]=mergearr[i].x; eintarray otuind; otuind.init(otuarr.size(),-1); // translate all the merged seq ids to the last seq id of the otu for (i=0; i<otuarr.size(); ++i){ for (j=otuarr[i]; j!=otuarr[j]; j=otuarr[j]); otuarr[i]=j; if (otuind[j]==-1){ otuind[j]=otus.size(); otus.add(eintarray()); } } for (i=0; i<otuarr.size(); ++i) otus[otuind[otuarr[i]]].add(i); return(otus.size()); }
// Fills `uniqind` with the indices of the entries of `arr` to be treated as
// unique and, when duplicate detection is enabled, fills `dupslist` with the
// indices sharing each unique value.
//
//   uniqind  : output; when ignoreUnique is false, the index of the first
//              occurrence of every distinct arr.values(i) is appended.
//              When ignoreUnique is true it is re-initialised to
//              arr.size() entries with uniqind[i]=i.
//   dupslist : output; only touched when ignoreUnique is false. One entry is
//              appended per unique value and later occurrences are added to
//              it. The entry is located through the position stored at
//              insertion time (uniqind.size()-1), so this presumably expects
//              uniqind and dupslist to have equal length on entry -- confirm
//              with callers.
//
// Progress is written to stderr on the duplicate-detection path, and a
// newline is written to cout (and flushed by endl) on both paths.
void finduniq(ebasicarray<INDTYPE>& uniqind,earray<ebasicarray<INDTYPE> >& dupslist)
{
  // Maps a value to the position in uniqind of its first occurrence.
  ebasicstrhashof<long> seen;
  ebasicstrhashof<long>::iter hit;

  if (ignoreUnique){
    // No duplicate detection: every index is reported as unique.
    uniqind.init(arr.size());
    for (long k=0; k<uniqind.size(); ++k)
      uniqind[k]=k;
  }else{
    seen.reserve(arr.size());
    for (long k=0; k<arr.size(); ++k){
      // Refresh the progress counter once every 1000 entries.
      if (k%1000==0)
        fprintf(stderr,"\r%li/%li",k,(long)arr.size());

      hit=seen.get(arr.values(k));
      if (!(hit==seen.end())){
        // Value already seen: record k under the first occurrence's entry.
        dupslist[hit.value()].add(k);
        continue;
      }

      // First occurrence: register it and open its dupslist entry.
      // NOTE(review): ebasicarray<INDTYPE>(k) presumably builds an array
      // holding k as its first member -- confirm the constructor semantics.
      uniqind.add(k);
      seen.add(arr.values(k),uniqind.size()-1);
      dupslist.add(ebasicarray<INDTYPE>(k));
    }
    fprintf(stderr,"\r%li\n",(long)arr.size());
  }

  cout << endl;
}