int eseqclusterData::getOTU(float dist,earray<eintarray>& otus,int size) { eintarray otuarr; int i,j; otuarr.init(size); for (i=0; i<size; ++i) otuarr[i]=i; // point at the seq we merged with for (i=0; i<mergearr.size() && mergearr[i].dist>=dist; ++i) otuarr[mergearr[i].y]=mergearr[i].x; eintarray otuind; otuind.init(otuarr.size(),-1); // translate all the merged seq ids to the last seq id of the otu for (i=0; i<otuarr.size(); ++i){ for (j=otuarr[i]; j!=otuarr[j]; j=otuarr[j]); otuarr[i]=j; if (otuind[j]==-1){ otuind[j]=otus.size(); otus.add(eintarray()); } } for (i=0; i<otuarr.size(); ++i) otus[otuind[otuarr[i]]].add(i); return(otus.size()); }
// Prepares the clusterer for `count` sequences and opens the merge log.
//
//   count     - number of sequences; each one starts as its own cluster
//   ofilename - file opened for writing; receives a two-line header and one
//               line per duplicate merge
//   seqsfile  - name echoed into the header only
//   dupslist  - groups of identical sequences; element 0 of each group is used
//               as the representative and every further element is recorded as
//               a merge with it at distance 1.0
void eseqclustersingle::init(INDTYPE count,const estr& ofilename,const estr& seqsfile,const earray<ebasicarray<INDTYPE> >& dupslist)
{
  ofile.open(ofilename,"w");
  ofile.write("# seqsfile: "+seqsfile+"\n");
  ofile.write("# OTU_count Merge_distance Merged_OTU_id1 Merged_OTU_id2\n");

  mergecount=0;

  scount.reserve(count);
  scluster.reserve(count);
  smerge.reserve(count);
  incluster.reserve(count);

  // every sequence begins as a singleton cluster containing only itself
  for (long s=0; s<count; ++s){
    scount.add(1);
    scluster.add(s);
    smerge.add(-1);
    incluster.add(list<INDTYPE>());
    incluster[s].push_back(s);
  }

  // log each duplicate as a merge into the first member of its group
  for (long g=0; g<dupslist.size(); ++g){
    const ebasicarray<INDTYPE>& group=dupslist[g];
    for (long d=1; d<group.size(); ++d){
      ++mergecount;
      ofile.write(estr(scluster.size()-mergecount)+" 1.0 "+group[0]+" "+group[d]+"\n");
      clusterData.mergearr.add(eseqdist(group[0],group[d],1.0));
    }
  }

  cout << "# initializing cluster with: "<< count<< " seqs" << endl;
}
// Records that file name `f` applies from position indx + 1 onwards.
//
// An empty name is ignored. If the most recent entry already names `f`,
// nothing changes. If the most recent entry was recorded for this same
// position (indx + 1), its name is replaced instead of adding a new entry.
// Otherwise a new entry is appended to Filez.
// NOTE(review): the old name is released with delete[], so StrDup presumably
// allocates with new[] - confirm.
void enter_file_indicator (char *f)
{
	int slot;

	if (f [0] == '\0')
		return;

	if (Filez.nr != 0) {
		slot = Filez.nr - 1;

		// same file as the latest entry: nothing to record
		if (strcmp (f, Filez.x [slot].file) == 0)
			return;

		// latest entry belongs to this very position: overwrite its name
		if (Filez.x [slot].indx == indx + 1) {
			delete [] Filez.x [slot].file;
			Filez.x [slot].file = StrDup (f);
			return;
		}
	}

	slot = Filez.alloc ();
	Filez.x [slot].indx = indx + 1;
	Filez.x [slot].file = StrDup (f);
}
void eseqclusteravg::init(INDTYPE count,const estr& filename,const estr& seqsfile,const earray<ebasicarray<INDTYPE> >& dupslist,float _thres,float (_fdist)(const estr&,const estr&,int),estrarray& _seqarr,int _seqlen) { thres=_thres; seqarr=&_seqarr; seqlen=_seqlen; fdist=_fdist; ofile.open(filename,"w"); ofile.write("# seqsfile: "+seqsfile+"\n"); ofile.write("# OTU_count Merge_distance Merged_OTU_id1 Merged_OTU_id2\n"); long i,j; incmaxdist=1.0; incmaxit=smatrix.end(); cf=0; lastdist=0.0; scount.reserve(count); scluster.reserve(count); smerge.reserve(count); inter.reserve(count); incluster.reserve(count); mergecount=0; for (i=0; i<count; ++i){ scount.add(1); scluster.add(i); smerge.add(-1); incluster.add(list<INDTYPE>()); incluster[i].push_back(i); inter.add(list<INDTYPE>()); } for (i=0; i<dupslist.size(); ++i){ for (j=1; j<dupslist[i].size(); ++j){ ++mergecount; ofile.write(estr(scluster.size()-mergecount)+" 1.0 "+dupslist[i][0]+" "+dupslist[i][j]+"\n"); clusterData.mergearr.add(eseqdist(dupslist[i][0],dupslist[i][j],1.0)); } } cout << "# initializing cluster with: "<< count<< " seqs" << endl; cout << "# initializing smatrix with: " << (long)(count)*(long)(count)/20000l/2l<< " elements" << endl; smatrix.reserve((long)(count)*(long)(count)/20000l/2l); // cout << "# smatrix._hashitems = " << smatrix._hashitems << endl; }
// Fills `uniqind` with the indices of the entries of `arr` to be clustered.
//
// When ignoreUnique is set, uniqind becomes the identity list 0..arr.size()-1
// and dupslist is left untouched.
//
// Otherwise entries are de-duplicated by arr.values(i): the index of the first
// entry with a given value is appended to uniqind and starts a new group in
// dupslist; the index of every later entry with the same value is appended to
// that group. Progress is printed to stderr every 1000 entries.
// NOTE(review): ebasicarray<INDTYPE>(i) is presumably a one-element array
// holding i - confirm against the ebasicarray constructors.
void finduniq(ebasicarray<INDTYPE>& uniqind,earray<ebasicarray<INDTYPE> >& dupslist)
{
  ebasicstrhashof<long> duphash;
  ebasicstrhashof<long>::iter it;

  if (ignoreUnique){
    uniqind.init(arr.size());
    for (long i=0; i<uniqind.size(); ++i)
      uniqind[i]=i;
  }else{
    // duphash maps a value to the position of its group in uniqind/dupslist
    duphash.reserve(arr.size());
    for (long i=0; i<arr.size(); ++i){
      if (i%1000==0)
        fprintf(stderr,"\r%li/%li",i,(long)arr.size());

      it=duphash.get(arr.values(i));
      const bool firstSeen=(it==duphash.end());
      if (firstSeen){
        uniqind.add(i);
        duphash.add(arr.values(i),uniqind.size()-1);
        dupslist.add(ebasicarray<INDTYPE>(i));
      }else{
        dupslist[it.value()].add(i);
      }
    }
    fprintf(stderr,"\r%li\n",(long)arr.size());
  }

  cout << endl;
}
void make_norm () { if (string) _enter_token (enter_string ()); used_builtins (); // C_Ntok = tmpCODE.nr (); CODE = new int [C_Ntok + 3]; tmpCODE.copy (&CODE); CODE [C_Ntok] = FORCEERROR; CODE [C_Ntok + 1] = ';'; CODE [C_Ntok + 2] = FORCEERROR; tmpCODE.destroy (); // C_Syms = new char* [C_Nsyms = symtree.nnodes - nreserved]; symtree.deltree (symtoarray); Filez.freeze (); C_Files = Filez.x; C_Nfiles = Filez.nr; C_Nlines = Linez.nr (); Linez.copy (&C_Lines); Linez.destroy (); C_Strings = new char* [C_Nstrings = strtree.nnodes]; strtree.deltree (strtoarray); // TFloat.copy (&C_Floats); TFloat.destroy (); TsInt8.copy (&C_Chars); TsInt8.destroy (); TsInt16.copy (&C_Shortints); TsInt16.destroy (); TsInt32.copy (&C_Ints); TsInt32.destroy (); TuInt32.copy (&C_Unsigned); TuInt32.destroy (); }
/// Element-wise equality of two arrays.
///
/// Returns true only when both ranges have the same length and every pair of
/// corresponding elements compares equal. The four-iterator form of
/// std::equal is used so the right-hand range is never read past r.end() and
/// a longer right-hand array with a matching prefix is not reported as equal.
friend bool operator == (const earray& l, const earray& r)
{
	return std::equal(l.begin(), l.end(), r.begin(), r.end());
}
int eseqclusterData::getTree(const efloatarray& dists,earray<earray<eintarray> >& otus,earray<eintarray>& otusanc,int size) { if (dists.size()==0) return(0); eintarray otuarr; int i,j,k; otuarr.init(size); for (i=0; i<size; ++i) otuarr[i]=i; otus.init(dists.size()); otusanc.init(dists.size()); // point at the seq we merged with for (k=0; k<mergearr.size() && mergearr[k].dist>=dists[0]; ++k) otuarr[mergearr[k].y]=mergearr[k].x; eintarray otuind,otuind2; otuind.init(otuarr.size(),-1); // translate all the merged seq ids to the last seq id of the otu for (i=0; i<otuarr.size(); ++i){ for (j=otuarr[i]; j!=otuarr[j]; j=otuarr[j]); otuarr[i]=j; if (otuind[j]==-1){ otuind[j]=otus[0].size(); otus[0].add(eintarray()); } otus[0][otuind[j]].add(i); } otusanc[0].init(otus[0].size()); for (i=0; i<otusanc[0].size(); ++i) otusanc[0][i]=i; int l; eintarray otuarr2; for (l=1; l<dists.size(); ++l){ otusanc[l].init(otus[0].size()); otuarr2.init(otus[l-1].size()); for (i=0; i<otuarr2.size(); ++i) otuarr2[i]=i; for (; k<mergearr.size() && mergearr[k].dist>=dists[l]; ++k) otuarr2[otusanc[l-1][otuind[otuarr[mergearr[k].y]]]]=otusanc[l-1][otuind[otuarr[mergearr[k].x]]]; otuind2.init(otuarr2.size(),-1); for (i=0; i<otuarr2.size(); ++i){ for (j=otuarr2[i]; j!=otuarr2[j]; j=otuarr2[j]); otuarr2[i]=j; if (otuind2[j]==-1){ otuind2[j]=otus[l].size(); otus[l].add(eintarray()); } otus[l][otuind2[j]].add(i); } for (i=0; i<otus[0].size(); ++i){ j=otuarr2[otusanc[l-1][i]]; otusanc[l][i]=otuind2[j]; } } return(otus[0].size()); }