Exemple #1
0
int eseqclusterData::getOTU(float dist,earray<eintarray>& otus,int size)
{
  eintarray otuarr;
  int i,j;
  otuarr.init(size);
  for (i=0; i<size; ++i)
    otuarr[i]=i;

  // point at the seq we merged with
  for (i=0; i<mergearr.size() && mergearr[i].dist>=dist; ++i)
    otuarr[mergearr[i].y]=mergearr[i].x;

  eintarray otuind;
  otuind.init(otuarr.size(),-1);

  // translate all the merged seq ids to the last seq id of the otu
  for (i=0; i<otuarr.size(); ++i){
    for (j=otuarr[i]; j!=otuarr[j]; j=otuarr[j]);
    otuarr[i]=j;
    if (otuind[j]==-1){
      otuind[j]=otus.size();
      otus.add(eintarray());
    }
  }
  for (i=0; i<otuarr.size(); ++i)
    otus[otuind[otuarr[i]]].add(i);
  return(otus.size());
}
// Prepares the clusterer for `count` sequences.
//
//   count     - number of sequences; each one starts as its own cluster
//   ofilename - output file, opened in "w" mode, that receives the header
//               lines and one line per recorded merge
//   seqsfile  - name written into the "# seqsfile:" header line
//   dupslist  - groups of ids; every element after the first in a group is
//               recorded as a merge with the first element at distance 1.0
void eseqclustersingle::init(INDTYPE count,const estr& ofilename,const estr& seqsfile,const earray<ebasicarray<INDTYPE> >& dupslist) {
  // output file header
  ofile.open(ofilename,"w");
  ofile.write("# seqsfile: "+seqsfile+"\n");
  ofile.write("# OTU_count Merge_distance Merged_OTU_id1 Merged_OTU_id2\n");

  mergecount=0;

  scount.reserve(count);
  scluster.reserve(count);
  smerge.reserve(count);
  incluster.reserve(count);

  // per sequence: count of 1, cluster id equal to its own id, merge marker -1,
  // and a member list that holds only the sequence itself
  long seq;
  for (seq=0; seq<count; ++seq){
    scount.add(1);
    scluster.add(seq);
    smerge.add(-1);
    incluster.add(list<INDTYPE>());
    incluster[seq].push_back(seq);
  }

  // record the listed duplicates as merges at distance 1.0, both in the output
  // file and in clusterData.mergearr
  long grp,dup;
  for (grp=0; grp<dupslist.size(); ++grp){
    const ebasicarray<INDTYPE>& group=dupslist[grp];
    for (dup=1; dup<group.size(); ++dup){
      ++mergecount;
      ofile.write(estr(scluster.size()-mergecount)+" 1.0 "+group[0]+" "+group[dup]+"\n");
      clusterData.mergearr.add(eseqdist(group[0],group[dup],1.0));
    }
  }

  cout << "# initializing cluster with: "<< count<< " seqs" << endl;
}
// Prepares the clusterer for `count` sequences.
//
//   count    - number of sequences; each one starts as its own cluster
//   filename - output file, opened in "w" mode, that receives the header
//              lines and one line per recorded merge
//   seqsfile - name written into the "# seqsfile:" header line
//   dupslist - groups of ids; every element after the first in a group is
//              recorded as a merge with the first element at distance 1.0
//   _thres   - stored in thres
//   _fdist   - function stored in fdist
//   _seqarr  - its address is stored in seqarr (no copy is made here)
//   _seqlen  - stored in seqlen
void eseqclusteravg::init(INDTYPE count,const estr& filename,const estr& seqsfile,const earray<ebasicarray<INDTYPE> >& dupslist,float _thres,float (_fdist)(const estr&,const estr&,int),estrarray& _seqarr,int _seqlen)
{
  // keep the caller supplied settings
  thres=_thres;
  seqarr=&_seqarr;
  seqlen=_seqlen;
  fdist=_fdist;

  // output file header
  ofile.open(filename,"w");
  ofile.write("# seqsfile: "+seqsfile+"\n");
  ofile.write("# OTU_count Merge_distance Merged_OTU_id1 Merged_OTU_id2\n");

  // reset the running state
  incmaxdist=1.0;
  // NOTE(review): this end() is taken before the smatrix.reserve() call at the
  // bottom of the function -- confirm that reserve() leaves it valid
  incmaxit=smatrix.end();
  cf=0;
  lastdist=0.0;

  scount.reserve(count);
  scluster.reserve(count);
  smerge.reserve(count);
  inter.reserve(count);
  incluster.reserve(count);
  mergecount=0;

  // per sequence: count of 1, cluster id equal to its own id, merge marker -1,
  // a member list that holds only the sequence itself, and an empty inter list
  long seq;
  for (seq=0; seq<count; ++seq){
    scount.add(1);
    scluster.add(seq);
    smerge.add(-1);
    incluster.add(list<INDTYPE>());
    incluster[seq].push_back(seq);
    inter.add(list<INDTYPE>());
  }

  // record the listed duplicates as merges at distance 1.0, both in the output
  // file and in clusterData.mergearr
  long grp,dup;
  for (grp=0; grp<dupslist.size(); ++grp){
    const ebasicarray<INDTYPE>& group=dupslist[grp];
    for (dup=1; dup<group.size(); ++dup){
      ++mergecount;
      ofile.write(estr(scluster.size()-mergecount)+" 1.0 "+group[0]+" "+group[dup]+"\n");
      clusterData.mergearr.add(eseqdist(group[0],group[dup],1.0));
    }
  }

  cout << "# initializing cluster with: "<< count<< " seqs" << endl;

  // number of smatrix elements reserved up front: count*count/20000/2
  const long smatrixsize=static_cast<long>(count)*static_cast<long>(count)/20000l/2l;
  cout << "# initializing smatrix with: " << smatrixsize<< " elements" << endl;
  smatrix.reserve(smatrixsize);
//  cout << "# smatrix._hashitems = " << smatrix._hashitems << endl;
}