Пример #1
0
int eseqclusterData::getOTU(float dist,earray<eintarray>& otus,int size)
{
  eintarray otuarr;
  int i,j;
  otuarr.init(size);
  for (i=0; i<size; ++i)
    otuarr[i]=i;

  // point at the seq we merged with
  for (i=0; i<mergearr.size() && mergearr[i].dist>=dist; ++i)
    otuarr[mergearr[i].y]=mergearr[i].x;

  eintarray otuind;
  otuind.init(otuarr.size(),-1);

  // translate all the merged seq ids to the last seq id of the otu
  for (i=0; i<otuarr.size(); ++i){
    for (j=otuarr[i]; j!=otuarr[j]; j=otuarr[j]);
    otuarr[i]=j;
    if (otuind[j]==-1){
      otuind[j]=otus.size();
      otus.add(eintarray());
    }
  }
  for (i=0; i<otuarr.size(); ++i)
    otus[otuind[otuarr[i]]].add(i);
  return(otus.size());
}
Пример #2
0
// Fills `uniqind` with the indices of arr that are kept as unique entries.
//
// When ignoreUnique is set, uniqind is initialised to 0..arr.size()-1 and
// dupslist is not touched.
//
// Otherwise arr is scanned in index order, keyed on arr.values(i):
//   - the first index seen for a key is appended to uniqind and a new entry
//     is appended to dupslist for it;
//   - every later index with the same key is appended to the dupslist entry
//     recorded for that key.
// The recorded entry number is uniqind.size()-1 at insertion time, so this
// assumes uniqind and dupslist have the same length on entry -- TODO confirm
// with callers.
//
// Progress is written to stderr every 1000 items, and a newline is written to
// cout before returning.
void finduniq(ebasicarray<INDTYPE>& uniqind,earray<ebasicarray<INDTYPE> >& dupslist)
{
  ebasicstrhashof<long> seen;
  ebasicstrhashof<long>::iter hit;

  if (ignoreUnique){
    // no deduplication requested: every index stands for itself
    uniqind.init(arr.size());
    long k=0;
    while (k<uniqind.size()){
      uniqind[k]=k;
      ++k;
    }
  }else{
    seen.reserve(arr.size());
    long i=0;
    while (i<arr.size()){
      if (!(i%1000))
        fprintf(stderr,"\r%li/%li",i,(long)arr.size());

      hit=seen.get(arr.values(i));
      if (hit==seen.end()){
        // new key: remember which uniqind/dupslist position it occupies
        uniqind.add(i);
        seen.add(arr.values(i),uniqind.size()-1);
        // presumably builds a one-element list holding i -- confirm against
        // the ebasicarray constructor
        dupslist.add(ebasicarray<INDTYPE>(i));
      }else{
        // repeated key: file this index under its first occurrence
        dupslist[hit.value()].add(i);
      }
      ++i;
    }
    fprintf(stderr,"\r%li\n",(long)arr.size());
  }

  cout << endl;
}