예제 #1
0
int eseqclusterData::getOTU(float dist,earray<eintarray>& otus,int size)
{
  eintarray otuarr;
  int i,j;
  otuarr.init(size);
  for (i=0; i<size; ++i)
    otuarr[i]=i;

  // point at the seq we merged with
  for (i=0; i<mergearr.size() && mergearr[i].dist>=dist; ++i)
    otuarr[mergearr[i].y]=mergearr[i].x;

  eintarray otuind;
  otuind.init(otuarr.size(),-1);

  // translate all the merged seq ids to the last seq id of the otu
  for (i=0; i<otuarr.size(); ++i){
    for (j=otuarr[i]; j!=otuarr[j]; j=otuarr[j]);
    otuarr[i]=j;
    if (otuind[j]==-1){
      otuind[j]=otus.size();
      otus.add(eintarray());
    }
  }
  for (i=0; i<otuarr.size(); ++i)
    otus[otuind[otuarr[i]]].add(i);
  return(otus.size());
}
예제 #2
0
// Opens `ofilename` for writing and emits the two header lines, appends one
// singleton entry per sequence (0..count-1) to the bookkeeping arrays, then
// logs and records every duplicate in `dupslist` as a merge at distance 1.0
// between the first member of its group and the duplicate.
void eseqclustersingle::init(INDTYPE count,const estr& ofilename,const estr& seqsfile,const earray<ebasicarray<INDTYPE> >& dupslist) {
  ofile.open(ofilename,"w");
  ofile.write("# seqsfile: "+seqsfile+"\n");
  ofile.write("# OTU_count Merge_distance Merged_OTU_id1 Merged_OTU_id2\n");

  mergecount=0;

  scount.reserve(count);
  scluster.reserve(count);
  smerge.reserve(count);
  incluster.reserve(count);

  // each sequence begins as a cluster of one, containing only itself
  for (long seq=0; seq<count; ++seq){
    scount.add(1);
    scluster.add(seq);
    smerge.add(-1);
    incluster.add(list<INDTYPE>());
    incluster[seq].push_back(seq);
  }

  // element 0 of every group is the one the remaining elements are merged with
  for (long grp=0; grp<dupslist.size(); ++grp){
    for (long dup=1; dup<dupslist[grp].size(); ++dup){
      ++mergecount;
      ofile.write(estr(scluster.size()-mergecount)+" 1.0 "+dupslist[grp][0]+" "+dupslist[grp][dup]+"\n");
      clusterData.mergearr.add(eseqdist(dupslist[grp][0],dupslist[grp][dup],1.0));
    }
  }

  cout << "# initializing cluster with: "<< count<< " seqs" << endl;
}
예제 #3
0
// Records that file name `f` applies from position indx + 1 onwards in the
// Filez table. Empty names and a name equal to the most recent entry are
// ignored; if the most recent entry was made for this same position its name
// is replaced instead of appending a new entry.
void enter_file_indicator (char *f)
{
	if (f [0] == '\0') return;

	if (Filez.nr != 0) {
		const int tail = Filez.nr - 1;

		// same file as the latest entry: nothing to record
		if (strcmp (f, Filez.x [tail].file) == 0) return;

		// latest entry already marks this position: swap in the new name
		if (Filez.x [tail].indx == indx + 1) {
			delete [] Filez.x [tail].file;
			Filez.x [tail].file = StrDup (f);
			return;
		}
	}

	// otherwise append a fresh entry
	const int slot = Filez.alloc ();
	Filez.x [slot].indx = indx + 1;
	Filez.x [slot].file = StrDup (f);
}
예제 #4
0
// Stores the clustering parameters, opens `filename` for writing and emits the
// two header lines, appends one singleton entry per sequence (0..count-1) to
// the bookkeeping arrays, logs and records every duplicate in `dupslist` as a
// merge at distance 1.0, and finally reserves count*count/20000/2 slots in
// smatrix.
void eseqclusteravg::init(INDTYPE count,const estr& filename,const estr& seqsfile,const earray<ebasicarray<INDTYPE> >& dupslist,float _thres,float (_fdist)(const estr&,const estr&,int),estrarray& _seqarr,int _seqlen)
{
  thres=_thres;
  seqarr=&_seqarr;
  seqlen=_seqlen;
  fdist=_fdist;

  ofile.open(filename,"w");
  ofile.write("# seqsfile: "+seqsfile+"\n");
  ofile.write("# OTU_count Merge_distance Merged_OTU_id1 Merged_OTU_id2\n");

  incmaxdist=1.0;
  incmaxit=smatrix.end();
  cf=0;
  lastdist=0.0;

  scount.reserve(count);
  scluster.reserve(count);
  smerge.reserve(count);
  inter.reserve(count);
  incluster.reserve(count);
  mergecount=0;

  // each sequence begins as a cluster of one, with an empty `inter` list
  for (long seq=0; seq<count; ++seq){
    scount.add(1);
    scluster.add(seq);
    smerge.add(-1);
    incluster.add(list<INDTYPE>());
    incluster[seq].push_back(seq);
    inter.add(list<INDTYPE>());
  }

  // element 0 of every group is the one the remaining elements are merged with
  for (long grp=0; grp<dupslist.size(); ++grp){
    for (long dup=1; dup<dupslist[grp].size(); ++dup){
      ++mergecount;
      ofile.write(estr(scluster.size()-mergecount)+" 1.0 "+dupslist[grp][0]+" "+dupslist[grp][dup]+"\n");
      clusterData.mergearr.add(eseqdist(dupslist[grp][0],dupslist[grp][dup],1.0));
    }
  }

  // size estimate used both for the log line and for the reservation
  const long smatrixsize=static_cast<long>(count)*static_cast<long>(count)/20000l/2l;
  cout << "# initializing cluster with: "<< count<< " seqs" << endl;
  cout << "# initializing smatrix with: " << smatrixsize<< " elements" << endl;
  smatrix.reserve(smatrixsize);
//  cout << "# smatrix._hashitems = " << smatrix._hashitems << endl;
}
예제 #5
0
// Scans the global `arr` and fills `uniqind` with the index of the first
// occurrence of every distinct value; each later occurrence is added to the
// dupslist group created for its first occurrence. When `ignoreUnique` is set
// no comparison is done: uniqind becomes 0..arr.size()-1 and dupslist is left
// untouched. Progress is reported on stderr every 1000 entries.
void finduniq(ebasicarray<INDTYPE>& uniqind,earray<ebasicarray<INDTYPE> >& dupslist)
{
  // value -> position of that value's entry in uniqind (and of its group in dupslist)
  ebasicstrhashof<long> seen;
  ebasicstrhashof<long>::iter hit;

  if (ignoreUnique){
    uniqind.init(arr.size());
    for (long k=0; k<uniqind.size(); ++k)
      uniqind[k]=k;
  }else{
    seen.reserve(arr.size());
    for (long k=0; k<arr.size(); ++k){
      if (k%1000==0)
        fprintf(stderr,"\r%li/%li",k,(long)arr.size());

      hit=seen.get(arr.values(k));
      if (hit==seen.end()){
        // first time this value shows up: new unique entry and new group built from k
        uniqind.add(k);
        seen.add(arr.values(k),uniqind.size()-1);
        dupslist.add(ebasicarray<INDTYPE>(k));
      }else{
        // value already known: attach k to the group of its first occurrence
        dupslist[hit.value()].add(k);
      }
    }
    fprintf(stderr,"\r%li\n",(long)arr.size());
  }
  cout << endl;
}
예제 #6
0
// Finalizes the collected data: transfers the temporary token, line and
// constant tables plus the symbol and string trees into the CODE / C_* globals
// and destroys the temporaries. The statements are order-dependent (counts are
// read before the corresponding container is destroyed).
void make_norm ()
{
	// When `string` is non-zero, one more token produced by enter_string() is
	// entered first (presumably a string literal still pending -- confirm).
	if (string) _enter_token (enter_string ());

	used_builtins ();
	// Token stream: CODE has room for every token of tmpCODE plus three
	// trailing sentinel slots, filled below with FORCEERROR, ';', FORCEERROR.
	C_Ntok = tmpCODE.nr ();
	CODE = new int [C_Ntok + 3];
	// NOTE(review): copy() is given the address of CODE; presumably it fills the
	// buffer allocated above rather than replacing it -- confirm.
	tmpCODE.copy (&CODE);
	CODE  [C_Ntok] = FORCEERROR;
	CODE  [C_Ntok + 1] = ';';
	CODE  [C_Ntok + 2] = FORCEERROR;
	tmpCODE.destroy ();
	// Symbols: C_Nsyms is assigned inside the array-size expression (node count
	// of symtree minus nreserved). deltree() is handed symtoarray; presumably it
	// applies it to each node while tearing the tree down -- confirm.
	C_Syms = new char* [C_Nsyms = symtree.nnodes - nreserved];
	symtree.deltree (symtoarray);
	// Files: C_Files points at Filez's own storage after freeze(); no copy is
	// made here.
	Filez.freeze ();
	C_Files = Filez.x;
	C_Nfiles = Filez.nr;
	// Lines: the count is read before the table is copied out and destroyed.
	C_Nlines = Linez.nr ();
	Linez.copy (&C_Lines);
	Linez.destroy ();
	// Strings: same pattern as the symbols, using strtree and strtoarray.
	C_Strings = new char* [C_Nstrings = strtree.nnodes];
	strtree.deltree (strtoarray);
	// Constant tables: each one is copied out to its C_* global, then destroyed.
	TFloat.copy (&C_Floats);
	TFloat.destroy ();
	TsInt8.copy (&C_Chars);
	TsInt8.destroy ();
	TsInt16.copy (&C_Shortints);
	TsInt16.destroy ();
	TsInt32.copy (&C_Ints);
	TsInt32.destroy ();
	TuInt32.copy (&C_Unsigned);
	TuInt32.destroy ();
}
예제 #7
0
파일: utils2.hpp 프로젝트: qiuhw/tmwa
 // Element-wise equality of two earrays. Every element of `l` is compared
 // with the element at the same position in `r`; the three-iterator form of
 // std::equal requires `r` to hold at least as many elements as `l`.
 friend bool operator == (const earray& l, const earray& r)
 {
     const bool all_match = std::equal(l.begin(), l.end(), r.begin());
     return all_match;
 }
예제 #8
0
// Builds one clustering level per threshold in `dists` from the merges stored
// in mergearr.
//   otus[0][c]    - ids (0..size-1) belonging to level-0 cluster c
//   otus[l][c]    - for l>0, indices of the level l-1 clusters that make up
//                   level-l cluster c
//   otusanc[l][c] - index, inside otus[l], of the cluster that contains
//                   level-0 cluster c (identity for l==0)
// Returns the number of level-0 clusters, or 0 when `dists` is empty.
// NOTE(review): each pass stops at the first merge whose dist is below the
// current threshold and the next pass resumes from there, so this presumably
// expects mergearr sorted by decreasing dist and `dists` in decreasing
// order -- confirm with callers.
int eseqclusterData::getTree(const efloatarray& dists,earray<earray<eintarray> >& otus,earray<eintarray>& otusanc,int size)
{
  if (dists.size()==0) return(0);

  // otuarr[s] is the id that s was merged into; every id starts pointing at itself
  eintarray otuarr;
  int i,j,k;
  otuarr.init(size);
  for (i=0; i<size; ++i)
    otuarr[i]=i;

  otus.init(dists.size());
  otusanc.init(dists.size());
  // point at the seq we merged with; k is kept so later levels continue where this loop stops
  for (k=0; k<mergearr.size() && mergearr[k].dist>=dists[0]; ++k)
    otuarr[mergearr[k].y]=mergearr[k].x;

  // otuind[r]: index inside otus[0] of the cluster whose final id is r (-1 until created)
  eintarray otuind,otuind2;
  otuind.init(otuarr.size(),-1);

  // translate all the merged seq ids to the last seq id of the otu
  for (i=0; i<otuarr.size(); ++i){
    // empty-bodied loop: walk the chain until an id that points at itself
    for (j=otuarr[i]; j!=otuarr[j]; j=otuarr[j]);
    otuarr[i]=j;
    if (otuind[j]==-1){
      otuind[j]=otus[0].size();
      otus[0].add(eintarray());
    }
    otus[0][otuind[j]].add(i);
  }

  // at level 0 every cluster is its own ancestor
  otusanc[0].init(otus[0].size());
  for (i=0; i<otusanc[0].size(); ++i)
    otusanc[0][i]=i;
  int l;
  eintarray otuarr2;

  for (l=1; l<dists.size(); ++l){
    otusanc[l].init(otus[0].size());

    // otuarr2 plays the role of otuarr, but over the clusters of level l-1
    otuarr2.init(otus[l-1].size());
    for (i=0; i<otuarr2.size(); ++i)
      otuarr2[i]=i;

    // for each further merge down to dists[l]: map both ids to their level l-1
    // cluster (id -> final id -> level-0 cluster -> ancestor at l-1) and point
    // the cluster of y at the cluster of x
    for (; k<mergearr.size() && mergearr[k].dist>=dists[l]; ++k)
      otuarr2[otusanc[l-1][otuind[otuarr[mergearr[k].y]]]]=otusanc[l-1][otuind[otuarr[mergearr[k].x]]];

    // otuind2[r]: index inside otus[l] of the cluster whose final level l-1 index is r
    otuind2.init(otuarr2.size(),-1);
    for (i=0; i<otuarr2.size(); ++i){
      // same chain walk as above, now over level l-1 cluster indices
      for (j=otuarr2[i]; j!=otuarr2[j]; j=otuarr2[j]);
      otuarr2[i]=j;
      if (otuind2[j]==-1){
        otuind2[j]=otus[l].size();
        otus[l].add(eintarray());
      }
      otus[l][otuind2[j]].add(i);
    }

    // carry every level-0 cluster forward to the level-l cluster that now contains it
    for (i=0; i<otus[0].size(); ++i){
      j=otuarr2[otusanc[l-1][i]];
      otusanc[l][i]=otuind2[j];
    }
  }
  return(otus[0].size());
}