Пример #1
0
 /** \brief named c-tor that clamps the given value into the [0;1] range.
  *  \param v value to normalize; anything below 0 becomes 0, anything above 1 becomes 1.
  *  \return valid object representing similarity.
  */
 static const Similarity normalize(const double v)
 {
   // saturate at the nearest bound; a value for which neither comparison
   // holds is handed to the constructor unchanged
   return (v<0) ? Similarity(0)
                : ( (1<v) ? Similarity(1) : Similarity(v) );
 }
Пример #2
0
//Average pairwise score of one alignment column over all pairs of input sequences.
//Type[i]: 1 (true) means sequence i has a gap in this column, 0 otherwise.
//Cors[i]: position of sequence i in this column; it is read as inseqs[i][Cors[i]-1]
//         and only for entries whose Type is 0.
//assume the length of Cors, Type must be of length this->inseqs.size()
//mtx is not used by this implementation.
//Returns 2*sum/(n*(n-1)) for n sequences, or 0 when n < 2 (there is no pair to score).
float Alignment::pos_score(MtxE* mtx,int* Cors,bool* Type){
	const int count=(int)this->inseqs.size();
	//with 0 or 1 sequences the normalisation below would divide by zero
	if(count < 2)
		return 0;

	float score_sum=0;
	for(int i=0; i < count;i++){
		if(Type[i]){
			//a gap in sequence i is charged once for every later sequence;
			//signed arithmetic keeps the factor correct whatever the type of Gap is
			score_sum += this->Gap*(count-i-1);
			continue;
		}
		for(int j=i+1;j < count;j++){
			if(Type[j]){
				score_sum += this->Gap;
				continue;
			}
			float Percentage=Similarity(this->inseqs[i][Cors[i]-1],this->inseqs[j][Cors[j]-1]);
			if(FloatEqual(Percentage,1.0)){
				score_sum+=this->Match;
			}else if(FloatEqual(Percentage,0.9)){
				score_sum+=this->Identical;
			}else if(Percentage >= 0.66 ){
				score_sum+=this->Similar;
			}else{
				score_sum+=this->Mismatch;
			}
		}
	}
	//n*(n-1)/2 pairs were visited; the product is formed in double to avoid int overflow
	return (2.0*score_sum)/((double)count*(count-1));
}
Пример #3
0
// Aligns two VM code sequences and records which registers of x correspond to
// which registers of y.
//
// align1/align2 are cleared, then Similarity( x, y ) and Matching( x.size(), y.size() )
// are called (presumably these fill align1/align2 -- confirm against their definitions).
// For every aligned position whose two indices are both non-negative, the operands of
// x[a1] are mapped to the operands of y[a2] in regmap. A register that already has an
// entry in regmap is never overwritten.
void DaoxDebugger::Matching( QList<DaoVmCode> & x, QList<DaoVmCode> & y )
{
	align1.clear();
	align2.clear();
	Similarity( x, y );
	Matching( x.size(), y.size() );
	int i, reg, n = align1.size();
#if 0
	//printf( "x: %i; y: %i\n", x.size(), y.size() );
	for(i=0; i<x.size(); i++) DaoVmCode_Print( x[i], NULL );
	for(i=0; i<y.size(); i++) DaoVmCode_Print( y[i], NULL );
#endif
	for(i=0; i<n; i++){
		DaoCnode xnode, ynode;
		int a1 = align1[i];
		int a2 = align2[i];
#if 0
		//printf( "%3i:  %3i  %3i\n", i, a1, a2 );
		if( a1 >=0 ) DaoVmCode_Print( x[a1], NULL );
		if( a2 >=0 ) DaoVmCode_Print( y[a2], NULL );
#endif
		// a negative index marks a position with no counterpart on that side
		if( a1 < 0 || a2 < 0 ) continue;

		memset( & xnode, 0, sizeof(DaoCnode) );
		memset( & ynode, 0, sizeof(DaoCnode) );
		DaoCnode_InitOperands( & xnode, & x[a1] );
		DaoCnode_InitOperands( & ynode, & y[a2] );

		switch( xnode.type ){
		case DAO_OP_NONE :
			break;
		case DAO_OP_SINGLE :
			if( regmap.find( xnode.first ) == regmap.end() ) regmap[ xnode.first ] = ynode.first;
			break;
		case DAO_OP_PAIR :
			if( regmap.find( xnode.first ) == regmap.end() ) regmap[ xnode.first ] = ynode.first;
			if( regmap.find( xnode.second ) == regmap.end() ) regmap[ xnode.second ] = ynode.second;
			break;
		case DAO_OP_TRIPLE :
			if( regmap.find( xnode.first ) == regmap.end() ) regmap[ xnode.first ] = ynode.first;
			if( regmap.find( xnode.second ) == regmap.end() ) regmap[ xnode.second ] = ynode.second;
			if( regmap.find( xnode.third ) == regmap.end() ) regmap[ xnode.third ] = ynode.third;
			break;
		case DAO_OP_RANGE :
		case DAO_OP_RANGE2 :
			// The range walk needs its own variable: reusing the outer index i here
			// would make the enclosing loop resume from xnode.second+1.
			for(reg=xnode.first; reg<=xnode.second; reg++){
				if( regmap.find( reg ) == regmap.end() ) regmap[reg] = ynode.first + reg - xnode.first;
			}
			if( xnode.type == DAO_OP_RANGE2 ){
				if( regmap.find( xnode.third ) == regmap.end() ) regmap[ xnode.third ] = ynode.third;
			}
			break;
		}
	}
}
Пример #4
0
//Progressive alignment of all sequences in seqs.
//seqs[0] seeds the profile; every further sequence seqs[i] is then aligned against the
//current profile with a dynamic-programming table in which the cheapest of three moves
//is kept per cell (lower score wins). The table is traced back into `alignment` and
//UpdateProfile() is called before the next sequence is processed.
//Returns the bottom-right cell of the last table divided by profile[0].size(), plus 2;
//returns 0 when seqs is empty.
float MAlignment::align(){
	//nothing to align; also avoids reading seqs[0] of an empty container
	if(seqs.empty())
		return 0;

	profile.push_back(seqs[0]);
	//stays 0 when there is a single sequence and no table is ever filled
	float FinalScore=0;

	for(unsigned int i=1;i < seqs.size();i++){
		/*******Calculate the TABLE********/
		const int ydim=profile[0].size()+1;
		const int xdim=seqs[i].size()+1;
		const size_t cells=(size_t)xdim*ydim;
		//Heap-backed tables: variable-length arrays are not standard C++ and a table of
		//this size can exhaust the stack for long sequences.
		vector<float> score(cells);
		//move that produced each cell:
		// 1 = symbol of seqs[i] against a gap in the profile,
		// 2 = symbol of seqs[i] against a profile column,
		// 3 = profile column against a gap in seqs[i]
		vector<int> trace(cells);
		for(int l=0; l < ydim;l++){
			score[l*xdim]=(float)l*gap;
			trace[l*xdim]=3;
		}
		//runs second on purpose so that cell (0,0) ends up with move 1, as before
		for(int l=0; l < xdim;l++){
			score[l]=(float)l*gap;
			trace[l]=1;
		}

		for(int j=1; j <xdim;j++){
			for(int k=1; k <ydim;k++){
				const int cell=k*xdim+j;
				const float diag=score[(k-1)*xdim+j-1];

				//start from the move that adds a gap in y direction
				score[cell]=score[(k-1)*xdim+j]+gap;
				trace[cell]=3;

				//diagonal move: cost averaged over every row of the profile
				float temp=0.0;
				for(unsigned int m=0;m < profile.size();m++){
					float Percentage=Similarity(profile[m][k-1],seqs[i][j-1]);
					if(FloatEqual(Percentage,1.0)){
						temp=temp+diag+identical;
					}else if(FloatEqual(Percentage,0.9)){
						temp=temp+diag+verysimilar;
					}else if(Percentage >= 0.66 ){
						temp=temp+diag+similar;
					}else if(Percentage < 0.0 ){
						//a negative similarity is charged like a gap
						temp=temp+diag+gap;
					}else{
						temp=temp+diag+mismatch;
					}
				}
				temp=temp/float(profile.size());
				if(temp < score[cell]){
					score[cell]=temp;
					trace[cell]=2;
				}

				temp=score[cell-1]+gap; //adding gap in x direction
				if(temp < score[cell]){
					score[cell]=temp;
					trace[cell]=1;
				}
			}
		}

		/**Trace back find the alignment*/
		//Row 0 of the table holds move 1 and column 0 holds move 3, so the walk below
		//also consumes whatever is left once one of the two indices reaches 0.
		const unsigned int rows=profile.size();
		int xInd=xdim-1;
		int yInd=ydim-1;
		while(xInd > 0 || yInd > 0 ){
			const int step=trace[yInd*xdim+xInd];
			const bool takeSeq=(step == 1 || step == 2) && xInd > 0;
			const bool takeProfile=(step == 3 || step == 2) && yInd > 0;
			//cannot happen with a table filled above; guards against looping forever
			if(!takeSeq && !takeProfile)
				break;

			//prepend one column to every row that belongs to the current profile
			for(unsigned int m=0;m < rows;m++){
				string cell("-");
				if(takeProfile)
					cell=profile[m][yInd-1];
				if(alignment.size() > m){
					alignment[m].insert(alignment[m].begin(),cell);
				}else{
					alignment.push_back(vector<string>(1,cell));
				}
			}

			//prepend the matching column to the row of the sequence being added
			string added("-");
			if(takeSeq)
				added=seqs[i][xInd-1];
			if(alignment.size() > rows){
				alignment[rows].insert(alignment[rows].begin(),added);
			}else{
				alignment.push_back(vector<string>(1,added));
			}

			if(takeSeq)
				xInd=xInd-1;
			if(takeProfile)
				yInd=yInd-1;
		}

		UpdateProfile();
		FinalScore=score[cells-1];
	}

	//NOTE(review): by operator precedence this is (FinalScore/length)+2 -- confirm that
	//this, and not FinalScore/(length+2), is the intended normalisation.
	return (FinalScore)/(int)(profile[0].size())+2;

}
Пример #5
0
/**
 * \brief Main function.
 */
int main(int argc, char **argv)
{
  char            *s,*t;
  int             size,sizes,sizet;
  int             i,j,k,P;
  int             cond;
  int             *simi,res,Paux;
  int             *a,*b;
  FILE            *f,*f2;
  fpos_t          filepos;
  int             my_rank,set;
  struct timeval  ini, fi;
  struct timezone tz;


  bsp_begin(atoi(argv[1]));

  size = atoi(argv[1]);

  f=fopen(argv[2],"r");
  if (f==NULL) Exit("Error: File %s not found\n",argv[2]);
  fscanf(f,"%d",&sizes);

  if (sizes%size != 0)
    Exit("Error: The sequences have to have multiple of "
         "processes quantity size");

  f2=fopen(argv[3],"r");

  if (f2==NULL) Exit("Error: File %s not found\n",argv[3]);

  fscanf(f2,"%d",&sizet);

  if (bsp_pid() == 0)
    if (sizet%size != 0)
      Exit("Error: The sequences have to have multiple of "
         "processes quantity size");

  P = atoi(argv[4]);

  if (bsp_pid() == 0)
    printf("align %d %s %s %d\n",size,argv[2],argv[3],P);

  sizes /= size;
  sizet /= size;

  s = (char*) malloc (sizes*sizeof(char));
  t = (char*) malloc (sizet*sizeof(char));

  if (s == NULL || t == NULL)
    Exit("No memory\n");


  a = (int*)malloc ((sizet+1)*sizeof(int));
  b = (int*)malloc ((sizes+1)*sizeof(int));


  if (a == NULL || b == NULL)
    Exit("No memory\n");


  if (bsp_pid() == size-1)
  {
    simi = (int*) malloc(P*sizeof(int));
    if (simi == NULL) Exit("No memory\n");
  }

  Paux = 0;

  bsp_push_reg(s,sizes*sizeof(char));
  bsp_push_reg(b,(sizes+1)*sizeof(int));
  bsp_push_reg(&filepos,sizeof(long int));
  bsp_push_reg(&i,sizeof(int));

  bsp_sync();

  gettimeofday(&ini,&tz);
  
  for (k = 0; k < P*size + size -1; k++)
  {
    if (k >= bsp_pid() && k <= P*size+bsp_pid()-1)
      cond = 1;
    else
      cond = 0;

    set = 0;
    if (cond==1 && (k-bsp_pid())%size == 0)/*start of a reading*/
    {
      if (bsp_pid() == 0 && k < size);
      else if (bsp_pid() == 0)
      {
	bsp_get(size-1,&filepos,0,&filepos,sizeof(long int));
      }
      else
      {
	bsp_get(bsp_pid()-1,&filepos,0,&filepos,sizeof(long int));
      }
      set = 1;
    }

    bsp_sync();

    if (cond==1 && (k-bsp_pid())%size == 0)/*start of a reading*/
    {
      if (set == 1) fsetpos(f2,&filepos);
      for (i = 0; i < sizet; i++)
      {
	fscanf(f2,"%c",&t[i]);
	if (t[i] == 'A' ||t[i] == 'T' ||t[i] == 'C' ||t[i] == 'G');
	else
	{
	  if (t[i] == EOF) Exit("Error: End of file reached without"
			   "read all sequence in %s\n",argv[3]);
	  i--;
	}
      }
      fgetpos(f2,&filepos);
      for (i = 0; i <= sizet; i++)
	a[i] = (i+bsp_pid()*sizet)*gap;

    }

    if (cond==1)
    {
      if (bsp_pid() == 0)
      {
	for (i = 0; i < sizes; i++)
	{
	  fscanf(f,"%c",&s[i]);
	  if (s[i] == 'A' ||s[i] == 'T' ||s[i] == 'C' ||s[i] == 'G');
	  else
	  {
	  if (s[i] == EOF) Exit("Error: End of file reached without"
			   "read all sequence in %s\n",argv[2]);
	    i--;
	  }
	}
	for (j = 0; j <= sizes; j++)
	  b[j] = (j + (k%size)*sizes)*gap;
      }

      res = Similarity (s, sizes, t, sizet, a, b);

      if (bsp_pid() == size-1 && (k-bsp_pid()+1)%size == 0)
      {
	simi[Paux++] = res;
      }
    }
    if (cond)
      {
	if (bsp_pid() != size -1)
	{
	  bsp_put(bsp_pid()+1,s,s,0,sizes*sizeof(char));
	  bsp_put(bsp_pid()+1,b,b,0,(sizes+1)*sizeof(int));
	}
      }
    bsp_sync();
  }

  gettimeofday(&fi,&tz);

  printf("process %d ended\n",bsp_pid());

  fclose(f);
  fclose(f2);

  if (bsp_pid() == size-1)
  {
    printf("Similarities: ");
    for (i = 0; i < P; i++)
      printf("%d ",simi[i]);
    printf("\n");
  }
  if (bsp_pid() == 0)
  {
    printf("Computation time: %f\n", (fi.tv_sec - ini.tv_sec + (double)(fi.tv_usec -
ini.tv_usec)/1000000)/60);
  }

  bsp_pop_reg(&filepos);
  bsp_pop_reg(b);
  bsp_pop_reg(s);
  bsp_sync();

  return 0;
}