/** \brief named c-tor clamping an arbitrary value into the [0;1] range.
 *  \param v value to clamp.
 *  \return Similarity(0) when v is below 0, Similarity(1) when v is above 1,
 *          and Similarity(v) in every other case.
 */
static const Similarity normalize(const double v)
{
  const bool belowRange = v<0;
  const bool aboveRange = 1<v;

  // neither comparison holds: hand the value over untouched
  if( !belowRange && !aboveRange )
    return Similarity(v);

  return belowRange ? Similarity(0) : Similarity(1);
}
//1 means it is a gap, 0 otherwise //assume the length of Cors, Type must be of length this->inseqs.size() float Alignment::pos_score(MtxE* mtx,int* Cors,bool* Type){ float score_sum=0; for(unsigned int i=0; i < this->inseqs.size();i++){ if(Type[i] == 1){ score_sum += this->Gap*((int)this->inseqs.size()-i-1); continue; } for(unsigned int j=i+1;j < this->inseqs.size();j++){ if(Type[j] == 1) score_sum += this->Gap; else{ float Percentage=Similarity(this->inseqs[i][Cors[i]-1],this->inseqs[j][Cors[j]-1]); if(FloatEqual(Percentage,1.0) == true ){ score_sum+=this->Match; }else if(FloatEqual(Percentage,0.9) == true){ score_sum+=this->Identical; }else if(Percentage >= 0.66 ){ score_sum+=this->Similar; }else{ score_sum+=this->Mismatch; } } } } return (2.0*score_sum)/(((int)this->inseqs.size())*((int)this->inseqs.size()-1)); }
void DaoxDebugger::Matching( QList<DaoVmCode> & x, QList<DaoVmCode> & y ) { align1.clear(); align2.clear(); Similarity( x, y ); Matching( x.size(), y.size() ); int i, j, k, m, n = align1.size(); #if 0 //printf( "x: %i; y: %i\n", x.size(), y.size() ); for(i=0; i<x.size(); i++) DaoVmCode_Print( x[i], NULL ); for(i=0; i<y.size(); i++) DaoVmCode_Print( y[i], NULL ); #endif for(i=0; i<n; i++){ DaoCnode xnode, ynode; int a1 = align1[i]; int a2 = align2[i]; #if 0 //printf( "%3i: %3i %3i\n", i, a1, a2 ); if( a1 >=0 ) DaoVmCode_Print( x[a1], NULL ); if( a2 >=0 ) DaoVmCode_Print( y[a2], NULL ); #endif if( a1 < 0 || a2 < 0 ) continue; memset( & xnode, 0, sizeof(DaoCnode) ); memset( & ynode, 0, sizeof(DaoCnode) ); DaoCnode_InitOperands( & xnode, & x[a1] ); DaoCnode_InitOperands( & ynode, & y[a2] ); switch( xnode.type ){ case DAO_OP_NONE : break; case DAO_OP_SINGLE : if( regmap.find( xnode.first ) == regmap.end() ) regmap[ xnode.first ] = ynode.first; break; case DAO_OP_PAIR : if( regmap.find( xnode.first ) == regmap.end() ) regmap[ xnode.first ] = ynode.first; if( regmap.find( xnode.second ) == regmap.end() ) regmap[ xnode.second ] = ynode.second; break; case DAO_OP_TRIPLE : if( regmap.find( xnode.first ) == regmap.end() ) regmap[ xnode.first ] = ynode.first; if( regmap.find( xnode.second ) == regmap.end() ) regmap[ xnode.second ] = ynode.second; if( regmap.find( xnode.third ) == regmap.end() ) regmap[ xnode.third ] = ynode.third; break; case DAO_OP_RANGE : case DAO_OP_RANGE2 : for(i=xnode.first; i<=xnode.second; i++){ if( regmap.find( i ) == regmap.end() ) regmap[i] = ynode.first + i - xnode.first; } if( xnode.type == DAO_OP_RANGE2 ){ if( regmap.find( xnode.third ) == regmap.end() ) regmap[ xnode.third ] = ynode.third; } break; } } }
float MAlignment::align(){ profile.push_back(seqs[0]); float FinalScore; int total_length=seqs[0].size(); for(unsigned int i=1;i < seqs.size();i++){ total_length+=seqs[i].size(); /*******Calculate the TABLE********/ int ydim=profile[0].size()+1; int xdim=seqs[i].size()+1; float score[xdim*ydim]; int trace[xdim*ydim]; // to record traces for finding the aligned sequences 1 for gap in x,2 for match,3 for gap in y for(int l=0; l < ydim;l++){ score[l*xdim]=(float)l*gap; trace[l*xdim]=3; } for(int l=0; l < xdim;l++){ score[l]=(float)l*gap; trace[l]=1; } for(int j=1; j <xdim;j++){ for(int k=1; k <ydim;k++){ float temp=0.0; score[k*xdim+j]=score[(k-1)*xdim+j]+gap;//initialize min with adding gap in y direction trace[k*xdim+j]=3; for(unsigned int m=0;m < profile.size();m++){ float Percentage=Similarity(profile[m][k-1],seqs[i][j-1]); if(FloatEqual(Percentage,1.0) == true){ temp=temp+score[(k-1)*xdim+j-1]+identical; //cout<<"Identical "<<profile[m][k-1]<<","<<seqs[i][j-1]<<" Score:"<<temp<<endl; }else if(FloatEqual(Percentage,0.9) == true){ temp=temp+score[(k-1)*xdim+j-1]+verysimilar; //cout<<"Almost "<<profile[m][k-1]<<","<<seqs[i][j-1]<<" Score:"<<temp<<endl; }else if(Percentage >= 0.66 ){ //cout<<"Temp now is "<<temp<<endl; temp=temp+score[(k-1)*xdim+j-1]+similar; //cout<<"Similar "<<profile[m][k-1]<<","<<seqs[i][j-1]<<" Score:"<<temp<<endl; }else if(Percentage < 0.0 ){ temp=temp+score[(k-1)*xdim+j-1]+gap; //cout<<"MatchToGap "<<profile[m][k-1]<<","<<seqs[i][j-1]<<" Score:"<<temp<<endl; }else{ temp=temp+score[(k-1)*xdim+j-1]+mismatch; //cout<<"Mismatch "<<profile[m][k-1]<<","<<seqs[i][j-1]<<" Score:"<<temp<<endl; } } temp=temp/float(profile.size()); //cout<<"Finally the score is "<<temp<<endl; if(temp < score[k*xdim+j]){ //cout<<temp << " is smaller than "<< score[k*xdim+j] <<endl; score[k*xdim+j]=temp; trace[k*xdim+j]=2; } temp=score[k*xdim+j-1]+gap; //adding gap in x direction if(temp < score[k*xdim+j]){ //cout<<temp << " is smaller than "<< score[k*xdim+j] <<endl; 
score[k*xdim+j]=temp; trace[k*xdim+j]=1; } } } //cout<<"Finish Scoring?"<<endl; /************** Output score matrix************* for(int k=ydim-1; k >=0;k--){ if( k >= 1){ cout<<profile[0][k-1]; }else{ cout<<"-"; } for(int j=0; j <xdim;j++){ cout<<"\t"<<score[k*xdim+j]; } cout<<endl; } cout<<"\t"<<"-"; for(unsigned int j=0; j < seqs[i].size();j++){ cout<<"\t"<<seqs[i][j]; } cout<<endl; cout<<"Final Score :"<<score[xdim*ydim-1]<<endl; *************************************************/ /************** Out put matrix for(int l=ydim-1; l >=0;l--){ for(int m=0; m <xdim;m++){ cout<<"\t"<<trace[l*xdim+m]; } cout<<endl; } */ /**Trace back find the alignment*/ int xInd=xdim-1; int yInd=ydim-1; while(xInd > 0 || yInd > 0 ){ //cout <<"Checking "<<xInd<<","<<yInd<<endl; if(xInd-1 >=0 && trace[yInd*xdim+xInd] == 1){ for(unsigned int m=0;m < profile.size();m++){ if(alignment.size() > m){ alignment[m].insert(alignment[m].begin(),"-"); }else{ vector<string> temp; temp.push_back("-"); alignment.push_back(temp); } } if(alignment.size() > profile.size() ){ alignment[int(profile.size())].insert(alignment[int(profile.size())].begin(),seqs[i][xInd-1]); }else{ vector<string> temp; temp.push_back(seqs[i][xInd-1]); alignment.push_back(temp); } xInd=xInd-1; continue; } if(yInd-1 >=0 && trace[yInd*xdim+xInd] == 3){ for(unsigned int m=0;m < profile.size();m++){ if(alignment.size() > m){ alignment[m].insert(alignment[m].begin(),profile[m][yInd-1]); }else{ vector<string> temp; temp.push_back(profile[m][yInd-1]); alignment.push_back(temp); } } if(alignment.size() > profile.size()){ alignment[int(profile.size())].insert(alignment[int(profile.size())].begin(),"-"); }else{ vector<string> temp; temp.push_back("-"); alignment.push_back(temp); } yInd=yInd-1; continue; } if(trace[yInd*xdim+xInd] == 2){ for(unsigned int m=0;m < profile.size();m++){ if(alignment.size() > m){ alignment[m].insert(alignment[m].begin(),profile[m][yInd-1]); }else{ vector<string> temp; temp.push_back(profile[m][yInd-1]); 
alignment.push_back(temp); } } if(alignment.size() > profile.size()){ alignment[int(profile.size())].insert(alignment[int(profile.size())].begin(),seqs[i][xInd-1]); }else{ vector<string> temp; temp.push_back(seqs[i][xInd-1]); alignment.push_back(temp); } xInd=xInd-1; yInd=yInd-1; } } //~ while (i > 0) //~ { //~ AlignmentA <- Ai + AlignmentA //~ AlignmentB <- "-" + AlignmentB //~ i <- i - 1 //~ } //~ while (j > 0) //~ { //~ AlignmentA <- "-" + AlignmentA //~ AlignmentB <- Bj + AlignmentB //~ j <- j - 1 //~ } UpdateProfile(); FinalScore=score[xdim*ydim-1]; } return (FinalScore)/(int)(profile[0].size())+2; }
/** * \brief Main function. */ int main(int argc, char **argv) { char *s,*t; int size,sizes,sizet; int i,j,k,P; int cond; int *simi,res,Paux; int *a,*b; FILE *f,*f2; fpos_t filepos; int my_rank,set; struct timeval ini, fi; struct timezone tz; bsp_begin(atoi(argv[1])); size = atoi(argv[1]); f=fopen(argv[2],"r"); if (f==NULL) Exit("Error: File %s not found\n",argv[2]); fscanf(f,"%d",&sizes); if (sizes%size != 0) Exit("Error: The sequences have to have multiple of " "processes quantity size"); f2=fopen(argv[3],"r"); if (f2==NULL) Exit("Error: File %s not found\n",argv[3]); fscanf(f2,"%d",&sizet); if (bsp_pid() == 0) if (sizet%size != 0) Exit("Error: The sequences have to have multiple of " "processes quantity size"); P = atoi(argv[4]); if (bsp_pid() == 0) printf("align %d %s %s %d\n",size,argv[2],argv[3],P); sizes /= size; sizet /= size; s = (char*) malloc (sizes*sizeof(char)); t = (char*) malloc (sizet*sizeof(char)); if (s == NULL || t == NULL) Exit("No memory\n"); a = (int*)malloc ((sizet+1)*sizeof(int)); b = (int*)malloc ((sizes+1)*sizeof(int)); if (a == NULL || b == NULL) Exit("No memory\n"); if (bsp_pid() == size-1) { simi = (int*) malloc(P*sizeof(int)); if (simi == NULL) Exit("No memory\n"); } Paux = 0; bsp_push_reg(s,sizes*sizeof(char)); bsp_push_reg(b,(sizes+1)*sizeof(int)); bsp_push_reg(&filepos,sizeof(long int)); bsp_push_reg(&i,sizeof(int)); bsp_sync(); gettimeofday(&ini,&tz); for (k = 0; k < P*size + size -1; k++) { if (k >= bsp_pid() && k <= P*size+bsp_pid()-1) cond = 1; else cond = 0; set = 0; if (cond==1 && (k-bsp_pid())%size == 0)/*start of a reading*/ { if (bsp_pid() == 0 && k < size); else if (bsp_pid() == 0) { bsp_get(size-1,&filepos,0,&filepos,sizeof(long int)); } else { bsp_get(bsp_pid()-1,&filepos,0,&filepos,sizeof(long int)); } set = 1; } bsp_sync(); if (cond==1 && (k-bsp_pid())%size == 0)/*start of a reading*/ { if (set == 1) fsetpos(f2,&filepos); for (i = 0; i < sizet; i++) { fscanf(f2,"%c",&t[i]); if (t[i] == 'A' ||t[i] == 'T' ||t[i] == 'C' 
||t[i] == 'G'); else { if (t[i] == EOF) Exit("Error: End of file reached without" "read all sequence in %s\n",argv[3]); i--; } } fgetpos(f2,&filepos); for (i = 0; i <= sizet; i++) a[i] = (i+bsp_pid()*sizet)*gap; } if (cond==1) { if (bsp_pid() == 0) { for (i = 0; i < sizes; i++) { fscanf(f,"%c",&s[i]); if (s[i] == 'A' ||s[i] == 'T' ||s[i] == 'C' ||s[i] == 'G'); else { if (s[i] == EOF) Exit("Error: End of file reached without" "read all sequence in %s\n",argv[2]); i--; } } for (j = 0; j <= sizes; j++) b[j] = (j + (k%size)*sizes)*gap; } res = Similarity (s, sizes, t, sizet, a, b); if (bsp_pid() == size-1 && (k-bsp_pid()+1)%size == 0) { simi[Paux++] = res; } } if (cond) { if (bsp_pid() != size -1) { bsp_put(bsp_pid()+1,s,s,0,sizes*sizeof(char)); bsp_put(bsp_pid()+1,b,b,0,(sizes+1)*sizeof(int)); } } bsp_sync(); } gettimeofday(&fi,&tz); printf("process %d ended\n",bsp_pid()); fclose(f); fclose(f2); if (bsp_pid() == size-1) { printf("Similarities: "); for (i = 0; i < P; i++) printf("%d ",simi[i]); printf("\n"); } if (bsp_pid() == 0) { printf("Computation time: %f\n", (fi.tv_sec - ini.tv_sec + (double)(fi.tv_usec - ini.tv_usec)/1000000)/60); } bsp_pop_reg(&filepos); bsp_pop_reg(b); bsp_pop_reg(s); bsp_sync(); return 0; }