PUBLIC float profile_aln(const float *T1, const char *seq1, const float *T2, const char *seq2) { /* align the 2 probability profiles T1, T2 */ /* This is like a Needleman-Wunsch alignment, with affine gap-costs ala Gotoh. The score looks at both seq and pair profile */ float **S, **E, **F, tot_score=0.; int i, j, length1, length2; length1 = strlen(seq1); length2 = strlen(seq2); S = newmat(length1, length2); E = newmat(length1, length2); F = newmat(length1, length2); E[0][0] = F[0][0] = open - ext; S[0][0] = 0; for (i=1; i<=length1; i++) F[i][0] = -9999; /* impossible */ for (j=1; j<=length2; j++) E[0][j] = -9999; /* impossible */ if (!free_ends) { for (i=1; i<=length1; i++) S[i][0] = E[i][0] = E[i-1][0] +ext; for (j=1; j<=length2; j++) S[0][j] = F[0][j] = F[0][j-1] +ext; } for (i=1; i<=length1; i++) { for (j=1; j<=length2; j++) { float M; E[i][j] = eMAX(E[i-1][j]+ext, S[i-1][j]+open); F[i][j] = eMAX(F[i][j-1]+ext, S[i][j-1]+open); M = S[i-1][j-1] + PrfEditScore(T1+3*i,T2+3*j, seq1[i-1], seq2[j-1]); S[i][j] = MAX3(M, E[i][j], F[i][j]); } } if (edit_backtrack) { double score=0; char state = 'S'; int pos, i,j; alignment[0] = (int *) space((length1+length2+1)*sizeof(int)); alignment[1] = (int *) space((length1+length2+1)*sizeof(int)); pos = length1+length2; i = length1; j = length2; tot_score = S[length1][length2]; if (free_ends) { /* find starting point for backtracking, search for highest entry in last row or column */ int imax=0; for (i=1; i<=length1; i++) { if (S[i][length2]>score) { score=S[i][length2]; imax=i; } } for (j=1; j<=length2; j++) { if (S[length1][j]>score) { score=S[length1][j]; imax=-j; } } if (imax<0) { for (j=length2; j> -imax; j--) { alignment[0][pos] = 0; alignment[1][pos--] = j; } i=length1; } else { for (i=length1; i>imax; i--) { alignment[0][pos] = i; alignment[1][pos--] = 0; } j=length2; } tot_score=score; } while (i>0 && j>0) { switch (state) { case 'E': score = E[i][j]; alignment[0][pos] = i; alignment[1][pos--] = 0; if (EQUAL(score, S[i-1][j] + open)) state = 'S'; i--; break; case 'F': score = F[i][j]; alignment[0][pos] = 0; alignment[1][pos--] = j; if (EQUAL(score, S[i][j-1] + open)) state = 'S'; j--; break; case 'S': score = S[i][j]; if (EQUAL(score, E[i][j])) state = 'E'; else if (EQUAL(score, F[i][j])) state = 'F'; else if (EQUAL(score, S[i-1][j-1] + PrfEditScore(T1+3*i,T2+3*j, seq1[i-1], seq2[j-1]))) { alignment[0][pos] = i; alignment[1][pos--] = j; i--; j--; } else nrerror("backtrack of alignment failed"); break; } } for (; j>0; j--) { alignment[0][pos] = 0; alignment[1][pos--] = j; } for (; i>0; i--) { alignment[0][pos] = i; alignment[1][pos--] = 0; } for(i=pos+1; i<=length1+length2; i++){ alignment[0][i-pos] = alignment[0][i]; alignment[1][i-pos] = alignment[1][i]; } alignment[0][0] = length1+length2-pos; /* length of alignment */ sprint_aligned_bppm(T1,seq1, T2,seq2); free(alignment[0]); free(alignment[1]); } for (i=0; i<=length1; i++) { free(S[i]); free(E[i]); free(F[i]); } free(S); free(E); free(F); return tot_score; }
PUBLIC float profile_edit_distance(const float *T1, const float *T2) { /* align the 2 probability profiles T1, T2 */ /* This is like a Needleman-Wunsch alignment, we should really use affine gap-costs ala Gotoh */ float **distance; short **i_point, **j_point; int i, j, i1, j1, pos, length1,length2; float minus, plus, change, temp; length1 = (int) T1[0]; length2 = (int) T2[0]; distance = (float **) vrna_alloc((length1 +1)*sizeof(float *)); if(edit_backtrack){ i_point = (short **) vrna_alloc((length1 +1)*sizeof(short *)); j_point = (short **) vrna_alloc((length1 +1)*sizeof(short *)); } for(i=0; i<= length1; i++){ distance[i] = (float *) vrna_alloc( (length2+1)*sizeof(float)); if(edit_backtrack){ i_point[i] = (short *) vrna_alloc( (length2+1)*sizeof(short)); j_point[i] = (short *) vrna_alloc( (length2+1)*sizeof(short)); } } for(i = 1; i <= length1; i++) { distance[i][0] = distance[i-1][0]+PrfEditCost(i,0,T1,T2); if(edit_backtrack){ i_point[i][0] = (short) i-1; j_point[i][0] = 0; } } for(j = 1; j <= length2; j++) { distance[0][j] = distance[0][j-1]+PrfEditCost(0,j,T1,T2); if(edit_backtrack){ i_point[0][j] = 0; j_point[0][j] = (short) j-1; } } for (i = 1; i <= length1; i++) { for (j = 1; j <= length2 ; j++) { minus = distance[i-1][j] + PrfEditCost(i,0,T1,T2); plus = distance[i][j-1] + PrfEditCost(0,j,T1,T2); change = distance[i-1][j-1]+ PrfEditCost(i,j,T1,T2); distance[i][j] = MIN3(minus, plus, change); /* printf("%g ", distance[i][j]); */ if(edit_backtrack){ if(distance[i][j] == change) { i_point[i][j]= (short)i-1; j_point[i][j]= (short) j-1; } else if(distance[i][j] == plus) { i_point[i][j]= (short)i ; j_point[i][j]= (short)j-1; } else { i_point[i][j]= (short)i-1; j_point[i][j]= (short)j ; } } } /* printf("\n"); */ } /* printf("\n"); */ temp = distance[length1][length2]; for(i=0;i<=length1;i++) free(distance[i]); free(distance); if(edit_backtrack){ alignment[0] = (int *) vrna_alloc((length1+length2+1)*sizeof(int)); alignment[1] = (int *) vrna_alloc((length1+length2+1)*sizeof(int)); pos = length1+length2; i = length1; j = length2; while( (i>0)||(j>0) ) { i1 = i_point[i][j]; j1 = j_point[i][j]; if( ((i-i1)==1)&&((j-j1)==1) ) { /* substitution */ alignment[0][pos] = i; alignment[1][pos] = j; } if( ((i-i1)==1)&&(j==j1) ) { /* Deletion in [1] */ alignment[0][pos] = i; alignment[1][pos] = 0; } if( (i==i1)&&((j-j1)==1) ) { /* Deletion in [0] */ alignment[0][pos] = 0; alignment[1][pos] = j; } pos--; i = i1; j = j1; } for(i=pos+1; i<=length1+length2; i++){ alignment[0][i-pos] = alignment[0][i]; alignment[1][i-pos] = alignment[1][i]; } alignment[0][0] = length1+length2-pos; /* length of alignment */ for(i=0; i<=length1; i++){ free(i_point[i]); free(j_point[i]); } free(i_point); free(j_point); sprint_aligned_bppm(T1,T2); free(alignment[0]); free(alignment[1]); } return temp; }