/*
 * Release a matrix of doubles.
 *
 * Every row pointer up to (not including) the first NULL entry is handed to
 * FreeDoubleVec(); afterwards the array of row pointers itself is freed.
 * The row array must therefore be NULL-terminated.
 *
 * A NULL matrix is accepted and ignored, in the same way free(NULL) is.
 */
void FreeDoubleMtx( double **mtx )
{
	double **row;

	if( mtx == NULL ) return;

	for( row=mtx; *row; row++ )
		FreeDoubleVec( *row );
	free( mtx );
}
/*
 * Select a consistent chain of candidate cut-point pairs.
 *
 * cut1, cut2    in: *ncut candidate cut positions for each side;
 *               out: overwritten with the positions kept on the chosen chain.
 * seg1, seg2    segment pointers passed to permit() to decide whether a jump
 *               between two candidates is allowed.
 * ocrossscore   (*ncut) x (*ncut) table of pair scores (read only).
 * ncut          in: number of candidates; out: number of cut points kept.
 *
 * Working storage (two crossscoresize x crossscoresize tables plus two
 * vectors) is kept in static buffers and re-allocated only when *ncut+2
 * exceeds the current size.  The fill step is a doubly nested loop over
 * *ncut; instead of re-scanning earlier cells it carries a running best
 * "jump" score for the current row (jumpscorei) and for every column
 * (jumpscore[]), together with the index at which that best was seen.
 *
 * track[i][j] encodes the predecessor of cell (i,j):
 *     0   -> (i-1, j-1)
 *    >0   -> (i-1, j-track)
 *    <0   -> (i+track, j-1)
 */
void blockAlign3( int *cut1, int *cut2, Segment **seg1, Segment **seg2, double **ocrossscore, int *ncut )
{
	int i, j, shift, cur1, cur2, count;
	int jumpposi;
	double jumpscorei;
	double maximum;
	static TLS int crossscoresize = 0;
	static TLS int *jumppos = NULL;
	static TLS double *jumpscore = NULL;
	static TLS int *result1 = NULL;
	static TLS int *result2 = NULL;
	static TLS int *ocut1 = NULL;
	static TLS int *ocut2 = NULL;
	static TLS double **crossscore = NULL;
	static TLS int **track = NULL;

	/* Fixed-size scratch vectors are allocated once. */
	if( result1 == NULL )
	{
		result1 = AllocateIntVec( MAXSEG );
		result2 = AllocateIntVec( MAXSEG );
		ocut1 = AllocateIntVec( MAXSEG );
		ocut2 = AllocateIntVec( MAXSEG );
	}

	/* Grow the size-dependent buffers when the problem got larger. */
	if( crossscoresize < *ncut+2 )
	{
		crossscoresize = *ncut+2;
		if( fftkeika ) fprintf( stderr, "allocating crossscore and track, size = %d\n", crossscoresize );
		if( track ) FreeIntMtx( track );
		if( crossscore ) FreeDoubleMtx( crossscore );
		if( jumppos ) FreeIntVec( jumppos );
		if( jumpscore ) FreeDoubleVec( jumpscore );
		track = AllocateIntMtx( crossscoresize, crossscoresize );
		crossscore = AllocateDoubleMtx( crossscoresize, crossscoresize );
		jumppos = AllocateIntVec( crossscoresize );
		jumpscore = AllocateDoubleVec( crossscoresize );
	}

	/* Work on private copies: crossscore is accumulated in place and
	 * cut1/cut2 are overwritten with the result at the end. */
	for( i=0; i<*ncut; i++ ) for( j=0; j<*ncut; j++ )
		crossscore[i][j] = ocrossscore[i][j];
	for( i=0; i<*ncut; i++ )
	{
		ocut1[i] = cut1[i];
		ocut2[i] = cut2[i];
	}

	/* -1 marks "no jump candidate recorded yet" for a column. */
	for( j=0; j<*ncut; j++ )
	{
		jumpscore[j] = -999.999;
		jumppos[j] = -1;
	}

	for( i=1; i<*ncut; i++ )
	{
		jumpscorei = -999.999;
		jumpposi = -1;

		for( j=1; j<*ncut; j++ )
		{
#if 1
			fprintf( stderr, "in blockalign3, ### i=%d, j=%d\n", i, j );
#endif
			/* Default predecessor: the diagonal neighbour. */
			maximum = crossscore[i-1][j-1];
			track[i][j] = 0;

			/* Jump within the previous row.  The position check keeps the
			 * -1 sentinel from being used as an index into seg1[].
			 * NOTE(review): the permit() arguments are kept exactly as in
			 * the original; confirm the index pairing is the intended one. */
			if( jumpposi >= 0 && maximum < jumpscorei && permit( seg1[jumpposi], seg1[i] ) )
			{
				maximum = jumpscorei;
				track[i][j] = j - jumpposi;
			}

			/* Jump within the previous column.  The offset stored here must
			 * be a position difference (jumppos[j] - i, always negative),
			 * because the traceback computes the predecessor row as
			 * cur1 + track. */
			if( jumppos[j] >= 0 && maximum < jumpscore[j] && permit( seg2[jumppos[j]], seg2[j] ) )
			{
				maximum = jumpscore[j];
				track[i][j] = jumppos[j] - i;
			}

			crossscore[i][j] += maximum;

			/* Update the running bests for later cells. */
			if( jumpscorei < crossscore[i-1][j] )
			{
				jumpscorei = crossscore[i-1][j];
				jumpposi = j;
			}

			if( jumpscore[j] < crossscore[i][j-1] )
			{
				jumpscore[j] = crossscore[i][j-1];
				jumppos[j] = i;
			}
		}
	}

	/* Traceback: the path is written from the end of result1/result2
	 * towards the front, starting at the last cell. */
	result1[MAXSEG-1] = *ncut-1;
	result2[MAXSEG-1] = *ncut-1;

	for( i=MAXSEG-1; i>=1; i-- )
	{
		cur1 = result1[i];
		cur2 = result2[i];
		if( cur1 == 0 || cur2 == 0 ) break;
		shift = track[cur1][cur2];
		if( shift == 0 )
		{
			result1[i-1] = cur1 - 1;
			result2[i-1] = cur2 - 1;
			continue;
		}
		else if( shift > 0 )
		{
			result1[i-1] = cur1 - 1;
			result2[i-1] = cur2 - shift;
		}
		else if( shift < 0 )
		{
			result1[i-1] = cur1 + shift;
			result2[i-1] = cur2 - 1;
		}
	}

	/* Emit the cut points along the path.  Cells whose original score is
	 * exactly 0.0 are skipped; when two consecutive path cells share a row
	 * or a column and the later one scores higher, it replaces the
	 * previously emitted entry (count is stepped back). */
	count = 0;
	for( j=i; j<MAXSEG; j++ )
	{
		if( ocrossscore[result1[j]][result2[j]] == 0.0 ) continue;

		if( result1[j] == result1[j-1] || result2[j] == result2[j-1] )
			if( ocrossscore[result1[j]][result2[j]] > ocrossscore[result1[j-1]][result2[j-1]] )
				count--;

		cut1[count] = ocut1[result1[j]];
		cut2[count] = ocut2[result2[j]];
		count++;
	}

	*ncut = count;
}
/*
 * Scan two groups of sequences for windows whose summed per-site score
 * exceeds a threshold and record each such run as a Segment.
 *
 * clus1, clus2  number of sequences in seq1 / seq2.  clus1 == 0 is a
 *               special request: release all static buffers and return 0.
 * seq1, seq2    the sequences; only the first min(strlen(seq1[0]),
 *               strlen(seq2[0])) columns are examined.
 * eff1, eff2    per-sequence weights.
 * seg           output array; filled with start/end/center/score and the
 *               skipForeward/skipBackward flags of each run found.
 *
 * Returns the number of segments written.  Calls ErrorExit() when more
 * than MAXSEG-3 segments have been produced.
 */
int alignableReagion( int clus1, int clus2, char **seq1, char **seq2, double *eff1, double *eff2, Segment *seg )
{
	static TLS double *siteScore = NULL;   /* per-column score, reused across calls */
	static TLS int siteScoreCap = 0;       /* allocated length of siteScore */
	static TLS double cutoff;              /* window-score threshold */
	static TLS double *profA = NULL;       /* weighted symbol counts, group 1 */
	static TLS double *profB = NULL;       /* weighted symbol counts, group 2 */
	static TLS int *nextA = NULL;          /* linked list of non-zero profA entries */
	static TLS int *nextB = NULL;          /* linked list of non-zero profB entries */
	size_t rawlen1, rawlen2;
	int pos, a, b;
	int shortLen, needLen;
	int tailA, tailB;
	int inRun;
	int runStart = 0;   /* placeholder; set whenever a run opens */
	int runLen = 0;     /* placeholder; set whenever a run opens */
	int nfound = 0;
	double weightSum;
	double window;
	double runScore;

	/* Cleanup request. */
	if( clus1 == 0 )
	{
		if( siteScore ) FreeDoubleVec( siteScore );
		if( profA ) FreeDoubleVec( profA );
		if( profB ) FreeDoubleVec( profB );
		if( nextA ) FreeIntVec( nextA );
		if( nextB ) FreeIntVec( nextB );
		siteScore = NULL;
		profA = NULL;
		profB = NULL;
		nextA = NULL;
		nextB = NULL;
		siteScoreCap = 0;
		return( 0 );
	}

	if( profA == NULL )
	{
		profA = AllocateDoubleVec( nalphabets );
		profB = AllocateDoubleVec( nalphabets );
		nextA = AllocateIntVec( nalphabets+1 );
		nextB = AllocateIntVec( nalphabets+1 );
	}

	rawlen1 = strlen( seq1[0] );
	rawlen2 = strlen( seq2[0] );
	shortLen = MIN( rawlen1, rawlen2 );
	needLen = MAX( rawlen1, rawlen2 ) + fftWinSize;

	if( siteScoreCap < needLen )
	{
		/* The threshold is derived only on the first allocation after a
		 * reset; later growth just replaces the buffer. */
		if( siteScoreCap == 0 )
			cutoff = (int)fftThreshold / 100.0 * 600.0 * fftWinSize;
		else
			FreeDoubleVec( siteScore );
		siteScore = AllocateDoubleVec( needLen );
		siteScoreCap = needLen;
	}

	/* Normaliser: sum of all pairwise weight products. */
	weightSum = 0.0;
	for( a=0; a<clus1; a++ )
		for( b=0; b<clus2; b++ )
			weightSum += eff1[a] * eff2[b];

	for( pos=0; pos<shortLen; pos++ )
	{
		double *cell = siteScore + pos;

		/* Column profiles. */
		for( a=0; a<nalphabets; a++ )
		{
			profA[a] = 0.0;
			profB[a] = 0.0;
		}
		for( a=0; a<clus1; a++ )
			profA[amino_n[(int)seq1[a][pos]]] += eff1[a];
		for( b=0; b<clus2; b++ )
			profB[amino_n[(int)seq2[b][pos]]] += eff2[b];

		/* Thread the non-zero entries (indices 25..0) into singly linked
		 * lists whose heads live at index nalphabets and which end in -1. */
		tailA = nalphabets;
		tailB = nalphabets;
		for( a=25; a>=0; a-- )
		{
			if( profA[a] )
			{
				nextA[tailA] = a;
				tailA = a;
			}
			if( profB[a] )
			{
				nextB[tailB] = a;
				tailB = a;
			}
		}
		nextA[tailA] = -1;
		nextB[tailB] = -1;

		/* Column score: weighted sum over every pair of present symbols. */
		*cell = 0.0;
		a = nextA[nalphabets];
		while( a != -1 )
		{
			b = nextB[nalphabets];
			while( b != -1 )
			{
				*cell += n_disFFT[a][b] * profA[a] * profB[b];
				b = nextB[b];
			}
			a = nextA[a];
		}
		*cell /= weightSum;
	}

	seg[0].skipForeward = 0;
	seg[1].skipBackward = 0;
	inRun = 0;
	runScore = 0.0;

	/* Initial window sum over columns 0 .. fftWinSize-1. */
	window = 0.0;
	for( a=0; a<fftWinSize; a++ )
		window += siteScore[a];

	for( pos=1; pos<shortLen-fftWinSize; pos++ )
	{
		/* Slide the window one column to the right. */
		window = window - siteScore[pos-1] + siteScore[pos+fftWinSize-1];
#if TMPTMPTMP
		fprintf( stderr, "%d %10.0f ? %10.0f\n", pos, window, cutoff );
#endif
		if( window > cutoff )
		{
			if( !inRun )
			{
				inRun = 1;
				runStart = pos;
				runLen = 0;
				runScore = 0.0;
			}
			runLen++;
			runScore += window;
		}

		/* A run closes when the score drops or the run got too long. */
		if( !( window <= cutoff || runLen > SEGMENTSIZE ) ) continue;
		if( !inRun ) continue;

		/* Only runs longer than the window width are reported. */
		if( runLen > fftWinSize )
		{
			int oversize = ( runLen > SEGMENTSIZE );

			seg->start = runStart;
			seg->end = pos;
			seg->center = ( seg->start + seg->end + fftWinSize ) / 2 ;
			seg->score = runScore;
			/* A run cut short by the length limit is flagged on both
			 * sides of the cut. */
			seg[0].skipForeward = oversize;
			seg[1].skipBackward = oversize;
			nfound++;
			seg++;
		}

		runLen = 0;
		runScore = 0.0;
		inRun = 0;
		runStart = pos;
		if( nfound > MAXSEG - 3 ) ErrorExit( "TOO MANY SEGMENTS!");
	}

	/* A run still open at the end of the scan. */
	if( inRun && runLen > fftWinSize )
	{
		seg->end = pos;
		seg->start = runStart;
		seg->center = ( runStart + pos + fftWinSize ) / 2 ;
		seg->score = runScore;
		nfound++;
	}
#if TMPTMPTMP
	exit( 0 );
#endif
	return( nfound );
}
void constants( int nseq, char **seq ) { int i, j, x; // double tmp; if( dorp == 'd' ) /* DNA */ { int k, m; double average; double **pamx = AllocateDoubleMtx( 11,11 ); double **pam1 = AllocateDoubleMtx( 4, 4 ); double *freq = AllocateDoubleVec( 4 ); scoremtx = -1; if( RNAppenalty == NOTSPECIFIED ) RNAppenalty = DEFAULTRNAGOP_N; if( RNAppenalty_ex == NOTSPECIFIED ) RNAppenalty_ex = DEFAULTRNAGEP_N; if( ppenalty == NOTSPECIFIED ) ppenalty = DEFAULTGOP_N; if( ppenalty_OP == NOTSPECIFIED ) ppenalty_OP = DEFAULTGOP_N; if( ppenalty_ex == NOTSPECIFIED ) ppenalty_ex = DEFAULTGEP_N; if( ppenalty_EX == NOTSPECIFIED ) ppenalty_EX = DEFAULTGEP_N; if( poffset == NOTSPECIFIED ) poffset = DEFAULTOFS_N; if( RNApthr == NOTSPECIFIED ) RNApthr = DEFAULTRNATHR_N; if( pamN == NOTSPECIFIED ) pamN = DEFAULTPAMN; if( kimuraR == NOTSPECIFIED ) kimuraR = 2; RNApenalty = (int)( 3 * 600.0 / 1000.0 * RNAppenalty + 0.5 ); RNApenalty_ex = (int)( 3 * 600.0 / 1000.0 * RNAppenalty_ex + 0.5 ); // fprintf( stderr, "DEFAULTRNAGOP_N = %d\n", DEFAULTRNAGOP_N ); // fprintf( stderr, "RNAppenalty = %d\n", RNAppenalty ); // fprintf( stderr, "RNApenalty = %d\n", RNApenalty ); RNAthr = (int)( 3 * 600.0 / 1000.0 * RNApthr + 0.5 ); penalty = (int)( 3 * 600.0 / 1000.0 * ppenalty + 0.5); penalty_OP = (int)( 3 * 600.0 / 1000.0 * ppenalty_OP + 0.5); penalty_ex = (int)( 3 * 600.0 / 1000.0 * ppenalty_ex + 0.5); penalty_EX = (int)( 3 * 600.0 / 1000.0 * ppenalty_EX + 0.5); offset = (int)( 3 * 600.0 / 1000.0 * poffset + 0.5); offsetFFT = (int)( 3 * 600.0 / 1000.0 * (-0) + 0.5); offsetLN = (int)( 3 * 600.0 / 1000.0 * 100 + 0.5); penaltyLN = (int)( 3 * 600.0 / 1000.0 * -2000 + 0.5); penalty_exLN = (int)( 3 * 600.0 / 1000.0 * -100 + 0.5); sprintf( modelname, "%s%d (%d), %6.3f (%6.3f), %6.3f (%6.3f)", rnakozo?"RNA":"DNA", pamN, kimuraR, -(double)ppenalty*0.001, -(double)ppenalty*0.003, -(double)poffset*0.001, -(double)poffset*0.003 ); if( kimuraR == 9999 ) { for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) pamx[i][j] = 
(double)locn_disn[i][j]; #if NORMALIZE1 average = 0.0; for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) average += pamx[i][j]; average /= 16.0; if( disp ) fprintf( stderr, "average = %f\n", average ); for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) pamx[i][j] -= average; for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) pamx[i][j] *= 600.0 / average; for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) pamx[i][j] -= offset; #endif } else { double f = 0.99; double s = (double)kimuraR / ( 2 + kimuraR ) * 0.01; double v = (double)1 / ( 2 + kimuraR ) * 0.01; pam1[0][0] = f; pam1[0][1] = s; pam1[0][2] = v; pam1[0][3] = v; pam1[1][0] = s; pam1[1][1] = f; pam1[1][2] = v; pam1[1][3] = v; pam1[2][0] = v; pam1[2][1] = v; pam1[2][2] = f; pam1[2][3] = s; pam1[3][0] = v; pam1[3][1] = v; pam1[3][2] = s; pam1[3][3] = f; fprintf( stderr, "generating %dPAM scoring matrix for nucleotides ... ", pamN ); if( disp ) { fprintf( stderr, " TPM \n" ); for( i=0; i<4; i++ ) { for( j=0; j<4; j++ ) fprintf( stderr, "%+#6.10f", pam1[i][j] ); fprintf( stderr, "\n" ); } fprintf( stderr, "\n" ); } MtxuntDouble( pamx, 4 ); for( x=0; x < pamN; x++ ) MtxmltDouble( pamx, pam1, 4 ); for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) pamx[i][j] /= 1.0 / 4.0; for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) { if( pamx[i][j] == 0.0 ) { fprintf( stderr, "WARNING: pamx[i][j] = 0.0 ?\n" ); pamx[i][j] = 0.00001; /* by J. Thompson */ } pamx[i][j] = log10( pamx[i][j] ) * 1000.0; } if( disp ) { fprintf( stderr, " after log\n" ); for( i=0; i<4; i++ ) { for( j=0; j<4; j++ ) fprintf( stderr, "%+#6.10f", pamx[i][j] ); fprintf( stderr, "\n" ); } fprintf( stderr, "\n" ); } // ????? 
for( i=0; i<26; i++ ) amino[i] = locaminon[i]; for( i=0; i<0x80; i++ ) amino_n[i] = -1; for( i=0; i<26; i++ ) amino_n[(int)amino[i]] = i; if( fmodel == 1 ) calcfreq_nuc( nseq, seq, freq ); else { freq[0] = 0.25; freq[1] = 0.25; freq[2] = 0.25; freq[3] = 0.25; } // fprintf( stderr, "a, freq[0] = %f\n", freq[0] ); // fprintf( stderr, "g, freq[1] = %f\n", freq[1] ); // fprintf( stderr, "c, freq[2] = %f\n", freq[2] ); // fprintf( stderr, "t, freq[3] = %f\n", freq[3] ); average = 0.0; for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) average += pamx[i][j] * freq[i] * freq[j]; for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) pamx[i][j] -= average; average = 0.0; for( i=0; i<4; i++ ) average += pamx[i][i] * 1.0 / 4.0; for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) pamx[i][j] *= 600.0 / average; for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) pamx[i][j] -= offset; /* extending gap cost */ for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) pamx[i][j] = shishagonyuu( pamx[i][j] ); if( disp ) { fprintf( stderr, " after shishagonyuu\n" ); for( i=0; i<4; i++ ) { for( j=0; j<4; j++ ) fprintf( stderr, "%+#6.10f", pamx[i][j] ); fprintf( stderr, "\n" ); } fprintf( stderr, "\n" ); } fprintf( stderr, "done\n" ); } for( i=0; i<5; i++ ) { pamx[4][i] = pamx[3][i]; pamx[i][4] = pamx[i][3]; } for( i=5; i<10; i++ ) for( j=5; j<10; j++ ) { pamx[i][j] = pamx[i-5][j-5]; } if( disp ) { fprintf( stderr, " before dis\n" ); for( i=0; i<4; i++ ) { for( j=0; j<4; j++ ) fprintf( stderr, "%+#6.10f", pamx[i][j] ); fprintf( stderr, "\n" ); } fprintf( stderr, "\n" ); } if( disp ) { fprintf( stderr, " score matrix \n" ); for( i=0; i<4; i++ ) { for( j=0; j<4; j++ ) fprintf( stderr, "%+#6.10f", pamx[i][j] ); fprintf( stderr, "\n" ); } fprintf( stderr, "\n" ); } for( i=0; i<26; i++ ) amino[i] = locaminon[i]; for( i=0; i<26; i++ ) amino_grp[(int)amino[i]] = locgrpn[i]; for( i=0; i<26; i++ ) for( j=0; j<26; j++ ) n_dis[i][j] = 0; for( i=0; i<10; i++ ) for( j=0; j<10; j++ ) n_dis[i][j] = shishagonyuu( pamx[i][j] ); if( disp ) { fprintf( stderr, 
" score matrix \n" ); for( i=0; i<26; i++ ) { for( j=0; j<26; j++ ) fprintf( stderr, "%+6d", n_dis[i][j] ); fprintf( stderr, "\n" ); } fprintf( stderr, "\n" ); } // RIBOSUM #if 1 average = 0.0; for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) average += ribosum4[i][j] * freq[i] * freq[j]; for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) ribosum4[i][j] -= average; average = 0.0; for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) for( k=0; k<4; k++ ) for( m=0; m<4; m++ ) { // if( i%4==0&&j%4==3 || i%4==3&&j%4==0 || i%4==1&&j%4==2 || i%4==2&&j%4==1 || i%4==1&&j%4==3 || i%4==3&&j%4==1 ) // if( k%4==0&&m%4==3 || k%4==3&&m%4==0 || k%4==1&&m%4==2 || k%4==2&&m%4==1 || k%4==1&&m%4==3 || k%4==3&&m%4==1 ) average += ribosum16[i*4+j][k*4+m] * freq[i] * freq[j] * freq[k] * freq[m]; } for( i=0; i<16; i++ ) for( j=0; j<16; j++ ) ribosum16[i][j] -= average; average = 0.0; for( i=0; i<4; i++ ) average += ribosum4[i][i] * freq[i]; for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) ribosum4[i][j] *= 600.0 / average; average = 0.0; average += ribosum16[0*4+3][0*4+3] * freq[0] * freq[3]; // AU average += ribosum16[3*4+0][3*4+0] * freq[3] * freq[0]; // UA average += ribosum16[1*4+2][1*4+2] * freq[1] * freq[2]; // CG average += ribosum16[2*4+1][2*4+1] * freq[2] * freq[1]; // GC average += ribosum16[1*4+3][1*4+3] * freq[1] * freq[3]; // GU average += ribosum16[3*4+1][3*4+1] * freq[3] * freq[1]; // UG for( i=0; i<16; i++ ) for( j=0; j<16; j++ ) ribosum16[i][j] *= 600.0 / average; #if 1 for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) ribosum4[i][j] -= offset; /* extending gap cost ?????*/ for( i=0; i<16; i++ ) for( j=0; j<16; j++ ) ribosum16[i][j] -= offset; /* extending gap cost ?????*/ #endif for( i=0; i<4; i++ ) for( j=0; j<4; j++ ) ribosum4[i][j] = shishagonyuu( ribosum4[i][j] ); for( i=0; i<16; i++ ) for( j=0; j<16; j++ ) ribosum16[i][j] = shishagonyuu( ribosum16[i][j] ); if( disp ) { fprintf( stderr, "ribosum after shishagonyuu\n" ); for( i=0; i<4; i++ ) { for( j=0; j<4; j++ ) fprintf( stderr, "%+#6.10f", ribosum4[i][j] 
); fprintf( stderr, "\n" ); } fprintf( stderr, "\n" ); fprintf( stderr, "ribosum16 after shishagonyuu\n" ); for( i=0; i<16; i++ ) { for( j=0; j<16; j++ ) fprintf( stderr, "%+#7.0f", ribosum16[i][j] ); fprintf( stderr, "\n" ); } fprintf( stderr, "\n" ); } fprintf( stderr, "done\n" ); #if 1 for( i=0; i<37; i++ ) for( j=0; j<37; j++ ) ribosumdis[i][j] = 0.0; //iru for( m=0; m<9; m++ ) for( i=0; i<4; i++ ) // loop for( k=0; k<9; k++ ) for( j=0; j<4; j++ ) ribosumdis[m*4+i][k*4+j] = ribosum4[i][j]; // loop-loop // for( k=0; k<9; k++ ) for( j=0; j<4; j++ ) ribosumdis[m*4+i][k*4+j] = n_dis[i][j]; // loop-loop for( i=0; i<16; i++ ) for( j=0; j<16; j++ ) ribosumdis[i+4][j+4] = ribosum16[i][j]; // stem5-stem5 for( i=0; i<16; i++ ) for( j=0; j<16; j++ ) ribosumdis[i+20][j+20] = ribosum16[i][j]; // stem5-stem5 #else // do not use ribosum for( i=0; i<37; i++ ) for( j=0; j<37; j++ ) ribosumdis[i][j] = 0.0; //iru for( m=0; m<9; m++ ) for( i=0; i<4; i++ ) // loop for( k=0; k<9; k++ ) for( j=0; j<4; j++ ) ribosumdis[m*4+i][k*4+j] = n_dis[i][j]; // loop-loop #endif if( disp ) { fprintf( stderr, "ribosumdis\n" ); for( i=0; i<37; i++ ) { for( j=0; j<37; j++ ) fprintf( stderr, "%+5d", ribosumdis[i][j] ); fprintf( stderr, "\n" ); } fprintf( stderr, "\n" ); } fprintf( stderr, "done\n" ); #endif FreeDoubleMtx( pam1 ); FreeDoubleMtx( pamx ); free( freq ); } else if( dorp == 'p' && scoremtx == 1 ) /* Blosum */ { double *freq; double *freq1; double *datafreq; double average; // double tmp; double **n_distmp; n_distmp = AllocateDoubleMtx( 20, 20 ); datafreq = AllocateDoubleVec( 20 ); freq = AllocateDoubleVec( 20 ); if( ppenalty == NOTSPECIFIED ) ppenalty = DEFAULTGOP_B; if( ppenalty_OP == NOTSPECIFIED ) ppenalty_OP = DEFAULTGOP_B; if( ppenalty_ex == NOTSPECIFIED ) ppenalty_ex = DEFAULTGEP_B; if( ppenalty_EX == NOTSPECIFIED ) ppenalty_EX = DEFAULTGEP_B; if( poffset == NOTSPECIFIED ) poffset = DEFAULTOFS_B; if( pamN == NOTSPECIFIED ) pamN = 0; if( kimuraR == NOTSPECIFIED ) kimuraR = 1; penalty 
= (int)( 600.0 / 1000.0 * ppenalty + 0.5 ); penalty_OP = (int)( 600.0 / 1000.0 * ppenalty_OP + 0.5 ); penalty_ex = (int)( 600.0 / 1000.0 * ppenalty_ex + 0.5 ); penalty_EX = (int)( 600.0 / 1000.0 * ppenalty_EX + 0.5 ); offset = (int)( 600.0 / 1000.0 * poffset + 0.5 ); offsetFFT = (int)( 600.0 / 1000.0 * (-0) + 0.5); offsetLN = (int)( 600.0 / 1000.0 * 100 + 0.5); penaltyLN = (int)( 600.0 / 1000.0 * -2000 + 0.5); penalty_exLN = (int)( 600.0 / 1000.0 * -100 + 0.5); BLOSUMmtx( nblosum, n_distmp, freq, amino, amino_grp ); if( nblosum == -1 ) sprintf( modelname, "User-defined, %6.3f, %+6.3f, %+6.3f", -(double)ppenalty/1000, -(double)poffset/1000, -(double)ppenalty_ex/1000 ); else sprintf( modelname, "BLOSUM%d, %6.3f, %+6.3f, %+6.3f", nblosum, -(double)ppenalty/1000, -(double)poffset/1000, -(double)ppenalty_ex/1000 ); #if 0 for( i=0; i<26; i++ ) amino[i] = locaminod[i]; for( i=0; i<26; i++ ) amino_grp[(int)amino[i]] = locgrpd[i]; for( i=0; i<0x80; i++ ) amino_n[i] = 0; for( i=0; i<26; i++ ) amino_n[(int)amino[i]] = i; #endif for( i=0; i<0x80; i++ )amino_n[i] = -1; for( i=0; i<26; i++) amino_n[(int)amino[i]] = i; if( fmodel == 1 ) { calcfreq( nseq, seq, datafreq ); freq1 = datafreq; } else freq1 = freq; #if TEST fprintf( stderr, "raw scoreing matrix : \n" ); for( i=0; i<20; i++ ) { for( j=0; j<20; j++ ) { fprintf( stdout, "%6.2f", n_distmp[i][j] ); } fprintf( stdout, "\n" ); } #endif if( fmodel == -1 ) average = 0.0; else { for( i=0; i<20; i++ ) #if TEST fprintf( stdout, "freq[%c] = %f, datafreq[%c] = %f, freq1[] = %f\n", amino[i], freq[i], amino[i], datafreq[i], freq1[i] ); #endif average = 0.0; for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) average += n_distmp[i][j] * freq1[i] * freq1[j]; } #if TEST fprintf( stdout, "####### average2 = %f\n", average ); #endif for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) n_distmp[i][j] -= average; #if TEST fprintf( stdout, "average2 = %f\n", average ); fprintf( stdout, "after average substruction : \n" ); for( i=0; i<20; i++ ) { for( j=0; 
j<20; j++ ) { fprintf( stdout, "%6.2f", n_distmp[i][j] ); } fprintf( stdout, "\n" ); } #endif average = 0.0; for( i=0; i<20; i++ ) average += n_distmp[i][i] * freq1[i]; #if TEST fprintf( stdout, "####### average1 = %f\n", average ); #endif for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) n_distmp[i][j] *= 600.0 / average; #if TEST fprintf( stdout, "after average division : \n" ); for( i=0; i<20; i++ ) { for( j=0; j<=i; j++ ) { fprintf( stdout, "%7.1f", n_distmp[i][j] ); } fprintf( stdout, "\n" ); } #endif for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) n_distmp[i][j] -= offset; #if TEST fprintf( stdout, "after offset substruction (offset = %d): \n", offset ); for( i=0; i<20; i++ ) { for( j=0; j<=i; j++ ) { fprintf( stdout, "%7.1f", n_distmp[i][j] ); } fprintf( stdout, "\n" ); } #endif #if 0 /* 注意 !!!!!!!!!! */ penalty -= offset; #endif for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) n_distmp[i][j] = shishagonyuu( n_distmp[i][j] ); if( disp ) { fprintf( stdout, " scoring matrix \n" ); for( i=0; i<20; i++ ) { fprintf( stdout, "%c ", amino[i] ); for( j=0; j<20; j++ ) fprintf( stdout, "%5.0f", n_distmp[i][j] ); fprintf( stdout, "\n" ); } fprintf( stdout, " " ); for( i=0; i<20; i++ ) fprintf( stdout, " %c", amino[i] ); average = 0.0; for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) average += n_distmp[i][j] * freq1[i] * freq1[j]; fprintf( stdout, "average = %f\n", average ); average = 0.0; for( i=0; i<20; i++ ) average += n_distmp[i][i] * freq1[i]; fprintf( stdout, "itch average = %f\n", average ); fprintf( stderr, "parameters: %d, %d, %d\n", penalty, penalty_ex, offset ); exit( 1 ); } for( i=0; i<26; i++ ) for( j=0; j<26; j++ ) n_dis[i][j] = 0; for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) n_dis[i][j] = (int)n_distmp[i][j]; FreeDoubleMtx( n_distmp ); FreeDoubleVec( datafreq ); FreeDoubleVec( freq ); fprintf( stderr, "done.\n" ); } else if( dorp == 'p' && scoremtx == 2 ) /* Miyata-Yasunaga */ { fprintf( stderr, "Not supported\n" ); exit( 1 ); for( i=0; i<26; i++ ) for( j=0; j<26; j++ ) 
n_dis[i][j] = locn_dism[i][j]; for( i=0; i<26; i++ ) if( i != 24 ) n_dis[i][24] = n_dis[24][i] = exgpm; n_dis[24][24] = 0; if( ppenalty == NOTSPECIFIED ) ppenalty = locpenaltym; if( poffset == NOTSPECIFIED ) poffset = -20; if( pamN == NOTSPECIFIED ) pamN = 0; if( kimuraR == NOTSPECIFIED ) kimuraR = 1; penalty = ppenalty; offset = poffset; sprintf( modelname, "Miyata-Yasunaga, %6.3f, %6.3f", -(double)ppenalty/1000, -(double)poffset/1000 ); for( i=0; i<26; i++ ) amino[i] = locaminom[i]; for( i=0; i<26; i++ ) amino_grp[(int)amino[i]] = locgrpm[i]; #if DEBUG fprintf( stdout, "scoreing matrix : \n" ); for( i=0; i<26; i++ ) { for( j=0; j<26; j++ ) { fprintf( stdout, "%#5d", n_dis[i][j] ); } fprintf( stdout, "\n" ); } #endif } else /* JTT */ { double **rsr; double **pam1; double **pamx; double *freq; double *freq1; double *mutab; double *datafreq; double average; double tmp; double delta; rsr = AllocateDoubleMtx( 20, 20 ); pam1 = AllocateDoubleMtx( 20, 20 ); pamx = AllocateDoubleMtx( 20, 20 ); freq = AllocateDoubleVec( 20 ); mutab = AllocateDoubleVec( 20 ); datafreq = AllocateDoubleVec( 20 ); if( ppenalty == NOTSPECIFIED ) ppenalty = DEFAULTGOP_J; if( ppenalty_OP == NOTSPECIFIED ) ppenalty_OP = DEFAULTGOP_J; if( ppenalty_ex == NOTSPECIFIED ) ppenalty_ex = DEFAULTGEP_J; if( ppenalty_EX == NOTSPECIFIED ) ppenalty_EX = DEFAULTGEP_J; if( poffset == NOTSPECIFIED ) poffset = DEFAULTOFS_J; if( pamN == NOTSPECIFIED ) pamN = DEFAULTPAMN; if( kimuraR == NOTSPECIFIED ) kimuraR = 1; penalty = (int)( 600.0 / 1000.0 * ppenalty + 0.5 ); penalty_OP = (int)( 600.0 / 1000.0 * ppenalty_OP + 0.5 ); penalty_ex = (int)( 600.0 / 1000.0 * ppenalty_ex + 0.5 ); penalty_EX = (int)( 600.0 / 1000.0 * ppenalty_EX + 0.5 ); offset = (int)( 600.0 / 1000.0 * poffset + 0.5 ); offsetFFT = (int)( 600.0 / 1000.0 * (-0) + 0.5 ); offsetLN = (int)( 600.0 / 1000.0 * 100 + 0.5); penaltyLN = (int)( 600.0 / 1000.0 * -2000 + 0.5); penalty_exLN = (int)( 600.0 / 1000.0 * -100 + 0.5); sprintf( modelname, "%s %dPAM, 
%6.3f, %6.3f", (TMorJTT==TM)?"Transmembrane":"JTT", pamN, -(double)ppenalty/1000, -(double)poffset/1000 ); JTTmtx( rsr, freq, amino, amino_grp, (int)(TMorJTT==TM) ); #if TEST fprintf( stdout, "rsr = \n" ); for( i=0; i<20; i++ ) { for( j=0; j<20; j++ ) { fprintf( stdout, "%9.2f ", rsr[i][j] ); } fprintf( stdout, "\n" ); } #endif for( i=0; i<0x80; i++ ) amino_n[i] = -1; for( i=0; i<26; i++ ) amino_n[(int)amino[i]] = i; if( fmodel == 1 ) { calcfreq( nseq, seq, datafreq ); freq1 = datafreq; } else freq1 = freq; fprintf( stderr, "generating %dPAM %s scoring matrix for amino acids ... ", pamN, (TMorJTT==TM)?"Transmembrane":"JTT" ); tmp = 0.0; for( i=0; i<20; i++ ) { mutab[i] = 0.0; for( j=0; j<20; j++ ) mutab[i] += rsr[i][j] * freq[j]; tmp += mutab[i] * freq[i]; } #if TEST fprintf( stdout, "mutability = \n" ); for( i=0; i<20; i++ ) fprintf( stdout, "%5.3f\n", mutab[i] ); fprintf( stdout, "tmp = %f\n", tmp ); #endif delta = 0.01 / tmp; for( i=0; i<20; i++ ) { for( j=0; j<20; j++ ) { if( i != j ) pam1[i][j] = delta * rsr[i][j] * freq[i]; else pam1[i][j] = 1.0 - delta * mutab[i]; } } if( disp ) { fprintf( stdout, "pam1 = \n" ); for( i=0; i<20; i++ ) { for( j=0; j<20; j++ ) { fprintf( stdout, "%9.6f ", pam1[i][j] ); } fprintf( stdout, "\n" ); } } MtxuntDouble( pamx, 20 ); for( x=0; x < pamN; x++ ) MtxmltDouble( pamx, pam1, 20 ); for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) pamx[i][j] /= freq[j]; for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) { if( pamx[i][j] == 0.0 ) { fprintf( stderr, "WARNING: pamx[%d][%d] = 0.0?\n", i, j ); pamx[i][j] = 0.00001; /* by J. 
Thompson */ } pamx[i][j] = log10( pamx[i][j] ) * 1000.0; } #if TEST fprintf( stdout, "raw scoring matrix : \n" ); for( i=0; i<20; i++ ) { for( j=0; j<20; j++ ) { fprintf( stdout, "%5.0f", pamx[i][j] ); } fprintf( stdout, "\n" ); } average = tmp = 0.0; for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) { average += pamx[i][j] * freq1[i] * freq1[j]; tmp += freq1[i] * freq1[j]; } average /= tmp; fprintf( stdout, "Zenbu average = %f, tmp = %f \n", average, tmp ); average = tmp = 0.0; for( i=0; i<20; i++ ) for( j=i; j<20; j++ ) { average += pamx[i][j] * freq1[i] * freq1[j]; tmp += freq1[i] * freq1[j]; } average /= tmp; fprintf( stdout, "Zenbu average2 = %f, tmp = %f \n", average, tmp ); average = tmp = 0.0; for( i=0; i<20; i++ ) { average += pamx[i][i] * freq1[i]; tmp += freq1[i]; } average /= tmp; fprintf( stdout, "Itch average = %f, tmp = %f \n", average, tmp ); #endif #if NORMALIZE1 if( fmodel == -1 ) average = 0.0; else { #if TEST for( i=0; i<20; i++ ) fprintf( stdout, "freq[%c] = %f, datafreq[%c] = %f, freq1[] = %f\n", amino[i], freq[i], amino[i], datafreq[i], freq1[i] ); #endif average = 0.0; for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) average += pamx[i][j] * freq1[i] * freq1[j]; } #if TEST fprintf( stdout, "####### average2 = %f\n", average ); #endif for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) pamx[i][j] -= average; #if TEST fprintf( stdout, "average2 = %f\n", average ); fprintf( stdout, "after average substruction : \n" ); for( i=0; i<20; i++ ) { for( j=0; j<20; j++ ) { fprintf( stdout, "%5.0f", pamx[i][j] ); } fprintf( stdout, "\n" ); } #endif average = 0.0; for( i=0; i<20; i++ ) average += pamx[i][i] * freq1[i]; #if TEST fprintf( stdout, "####### average1 = %f\n", average ); #endif for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) pamx[i][j] *= 600.0 / average; #if TEST fprintf( stdout, "after average division : \n" ); for( i=0; i<20; i++ ) { for( j=0; j<=i; j++ ) { fprintf( stdout, "%5.0f", pamx[i][j] ); } fprintf( stdout, "\n" ); } #endif for( i=0; i<20; i++ ) for( j=0; 
j<20; j++ ) pamx[i][j] -= offset; #if TEST fprintf( stdout, "after offset substruction (offset = %d): \n", offset ); for( i=0; i<20; i++ ) { for( j=0; j<=i; j++ ) { fprintf( stdout, "%5.0f", pamx[i][j] ); } fprintf( stdout, "\n" ); } #endif #if 0 /* 注意 !!!!!!!!!! */ penalty -= offset; #endif for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) pamx[i][j] = shishagonyuu( pamx[i][j] ); #else average = 0.0; for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) average += pamx[i][j]; average /= 400.0; for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) { pamx[i][j] -= average; pamx[i][j] = shishagonyuu( pamx[i][j] ); } #endif if( disp ) { fprintf( stdout, " scoring matrix \n" ); for( i=0; i<20; i++ ) { fprintf( stdout, "%c ", amino[i] ); for( j=0; j<20; j++ ) fprintf( stdout, "%5.0f", pamx[i][j] ); fprintf( stdout, "\n" ); } fprintf( stdout, " " ); for( i=0; i<20; i++ ) fprintf( stdout, " %c", amino[i] ); average = 0.0; for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) average += pamx[i][j] * freq1[i] * freq1[j]; fprintf( stdout, "average = %f\n", average ); average = 0.0; for( i=0; i<20; i++ ) average += pamx[i][i] * freq1[i]; fprintf( stdout, "itch average = %f\n", average ); fprintf( stderr, "parameters: %d, %d, %d\n", penalty, penalty_ex, offset ); exit( 1 ); } for( i=0; i<26; i++ ) for( j=0; j<26; j++ ) n_dis[i][j] = 0; for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) n_dis[i][j] = (int)pamx[i][j]; fprintf( stderr, "done.\n" ); FreeDoubleMtx( rsr ); FreeDoubleMtx( pam1 ); FreeDoubleMtx( pamx ); FreeDoubleVec( freq ); FreeDoubleVec( mutab ); FreeDoubleVec( datafreq ); } fprintf( stderr, "scoremtx = %d\n", scoremtx ); #if DEBUG fprintf( stderr, "scoremtx = %d\n", scoremtx ); fprintf( stderr, "amino[] = %s\n", amino ); #endif for( i=0; i<0x80; i++ )amino_n[i] = -1; for( i=0; i<26; i++) amino_n[(int)amino[i]] = i; for( i=0; i<0x80; i++ ) for( j=0; j<0x80; j++ ) amino_dis[i][j] = 0; for( i=0; i<0x80; i++ ) for( j=0; j<0x80; j++ ) amino_disLN[i][j] = 0; for( i=0; i<0x80; i++ ) for( j=0; j<0x80; j++ ) 
amino_dis_consweight_multi[i][j] = 0.0; for( i=0; i<26; i++) for( j=0; j<26; j++ ) { amino_dis[(int)amino[i]][(int)amino[j]] = n_dis[i][j]; n_dis_consweight_multi[i][j] = (float)n_dis[i][j] * consweight_multi; amino_dis_consweight_multi[(int)amino[i]][(int)amino[j]] = (double)n_dis[i][j] * consweight_multi; } if( dorp == 'd' ) /* DNA */ { for( i=0; i<5; i++) for( j=0; j<5; j++ ) amino_disLN[(int)amino[i]][(int)amino[j]] = n_dis[i][j] + offset - offsetLN; for( i=5; i<10; i++) for( j=5; j<10; j++ ) amino_disLN[(int)amino[i]][(int)amino[j]] = n_dis[i][j] + offset - offsetLN; for( i=0; i<5; i++) for( j=0; j<5; j++ ) n_disFFT[i][j] = n_dis[i][j] + offset - offsetFFT; for( i=5; i<10; i++) for( j=5; j<10; j++ ) n_disFFT[i][j] = n_dis[i][j] + offset - offsetFFT; } else // protein { for( i=0; i<20; i++) for( j=0; j<20; j++ ) amino_disLN[(int)amino[i]][(int)amino[j]] = n_dis[i][j] + offset - offsetLN; for( i=0; i<20; i++) for( j=0; j<20; j++ ) n_disFFT[i][j] = n_dis[i][j] + offset - offsetFFT; } #if 0 fprintf( stderr, "amino_dis (offset = %d): \n", offset ); for( i=0; i<20; i++ ) { for( j=0; j<20; j++ ) { fprintf( stderr, "%5d", amino_dis[(int)amino[i]][(int)amino[j]] ); } fprintf( stderr, "\n" ); } fprintf( stderr, "amino_disLN (offsetLN = %d): \n", offsetLN ); for( i=0; i<20; i++ ) { for( j=0; j<20; j++ ) { fprintf( stderr, "%5d", amino_disLN[(int)amino[i]][(int)amino[j]] ); } fprintf( stderr, "\n" ); } fprintf( stderr, "n_dis (offset = %d): \n", offset ); for( i=0; i<26; i++ ) { for( j=0; j<26; j++ ) { fprintf( stderr, "%5d", n_dis[i][j] ); } fprintf( stderr, "\n" ); } fprintf( stderr, "n_disFFT (offsetFFT = %d): \n", offsetFFT ); for( i=0; i<26; i++ ) { for( j=0; j<26; j++ ) { fprintf( stderr, "%5d", n_disFFT[i][j] ); } fprintf( stderr, "\n" ); } exit( 1 ); #endif ppid = 0; if( fftThreshold == NOTSPECIFIED ) { fftThreshold = FFT_THRESHOLD; } if( fftWinSize == NOTSPECIFIED ) { if( dorp == 'd' ) fftWinSize = FFT_WINSIZE_D; else fftWinSize = FFT_WINSIZE_P; } if( fftscore ) 
{ double av, sd; for( i=0; i<20; i++ ) polarity[i] = polarity_[i]; for( av=0.0, i=0; i<20; i++ ) av += polarity[i]; av /= 20.0; for( sd=0.0, i=0; i<20; i++ ) sd += ( polarity[i]-av ) * ( polarity[i]-av ); sd /= 20.0; sd = sqrt( sd ); for( i=0; i<20; i++ ) polarity[i] -= av; for( i=0; i<20; i++ ) polarity[i] /= sd; for( i=0; i<20; i++ ) volume[i] = volume_[i]; for( av=0.0, i=0; i<20; i++ ) av += volume[i]; av /= 20.0; for( sd=0.0, i=0; i<20; i++ ) sd += ( volume[i]-av ) * ( volume[i]-av ); sd /= 20.0; sd = sqrt( sd ); for( i=0; i<20; i++ ) volume[i] -= av; for( i=0; i<20; i++ ) volume[i] /= sd; #if 0 for( i=0; i<20; i++ ) fprintf( stdout, "amino=%c, pol = %f<-%f, vol = %f<-%f\n", amino[i], polarity[i], polarity_[i], volume[i], volume_[i] ); for( i=0; i<20; i++ ) fprintf( stdout, "%c %+5.3f %+5.3f\n", amino[i], volume[i], polarity[i] ); #endif } }
/*
 * Falign_localhom -- align two groups of sequences segment by segment.
 *
 * Unless the global `kobetsubunkatsu` is set, the routine
 *   1. turns both groups into numeric vectors, Fourier-transforms them and
 *      back-transforms the summed per-component products into the
 *      `soukan` array,
 *   2. lets getKouho() pick NKOUHO candidate lags from `soukan`,
 *   3. collects candidate segments for every lag with alignableReagion(),
 *   4. lets blockAlign2() select a set of cut points from those segments.
 * With `kobetsubunkatsu` set, only lag 0 is examined and every segment found
 * becomes a cut point.  Finally each region between two consecutive cut
 * points is aligned with the aligner selected by the global `alg`, and the
 * pieces are concatenated.
 *
 * Parameters
 *   seq1, seq2      clus1 / clus2 NUL-terminated strings.  They are
 *                   overwritten with the aligned result on return.
 *                   Passing seq1 == NULL releases all cached buffers instead
 *                   of aligning (see below).
 *   eff1, eff2      one value per sequence, forwarded to the vectorisation,
 *                   segment-search and alignment helpers (presumably
 *                   sequence weights -- confirm against callers).
 *   clus1, clus2    number of sequences in each group.
 *   alloclen        capacity used for the result buffers; the run aborts
 *                   through ErrorExit() when the concatenated alignment
 *                   would exceed it.
 *   localhom        forwarded unchanged to partQ__align / partA__align.
 *   totalimpmatch   output: reset to 0.0 and then incremented by the
 *                   `impmatch` value reported by the 'Q' / 'A' aligners.
 *   gapmap1/2       forwarded to commongappick_record (kobetsubunkatsu mode)
 *                   and to partQ__align / partA__align.
 *   chudanpt, chudanref, chudanres
 *                   forwarded to partA__align; when built with
 *                   `enablemultithread`, a non-zero *chudanres after an
 *                   aligner call makes this function return -1.0 early.
 *
 * Returns the sum of the scores returned by the per-region aligner calls,
 * 0.0 for the cleanup call, or -1.0 on the early exit described above.
 *
 * Cleanup call (seq1 == NULL): frees every static buffer owned by this
 * function and calls several helpers with NULL/0 arguments (presumably so
 * that they release their own cached buffers -- confirm in the helpers).
 * All freed pointers are reset to NULL and both size counters are reset to
 * 0, so that a repeated cleanup call is a no-op and a later regular call
 * reallocates everything instead of touching freed memory.
 */
float Falign_localhom( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int alloclen, LocalHom ***localhom, float *totalimpmatch, int *gapmap1, int *gapmap2, int *chudanpt, int chudanref, int *chudanres )
{
    // Original note (translated): alloclen does not change between calls
    // coming from tditeration.c, so no "prevalloclen" bookkeeping is needed.
    int i, j, k, l, m, maxk;
    int nlen, nlen2;
    static TLS int crossscoresize = 0;
    static TLS char **tmpseq1 = NULL;
    static TLS char **tmpseq2 = NULL;
    static TLS char **tmpptr1 = NULL;
    static TLS char **tmpptr2 = NULL;
    static TLS char **tmpres1 = NULL;
    static TLS char **tmpres2 = NULL;
    static TLS char **result1 = NULL;
    static TLS char **result2 = NULL;
#if RND
    static TLS char **rndseq1 = NULL;
    static TLS char **rndseq2 = NULL;
#endif
    static TLS Fukusosuu **seqVector1 = NULL;
    static TLS Fukusosuu **seqVector2 = NULL;
    static TLS Fukusosuu **naiseki = NULL;
    static TLS Fukusosuu *naisekiNoWa = NULL;
    static TLS double *soukan = NULL;
    static TLS double **crossscore = NULL;
    int nlentmp;
    static TLS int *kouho = NULL;
    static TLS Segment *segment = NULL;
    static TLS Segment *segment1 = NULL;
    static TLS Segment *segment2 = NULL;
    static TLS Segment **sortedseg1 = NULL;
    static TLS Segment **sortedseg2 = NULL;
    static TLS int *cut1 = NULL;
    static TLS int *cut2 = NULL;
    static TLS char *sgap1, *egap1, *sgap2, *egap2;
    static TLS int localalloclen = 0;
    int lag;
    int tmpint;
    int count, count0;
    int len1, len2;
    int totallen;
    float totalscore;
    float impmatch;

    extern Fukusosuu *AllocateFukusosuuVec();
    extern Fukusosuu **AllocateFukusosuuMtx();

    /* ---- cleanup request ------------------------------------------------ */
    if( seq1 == NULL )
    {
        if( result1 )
        {
//          fprintf( stderr, "Freeing localarrays in Falign\n" );
            // Reset both size counters: a stale crossscoresize would let a
            // later call reuse the freed crossscore matrix.
            localalloclen = 0;
            crossscoresize = 0;
            mymergesort( 0, 0, NULL );
            alignableReagion( 0, 0, NULL, NULL, NULL, NULL, NULL );
            fft( 0, NULL, 1 );
            A__align( NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 );
            G__align11( NULL, NULL, 0, 0, 0 );
            partA__align( NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL );
            blockAlign2( NULL, NULL, NULL, NULL, NULL, NULL );
            if( crossscore ) FreeDoubleMtx( crossscore );
            crossscore = NULL;
            // Every pointer is cleared right after it is released so that no
            // dangling static survives the cleanup.
            FreeCharMtx( result1 ); result1 = NULL;
            FreeCharMtx( result2 ); result2 = NULL;
            FreeCharMtx( tmpres1 ); tmpres1 = NULL;
            FreeCharMtx( tmpres2 ); tmpres2 = NULL;
            FreeCharMtx( tmpseq1 ); tmpseq1 = NULL;
            FreeCharMtx( tmpseq2 ); tmpseq2 = NULL;
#if RND
            // These are allocated together with tmpseq1/2 and could not be
            // released by the reallocation path once localalloclen is 0.
            FreeCharMtx( rndseq1 ); rndseq1 = NULL;
            FreeCharMtx( rndseq2 ); rndseq2 = NULL;
#endif
            free( sgap1 ); sgap1 = NULL;
            free( egap1 ); egap1 = NULL;
            free( sgap2 ); sgap2 = NULL;
            free( egap2 ); egap2 = NULL;
            free( kouho ); kouho = NULL;
            free( cut1 ); cut1 = NULL;
            free( cut2 ); cut2 = NULL;
            free( tmpptr1 ); tmpptr1 = NULL;
            free( tmpptr2 ); tmpptr2 = NULL;
            free( segment ); segment = NULL;
            free( segment1 ); segment1 = NULL;
            free( segment2 ); segment2 = NULL;
            free( sortedseg1 ); sortedseg1 = NULL;
            free( sortedseg2 ); sortedseg2 = NULL;
            if( !kobetsubunkatsu )
            {
                // The FFT work arrays are only allocated when
                // kobetsubunkatsu is off (see the allocation code below).
                FreeFukusosuuMtx ( seqVector1 ); seqVector1 = NULL;
                FreeFukusosuuMtx ( seqVector2 ); seqVector2 = NULL;
                FreeFukusosuuVec( naisekiNoWa ); naisekiNoWa = NULL;
                FreeFukusosuuMtx( naiseki ); naiseki = NULL;
                FreeDoubleVec( soukan ); soukan = NULL;
            }
        }
        else
        {
//          fprintf( stderr, "Did not allocate localarrays in Falign\n" );
        }

        return( 0.0 );
    }

    /* ---- sizes ---------------------------------------------------------- */
    len1 = strlen( seq1[0] );
    len2 = strlen( seq2[0] );
    nlentmp = MAX( len1, len2 );

    // nlen = smallest power of two strictly greater than max(len1, len2).
    nlen = 1;
    while( nlentmp >= nlen ) nlen <<= 1;
#if 0
    fprintf( stderr, "### nlen = %d\n", nlen );
#endif

    nlen2 = nlen/2;

#if DEBUG
    fprintf( stderr, "len1 = %d, len2 = %d\n", len1, len2 );
    fprintf( stderr, "nlentmp = %d, nlen = %d\n", nlentmp, nlen );
#endif

    /* ---- one-time allocation of length-independent buffers -------------- */
    if( !localalloclen )
    {
        sgap1 = AllocateCharVec( njob );
        egap1 = AllocateCharVec( njob );
        sgap2 = AllocateCharVec( njob );
        egap2 = AllocateCharVec( njob );
        kouho = AllocateIntVec( NKOUHO );
        cut1 = AllocateIntVec( MAXSEG );
        cut2 = AllocateIntVec( MAXSEG );
        tmpptr1 = AllocateCharMtx( njob, 0 );
        tmpptr2 = AllocateCharMtx( njob, 0 );
        result1 = AllocateCharMtx( njob, alloclen );
        result2 = AllocateCharMtx( njob, alloclen );
        tmpres1 = AllocateCharMtx( njob, alloclen );
        tmpres2 = AllocateCharMtx( njob, alloclen );
//      crossscore = AllocateDoubleMtx( MAXSEG, MAXSEG );
        segment = (Segment *)calloc( MAXSEG, sizeof( Segment ) );
        segment1 = (Segment *)calloc( MAXSEG, sizeof( Segment ) );
        segment2 = (Segment *)calloc( MAXSEG, sizeof( Segment ) );
        sortedseg1 = (Segment **)calloc( MAXSEG, sizeof( Segment * ) );
        sortedseg2 = (Segment **)calloc( MAXSEG, sizeof( Segment * ) );
        if( !( segment && segment1 && segment2 && sortedseg1 && sortedseg2 ) )
            ErrorExit( "Allocation error\n" );

        // Number of vector components used for the FFT step.
        if ( scoremtx == -1 ) n20or4or2 = 4;
        else if( fftscore == 1 ) n20or4or2 = 2;
        else n20or4or2 = 20;
    }

    /* ---- (re)allocation of buffers that depend on nlen ------------------ */
    if( localalloclen < nlen )
    {
        if( localalloclen )
        {
#if 1
            if( !kobetsubunkatsu )
            {
                FreeFukusosuuMtx ( seqVector1 );
                FreeFukusosuuMtx ( seqVector2 );
                FreeFukusosuuVec( naisekiNoWa );
                FreeFukusosuuMtx( naiseki );
                FreeDoubleVec( soukan );
            }
            FreeCharMtx( tmpseq1 );
            FreeCharMtx( tmpseq2 );
#endif
#if RND
            FreeCharMtx( rndseq1 );
            FreeCharMtx( rndseq2 );
#endif
        }

        tmpseq1 = AllocateCharMtx( njob, nlen );
        tmpseq2 = AllocateCharMtx( njob, nlen );
        if( !kobetsubunkatsu )
        {
            naisekiNoWa = AllocateFukusosuuVec( nlen );
            naiseki = AllocateFukusosuuMtx( n20or4or2, nlen );
            seqVector1 = AllocateFukusosuuMtx( n20or4or2+1, nlen+1 );
            seqVector2 = AllocateFukusosuuMtx( n20or4or2+1, nlen+1 );
            soukan = AllocateDoubleVec( nlen+1 );
        }
#if RND
        rndseq1 = AllocateCharMtx( njob, nlen );
        rndseq2 = AllocateCharMtx( njob, nlen );
        for( i=0; i<njob; i++ )
        {
            generateRndSeq( rndseq1[i], nlen );
            generateRndSeq( rndseq2[i], nlen );
        }
#endif
        localalloclen = nlen;
    }

    // Work on private copies of the input rows for the vectorisation step.
    for( j=0; j<clus1; j++ ) strcpy( tmpseq1[j], seq1[j] );
    for( j=0; j<clus2; j++ ) strcpy( tmpseq2[j], seq2[j] );

#if 0
    fftfp = fopen( "input_of_Falign", "w" );
    fprintf( fftfp, "nlen = %d\n", nlen );
    fprintf( fftfp, "seq1: ( %d sequences ) \n", clus1 );
    for( i=0; i<clus1; i++ )
        fprintf( fftfp, "%s\n", seq1[i] );
    fprintf( fftfp, "seq2: ( %d sequences ) \n", clus2 );
    for( i=0; i<clus2; i++ )
        fprintf( fftfp, "%s\n", seq2[i] );
    fclose( fftfp );
    system( "less input_of_Falign < /dev/tty > /dev/tty" );
#endif

    /* ---- FFT step: fill kouho[] with candidate lags ---------------------- */
    if( !kobetsubunkatsu )
    {
        fprintf( stderr, "FFT ... " );

        // Vectorise group 1.
        for( j=0; j<n20or4or2; j++ ) vec_init( seqVector1[j], nlen );
        if( fftscore && scoremtx != -1 )
        {
            for( i=0; i<clus1; i++ )
            {
                seq_vec_2( seqVector1[0], polarity, eff1[i], tmpseq1[i] );
                seq_vec_2( seqVector1[1], volume, eff1[i], tmpseq1[i] );
            }
        }
        else
        {
#if 0
            for( i=0; i<clus1; i++ ) for( j=0; j<n20or4or2; j++ )
                seq_vec( seqVector1[j], amino[j], eff1[i], tmpseq1[i] );
#else
            for( i=0; i<clus1; i++ )
                seq_vec_3( seqVector1, eff1[i], tmpseq1[i] );
#endif
        }
#if RND
        for( i=0; i<clus1; i++ )
        {
            vec_init2( seqVector1, rndseq1[i], eff1[i], len1, nlen );
        }
#endif
#if 0
        fftfp = fopen( "seqVec", "w" );
        fprintf( fftfp, "before transform\n" );
        for( k=0; k<n20or4or2; k++ )
        {
            fprintf( fftfp, "nlen=%d\n", nlen );
            fprintf( fftfp, "%c\n", amino[k] );
            for( l=0; l<nlen; l++ )
                fprintf( fftfp, "%f %f\n", seqVector1[k][l].R, seqVector1[k][l].I );
        }
        fclose( fftfp );
        system( "less seqVec < /dev/tty > /dev/tty" );
#endif

        // Vectorise group 2 the same way.
        for( j=0; j<n20or4or2; j++ ) vec_init( seqVector2[j], nlen );
        if( fftscore && scoremtx != -1 )
        {
            for( i=0; i<clus2; i++ )
            {
                seq_vec_2( seqVector2[0], polarity, eff2[i], tmpseq2[i] );
                seq_vec_2( seqVector2[1], volume, eff2[i], tmpseq2[i] );
            }
        }
        else
        {
#if 0
            for( i=0; i<clus2; i++ ) for( j=0; j<n20or4or2; j++ )
                seq_vec( seqVector2[j], amino[j], eff2[i], tmpseq2[i] );
#else
            for( i=0; i<clus2; i++ )
                seq_vec_3( seqVector2, eff2[i], tmpseq2[i] );
#endif
        }
#if RND
        for( i=0; i<clus2; i++ )
        {
            vec_init2( seqVector2, rndseq2[i], eff2[i], len2, nlen );
        }
#endif

#if 0
        fftfp = fopen( "seqVec2", "w" );
        fprintf( fftfp, "before fft\n" );
        for( k=0; k<n20or4or2; k++ )
        {
            fprintf( fftfp, "%c\n", amino[k] );
            for( l=0; l<nlen; l++ )
                fprintf( fftfp, "%f %f\n", seqVector2[k][l].R, seqVector2[k][l].I );
        }
        fclose( fftfp );
        system( "less seqVec2 < /dev/tty > /dev/tty" );
#endif

        // Forward transforms; the third argument is 1 only for the very
        // first call (NOTE(review): looks like a "first call / set up
        // tables" flag -- confirm in fft()).
        for( j=0; j<n20or4or2; j++ )
        {
            fft( nlen, seqVector2[j], (j==0) );
            fft( nlen, seqVector1[j], 0 );
        }
#if 0
        fftfp = fopen( "seqVec2", "w" );
        fprintf( fftfp, "#after fft\n" );
        for( k=0; k<n20or4or2; k++ )
        {
            fprintf( fftfp, "#%c\n", amino[k] );
            for( l=0; l<nlen; l++ )
                fprintf( fftfp, "%f %f\n", seqVector2[k][l].R, seqVector2[k][l].I );
        }
        fclose( fftfp );
        system( "less seqVec2 < /dev/tty > /dev/tty" );
#endif

        // Per-component, per-frequency product ("naiseki") ...
        for( k=0; k<n20or4or2; k++ )
        {
            for( l=0; l<nlen; l++ )
                calcNaiseki( naiseki[k]+l, seqVector1[k]+l, seqVector2[k]+l );
        }
        // ... summed over all components ("naisekiNoWa").
        for( l=0; l<nlen; l++ )
        {
            naisekiNoWa[l].R = 0.0;
            naisekiNoWa[l].I = 0.0;
            for( k=0; k<n20or4or2; k++ )
            {
                naisekiNoWa[l].R += naiseki[k][l].R;
                naisekiNoWa[l].I += naiseki[k][l].I;
            }
        }

#if 0
        fftfp = fopen( "naisekiNoWa", "w" );
        fprintf( fftfp, "#Before fft\n" );
        for( l=0; l<nlen; l++ )
            fprintf( fftfp, "%d %f %f\n", l, naisekiNoWa[l].R, naisekiNoWa[l].I );
        fclose( fftfp );
        system( "less naisekiNoWa < /dev/tty > /dev/tty " );
#endif

        // A negative length requests the opposite transform direction
        // (presumably the inverse FFT -- confirm in fft()).
        fft( -nlen, naisekiNoWa, 0 );

        // Re-index the real parts: soukan[m] = naisekiNoWa[(nlen2-m) mod nlen].
        for( m=0; m<=nlen2; m++ )
            soukan[m] = naisekiNoWa[nlen2-m].R;
        for( m=nlen2+1; m<nlen; m++ )
            soukan[m] = naisekiNoWa[nlen+nlen2-m].R;

#if 0
        fftfp = fopen( "naisekiNoWa", "w" );
        fprintf( fftfp, "#After fft\n" );
        for( l=0; l<nlen; l++ )
            fprintf( fftfp, "%d %f\n", l, naisekiNoWa[l].R );
        fclose( fftfp );
        fftfp = fopen( "list.plot", "w" );
        fprintf( fftfp, "plot 'naisekiNoWa'\npause -1" );
        fclose( fftfp );
        system( "/usr/bin/gnuplot list.plot &" );
#endif
#if 0
        fprintf( stderr, "frt write start\n" );
        fftfp = fopen( "frt", "w" );
        for( l=0; l<nlen; l++ )
            fprintf( fftfp, "%d %f\n", l-nlen2, soukan[l] );
        fclose( fftfp );
        system( "less frt < /dev/tty > /dev/tty" );
#if 0
        fftfp = fopen( "list.plot", "w" );
        fprintf( fftfp, "plot 'frt'\n pause +1" );
        fclose( fftfp );
        system( "/usr/bin/gnuplot list.plot" );
#endif
#endif

        getKouho( kouho, NKOUHO, soukan, nlen );

#if 0
        for( i=0; i<NKOUHO; i++ )
        {
            fprintf( stderr, "kouho[%d] = %d\n", i, kouho[i] );
        }
#endif
    }

    /* ---- collect candidate segments for every candidate lag ------------- */
#if KEIKA
    fprintf( stderr, "Searching anchors ... " );
#endif
    count = 0;

#define CAND 0
#if CAND
    fftfp = fopen( "cand", "w" );
    fclose( fftfp );
#endif
    if( kobetsubunkatsu )
    {
        // No FFT was run: examine lag 0 only.
        maxk = 1;
        kouho[0] = 0;
    }
    else
    {
        maxk = NKOUHO;
    }

    for( k=0; k<maxk; k++ )
    {
        lag = kouho[k];
        zurasu2( lag, clus1, clus2, seq1, seq2, tmpptr1, tmpptr2 );
#if CAND
        fftfp = fopen( "cand", "a" );
        fprintf( fftfp, "Candidate No.%d lag = %d\n", k+1, lag );
        fprintf( fftfp, "%s\n", tmpptr1[0] );
        fprintf( fftfp, "%s\n", tmpptr2[0] );
        fclose( fftfp );
#endif
        // New segments are written starting at segment[count]; the return
        // value is how many were found for this lag.
        tmpint = alignableReagion( clus1, clus2, tmpptr1, tmpptr2, eff1, eff2, segment+count );

        if( count+tmpint > MAXSEG -3 ) ErrorExit( "TOO MANY SEGMENTS.\n" );

        while( tmpint-- > 0 )
        {
            if( lag > 0 )
            {
                // Group 1 keeps the coordinates, group 2 is offset by +lag.
                segment1[count].start = segment[count].start ;
                segment1[count].end = segment[count].end ;
                segment1[count].center = segment[count].center;
                segment1[count].score = segment[count].score;

                segment2[count].start = segment[count].start + lag;
                segment2[count].end = segment[count].end + lag;
                segment2[count].center = segment[count].center + lag;
                segment2[count].score = segment[count].score ;
            }
            else
            {
                // Group 2 keeps the coordinates, group 1 is offset by -lag.
                segment1[count].start = segment[count].start - lag;
                segment1[count].end = segment[count].end - lag;
                segment1[count].center = segment[count].center - lag;
                segment1[count].score = segment[count].score ;

                segment2[count].start = segment[count].start ;
                segment2[count].end = segment[count].end ;
                segment2[count].center = segment[count].center;
                segment2[count].score = segment[count].score ;
            }
#if 0
            fftfp = fopen( "cand", "a" );
            fprintf( fftfp, "Goukaku=%dko\n", tmpint );
            fprintf( fftfp, "in 1 %d\n", segment1[count].center );
            fprintf( fftfp, "in 2 %d\n", segment2[count].center );
            fclose( fftfp );
#endif
            // Link the two halves so each can find its partner after sorting.
            segment1[count].pair = &segment2[count];
            segment2[count].pair = &segment1[count];
            count++;
#if 0
            fprintf( stderr, "count=%d\n", count );
#endif
        }
    }
#if 1
    if( !kobetsubunkatsu )
        fprintf( stderr, "%d segments found\n", count );
#endif
    if( !count && fftNoAnchStop )
        ErrorExit( "Cannot detect anchor!" );
#if 0
    fftfp = fopen( "fft", "a" );
    fprintf( fftfp, "RESULT before sort:\n" );
    for( l=0; l<count; l++ )
    {
        fprintf( fftfp, "cut[%d]=%d, ", l, segment1[l].center );
        fprintf( fftfp, "%d score = %f\n", segment2[l].center, segment1[l].score );
    }
    fclose( fftfp );
#endif

    /* ---- order the segments and derive the cut points ------------------- */
#if KEIKA
    fprintf( stderr, "Aligning anchors ... " );
#endif
    for( i=0; i<count; i++ )
    {
        sortedseg1[i] = &segment1[i];
        sortedseg2[i] = &segment2[i];
    }
#if 0
    tmpsort( count, sortedseg1 );
    tmpsort( count, sortedseg2 );
    qsort( sortedseg1, count, sizeof( Segment * ), segcmp );
    qsort( sortedseg2, count, sizeof( Segment * ), segcmp );
#else
    mymergesort( 0, count-1, sortedseg1 );
    mymergesort( 0, count-1, sortedseg2 );
#endif
    // Record each segment's rank within its own sorted list.
    for( i=0; i<count; i++ ) sortedseg1[i]->number = i;
    for( i=0; i<count; i++ ) sortedseg2[i]->number = i;

    if( kobetsubunkatsu )
    {
        // Every segment centre becomes a cut; slots 0 and count+1 hold the
        // sequence start and end.
        for( i=0; i<count; i++ )
        {
            cut1[i+1] = sortedseg1[i]->center;
            cut2[i+1] = sortedseg2[i]->center;
        }
        cut1[0] = 0;
        cut2[0] = 0;
        cut1[count+1] = len1;
        cut2[count+1] = len2;
        count += 2;
    }
    else
    {
        // Grow the score matrix when it cannot hold count segments plus the
        // two boundary entries.
        if( crossscoresize < count+2 )
        {
            crossscoresize = count+2;
#if 1
            fprintf( stderr, "######allocating crossscore, size = %d\n", crossscoresize );
#endif
            if( crossscore ) FreeDoubleMtx( crossscore );
            crossscore = AllocateDoubleMtx( crossscoresize, crossscoresize );
        }
        for( i=0; i<count+2; i++ ) for( j=0; j<count+2; j++ )
            crossscore[i][j] = 0.0;
        for( i=0; i<count; i++ )
        {
            // Cell (rank in group 1 + 1, rank of the partner in group 2 + 1).
            crossscore[segment1[i].number+1][segment1[i].pair->number+1] = segment1[i].score;
            cut1[i+1] = sortedseg1[i]->center;
            cut2[i+1] = sortedseg2[i]->center;
        }

#if DEBUG
        fprintf( stderr, "AFTER SORT\n" );
        for( i=0; i<count; i++ ) fprintf( stderr, "%d, %d\n", segment1[i].start, segment2[i].start );
#endif

        // Boundary entries get a very large score (presumably so that
        // blockAlign2 always keeps them -- confirm there).
        crossscore[0][0] = 10000000.0;
        cut1[0] = 0;
        cut2[0] = 0;
        crossscore[count+1][count+1] = 10000000.0;
        cut1[count+1] = len1;
        cut2[count+1] = len2;
        count += 2;
        count0 = count;

        // blockAlign2 may rewrite cut1/cut2 and count.
        blockAlign2( cut1, cut2, sortedseg1, sortedseg2, crossscore, &count );
        if( count0 > count )
        {
#if 0
            fprintf( stderr, "\7 REPEAT!? \n" );
#else
            fprintf( stderr, "REPEAT!? \n" );
#endif
            if( fftRepeatStop ) exit( 1 );
        }
#if KEIKA
        else fprintf( stderr, "done\n" );
#endif
    }

#if 0
    fftfp = fopen( "fft", "a" );
    fprintf( fftfp, "RESULT after sort:\n" );
    for( l=0; l<count; l++ )
    {
        fprintf( fftfp, "cut[%d]=%d, ", l, segment1[l].center );
        fprintf( fftfp, "%d\n", segment2[l].center );
    }
    fclose( fftfp );
#endif

#if 0
    fftfp = fopen( "fft", "a" );
    fprintf( fftfp, "RESULT after sort:\n" );
    for( l=0; l<count; l++ )
    {
        fprintf( fftfp, "cut : %d %d\n", cut1[l], cut2[l] );
    }
    fclose( fftfp );
#endif

#if KEIKA
    fprintf( trap_g, "Devided to %d segments\n", count-1 );
    fprintf( trap_g, "%d %d forg\n", MIN( clus1, clus2 ), count-1 );
#endif

    /* ---- align each region between consecutive cut points --------------- */
    totallen = 0;
    for( j=0; j<clus1; j++ ) result1[j][0] = 0;
    for( j=0; j<clus2; j++ ) result2[j][0] = 0;
    totalscore = 0.0;
    *totalimpmatch = 0.0;
    for( i=0; i<count-1; i++ )
    {
#if DEBUG
        fprintf( stderr, "DP %03d / %03d %4d to ", i+1, count-1, totallen );
#else
#if KEIKA
        fprintf( stderr, "DP %03d / %03d\r", i+1, count-1 );
#endif
#endif
        // Boundary state on the left edge of the region.
        // NOTE(review): both groups are gated on cut1[i] only; cut2[i]-1 is
        // used without its own check -- confirm cut2[i] cannot be 0 here.
        if( cut1[i] )
        {
            getkyokaigap( sgap1, seq1, cut1[i]-1, clus1 );
            getkyokaigap( sgap2, seq2, cut2[i]-1, clus2 );
        }
        else
        {
            for( j=0; j<clus1; j++ ) sgap1[j] = 'o';
            for( j=0; j<clus2; j++ ) sgap2[j] = 'o';
        }
        // Boundary state on the right edge of the region.
        if( cut1[i+1] != len1 )
        {
            getkyokaigap( egap1, seq1, cut1[i+1], clus1 );
            getkyokaigap( egap2, seq2, cut2[i+1], clus2 );
        }
        else
        {
            for( j=0; j<clus1; j++ ) egap1[j] = 'o';
            for( j=0; j<clus2; j++ ) egap2[j] = 'o';
        }

        // Copy the region [cut[i], cut[i+1]) of every row; strncpy does not
        // terminate here, so the terminator is written explicitly.
        for( j=0; j<clus1; j++ )
        {
            strncpy( tmpres1[j], seq1[j]+cut1[i], cut1[i+1]-cut1[i] );
            tmpres1[j][cut1[i+1]-cut1[i]] = 0;
        }
        if( kobetsubunkatsu ) commongappick_record( clus1, tmpres1, gapmap1 );
        for( j=0; j<clus2; j++ )
        {
            strncpy( tmpres2[j], seq2[j]+cut2[i], cut2[i+1]-cut2[i] );
            tmpres2[j][cut2[i+1]-cut2[i]] = 0;
        }
        if( kobetsubunkatsu ) commongappick_record( clus2, tmpres2, gapmap2 );

#if 0
        fprintf( stderr, "count = %d\n", count );
        fprintf( stderr, "### reg1 = %d-%d\n", cut1[i], cut1[i+1]-1 );
        fprintf( stderr, "### reg2 = %d-%d\n", cut2[i], cut2[i+1]-1 );
#endif

        // Dispatch on the globally selected algorithm; the aligners rewrite
        // tmpres1/tmpres2 in place (their contents are appended below).
        switch( alg )
        {
            case( 'a' ):
                totalscore += Aalign( tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen );
                break;
            case( 'Q' ):
                totalscore += partQ__align( tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen, localhom, &impmatch, cut1[i], cut1[i+1]-1, cut2[i], cut2[i+1]-1, gapmap1, gapmap2, sgap1, sgap2, egap1, egap2 );
                *totalimpmatch += impmatch;
//              fprintf( stderr, "*totalimpmatch in Falign_localhom = %f\n", *totalimpmatch );
                break;
            case( 'A' ):
                totalscore += partA__align( tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen, localhom, &impmatch, cut1[i], cut1[i+1]-1, cut2[i], cut2[i+1]-1, gapmap1, gapmap2, sgap1, sgap2, egap1, egap2, chudanpt, chudanref, chudanres );
                *totalimpmatch += impmatch;
//              fprintf( stderr, "*totalimpmatch in Falign_localhom = %f\n", *totalimpmatch );
                break;
            default:
                fprintf( stderr, "alg = %c\n", alg );
                ErrorExit( "ERROR IN SOURCE FILE Falign.c" );
                break;
        }

#ifdef enablemultithread
        // Interruption flag raised during the aligner call: give up early.
        if( chudanres && *chudanres )
        {
//          fprintf( stderr, "\n\n## CHUUDAN!!! at Falign_localhom\n" );
            return( -1.0 );
        }
#endif

        // nlen is reused here as the aligned length of this region.
        nlen = strlen( tmpres1[0] );
        if( totallen + nlen > alloclen )
        {
            fprintf( stderr, "totallen=%d + nlen=%d > alloclen = %d\n", totallen, nlen, alloclen );
            ErrorExit( "LENGTH OVER in Falign\n " );
        }
        for( j=0; j<clus1; j++ ) strcat( result1[j], tmpres1[j] );
        for( j=0; j<clus2; j++ ) strcat( result2[j], tmpres2[j] );
        totallen += nlen;
#if 0
        fprintf( stderr, "%4d\r", totallen );
        fprintf( stderr, "\n\n" );
        for( j=0; j<clus1; j++ )
        {
            fprintf( stderr, "%s\n", tmpres1[j] );
        }
        fprintf( stderr, "-------\n" );
        for( j=0; j<clus2; j++ )
        {
            fprintf( stderr, "%s\n", tmpres2[j] );
        }
#endif
    }
#if KEIKA
    fprintf( stderr, "DP ... done \n" );
#endif

    // Hand the concatenated alignment back through the input buffers.
    for( j=0; j<clus1; j++ ) strcpy( seq1[j], result1[j] );
    for( j=0; j<clus2; j++ ) strcpy( seq2[j], result2[j] );
#if 0
    for( j=0; j<clus1; j++ )
    {
        fprintf( stderr, "%s\n", result1[j] );
    }
    fprintf( stderr, "- - - - - - - - - - -\n" );
    for( j=0; j<clus2; j++ )
    {
        fprintf( stderr, "%s\n", result2[j] );
    }
#endif
    return( totalscore );
}