/*
 * Fill vc->pscore with a covariance score for every pair (i,j) of
 * alignment columns; the entry for (i,j), i < j, lives at
 * pscore[vc->jindx[j] + i].
 *
 * Steps, in order:
 *  1. select the 7x7 pair-type substitution matrix dm (ribosum matrix if
 *     md->ribo is set, otherwise the built-in hamming distance table),
 *  2. score every pair, storing NONE for pairs that are too close, span
 *     more than max_span columns, or have too many counter examples,
 *  3. if md->noLP is set, additionally store NONE for pairs whose inner
 *     and outer stacking neighbours both score below
 *     md->cv_fact * MINPSCORE,
 *  4. release dm and, if vc->pscore_pf_compat is set, mirror the scores
 *     into it (narrowed to short, addressed via vc->iindx).
 *
 * This function applies no structure constraints.
 *
 * vc  fold compound; reads S, sequences, n_seq, length, jindx, iindx and
 *     the model details of params (or of exp_params when params is NULL);
 *     writes pscore and, when non-NULL, pscore_pf_compat.
 */
PRIVATE void make_pscores(vrna_fold_compound_t *vc){
  /* calculate co-variance bonus for each pair depending on  */
  /* compensatory/consistent mutations and incompatible seqs */
  /* should be 0 for conserved pairs, >0 for good pairs      */

#define NONE -10000 /* score for forbidden pairs */

  int   i, j, k, l, s, max_span, turn;
  float **dm;
  int   olddm[7][7] = { { 0, 0, 0, 0, 0, 0, 0 }, /* hamming distance between pairs */
                        { 0, 0, 2, 2, 1, 2, 2 } /* CG */,
                        { 0, 2, 0, 1, 2, 2, 2 } /* GC */,
                        { 0, 2, 1, 0, 2, 1, 2 } /* GU */,
                        { 0, 1, 2, 2, 0, 2, 1 } /* UG */,
                        { 0, 2, 2, 1, 2, 0, 2 } /* AU */,
                        { 0, 2, 2, 2, 1, 2, 0 } /* UA */ };

  short     **S       = vc->S;
  char      **AS      = vc->sequences;
  int       n_seq     = vc->n_seq;
  vrna_md_t *md       = (vc->params) ? &(vc->params->model_details) : &(vc->exp_params->model_details);
  int       *pscore   = vc->pscore;   /* covariance scores, entry (i,j) at indx[j] + i */
  int       *indx     = vc->jindx;
  int       *my_iindx = vc->iindx;
  int       n         = vc->length;

  turn = md->min_loop_size;

  /*
   * dm is released as seven separately allocated rows at the end of this
   * function, so the ribosum helpers are expected to hand back a matrix
   * with that layout.
   */
  if (md->ribo) {
    if (RibosumFile != NULL)
      dm = readribosum(RibosumFile);
    else
      dm = get_ribosum((const char **)AS, n_seq, n);
  } else {
    /* use usual matrix */
    dm = vrna_alloc(7 * sizeof(float *));
    for (i = 0; i < 7; i++) {
      dm[i] = vrna_alloc(7 * sizeof(float));
      for (j = 0; j < 7; j++)
        dm[i][j] = (float)olddm[i][j];
    }
  }

  /* an unusable span limit (too small or larger than n) means "no limit" */
  max_span = md->max_bp_span;
  if ((max_span < turn + 2) || (max_span > n))
    max_span = n;

  for (i = 1; i < n; i++) {
    /* columns less than turn+1 positions apart may never pair */
    for (j = i + 1; (j < i + turn + 1) && (j <= n); j++)
      pscore[indx[j] + i] = NONE;

    for (j = i + turn + 1; j <= n; j++) {
      int     pfreq[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
      double  score;

      /*
       * Pairs spanning more than max_span columns are forbidden no
       * matter what their covariance is, so skip the scoring entirely.
       */
      if ((j - i + 1) > max_span) {
        pscore[indx[j] + i] = NONE;
        continue;
      }

      /* count how often each pair type occurs in columns i and j */
      for (s = 0; s < n_seq; s++) {
        int type;
        if (S[s][i] == 0 && S[s][j] == 0) {
          type = 7; /* gap-gap */
        } else {
          if ((AS[s][i] == '~') || (AS[s][j] == '~'))
            type = 7;
          else
            type = md->pair[S[s][i]][S[s][j]];
        }

        pfreq[type]++;
      }

      /* too many counter examples (type 0) and gap-gap columns (type 7) */
      if (pfreq[0] * 2 + pfreq[7] > n_seq) {
        pscore[indx[j] + i] = NONE;
        continue;
      }

      for (k = 1, score = 0; k <= 6; k++) /* ignore pairtype 7 (gap-gap) */
        for (l = k; l <= 6; l++)
          score += pfreq[k] * pfreq[l] * dm[k][l];

      /* counter examples score -1, gap-gap scores -0.25 */
      pscore[indx[j] + i] = md->cv_fact * ((UNIT * score) / n_seq - md->nc_fact * UNIT * (pfreq[0] + pfreq[7] * 0.25));
    }
  }

  if (md->noLP) {
    /* remove unwanted pairs */
    for (k = 1; k < n - turn - 1; k++)
      for (l = 1; l <= 2; l++) {
        int type, ntype = 0, otype = 0;

        /*
         * Walk outwards along the stack that starts at (k, k+turn+l).
         * otype is the original score of the enclosed pair, ntype the
         * score of the enclosing one; at the sequence border ntype is not
         * refreshed and keeps its previous value.
         */
        i     = k;
        j     = i + turn + l;
        type  = pscore[indx[j] + i];
        while ((i >= 1) && (j <= n)) {
          if ((i > 1) && (j < n))
            ntype = pscore[indx[j + 1] + i - 1];

          if ((otype < md->cv_fact * MINPSCORE) && (ntype < md->cv_fact * MINPSCORE)) /* too many counterexamples */
            pscore[indx[j] + i] = NONE;                                               /* i.j can only form isolated pairs */

          otype = type;
          type  = ntype;
          i--;
          j++;
        }
      }
  }

  /* free dm */
  for (i = 0; i < 7; i++)
    free(dm[i]);
  free(dm);

  /* copy over pscores for backward compatibility */
  if (vc->pscore_pf_compat) {
    for (i = 1; i < n; i++)
      for (j = i; j <= n; j++)
        vc->pscore_pf_compat[my_iindx[i] - j] = (short)pscore[indx[j] + i];
  }
}
PRIVATE void make_pscores(const short *const* S, const char **AS, int n_seq, const char *structure) { /* calculate co-variance bonus for each pair depending on */ /* compensatory/consistent mutations and incompatible seqs */ /* should be 0 for conserved pairs, >0 for good pairs */ #define NONE -10000 /* score for forbidden pairs */ int n,i,j,k,l,s; double score; int olddm[7][7]={{0,0,0,0,0,0,0}, /* hamming distance between pairs */ {0,0,2,2,1,2,2} /* CG */, {0,2,0,1,2,2,2} /* GC */, {0,2,1,0,2,1,2} /* GU */, {0,1,2,2,0,2,1} /* UG */, {0,2,2,1,2,0,2} /* AU */, {0,2,2,2,1,2,0} /* UA */}; float **dm; n=S[0][0]; /* length of seqs */ if (ribo) { if (RibosumFile !=NULL) dm=readribosum(RibosumFile); else dm=get_ribosum(AS,n_seq,n); } else { /*use usual matrix*/ dm=(float **)space(7*sizeof(float*)); for (i=0; i<7;i++) { dm[i]=(float *)space(7*sizeof(float)); for (j=0; j<7; j++) dm[i][j] = (float) olddm[i][j]; } } n=S[0][0]; /* length of seqs */ for (i=1; i<n; i++) { for (j=i+1; (j<i+TURN+1) && (j<=n); j++) pscore[indx[j]+i] = NONE; for (j=i+TURN+1; j<=n; j++) { int pfreq[8]={0,0,0,0,0,0,0,0}; for (s=0; s<n_seq; s++) { int type; if (S[s][i]==0 && S[s][j]==0) type = 7; /* gap-gap */ else { if ((AS[s][i] == '~')||(AS[s][j] == '~')) type = 7; else type = pair[S[s][i]][S[s][j]]; } pfreq[type]++; } if (pfreq[0]*2+pfreq[7]>n_seq) { pscore[indx[j]+i] = NONE; continue;} for (k=1,score=0; k<=6; k++) /* ignore pairtype 7 (gap-gap) */ for (l=k; l<=6; l++) /* scores for replacements between pairtypes */ /* consistent or compensatory mutations score 1 or 2 */ score += pfreq[k]*pfreq[l]*dm[k][l]; /* counter examples score -1, gap-gap scores -0.25 */ pscore[indx[j]+i] = cv_fact * ((UNIT*score)/n_seq - nc_fact*UNIT*(pfreq[0] + pfreq[7]*0.25)); } } if (noLonelyPairs) /* remove unwanted pairs */ for (k=1; k<n-TURN-1; k++) for (l=1; l<=2; l++) { int type,ntype=0,otype=0; i=k; j = i+TURN+l; type = pscore[indx[j]+i]; while ((i>=1)&&(j<=n)) { if ((i>1)&&(j<n)) ntype = pscore[indx[j+1]+i-1]; if 
((otype<cv_fact*MINPSCORE)&&(ntype<cv_fact*MINPSCORE)) /* too many counterexamples */ pscore[indx[j]+i] = NONE; /* i.j can only form isolated pairs */ otype = type; type = ntype; i--; j++; } } if (fold_constrained&&(structure!=NULL)) { int psij, hx, hx2, *stack, *stack2; stack = (int *) space(sizeof(int)*(n+1)); stack2 = (int *) space(sizeof(int)*(n+1)); for(hx=hx2=0, j=1; j<=n; j++) { switch (structure[j-1]) { case 'x': /* can't pair */ for (l=1; l<j-TURN; l++) pscore[indx[j]+l] = NONE; for (l=j+TURN+1; l<=n; l++) pscore[indx[l]+j] = NONE; break; case '(': stack[hx++]=j; /* fallthrough */ case '[': stack2[hx2++]=j; /* fallthrough */ case '<': /* pairs upstream */ for (l=1; l<j-TURN; l++) pscore[indx[j]+l] = NONE; break; case ']': if (hx2<=0) { fprintf(stderr, "%s\n", structure); nrerror("unbalanced brackets in constraints"); } i = stack2[--hx2]; pscore[indx[j]+i]=NONE; break; case ')': if (hx<=0) { fprintf(stderr, "%s\n", structure); nrerror("unbalanced brackets in constraints"); } i = stack[--hx]; psij = pscore[indx[j]+i]; /* store for later */ for (k=j; k<=n; k++) for (l=i; l<=j; l++) pscore[indx[k]+l] = NONE; for (l=i; l<=j; l++) for (k=1; k<=i; k++) pscore[indx[l]+k] = NONE; for (k=i+1; k<j; k++) pscore[indx[k]+i] = pscore[indx[j]+k] = NONE; pscore[indx[j]+i] = (psij>0) ? psij : 0; /* fallthrough */ case '>': /* pairs downstream */ for (l=j+TURN+1; l<=n; l++) pscore[indx[l]+j] = NONE; break; } } if (hx!=0) { fprintf(stderr, "%s\n", structure); nrerror("unbalanced brackets in constraint string"); } free(stack); free(stack2); } /*free dm */ for (i=0; i<7;i++) { free(dm[i]); } free(dm); }