PRIVATE char *backtrack_XS(int k, int l, const int i, const int j, const int max_interaction_length) { /* backtrack structure going backwards from i, and forwards from j return structure in bracket notation with & as separator */ int p, q, type, type2, E, traced, i0, j0; char *st1, *st2, *struc; st1 = (char *) vrna_alloc(sizeof(char)*(i-k+2)); st1[i-k+1]='\0'; st2 = (char *) vrna_alloc(sizeof(char)*(l-j+2)); st2[l-j+1]='\0'; i0=k; j0=l; while (k<=i && l>=j) { E = c3[j-11][max_interaction_length-i+k-1][l-j]; traced=0; st1[k-i0] = '('; st2[l-j] = ')'; type=ptype[indx[l]+k]; if (!type) vrna_message_error("backtrack failed in fold duplex bli"); for (p=k+1; p<=i; p++) { for (q=l-1; q>=j; q--) { int LE; if (p-k+l-q-2>MAXLOOP) break; type2=ptype[indx[q]+p]; if (!type2) continue; LE = E_IntLoop(p-k-1, l-q-1, type, rtype[type2], SS1[k+1], SS1[l-1], SS1[p-1], SS1[q+1], P); if (E == c3[j-11][max_interaction_length-i+p-1][q-j]+LE) { traced=1; k=p; l=q; break; } } if (traced) break; } if (!traced) { E-=E_ExtLoop(type2, ((k<i)?SS1[k+1]:-1), ((l>j-1)? SS1[l-1]:-1), P); break; if (E != P->DuplexInit) { vrna_message_error("backtrack failed in fold duplex bal"); } else break; } } struc = (char *) vrna_alloc(k-i0+1+j0-l+1+2); for (p=0; p<=i-i0; p++){ if (!st1[p]) st1[p] = '.'; } for (p=0; p<=j0-j; p++) { if (!st2[p]) { st2[p] = '.'; } } strcpy(struc, st1); strcat(struc, "&"); strcat(struc, st2); free(st1); free(st2); return struc; }
PUBLIC vrna_plist_t * stackProb(double cutoff){ if(!(backward_compat_compound && backward_compat)){ vrna_message_error("stackProb: run pf_fold() first!"); } else if( !backward_compat_compound->exp_matrices->probs){ vrna_message_error("stackProb: probs==NULL!"); } return vrna_stack_prob(backward_compat_compound, cutoff); }
PRIVATE int decode(char *id) { int n, quit, i; char label[100], *code; n = 0; quit = 0; code = coding; while (!quit) { for (i = 0; code[i] != sep; i++) { if (code[i] == '\0') { quit = 1; break; } label[i] = code[i]; } label[i] = '\0'; if (strcmp(id, label) == 0) return (n); code += (i+1); n++; } vrna_message_error("Syntax error: node identifier \"%s\" not found " "in coding string \"%s\"\n" "Exiting", id, coding); exit(0); }
PUBLIC void * vrna_alloc(unsigned size){ void *pointer; if ( (pointer = (void *) calloc(1, (size_t) size)) == NULL) { #ifdef EINVAL if (errno==EINVAL) { fprintf(stderr,"vrna_alloc: requested size: %d\n", size); vrna_message_error("Memory allocation failure -> EINVAL"); } if (errno==ENOMEM) #endif vrna_message_error("Memory allocation failure -> no memory"); } return pointer; }
PUBLIC void * vrna_realloc(void *p, unsigned size){ if (p == NULL) return vrna_alloc(size); p = (void *) realloc(p, size); if (p == NULL) { #ifdef EINVAL if (errno==EINVAL) { fprintf(stderr,"vrna_realloc: requested size: %d\n", size); vrna_message_error("vrna_realloc allocation failure -> EINVAL"); } if (errno==ENOMEM) #endif vrna_message_error("vrna_realloc allocation failure -> no memory"); } return p; }
PUBLIC char * centroid( int length, double *dist) { if (pr==NULL) vrna_message_error("pr==NULL. You need to call pf_fold() before centroid()"); return vrna_centroid_from_probs(length, dist, pr); }
PUBLIC double mean_bp_distance(int length){ if(backward_compat_compound) if(backward_compat_compound->exp_matrices) if(backward_compat_compound->exp_matrices->probs) return vrna_mean_bp_distance(backward_compat_compound); vrna_message_error("mean_bp_distance: you need to call vrna_pf_fold first"); return 0.; /* we will never get to this point */ }
PRIVATE char *get_array1(int *arr, int size) { int i, p, pos, pp, r, last; char *line, buf[16]; i = last = 0; while( i<size ) { line = get_line(fp); if (!line) vrna_message_error("unexpected end of file in get_array1"); ignore_comment(line); pos=0; while ((i<size)&&(sscanf(line+pos,"%15s%n", buf, &pp)==1)) { pos += pp; if (buf[0]=='*') {i++; continue;} else if (buf[0]=='x') { /* should only be used for loop parameters */ if (i==0) vrna_message_error("can't extrapolate first value"); p = arr[last] + (int) (0.5+ lxc37*log(((double) i)/(double)(last))); } else if (strcmp(buf,"DEF") == 0) p = DEF; else if (strcmp(buf,"INF") == 0) p = INF; else if (strcmp(buf,"NST") == 0) p = NST; else { r=sscanf(buf,"%d", &p); if (r!=1) { return line+pos; fprintf(stderr, "can't interpret `%s' in get_array1\n", buf); exit(1); } last = i; } arr[i++]=p; } free(line); } return NULL; }
PUBLIC void alloc_sequence_arrays(const char **sequences, short ***S, short ***S5, short ***S3, unsigned short ***a2s, char ***Ss, int circ){ unsigned int s, n_seq, length; if(sequences[0] != NULL){ length = strlen(sequences[0]); for (s=0; sequences[s] != NULL; s++); n_seq = s; *S = (short **) vrna_alloc((n_seq+1) * sizeof(short *)); *S5 = (short **) vrna_alloc((n_seq+1) * sizeof(short *)); *S3 = (short **) vrna_alloc((n_seq+1) * sizeof(short *)); *a2s = (unsigned short **) vrna_alloc((n_seq+1) * sizeof(unsigned short *)); *Ss = (char **) vrna_alloc((n_seq+1) * sizeof(char *)); for (s=0; s<n_seq; s++) { if(strlen(sequences[s]) != length) vrna_message_error("uneqal seqence lengths"); (*S5)[s] = (short *) vrna_alloc((length + 2) * sizeof(short)); (*S3)[s] = (short *) vrna_alloc((length + 2) * sizeof(short)); (*a2s)[s] = (unsigned short *)vrna_alloc((length + 2) * sizeof(unsigned short)); (*Ss)[s] = (char *) vrna_alloc((length + 2) * sizeof(char)); (*S)[s] = (short *) vrna_alloc((length + 2) * sizeof(short)); encode_ali_sequence(sequences[s], (*S)[s], (*S5)[s], (*S3)[s], (*Ss)[s], (*a2s)[s], circ); } (*S5)[n_seq] = NULL; (*S3)[n_seq] = NULL; (*a2s)[n_seq] = NULL; (*Ss)[n_seq] = NULL; (*S)[n_seq] = NULL; } else vrna_message_error("alloc_sequence_arrays: no sequences in the alignment!"); }
PUBLIC double mean_bp_distance_pr(int length, FLT_OR_DBL *p){ double d=0; int *index = vrna_idx_row_wise((unsigned int) length); if (p==NULL) vrna_message_error("p==NULL. You need to supply a valid probability matrix for mean_bp_distance_pr()"); d = wrap_mean_bp_distance(p, length, index, TURN); free(index); return d; }
/* get the free energy of a subsequence from the q[] array */ PUBLIC double get_subseq_F( int i, int j){ if(backward_compat_compound) if(backward_compat_compound->exp_matrices) if(backward_compat_compound->exp_matrices->q){ int *my_iindx = backward_compat_compound->iindx; vrna_exp_param_t *pf_params = backward_compat_compound->exp_params; FLT_OR_DBL *q = backward_compat_compound->exp_matrices->q; return ((-log(q[my_iindx[i]-j])-(j-i+1)*log(pf_params->pf_scale))*pf_params->kT/1000.0); } vrna_message_error("call pf_fold() to fill q[] array before calling get_subseq_F()"); return 0.; /* we will never get to this point */ }
PRIVATE void ignore_comment(char * line) { /* excise C style comments */ /* only one comment per line, no multiline comments */ char *cp1, *cp2; if ((cp1=strstr(line, "/*"))) { cp2 = strstr(cp1, "*/"); if (cp2==NULL) vrna_message_error("unclosed comment in parameter file"); /* can't use strcpy for overlapping strings */ for (cp2+=2; *cp2!='\0'; cp2++, cp1++) *cp1 = *cp2; *cp1 = '\0'; } return; }
PUBLIC char *settype(enum parset s){ switch(s){ case S: return "stack"; case S_H: return "stack_enthalpies"; case HP: return "hairpin"; case HP_H: return "hairpin_enthalpies"; case B: return "bulge"; case B_H: return "bulge_enthalpies"; case IL: return "interior"; case IL_H: return "interior_enthalpies"; case MME: return "mismatch_exterior"; case MME_H: return "mismatch_exterior_enthalpies"; case MMH: return "mismatch_hairpin"; case MMH_H: return "mismatch_hairpin_enthalpies"; case MMI: return "mismatch_interior"; case MMI_H: return "mismatch_interior_enthalpies"; case MMI1N: return "mismatch_interior_1n"; case MMI1N_H: return "mismatch_interior_1n_enthalpies"; case MMI23: return "mismatch_interior_23"; case MMI23_H: return "mismatch_interior_23_enthalpies"; case MMM: return "mismatch_multi"; case MMM_H: return "mismatch_multi_enthalpies"; case D5: return "dangle5"; case D5_H: return "dangle5_enthalpies"; case D3: return "dangle3"; case D3_H: return "dangle3_enthalpies"; case INT11: return "int11"; case INT11_H: return "int11_enthalpies"; case INT21: return "int21"; case INT21_H: return "int21_enthalpies"; case INT22: return "int22"; case INT22_H: return "int22_enthalpies"; case ML: return "ML_params"; case NIN: return "NINIO"; case TRI: return "Triloops"; case TL: return "Tetraloops"; case HEX: return "Hexaloops"; case QUIT: return "END"; case MISC: return "Misc"; default: vrna_message_error("\nThe answer is: 42\n"); } return ""; }
PRIVATE double PrfEditCost(int i, int j, const float *T1, const float *T2) { double dist; int k, kmax; kmax = (int) T1[1]; if ((int) T2[1] != kmax) vrna_message_error("inconsistent Profiles in PrfEditCost"); if(i==0) { for(dist = 0. ,k=0 ; k<kmax ; k++) dist += T2[j*kmax+k]; } if(j==0) { for(dist = 0. ,k=0 ; k<kmax ; k++) dist += T1[i*kmax+k]; } if((i>0)&&(j>0)) { for(dist = 2.,k=0; k<kmax; k++) dist -= 2.*average(T1[i*kmax+k],T2[j*kmax+k]); } return dist; }
PUBLIC double mean_bp_dist(int length) { /* compute the mean base pair distance in the thermodynamic ensemble */ /* <d> = \sum_{a,b} p_a p_b d(S_a,S_b) this can be computed from the pair probs p_ij as <d> = \sum_{ij} p_{ij}(1-p_{ij}) */ int i, j, *my_iindx; double d = 0; if (pr==NULL) vrna_message_error("pr==NULL. You need to call pf_fold() before mean_bp_dist()"); my_iindx = vrna_idx_row_wise(length); for (i=1; i<=length; i++) for (j=i+TURN+1; j<=length; j++) d += pr[my_iindx[i]-j] * (1-pr[my_iindx[i]-j]); free(my_iindx); return 2*d; }
PRIVATE void usage(void) { vrna_message_error("usage: AnalyseSeqs [-X[bswnm]] [-Q] [-M{mask}] \n" " [-D{H|A[,cost]|G[,cost1,cost2]}] [-d{D|B|H|S}]"); exit(0); }
PRIVATE void pf_linear(vrna_fold_compound_t *vc){ char *hard_constraints; int n, i,j, k, ij, d, *my_iindx, *jindx, with_gquad, turn, with_ud, hc_decompose; FLT_OR_DBL temp, Qmax, qbt1, *q, *qb, *qm, *qm1, *q1k, *qln; double max_real; vrna_ud_t *domains_up; vrna_md_t *md; vrna_hc_t *hc; vrna_mx_pf_t *matrices; vrna_mx_pf_aux_el_t *aux_mx_el; vrna_mx_pf_aux_ml_t *aux_mx_ml; vrna_exp_param_t *pf_params; n = vc->length; my_iindx = vc->iindx; jindx = vc->jindx; matrices = vc->exp_matrices; pf_params = vc->exp_params; hc = vc->hc; domains_up = vc->domains_up; q = matrices->q; qb = matrices->qb; qm = matrices->qm; qm1 = matrices->qm1; q1k = matrices->q1k; qln = matrices->qln; md = &(pf_params->model_details); with_gquad = md->gquad; turn = md->min_loop_size; hard_constraints = hc->matrix; with_ud = (domains_up && domains_up->exp_energy_cb); Qmax = 0; max_real = (sizeof(FLT_OR_DBL) == sizeof(float)) ? FLT_MAX : DBL_MAX; if (with_ud && domains_up->exp_prod_cb) domains_up->exp_prod_cb(vc, domains_up->data); if(with_gquad){ free(vc->exp_matrices->G); vc->exp_matrices->G = get_gquad_pf_matrix(vc->sequence_encoding2, vc->exp_matrices->scale, vc->exp_params); } /* init auxiliary arrays for fast exterior/multibranch loops */ aux_mx_el = vrna_exp_E_ext_fast_init(vc); aux_mx_ml = vrna_exp_E_ml_fast_init(vc); /*array initialization ; qb,qm,q qb,qm,q (i,j) are stored as ((n+1-i)*(n-i) div 2 + n+1-j */ for (d=0; d<=turn; d++) for (i=1; i<=n-d; i++) { j=i+d; ij = my_iindx[i]-j; qb[ij] = 0.0; } for (j = turn + 2; j <= n; j++) { for (i = j - turn - 1; i >= 1; i--) { /* construction of partition function of segment i,j */ /* firstly that given i binds j : qb(i,j) */ ij = my_iindx[i] - j; hc_decompose = hard_constraints[jindx[j] + i]; qbt1 = 0; if(hc_decompose){ /* process hairpin loop(s) */ qbt1 += vrna_exp_E_hp_loop(vc, i, j); /* process interior loop(s) */ qbt1 += vrna_exp_E_int_loop(vc, i, j); /* process multibranch loop(s) */ qbt1 += vrna_exp_E_mb_loop_fast(vc, i, j, aux_mx_ml->qqm1); } qb[ij] = qbt1; /* Multibranch loop */ qm[ij] = vrna_exp_E_ml_fast(vc, i, j, aux_mx_ml); if (qm1) qm1[jindx[j] + i] = aux_mx_ml->qqm[i]; /* for stochastic backtracking and circfold */ /* Exterior loop */ q[ij] = temp = vrna_exp_E_ext_fast(vc, i, j, aux_mx_el); if (temp>Qmax) { Qmax = temp; if (Qmax>max_real/10.) vrna_message_warning("Q close to overflow: %d %d %g", i,j,temp); } if (temp>=max_real) { vrna_message_error("overflow in pf_fold while calculating q[%d,%d]\n" "use larger pf_scale", i,j); } } /* rotate auxiliary arrays */ vrna_exp_E_ext_fast_rotate(vc, aux_mx_el); vrna_exp_E_ml_fast_rotate(vc, aux_mx_ml); } /* prefill linear qln, q1k arrays */ if(q1k && qln){ for (k=1; k<=n; k++) { q1k[k] = q[my_iindx[1] - k]; qln[k] = q[my_iindx[k] - n]; } q1k[0] = 1.0; qln[n+1] = 1.0; } /* free memory occupied by auxiliary arrays for fast exterior/multibranch loops */ vrna_exp_E_ml_fast_free(vc, aux_mx_ml); vrna_exp_E_ext_fast_free(vc, aux_mx_el); }
PRIVATE void alipf_linear( vrna_fold_compound_t *vc){ char *hard_constraints; int i,j, ij, jij, d, turn, n, *my_iindx, *jindx, *pscore; FLT_OR_DBL temp, Qmax, qbt1, *q, *qb, *qm, *qm1; double kTn, max_real; vrna_exp_param_t *pf_params; vrna_mx_pf_t *matrices; vrna_mx_pf_aux_el_t *aux_mx_el; vrna_mx_pf_aux_ml_t *aux_mx_ml; vrna_md_t *md; vrna_hc_t *hc; n = vc->length; pf_params = vc->exp_params; matrices = vc->exp_matrices; hc = vc->hc; my_iindx = vc->iindx; jindx = vc->jindx; pscore = vc->pscore; /* precomputed array of pair types */ md = &(pf_params->model_details); q = matrices->q; qb = matrices->qb; qm = matrices->qm; qm1 = matrices->qm1; hard_constraints = hc->matrix; turn = md->min_loop_size; kTn = pf_params->kT/10.; /* kT in cal/mol */ Qmax = 0.; max_real = (sizeof(FLT_OR_DBL) == sizeof(float)) ? FLT_MAX : DBL_MAX; /* init auxiliary arrays for fast exterior/multibranch loops */ aux_mx_el = vrna_exp_E_ext_fast_init(vc); aux_mx_ml = vrna_exp_E_ml_fast_init(vc); /* array initialization ; qb,qm,q qb,qm,q (i,j) are stored as ((n+1-i)*(n-i) div 2 + n+1-j */ for (d = 0; d <= turn; d++) for (i = 1; i <= n - d; i++) { j = i + d; ij = my_iindx[i]-j; qb[ij] = 0.0; } for (j = turn + 2; j <= n; j++) { for (i = j - turn - 1; i >= 1; i--) { int psc; /* construction of partition function for segment i,j */ /* calculate pf given that i and j pair: qb(i,j) */ ij = my_iindx[i] - j; jij = jindx[j] + i; psc = pscore[jij]; qbt1 = 0.; if (hard_constraints[jij]) { /* process hairpin loop(s) */ qbt1 += vrna_exp_E_hp_loop(vc, i, j); /* process interior loop(s) */ qbt1 += vrna_exp_E_int_loop(vc, i, j); /* process multibranch loop(s) */ qbt1 += vrna_exp_E_mb_loop_fast(vc, i, j, aux_mx_ml->qqm1); qbt1 *= exp(psc/kTn); } qb[ij] = qbt1; /* Multibranch loop */ qm[ij] = vrna_exp_E_ml_fast(vc, i, j, aux_mx_ml); if (qm1) qm1[jindx[j] + i] = aux_mx_ml->qqm[i]; /* for stochastic backtracking and circfold */ /* Exterior loop */ q[ij] = temp = vrna_exp_E_ext_fast(vc, i, j, aux_mx_el); if (temp > Qmax) { Qmax = temp; if (Qmax > max_real/10.) vrna_message_warning("Q close to overflow: %d %d %g", i,j,temp); } if (temp >= max_real) { vrna_message_error("overflow in pf_fold while calculating q[%d,%d]\n" "use larger pf_scale", i,j); } } /* rotate auxiliary arrays */ vrna_exp_E_ext_fast_rotate(vc, aux_mx_el); vrna_exp_E_ml_fast_rotate(vc, aux_mx_ml); } /* free memory occupied by auxiliary arrays for fast exterior/multibranch loops */ vrna_exp_E_ml_fast_free(vc, aux_mx_ml); vrna_exp_E_ext_fast_free(vc, aux_mx_el); }
PRIVATE void make_pscores(vrna_fold_compound_t *vc){ /* calculate co-variance bonus for each pair depending on */ /* compensatory/consistent mutations and incompatible seqs */ /* should be 0 for conserved pairs, >0 for good pairs */ #define NONE -10000 /* score for forbidden pairs */ char *structure = NULL; int i,j,k,l,s, max_span, turn; float **dm; int olddm[7][7]={{0,0,0,0,0,0,0}, /* hamming distance between pairs */ {0,0,2,2,1,2,2} /* CG */, {0,2,0,1,2,2,2} /* GC */, {0,2,1,0,2,1,2} /* GU */, {0,1,2,2,0,2,1} /* UG */, {0,2,2,1,2,0,2} /* AU */, {0,2,2,2,1,2,0} /* UA */}; short **S = vc->S; char **AS = vc->sequences; int n_seq = vc->n_seq; vrna_md_t *md = (vc->params) ? &(vc->params->model_details) : &(vc->exp_params->model_details); int *pscore = vc->pscore; /* precomputed array of pair types */ int *indx = vc->jindx; int *my_iindx = vc->iindx; int n = vc->length; turn = md->min_loop_size; if (md->ribo) { if (RibosumFile !=NULL) dm=readribosum(RibosumFile); else dm=get_ribosum((const char **)AS, n_seq, n); } else { /*use usual matrix*/ dm = vrna_alloc(7*sizeof(float*)); for (i=0; i<7;i++) { dm[i] = vrna_alloc(7*sizeof(float)); for (j=0; j<7; j++) dm[i][j] = (float) olddm[i][j]; } } max_span = md->max_bp_span; if((max_span < turn+2) || (max_span > n)) max_span = n; for (i=1; i<n; i++) { for (j=i+1; (j<i+turn+1) && (j<=n); j++) pscore[indx[j]+i] = NONE; for (j=i+turn+1; j<=n; j++) { int pfreq[8]={0,0,0,0,0,0,0,0}; double score; for (s=0; s<n_seq; s++) { int type; if (S[s][i]==0 && S[s][j]==0) type = 7; /* gap-gap */ else { if ((AS[s][i] == '~')||(AS[s][j] == '~')) type = 7; else type = md->pair[S[s][i]][S[s][j]]; } pfreq[type]++; } if (pfreq[0]*2+pfreq[7]>n_seq) { pscore[indx[j]+i] = NONE; continue;} for (k=1,score=0; k<=6; k++) /* ignore pairtype 7 (gap-gap) */ for (l=k; l<=6; l++) score += pfreq[k]*pfreq[l]*dm[k][l]; /* counter examples score -1, gap-gap scores -0.25 */ pscore[indx[j]+i] = md->cv_fact * ((UNIT*score)/n_seq - md->nc_fact*UNIT*(pfreq[0] + pfreq[7]*0.25)); if((j - i + 1) > max_span){ pscore[indx[j]+i] = NONE; } } } if (md->noLP) /* remove unwanted pairs */ for (k=1; k<n-turn-1; k++) for (l=1; l<=2; l++) { int type,ntype=0,otype=0; i=k; j = i+turn+l; type = pscore[indx[j]+i]; while ((i>=1)&&(j<=n)) { if ((i>1)&&(j<n)) ntype = pscore[indx[j+1]+i-1]; if ((otype<md->cv_fact*MINPSCORE)&&(ntype<md->cv_fact*MINPSCORE)) /* too many counterexamples */ pscore[indx[j]+i] = NONE; /* i.j can only form isolated pairs */ otype = type; type = ntype; i--; j++; } } if (fold_constrained&&(structure!=NULL)) { int psij, hx, hx2, *stack, *stack2; stack = vrna_alloc(sizeof(int)*(n+1)); stack2 = vrna_alloc(sizeof(int)*(n+1)); for(hx=hx2=0, j=1; j<=n; j++) { switch (structure[j-1]) { case 'x': /* can't pair */ for (l=1; l<j-turn; l++) pscore[indx[j]+l] = NONE; for (l=j+turn+1; l<=n; l++) pscore[indx[l]+j] = NONE; break; case '(': stack[hx++]=j; /* fallthrough */ case '[': stack2[hx2++]=j; /* fallthrough */ case '<': /* pairs upstream */ for (l=1; l<j-turn; l++) pscore[indx[j]+l] = NONE; break; case ']': if (hx2<=0) { fprintf(stderr, "%s\n", structure); vrna_message_error("unbalanced brackets in constraints"); } i = stack2[--hx2]; pscore[indx[j]+i]=NONE; break; case ')': if (hx<=0) { fprintf(stderr, "%s\n", structure); vrna_message_error("unbalanced brackets in constraints"); } i = stack[--hx]; psij = pscore[indx[j]+i]; /* store for later */ for (k=j; k<=n; k++) for (l=i; l<=j; l++) pscore[indx[k]+l] = NONE; for (l=i; l<=j; l++) for (k=1; k<=i; k++) pscore[indx[l]+k] = NONE; for (k=i+1; k<j; k++) pscore[indx[k]+i] = pscore[indx[j]+k] = NONE; pscore[indx[j]+i] = (psij>0) ? psij : 0; /* fallthrough */ case '>': /* pairs downstream */ for (l=j+turn+1; l<=n; l++) pscore[indx[l]+j] = NONE; break; } } if (hx!=0) { fprintf(stderr, "%s\n", structure); vrna_message_error("unbalanced brackets in constraint string"); } free(stack); free(stack2); } /*free dm */ for (i=0; i<7;i++) { free(dm[i]); } free(dm); /* copy over pscores for backward compatibility */ if(vc->pscore_pf_compat){ for(i = 1; i < n; i++) for(j = i; j <= n; j++){ vc->pscore_pf_compat[my_iindx[i] - j] = (short)pscore[indx[j] + i]; } } }
PUBLIC vrna_fold_compound_t * vrna_fold_compound_TwoD(const char *sequence, const char *s1, const char *s2, vrna_md_t *md_p, unsigned int options){ int length, l, turn; vrna_fold_compound_t *vc; vrna_md_t md; if(sequence == NULL) return NULL; /* sanity check */ length = strlen(sequence); if(length == 0) vrna_message_error("vrna_fold_compound_TwoD: sequence length must be greater 0"); l = strlen(s1); if(l != length) vrna_message_error("vrna_fold_compound_TwoD: sequence and s1 differ in length"); l = strlen(s2); if(l != length) vrna_message_error("vrna_fold_compound_TwoD: sequence and s2 differ in length"); vc = vrna_alloc(sizeof(vrna_fold_compound_t)); vc->type = VRNA_VC_TYPE_SINGLE; vc->length = length; vc->sequence = strdup(sequence); /* get a copy of the model details */ if(md_p) md = *md_p; else /* this fallback relies on global parameters and thus is not threadsafe */ vrna_md_set_default(&md); /* always make uniq ML decomposition ! */ md.uniq_ML = 1; md.compute_bpp = 0; set_fold_compound(vc, &md, options, WITH_PTYPE | WITH_PTYPE_COMPAT); if(!(options & VRNA_OPTION_EVAL_ONLY)){ vrna_hc_init(vc); /* add default hard constraints */ /* add DP matrices */ vrna_mx_add(vc, VRNA_MX_2DFOLD, options); } /* set all fields that are unique to Distance class partitioning... */ turn = vc->params->model_details.min_loop_size; vc->reference_pt1 = vrna_ptable(s1); vc->reference_pt2 = vrna_ptable(s2); vc->referenceBPs1 = vrna_refBPcnt_matrix(vc->reference_pt1, turn); vc->referenceBPs2 = vrna_refBPcnt_matrix(vc->reference_pt2, turn); vc->bpdist = vrna_refBPdist_matrix(vc->reference_pt1, vc->reference_pt2, turn); /* compute maximum matching with reference structure 1 disallowed */ vc->mm1 = maximumMatchingConstraint(vc->sequence, vc->reference_pt1); /* compute maximum matching with reference structure 2 disallowed */ vc->mm2 = maximumMatchingConstraint(vc->sequence, vc->reference_pt2); vc->maxD1 = vc->mm1[vc->iindx[1]-length] + vc->referenceBPs1[vc->iindx[1]-length]; vc->maxD2 = vc->mm2[vc->iindx[1]-length] + vc->referenceBPs2[vc->iindx[1]-length]; return vc; }
PUBLIC vrna_fold_compound_t * vrna_fold_compound_comparative( const char **sequences, vrna_md_t *md_p, unsigned int options){ int s, n_seq, length; vrna_fold_compound_t *vc; vrna_md_t md; unsigned int aux_options; aux_options = 0L; if(sequences == NULL) return NULL; for(s=0;sequences[s];s++); /* count the sequences */ n_seq = s; length = strlen(sequences[0]); /* sanity check */ if(length == 0) vrna_message_error("vrna_fold_compound_comparative: sequence length must be greater 0"); for(s = 0; s < n_seq; s++) if(strlen(sequences[s]) != length) vrna_message_error("vrna_fold_compound_comparative: uneqal sequence lengths in alignment"); vc = vrna_alloc(sizeof(vrna_fold_compound_t)); vc->type = VRNA_VC_TYPE_ALIGNMENT; vc->n_seq = n_seq; vc->length = length; vc->sequences = vrna_alloc(sizeof(char *) * (vc->n_seq + 1)); for(s = 0; sequences[s]; s++) vc->sequences[s] = strdup(sequences[s]); /* get a copy of the model details */ if(md_p) md = *md_p; else /* this fallback relies on global parameters and thus is not threadsafe */ vrna_md_set_default(&md); aux_options |= WITH_PTYPE; if(options & VRNA_OPTION_PF) aux_options |= WITH_PTYPE_COMPAT; set_fold_compound(vc, &md, options, aux_options); make_pscores(vc); if(!(options & VRNA_OPTION_EVAL_ONLY)){ /* add default hard constraints */ vrna_hc_init(vc); /* add DP matrices (if required) */ vrna_mx_add(vc, VRNA_MX_DEFAULT, options); } return vc; }
PUBLIC vrna_fold_compound_t * vrna_fold_compound( const char *sequence, vrna_md_t *md_p, unsigned int options){ unsigned int i, length, aux_options; vrna_fold_compound_t *vc; vrna_md_t md; if(sequence == NULL) return NULL; /* sanity check */ length = strlen(sequence); if(length == 0) vrna_message_error("vrna_fold_compound: sequence length must be greater 0"); vc = vrna_alloc(sizeof(vrna_fold_compound_t)); vc->type = VRNA_VC_TYPE_SINGLE; vc->length = length; vc->sequence = strdup(sequence); aux_options = 0L; /* get a copy of the model details */ if(md_p) md = *md_p; else vrna_md_set_default(&md); if(options & VRNA_OPTION_WINDOW){ /* sliding window structure prediction */ if(md.window_size <= 0) md.window_size = (int)vc->length; else if(md.window_size > (int)vc->length) md.window_size = (int)vc->length; vc->window_size = md.window_size; if((md.max_bp_span <= 0) || (md.max_bp_span > md.window_size)) md.max_bp_span = md.window_size; set_fold_compound(vc, &md, options, aux_options); vc->ptype_local = vrna_alloc(sizeof(char *)*(vc->length+1)); for (i = vc->length; ( i > vc->length - vc->window_size - 5) && (i >= 0); i--){ vc->ptype_local[i] = vrna_alloc(sizeof(char)*(vc->window_size+5)); } if(!(options & VRNA_OPTION_EVAL_ONLY)){ /* add default hard constraints */ /* vrna_hc_init(vc); */ /* no hard constraints in Lfold, yet! */ /* add DP matrices */ vrna_mx_add(vc, VRNA_MX_WINDOW, options); } } else { /* regular global structure prediction */ /* set window size to entire sequence */ md.window_size = (int)vc->length; aux_options |= WITH_PTYPE; if(options & VRNA_OPTION_PF) aux_options |= WITH_PTYPE_COMPAT; set_fold_compound(vc, &md, options, aux_options); if(!(options & VRNA_OPTION_EVAL_ONLY)){ /* add default hard constraints */ vrna_hc_init(vc); /* add DP matrices (if required) */ vrna_mx_add(vc, VRNA_MX_DEFAULT, options); } } return vc; }
int main(int argc, char *argv[]) { int n,i,j,l; int intty; int outtty; char *mask, junk[20]; char **s; char **ss[4]; float *B; float **dm; Split *S; Union *U; char DistAlgorithm='H'; int nn[4]; short Do_Split=0, Do_Wards=0, Do_Stg=1, Do_4_Stg=0, Do_Nj=0, Do_Mat=0; float per_digit, per_gap; mask = vrna_alloc(sizeof(char)*54); strcpy (mask,"%ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"); for (i=1; i<argc; i++) { if (argv[i][0]=='-') { switch ( argv[i][1] ) { case 'X': if (argv[i][2]=='\0') { Do_Stg = 1 ; break; } Do_Split = 0; Do_Wards = 0; Do_Stg = 0; Do_Nj = 0; for(j=2;j<strlen(argv[i]);j++) { switch(argv[i][j]) { case 's' : Do_Split = 1; break; case 'w' : Do_Wards = 1; break; case 'b' : Do_Stg = 1; break; case 'n' : Do_Nj = 1; break; case 'm' : Do_Mat = 1; break; default : usage(); } } break; case 'Q': Do_4_Stg = 1; break; case 'M': if(mask) { free(mask); mask = NULL; } switch (argv[i][2] ) { case '\0' : usage(); break; case 'a' : mask = vrna_alloc(sizeof(char)*54); strcpy(mask, "%ABCDEFGHIJKLMNOPQRSTUVWabcdefghijklmnopqrstuvwxyz"); if(argv[i][3]=='+') mask[0] = '~'; /* make case sensitive */ break; case 'u' : mask = vrna_alloc(sizeof(char)*28); strcpy(mask,"~ABCDEFGHIJKLMNOPQRSTUVW"); break; case 'l' : mask = vrna_alloc(sizeof(char)*28); strcpy(mask,"~abcdefghijklmnopqrstuvwxyz"); break; case 'c' : mask = vrna_alloc(sizeof(char)*12); strcpy(mask,"~1234567890"); break; case 'n' : mask = vrna_alloc(sizeof(char)*64); strcpy (mask, "%ABCDEFGHIJKLMNOPQRSTUVWabcdefghijklmnopqrstuvwxyz1234567890"); if(argv[i][3]=='+') mask[0] = '~'; /* make case sensitive */ break; case 'R' : /* RNA */ mask = vrna_alloc(sizeof(char)*10); strcpy(mask,"%GCAUgcau"); if(argv[i][3]=='+') mask[0] = '~'; /* make case sensitive */ break; case 'D' : /* DNA */ mask = vrna_alloc(sizeof(char)*10); strcpy (mask,"%GCATgcat"); if(argv[i][3]=='+') mask[0] = '~'; /* make case sensitive */ break; case 'A' : /* AMINOACIDS */ mask = vrna_alloc(sizeof(char)*42); strcpy(mask,"%ACDEFGHIKLMNPQRSTVWYacdefghiklmnpqrstvwy"); if(argv[i][3]=='+') mask[0] = '~'; /* make case sensitive */ break; case 'S' : /* SECONDARY STRUCTURES */ mask = vrna_alloc(sizeof(char)*6); strcpy(mask,"~().^"); break; case '%' : /* ARBITRARY ALPHABETS */ l = strlen(argv[i]); if(argv[i][l] == '+'){ mask = vrna_alloc(sizeof(char)*(l-2)); mask[0] = '~'; for(j=1;j<=l-4;j++) mask[j] = argv[i][j+2]; mask[l-3]='\0'; } if(argv[i][l] == '!'){ mask = vrna_alloc(sizeof(char)*(l-2)); mask[0] = '!'; for(j=1;j<=l-4;j++) mask[j] = argv[i][j+2]; mask[l-3]='\0'; } else { mask = vrna_alloc(sizeof(char)*(l-1)); mask[0] = '%'; for(j=1;j<=l-3;j++) mask[j] = argv[i][j+2]; mask[l-2]='\0'; } break; default : usage(); } break; case 'D' : /* choose algorithm */ switch(argv[i][2]) { case '\0' : usage(); break; case 'H' : DistAlgorithm = 'H'; break; case 'A' : DistAlgorithm = 'A'; if(argv[i][3]==',') { per_digit=-1.; sscanf(argv[i],"%[^,],%g",junk,&per_digit); if(per_digit<0.) usage(); per_gap = per_digit; Set_StrEdit_GapCosts(per_digit,per_gap); } break; case 'G' : DistAlgorithm = 'G'; if(argv[i][3]==',') { per_digit=-1.; per_gap =-1.; sscanf(argv[i]+4,"%f,%f",&per_digit,&per_gap); if((per_digit<0.)||(per_gap<0)) usage(); Set_StrEdit_GapCosts(per_digit,per_gap); } break; default: usage(); } break; case 'd' : /* choose distance matrix */ switch(argv[i][2]) { case 'D' : /* Dayhoff Distances */ case 'A' : /* Aminoacid Distance (Hofacker & Borstnik) */ case 'B' : /* RY distances for nucleotides */ case 'H' : /* Hogeweg's Distance for Secondary Structures */ case 'S' : /* Simple Distance (superfluous option) */ Set_StrEdit_CostMatrix(argv[i][2]); break; default : usage(); } break; default : usage(); } } } /* END PARSING OF COMMAND LINE */ intty = isatty(fileno(stdin)); outtty= isatty(fileno(stdout)); if(intty){ if(outtty) { printf("Input sequences; @ to mark end of input\n"); printf("%s%s\n", scale1, scale2); } else { fprintf(stderr,"Input sequences; @ to mark end of input\n"); fprintf(stderr,"%s%s\n", scale1, scale2); } } while ((s=read_sequence_list(&n,mask))!=NULL) { dm = NULL; if(Do_4_Stg) { ss[0] = s; nn[0] = n; for(i=1;i<4;i++) { ss[i] = read_sequence_list(&n,mask); if(ss[i]==NULL) vrna_message_error("read_sequences: wrong or insufficient data."); nn[i] = n; } printf_taxa_list(); B = statgeom4(ss,nn); printf_stg(B); SimplifiedBox(B,"box.ps"); /* This is preliminary !!! */ free(B); for(i=0;i<4;i++){ for(j=0;j<nn[i];j++) free(ss[i][j]); free(ss[i]); } /* free(ss); */ /* attempt to free a non-heap object */ } else { printf_taxa_list(); if(Do_Stg) { B = statgeom(s,n); if (B) { printf_stg(B); SimplifiedBox(B,"box.ps"); free(B); } } if((Do_Split)||(Do_Wards)||(Do_Nj)||(Do_Mat)) { switch(DistAlgorithm) { case 'H' : dm = Hamming_Distance_Matrix(s,n); printf("> %s\n","H (Hamming Distance)"); break; case 'A' : dm = StrEdit_SimpleDistMatrix(s,n); printf("> %s\n","A (Needleman-Wunsch Distance)"); break; case 'G' : dm = StrEdit_GotohDistMatrix(s,n); printf("> %s\n","G (Gotoh Distance)"); break; default: vrna_message_error("This can't happen."); } } if(Do_Split) { S = split_decomposition(dm); sort_Split(S); print_Split(S); free_Split(S); } if(Do_Wards) { U = wards_cluster(dm); printf_phylogeny(U,"W"); PSplot_phylogeny(U,"wards.ps","Ward's Method"); free(U); } if(Do_Nj) { U = neighbour_joining(dm); printf_phylogeny(U,"Nj"); PSplot_phylogeny(U,"nj.ps","Neighbor Joining"); free(U); } if(Do_Mat) printf_distance_matrix(dm); } if (dm!=NULL) free_distance_matrix(dm); for(i=0;i<n;i++) free(s[i]); free(s); } return 0; }
PUBLIC void nrerror(const char message[]){ vrna_message_error(message); }
int main(int argc, char *argv[]){ struct RNAplot_args_info args_info; char *structure, *pre, *post; char fname[FILENAME_MAX_LENGTH], ffname[FILENAME_MAX_LENGTH]; char *rec_sequence, *rec_id, **rec_rest; int i, length; int istty; char format[5]="ps"; unsigned int rec_type, read_opt; vrna_md_t md; structure = pre = post = NULL; length = 0; vrna_md_set_default(&md); /* ############################################# # check the command line parameters ############################################# */ if(RNAplot_cmdline_parser (argc, argv, &args_info) != 0) exit(1); if(args_info.layout_type_given) rna_plot_type = args_info.layout_type_arg; if(args_info.pre_given) pre = strdup(args_info.pre_arg); if(args_info.post_given) post = strdup(args_info.post_arg); if(args_info.output_format_given){ strncpy(format, args_info.output_format_arg, 4); format[4] = '\0'; } /* free allocated memory of command line data structure */ RNAplot_cmdline_parser_free (&args_info); /* ############################################# # begin initializing ############################################# */ rec_type = read_opt = 0; rec_id = rec_sequence = NULL; rec_rest = NULL; istty = isatty(fileno(stdout)) && isatty(fileno(stdin)); /* set options we wanna pass to vrna_file_fasta_read_record() */ if(istty){ read_opt |= VRNA_INPUT_NOSKIP_BLANK_LINES; vrna_message_input_seq("Input sequence (upper or lower case) followed by structure"); } /* ############################################# # main loop: continue until end of file ############################################# */ while( !((rec_type = vrna_file_fasta_read_record(&rec_id, &rec_sequence, &rec_rest, NULL, read_opt)) & (VRNA_INPUT_ERROR | VRNA_INPUT_QUIT))){ if(rec_id){ (void) sscanf(rec_id, ">%" XSTR(FILENAME_ID_LENGTH) "s", fname); } else fname[0] = '\0'; length = (int)strlen(rec_sequence); structure = vrna_extract_record_rest_structure((const char **)rec_rest, 0, (rec_id) ? VRNA_OPTION_MULTILINE : 0); if(!structure) vrna_message_error("structure missing"); if((int)strlen(structure) != length) vrna_message_error("structure and sequence differ in length!"); if (fname[0]!='\0'){ strcpy(ffname, fname); strcat(ffname, "_ss"); } else strcpy(ffname, "rna"); structure = NULL; unsigned int struct_options = (rec_id) ? VRNA_OPTION_MULTILINE : 0; structure = vrna_extract_record_rest_structure((const char **)rec_rest, 0, struct_options); if(strlen(rec_sequence) != strlen(structure)) vrna_message_error("sequence and structure have unequal length"); switch (format[0]) { case 'p': strcat(ffname, ".ps"); (void) vrna_file_PS_rnaplot_a(rec_sequence, structure, ffname, pre, post, &md); break; case 'g': strcat(ffname, ".gml"); gmlRNA(rec_sequence, structure, ffname, 'x'); break; case 'x': strcat(ffname, ".ss"); xrna_plot(rec_sequence, structure, ffname); break; case 's': strcat(ffname, ".svg"); svg_rna_plot(rec_sequence, structure, ffname); break; default: RNAplot_cmdline_parser_print_help(); exit(EXIT_FAILURE); } fflush(stdout); /* clean up */ if(rec_id) free(rec_id); free(rec_sequence); free(structure); /* free the rest of current dataset */ if(rec_rest){ for(i=0;rec_rest[i];i++) free(rec_rest[i]); free(rec_rest); } rec_id = rec_sequence = structure = NULL; rec_rest = NULL; /* print user help for the next round if we get input from tty */ if(istty){ vrna_message_input_seq("Input sequence (upper or lower case) followed by structure"); } } return EXIT_SUCCESS; }