/**
 * Encode a sequence and translate every code through the model's alias table.
 *
 * The raw numeric encoding is obtained from vrna_seq_encode_simple(); each
 * entry at positions 1..l (l = strlen(sequence)) is then replaced by
 * md->alias[code]. Afterwards the array is padded at both ends: element l+1
 * receives a copy of element 1 and element 0 receives a copy of element l.
 *
 * @param sequence  NUL-terminated sequence string
 * @param md        model details providing the alias table
 * @return          the array returned by vrna_seq_encode_simple(), modified
 *                  in place, or NULL if either argument is NULL
 *
 * NOTE(review): assumes vrna_seq_encode_simple() hands back at least l+2
 * elements -- confirm against its definition.
 */
PUBLIC short *
vrna_seq_encode(const char  *sequence,
                vrna_md_t   *md)
{
  short         *encoded;
  unsigned int  len, pos;

  /* nothing to encode without both a sequence and model details */
  if ((sequence == NULL) || (md == NULL))
    return NULL;

  encoded = vrna_seq_encode_simple(sequence, md);
  len     = (unsigned int)strlen(sequence);

  /* map each position (1-based) through the alias table */
  pos = 1;
  while (pos <= len) {
    encoded[pos] = md->alias[encoded[pos]];
    pos++;
  }

  /* wrap-around padding: first element after the end, last one in front */
  encoded[len + 1]  = encoded[1];
  encoded[0]        = encoded[len];

  return encoded;
}
/*
 * Fill in the sequence-derived members of a fold compound.
 *
 * The parameter, matrix, hard-constraint and auxiliary-data slots are reset
 * to NULL first. Depending on vc->type the function then either
 *  - (single sequence) removes a cut-point marker from vc->sequence, stores
 *    the cut point, replaces vc->sequence by the spliced copy, and creates
 *    both numeric encodings plus the optional pair-type arrays, or
 *  - (alignment) derives the consensus sequence and its encoding, allocates
 *    the pscore array(s) and encodes every aligned sequence through
 *    vrna_aln_encode(), NULL-terminating each per-sequence pointer array.
 * For any other type nothing sequence-specific is done.
 * Finally the row-/column-wise index arrays are created and add_params()
 * is invoked.
 *
 * vc       fold compound to set up; vc->type must already be set, together
 *          with vc->sequence (single) or vc->sequences, vc->n_seq and
 *          vc->length (alignment)
 * md_p     model details; md_p->min_loop_size is overwritten with 0 when a
 *          cut point is present and the value equals TURN
 * options  VRNA_OPTION_EVAL_ONLY suppresses pair-type array creation; the
 *          value is also forwarded to add_params()
 * aux      WITH_PTYPE / WITH_PTYPE_COMPAT select which optional arrays
 *          are created
 *
 * Side effect: in the alignment case the file/global variable oldAliEn is
 * set from md_p->oldAliEn.
 */
PRIVATE void
set_fold_compound(vrna_fold_compound_t  *vc,
                  vrna_md_t             *md_p,
                  unsigned int          options,
                  unsigned int          aux)
{
  /* some default init values */
  vc->params        = NULL;
  vc->exp_params    = NULL;
  vc->matrices      = NULL;
  vc->exp_matrices  = NULL;
  vc->hc            = NULL;
  vc->auxdata       = NULL;
  vc->free_auxdata  = NULL;

  if (vc->type == VRNA_VC_TYPE_SINGLE) {
    int           cut       = -1; /* cut point for cofold */
    unsigned int  n;
    char          *scratch  = strdup(vc->sequence);
    /*
     * splice out the '&' if concatenated sequences and reset the cut
     * point... this should also be safe for single sequences
     */
    char          *spliced  = vrna_cut_point_remove(scratch, &cut);

    vc->cutpoint = cut;

    if ((cut > 0) && (md_p->min_loop_size == TURN))
      md_p->min_loop_size = 0;  /* is it safe to set this here? */

    /* the spliced copy takes the place of the original input string */
    free(vc->sequence);
    vc->sequence  = spliced;
    n             = strlen(spliced);
    vc->length    = n;

    vc->sequence_encoding   = vrna_seq_encode(spliced, md_p);
    vc->sequence_encoding2  = vrna_seq_encode_simple(spliced, md_p);

    /* pair-type arrays are optional and never built in eval-only mode */
    vc->ptype           = NULL;
    vc->ptype_pf_compat = NULL;
    if (!(options & VRNA_OPTION_EVAL_ONLY)) {
      if (aux & WITH_PTYPE)
        vc->ptype = vrna_ptypes(vc->sequence_encoding2, md_p);

      /* backward compatibility ptypes */
      if (aux & WITH_PTYPE_COMPAT)
        vc->ptype_pf_compat = get_ptypes(vc->sequence_encoding2, md_p, 1);
    }

    vc->sc = NULL;

    free(scratch);
  } else if (vc->type == VRNA_VC_TYPE_ALIGNMENT) {
    unsigned int  k;
    unsigned int  cols = vc->length;
    unsigned int  tri_cells;

    vc->length = cols;

    vc->cons_seq  = consensus((const char **)vc->sequences);
    vc->S_cons    = vrna_seq_encode_simple(vc->cons_seq, md_p);

    /* number of cells in the triangular pscore array */
    tri_cells = (cols * (cols + 1)) / 2 + 2;

    vc->pscore = vrna_alloc(sizeof(int) * tri_cells);

    /* backward compatibility ptypes */
    if (aux & WITH_PTYPE_COMPAT)
      vc->pscore_pf_compat = vrna_alloc(sizeof(int) * tri_cells);
    else
      vc->pscore_pf_compat = NULL;

    vc->oldAliEn  = md_p->oldAliEn;
    oldAliEn      = vc->oldAliEn;

    /* one slot per sequence plus a terminating NULL entry */
    vc->S   = vrna_alloc((vc->n_seq + 1) * sizeof(short *));
    vc->S5  = vrna_alloc((vc->n_seq + 1) * sizeof(short *));
    vc->S3  = vrna_alloc((vc->n_seq + 1) * sizeof(short *));
    vc->a2s = vrna_alloc((vc->n_seq + 1) * sizeof(unsigned short *));
    vc->Ss  = vrna_alloc((vc->n_seq + 1) * sizeof(char *));

    k = 0;
    while (k < vc->n_seq) {
      vrna_aln_encode(vc->sequences[k],
                      &(vc->S[k]),
                      &(vc->S5[k]),
                      &(vc->S3[k]),
                      &(vc->Ss[k]),
                      &(vc->a2s[k]),
                      md_p);
      k++;
    }

    vc->S[vc->n_seq]    = NULL;
    vc->S5[vc->n_seq]   = NULL;
    vc->S3[vc->n_seq]   = NULL;
    vc->a2s[vc->n_seq]  = NULL;
    vc->Ss[vc->n_seq]   = NULL;

    vc->scs = NULL;
  }
  /* any other type: do nothing ? */

  vc->iindx = vrna_idx_row_wise(vc->length);
  vc->jindx = vrna_idx_col_wise(vc->length);

  /* now come the energy parameters */
  add_params(vc, md_p, options);
}
/* Return non-zero if c is one of the alignment gap characters '-', '_', '~', '.'. */
static int
aln_is_gap_char(char c)
{
  return (c == '-') || (c == '_') || (c == '~') || (c == '.');
}


/**
 * Build the per-sequence lookup arrays for one aligned sequence.
 *
 * Four arrays of l+2 elements each (l = strlen(sequence)) are allocated with
 * vrna_alloc() and, together with the numeric encoding obtained from
 * vrna_seq_encode_simple(), handed back through the output pointers.
 * NOTE(review): ownership of all five arrays presumably passes to the
 * caller -- confirm.
 *
 * With md->oldAliEn set, alignment columns are used directly:
 *  - s5[i] = S[i-1] and s3[i] = S[i+1], with s5[1] = s3[l] = 0 unless
 *    md->circ is set, in which case they wrap around to S[l] and S[1]
 *  - ss is a character-wise copy of the input string, as[i] = i
 *  - S[l+1] is set to S[1]
 *
 * Otherwise gap characters ('-', '_', '~', '.') are skipped:
 *  - s5[j] holds the code of the closest non-gap column before j,
 *    s3[j] the code of the closest non-gap column after j; the outermost
 *    entries s5[1] / s3[l] are 0, or for md->circ the last / first non-gap
 *    code of the sequence
 *  - ss receives the non-gap characters, stored from index 0
 *  - as[i] is the number of non-gap characters in columns 1..i
 *
 * An empty sequence is handled safely in both modes; previously the
 * oldAliEn branch evaluated S[l-1] with unsigned l == 0, i.e. read far
 * outside the encoding array.
 *
 * @param sequence  NUL-terminated aligned sequence (may contain gaps)
 * @param S_p       out: numeric encoding of the sequence
 * @param s5_p      out: 5' neighbour codes
 * @param s3_p      out: 3' neighbour codes
 * @param ss_p      out: sequence characters (gap-free unless oldAliEn)
 * @param as_p      out: alignment column to sequence position map
 * @param md        model details; oldAliEn and circ are read here
 */
PUBLIC void
vrna_aln_encode(const char      *sequence,
                short           **S_p,
                short           **s5_p,
                short           **s3_p,
                char            **ss_p,
                unsigned short  **as_p,
                vrna_md_t       *md)
{
  unsigned int    i, l;
  unsigned short  p;

  l = (unsigned int)strlen(sequence);

  (*s5_p) = (short *)vrna_alloc((l + 2) * sizeof(short));
  (*s3_p) = (short *)vrna_alloc((l + 2) * sizeof(short));
  (*as_p) = (unsigned short *)vrna_alloc((l + 2) * sizeof(unsigned short));
  (*ss_p) = (char *)vrna_alloc((l + 2) * sizeof(char));

  /* make numerical encoding of sequence */
  (*S_p) = vrna_seq_encode_simple(sequence, md);

  (*s5_p)[0] = (*s5_p)[1] = 0;

  if (md->oldAliEn) {
    /* use alignment sequences in all energy evaluations */
    (*ss_p)[0] = sequence[0];
    for (i = 1; i < l; i++) {
      (*s5_p)[i]  = (*S_p)[i - 1];
      (*s3_p)[i]  = (*S_p)[i + 1];
      (*ss_p)[i]  = sequence[i];
      (*as_p)[i]  = i;
    }
    (*ss_p)[l]  = sequence[l];
    (*as_p)[l]  = l;

    /*
     * l is unsigned: for an empty sequence l - 1 would wrap around and
     * index far outside S, so s5[0] simply keeps the 0 assigned above
     */
    if (l > 0)
      (*s5_p)[l] = (*S_p)[l - 1];

    (*s3_p)[l]    = 0;
    (*S_p)[l + 1] = (*S_p)[1];
    (*s5_p)[1]    = 0;
    if (md->circ) {
      (*s5_p)[1]  = (*S_p)[l];
      (*s3_p)[l]  = (*S_p)[1];
      /* NOTE(review): stores a numeric code in the character array; kept as is */
      (*ss_p)[l + 1] = (*S_p)[1];
    }
  } else {
    if (md->circ) {
      /* 5' neighbour of the first column: last non-gap residue */
      for (i = l; i > 0; i--) {
        if (!aln_is_gap_char(sequence[i - 1])) {
          (*s5_p)[1] = (*S_p)[i];
          break;
        }
      }
      /* 3' neighbour of the last column: first non-gap residue */
      for (i = 1; i <= l; i++) {
        if (!aln_is_gap_char(sequence[i - 1])) {
          (*s3_p)[l] = (*S_p)[i];
          break;
        }
      }
    } else {
      (*s5_p)[1] = (*s3_p)[l] = 0;
    }

    /* left-to-right sweep: propagate the most recent non-gap code */
    for (i = 1, p = 0; i <= l; i++) {
      if (aln_is_gap_char(sequence[i - 1])) {
        (*s5_p)[i + 1] = (*s5_p)[i];
      } else {
        /* no gap */
        (*ss_p)[p++]    = sequence[i - 1]; /* start at 0!! */
        (*s5_p)[i + 1]  = (*S_p)[i];
      }

      (*as_p)[i] = p;
    }

    /* right-to-left sweep: propagate the next non-gap code */
    for (i = l; i >= 1; i--) {
      if (aln_is_gap_char(sequence[i - 1]))
        (*s3_p)[i - 1] = (*s3_p)[i];
      else
        (*s3_p)[i - 1] = (*S_p)[i];
    }
  }
}