/**
 * Builds the profile forest for a single sequence.
 *
 * The sequence is run through pf_fold() and then fold() (with dangles set
 * to 2); the dot-bracket string written by fold() determines the forest size
 * and is passed to buildForest() together with the sequence.
 *
 * @param baseStr    sequence to fold
 * @param name       stored in m_name and registered through addStrName()
 * @param constraint unused here: the constrained pf_fold() call is commented out
 * @param t          unused in this constructor
 */
RNAProfileAlignment::RNAProfileAlignment(const string &baseStr, const string &name, const string &constraint, double t)
  : PPForestAli<RNA_Alphabet_Profile,RNA_Alphabet_Profile>(2*baseStr.length()),
    m_name(name),
    m_numStructures(1)
{
  // The structure buffer is owned by a string so that it is released even if
  // setSize() or buildForest() throws; the callees still receive a plain
  // char* exactly as before.
  string viennaBuf(baseStr.length()+1, '\0');
  char *viennaStr=&viennaBuf[0];

  // calculate partition function for the sequence
  do_backtrack=1;
  init_pf_fold(baseStr.length());

  //if(constraint.length()>0)
  //pf_fold((char*)baseStr.c_str(),(char*)constraint.c_str());
  //else
  // explicit conversion to non-const value, but pf_fold does not alter baseStr
  pf_fold(const_cast<char*>(baseStr.c_str()),NULL);

  dangles=2;
  fold(const_cast<char*>(baseStr.c_str()),viennaStr);

  setSize(RNAFuncs::treeSize(viennaStr));
  buildForest(baseStr,viennaStr,true);

  free_pf_arrays();

  //  hasSequence=true;
  addStrName(name);
}
/*
 * Runs pf_fold() on `sequence`.
 *
 * The value returned by pf_fold() is written to *gfe. The returned buffer
 * holds strlen(sequence)+1 bytes obtained from space() and is handed to
 * pf_fold() as its structure argument; ownership passes to the caller.
 * The partition function arrays are released before returning.
 */
char* seq_pf_fold(const char* sequence, float* gfe)
{
  const size_t buffer_len = strlen(sequence) + 1;
  char* result = (char*)space(sizeof(char) * buffer_len);

  *gfe = pf_fold(sequence, result);

  free_pf_arrays();
  return result;
}
/* Objective function */ double calculate_f(const gsl_vector *v, void *params) { double D; int i,j,length; minimizer_pars_struct *pars = (minimizer_pars_struct *)params; double q_tmp; double sigma_tmp; //fprintf(stderr, "=> Evaluating objective Function...\n"); length = pars->length; for (i=0; i <= length; i++) { epsilon[i] = gsl_vector_get(v, i); p_unpaired[i] = 0.0; for (j=0; j <= length; j++) { p_pp[i][j] = 0.0; } } init_pf_fold(length); last_lnQ = pf_fold_pb(pars->seq, NULL); if (isnan(last_lnQ)) { return(NAN); } for (i = 1; i < length; i++) { for (j = i+1; j<= length; j++) { p_pp[i][j]=p_pp[j][i]=pr[iindx[i]-j]; } } get_pair_prob_vector(p_pp, p_unpaired, length, 1); free_pf_arrays(); D = 0.0; for (i = 1; i <= length; i++) { D += 1 / pars->tau * epsilon[i] * epsilon[i]; /* Ignore missing data. These have values of -1.0. To be on the safe side numerically test for < -0.5 */ if (q_unpaired[i] < -0.5) { sigma_tmp = 10000; // Set very high sigma to ignore these positions q_tmp = 0.5; // Set to arbitrary value, does not matter } else { sigma_tmp = pars->sigma; q_tmp = q_unpaired[i]; } D += 1 / sigma_tmp * ( p_unpaired[i] - q_tmp ) * ( p_unpaired[i] - q_tmp ); } return D; }
PRIVATE void heat_capacity(char *string, float T_min, float T_max, float h, int m) { int length, i; char *structure; float hc, kT, min_en; length = (int) strlen(string); do_backtrack = 0; temperature = T_min -m*h; /* initialize_fold(length); <- obsolete */ structure = (char *) vrna_alloc((unsigned) length+1); min_en = fold(string, structure); free(structure); free_arrays(); kT = (temperature+K0)*GASCONST/1000; /* in kcal */ pf_scale = exp(-(1.07*min_en)/kT/length ); /* init_pf_fold(length); <- obsolete */ vrna_exp_param_t *pf_parameters = NULL; vrna_md_t md; set_model_details(&md); pf_parameters = get_boltzmann_factors(temperature, 1.0, md, pf_scale); update_pf_params_par(length, pf_parameters); for (i=0; i<2*m+1; i++) { F[i] = pf_fold_par(string, NULL, pf_parameters, 0, 0, 0); /* T_min -2h */ md.temperature = temperature += h; kT = (temperature+K0)*GASCONST/1000; pf_scale=exp(-(F[i]/length +h*0.00727)/kT); /* try to extrapolate F */ free(pf_parameters); pf_parameters = get_boltzmann_factors(temperature, 1.0, md, pf_scale); update_pf_params_par(length, pf_parameters); } while (temperature <= (T_max+m*h+h)) { hc = - ddiff(F,h,m)* (temperature +K0 - m*h -h); printf("%g %g\n", (temperature-m*h-h), hc); for (i=0; i<2*m; i++) F[i] = F[i+1]; F[2*m] = pf_fold_par(string, NULL, pf_parameters, 0, 0, 0); /* printf("%g\n", F[2*m]);*/ temperature += h; kT = (temperature+K0)*GASCONST/1000; pf_scale=exp(-(F[i]/length +h*0.00727)/kT); free(pf_parameters); md.temperature = temperature; pf_parameters = get_boltzmann_factors(temperature, 1.0, md, pf_scale); update_pf_params_par(length, pf_parameters); } free_pf_arrays(); }
PRIVATE void heat_capacity(char *string, float T_min, float T_max, float h, int m) { int length, i; char *structure; float hc, kT, min_en; length = (int) strlen(string); do_backtrack = 0; temperature = T_min -m*h; initialize_fold(length); structure = (char *) space((unsigned) length+1); min_en = fold(string, structure); free(structure); free_arrays(); kT = (temperature+K0)*GASCONST/1000; /* in kcal */ pf_scale = exp(-(1.07*min_en)/kT/length ); init_pf_fold(length); for (i=0; i<2*m+1; i++) { F[i] = pf_fold(string, NULL); /* T_min -2h */ temperature += h; kT = (temperature+K0)*GASCONST/1000; pf_scale=exp(-(F[i]/length +h*0.00727)/kT); /* try to extrapolate F */ update_pf_params(length); } while (temperature <= (T_max+m*h+h)) { hc = - ddiff(F,h,m)* (temperature +K0 - m*h -h); printf("%g %g\n", (temperature-m*h-h), hc); for (i=0; i<2*m; i++) F[i] = F[i+1]; F[2*m] = pf_fold(string, NULL); temperature += h; kT = (temperature+K0)*GASCONST/1000; pf_scale=exp(-(F[i]/length +h*0.00727)/kT); update_pf_params(length); } free_pf_arrays(); }
float PFWrapper:: fold(std::string& structure) { #if !defined(HAVE_MPI) && defined(HAVE_BOOST_THREAD) static boost::mutex mtx; boost::mutex::scoped_lock lock(mtx); #endif ::noGU = noGU_ ? 1 : 0; ::no_closingGU = noCloseGU_ ? 1 : 0; ::noLonelyPairs = noLP_ ? 1 : 0; float ret=0.0; init_pf_fold(sz_-1); char *s = new char[sz_]; ret=::pf_fold(const_cast<char*>(seq_.c_str()), s); structure=s; delete[] s; std::copy(pr, pr+sz_*(sz_+1)/2, pr_.begin()); std::copy(iindx, iindx+sz_, iindx_.begin()); free_pf_arrays(); return ret; }
int main(int argc, char *argv[]) { char *line; char *sequence; char *structure = NULL; char fname[21]; char *ParamFile = NULL; char *ns_bases = NULL, *c; int i, length, l, sym, r; int istty; double deltaf, deltap=0; int delta=100; int n_back = 0; int noconv = 0; int circ=0; int dos=0; int zuker=0; do_backtrack = 1; dangles = 2; for (i=1; i<argc; i++) { if (argv[i][0]=='-') switch ( argv[i][1] ) { case 'T': if (argv[i][2]!='\0') usage(); if(i==argc-1) usage(); r=sscanf(argv[++i], "%lf", &temperature); if (r!=1) usage(); break; case 'p': if (argv[i][2]!='\0') usage(); if(i==argc-1) usage(); (void) sscanf(argv[++i], "%d", &n_back); init_rand(); break; case 'n': if ( strcmp(argv[i], "-noGU" )==0) noGU=1; if ( strcmp(argv[i], "-noCloseGU" ) ==0) no_closingGU=1; if ( strcmp(argv[i], "-noLP")==0) noLonelyPairs=1; if ( strcmp(argv[i], "-nsp") ==0) { if (i==argc-1) usage(); ns_bases = argv[++i]; } if ( strcmp(argv[i], "-noconv")==0) noconv=1; break; case '4': tetra_loop=0; break; case 'C': fold_constrained=1; break; case 'D': dos=1; print_energy = -999999; break; case 'd': dangles=0; if (argv[i][2]!='\0') { r=sscanf(argv[i]+2, "%d", &dangles); if (r!=1) usage(); } break; case 'P': if (i==argc-1) usage(); ParamFile = argv[++i]; break; case 's': subopt_sorted=1; break; case 'l': if (strcmp(argv[i],"-logML")==0) { logML=1; break; } else usage(); break; case 'e': if (i>=argc-1) usage(); if (strcmp(argv[i],"-ep")==0) r=sscanf(argv[++i], "%lf", &deltap); else { r=sscanf(argv[++i], "%lf", &deltaf); delta = (int) (0.1+deltaf*100); } if (r!=1) usage(); break; case 'c': if ( strcmp(argv[i], "-circ")==0) circ=1; break; case 'z': zuker=1; break; default: usage(); } } if ((zuker)&&(circ)) { printf("Sorry, zuker subopts not yet implemented for circfold\n"); usage(); } if ((zuker)&&(n_back>0)) { printf("Cna't do zuker subopts and stochastic subopts at the same time\n"); usage(); } if (ParamFile != NULL) read_parameter_file(ParamFile); if (ns_bases != NULL) { nonstandards = space(33); 
c=ns_bases; i=sym=0; if (*c=='-') { sym=1; c++; } while (*c) { if (*c!=',') { nonstandards[i++]=*c++; nonstandards[i++]=*c; if ((sym)&&(*c!=*(c-1))) { nonstandards[i++]=*c; nonstandards[i++]=*(c-1); } } c++; } } istty = isatty(fileno(stdout))&&isatty(fileno(stdin)); if ((fold_constrained)&&(istty)) { printf("Input constraints using the following notation:\n"); /* printf("| : paired with another base\n"); */ printf(". : no constraint at all\n"); printf("x : base must not pair\n"); } do { /* main loop: continue until end of file */ cut_point = -1; if (istty) { printf("\nInput string (upper or lower case); @ to quit\n"); if (!zuker)printf("Use '&' to connect 2 sequences that shall form a complex.\n"); printf("%s\n", scale); } fname[0]='\0'; if ((line = get_line(stdin))==NULL) break; /* skip comment lines and get filenames */ while ((*line=='*')||(*line=='\0')||(*line=='>')) { if (*line=='>') (void) sscanf(line, ">%20s", fname); free(line); if ((line = get_line(stdin))==NULL) break;; } if ((line==NULL)||strcmp(line,"@")==0) break; sequence = tokenize(line); /* frees line */ length = (int) strlen(sequence); structure = (char *) space((unsigned) length+1); if (fold_constrained) { char *cstruc; cstruc = tokenize(get_line(stdin)); if (cstruc!=NULL) { strncpy(structure, cstruc, length); for (i=0; i<length; i++) if (structure[i]=='|') nrerror("constraints of type '|' not allowed"); free(cstruc); } } for (l = 0; l < length; l++) { sequence[l] = toupper(sequence[l]); if (!noconv && sequence[l] == 'T') sequence[l] = 'U'; } if (istty) { if (cut_point == -1) printf("length = %d\n", length); else printf("length1 = %d\nlength2 = %d\n", cut_point-1, length-cut_point+1); } if ((logML!=0 || dangles==1 || dangles==3) && dos==0) if (deltap<=0) deltap=delta/100. 
+0.001; if (deltap>0) print_energy = deltap; /* first lines of output (suitable for sort +1n) */ if (fname[0] != '\0') printf("> %s [%d]\n", fname, delta); if (n_back>0) { /* stochastic backtrack */ double mfe, kT; char *ss; st_back=1; ss = (char *) space(strlen(sequence)+1); strncpy(ss, structure, length); mfe = fold(sequence, ss); kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */ pf_scale = exp(-(1.03*mfe)/kT/length); strncpy(ss, structure, length); /* ignore return value, we are not interested in the free energy */ (circ) ? (void) pf_circ_fold(sequence, ss) : (void) pf_fold(sequence, ss); free(ss); for (i=0; i<n_back; i++) { char *s; s =(circ) ? pbacktrack_circ(sequence) : pbacktrack(sequence); printf("%s\n", s); free(s); } free_pf_arrays(); } else if (!zuker) { /* normal subopt */ (circ) ? subopt_circ(sequence, structure, delta, stdout) : subopt(sequence, structure, delta, stdout); if (dos) { int i; for (i=0; i<= MAXDOS && i<=delta/10; i++) { printf("%4d %6d\n", i, density_of_states[i]); } } } else { /* Zuker suboptimals */ SOLUTION *zr; int i; if (cut_point!=-1) { printf("Sorry, zuker subopts not yet implemented for cofold\n"); usage(); } zr = zukersubopt(sequence); putoutzuker(zr); (void)fflush(stdout); for (i=0; zr[i].structure; i++) { free(zr[i].structure); } free(zr); } (void)fflush(stdout); free(sequence); free(structure); } while (1); return 0; }
int main(int argc, char *argv[]) { char *string/*, *line*/; char *structure=NULL, *cstruc=NULL; /*char fname[13], ffname[20], gfname[20];*/ /*char *ParamFile=NULL;*/ char *ns_bases=NULL, *c; int i, length, l, sym/*, r*/; double energy, min_en; double kT, sfact=1.07; int pf=0, noPS=0, istty; int noconv=0; int circ=0; AjPSeq seq = NULL; AjPFile confile = NULL; AjPFile paramfile = NULL; AjPFile outf = NULL; AjPFile essfile = NULL; AjPFile dotfilea = NULL; AjPFile dotfileb = NULL; AjPStr seqstring = NULL; AjPStr constring = NULL; AjPStr seqname = NULL; float eT = 0.; AjBool eGU; AjBool ecirc = ajFalse; AjBool eclose; AjBool lonely; AjBool convert; AjPStr ensbases = NULL; AjBool etloop; AjPStr eenergy = NULL; char ewt = '\0'; float escale = 0.; AjPStr edangles = NULL; char edangle = '\0'; ajint len; embInitPV("vrnafold",argc,argv,"VIENNA",VERSION); seqstring = ajStrNew(); constring = ajStrNew(); seqname = ajStrNew(); seq = ajAcdGetSeq("sequence"); confile = ajAcdGetInfile("constraintfile"); paramfile = ajAcdGetInfile("paramfile"); eT = ajAcdGetFloat("temperature"); ecirc = ajAcdGetBoolean("circular"); eGU = ajAcdGetBoolean("gu"); eclose = ajAcdGetBoolean("closegu"); lonely = ajAcdGetBoolean("lp"); convert = ajAcdGetBoolean("convert"); ensbases = ajAcdGetString("nsbases"); etloop = ajAcdGetBoolean("tetraloop"); eenergy = ajAcdGetListSingle("energy"); escale = ajAcdGetFloat("scale"); edangles = ajAcdGetListSingle("dangles"); outf = ajAcdGetOutfile("outfile"); essfile = ajAcdGetOutfile("ssoutfile"); /* dotfilea = ajAcdGetOutfile("adotoutfile"); dotfileb = ajAcdGetOutfile("bdotoutfile"); */ do_backtrack = 2; pf = 0; string = NULL; istty = 0; temperature = (double) eT; circ = !!ecirc; noGU = (eGU) ? 0 : 1; no_closingGU = (eclose) ? 0 : 1; noLonelyPairs = (lonely) ? 0 : 1; noconv = (convert) ? 0 : 1; ns_bases = (ajStrGetLen(ensbases)) ? 
MAJSTRGETPTR(ensbases) : NULL; tetra_loop = !!etloop; ewt = *ajStrGetPtr(eenergy); if(ewt == '0') energy_set = 0; else if(ewt == '1') energy_set = 1; else if(ewt == '2') energy_set = 2; sfact = (double) escale; edangle = *ajStrGetPtr(edangles); if(edangle == '0') dangles = 0; else if(edangle == '1') dangles = 1; else if(edangle == '2') dangles = 2; else if(edangle == '3') dangles = 3; if(circ && noLonelyPairs) { ajWarn("Depending on the origin of the circular sequence\n" "some structures may be missed when using -noLP\nTry " "rotating your sequence a few times\n"); } if(paramfile) read_parameter_file(paramfile); if (ns_bases != NULL) { nonstandards = space(33); c=ns_bases; i=sym=0; if (*c=='-') { sym=1; c++; } while (*c!='\0') { if (*c!=',') { nonstandards[i++]=*c++; nonstandards[i++]=*c; if ((sym)&&(*c!=*(c-1))) { nonstandards[i++]=*c; nonstandards[i++]=*(c-1); } } c++; } } if(confile) vienna_GetConstraints(confile,&constring); string = NULL; structure = NULL; length = ajSeqGetLen(seq); string = (char *) space(length+1); strcpy(string,ajSeqGetSeqC(seq)); len = ajStrGetLen(constring); structure = (char *) space(length+1); if(len) { fold_constrained = 1; strcpy(structure,ajStrGetPtr(constring)); } for (l = 0; l < length; l++) { string[l] = toupper(string[l]); if (!noconv && string[l] == 'T') string[l] = 'U'; } /* initialize_fold(length); */ if (circ) min_en = circfold(string, structure); else min_en = fold(string, structure); ajFmtPrintF(outf,"%s\n%s", string, structure); if (istty) printf("\n minimum free energy = %6.2f kcal/mol\n", min_en); else ajFmtPrintF(outf," (%6.2f)\n", min_en); if (!noPS) { if (length<2000) (void) PS_rna_plot(string, structure, essfile); else ajWarn("Structure too long, not doing xy_plot\n"); } if (length>=2000) free_arrays(); if (pf) { char *pf_struc; pf_struc = (char *) space((unsigned) length+1); if (dangles==1) { dangles=2; /* recompute with dangles as in pf_fold() */ min_en = (circ) ? 
energy_of_circ_struct(string, structure) : energy_of_struct(string, structure); dangles=1; } kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */ pf_scale = exp(-(sfact*min_en)/kT/length); if (length>2000) ajWarn("scaling factor %f\n", pf_scale); (circ) ? init_pf_circ_fold(length) : init_pf_fold(length); if (cstruc!=NULL) strncpy(pf_struc, cstruc, length+1); energy = (circ) ? pf_circ_fold(string, pf_struc) : pf_fold(string, pf_struc); if (do_backtrack) { ajFmtPrintF(outf,"%s", pf_struc); ajFmtPrintF(outf," [%6.2f]\n", energy); } if ((istty)||(!do_backtrack)) ajFmtPrintF(outf," free energy of ensemble = %6.2f kcal/mol\n", energy); if (do_backtrack) { plist *pl1,*pl2; char *cent; double dist, cent_en; cent = centroid(length, &dist); cent_en = (circ) ? energy_of_circ_struct(string, cent) : energy_of_struct(string, cent); ajFmtPrintF(outf,"%s {%6.2f d=%.2f}\n", cent, cent_en, dist); free(cent); pl1 = make_plist(length, 1e-5); pl2 = b2plist(structure); (void) PS_dot_plot_list(string, dotfilea, pl1, pl2, ""); free(pl2); if (do_backtrack==2) { pl2 = stackProb(1e-5); PS_dot_plot_list(string, dotfileb, pl1, pl2, "Probabilities for stacked pairs (i,j)(i+1,j-1)"); free(pl2); } free(pl1); free(pf_struc); } ajFmtPrintF(outf," frequency of mfe structure in ensemble %g; ", exp((energy-min_en)/kT)); if (do_backtrack) ajFmtPrintF(outf,"ensemble diversity %-6.2f", mean_bp_dist(length)); ajFmtPrintF(outf,"\n"); free_pf_arrays(); } if (cstruc!=NULL) free(cstruc); free(string); free(structure); ajStrDel(&seqstring); ajStrDel(&constring); ajStrDel(&seqname); ajStrDel(&ensbases); ajStrDel(&eenergy); ajStrDel(&edangles); ajSeqDel(&seq); ajFileClose(&confile); ajFileClose(¶mfile); ajFileClose(&outf); ajFileClose(&essfile); /* ajFileClose(&dotfilea); ajFileClose(&dotfileb); */ if (length<2000) free_arrays(); embExit(); return 0; }
int main(int argc, char *argv[]) { char *sequence; char *structure = NULL; char *ns_bases = NULL, *c; int i, length, l, sym; int istty; double deltap=0.; int delta=100; int n_back = 0; int noconv=0; int circ=0; int dos=0; AjPSeq seq = NULL; AjPFile confile = NULL; AjPFile paramfile = NULL; AjPFile outf = NULL; AjPStr constring = NULL; float eT = 0.; AjBool eGU; AjBool eclose; AjBool lonely; AjBool convert; AjPStr ensbases = NULL; AjBool etloop; AjPStr edangles = NULL; char edangle = '\0'; ajint len; float erange; float prange; embInitPV("vrnasubopt",argc,argv,"VIENNA",VERSION); constring = ajStrNew(); seq = ajAcdGetSeq("sequence"); confile = ajAcdGetInfile("constraintfile"); paramfile = ajAcdGetInfile("paramfile"); eT = ajAcdGetFloat("temperature"); circ = !!ajAcdGetBoolean("circular"); dos = !!ajAcdGetBoolean("dos"); eGU = ajAcdGetBoolean("gu"); eclose = ajAcdGetBoolean("closegu"); lonely = ajAcdGetBoolean("lp"); convert = ajAcdGetBoolean("convert"); ensbases = ajAcdGetString("nsbases"); etloop = ajAcdGetBoolean("tetraloop"); erange = ajAcdGetFloat("erange"); prange = ajAcdGetFloat("prange"); subopt_sorted = !!ajAcdGetBoolean("sort"); logML = !!ajAcdGetBoolean("logml"); n_back = ajAcdGetInt("nrandom"); edangles = ajAcdGetListSingle("dangles"); outf = ajAcdGetOutfile("outfile"); if(dos) print_energy = -999999; do_backtrack = 1; istty = 0; temperature = (double) eT; noGU = (eGU) ? 0 : 1; no_closingGU = (eclose) ? 0 : 1; noLonelyPairs = (lonely) ? 0 : 1; noconv = (convert) ? 0 : 1; ns_bases = (ajStrGetLen(ensbases)) ? 
MAJSTRGETPTR(ensbases) : NULL; tetra_loop = !!etloop; delta = (int) (0.1 + erange * 100); deltap = prange; edangle = *ajStrGetPtr(edangles); if(edangle == '0') dangles = 0; else if(edangle == '1') dangles = 1; else if(edangle == '2') dangles = 2; else if(edangle == '3') dangles = 3; if(paramfile) read_parameter_file(paramfile); if (ns_bases != NULL) { nonstandards = space(33); c=ns_bases; i=sym=0; if (*c=='-') { sym=1; c++; } while (*c) { if (*c!=',') { nonstandards[i++]=*c++; nonstandards[i++]=*c; if ((sym)&&(*c!=*(c-1))) { nonstandards[i++]=*c; nonstandards[i++]=*(c-1); } } c++; } } if(confile) vienna_GetConstraints(confile,&constring); if(n_back) init_rand(); sequence = NULL; structure = NULL; length = ajSeqGetLen(seq); sequence = (char *) space(length+1); strcpy(sequence,ajSeqGetSeqC(seq)); len = ajStrGetLen(constring); structure = (char *) space(length+1); if(len) { fold_constrained = 1; strcpy(structure,ajStrGetPtr(constring)); } istty = 0; if (fold_constrained) { for (i=0; i<length; i++) if (structure[i]=='|') ajFatal("Constraints of type '|' are not allowed\n"); } for (l = 0; l < length; l++) { sequence[l] = toupper(sequence[l]); if (!noconv && sequence[l] == 'T') sequence[l] = 'U'; } if ((logML!=0 || dangles==1 || dangles==3) && dos==0) if (deltap<=0) deltap=delta/100. +0.001; if (deltap>0) print_energy = deltap; /* first lines of output (suitable for sort +1n) */ ajFmtPrintF(outf,"> %s [%d]\n", ajSeqGetNameC(seq), delta); if(n_back>0) { int i; double mfe, kT; char *ss; st_back=1; ss = (char *) space(strlen(sequence)+1); strncpy(ss, structure, length); mfe = (circ) ? circfold(sequence, ss) : fold(sequence, ss); kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */ pf_scale = exp(-(1.03*mfe)/kT/length); strncpy(ss, structure, length); /* ** we are not interested in the free energy but in the bppm, so we ** drop free energy into the void */ (circ) ? 
(void) pf_circ_fold(sequence, ss) : (void) pf_fold(sequence, ss); free(ss); for (i=0; i<n_back; i++) { char *s; s = (circ) ? pbacktrack_circ(sequence) : pbacktrack(sequence); ajFmtPrintF(outf,"%s\n", s); free(s); } free_pf_arrays(); } else { (circ) ? subopt_circ(sequence, structure, delta, ajFileGetFileptr(outf)) : subopt(sequence, structure, delta, ajFileGetFileptr(outf)); } free(sequence); free(structure); ajSeqDel(&seq); ajStrDel(&ensbases); ajStrDel(&edangles); ajFileClose(&confile); ajFileClose(&outf); ajFileClose(¶mfile); embExit(); return 0; }
int main(int argc, char *argv[]) { char *start, *structure, *rstart, *str2, *line; char *ParamFile=NULL; int i,j, length, l, hd; double energy=0., kT; int pf, mfe, istty; int repeat, found; do_backtrack = 0; pf = 0; mfe = 1; repeat = 0; init_rand(); for (i=1; i<argc; i++) { if (argv[i][0]=='-') switch ( argv[i][1] ) { case 'a': symbolset = argv[++i]; /* symbolset should only have uppercase characters */ for (l = 0; l < (int)strlen(symbolset); l++) symbolset[l] = toupper(symbolset[l]); break; case 'T': if (argv[i][2]!='\0') usage(); if (sscanf(argv[++i], "%lf", &temperature)==0) usage(); break; case 'F': mfe = 0; pf = 0; for(j=2; j<(int)strlen(argv[i]); j++) { switch( argv[i][j] ) { case 'm' : mfe = 1; break; case 'p' : pf = 1; /* old version had dangles=0 here */ break; default : usage(); } } break; case 'R': repeat = REPEAT_DEFAULT; if(++i<argc) if (sscanf(argv[i], "%d", &repeat)==0) usage(); break; case 'n': if (strcmp(argv[i], "-noGU" )==0) noGU=1; else if (strcmp(argv[i], "-noLP" )==0) noLonelyPairs=1; else usage(); break; case '4': tetra_loop=0; break; case 'e': if (sscanf(argv[++i],"%d", &energy_set)==0) usage(); break; case 'd': dangles=0; if (argv[i][2]!='\0') if (sscanf(argv[i]+2, "%d", &dangles)==0) usage(); break; case 'f': /* when to stop RNAfold -p */ if (sscanf(argv[++i],"%f", &final_cost)==0) usage(); break; case 'P': if (++i<argc) ParamFile = argv[i]; else usage(); break; case 'v': inv_verbose = 1; break; default: usage(); } } kT = (temperature+273.15)*1.98717/1000.0; istty = (isatty(fileno(stdout))&&isatty(fileno(stdin))); if (ParamFile!=NULL) read_parameter_file(ParamFile); give_up = (repeat<0); do { if (istty) { printf("\nInput structure & start string" " (lower case letters for const positions)\n" " @ to quit, and 0 for random start string\n"); printf("%s\n", scale); } if ((line = get_line(stdin))==NULL) break; /* read structure, skipping over comment lines */ while ((*line=='*')||(*line=='\0')||(*line=='>')) { printf("%s\n", line); free(line); 
if ((line = get_line(stdin))==NULL) break; } /* stop at eof or '@' */ if (line==NULL) break; if (strcmp(line, "@") == 0) { free(line); break; } structure = (char *) space(strlen(line)+1); (void) sscanf(line,"%s",structure); /* scanf gets rid of trailing junk */ free(line); length = (int) strlen(structure); str2 = (char *) space((unsigned)length+1); if ((line = get_line(stdin))!=NULL) if (strcmp(line, "@") == 0) { free(line); break; } start = (char *) space((unsigned) length+1); if (line !=NULL) { (void) strncpy(start, line, length); free(line); } if (istty) printf("length = %d\n", length); if (repeat!=0) found = (repeat>0)? repeat : (-repeat); else found = 1; initialize_fold(length); rstart = (char *) space((unsigned)length+1); while(found>0) { char *string; string = (char *) space((unsigned)length+1); strcpy(string, start); for (i=0; i<length; i++) { /* lower case characters are kept fixed, any other character not in symbolset is replaced by a random character */ if (islower(string[i])) continue; if (string[i]=='\0' || (strchr(symbolset,string[i])==NULL)) string[i]=symbolset[int_urn(0,strlen(symbolset)-1)]; } strcpy(rstart, string); /* remember start string */ if (mfe) { energy = inverse_fold(string, structure); if( (repeat>=0) || (energy<=0.0) ) { found--; hd = hamming(rstart, string); printf("%s %3d", string, hd); if (energy>0) { /* no solution found */ printf(" d= %g\n", energy); if(istty) { energy = fold(string,str2); printf("%s\n", str2); } } else printf("\n"); } } if (pf) { if (!(mfe && give_up && (energy>0))) { /* unless we gave up in the mfe part */ double prob, min_en, sfact=1.07; /* get a reasonable pf_scale */ min_en = fold(string,str2); pf_scale = exp(-(sfact*min_en)/kT/length); init_pf_fold(length); energy = inverse_pf_fold(string, structure); prob = exp(-energy/kT); hd = hamming(rstart, string); printf("%s %3d (%g)\n", string, hd, prob); free_pf_arrays(); } if (!mfe) found--; } (void) fflush(stdout); free(string); } free(rstart); free_arrays(); 
free(structure); free(str2); free(start); (void) fflush(stdout); } while (1); return 0; }
int main(int argc, char *argv[]){ struct RNAsubopt_args_info args_info; unsigned int input_type; char fname[80], *cstruc, *sequence, *c, *input_string; char *structure = NULL, *ParamFile = NULL, *ns_bases = NULL; int i, length, l, sym, istty; double deltaf, deltap; int delta, n_back, noconv, circular, dos, zuker; do_backtrack = 1; dangles = 2; delta = 100; deltap = n_back = noconv = circular = dos = zuker = 0; /* ############################################# # check the command line parameters ############################################# */ if(RNAsubopt_cmdline_parser (argc, argv, &args_info) != 0) exit(1); /* temperature */ if(args_info.temp_given) temperature = args_info.temp_arg; /* structure constraint */ if(args_info.constraint_given) fold_constrained=1; /* do not take special tetra loop energies into account */ if(args_info.noTetra_given) tetra_loop=0; /* set dangle model */ if(args_info.dangles_given) dangles = args_info.dangles_arg; /* do not allow weak pairs */ if(args_info.noLP_given) noLonelyPairs = 1; /* do not allow wobble pairs (GU) */ if(args_info.noGU_given) noGU = 1; /* do not allow weak closing pairs (AU,GU) */ if(args_info.noClosingGU_given) no_closingGU = 1; /* do not convert DNA nucleotide "T" to appropriate RNA "U" */ if(args_info.noconv_given) noconv = 1; /* take another energy parameter set */ if(args_info.paramFile_given) ParamFile = strdup(args_info.paramFile_arg); /* Allow other pairs in addition to the usual AU,GC,and GU pairs */ if(args_info.nsp_given) ns_bases = strdup(args_info.nsp_arg); /* energy range */ if(args_info.deltaEnergy_given) delta = (int) (0.1+args_info.deltaEnergy_arg*100); /* energy range after post evaluation */ if(args_info.deltaEnergyPost_given) deltap = args_info.deltaEnergyPost_arg; /* sorted output */ if(args_info.sorted_given) subopt_sorted = 1; /* assume RNA sequence to be circular */ if(args_info.circ_given) circular=1; /* stochastic backtracking */ if(args_info.stochBT_given){ n_back = args_info.stochBT_arg; 
init_rand(); } /* density of states */ if(args_info.dos_given){ dos = 1; print_energy = -999999; } /* logarithmic multiloop energies */ if(args_info.logML_given) logML = 1; /* zuker subopts */ if(args_info.zuker_given) zuker = 1; if(zuker){ if(circular){ warn_user("Sorry, zuker subopts not yet implemented for circfold"); RNAsubopt_cmdline_parser_print_help(); exit(1); } else if(n_back>0){ warn_user("Can't do zuker subopts and stochastic subopts at the same time"); RNAsubopt_cmdline_parser_print_help(); exit(1); } } /* free allocated memory of command line data structure */ RNAsubopt_cmdline_parser_free(&args_info); /* ############################################# # begin initializing ############################################# */ if (ParamFile != NULL) read_parameter_file(ParamFile); if (ns_bases != NULL) { nonstandards = space(33); c=ns_bases; i=sym=0; if (*c=='-') { sym=1; c++; } while (*c!='\0') { if (*c!=',') { nonstandards[i++]=*c++; nonstandards[i++]=*c; if ((sym)&&(*c!=*(c-1))) { nonstandards[i++]=*c; nonstandards[i++]=*(c-1); } } c++; } } istty = isatty(fileno(stdout))&&isatty(fileno(stdin)); if(fold_constrained && istty) print_tty_constraint(VRNA_CONSTRAINT_DOT | VRNA_CONSTRAINT_X); /* ############################################# # main loop: continue until end of file ############################################# */ do { cut_point = -1; /* ######################################################## # handle user input from 'stdin' ######################################################## */ if(istty){ if (!zuker) printf("Use '&' to connect 2 sequences that shall form a complex.\n"); print_tty_input_seq(); } /* extract filename from fasta header if available */ fname[0] = '\0'; while((input_type = get_input_line(&input_string, 0)) == VRNA_INPUT_FASTA_HEADER){ printf(">%s\n", input_string); (void) sscanf(input_string, "%42s", fname); free(input_string); } /* break on any error, EOF or quit request */ if(input_type & (VRNA_INPUT_QUIT | VRNA_INPUT_ERROR)){ 
break;} /* else assume a proper sequence of letters of a certain alphabet (RNA, DNA, etc.) */ else{ sequence = tokenize(input_string); /* frees input_string */ length = (int) strlen(sequence); } structure = (char *) space((unsigned) length+1); if(noconv) str_RNA2RNA(sequence); else str_DNA2RNA(sequence); if(istty){ if (cut_point == -1) printf("length = %d\n", length); else printf("length1 = %d\nlength2 = %d\n", cut_point-1, length-cut_point+1); } /* get structure constraint or break if necessary, entering an empty line results in a warning */ if (fold_constrained) { input_type = get_input_line(&input_string, VRNA_INPUT_NOSKIP_COMMENTS); if(input_type & VRNA_INPUT_QUIT){ break;} else if((input_type & VRNA_INPUT_MISC) && (strlen(input_string) > 0)){ cstruc = tokenize(input_string); strncpy(structure, cstruc, length); for (i=0; i<length; i++) if (structure[i]=='|') nrerror("constraints of type '|' not allowed"); free(cstruc); } else warn_user("constraints missing"); } /* ######################################################## # done with 'stdin' handling, now init everything properly ######################################################## */ if((logML != 0 || dangles==1 || dangles==3) && dos == 0) if(deltap<=0) deltap = delta/100. + 0.001; if (deltap>0) print_energy = deltap; /* first lines of output (suitable for sort +1n) */ if (fname[0] != '\0') printf("> %s [%d]\n", fname, delta); /* stochastic backtracking */ if(n_back>0){ double mfe, kT; char *ss; st_back=1; ss = (char *) space(strlen(sequence)+1); strncpy(ss, structure, length); mfe = fold(sequence, ss); kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */ pf_scale = exp(-(1.03*mfe)/kT/length); strncpy(ss, structure, length); /* ignore return value, we are not interested in the free energy */ (circular) ? (void) pf_circ_fold(sequence, ss) : (void) pf_fold(sequence, ss); free(ss); for (i=0; i<n_back; i++) { char *s; s =(circular) ? 
pbacktrack_circ(sequence) : pbacktrack(sequence); printf("%s\n", s); free(s); } free_pf_arrays(); } /* normal subopt */ else if(!zuker){ (circular) ? subopt_circ(sequence, structure, delta, stdout) : subopt(sequence, structure, delta, stdout); if (dos) { int i; for (i=0; i<= MAXDOS && i<=delta/10; i++) { printf("%4d %6d\n", i, density_of_states[i]); } } } /* Zuker suboptimals */ else{ SOLUTION *zr; int i; if (cut_point!=-1) { nrerror("Sorry, zuker subopts not yet implemented for cofold\n"); } zr = zukersubopt(sequence); putoutzuker(zr); (void)fflush(stdout); for (i=0; zr[i].structure; i++) { free(zr[i].structure); } free(zr); } (void)fflush(stdout); free(sequence); free(structure); } while (1); return 0; }
/*--------------------------------------------------------------------------*/ int main(int argc, char *argv[]) { char *string1=NULL, *string2=NULL, *dummy=NULL, *temp=NULL, *line=NULL; char *structure=NULL, *cstruc=NULL, *cstruc_l=NULL, *cstruc_s=NULL; char fname[53], ffname[53], temp_name[201], first_name[53], my_contrib[10]; char up_out[250], unstrs[201], name[400], cmd_line[500]; char *ParamFile=NULL; char *ns_bases=NULL, *c,*head; int i, length1,length2,length, l, sym, r, *u_vals, Switch, header,output; double energy, min_en; double sfact=1.07; int istty; int noconv=0; /* variables for output */ pu_contrib *unstr_out, *unstr_short; interact *inter_out; /* pu_out *longer; */ char *title; /* commandline parameters */ int w; /* length of region of interaction */ int incr3; /* add x unpaired bases after 3'end of short RNA*/ int incr5; /* add x unpaired bases after 5'end of short RNA*/ int unstr; /* length of unpaired region for output*/ int upmode ; /* 1 compute only pf_unpaired, >1 compute interactions 2 compute intra-molecular structure only for long RNA, 3 both RNAs */ int task; /* input mode for calculation of interaction */ /* default settings for RNAup */ head = NULL;/* header text - if header wanted, see header */ header = 1; /* if header is 0 print no header in output file: option -nh */ output = 1; /* if output is 0 make no output file: option -o */ Switch = 1; /* the longer sequence is selected as the target */ task=0; upmode = 1; /* default is one sequence, option -X[p|f] has to be set for the calculation of an interaction, if no "&" is in the sequence string */ unstrs[0]='\0'; default_u = 4; unstr=default_u; default_w = 25; w=default_w; u_vals=NULL; incr3=0; incr5=0; do_backtrack = 1; length1=length2=0; title=NULL; unstr_out=NULL; inter_out=NULL; my_contrib[0] = 'S'; my_contrib[1] = '\0'; first_name[0] = '\0'; /* collect the command line */ sprintf(cmd_line,"RNAup "); length = 0; for (i=1; i<argc; i++) { r=sscanf(argv[i], "%100s", &temp_name); 
length+=r+1; if(length > 500) break; strcat(cmd_line, temp_name); strcat(cmd_line," "); } length = 0; for (i=1; i<argc; i++) { if (argv[i][0]=='-') switch ( argv[i][1] ) { case 'T': if (argv[i][2]!='\0') usage(); if (i==argc-1) usage(); r=sscanf(argv[++i], "%lf", &temperature); if (!r) usage(); break; case 'w': /* -w maximal length of unstructured region */ if (i==argc-1) usage(); r=sscanf(argv[++i],"%d", &w); if (!r) usage(); break; case 't': /* use the first sequence as the target */ if ( strcmp(argv[i], "-target")==0) { Switch=0; } break; case 'o': /* make no output file */ output=0; break; case 'n': if ( strcmp(argv[i], "-nh")==0) { header=0; } if ( strcmp(argv[i], "-noGU")==0) { noGU=1; } if ( strcmp(argv[i], "-noCloseGU")==0) { no_closingGU=1; } if ( strcmp(argv[i], "-noLP")==0) { noLonelyPairs=1; } if ( strcmp(argv[i], "-nsp") ==0) { if (i==argc-1) usage(); ns_bases = argv[++i]; } if ( strcmp(argv[i], "-noconv")==0) { noconv=1; } break; case '4': tetra_loop=0; break; case 'e': if (i==argc-1) usage(); r=sscanf(argv[++i],"%d", &energy_set); if (!r) usage(); break; case 'C': fold_constrained=1; break; case 'S': if (i==argc-1) usage(); r=sscanf(argv[++i],"%lf", &sfact); if (!r) usage(); break; case 'd': dangles=0; if (argv[i][2]!='\0') { r=sscanf(argv[i]+2, "%d", &dangles); if (r!=1) usage(); } break; case 'b': upmode=3; break; case 'X': /* interaction mode invoked */ if (upmode == 1) upmode=2; switch (argv[i][2]) { /* now determine which sequences interact */ case 'p': task=1; break; /* pairwise interaction */ case 'f': task=2; break; /* first one interacts with all others */ } break; case 'u': /* -u length of unstructured region in pr_unpaired output */ if (i==argc-1) usage(); r=sscanf(argv[++i],"%200s", unstrs); if (!r) usage(); if (!isdigit(unstrs[0])) usage(); break; /* incr5 and incr3 are only for the longer (target) sequence */ /* increments w (length of the unpaired region) to incr5+w+incr3*/ /* the longer sequence is given in 5'(= position 1) to */ /* 
3' (=position n) direction */ /* incr5 adds incr5 residues to the 5' end of w */ case '5': if (i==argc-1) usage(); r=sscanf(argv[++i],"%d", &incr5); if (!r) usage(); break; /* incr3 adds incr3 residues to the 3' end of w */ case '3': if (i==argc-1) usage(); r=sscanf(argv[++i],"%d", &incr3); if (!r) usage(); break; case 'P': if (i==argc-1) usage(); ParamFile = argv[++i]; break; case 'c': if (i==argc-1) usage(); r=sscanf(argv[++i], "%6s", my_contrib); if (!r) usage(); break; default: usage(); } } cmd_line[strlen(cmd_line)] = '\0'; if (dangles>0) dangles=2; /* only 0 or 2 allowed */ if (ParamFile != NULL) read_parameter_file(ParamFile); if (ns_bases != NULL) { nonstandards = space(33); c=ns_bases; i=sym=0; if (*c=='-') { sym=1; c++; } while (*c!='\0') { if (*c!=',') { nonstandards[i++]=*c++; nonstandards[i++]=*c; if ((sym)&&(*c!=*(c-1))) { nonstandards[i++]=*c; nonstandards[i++]=*(c-1); } } c++; } } istty = isatty(fileno(stdout))&&isatty(fileno(stdin)); if ((fold_constrained)&&(istty)) { printf("Input constraints using the following notation:\n"); printf(". 
: no constraint at all\n"); printf("x : base must not pair\n"); printf("matching brackets ( ): base i pairs base j\n"); printf("constraints for intramolecular folding only:\n"); printf("< : base i is intramolecularly paired with a base j<i\n"); printf("> : base i is intramolecularly paired with a base j>i\n"); printf("constraints for cofolding (intermolecular folding) only:\n"); printf("| : paired with another base intermolecularly\n"); } RT = ((temperature+K0)*GASCONST/1000.0); do { /* main loop: continue until end of file */ cut_point=-1; if (istty) { if (upmode == 1) { printf("\nInput string (upper or lower case); @ to quit\n"); printf("%s%s\n", scale1, scale2); } else if (upmode > 1) { if (task == 1 || (task == 0 && upmode == 3)) { printf("\nUse either '&' to connect the 2 sequences or give each sequence on an extra line.\n"); printf("%s%s\n", scale1, scale2); } else if (task == 2) { /* option -Xf read the first two seqs */ printf("\nGive each sequence on an extra line. The first seq. is stored, every other seq. 
is compared to the first one.\n"); printf("%s%s\n", scale1, scale2); } else if (task == 3) {/* option -Xf read another sequence which will interact with the first one */ printf("\nEnter another sequence.\n"); printf("%s%s\n", scale1, scale2); } } } fname[0]='\0'; ffname[0]='\0'; /* read the first sequence */ if ((line = get_line(stdin))==NULL) break; /* skip comment lines and get filenames */ while ((*line=='*')||(*line=='\0')||(*line=='>')) { if (*line=='>') (void) sscanf(line, ">%51s", fname); free(line); line=NULL; if ((line = get_line(stdin))==NULL) break; } if ((line == NULL) || (strcmp(line, "@") == 0)) break; if (first_name[0] == '\0' && fname[0] !='\0' && task == 2) { strncpy(first_name,fname,30); first_name[30] = '\0'; } /* if upmode == 2: check if the sequences are seperated via "&" (cut_point > -1) or given on extra lines */ if (task < 3) { tokenize(line,&string1,&string2); if (task == 2 && cut_point != -1) task = 3; /* two sequences with & are given: calculate interaction */ if (task == 0 && cut_point != -1) { task = 1; if (upmode == 1) upmode = 2; } } else if (task == 3) { /* option -Xf*/ strncpy(ffname,fname,30); ffname[30] = '\0'; strncpy(fname,first_name,30); /* first_name: name of first seq */ fname[30] = '\0'; if (temp != NULL) { /*strings have been switched - write temp to string1*/ string1 = (char *) xrealloc (string1,sizeof(char)*strlen(temp)+1); (void) sscanf(temp,"%s",string1); free(temp);temp=NULL; } tokenize(line,&string2,&dummy); /*compare every seq to first one given */ free(dummy);dummy=NULL; if (cut_point != -1) { nrerror( "After the first sequence pair: Input a single sequence (no &)!\n" "Each input seq. is compared to the first seq. given.\n"); } } /* interaction mode -> get the second seq. 
if seq are on seperate lines*/ if (upmode > 1){ /* interaction mode */ if (cut_point == -1 && task < 3) { /* seqs are given on seperate lines */ /* read the second sequence */ if (task == 2) task = 3; if ((line = get_line(stdin))==NULL) { nrerror("only one sequence - can not cofold one sequence!"); } /* skip comment lines and get filenames */ while ((*line=='*')||(*line=='\0')||(*line=='>')) { if (*line=='>') (void) sscanf(line, ">%51s", ffname); /* name of the 2nd seq */ free(line); line=NULL; if ((line = get_line(stdin))==NULL) break; } if ((line ==NULL) || (strcmp(line, "@") == 0)) break; free(string2); /* string2 has been allocated in tokenize() */ string2 = (char *) space(strlen(line)+1); (void) sscanf(line,"%s",string2); free(line);line=NULL; } } else { /* default mode pr_unpaired for ONE seq */ /* if a second sequence is give, cofold the sequences*/ if (cut_point != -1){ upmode = 2; } } if (string1 != NULL){length1 = (int) strlen(string1);} else {nrerror("sequence is NULL, check your input.");} if (upmode > 1) { if (string2 != NULL) {length2 = (int) strlen(string2);} else{nrerror("one of the sequences is NULL, check your input.");} /* write longer seq in string1 and and shorter one in string2 */ if (length1 < length2 && Switch) { strncpy(temp_name,fname,30); strncpy(fname,ffname,30); strncpy(ffname,temp_name,30); length=length1; length1=length2; length2=length; temp=(char *) space(sizeof(char)*strlen(string1)+1); (void) sscanf(string1,"%s",temp); string1 = (char *) xrealloc (string1,sizeof(char)*length1+1); (void) sscanf(string2,"%s",string1); string2 = (char *) xrealloc(string2,sizeof(char)*length2+1); (void) sscanf(temp,"%s",string2); if (task == 1) { free(temp); temp = NULL; } } } /* parse cml parameters for output filename*/ /* create the name of the output file */ if (fname[0]!='\0') { printf(">%s\n",fname); if(strlen(fname) < 30) { strcpy(up_out,fname); } else { strncpy(up_out,fname,30); up_out[30] = '\0'; } if (upmode > 1 && ffname[0] != '\0') { 
printf(">%s\n",ffname); if(strlen(fname) < 15) { strcpy(up_out,fname); } else { strncpy(up_out,fname,15); up_out[15] = '\0'; } strcat(up_out, "_"); if(strlen(ffname) < 15) { strcat(up_out,ffname); } else { strncat(up_out,ffname,15); } } } else { strcpy(up_out, "RNA"); } if (upmode >1) { sprintf(temp_name,"_w%d",w); strncat(up_out, temp_name,10); } /* do this only when -X[p|f] is used or if two sequences seperated by & are given */ if (upmode > 1) { if (task == 3) { /* strncpy(temp_name,fname,30); */ if(strlen(fname) < 30) { strcpy(temp_name,fname); } else { strncpy(temp_name,fname,30); up_out[30] = '\0'; } } } /* get values for -u */ if ( ! get_u_values(unstrs,&u_vals,length1)) { nrerror("option -u: length value exceeds sequence length\n"); } for (l = 0; l < length1; l++) { string1[l] = toupper(string1[l]); if (!noconv && string1[l] == 'T') string1[l] = 'U'; } for (l = 0; l < length2; l++) { string2[l] = toupper(string2[l]); if (!noconv && string2[l] == 'T') string2[l] = 'U'; } if (fold_constrained) { char *temp_cstruc=NULL; int old_cut; temp_cstruc = get_line(stdin); old_cut = cut_point; cut_point=-1; /* get contrained string without & */ cstruc = tokenize_one(temp_cstruc); /* free(temp_cstruc); */ /* only one seq, cstruc should not have an & */ if (upmode == 1 && cut_point == -1) { if (strlen(cstruc) == length1) { cstruc_l=(char*)space(sizeof(char)*(length1+1)); strncpy(cstruc_l,cstruc,length1); }else{ fprintf(stderr, "%s\n%s\n",string1,cstruc); nrerror("RNAup -C: constrain string and structure have unequal length"); } } else if (upmode == 1 && cut_point != -1) { fprintf(stderr, "%s\n%s\n",string1,cstruc); nrerror("RNAup -C: only one sequence but constrain structure for cofolding"); } /* constrain string is for both seqs */ else if (upmode > 1 && cut_point != -1) { if (old_cut != cut_point) { nrerror("RNAup -C: different cut points in sequence und constrain string"); } seperate_bp(&cstruc,length1,&cstruc_l,&cstruc_s); if (strlen(cstruc) != (length1+length2)) { 
fprintf(stderr, "%s&%s\n%s\n",string1,string2,cstruc); nrerror("RNAup -C: constrain string and structure have unequal length"); } if (strlen(cstruc_l) != (length1)) { fprintf(stderr, "%s\n%s\n",string1,cstruc_l); nrerror("RNAup -C: constrain string and structure have unequal length"); } if (strlen(cstruc_s) != (length2)) { fprintf(stderr, "%s\n%s\n",string2,cstruc_s); nrerror("RNAup -C: constrain string and structure have unequal length"); } } else { fprintf(stderr, "%s&%s\n%s\n",string1,string2,cstruc); nrerror("RNAup -C: no cutpoint in constrain string"); } } if(length1 > length2) { structure = (char *) space(sizeof(char)*(length1+1)); } else { structure = (char *) space(sizeof(char)*(length2+1)); } update_fold_params(); if (cstruc_s != NULL) strncpy(structure, cstruc_s, length2+1); min_en = fold(string1, structure); (void) fflush(stdout); if (upmode != 0){ int wplus,w_sh; if (upmode == 3) { /* calculate prob. unstruct. for shorter seq */ w_sh = w; /* len of unstructured region has to be <= len shorter seq. */ if (w > length2) w_sh = length2; if (cstruc_s != NULL) strncpy(structure, cstruc_s, length2+1); min_en = fold(string2, structure); pf_scale = exp(-(sfact*min_en)/RT/length2); if (length2>2000) fprintf(stderr, "scaling factor %f\n", pf_scale); init_pf_fold(length2); if (cstruc_s != NULL) strncpy(structure, cstruc_s, length2+1); energy = pf_fold(string2, structure); unstr_short = pf_unstru(string2, w_sh); free_pf_arrays(); /* for arrays for pf_fold(...) */ } /* calculate prob. unstructured for longer seq */ wplus=w+incr3+incr5; /* calculate prob. unpaired for the maximal length of -u */ if (u_vals[u_vals[0]] > wplus) wplus=u_vals[u_vals[0]]; /* length of the unstructured region has to be <= len longer seq. 
*/ if (wplus > length1) wplus=length1; if (cstruc_l !=NULL) strncpy(structure, cstruc_l, length1+1); min_en = fold(string1, structure); pf_scale = exp(-(sfact*min_en)/RT/length1); if (length1>2000) fprintf(stderr, "scaling factor %f\n", pf_scale); init_pf_fold(length1); if (cstruc_l !=NULL) strncpy(structure, cstruc_l, length1+1); energy = pf_fold(string1, structure); if (upmode > 1) { unstr_out = pf_unstru(string1, wplus); } else { unstr_out = pf_unstru(string1, u_vals[u_vals[0]]); } free_pf_arrays(); /* for arrays for pf_fold(...) */ /* now make output to stdout and to the output file */ if (upmode > 1){/* calculate interaction between two sequences */ int count; if (upmode == 2) { inter_out = pf_interact(string1,string2,unstr_out,NULL,w,cstruc,incr3,incr5); print_interaction(inter_out,string1,string2,unstr_out,NULL,w,incr3,incr5); } else if (upmode == 3){ inter_out = pf_interact(string1,string2,unstr_out,unstr_short,w,cstruc,incr3,incr5); print_interaction(inter_out,string1,string2,unstr_out,unstr_short,w,incr3,incr5); } if(output) { /* make RNAup output to file */ printf("RNAup output in file: "); /* plot for all -u values */ strcpy(name,up_out); strcat(name, "_u"); if(u_vals[0] <= 20) { for (count = 1; count <= u_vals[0]; count++) { unstr = u_vals[count]; sprintf(temp_name,"%d",unstr); if (count < u_vals[0]) { strcat(temp_name,"_"); strncat(name, temp_name,5); } else { strncat(name, temp_name,5); strcat(name, "_up.out"); printf("%s\n",name); } } } else { sprintf(temp_name,"%d",u_vals[1]); strcat(temp_name,"_to_"); strncat(name, temp_name,5); sprintf(temp_name,"%d",u_vals[0]); strncat(name, temp_name,5); strcat(name, ".out"); printf("%s\n",name); } if(header) { char startl[3]; sprintf(startl,"# "); head = (char*)space(sizeof(char)*(length1+length2+1000)); /* mach kein \n als ende von head */ sprintf(head,"%s %s\n%s %d %s\n%s %s\n%s %d %s\n%s %s",startl, cmd_line, startl,length1,fname, startl,string1, startl,length2,ffname, startl,string2); } else { if(head != 
NULL) { nrerror("error with header\n"); } } Up_plot(unstr_out,NULL,inter_out,name,u_vals,my_contrib,head); if(head != NULL) { free(head); head = NULL; } if (upmode == 3 ) {/* plot opening energy for boths RNAs */ if(head != NULL) { nrerror("error with header\n"); } Up_plot(NULL,unstr_short,NULL,name,u_vals,my_contrib,head); } } } else { /* one sequence: plot only results for prob unstructured */ int count; char collect_out[1000]; collect_out[0]='\0'; for (count = 1; count <= u_vals[0]; count++) { unstr = u_vals[count]; print_unstru(unstr_out,unstr); } if(output) {/* make RNAup output to file */ printf("RNAup output in file: "); strcpy(name,up_out); strcat(name, "_u"); if(u_vals[0] <= 20) { for (count = 1; count <= u_vals[0]; count++) { unstr = u_vals[count]; sprintf(temp_name,"%d",unstr); if (count < u_vals[0]) { strcat(temp_name,"_"); strncat(name, temp_name,5); } else { strncat(name, temp_name,5); strcat(name, ".out"); printf("%s\n",name); } } } else { sprintf(temp_name,"%d",u_vals[1]); strcat(temp_name,"_to_"); strncat(name, temp_name,5); sprintf(temp_name,"%d",u_vals[0]); strncat(name, temp_name,5); strcat(name, ".out"); printf("%s\n",name); } if(header) { char startl[3]; sprintf(startl,"# "); head = (char*)space(sizeof(char)*(length1+length2+1000)); /* mach kein \n als ende von head */ sprintf(head,"%s %s\n%s %d %s\n%s %s",startl, cmd_line, startl,length1,fname, startl,string1); } else { if(head != NULL) { nrerror("error with header\n"); }} Up_plot(unstr_out,NULL,NULL,name,u_vals,my_contrib,head); if(head != NULL) { free(head); head = NULL;} } } } else { nrerror("no output format given\n"); } if(structure != NULL) free(structure); structure = NULL; if (title != NULL) free(title); title=NULL; if (u_vals != NULL) free(u_vals); u_vals=NULL; if (upmode == 1) free_pu_contrib(unstr_out); if (upmode > 1) { free_pu_contrib(unstr_out); free_interact(inter_out); } if (upmode == 3)free_pu_contrib(unstr_short); free_arrays(); /* for arrays for fold(...) 
*/ if (cstruc!=NULL) free(cstruc); cstruc=NULL; if (cstruc_l!=NULL) free(cstruc_l); cstruc_l=NULL; if (cstruc_s!=NULL) free(cstruc_s); cstruc_s=NULL; (void) fflush(stdout); if (string1!=NULL && task != 3) { free(string1); string1 = NULL; } if (string2!=NULL) free(string2); string2 = NULL; } while (1); if (line != NULL) free(line); if (string1!=NULL) free(string1); if (string2!=NULL) free(string2); if (cstruc!=NULL) free(cstruc); if (cstruc_l!=NULL) free(cstruc_l); if (cstruc_s!=NULL) free(cstruc_s); return 0; }
int main(int argc, char *argv[]) { char *start; char *structure; char *rstart; char *str2; char *line; int i; int length; int l; int hd; double energy = 0.; double kT; int pf = 0; int mfe = 0; int istty; int repeat; int found; AjPFile inf = NULL; AjPSeq seq = NULL; AjPFile paramfile = NULL; AjPFile outf = NULL; float eT = 0.; AjBool eGU; AjBool eclose; AjBool lonely; AjBool etloop; AjPStr eenergy = NULL; char ewt = '\0'; AjPStr edangles = NULL; AjPStr method = NULL; AjPStr ealpha = NULL; AjBool showfails = ajFalse; AjBool succeed = ajFalse; char edangle = '\0'; ajint len; FILE *fp; embInitPV("vrnainverse",argc,argv,"VIENNA",VERSION); inf = ajAcdGetInfile("structuresfile"); seq = ajAcdGetSeq("sequence"); paramfile = ajAcdGetInfile("paramfile"); eT = ajAcdGetFloat("temperature"); eGU = ajAcdGetBoolean("gu"); eclose = ajAcdGetBoolean("closegu"); lonely = ajAcdGetBoolean("lp"); etloop = ajAcdGetBoolean("tetraloop"); eenergy = ajAcdGetListSingle("energy"); edangles = ajAcdGetListSingle("dangles"); method = ajAcdGetListSingle("folding"); ealpha = ajAcdGetString("alphabet"); final_cost = ajAcdGetFloat("final"); repeat = ajAcdGetInt("repeats"); showfails = ajAcdGetBoolean("showfails"); succeed = ajAcdGetBoolean("succeed"); outf = ajAcdGetOutfile("outfile"); do_backtrack = 0; structure = NULL; istty = 0; temperature = (double) eT; noGU = (eGU) ? 0 : 1; no_closingGU = (eclose) ? 0 : 1; noLonelyPairs = (lonely) ? 
0 : 1; tetra_loop = !!etloop; ewt = *ajStrGetPtr(eenergy); if(ewt == '0') energy_set = 0; else if(ewt == '1') energy_set = 1; else if(ewt == '2') energy_set = 2; edangle = *ajStrGetPtr(edangles); if(edangle == '0') dangles = 0; else if(edangle == '1') dangles = 1; else if(edangle == '2') dangles = 2; else if(edangle == '3') dangles = 3; if(ajStrMatchC(method,"mp")) { mfe = 1; pf = 1; } else if(ajStrMatchC(method,"m")) { mfe = 1; pf = 0; } else if(ajStrMatchC(method,"p")) { mfe = 0; pf = 1; } len = ajStrGetLen(ealpha); symbolset = (char *) space(len + 1); strcpy(symbolset, ajStrGetPtr(ealpha)); for (l = 0; l < len; l++) symbolset[l] = toupper(symbolset[l]); inv_verbose = !!showfails; fp = ajFileGetFileptr(inf); init_rand(); kT = (temperature+273.15)*1.98717/1000.0; istty = (isatty(fileno(stdout))&&isatty(fileno(stdin))); if (paramfile) read_parameter_file(paramfile); give_up = succeed; do { if ((line = get_line(fp))==NULL) break; /* read structure, skipping over comment lines */ while ((*line=='*')||(*line=='\0')||(*line=='>')) { free(line); if ((line = get_line(fp))==NULL) break; } /* stop at eof or '@' */ if (line==NULL) break; if (strcmp(line, "@") == 0) { free(line); break; } structure = (char *) space(strlen(line)+1); /* scanf gets rid of trailing junk */ (void) sscanf(line,"%s",structure); free(line); length = (int) strlen(structure); str2 = (char *) space((unsigned)length+1); /* now look for a sequence to match the structure */ /* if ((line = get_line(fp))!=NULL) if (strcmp(line, "@") == 0) { free(line); break; } */ start = (char *) space((unsigned) length+1); if(seq) (void) strncpy(start, ajSeqGetSeqC(seq), length); if (repeat!=0) found = repeat; else found = 1; initialize_fold(length); rstart = (char *) space((unsigned)length+1); while(found>0) { char *string; string = (char *) space((unsigned)length+1); strcpy(string, start); for (i=0; i<length; i++) { /* lower case characters are kept fixed, any other character not in symbolset is replaced by a random 
character */ if (islower(string[i])) continue; if (string[i]=='\0' || (strchr(symbolset,string[i])==NULL)) string[i]=symbolset[int_urn(0,strlen(symbolset)-1)]; } strcpy(rstart, string); /* remember start string */ if (mfe) { energy = inverse_fold(string, structure); if( (!succeed) || (energy<=0.0) ) { found--; hd = hamming(rstart, string); ajFmtPrintF(outf,"%s %3d", string, hd); if (energy>0) { /* no solution found */ ajFmtPrintF(outf," d = %f\n", energy); } else ajFmtPrintF(outf,"\n"); } } if (pf) { if (!(mfe && give_up && (energy>0))) { /* unless we gave up in the mfe part */ double prob, min_en, sfact=1.07; /* get a reasonable pf_scale */ min_en = fold(string,str2); pf_scale = exp(-(sfact*min_en)/kT/length); init_pf_fold(length); energy = inverse_pf_fold(string, structure); prob = exp(-energy/kT); hd = hamming(rstart, string); ajFmtPrintF(outf,"%s %3d (%f)\n", string, hd, prob); free_pf_arrays(); } if (!mfe) found--; } free(string); } free(rstart); free_arrays(); free(structure); free(str2); free(start); } while (1); ajSeqDel(&seq); ajStrDel(&eenergy); ajStrDel(&edangles); ajStrDel(&method); ajStrDel(&ealpha); ajFileClose(&inf); ajFileClose(¶mfile); ajFileClose(&outf); AJFREE(symbolset); embExit(); return 0; }
/*--------------------------------------------------------------------------*/ int main(int argc, char *argv[]) { char *string1=NULL, *string2=NULL, *temp, *line; char *structure=NULL, *cstruc=NULL; char fname[53], my_contrib[10], *up_out; char *ParamFile=NULL; char *ns_bases=NULL, *c; int i, length1,length2,length, l, sym, r; double energy, min_en; double kT, sfact=1.07; int pf, istty; int noconv=0; double Zu, Zup; /* variables for output */ pu_contrib *unstr_out, *unstr_short; FLT_OR_DBL **inter_out; char *title; /* commandline parameters */ int w; /* length of region of interaction */ int incr3; /* add x unpaired bases after 3'end of short RNA*/ int incr5; /* add x unpaired bases after 5'end of short RNA*/ int unstr; /* length of unpaired region for output*/ int upmode; /* output mode for pf_unpaired and pf_up()*/ upmode = 0; unstr = 4; incr3=0; incr5=0; w=25; do_backtrack = 1; pf=1; /* partition function has to be calculated */ length1=length2=0; up_out=NULL; title=NULL; unstr_out=NULL; inter_out=NULL; my_contrib[0] = 'S'; my_contrib[1] = '\0'; for (i=1; i<argc; i++) { if (argv[i][0]=='-') switch ( argv[i][1] ) { case 'T': if (argv[i][2]!='\0') usage(); if(i==argc-1) usage(); r=sscanf(argv[++i], "%lf", &temperature); if (!r) usage(); break; case 'w': /* -w maximal length of unstructured region */ if(i==argc-1) usage(); r=sscanf(argv[++i],"%d", &w); if (!r) usage(); break; case 'n': if ( strcmp(argv[i], "-noGU")==0) noGU=1; if ( strcmp(argv[i], "-noCloseGU")==0) no_closingGU=1; if ( strcmp(argv[i], "-noLP")==0) noLonelyPairs=1; if ( strcmp(argv[i], "-nsp") ==0) { if (i==argc-1) usage(); ns_bases = argv[++i]; } if ( strcmp(argv[i], "-noconv")==0) noconv=1; break; case '4': tetra_loop=0; break; case 'e': if(i==argc-1) usage(); r=sscanf(argv[++i],"%d", &energy_set); if (!r) usage(); break; case 'C': fold_constrained=1; break; case 'S': if(i==argc-1) usage(); r=sscanf(argv[++i],"%lf", &sfact); if (!r) usage(); break; case 'd': dangles=0; if (argv[i][2]!='\0') { 
r=sscanf(argv[i]+2, "%d", &dangles); if (r!=1) usage(); } break; case 'o': upmode=1; /* output mode 0: non, 1:only pr_unpaired, 2: pr_unpaired + pr_up */ if (argv[i][2]!='\0') { r=sscanf(argv[i]+2, "%d", &upmode); if (r!=1) usage(); } break; case 'u': /* -u length of unstructured region in pr_unpaired output makes only sense in combination with -o1 or -o2 */ if(i==argc-1) usage(); r=sscanf(argv[++i],"%d", &unstr); if (!r) usage(); break; /* incr5 and incr3 are only for the longer (target) sequence */ /* increments w (length of the unpaired region) to incr5+w+incr3*/ /* the longer sequence is given in 5'(= position 1) to */ /* 3' (=position n) direction */ /* incr5 adds incr5 residues to the 5' end of w */ case '5': if(i==argc-1) usage(); r=sscanf(argv[++i],"%d", &incr5); if (!r) usage(); break; /* incr3 adds incr3 residues to the 3' end of w */ case '3': if(i==argc-1) usage(); r=sscanf(argv[++i],"%d", &incr3); if (!r) usage(); break; case 'P': if (i==argc-1) usage(); ParamFile = argv[++i]; break; case 'x': if(i==argc-1) usage(); r=sscanf(argv[++i], "%s", my_contrib); if (!r) usage(); break; default: usage(); } } if (ParamFile != NULL) read_parameter_file(ParamFile); if (ns_bases != NULL) { nonstandards = space(33); c=ns_bases; i=sym=0; if (*c=='-') { sym=1; c++; } while (*c!='\0') { if (*c!=',') { nonstandards[i++]=*c++; nonstandards[i++]=*c; if ((sym)&&(*c!=*(c-1))) { nonstandards[i++]=*c; nonstandards[i++]=*(c-1); } } c++; } } istty = isatty(fileno(stdout))&&isatty(fileno(stdin)); if ((fold_constrained)&&(istty)) { printf("Input constraints using the following notation:\n"); printf("| : paired with another base\n"); printf(". 
: no constraint at all\n"); printf("x : base must not pair\n"); printf("< : base i is paired with a base j<i\n"); printf("> : base i is paired with a base j>i\n"); printf("matching brackets ( ): base i pairs base j\n"); } do { /* main loop: continue until end of file */ cut_point=-1; if (istty) { printf("\nInput string (upper or lower case); @ to quit\n"); printf("Use '&' to connect 2 sequences that shall form a complex.\n"); printf("%s%s\n", scale1, scale2); } fname[0]='\0'; if ((line = get_line(stdin))==NULL) break; /* skip comment lines and get filenames */ while ((*line=='*')||(*line=='\0')||(*line=='>')) { if (*line=='>') (void) sscanf(line, ">%51s", fname); free(line); if ((line = get_line(stdin))==NULL) break; } if ((line == NULL) || (strcmp(line, "@") == 0)) break; tokenize(line,&string1,&string2); if(upmode != 0){ if(cut_point == -1 && upmode == 2) { nrerror("only one sequence - can not cofold one sequence!"); } } else { if(cut_point == -1){ upmode=1; } else { upmode=2; } } if(string1 != NULL) length1 = (int) strlen(string1); if(string2 != NULL) length2 = (int) strlen(string2); else length2=0; /* write longer seq in string1 and and shorter one in string2 */ if(length1 < length2) { length=length1; length1=length2; length2=length; temp=(char *) space(strlen(string1)+1); (void) sscanf(string1,"%s",temp); string1 = (char *) xrealloc (string1,sizeof(char)*length1+1); (void) sscanf(string2,"%s",string1); string2 = (char *) xrealloc(string2,sizeof(char)*length2+1); (void) sscanf(temp,"%s",string2); free(temp); } structure = (char *) space((unsigned) length1+1); if (fold_constrained) { cstruc = get_line(stdin); if (cstruc!=NULL) strncpy(structure, cstruc, length1); else fprintf(stderr, "constraints missing\n"); } for (l = 0; l < length1; l++) { string1[l] = toupper(string1[l]); if (!noconv && string1[l] == 'T') string1[l] = 'U'; } for (l = 0; l < length2; l++) { string2[l] = toupper(string2[l]); if (!noconv && string2[l] == 'T') string2[l] = 'U'; } if (istty) 
printf("length1 = %d\n", length1); /* initialize_fold(length); */ update_fold_params(); printf("\n%s", string1); min_en = fold(string1, structure); if (istty) { printf("\n minimum free energy = %6.2f kcal/mol\n", min_en); } else printf(" (%6.2f)\n", min_en); (void) fflush(stdout); /* parse cml parameters for the filename*/ if(upmode > 0) { char wuadd[10]; up_out = (char*) space(sizeof(char)*53); /* create the name of the output file */ if(fname[0]!='\0' && up_out[0] =='\0' ){ if(strlen(fname)< 30){ strcpy(up_out, fname); } else { strncpy(up_out, fname,30); } } else if(fname[0]=='\0' && up_out[0] == '\0'){ char defaultn[10] = "RNA"; sprintf(up_out,"%s",defaultn); } sprintf(wuadd,"%d",w); strcat(up_out, "_w"); strcat(up_out, wuadd); strcat(up_out, "u"); sprintf(wuadd,"%d",unstr); strcat(up_out, wuadd); strcat(up_out, "_up.out"); printf("RNAup output in file: %s\n",up_out); /* create the title for the output file */ if (title == NULL) { char wuadd[10]; title = (char*) space(sizeof(char)*60); if(fname[0]!='\0'){ if(strlen(fname)< 30){ strcpy(title, fname); } else { strncpy(title, fname,30); } } else if (fname[0]=='\0'){ char defaultn[10]= "RNAup"; sprintf(title,"%s",defaultn); } sprintf(wuadd,"%d",unstr); strcat(title," u="); strcat(title, wuadd); sprintf(wuadd,"%d",w); strcat(title," w="); strcat(title, wuadd); sprintf(wuadd,"%d",length1); strcat(title," n="); strcat(title, wuadd); } } else { nrerror("no output format given: use [-o[1|2]] to select output format"); } if (pf) { if (dangles==1) { dangles=2; /* recompute with dangles as in pf_fold() */ min_en = energy_of_struct(string1, structure); dangles=1; } kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */ if(upmode != 0){ int wplus; wplus=w+incr3+incr5; /* calculate prob. 
unstructured for the shorter seq */ if(upmode == 3) { min_en = fold(string2, structure); pf_scale = exp(-(sfact*min_en)/kT/length2); if (length2>2000) fprintf(stderr, "scaling factor %f\n", pf_scale); init_pf_fold(length2); if (cstruc!=NULL) strncpy(structure, cstruc, length2+1); energy = pf_fold(string2, structure); if(wplus > length2){ wplus = length2;} /* for the shorter seq */ unstr_short = pf_unstru(string2, structure, wplus); free_pf_unstru(); free_pf_arrays(); /* for arrays for pf_fold(...) */ } /* calculate prob. unstructured for the longer seq */ wplus=w+incr3+incr5; min_en = fold(string1, structure); pf_scale = exp(-(sfact*min_en)/kT/length1); if (length1>2000) fprintf(stderr, "scaling factor %f\n", pf_scale); init_pf_fold(length1); if (cstruc!=NULL) strncpy(structure, cstruc, length1+1); energy = pf_fold(string1, structure); unstr_out = pf_unstru(string1, structure, wplus); free_pf_unstru(); free_pf_arrays(); /* for arrays for pf_fold(...) */ /* calculate the interaction between the two sequences */ if(upmode > 1 && cut_point > -1){ inter_out = pf_interact(string1,string2,unstr_out,w, incr3, incr5); if(Up_plot(unstr_out,inter_out,length1,up_out,unstr,my_contrib)==0){ nrerror("Up_plot: no output values assigned"); } } else if(cut_point == -1 && upmode > 1) { /* no second seq given */ nrerror("only one sequence given - cannot cofold one sequence!"); } else { /* plot only the results for prob unstructured */ if(Up_plot(unstr_out,NULL,length1,up_out,unstr,my_contrib)==0){ nrerror("Up_plot: no output values assigned"); } } } else { nrerror("no output format given: use [-o[1|2]] to select output format"); } if (do_backtrack) { printf("%s", structure); if (!istty) printf(" [%6.2f]\n", energy); else printf("\n"); } if ((istty)||(!do_backtrack)) printf(" free energy of ensemble = %6.2f kcal/mol\n", energy); energy = pf_fold(string1, structure); printf(" frequency of mfe structure in ensemble %g; " "ensemble diversity %-6.2f\n", exp((energy-min_en)/kT), 
mean_bp_dist(length1)); free_pf_arrays(); } if (cstruc!=NULL) free(cstruc); (void) fflush(stdout); if (string1!=NULL) free(string1); if (string2!=NULL) free(string2); free(structure); if(up_out != NULL) free(up_out); up_out=NULL; if(title != NULL) free(title); title=NULL; if(upmode == 1) free_pf_two(unstr_out,NULL); if(upmode > 1) free_pf_two(unstr_out,inter_out); if(upmode == 3)free_pf_two(unstr_short,NULL); free_arrays(); /* for arrays for fold(...) */ } while (1); return 0; }
void main() { char *seq1="CGCAGGGAUACCCGCG", *seq2="GCGCCCAUAGGGACGC", *struct1,* struct2,* xstruc; float e1, e2, tree_dist, string_dist, profile_dist, kT; Tree *T1, *T2; swString *S1, *S2; float *pf1, *pf2; FLT_OR_DBL *bppm; /* fold at 30C instead of the default 37C */ temperature = 30.; /* must be set *before* initializing */ /* allocate memory for structure and fold */ struct1 = (char* ) space(sizeof(char)*(strlen(seq1)+1)); e1 = fold(seq1, struct1); struct2 = (char* ) space(sizeof(char)*(strlen(seq2)+1)); e2 = fold(seq2, struct2); free_arrays(); /* free arrays used in fold() */ /* produce tree and string representations for comparison */ xstruc = expand_Full(struct1); T1 = make_tree(xstruc); S1 = Make_swString(xstruc); free(xstruc); xstruc = expand_Full(struct2); T2 = make_tree(xstruc); S2 = Make_swString(xstruc); free(xstruc); /* calculate tree edit distance and aligned structures with gaps */ edit_backtrack = 1; tree_dist = tree_edit_distance(T1, T2); free_tree(T1); free_tree(T2); unexpand_aligned_F(aligned_line); printf("%s\n%s %3.2f\n", aligned_line[0], aligned_line[1], tree_dist); /* same thing using string edit (alignment) distance */ string_dist = string_edit_distance(S1, S2); free(S1); free(S2); printf("%s mfe=%5.2f\n%s mfe=%5.2f dist=%3.2f\n", aligned_line[0], e1, aligned_line[1], e2, string_dist); /* for longer sequences one should also set a scaling factor for partition function folding, e.g: */ kT = (temperature+273.15)*1.98717/1000.; /* kT in kcal/mol */ pf_scale = exp(-e1/kT/strlen(seq1)); /* calculate partition function and base pair probabilities */ e1 = pf_fold(seq1, struct1); /* get the base pair probability matrix for the previous run of pf_fold() */ bppm = export_bppm(); pf1 = Make_bp_profile_bppm(bppm, strlen(seq1)); e2 = pf_fold(seq2, struct2); /* get the base pair probability matrix for the previous run of pf_fold() */ bppm = export_bppm(); pf2 = Make_bp_profile_bppm(bppm, strlen(seq2)); free_pf_arrays(); /* free space allocated for 
pf_fold() */ profile_dist = profile_edit_distance(pf1, pf2); printf("%s free energy=%5.2f\n%s free energy=%5.2f dist=%3.2f\n", aligned_line[0], e1, aligned_line[1], e2, profile_dist); free_profile(pf1); free_profile(pf2); }
void print_stats(FILE* statsfile, char* seq, char* struc, int length, int iteration, int count_df_evaluations, double D, double prev_D, double norm, int printPS) { plist *pl, *pl1,*pl2; char fname[100]; char title[100]; char* ss; double MEAgamma, mea, mea_en; char* output; int i,j; static char timestamp[40]; const struct tm *tm; time_t now; ss = (char *) space((unsigned) length+1); memset(ss,'.',length); init_pf_fold(length); pf_fold_pb(seq, NULL); for (i = 1; i < length; i++) { for (j = i+1; j<= length; j++) { p_pp[i][j]=p_pp[j][i]=pr[iindx[i]-j]; } } get_pair_prob_vector(p_pp, p_unpaired, length, 1); fprintf (stderr, "\nITERATION: %i\n", iteration); fprintf(stderr, "DISCREPANCY: %.4f\n", D); fprintf(stderr, "NORM: %.2f\n", norm); if (prev_D > -1.0) { fprintf(stderr, "IMPROVEMENT: %.4f%%\n\n", (1-(D/prev_D))*100); } fprintf(statsfile, "%i\t%.4f\t%.4f\t%i\t", iteration, D, norm, count_df_evaluations); for (MEAgamma=1e-5; MEAgamma<1e+6; MEAgamma*=10 ) { pl = make_plist(length, 1e-4/(1+MEAgamma)); mea = MEA(pl, ss, MEAgamma); mea_en = energy_of_struct(seq, ss); fprintf(statsfile,"%s,%.2e;", ss, MEAgamma); free(pl); } fprintf(statsfile, "\t"); // Stochastic backtracking fprintf(stderr, "Sampling structures...\n"); if (sample_structure) { char* best_structure; char* curr_structure; double x; double curr_energy = 0.0; double min_energy = +1.0; int curr_distance = 0; int min_distance = 999999; best_structure = (char *) space((unsigned) length+1); for (i=1; i<=10000; i++) { curr_structure = pbacktrack_pb(seq); curr_energy = energy_of_struct(seq, curr_structure); curr_distance = 0.0; //fprintf(stderr, "%s%.2f ", curr_structure, curr_energy); for (j = 1; j <= length; j++) { if (q_unpaired[j] > -0.5) { x = (curr_structure[j-1] == '.') ? 
1.0 : 0.0; curr_distance += abs(x-q_unpaired[j]); } } if (curr_distance < min_distance) { min_distance = curr_distance; min_energy = curr_energy; strcpy(best_structure, curr_structure); } if (curr_distance == min_distance) { if (curr_energy < min_energy) { min_energy = curr_energy; strcpy(best_structure, curr_structure); } } //fprintf(stderr, "%i\n", curr_distance); free(curr_structure); } //fprintf(stderr, "\n%s %.2f %i\n", best_structure, min_energy, min_distance); fprintf(statsfile, "\t%s\t%.2f\t%i\t", best_structure, min_energy, min_distance); } else { fprintf(statsfile, "NA\tNA\tNA\t"); } for (i = 1; i <= length; i++) { fprintf(statsfile, "%.4f", epsilon[i]); if (!(i==length)) { fprintf(statsfile, ","); } } now = time ( NULL ); tm = localtime ( &now ); strftime ( timestamp, 40, "%Y-%m-%d %X", tm ); fprintf(statsfile, "\t%s\n", timestamp); /* Print dotplot only if not noPS is given and function call asks for it */ if (!noPS && printPS) { /* Print dotplot */ sprintf(fname,"%s/iteration%i.ps", psDir, iteration); pl1 = make_plist(length, 1e-5); if (struc != NULL) { pl2 = b2plist(struc); } else { pl2 = NULL; } sprintf(title,"Iteration %i, D = %.4f", iteration, D); (void) PS_dot_plot_list_epsilon(seq, fname, pl2, pl1, epsilon, title); } free_pf_arrays(); }
int main(int argc, char *argv[]) { struct RNAfold_args_info args_info; char *string, *input_string, *structure=NULL, *cstruc=NULL; char fname[80], ffname[80], gfname[80], *ParamFile=NULL; char *ns_bases=NULL, *c; int i, j, ii, jj, mu, length, l, sym, r, pf=0, noconv=0; unsigned int input_type; double energy, min_en, kT, sfact=1.07; int doMEA=0, circular = 0, N; char *pf_struc; double dist; plist *pl; FILE * filehandle; FILE * statsfile; char* line; double tau = 0.01; /* Variance of energy parameters */ double sigma = 0.01; /* Variance of experimental constraints */ double *gradient; /* Gradient for steepest descent search epsilon[i+1]= epsilon[i] - gradient * step_size */ double initial_step_size = 0.5; /* Initial step size for steepest descent search */ double step_size; double D; /* Discrepancy (i.e. value of objective function) for the current prediction */ int iteration, max_iteration = 2000; /* Current and maximum number of iterations after which algorithm stops */ double precision = 0.1; /* cutoff used for stop conditions */ double tolerance = 0.1; /* Parameter used by various GSL minimizers */ int method_id = 1; /* Method to use for minimization, 0 and 1 are custom steepest descent, the rest are GSL implementations (see below)*/ int initial_guess_method = 0; int sample_N = 1000; double *prev_epsilon; double *prev_gradient; double DD, prev_D, sum, norm; int status; double* gradient_numeric; double* gradient_numeric_gsl; /* Minimizer vars */ const gsl_multimin_fdfminimizer_type *T; gsl_multimin_fdfminimizer *minimizer; gsl_vector *minimizer_x; gsl_vector *minimizer_g; gsl_multimin_function_fdf minimizer_func; minimizer_pars_struct minimizer_pars; char *constraints; char outfile[256]; char constraints_file[256]; char epsilon_file[256]; FILE* fh; double last_non_nan_lnQ; pf_overflow = 0; pf_underflow = 0; dangles=2; do_backtrack = 1; string = NULL; noPS = 0; outfile[0]='\0'; epsilon_file[0]='\0'; strcpy(psDir, "dotplots"); if(RNAfold_cmdline_parser (argc, argv, 
&args_info) != 0) exit(1); /* RNAbpfold specific options */ if (args_info.tau_given) tau = args_info.tau_arg; if (args_info.sigma_given) sigma = args_info.sigma_arg; if (args_info.precision_given) precision = args_info.precision_arg; if (args_info.step_given) initial_step_size = args_info.step_arg; if (args_info.maxN_given) max_iteration = args_info.maxN_arg; if (args_info.minimization_given) method_id = args_info.minimization_arg; if (args_info.init_given) initial_guess_method = args_info.init_arg; if (args_info.tolerance_given) tolerance = args_info.tolerance_arg; if (args_info.outfile_given) strcpy(outfile, args_info.outfile_arg); if (args_info.constraints_given) strcpy(constraints_file, args_info.constraints_arg); if (args_info.epsilon_given) strcpy(epsilon_file, args_info.epsilon_arg); if (args_info.sampleGradient_given) sample_conditionals=1; if (args_info.hybridGradient_given) { sample_conditionals=1; hybrid_conditionals=1; } if (args_info.numericalGradient_given) numerical=1; if (args_info.sampleStructure_given) sample_structure=1; if (args_info.psDir_given) strcpy(psDir, args_info.psDir_arg); if (args_info.sparsePS_given) sparsePS=args_info.sparsePS_arg; if (args_info.gridSearch_given) grid_search = 1; /* Generic RNAfold options */ if (args_info.temp_given) temperature = args_info.temp_arg; if (args_info.reference_given) fold_constrained=1; if (args_info.noTetra_given) tetra_loop=0; if (args_info.dangles_given) dangles = args_info.dangles_arg; if (args_info.noLP_given) noLonelyPairs = 1; if (args_info.noGU_given) noGU = 1; if (args_info.noClosingGU_given) no_closingGU = 1; if (args_info.noconv_given) noconv = 1; if (args_info.energyModel_given) energy_set = args_info.energyModel_arg; if (args_info.paramFile_given) ParamFile = strdup(args_info.paramFile_arg); if (args_info.nsp_given) ns_bases = strdup(args_info.nsp_arg); if (args_info.pfScale_given) sfact = args_info.pfScale_arg; if (args_info.noPS_given) noPS=1; /* Create postscript directory */ if (!noPS) 
{ struct stat stat_p; if (stat (psDir, &stat_p) != 0) { if (mkdir(psDir, S_IRWXU|S_IROTH|S_IRGRP ) !=0) { fprintf(stderr, "WARNING: Could not create directory: %s", psDir); } } } if (ParamFile != NULL) { read_parameter_file(ParamFile); } if (ns_bases != NULL) { nonstandards = space(33); c=ns_bases; i=sym=0; if (*c=='-') { sym=1; c++; } while (*c!='\0') { if (*c!=',') { nonstandards[i++]=*c++; nonstandards[i++]=*c; if ((sym)&&(*c!=*(c-1))) { nonstandards[i++]=*c; nonstandards[i++]=*(c-1); } } c++; } } /*Read sequence*/ fname[0] = '\0'; while((input_type = get_input_line(&input_string, 0)) & VRNA_INPUT_FASTA_HEADER) { (void) sscanf(input_string, "%42s", fname); free(input_string); } length = (int) strlen(input_string); string = strdup(input_string); free(input_string); structure = (char *) space((unsigned) length+1); /* For testing purpose pass dot bracket structure of reference structure via -C */ if (fold_constrained) { input_type = get_input_line(&input_string, VRNA_INPUT_NOSKIP_COMMENTS); if(input_type & VRNA_INPUT_QUIT) { exit(1); } else if((input_type & VRNA_INPUT_MISC) && (strlen(input_string) > 0)) { cstruc = strdup(input_string); free(input_string); } else warn_user("-C was given but reference structure is missing"); } if(noconv) { str_RNA2RNA(string); } else { str_DNA2RNA(string); } /* Allocating space */ epsilon = (double *) space(sizeof(double)*(length+1)); exp_pert = (double **)space(sizeof(double *)*(length+1)); perturbations = (double **)space(sizeof(double *)*(length+1)); prev_epsilon = (double *) space(sizeof(double)*(length+1)); gradient = (double *) space(sizeof(double)*(length+1)); gradient_numeric = (double *) space(sizeof(double)*(length+1)); gradient_numeric_gsl = (double *) space(sizeof(double)*(length+1)); prev_gradient = (double *) space(sizeof(double)*(length+1)); q_unpaired = (double *) space(sizeof(double)*(length+1)); p_unpaired_cond = (double **)space(sizeof(double *)*(length+1)); p_unpaired_cond_sampled = (double **)space(sizeof(double 
*)*(length+1)); p_pp = (double **)space(sizeof(double *)*(length+1)); p_unpaired = (double *) space(sizeof(double)*(length+1)); p_unpaired_tmp = (double *) space(sizeof(double)*(length+1)); for (i=0; i <= length; i++) { epsilon[i] = gradient[i] = q_unpaired[i] = 0.0; p_unpaired_cond[i] = (double *) space(sizeof(double)*(length+1)); p_unpaired_cond_sampled[i] = (double *) space(sizeof(double)*(length+1)); p_pp[i] = (double *) space(sizeof(double)*(length+1)); exp_pert[i] = (double *) space(sizeof(double)*(length+1)); perturbations[i] = (double *) space(sizeof(double)*(length+1)); for (j=0; j <= length; j++) { p_pp[i][j]=p_unpaired_cond[i][j] = 0.0; p_unpaired_cond_sampled[i][j] = 0.0; } } /*** If file with perturbation vector epsilon is given we fold using this epsilon and are done ***/ if (args_info.epsilon_given) { plist *pl, *pl1,*pl2; filehandle = fopen (epsilon_file,"r"); if (filehandle == NULL) { nrerror("Could not open file with perturbation vector."); } i=1; while (1) { double t; line = get_line(filehandle); if (line == NULL) break; if (i>length) nrerror("Too many values in perturbation vector file."); if (sscanf(line, "%lf", &epsilon[i]) !=1) { nrerror("Error while reading perturbation vector file."); } i++; } if (i-1 != length) { nrerror("Too few values in perturbation vector file."); } init_pf_fold(length); pf_fold_pb(string, NULL); sprintf(fname,"%s/dot.ps", psDir); pl1 = make_plist(length, 1e-5); (void) PS_dot_plot_list_epsilon(string, fname, NULL, pl1, epsilon, ""); exit(0); } /*** Get constraints from reference structure or from external file ***/ /* Structure was given by -C */ if (fold_constrained) { for (i=0; i<length; i++) { if (cstruc[i] == '(' || cstruc[i] == ')') { q_unpaired[i+1] = 0.0; } else { q_unpaired[i+1] = 1.0; } } /*Read constraints from file*/ } else { filehandle = fopen (constraints_file,"r"); if (filehandle == NULL) { nrerror("No constraints given as dot bracket or wrong file name"); } i=1; while (1) { double t; line = 
get_line(filehandle); if (line == NULL) break; if (i>length) nrerror("Too many values in constraints.dat"); if (sscanf(line, "%lf", &q_unpaired[i]) !=1) { nrerror("Error while reading constraints.dat"); } i++; } if (i-1 != length) { nrerror("Too few values in constraints.dat"); } } /* Create file handle */ if (outfile[0] !='\0') { statsfile = fopen (outfile,"w"); } else { statsfile = fopen ("stats.dat","w"); } setvbuf(statsfile, NULL, _IONBF, 0); if (!grid_search) { fprintf(statsfile, "Iteration\tDiscrepancy\tNorm\tdfCount\tMEA\tSampled_structure\tSampled_energy\tSampled_distance\tEpsilon\ttimestamp\n"); } else { /* If we do a grid search we have a different output. */ fprintf(statsfile, "Dummy\tm\tb\tdummy\tMEA\tepsilon\n"); } if (statsfile == NULL) { nrerror("Could not open stats.dat for writing."); } fprintf(stderr, "tau^2 = %.4f; sigma^2 = %.4f; precision = %.4f; tolerance = %.4f; step-size: %.4f\n\n", tau, sigma, precision, tolerance, initial_step_size); st_back=1; min_en = fold(string, structure); (void) fflush(stdout); if (length>2000) free_arrays(); pf_struc = (char *) space((unsigned) length+1); kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */ pf_scale = exp(-(sfact*min_en)/kT/length); /* Set up minimizer */ minimizer_x = gsl_vector_alloc (length+1); minimizer_g = gsl_vector_alloc (length+1); for (i=0; i <= length; i++) { epsilon[i] = 0.0; gsl_vector_set (minimizer_g, i, 0.0); gsl_vector_set (minimizer_x, i, epsilon[i]); } minimizer_pars.length=length; minimizer_pars.seq = string; minimizer_pars.tau=tau; minimizer_pars.sigma=sigma; minimizer_pars.kT=kT; minimizer_func.n = length+1; minimizer_func.f = calculate_f; minimizer_func.df = numerical ? 
calculate_df_numerically: calculate_df; minimizer_func.fdf = calculate_fdf; minimizer_func.params = &minimizer_pars; //min_en = fold_pb(string, structure); //fprintf(stderr, "%f", min_en); //exit(0); /* Calling test functions for debugging */ for (i=1; i <= length; i++) { if (i%2==0) { epsilon[i] = +0.2*i; } else { epsilon[i] = -0.2*i; } } //test_folding(string, length); /* //test_stochastic_backtracking(string, length); */ /* //test_gradient(minimizer_func, minimizer_pars); */ /* //test_gradient_sampling(minimizer_func, minimizer_pars); */ //exit(1); count_df_evaluations=0; /* Initial guess for epsilon */ if (initial_guess_method !=0 && initial_guess_method !=3) { /* Vars for inital guess methods */ double m,b; double* curr_epsilon; double* best_epsilon; double best_m, best_b, best_scale; double curr_D; double min_D = 999999999.0; double inc = +0.25; double cut; if (initial_guess_method == 1) fprintf(stderr, "Mathew's constant perturbations\n"); if (initial_guess_method == 2) fprintf(stderr, "Perturbations proportional to q-p\n"); curr_epsilon = (double *) space(sizeof(double)*(length+1)); best_epsilon = (double *) space(sizeof(double)*(length+1)); last_non_nan_lnQ = min_en; // Calculate p_unpaired for unperturbed state which we need later // for the proportinal method if (initial_guess_method == 2) { init_pf_fold(length); for (i=0; i <= length; i++) { epsilon[i] = 0.0; } pf_fold_pb(string, NULL); for (i = 1; i < length; i++) { for (j = i+1; j<= length; j++) { p_pp[i][j]=p_pp[j][i]=pr[iindx[i]-j]; } } get_pair_prob_vector(p_pp, p_unpaired_tmp, length, 1); free_pf_arrays(); } /* We do the same grid search as in the Mathews paper Fig. 4*/ for (m=0.25; m <=7.0; m+=0.25) { // Weird way of writing this inner loop for the grid search. We // traverse the grid without big jumps in the parameters to make // sure that the updated scaling factor is accurate all the time. inc*=-1; for (b = inc < 0.0 ? 0.0 : -3.0; inc < 0.0 ? 
b >= -3.0 : b<= 0.0 ; b+=inc) { // calculate cut point with x-axis and skip parameter pairs // which give a cut point outside the range of // q_unpaired (0 to 1). They gave frequently overflows and the // idea is that we both want positive and negative perturbations cut = exp( (-1) * b / m ) - 1; fprintf(stderr, "\nm = %.2f, b = %.2f, cut=%.2f\n", m, b, cut); if (cut > 1.0 || cut < 0.01) { fprintf(stderr, "\nSkipping m = %.2f, b = %.2f\n", m, b); continue; } /* Mathew's constant perturbations */ if (initial_guess_method == 1) { for (i=0; i <= length; i++) { /* We add epsilon to unpaired regions (as opposed to paired regions as in the Mathews paper) so we multiply by -1; if missing data we set it to 0.0 */ if (q_unpaired[i] < -0.5) { curr_epsilon[i] = 0.0; } else { curr_epsilon[i] = (m *(log(q_unpaired[i]+1))+b) *(-1); } gsl_vector_set (minimizer_x, i, curr_epsilon[i]); } /* Perturbations proportional to q-p */ } else { for (i=0; i <= length; i++) { curr_epsilon[i] = (m *(log(q_unpaired[i]+1)-log(p_unpaired_tmp[i]+1))+ b ) * (-1); gsl_vector_set (minimizer_x, i, curr_epsilon[i]); } } // Repeat and adjust scaling factor until we get result without over-/underflows do { // First we use default scaling factor if (pf_underflow == 0 && pf_overflow == 0) { sfact = 1.070; } if (pf_underflow) { sfact *= 0.8; fprintf(stderr,"Underflow, adjusting sfact to %.4f\n", sfact ); } if (pf_overflow) { sfact *= 1.2; fprintf(stderr,"Overflow, adjusting sfact to %.4f\n", sfact ); } pf_scale = exp(-(sfact*last_non_nan_lnQ)/kT/length); //fprintf(stderr,"Scaling factor is now: %.4e\n", pf_scale); curr_D = calculate_f(minimizer_x, (void*)&minimizer_pars); if (!isnan(last_lnQ)) last_non_nan_lnQ = last_lnQ; // Give up when even extreme scaling does not give results // (for some reason I could not get rid of overflows even with high scaling factors) if (sfact < 0.1 || sfact > 2.0) break; } while (pf_underflow == 1 || pf_overflow == 1); // We have not given up so everything is ok now if 
(!(sfact < 0.1 || sfact > 2.0)) { if (curr_D < min_D) { min_D = curr_D; for (i=0; i <= length; i++) { best_epsilon[i] = curr_epsilon[i]; } best_m = m; best_b = b; best_scale = pf_scale; } /*If we are interested in the grid search we misuse the print_stats function and report m and b together with MEA*/ if (grid_search) { for (i=0; i <= length; i++) { epsilon[i] = curr_epsilon[i]; } print_stats(statsfile, string, cstruc, length, 0, 0, m, 0.0, b, 0); } fprintf(stderr, "curr D: %.2f, minimum D: %.2f\n", curr_D, min_D); // Adjust pf_scale with default scaling factor but lnQ from // previous step sfact = 1.070; pf_scale = exp(-(sfact*last_lnQ)/kT/length); } else { sfact = 1.070; fprintf(stderr, "Skipping m = %.2f, b = %.2f; did not get stable result.\n", m, b); } } // for b } // for m fprintf(stderr, "Minimum found: m=%.2f, b=%.2f: %.2f\n", best_m, best_b, min_D); for (i=0; i <= length; i++) { epsilon[i] = best_epsilon[i]; gsl_vector_set (minimizer_x, i, best_epsilon[i]); } pf_scale = best_scale; } if (initial_guess_method == 3) { srand((unsigned)time(0)); for (i=0; i <= length; i++) { double r = (double)rand()/(double)RAND_MAX * 4 - 2; epsilon[i] = r; gsl_vector_set (minimizer_x, i, epsilon[i]); } } /* If we just wanted a grid search we are done now. 
*/ if (grid_search) { exit(0); } prev_D = calculate_f(minimizer_x, (void*)&minimizer_pars); print_stats(statsfile, string, cstruc, length, 0 , count_df_evaluations , prev_D, -1.0, 0.0,1); /* GSL minimization */ if (method_id !=0) { if (method_id > 2) { char name[100]; // Available algorithms // 3 gsl_multimin_fdfminimizer_conjugate_fr // 4 gsl_multimin_fdfminimizer_conjugate_pr // 5 gsl_multimin_fdfminimizer_vector_bfgs // 6 gsl_multimin_fdfminimizer_vector_bfgs2 // 7 gsl_multimin_fdfminimizer_steepest_descent // http://www.gnu.org/software/gsl/manual/html_node/Multimin-Algorithms-with-Derivatives.html switch (method_id) { case 2: minimizer = gsl_multimin_fdfminimizer_alloc (gsl_multimin_fdfminimizer_conjugate_fr, length+1); strcpy(name, "Fletcher-Reeves conjugate gradient"); break; case 3: minimizer = gsl_multimin_fdfminimizer_alloc (gsl_multimin_fdfminimizer_conjugate_pr, length+1); strcpy(name, "Polak-Ribiere conjugate gradient"); break; case 4: minimizer = gsl_multimin_fdfminimizer_alloc ( gsl_multimin_fdfminimizer_vector_bfgs, length+1); strcpy(name, "Broyden-Fletcher-Goldfarb-Shanno"); break; case 5: minimizer = gsl_multimin_fdfminimizer_alloc ( gsl_multimin_fdfminimizer_vector_bfgs2, length+1); strcpy(name, "Broyden-Fletcher-Goldfarb-Shanno (improved version)"); break; case 6: minimizer = gsl_multimin_fdfminimizer_alloc (gsl_multimin_fdfminimizer_steepest_descent, length+1); strcpy(name, "Gradient descent (GSL implmementation)"); break; } fprintf(stderr, "Starting minimization via GSL implementation of %s...\n\n", name); // The last two parmeters are step size and tolerance (with // different meaning for different algorithms gsl_multimin_fdfminimizer_set (minimizer, &minimizer_func, minimizer_x, initial_step_size, tolerance); iteration = 1; do { status = gsl_multimin_fdfminimizer_iterate (minimizer); D = minimizer->f; norm = gsl_blas_dnrm2(minimizer->gradient); print_stats(statsfile, string, cstruc, length,iteration, count_df_evaluations, D, prev_D, norm, 
iteration%sparsePS == 0); prev_D = D; if (status) { fprintf(stderr, "An unexpected error has occured in the iteration (status:%i)\n", status); break; } status = gsl_multimin_test_gradient (minimizer->gradient, precision); if (status == GSL_SUCCESS) fprintf(stderr, "Minimum found stopping.\n"); iteration++; } while (status == GSL_CONTINUE && iteration < max_iteration); gsl_multimin_fdfminimizer_free (minimizer); gsl_vector_free (minimizer_x); /* Custom implementation of steepest descent */ } else { if (method_id == 1) { fprintf(stderr, "Starting custom implemented steepest descent search...\n\n"); } else { fprintf(stderr, "Starting custom implemented steepest descent search with Barzilai Borwein step size...\n\n"); } iteration = 0; D = 0.0; while (iteration++ < max_iteration) { for (i=1; i <= length; i++) { gsl_vector_set (minimizer_x, i, epsilon[i]); } D = calculate_f(minimizer_x, (void*)&minimizer_pars); if (numerical) { calculate_df_numerically(minimizer_x, (void*)&minimizer_pars, minimizer_g); } else { calculate_df(minimizer_x, (void*)&minimizer_pars, minimizer_g); } for (i=1; i <= length; i++) { gradient[i] = gsl_vector_get (minimizer_g, i); } // Do line search fprintf(stderr, "\nLine search:\n"); // After the first iteration, use Barzilai-Borwain (1988) step size (currently turned off) if (iteration>1 && method_id==2) { double denominator=0.0; double numerator=0.0; for (i=1; i <= length; i++) { numerator += (epsilon[i]-prev_epsilon[i]) * (gradient[i]-prev_gradient[i]); denominator+=(gradient[i]-prev_gradient[i]) * (gradient[i]-prev_gradient[i]); } step_size = numerator / denominator; norm =1.0; } else { // Use step sized given by the user (normalize it first) step_size = initial_step_size / calculate_norm(gradient, length); } for (i=1; i <= length; i++) { prev_epsilon[i] = epsilon[i]; prev_gradient[i] = gradient[i]; } do { for (mu=1; mu <= length; mu++) { epsilon[mu] = prev_epsilon[mu] - step_size * gradient[mu]; } for (i=1; i <= length; i++) { gsl_vector_set 
(minimizer_x, i, epsilon[i]); } DD = calculate_f(minimizer_x, (void*)&minimizer_pars); if (step_size > 0.0001) { fprintf(stderr, "Old D: %.4f; New D: %.4f; Step size: %.4f\n", D, DD, step_size); } else { fprintf(stderr, "Old D: %.4f; New D: %.4f; Step size: %.4e\n", D, DD, step_size); } step_size /= 2; } while (step_size > 1e-12 && DD > D); norm = calculate_norm(gradient,length); if (DD > D) { fprintf(stderr, "Line search did not improve D in iteration %i. Stop.\n", iteration); if (hybrid_conditionals) { sample_conditionals=0; } else { break; } } print_stats(statsfile, string, cstruc, length,iteration, count_df_evaluations, DD, prev_D, norm, iteration%sparsePS == 0); if (norm<precision && iteration>1) { fprintf(stderr, "Minimum found stopping.\n"); break; } prev_D = DD; } } /* Force last dotplot to be printed */ print_stats(statsfile, string, cstruc, length,iteration, count_df_evaluations, DD, prev_D, norm, 1); } free(pf_struc); if (cstruc!=NULL) free(cstruc); (void) fflush(stdout); free(string); free(structure); RNAfold_cmdline_parser_free (&args_info); return 0; }
int main(int argc, char *argv[]){ struct RNAinverse_args_info args_info; int input_type; char *input_string, *start, *structure, *rstart, *str2, *line; char *ParamFile=NULL, *c, *ns_bases; int i,j, length, l, hd, sym; double energy=0., kT; int pf, mfe, istty; int repeat, found; do_backtrack = 0; pf = 0; mfe = 1; repeat = 0; input_type = 0; input_string = ns_bases = NULL; init_rand(); /* ############################################# # check the command line parameters ############################################# */ if(RNAinverse_cmdline_parser (argc, argv, &args_info) != 0) exit(1); /* temperature */ if(args_info.temp_given) temperature = args_info.temp_arg; /* do not take special tetra loop energies into account */ if(args_info.noTetra_given) tetra_loop=0; /* set dangle model */ if(args_info.dangles_given) dangles = args_info.dangles_arg; /* do not allow wobble pairs (GU) */ if(args_info.noGU_given) noGU = 1; /* do not allow weak closing pairs (AU,GU) */ if(args_info.noClosingGU_given) no_closingGU = 1; /* set energy model */ if(args_info.energyModel_given) energy_set = args_info.energyModel_arg; /* take another energy parameter set */ if(args_info.paramFile_given) ParamFile = strdup(args_info.paramFile_arg); /* Allow other pairs in addition to the usual AU,GC,and GU pairs */ if(args_info.nsp_given) ns_bases = strdup(args_info.nsp_arg); /* alter the alphabet */ if(args_info.alphabet_given){ symbolset=args_info.alphabet_arg; /* symbolset should only have uppercase characters */ for (l = 0; l < (int)strlen(symbolset); l++) symbolset[l] = toupper(symbolset[l]); } /* set function for optimization */ if(args_info.function_given){ if(strlen(args_info.function_arg) > 2){ RNAinverse_cmdline_parser_print_help(); exit(EXIT_FAILURE); } else{ if((*args_info.function_arg == 'm') || (*(args_info.function_arg+1) == 'm')) mfe = 1; if((*args_info.function_arg == 'p') || (*(args_info.function_arg+1) == 'p')) pf = 1; } } /* set repeat */ if(args_info.repeat_given) repeat = 
args_info.repeat_arg; /* set final cost */ if(args_info.final_given) final_cost = args_info.final_arg; /* do we wannabe verbose */ if(args_info.verbose_given) inv_verbose = 1; /* free allocated memory of command line data structure */ RNAinverse_cmdline_parser_free (&args_info); kT = (temperature+273.15)*1.98717/1000.0; istty = (isatty(fileno(stdout))&&isatty(fileno(stdin))); if (ParamFile!=NULL) read_parameter_file(ParamFile); give_up = (repeat<0); do { /* ######################################################## # handle user input from 'stdin' ######################################################## */ if(istty) print_tty_input_seq_str("Input structure & start string\n" "(lower case letters for const positions) and 0 or empty line for random start string\n"); input_type = get_multi_input_line(&input_string, 0); /* we are waiting for a structure (i.e. something like a constraint) so we skip all sequences, fasta-headers and misc lines */ while(input_type & (VRNA_INPUT_SEQUENCE | VRNA_INPUT_MISC | VRNA_INPUT_FASTA_HEADER)){ if(!istty && (input_type & VRNA_INPUT_FASTA_HEADER)) printf(">%s\n", input_string); free(input_string); input_string = NULL; input_type = get_multi_input_line(&input_string, 0); } if(input_type & (VRNA_INPUT_QUIT | VRNA_INPUT_ERROR)) break; if(input_type & (VRNA_INPUT_CONSTRAINT)){ structure = (char *)space(sizeof(char) * (strlen(input_string) + 1)); (void)sscanf(input_string, "%s", structure); /* scanf gets rid of trailing junk */ length = (int)strlen(structure); free(input_string); input_string = NULL; input_type = get_multi_input_line(&input_string, VRNA_INPUT_NOSKIP_BLANK_LINES | VRNA_INPUT_NOSKIP_COMMENTS); } if(input_type & VRNA_INPUT_QUIT) break; start = (char *)space(sizeof(char) * (length+1)); /* now we assume to get a sequence (input_string may be empty as well) */ if(input_type & VRNA_INPUT_SEQUENCE){ (void)strncpy(start, input_string, length); start[length] = '\0'; free(input_string); input_string = NULL; } /* fallback to empty start 
sequence */ else start[0] = '\0'; /* ######################################################## # done with 'stdin' handling ######################################################## */ if (ns_bases != NULL) { nonstandards = space(33); c=ns_bases; i=sym=0; if (*c=='-') { sym=1; c++; } while (*c!='\0') { if (*c!=',') { nonstandards[i++]=*c++; nonstandards[i++]=*c; if ((sym)&&(*c!=*(c-1))) { nonstandards[i++]=*c; nonstandards[i++]=*(c-1); } } c++; } } str2 = (char *) space((unsigned)length+1); if (istty) printf("length = %d\n", length); if (repeat!=0) found = (repeat>0)? repeat : (-repeat); else found = 1; /* initialize_fold(length); <- obsolete (hopefully commenting this out does not affect anything crucial ;) */ rstart = (char *) space((unsigned)length+1); while(found>0) { char *string; string = (char *) space((unsigned)length+1); strcpy(string, start); for (i=0; i<length; i++) { /* lower case characters are kept fixed, any other character not in symbolset is replaced by a random character */ if (islower(string[i])) continue; if (string[i]=='\0' || (strchr(symbolset,string[i])==NULL)) string[i]=symbolset[int_urn(0,strlen(symbolset)-1)]; } strcpy(rstart, string); /* remember start string */ if (mfe) { energy = inverse_fold(string, structure); if( (repeat>=0) || (energy<=0.0) ) { found--; hd = hamming(rstart, string); printf("%s %3d", string, hd); if (energy>0) { /* no solution found */ printf(" d= %g\n", energy); if(istty) { energy = fold(string,str2); printf("%s\n", str2); } } else printf("\n"); } } if (pf) { if (!(mfe && give_up && (energy>0))) { /* unless we gave up in the mfe part */ double prob, min_en, sfact=1.07; /* get a reasonable pf_scale */ min_en = fold(string,str2); pf_scale = exp(-(sfact*min_en)/kT/length); /* init_pf_fold(length); <- obsolete (hopefully commenting this out does not affect anything crucial ;) */ energy = inverse_pf_fold(string, structure); prob = exp(-energy/kT); hd = hamming(rstart, string); printf("%s %3d (%g)\n", string, hd, prob); 
free_pf_arrays(); } if (!mfe) found--; } (void) fflush(stdout); free(string); } free(rstart); free_arrays(); free(structure); free(str2); free(start); (void) fflush(stdout); } while (1); return 0; }
/*
 * Calculate the gradient of the objective function analytically.
 *
 * v       current perturbation vector; copied into the global epsilon
 * params  pointer to a minimizer_pars_struct (length, seq, tau, sigma, kT)
 * df      receives the gradient; entries 1..length are computed and entry 0
 *         is set to zero, because the objective as evaluated here only
 *         involves positions 1..length
 *
 * The unpaired probabilities come from one unconstrained fold. The
 * conditional unpaired probabilities are obtained either exactly, from one
 * constrained fold per position, or - when the global sample_conditionals
 * is set - from counts over stochastically sampled structures.
 *
 * Side effects: overwrites the global tables epsilon, p_unpaired, p_pp,
 * p_unpaired_cond and p_unpaired_cond_sampled, increments
 * count_df_evaluations and leaves fold_constrained at 0.
 */
void calculate_df (const gsl_vector *v, void *params, gsl_vector *df) {

  double sum;
  int ii, jj, i, j, mu, length, N;
  minimizer_pars_struct *pars = (minimizer_pars_struct *)params;
  double q_tmp;
  double sigma_tmp;

  length = pars->length;

  count_df_evaluations++;

  fprintf(stderr, "=> Evaluating gradient (analytical, %s)...\n",
          sample_conditionals == 1 ? "sampled conditionals" : "exact conditionals");

  for (i = 0; i <= length; i++) {
    epsilon[i] = gsl_vector_get(v, i);
    p_unpaired[i] = 0.0;
    for (j = 0; j <= length; j++) {
      p_pp[i][j] = p_pp[j][i] = 0.0;
      p_unpaired_cond[i][j] = 0.0;
      p_unpaired_cond_sampled[i][j] = 0.0;
    }
  }

  init_pf_fold(length);
  pf_fold_pb(pars->seq, NULL);

  for (i = 1; i < length; i++) {
    for (j = i+1; j <= length; j++) {
      p_pp[i][j] = p_pp[j][i] = pr[iindx[i]-j];
    }
  }

  get_pair_prob_vector(p_pp, p_unpaired, length, 1);

  free_pf_arrays();

  if (!sample_conditionals) {
    /* Calculate conditional probabilities exactly */

    char *constraints = (char *) space((unsigned) length+1);

    fold_constrained = 1;

    for (ii = 1; ii <= length; ii++) {

      /* Set constraints strings like
         x............
         .x...........
         ..x..........  */
      memset(constraints, '.', length);
      constraints[ii-1] = 'x';

      fprintf(stderr, ".");

      init_pf_fold(length);
      pf_fold_pb(pars->seq, constraints);

      for (i = 1; i < length; i++) {
        for (j = i+1; j <= length; j++) {
          p_pp[i][j] = p_pp[j][i] = pr[iindx[i]-j];
        }
      }
      get_pair_prob_vector(p_pp, p_unpaired_cond[ii], length, 1);
      free_pf_arrays();
    }

    fprintf(stderr, "\n");

    fold_constrained = 0;
    free(constraints);

  } else {
    /* Sample gradient with stochastic backtracking */

    int *unpaired_count;
    int **unpaired_count_cond;

    unpaired_count = (int *) space(sizeof(int)*(length+1));
    unpaired_count_cond = (int **) space(sizeof(int *)*(length+1));

    for (i = 0; i <= length; i++) {
      unpaired_count[i] = 0;
      unpaired_count_cond[i] = (int *) space(sizeof(int)*(length+1));
      for (j = 0; j <= length; j++) {
        unpaired_count_cond[i][j] = 0;
      }
    }

    fold_constrained = 0;

    init_pf_fold(length);
    pf_fold_pb(pars->seq, NULL);

    N = 10000;

    for (i = 1; i <= N; i++) {
      char *s;
      s = pbacktrack_pb(pars->seq);

      /* count how often ii is unpaired, and how often jj is unpaired
         at the same time */
      for (ii = 1; ii <= length; ii++) {
        if (s[ii-1] == '.') {
          unpaired_count[ii]++;
          for (jj = 1; jj <= length; jj++) {
            if (s[jj-1] == '.') {
              unpaired_count_cond[ii][jj]++;
            }
          }
        }
      }
      free(s);
    }

    /* the arrays of the fold used for sampling are done with */
    free_pf_arrays();

    for (i = 1; i <= length; i++) {
      for (ii = 1; ii <= length; ii++) {
        /* a positive joint count implies unpaired_count[i] > 0 */
        if (unpaired_count_cond[i][ii] > 0) {
          p_unpaired_cond_sampled[i][ii] = (double)unpaired_count_cond[i][ii]/(double)unpaired_count[i];
          p_unpaired_cond[i][ii] = (double)unpaired_count_cond[i][ii]/(double)unpaired_count[i];
        } else {
          p_unpaired_cond_sampled[i][ii] = 0.0;
          p_unpaired_cond[i][ii] = 0.0;
        }
      }
    }

    for (i = 0; i <= length; i++) {
      free(unpaired_count_cond[i]);
    }
    free(unpaired_count_cond);
    free(unpaired_count);
  }

  /* entry 0 is not covered by the loop below; give it a defined value */
  gsl_vector_set(df, 0, 0.0);

  for (mu = 1; mu <= length; mu++) {
    sum = 0.0;
    for (i = 1; i <= length; i++) {

      /* Missing data is marked by a negative q; such positions get a very
         high sigma so that they are effectively ignored (same treatment
         as in calculate_f) */
      if (q_unpaired[i] < -0.5) {
        sigma_tmp = 10000;
        q_tmp = 0.5;
      } else {
        sigma_tmp = pars->sigma;
        q_tmp = q_unpaired[i];
      }

      sum += (1 / sigma_tmp) * p_unpaired[i] * (p_unpaired[i] - q_tmp) * (p_unpaired[mu] - p_unpaired_cond[i][mu]);
    }

    gsl_vector_set(df, mu, (2 * epsilon[mu] / pars->tau) + (2 / pars->kT * sum));
  }
}
int main(int argc, char *argv[]) { char *string, *line; char *structure=NULL, *cstruc=NULL; char fname[13], ffname[20], gfname[20]; char *ParamFile=NULL; char *ns_bases=NULL, *c; int i, length, l, sym, r; double energy, min_en; double kT, sfact=1.07; int pf=0, noPS=0, istty; int noconv=0; int circ=0; do_backtrack = 1; string=NULL; for (i=1; i<argc; i++) { if (argv[i][0]=='-') switch ( argv[i][1] ) { case 'T': if (argv[i][2]!='\0') usage(); if(i==argc-1) usage(); r=sscanf(argv[++i], "%lf", &temperature); if (!r) usage(); break; case 'p': pf=1; if (argv[i][2]!='\0') (void) sscanf(argv[i]+2, "%d", &do_backtrack); break; case 'n': if ( strcmp(argv[i], "-noGU")==0) noGU=1; if ( strcmp(argv[i], "-noCloseGU")==0) no_closingGU=1; if ( strcmp(argv[i], "-noLP")==0) noLonelyPairs=1; if ( strcmp(argv[i], "-noPS")==0) noPS=1; if ( strcmp(argv[i], "-nsp") ==0) { if (i==argc-1) usage(); ns_bases = argv[++i]; } if ( strcmp(argv[i], "-noconv")==0) noconv=1; break; case '4': tetra_loop=0; break; case 'e': if(i==argc-1) usage(); r=sscanf(argv[++i],"%d", &energy_set); if (!r) usage(); break; case 'C': fold_constrained=1; break; case 'c': if ( strcmp(argv[i], "-circ")==0) circ=1; break; case 'S': if(i==argc-1) usage(); r=sscanf(argv[++i],"%lf", &sfact); if (!r) usage(); break; case 'd': dangles=0; if (argv[i][2]!='\0') { r=sscanf(argv[i]+2, "%d", &dangles); if (r!=1) usage(); } break; case 'P': if (i==argc-1) usage(); ParamFile = argv[++i]; break; default: usage(); } } if (circ && noLonelyPairs) fprintf(stderr, "warning, depending on the origin of the circular sequence, some structures may be missed when using -noLP\nTry rotating your sequence a few times\n"); if (ParamFile != NULL) read_parameter_file(ParamFile); if (ns_bases != NULL) { nonstandards = space(33); c=ns_bases; i=sym=0; if (*c=='-') { sym=1; c++; } while (*c!='\0') { if (*c!=',') { nonstandards[i++]=*c++; nonstandards[i++]=*c; if ((sym)&&(*c!=*(c-1))) { nonstandards[i++]=*c; nonstandards[i++]=*(c-1); } } c++; } } istty = 
isatty(fileno(stdout))&&isatty(fileno(stdin)); if ((fold_constrained)&&(istty)) { printf("Input constraints using the following notation:\n"); printf("| : paired with another base\n"); printf(". : no constraint at all\n"); printf("x : base must not pair\n"); printf("< : base i is paired with a base j<i\n"); printf("> : base i is paired with a base j>i\n"); printf("matching brackets ( ): base i pairs base j\n"); } do { /* main loop: continue until end of file */ if (istty) { printf("\nInput string (upper or lower case); @ to quit\n"); printf("%s%s\n", scale1, scale2); } fname[0]='\0'; if ((line = get_line(stdin))==NULL) break; /* skip comment lines and get filenames */ while ((*line=='*')||(*line=='\0')||(*line=='>')) { if (*line=='>') (void) sscanf(line, ">%12s", fname); printf("%s\n", line); free(line); if ((line = get_line(stdin))==NULL) break; } if ((line ==NULL) || (strcmp(line, "@") == 0)) break; string = (char *) space(strlen(line)+1); (void) sscanf(line,"%s",string); free(line); length = (int) strlen(string); structure = (char *) space((unsigned) length+1); if (fold_constrained) { cstruc = get_line(stdin); if (cstruc!=NULL) strncpy(structure, cstruc, length); else fprintf(stderr, "constraints missing\n"); } for (l = 0; l < length; l++) { string[l] = toupper(string[l]); if (!noconv && string[l] == 'T') string[l] = 'U'; } if (istty) printf("length = %d\n", length); /* initialize_fold(length); */ if (circ) min_en = circfold(string, structure); else min_en = fold(string, structure); printf("%s\n%s", string, structure); if (istty) printf("\n minimum free energy = %6.2f kcal/mol\n", min_en); else printf(" (%6.2f)\n", min_en); (void) fflush(stdout); if (fname[0]!='\0') { strcpy(ffname, fname); strcat(ffname, "_ss.ps"); strcpy(gfname, fname); strcat(gfname, "_ss.g"); } else { strcpy(ffname, "rna.ps"); strcpy(gfname, "rna.g"); } if (!noPS) { if (length<2000) (void) PS_rna_plot(string, structure, ffname); else fprintf(stderr,"INFO: structure too long, not doing 
xy_plot\n"); } if (length>2000) free_arrays(); if (pf) { char *pf_struc; pf_struc = (char *) space((unsigned) length+1); if (dangles==1) { dangles=2; /* recompute with dangles as in pf_fold() */ min_en = (circ) ? energy_of_circ_struct(string, structure) : energy_of_struct(string, structure); dangles=1; } kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */ pf_scale = exp(-(sfact*min_en)/kT/length); if (length>2000) fprintf(stderr, "scaling factor %f\n", pf_scale); (circ) ? init_pf_circ_fold(length) : init_pf_fold(length); if (cstruc!=NULL) strncpy(pf_struc, cstruc, length+1); energy = (circ) ? pf_circ_fold(string, pf_struc) : pf_fold(string, pf_struc); if (do_backtrack) { printf("%s", pf_struc); if (!istty) printf(" [%6.2f]\n", energy); else printf("\n"); } if ((istty)||(!do_backtrack)) printf(" free energy of ensemble = %6.2f kcal/mol\n", energy); if (do_backtrack) { plist *pl1,*pl2; char *cent; double dist, cent_en; cent = centroid(length, &dist); cent_en = (circ) ? energy_of_circ_struct(string, cent) :energy_of_struct(string, cent); printf("%s {%6.2f d=%.2f}\n", cent, cent_en, dist); free(cent); if (fname[0]!='\0') { strcpy(ffname, fname); strcat(ffname, "_dp.ps"); } else strcpy(ffname, "dot.ps"); pl1 = make_plist(length, 1e-5); pl2 = b2plist(structure); (void) PS_dot_plot_list(string, ffname, pl1, pl2, ""); free(pl2); if (do_backtrack==2) { pl2 = stackProb(1e-5); if (fname[0]!='\0') { strcpy(ffname, fname); strcat(ffname, "_dp2.ps"); } else strcpy(ffname, "dot2.ps"); PS_dot_plot_list(string, ffname, pl1, pl2, "Probabilities for stacked pairs (i,j)(i+1,j-1)"); free(pl2); } free(pl1); free(pf_struc); } printf(" frequency of mfe structure in ensemble %g; ", exp((energy-min_en)/kT)); if (do_backtrack) printf("ensemble diversity %-6.2f", mean_bp_dist(length)); printf("\n"); free_pf_arrays(); } if (cstruc!=NULL) free(cstruc); (void) fflush(stdout); free(string); free(structure); } while (1); return 0; }
int main(int argc, char *argv[]){ struct RNA2Dfold_args_info args_info; unsigned int input_type; char *string, *input_string, *orig_sequence; char *mfe_structure=NULL, *structure1=NULL, *structure2=NULL, *reference_struc1=NULL, *reference_struc2=NULL; char *ParamFile=NULL; int i, j, length, l; double min_en; double kT, sfact=1.07; int pf=0,istty; int noconv=0; int circ=0; int maxDistance1 = -1; int maxDistance2 = -1; int do_backtrack = 1; int stBT = 0; int nstBT = 0; string=NULL; dangles = 2; struct nbhoods *neighborhoods = NULL; struct nbhoods *neighborhoods_cur = NULL; string = input_string = orig_sequence = NULL; /* ############################################# # check the command line prameters ############################################# */ if(RNA2Dfold_cmdline_parser (argc, argv, &args_info) != 0) exit(1); /* temperature */ if(args_info.temp_given) temperature = args_info.temp_arg; /* max distance to 1st reference structure */ if(args_info.maxDist1_given) maxDistance1 = args_info.maxDist1_arg; /* max distance to 2nd reference structure */ if(args_info.maxDist2_given) maxDistance2 = args_info.maxDist2_arg; /* compute partition function and boltzmann probabilities */ if(args_info.partfunc_given) pf = 1; /* do stachastic backtracking */ if(args_info.stochBT_given){ pf = 1; stBT = 1; nstBT = args_info.stochBT_arg; } if(args_info.noTetra_given) tetra_loop=0; /* assume RNA sequence to be circular */ if(args_info.circ_given) circ=1; /* dangle options */ if(args_info.dangles_given) dangles=args_info.dangles_arg; /* set number of threads for parallel computation */ if(args_info.numThreads_given) #ifdef _OPENMP omp_set_num_threads(args_info.numThreads_arg); #else nrerror("\'j\' option is available only if compiled with OpenMP support!"); #endif /* get energy parameter file name */ if(args_info.parameterFile_given) ParamFile = strdup(args_info.parameterFile_arg); /* do not allow GU pairs ? 
*/ if(args_info.noGU_given) noGU = 1; /* do not allow GU pairs at the end of helices? */ if(args_info.noClosingGU_given) no_closingGU = 1; /* pf scaling factor */ if(args_info.pfScale_given) sfact = args_info.pfScale_arg; /* do not backtrack structures ? */ if(args_info.noBT_given) do_backtrack = 0; for (i = 0; i < args_info.neighborhood_given; i++){ int kappa, lambda; kappa = lambda = 0; if(sscanf(args_info.neighborhood_arg[i], "%d:%d", &kappa, &lambda) == 2); if ((kappa>-2) && (lambda>-2)){ if(neighborhoods_cur != NULL){ neighborhoods_cur->next = (nbhoods *)space(sizeof(nbhoods)); neighborhoods_cur = neighborhoods_cur->next; } else{ neighborhoods = (nbhoods *)space(sizeof(nbhoods)); neighborhoods_cur = neighborhoods; } neighborhoods_cur->k = kappa; neighborhoods_cur->l = lambda; neighborhoods_cur->next = NULL; } } /* free allocated memory of command line data structure */ RNA2Dfold_cmdline_parser_free (&args_info); /* ############################################# # begin actual program code ############################################# */ if (ParamFile != NULL) read_parameter_file(ParamFile); istty = isatty(fileno(stdout))&&isatty(fileno(stdin)); /* ############################################# # main loop, continue until end of file ############################################# */ do { if (istty) print_tty_input_seq_str("Input strings\n1st line: sequence (upper or lower case)\n2nd + 3rd line: reference structures (dot bracket notation)\n@ to quit\n"); while((input_type = get_input_line(&input_string, 0)) & VRNA_INPUT_FASTA_HEADER){ printf(">%s\n", input_string); /* print fasta header if available */ free(input_string); } /* break on any error, EOF or quit request */ if(input_type & (VRNA_INPUT_QUIT | VRNA_INPUT_ERROR)){ break;} /* else assume a proper sequence of letters of a certain alphabet (RNA, DNA, etc.) 
*/ else{ length = (int) strlen(input_string); string = strdup(input_string); free(input_string); } mfe_structure = (char *) space((unsigned) length+1); structure1 = (char *) space((unsigned) length+1); structure2 = (char *) space((unsigned) length+1); input_type = get_input_line(&input_string, VRNA_INPUT_NOSKIP_COMMENTS); if(input_type & VRNA_INPUT_QUIT){ break;} else if((input_type & VRNA_INPUT_MISC) && (strlen(input_string) > 0)){ reference_struc1 = strdup(input_string); free(input_string); if(strlen(reference_struc1) != length) nrerror("sequence and 1st reference structure have unequal length"); } else nrerror("1st reference structure missing\n"); strncpy(structure1, reference_struc1, length); input_type = get_input_line(&input_string, VRNA_INPUT_NOSKIP_COMMENTS); if(input_type & VRNA_INPUT_QUIT){ break;} else if((input_type & VRNA_INPUT_MISC) && (strlen(input_string) > 0)){ reference_struc2 = strdup(input_string); free(input_string); if(strlen(reference_struc2) != length) nrerror("sequence and 2nd reference structure have unequal length"); } else nrerror("2nd reference structure missing\n"); strncpy(structure2, reference_struc2, length); /* convert DNA alphabet to RNA if not explicitely switched off */ if(!noconv) str_DNA2RNA(string); /* store case-unmodified sequence */ orig_sequence = strdup(string); /* convert sequence to uppercase letters only */ str_uppercase(string); if (istty) printf("length = %d\n", length); min_en = (circ) ? circfold(string, mfe_structure) : fold(string, mfe_structure); printf("%s\n%s", orig_sequence, mfe_structure); if (istty) printf("\n minimum free energy = %6.2f kcal/mol\n", min_en); else printf(" (%6.2f)\n", min_en); printf("%s (%6.2f) <ref 1>\n", structure1, (circ) ? energy_of_circ_structure(string, structure1, 0) : energy_of_structure(string,structure1, 0)); printf("%s (%6.2f) <ref 2>\n", structure2, (circ) ? 
energy_of_circ_structure(string, structure2, 0) : energy_of_structure(string,structure2, 0)); /* get all variables need for the folding process (some memory will be preallocated here too) */ TwoDfold_vars *mfe_vars = get_TwoDfold_variables(string, structure1, structure2, circ); mfe_vars->do_backtrack = do_backtrack; TwoDfold_solution *mfe_s = TwoDfoldList(mfe_vars, maxDistance1, maxDistance2); if(!pf){ #ifdef COUNT_STATES printf("k\tl\tn\tMFE\tMFE-structure\n"); for(i = 0; mfe_s[i].k != INF; i++){ printf("%d\t%d\t%lu\t%6.2f\t%s\n", mfe_s[i].k, mfe_s[i].l, mfe_vars->N_F5[length][mfe_s[i].k][mfe_s[i].l/2], mfe_s[i].en, mfe_s[i].s); if(mfe_s[i].s) free(mfe_s[i].s); } free(mfe_s); #else printf("k\tl\tMFE\tMFE-structure\n"); for(i = 0; mfe_s[i].k != INF; i++){ printf("%d\t%d\t%6.2f\t%s\n", mfe_s[i].k, mfe_s[i].l, mfe_s[i].en, mfe_s[i].s); if(mfe_s[i].s) free(mfe_s[i].s); } free(mfe_s); #endif } if(pf){ int maxD1 = (int) mfe_vars->maxD1; int maxD2 = (int) mfe_vars->maxD2; float mmfe = INF; double Q; for(i = 0; mfe_s[i].k != INF; i++){ if(mmfe > mfe_s[i].en) mmfe = mfe_s[i].en; } kT = (temperature+K0)*GASCONST/1000.0; /* in Kcal */ pf_scale = exp(-(sfact*mmfe)/kT/length); if (length>2000) fprintf(stdout, "scaling factor %f\n", pf_scale); /* get all variables need for the folding process (some memory will be preallocated there too) */ //TwoDpfold_vars *q_vars = get_TwoDpfold_variables_from_MFE(mfe_vars); /* we dont need the mfe vars and arrays anymore, so we can savely free their occupying memory */ destroy_TwoDfold_variables(mfe_vars); TwoDpfold_vars *q_vars = get_TwoDpfold_variables(string, structure1, structure2, circ); TwoDpfold_solution *pf_s = TwoDpfoldList(q_vars, maxD1, maxD2); Q = 0.; for(i = 0; pf_s[i].k != INF; i++){ Q += pf_s[i].q; } double fee = (-log(Q)-length*log(pf_scale))*kT; if(!stBT){ printf("free energy of ensemble = %6.2f kcal/mol\n",fee); printf("k\tl\tP(neighborhood)\tP(MFE in neighborhood)\tP(MFE in ensemble)\tMFE\tE_gibbs\tMFE-structure\n"); 
for(i=0; pf_s[i].k != INF;i++){ float free_energy = (-log((float)pf_s[i].q)-length*log(pf_scale))*kT; if((pf_s[i].k != mfe_s[i].k) || (pf_s[i].l != mfe_s[i].l)) nrerror("This should never happen!"); fprintf(stdout, "%d\t%d\t%2.8f\t%2.8f\t%2.8f\t%6.2f\t%6.2f\t%s\n", pf_s[i].k, pf_s[i].l, (float)(pf_s[i].q)/(float)Q, exp((free_energy-mfe_s[i].en)/kT), exp((fee-mfe_s[i].en)/kT), mfe_s[i].en, free_energy, mfe_s[i].s); } } else{ init_rand(); if(neighborhoods != NULL){ nbhoods *tmp, *tmp2; for(tmp = neighborhoods; tmp != NULL; tmp = tmp->next){ int k,l; k = tmp->k; l = tmp->l; for(i = 0; i < nstBT; i++){ char *s = TwoDpfold_pbacktrack(q_vars, k, l); printf("%d\t%d\t%s\t%6.2f\n", k, l, s, q_vars->circ ? energy_of_circ_structure(q_vars->sequence, s, 0) : energy_of_structure(q_vars->sequence, s, 0)); } } } else{ for(i=0; pf_s[i].k != INF;i++){ for(l = 0; l < nstBT; l++){ char *s = TwoDpfold_pbacktrack(q_vars, pf_s[i].k, pf_s[i].l); printf("%d\t%d\t%s\t%6.2f\n", pf_s[i].k, pf_s[i].l, s, q_vars->circ ? energy_of_circ_structure(q_vars->sequence, s, 0) : energy_of_structure(q_vars->sequence, s, 0)); } } } } free_pf_arrays(); for(i=0; mfe_s[i].k != INF;i++){ if(mfe_s[i].s) free(mfe_s[i].s); } free(pf_s); free(mfe_s); /* destroy the q_vars */ destroy_TwoDpfold_variables(q_vars); } else destroy_TwoDfold_variables(mfe_vars); free_arrays(); free(string); free(orig_sequence); free(mfe_structure); free(structure1); free(structure2); free(reference_struc1); free(reference_struc2); string = orig_sequence = mfe_structure = NULL; } while (1); return 0; }
int main(int argc, char *argv[]) { float *T[MAXSEQ]; int i,j, istty, n=0; int type, length, taxa_list=0; float dist; FILE *somewhere=NULL; char *structure; char *line=NULL, fname[FILENAME_MAX_LENGTH], *list_title=NULL; plist *pr_pl, *mfe_pl; pr_pl = mfe_pl = NULL; command_line(argc, argv); if((outfile[0]=='\0')&&(task=='m')&&(edit_backtrack)) strcpy(outfile,"backtrack.file"); if (outfile[0]!='\0') somewhere = fopen(outfile,"w"); if (somewhere==NULL) somewhere = stdout; istty = (isatty(fileno(stdout))&&isatty(fileno(stdin))); while (1) { if ((istty)&&(n==0)) { printf("\nInput sequence; @ to quit\n"); printf("%s\n", ruler); } type = 0; do { /* get sequence to fold */ if (line!=NULL) free(line); *fname='\0'; if ((line=get_line(stdin))==NULL) {type = 999; break;} if (line[0]=='@') type = 999; if (line[0]=='*') { if (taxa_list==0) { if (task=='m') taxa_list=1; printf("%s\n", line); type = 0; } else { list_title = strdup(line); type = 888; } } if (line[0]=='>') { if (sscanf(line,">%" XSTR(FILENAME_ID_LENGTH) "s", fname)!=0) strcat(fname, "_dp.ps"); if (taxa_list) printf("%d : %s\n", n+1, line+1); else printf("%s\n",line); type = 0; } if (isalpha(line[0])) { char *cp; cp =strchr(line,' '); if (cp) *cp='\0'; type = 1; } } while(type==0); if( (task == 'm')&&(type>800) ) { if (taxa_list) printf("* END of taxa list\n"); printf("> p %d (pdist)\n",n); for (i=1; i<n; i++) { for (j=0; j<i; j++) { printf("%g ",profile_edit_distance(T[i], T[j])); if(edit_backtrack) fprintf(somewhere,"> %d %d\n",i+1,j+1); print_aligned_lines(somewhere); } printf("\n"); } if (type==888) { /* do another distance matrix */ n = 0; printf("%s\n", list_title); free(list_title); } } if(type>800) { for (i=0; i<n; i++) free_profile(T[i]); if (type == 888) continue; if (outfile[0]!='\0') (void) fclose(somewhere); if (line!= NULL) free(line); return 0; /* finito */ } length = (int) strlen(line); for (i=0; i<length; i++) { line[i]=toupper(line[i]); if (!noconv && line[i] == 'T') line[i] = 'U'; } /* 
init_pf_fold(length); <- obsolete */ structure = (char *) space((length+1)*sizeof(char)); (void) pf_fold(line,structure); if (*fname=='\0') sprintf(fname, "%d_dp.ps", n+1); /* PS_dot_plot(line, fname); <- NOT THREADSAFE and obsolete function! */ /* get pairlist of probability matrix */ assign_plist_from_pr(&pr_pl, pr, length, 1e-5); /* no previous mfe call thus no mfe structure information known */ mfe_pl = (plist *)space(sizeof(plist)); mfe_pl[0].i = mfe_pl[0].j = 0; /* call threadsafe dot plot printing function */ PS_dot_plot_list(line, fname, pr_pl, mfe_pl, ""); T[n] = Make_bp_profile_bppm(pr, length); if((istty)&&(task=='m')) printf("%s\n",structure); free(structure); free(mfe_pl); free(pr_pl); free_pf_arrays(); n++; switch (task) { case 'p' : if (n==2) { dist = profile_edit_distance(T[0],T[1]); printf("%g\n",dist); print_aligned_lines(somewhere); free_profile(T[0]); free_profile(T[1]); n=0; } break; case 'f' : if (n>1) { dist = profile_edit_distance(T[1], T[0]); printf("%g\n",dist); print_aligned_lines(somewhere); free_profile(T[1]); n=1; } break; case 'c' : if (n>1) { dist = profile_edit_distance(T[1], T[0]); printf("%g\n",dist); print_aligned_lines(somewhere); free_profile(T[0]); T[0] = T[1]; n=1; } break; case 'm' : break; default : nrerror("This can't happen."); } /* END switch task */ (void) fflush(stdout); } /* END while */ if (line !=NULL) free(line); return 0; }