int main(int argc, char *argv[]) { struct RNAfold_args_info args_info; char *string, *input_string, *structure=NULL, *cstruc=NULL; char fname[80], ffname[80], gfname[80], *ParamFile=NULL; char *ns_bases=NULL, *c; int i, j, ii, jj, mu, length, l, sym, r, pf=0, noconv=0; unsigned int input_type; double energy, min_en, kT, sfact=1.07; int doMEA=0, circular = 0, N; char *pf_struc; double dist; plist *pl; FILE * filehandle; FILE * statsfile; char* line; double tau = 0.01; /* Variance of energy parameters */ double sigma = 0.01; /* Variance of experimental constraints */ double *gradient; /* Gradient for steepest descent search epsilon[i+1]= epsilon[i] - gradient * step_size */ double initial_step_size = 0.5; /* Initial step size for steepest descent search */ double step_size; double D; /* Discrepancy (i.e. value of objective function) for the current prediction */ int iteration, max_iteration = 2000; /* Current and maximum number of iterations after which algorithm stops */ double precision = 0.1; /* cutoff used for stop conditions */ double tolerance = 0.1; /* Parameter used by various GSL minimizers */ int method_id = 1; /* Method to use for minimization, 0 and 1 are custom steepest descent, the rest are GSL implementations (see below)*/ int initial_guess_method = 0; int sample_N = 1000; double *prev_epsilon; double *prev_gradient; double DD, prev_D, sum, norm; int status; double* gradient_numeric; double* gradient_numeric_gsl; /* Minimizer vars */ const gsl_multimin_fdfminimizer_type *T; gsl_multimin_fdfminimizer *minimizer; gsl_vector *minimizer_x; gsl_vector *minimizer_g; gsl_multimin_function_fdf minimizer_func; minimizer_pars_struct minimizer_pars; char *constraints; char outfile[256]; char constraints_file[256]; char epsilon_file[256]; FILE* fh; double last_non_nan_lnQ; pf_overflow = 0; pf_underflow = 0; dangles=2; do_backtrack = 1; string = NULL; noPS = 0; outfile[0]='\0'; epsilon_file[0]='\0'; strcpy(psDir, "dotplots"); if(RNAfold_cmdline_parser (argc, argv, &args_info) != 0) exit(1); /* RNAbpfold specific options */ if (args_info.tau_given) tau = args_info.tau_arg; if (args_info.sigma_given) sigma = args_info.sigma_arg; if (args_info.precision_given) precision = args_info.precision_arg; if (args_info.step_given) initial_step_size = args_info.step_arg; if (args_info.maxN_given) max_iteration = args_info.maxN_arg; if (args_info.minimization_given) method_id = args_info.minimization_arg; if (args_info.init_given) initial_guess_method = args_info.init_arg; if (args_info.tolerance_given) tolerance = args_info.tolerance_arg; if (args_info.outfile_given) strcpy(outfile, args_info.outfile_arg); if (args_info.constraints_given) strcpy(constraints_file, args_info.constraints_arg); if (args_info.epsilon_given) strcpy(epsilon_file, args_info.epsilon_arg); if (args_info.sampleGradient_given) sample_conditionals=1; if (args_info.hybridGradient_given) { sample_conditionals=1; hybrid_conditionals=1; } if (args_info.numericalGradient_given) numerical=1; if (args_info.sampleStructure_given) sample_structure=1; if (args_info.psDir_given) strcpy(psDir, args_info.psDir_arg); if (args_info.sparsePS_given) sparsePS=args_info.sparsePS_arg; if (args_info.gridSearch_given) grid_search = 1; /* Generic RNAfold options */ if (args_info.temp_given) temperature = args_info.temp_arg; if (args_info.reference_given) fold_constrained=1; if (args_info.noTetra_given) tetra_loop=0; if (args_info.dangles_given) dangles = args_info.dangles_arg; if (args_info.noLP_given) noLonelyPairs = 1; if (args_info.noGU_given) noGU = 1; if (args_info.noClosingGU_given) no_closingGU = 1; if (args_info.noconv_given) noconv = 1; if (args_info.energyModel_given) energy_set = args_info.energyModel_arg; if (args_info.paramFile_given) ParamFile = strdup(args_info.paramFile_arg); if (args_info.nsp_given) ns_bases = strdup(args_info.nsp_arg); if (args_info.pfScale_given) sfact = args_info.pfScale_arg; if (args_info.noPS_given) noPS=1; /* Create postscript directory */ if (!noPS) { struct stat stat_p; if (stat (psDir, &stat_p) != 0) { if (mkdir(psDir, S_IRWXU|S_IROTH|S_IRGRP ) !=0) { fprintf(stderr, "WARNING: Could not create directory: %s", psDir); } } } if (ParamFile != NULL) { read_parameter_file(ParamFile); } if (ns_bases != NULL) { nonstandards = space(33); c=ns_bases; i=sym=0; if (*c=='-') { sym=1; c++; } while (*c!='\0') { if (*c!=',') { nonstandards[i++]=*c++; nonstandards[i++]=*c; if ((sym)&&(*c!=*(c-1))) { nonstandards[i++]=*c; nonstandards[i++]=*(c-1); } } c++; } } /*Read sequence*/ fname[0] = '\0'; while((input_type = get_input_line(&input_string, 0)) & VRNA_INPUT_FASTA_HEADER) { (void) sscanf(input_string, "%42s", fname); free(input_string); } length = (int) strlen(input_string); string = strdup(input_string); free(input_string); structure = (char *) space((unsigned) length+1); /* For testing purpose pass dot bracket structure of reference structure via -C */ if (fold_constrained) { input_type = get_input_line(&input_string, VRNA_INPUT_NOSKIP_COMMENTS); if(input_type & VRNA_INPUT_QUIT) { exit(1); } else if((input_type & VRNA_INPUT_MISC) && (strlen(input_string) > 0)) { cstruc = strdup(input_string); free(input_string); } else warn_user("-C was given but reference structure is missing"); } if(noconv) { str_RNA2RNA(string); } else { str_DNA2RNA(string); } /* Allocating space */ epsilon = (double *) space(sizeof(double)*(length+1)); exp_pert = (double **)space(sizeof(double *)*(length+1)); perturbations = (double **)space(sizeof(double *)*(length+1)); prev_epsilon = (double *) space(sizeof(double)*(length+1)); gradient = (double *) space(sizeof(double)*(length+1)); gradient_numeric = (double *) space(sizeof(double)*(length+1)); gradient_numeric_gsl = (double *) space(sizeof(double)*(length+1)); prev_gradient = (double *) space(sizeof(double)*(length+1)); q_unpaired = (double *) space(sizeof(double)*(length+1)); p_unpaired_cond = (double **)space(sizeof(double *)*(length+1)); p_unpaired_cond_sampled = (double **)space(sizeof(double *)*(length+1)); p_pp = (double **)space(sizeof(double *)*(length+1)); p_unpaired = (double *) space(sizeof(double)*(length+1)); p_unpaired_tmp = (double *) space(sizeof(double)*(length+1)); for (i=0; i <= length; i++) { epsilon[i] = gradient[i] = q_unpaired[i] = 0.0; p_unpaired_cond[i] = (double *) space(sizeof(double)*(length+1)); p_unpaired_cond_sampled[i] = (double *) space(sizeof(double)*(length+1)); p_pp[i] = (double *) space(sizeof(double)*(length+1)); exp_pert[i] = (double *) space(sizeof(double)*(length+1)); perturbations[i] = (double *) space(sizeof(double)*(length+1)); for (j=0; j <= length; j++) { p_pp[i][j]=p_unpaired_cond[i][j] = 0.0; p_unpaired_cond_sampled[i][j] = 0.0; } } /*** If file with perturbation vector epsilon is given we fold using this epsilon and are done ***/ if (args_info.epsilon_given) { plist *pl, *pl1,*pl2; filehandle = fopen (epsilon_file,"r"); if (filehandle == NULL) { nrerror("Could not open file with perturbation vector."); } i=1; while (1) { double t; line = get_line(filehandle); if (line == NULL) break; if (i>length) nrerror("Too many values in perturbation vector file."); if (sscanf(line, "%lf", &epsilon[i]) !=1) { nrerror("Error while reading perturbation vector file."); } i++; } if (i-1 != length) { nrerror("Too few values in perturbation vector file."); } init_pf_fold(length); pf_fold_pb(string, NULL); sprintf(fname,"%s/dot.ps", psDir); pl1 = make_plist(length, 1e-5); (void) PS_dot_plot_list_epsilon(string, fname, NULL, pl1, epsilon, ""); exit(0); } /*** Get constraints from reference structure or from external file ***/ /* Structure was given by -C */ if (fold_constrained) { for (i=0; i<length; i++) { if (cstruc[i] == '(' || cstruc[i] == ')') { q_unpaired[i+1] = 0.0; } else { q_unpaired[i+1] = 1.0; } } /*Read constraints from file*/ } else { filehandle = fopen (constraints_file,"r"); if (filehandle == NULL) { nrerror("No constraints given as dot bracket or wrong file name"); } i=1; while (1) { double t; line = get_line(filehandle); if (line == NULL) break; if (i>length) nrerror("Too many values in constraints.dat"); if (sscanf(line, "%lf", &q_unpaired[i]) !=1) { nrerror("Error while reading constraints.dat"); } i++; } if (i-1 != length) { nrerror("Too few values in constraints.dat"); } } /* Create file handle */ if (outfile[0] !='\0') { statsfile = fopen (outfile,"w"); } else { statsfile = fopen ("stats.dat","w"); } setvbuf(statsfile, NULL, _IONBF, 0); if (!grid_search) { fprintf(statsfile, "Iteration\tDiscrepancy\tNorm\tdfCount\tMEA\tSampled_structure\tSampled_energy\tSampled_distance\tEpsilon\ttimestamp\n"); } else { /* If we do a grid search we have a different output. */ fprintf(statsfile, "Dummy\tm\tb\tdummy\tMEA\tepsilon\n"); } if (statsfile == NULL) { nrerror("Could not open stats.dat for writing."); } fprintf(stderr, "tau^2 = %.4f; sigma^2 = %.4f; precision = %.4f; tolerance = %.4f; step-size: %.4f\n\n", tau, sigma, precision, tolerance, initial_step_size); st_back=1; min_en = fold(string, structure); (void) fflush(stdout); if (length>2000) free_arrays(); pf_struc = (char *) space((unsigned) length+1); kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */ pf_scale = exp(-(sfact*min_en)/kT/length); /* Set up minimizer */ minimizer_x = gsl_vector_alloc (length+1); minimizer_g = gsl_vector_alloc (length+1); for (i=0; i <= length; i++) { epsilon[i] = 0.0; gsl_vector_set (minimizer_g, i, 0.0); gsl_vector_set (minimizer_x, i, epsilon[i]); } minimizer_pars.length=length; minimizer_pars.seq = string; minimizer_pars.tau=tau; minimizer_pars.sigma=sigma; minimizer_pars.kT=kT; minimizer_func.n = length+1; minimizer_func.f = calculate_f; minimizer_func.df = numerical ? calculate_df_numerically: calculate_df; minimizer_func.fdf = calculate_fdf; minimizer_func.params = &minimizer_pars; //min_en = fold_pb(string, structure); //fprintf(stderr, "%f", min_en); //exit(0); /* Calling test functions for debugging */ for (i=1; i <= length; i++) { if (i%2==0) { epsilon[i] = +0.2*i; } else { epsilon[i] = -0.2*i; } } //test_folding(string, length); /* //test_stochastic_backtracking(string, length); */ /* //test_gradient(minimizer_func, minimizer_pars); */ /* //test_gradient_sampling(minimizer_func, minimizer_pars); */ //exit(1); count_df_evaluations=0; /* Initial guess for epsilon */ if (initial_guess_method !=0 && initial_guess_method !=3) { /* Vars for inital guess methods */ double m,b; double* curr_epsilon; double* best_epsilon; double best_m, best_b, best_scale; double curr_D; double min_D = 999999999.0; double inc = +0.25; double cut; if (initial_guess_method == 1) fprintf(stderr, "Mathew's constant perturbations\n"); if (initial_guess_method == 2) fprintf(stderr, "Perturbations proportional to q-p\n"); curr_epsilon = (double *) space(sizeof(double)*(length+1)); best_epsilon = (double *) space(sizeof(double)*(length+1)); last_non_nan_lnQ = min_en; // Calculate p_unpaired for unperturbed state which we need later // for the proportinal method if (initial_guess_method == 2) { init_pf_fold(length); for (i=0; i <= length; i++) { epsilon[i] = 0.0; } pf_fold_pb(string, NULL); for (i = 1; i < length; i++) { for (j = i+1; j<= length; j++) { p_pp[i][j]=p_pp[j][i]=pr[iindx[i]-j]; } } get_pair_prob_vector(p_pp, p_unpaired_tmp, length, 1); free_pf_arrays(); } /* We do the same grid search as in the Mathews paper Fig. 4*/ for (m=0.25; m <=7.0; m+=0.25) { // Weird way of writing this inner loop for the grid search. We // traverse the grid without big jumps in the parameters to make // sure that the updated scaling factor is accurate all the time. inc*=-1; for (b = inc < 0.0 ? 0.0 : -3.0; inc < 0.0 ? b >= -3.0 : b<= 0.0 ; b+=inc) { // calculate cut point with x-axis and skip parameter pairs // which give a cut point outside the range of // q_unpaired (0 to 1). They gave frequently overflows and the // idea is that we both want positive and negative perturbations cut = exp( (-1) * b / m ) - 1; fprintf(stderr, "\nm = %.2f, b = %.2f, cut=%.2f\n", m, b, cut); if (cut > 1.0 || cut < 0.01) { fprintf(stderr, "\nSkipping m = %.2f, b = %.2f\n", m, b); continue; } /* Mathew's constant perturbations */ if (initial_guess_method == 1) { for (i=0; i <= length; i++) { /* We add epsilon to unpaired regions (as opposed to paired regions as in the Mathews paper) so we multiply by -1; if missing data we set it to 0.0 */ if (q_unpaired[i] < -0.5) { curr_epsilon[i] = 0.0; } else { curr_epsilon[i] = (m *(log(q_unpaired[i]+1))+b) *(-1); } gsl_vector_set (minimizer_x, i, curr_epsilon[i]); } /* Perturbations proportional to q-p */ } else { for (i=0; i <= length; i++) { curr_epsilon[i] = (m *(log(q_unpaired[i]+1)-log(p_unpaired_tmp[i]+1))+ b ) * (-1); gsl_vector_set (minimizer_x, i, curr_epsilon[i]); } } // Repeat and adjust scaling factor until we get result without over-/underflows do { // First we use default scaling factor if (pf_underflow == 0 && pf_overflow == 0) { sfact = 1.070; } if (pf_underflow) { sfact *= 0.8; fprintf(stderr,"Underflow, adjusting sfact to %.4f\n", sfact ); } if (pf_overflow) { sfact *= 1.2; fprintf(stderr,"Overflow, adjusting sfact to %.4f\n", sfact ); } pf_scale = exp(-(sfact*last_non_nan_lnQ)/kT/length); //fprintf(stderr,"Scaling factor is now: %.4e\n", pf_scale); curr_D = calculate_f(minimizer_x, (void*)&minimizer_pars); if (!isnan(last_lnQ)) last_non_nan_lnQ = last_lnQ; // Give up when even extreme scaling does not give results // (for some reason I could not get rid of overflows even with high scaling factors) if (sfact < 0.1 || sfact > 2.0) break; } while (pf_underflow == 1 || pf_overflow == 1); // We have not given up so everything is ok now if (!(sfact < 0.1 || sfact > 2.0)) { if (curr_D < min_D) { min_D = curr_D; for (i=0; i <= length; i++) { best_epsilon[i] = curr_epsilon[i]; } best_m = m; best_b = b; best_scale = pf_scale; } /*If we are interested in the grid search we misuse the print_stats function and report m and b together with MEA*/ if (grid_search) { for (i=0; i <= length; i++) { epsilon[i] = curr_epsilon[i]; } print_stats(statsfile, string, cstruc, length, 0, 0, m, 0.0, b, 0); } fprintf(stderr, "curr D: %.2f, minimum D: %.2f\n", curr_D, min_D); // Adjust pf_scale with default scaling factor but lnQ from // previous step sfact = 1.070; pf_scale = exp(-(sfact*last_lnQ)/kT/length); } else { sfact = 1.070; fprintf(stderr, "Skipping m = %.2f, b = %.2f; did not get stable result.\n", m, b); } } // for b } // for m fprintf(stderr, "Minimum found: m=%.2f, b=%.2f: %.2f\n", best_m, best_b, min_D); for (i=0; i <= length; i++) { epsilon[i] = best_epsilon[i]; gsl_vector_set (minimizer_x, i, best_epsilon[i]); } pf_scale = best_scale; } if (initial_guess_method == 3) { srand((unsigned)time(0)); for (i=0; i <= length; i++) { double r = (double)rand()/(double)RAND_MAX * 4 - 2; epsilon[i] = r; gsl_vector_set (minimizer_x, i, epsilon[i]); } } /* If we just wanted a grid search we are done now. */ if (grid_search) { exit(0); } prev_D = calculate_f(minimizer_x, (void*)&minimizer_pars); print_stats(statsfile, string, cstruc, length, 0 , count_df_evaluations , prev_D, -1.0, 0.0,1); /* GSL minimization */ if (method_id !=0) { if (method_id > 2) { char name[100]; // Available algorithms // 3 gsl_multimin_fdfminimizer_conjugate_fr // 4 gsl_multimin_fdfminimizer_conjugate_pr // 5 gsl_multimin_fdfminimizer_vector_bfgs // 6 gsl_multimin_fdfminimizer_vector_bfgs2 // 7 gsl_multimin_fdfminimizer_steepest_descent // http://www.gnu.org/software/gsl/manual/html_node/Multimin-Algorithms-with-Derivatives.html switch (method_id) { case 2: minimizer = gsl_multimin_fdfminimizer_alloc (gsl_multimin_fdfminimizer_conjugate_fr, length+1); strcpy(name, "Fletcher-Reeves conjugate gradient"); break; case 3: minimizer = gsl_multimin_fdfminimizer_alloc (gsl_multimin_fdfminimizer_conjugate_pr, length+1); strcpy(name, "Polak-Ribiere conjugate gradient"); break; case 4: minimizer = gsl_multimin_fdfminimizer_alloc ( gsl_multimin_fdfminimizer_vector_bfgs, length+1); strcpy(name, "Broyden-Fletcher-Goldfarb-Shanno"); break; case 5: minimizer = gsl_multimin_fdfminimizer_alloc ( gsl_multimin_fdfminimizer_vector_bfgs2, length+1); strcpy(name, "Broyden-Fletcher-Goldfarb-Shanno (improved version)"); break; case 6: minimizer = gsl_multimin_fdfminimizer_alloc (gsl_multimin_fdfminimizer_steepest_descent, length+1); strcpy(name, "Gradient descent (GSL implmementation)"); break; } fprintf(stderr, "Starting minimization via GSL implementation of %s...\n\n", name); // The last two parmeters are step size and tolerance (with // different meaning for different algorithms gsl_multimin_fdfminimizer_set (minimizer, &minimizer_func, minimizer_x, initial_step_size, tolerance); iteration = 1; do { status = gsl_multimin_fdfminimizer_iterate (minimizer); D = minimizer->f; norm = gsl_blas_dnrm2(minimizer->gradient); print_stats(statsfile, string, cstruc, length,iteration, count_df_evaluations, D, prev_D, norm, iteration%sparsePS == 0); prev_D = D; if (status) { fprintf(stderr, "An unexpected error has occured in the iteration (status:%i)\n", status); break; } status = gsl_multimin_test_gradient (minimizer->gradient, precision); if (status == GSL_SUCCESS) fprintf(stderr, "Minimum found stopping.\n"); iteration++; } while (status == GSL_CONTINUE && iteration < max_iteration); gsl_multimin_fdfminimizer_free (minimizer); gsl_vector_free (minimizer_x); /* Custom implementation of steepest descent */ } else { if (method_id == 1) { fprintf(stderr, "Starting custom implemented steepest descent search...\n\n"); } else { fprintf(stderr, "Starting custom implemented steepest descent search with Barzilai Borwein step size...\n\n"); } iteration = 0; D = 0.0; while (iteration++ < max_iteration) { for (i=1; i <= length; i++) { gsl_vector_set (minimizer_x, i, epsilon[i]); } D = calculate_f(minimizer_x, (void*)&minimizer_pars); if (numerical) { calculate_df_numerically(minimizer_x, (void*)&minimizer_pars, minimizer_g); } else { calculate_df(minimizer_x, (void*)&minimizer_pars, minimizer_g); } for (i=1; i <= length; i++) { gradient[i] = gsl_vector_get (minimizer_g, i); } // Do line search fprintf(stderr, "\nLine search:\n"); // After the first iteration, use Barzilai-Borwain (1988) step size (currently turned off) if (iteration>1 && method_id==2) { double denominator=0.0; double numerator=0.0; for (i=1; i <= length; i++) { numerator += (epsilon[i]-prev_epsilon[i]) * (gradient[i]-prev_gradient[i]); denominator+=(gradient[i]-prev_gradient[i]) * (gradient[i]-prev_gradient[i]); } step_size = numerator / denominator; norm =1.0; } else { // Use step sized given by the user (normalize it first) step_size = initial_step_size / calculate_norm(gradient, length); } for (i=1; i <= length; i++) { prev_epsilon[i] = epsilon[i]; prev_gradient[i] = gradient[i]; } do { for (mu=1; mu <= length; mu++) { epsilon[mu] = prev_epsilon[mu] - step_size * gradient[mu]; } for (i=1; i <= length; i++) { gsl_vector_set (minimizer_x, i, epsilon[i]); } DD = calculate_f(minimizer_x, (void*)&minimizer_pars); if (step_size > 0.0001) { fprintf(stderr, "Old D: %.4f; New D: %.4f; Step size: %.4f\n", D, DD, step_size); } else { fprintf(stderr, "Old D: %.4f; New D: %.4f; Step size: %.4e\n", D, DD, step_size); } step_size /= 2; } while (step_size > 1e-12 && DD > D); norm = calculate_norm(gradient,length); if (DD > D) { fprintf(stderr, "Line search did not improve D in iteration %i. Stop.\n", iteration); if (hybrid_conditionals) { sample_conditionals=0; } else { break; } } print_stats(statsfile, string, cstruc, length,iteration, count_df_evaluations, DD, prev_D, norm, iteration%sparsePS == 0); if (norm<precision && iteration>1) { fprintf(stderr, "Minimum found stopping.\n"); break; } prev_D = DD; } } /* Force last dotplot to be printed */ print_stats(statsfile, string, cstruc, length,iteration, count_df_evaluations, DD, prev_D, norm, 1); } free(pf_struc); if (cstruc!=NULL) free(cstruc); (void) fflush(stdout); free(string); free(structure); RNAfold_cmdline_parser_free (&args_info); return 0; }
int main(int argc, char *argv[]) { char *string/*, *line*/; char *structure=NULL, *cstruc=NULL; /*char fname[13], ffname[20], gfname[20];*/ /*char *ParamFile=NULL;*/ char *ns_bases=NULL, *c; int i, length, l, sym/*, r*/; double energy, min_en; double kT, sfact=1.07; int pf=0, noPS=0, istty; int noconv=0; int circ=0; AjPSeq seq = NULL; AjPFile confile = NULL; AjPFile paramfile = NULL; AjPFile outf = NULL; AjPFile essfile = NULL; AjPFile dotfilea = NULL; AjPFile dotfileb = NULL; AjPStr seqstring = NULL; AjPStr constring = NULL; AjPStr seqname = NULL; float eT = 0.; AjBool eGU; AjBool ecirc = ajFalse; AjBool eclose; AjBool lonely; AjBool convert; AjPStr ensbases = NULL; AjBool etloop; AjPStr eenergy = NULL; char ewt = '\0'; float escale = 0.; AjPStr edangles = NULL; char edangle = '\0'; ajint len; embInitPV("vrnafold",argc,argv,"VIENNA",VERSION); seqstring = ajStrNew(); constring = ajStrNew(); seqname = ajStrNew(); seq = ajAcdGetSeq("sequence"); confile = ajAcdGetInfile("constraintfile"); paramfile = ajAcdGetInfile("paramfile"); eT = ajAcdGetFloat("temperature"); ecirc = ajAcdGetBoolean("circular"); eGU = ajAcdGetBoolean("gu"); eclose = ajAcdGetBoolean("closegu"); lonely = ajAcdGetBoolean("lp"); convert = ajAcdGetBoolean("convert"); ensbases = ajAcdGetString("nsbases"); etloop = ajAcdGetBoolean("tetraloop"); eenergy = ajAcdGetListSingle("energy"); escale = ajAcdGetFloat("scale"); edangles = ajAcdGetListSingle("dangles"); outf = ajAcdGetOutfile("outfile"); essfile = ajAcdGetOutfile("ssoutfile"); /* dotfilea = ajAcdGetOutfile("adotoutfile"); dotfileb = ajAcdGetOutfile("bdotoutfile"); */ do_backtrack = 2; pf = 0; string = NULL; istty = 0; temperature = (double) eT; circ = !!ecirc; noGU = (eGU) ? 0 : 1; no_closingGU = (eclose) ? 0 : 1; noLonelyPairs = (lonely) ? 0 : 1; noconv = (convert) ? 0 : 1; ns_bases = (ajStrGetLen(ensbases)) ? MAJSTRGETPTR(ensbases) : NULL; tetra_loop = !!etloop; ewt = *ajStrGetPtr(eenergy); if(ewt == '0') energy_set = 0; else if(ewt == '1') energy_set = 1; else if(ewt == '2') energy_set = 2; sfact = (double) escale; edangle = *ajStrGetPtr(edangles); if(edangle == '0') dangles = 0; else if(edangle == '1') dangles = 1; else if(edangle == '2') dangles = 2; else if(edangle == '3') dangles = 3; if(circ && noLonelyPairs) { ajWarn("Depending on the origin of the circular sequence\n" "some structures may be missed when using -noLP\nTry " "rotating your sequence a few times\n"); } if(paramfile) read_parameter_file(paramfile); if (ns_bases != NULL) { nonstandards = space(33); c=ns_bases; i=sym=0; if (*c=='-') { sym=1; c++; } while (*c!='\0') { if (*c!=',') { nonstandards[i++]=*c++; nonstandards[i++]=*c; if ((sym)&&(*c!=*(c-1))) { nonstandards[i++]=*c; nonstandards[i++]=*(c-1); } } c++; } } if(confile) vienna_GetConstraints(confile,&constring); string = NULL; structure = NULL; length = ajSeqGetLen(seq); string = (char *) space(length+1); strcpy(string,ajSeqGetSeqC(seq)); len = ajStrGetLen(constring); structure = (char *) space(length+1); if(len) { fold_constrained = 1; strcpy(structure,ajStrGetPtr(constring)); } for (l = 0; l < length; l++) { string[l] = toupper(string[l]); if (!noconv && string[l] == 'T') string[l] = 'U'; } /* initialize_fold(length); */ if (circ) min_en = circfold(string, structure); else min_en = fold(string, structure); ajFmtPrintF(outf,"%s\n%s", string, structure); if (istty) printf("\n minimum free energy = %6.2f kcal/mol\n", min_en); else ajFmtPrintF(outf," (%6.2f)\n", min_en); if (!noPS) { if (length<2000) (void) PS_rna_plot(string, structure, essfile); else ajWarn("Structure too long, not doing xy_plot\n"); } if (length>=2000) free_arrays(); if (pf) { char *pf_struc; pf_struc = (char *) space((unsigned) length+1); if (dangles==1) { dangles=2; /* recompute with dangles as in pf_fold() */ min_en = (circ) ? energy_of_circ_struct(string, structure) : energy_of_struct(string, structure); dangles=1; } kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */ pf_scale = exp(-(sfact*min_en)/kT/length); if (length>2000) ajWarn("scaling factor %f\n", pf_scale); (circ) ? init_pf_circ_fold(length) : init_pf_fold(length); if (cstruc!=NULL) strncpy(pf_struc, cstruc, length+1); energy = (circ) ? pf_circ_fold(string, pf_struc) : pf_fold(string, pf_struc); if (do_backtrack) { ajFmtPrintF(outf,"%s", pf_struc); ajFmtPrintF(outf," [%6.2f]\n", energy); } if ((istty)||(!do_backtrack)) ajFmtPrintF(outf," free energy of ensemble = %6.2f kcal/mol\n", energy); if (do_backtrack) { plist *pl1,*pl2; char *cent; double dist, cent_en; cent = centroid(length, &dist); cent_en = (circ) ? energy_of_circ_struct(string, cent) : energy_of_struct(string, cent); ajFmtPrintF(outf,"%s {%6.2f d=%.2f}\n", cent, cent_en, dist); free(cent); pl1 = make_plist(length, 1e-5); pl2 = b2plist(structure); (void) PS_dot_plot_list(string, dotfilea, pl1, pl2, ""); free(pl2); if (do_backtrack==2) { pl2 = stackProb(1e-5); PS_dot_plot_list(string, dotfileb, pl1, pl2, "Probabilities for stacked pairs (i,j)(i+1,j-1)"); free(pl2); } free(pl1); free(pf_struc); } ajFmtPrintF(outf," frequency of mfe structure in ensemble %g; ", exp((energy-min_en)/kT)); if (do_backtrack) ajFmtPrintF(outf,"ensemble diversity %-6.2f", mean_bp_dist(length)); ajFmtPrintF(outf,"\n"); free_pf_arrays(); } if (cstruc!=NULL) free(cstruc); free(string); free(structure); ajStrDel(&seqstring); ajStrDel(&constring); ajStrDel(&seqname); ajStrDel(&ensbases); ajStrDel(&eenergy); ajStrDel(&edangles); ajSeqDel(&seq); ajFileClose(&confile); ajFileClose(¶mfile); ajFileClose(&outf); ajFileClose(&essfile); /* ajFileClose(&dotfilea); ajFileClose(&dotfileb); */ if (length<2000) free_arrays(); embExit(); return 0; }
void print_stats(FILE* statsfile, char* seq, char* struc, int length, int iteration, int count_df_evaluations, double D, double prev_D, double norm, int printPS) { plist *pl, *pl1,*pl2; char fname[100]; char title[100]; char* ss; double MEAgamma, mea, mea_en; char* output; int i,j; static char timestamp[40]; const struct tm *tm; time_t now; ss = (char *) space((unsigned) length+1); memset(ss,'.',length); init_pf_fold(length); pf_fold_pb(seq, NULL); for (i = 1; i < length; i++) { for (j = i+1; j<= length; j++) { p_pp[i][j]=p_pp[j][i]=pr[iindx[i]-j]; } } get_pair_prob_vector(p_pp, p_unpaired, length, 1); fprintf (stderr, "\nITERATION: %i\n", iteration); fprintf(stderr, "DISCREPANCY: %.4f\n", D); fprintf(stderr, "NORM: %.2f\n", norm); if (prev_D > -1.0) { fprintf(stderr, "IMPROVEMENT: %.4f%%\n\n", (1-(D/prev_D))*100); } fprintf(statsfile, "%i\t%.4f\t%.4f\t%i\t", iteration, D, norm, count_df_evaluations); for (MEAgamma=1e-5; MEAgamma<1e+6; MEAgamma*=10 ) { pl = make_plist(length, 1e-4/(1+MEAgamma)); mea = MEA(pl, ss, MEAgamma); mea_en = energy_of_struct(seq, ss); fprintf(statsfile,"%s,%.2e;", ss, MEAgamma); free(pl); } fprintf(statsfile, "\t"); // Stochastic backtracking fprintf(stderr, "Sampling structures...\n"); if (sample_structure) { char* best_structure; char* curr_structure; double x; double curr_energy = 0.0; double min_energy = +1.0; int curr_distance = 0; int min_distance = 999999; best_structure = (char *) space((unsigned) length+1); for (i=1; i<=10000; i++) { curr_structure = pbacktrack_pb(seq); curr_energy = energy_of_struct(seq, curr_structure); curr_distance = 0.0; //fprintf(stderr, "%s%.2f ", curr_structure, curr_energy); for (j = 1; j <= length; j++) { if (q_unpaired[j] > -0.5) { x = (curr_structure[j-1] == '.') ? 1.0 : 0.0; curr_distance += abs(x-q_unpaired[j]); } } if (curr_distance < min_distance) { min_distance = curr_distance; min_energy = curr_energy; strcpy(best_structure, curr_structure); } if (curr_distance == min_distance) { if (curr_energy < min_energy) { min_energy = curr_energy; strcpy(best_structure, curr_structure); } } //fprintf(stderr, "%i\n", curr_distance); free(curr_structure); } //fprintf(stderr, "\n%s %.2f %i\n", best_structure, min_energy, min_distance); fprintf(statsfile, "\t%s\t%.2f\t%i\t", best_structure, min_energy, min_distance); } else { fprintf(statsfile, "NA\tNA\tNA\t"); } for (i = 1; i <= length; i++) { fprintf(statsfile, "%.4f", epsilon[i]); if (!(i==length)) { fprintf(statsfile, ","); } } now = time ( NULL ); tm = localtime ( &now ); strftime ( timestamp, 40, "%Y-%m-%d %X", tm ); fprintf(statsfile, "\t%s\n", timestamp); /* Print dotplot only if not noPS is given and function call asks for it */ if (!noPS && printPS) { /* Print dotplot */ sprintf(fname,"%s/iteration%i.ps", psDir, iteration); pl1 = make_plist(length, 1e-5); if (struc != NULL) { pl2 = b2plist(struc); } else { pl2 = NULL; } sprintf(title,"Iteration %i, D = %.4f", iteration, D); (void) PS_dot_plot_list_epsilon(seq, fname, pl2, pl1, epsilon, title); } free_pf_arrays(); }
int main(int argc, char *argv[]) { char *string, *line; char *structure=NULL, *cstruc=NULL; char fname[13], ffname[20], gfname[20]; char *ParamFile=NULL; char *ns_bases=NULL, *c; int i, length, l, sym, r; double energy, min_en; double kT, sfact=1.07; int pf=0, noPS=0, istty; int noconv=0; int circ=0; do_backtrack = 1; string=NULL; for (i=1; i<argc; i++) { if (argv[i][0]=='-') switch ( argv[i][1] ) { case 'T': if (argv[i][2]!='\0') usage(); if(i==argc-1) usage(); r=sscanf(argv[++i], "%lf", &temperature); if (!r) usage(); break; case 'p': pf=1; if (argv[i][2]!='\0') (void) sscanf(argv[i]+2, "%d", &do_backtrack); break; case 'n': if ( strcmp(argv[i], "-noGU")==0) noGU=1; if ( strcmp(argv[i], "-noCloseGU")==0) no_closingGU=1; if ( strcmp(argv[i], "-noLP")==0) noLonelyPairs=1; if ( strcmp(argv[i], "-noPS")==0) noPS=1; if ( strcmp(argv[i], "-nsp") ==0) { if (i==argc-1) usage(); ns_bases = argv[++i]; } if ( strcmp(argv[i], "-noconv")==0) noconv=1; break; case '4': tetra_loop=0; break; case 'e': if(i==argc-1) usage(); r=sscanf(argv[++i],"%d", &energy_set); if (!r) usage(); break; case 'C': fold_constrained=1; break; case 'c': if ( strcmp(argv[i], "-circ")==0) circ=1; break; case 'S': if(i==argc-1) usage(); r=sscanf(argv[++i],"%lf", &sfact); if (!r) usage(); break; case 'd': dangles=0; if (argv[i][2]!='\0') { r=sscanf(argv[i]+2, "%d", &dangles); if (r!=1) usage(); } break; case 'P': if (i==argc-1) usage(); ParamFile = argv[++i]; break; default: usage(); } } if (circ && noLonelyPairs) fprintf(stderr, "warning, depending on the origin of the circular sequence, some structures may be missed when using -noLP\nTry rotating your sequence a few times\n"); if (ParamFile != NULL) read_parameter_file(ParamFile); if (ns_bases != NULL) { nonstandards = space(33); c=ns_bases; i=sym=0; if (*c=='-') { sym=1; c++; } while (*c!='\0') { if (*c!=',') { nonstandards[i++]=*c++; nonstandards[i++]=*c; if ((sym)&&(*c!=*(c-1))) { nonstandards[i++]=*c; nonstandards[i++]=*(c-1); } } c++; } } istty = isatty(fileno(stdout))&&isatty(fileno(stdin)); if ((fold_constrained)&&(istty)) { printf("Input constraints using the following notation:\n"); printf("| : paired with another base\n"); printf(". : no constraint at all\n"); printf("x : base must not pair\n"); printf("< : base i is paired with a base j<i\n"); printf("> : base i is paired with a base j>i\n"); printf("matching brackets ( ): base i pairs base j\n"); } do { /* main loop: continue until end of file */ if (istty) { printf("\nInput string (upper or lower case); @ to quit\n"); printf("%s%s\n", scale1, scale2); } fname[0]='\0'; if ((line = get_line(stdin))==NULL) break; /* skip comment lines and get filenames */ while ((*line=='*')||(*line=='\0')||(*line=='>')) { if (*line=='>') (void) sscanf(line, ">%12s", fname); printf("%s\n", line); free(line); if ((line = get_line(stdin))==NULL) break; } if ((line ==NULL) || (strcmp(line, "@") == 0)) break; string = (char *) space(strlen(line)+1); (void) sscanf(line,"%s",string); free(line); length = (int) strlen(string); structure = (char *) space((unsigned) length+1); if (fold_constrained) { cstruc = get_line(stdin); if (cstruc!=NULL) strncpy(structure, cstruc, length); else fprintf(stderr, "constraints missing\n"); } for (l = 0; l < length; l++) { string[l] = toupper(string[l]); if (!noconv && string[l] == 'T') string[l] = 'U'; } if (istty) printf("length = %d\n", length); /* initialize_fold(length); */ if (circ) min_en = circfold(string, structure); else min_en = fold(string, structure); printf("%s\n%s", string, structure); if (istty) printf("\n minimum free energy = %6.2f kcal/mol\n", min_en); else printf(" (%6.2f)\n", min_en); (void) fflush(stdout); if (fname[0]!='\0') { strcpy(ffname, fname); strcat(ffname, "_ss.ps"); strcpy(gfname, fname); strcat(gfname, "_ss.g"); } else { strcpy(ffname, "rna.ps"); strcpy(gfname, "rna.g"); } if (!noPS) { if (length<2000) (void) PS_rna_plot(string, structure, ffname); else fprintf(stderr,"INFO: structure too long, not doing xy_plot\n"); } if (length>2000) free_arrays(); if (pf) { char *pf_struc; pf_struc = (char *) space((unsigned) length+1); if (dangles==1) { dangles=2; /* recompute with dangles as in pf_fold() */ min_en = (circ) ? energy_of_circ_struct(string, structure) : energy_of_struct(string, structure); dangles=1; } kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */ pf_scale = exp(-(sfact*min_en)/kT/length); if (length>2000) fprintf(stderr, "scaling factor %f\n", pf_scale); (circ) ? init_pf_circ_fold(length) : init_pf_fold(length); if (cstruc!=NULL) strncpy(pf_struc, cstruc, length+1); energy = (circ) ? pf_circ_fold(string, pf_struc) : pf_fold(string, pf_struc); if (do_backtrack) { printf("%s", pf_struc); if (!istty) printf(" [%6.2f]\n", energy); else printf("\n"); } if ((istty)||(!do_backtrack)) printf(" free energy of ensemble = %6.2f kcal/mol\n", energy); if (do_backtrack) { plist *pl1,*pl2; char *cent; double dist, cent_en; cent = centroid(length, &dist); cent_en = (circ) ? energy_of_circ_struct(string, cent) :energy_of_struct(string, cent); printf("%s {%6.2f d=%.2f}\n", cent, cent_en, dist); free(cent); if (fname[0]!='\0') { strcpy(ffname, fname); strcat(ffname, "_dp.ps"); } else strcpy(ffname, "dot.ps"); pl1 = make_plist(length, 1e-5); pl2 = b2plist(structure); (void) PS_dot_plot_list(string, ffname, pl1, pl2, ""); free(pl2); if (do_backtrack==2) { pl2 = stackProb(1e-5); if (fname[0]!='\0') { strcpy(ffname, fname); strcat(ffname, "_dp2.ps"); } else strcpy(ffname, "dot2.ps"); PS_dot_plot_list(string, ffname, pl1, pl2, "Probabilities for stacked pairs (i,j)(i+1,j-1)"); free(pl2); } free(pl1); free(pf_struc); } printf(" frequency of mfe structure in ensemble %g; ", exp((energy-min_en)/kT)); if (do_backtrack) printf("ensemble diversity %-6.2f", mean_bp_dist(length)); printf("\n"); free_pf_arrays(); } if (cstruc!=NULL) free(cstruc); (void) fflush(stdout); free(string); free(structure); } while (1); return 0; }