void print_stats(FILE* statsfile, char* seq, char* struc, int length, int iteration, int count_df_evaluations, double D, double prev_D, double norm, int printPS) { plist *pl, *pl1,*pl2; char fname[100]; char title[100]; char* ss; double MEAgamma, mea, mea_en; char* output; int i,j; static char timestamp[40]; const struct tm *tm; time_t now; ss = (char *) space((unsigned) length+1); memset(ss,'.',length); init_pf_fold(length); pf_fold_pb(seq, NULL); for (i = 1; i < length; i++) { for (j = i+1; j<= length; j++) { p_pp[i][j]=p_pp[j][i]=pr[iindx[i]-j]; } } get_pair_prob_vector(p_pp, p_unpaired, length, 1); fprintf (stderr, "\nITERATION: %i\n", iteration); fprintf(stderr, "DISCREPANCY: %.4f\n", D); fprintf(stderr, "NORM: %.2f\n", norm); if (prev_D > -1.0) { fprintf(stderr, "IMPROVEMENT: %.4f%%\n\n", (1-(D/prev_D))*100); } fprintf(statsfile, "%i\t%.4f\t%.4f\t%i\t", iteration, D, norm, count_df_evaluations); for (MEAgamma=1e-5; MEAgamma<1e+6; MEAgamma*=10 ) { pl = make_plist(length, 1e-4/(1+MEAgamma)); mea = MEA(pl, ss, MEAgamma); mea_en = energy_of_struct(seq, ss); fprintf(statsfile,"%s,%.2e;", ss, MEAgamma); free(pl); } fprintf(statsfile, "\t"); // Stochastic backtracking fprintf(stderr, "Sampling structures...\n"); if (sample_structure) { char* best_structure; char* curr_structure; double x; double curr_energy = 0.0; double min_energy = +1.0; int curr_distance = 0; int min_distance = 999999; best_structure = (char *) space((unsigned) length+1); for (i=1; i<=10000; i++) { curr_structure = pbacktrack_pb(seq); curr_energy = energy_of_struct(seq, curr_structure); curr_distance = 0.0; //fprintf(stderr, "%s%.2f ", curr_structure, curr_energy); for (j = 1; j <= length; j++) { if (q_unpaired[j] > -0.5) { x = (curr_structure[j-1] == '.') ? 1.0 : 0.0; curr_distance += abs(x-q_unpaired[j]); } } if (curr_distance < min_distance) { min_distance = curr_distance; min_energy = curr_energy; strcpy(best_structure, curr_structure); } if (curr_distance == min_distance) { if (curr_energy < min_energy) { min_energy = curr_energy; strcpy(best_structure, curr_structure); } } //fprintf(stderr, "%i\n", curr_distance); free(curr_structure); } //fprintf(stderr, "\n%s %.2f %i\n", best_structure, min_energy, min_distance); fprintf(statsfile, "\t%s\t%.2f\t%i\t", best_structure, min_energy, min_distance); } else { fprintf(statsfile, "NA\tNA\tNA\t"); } for (i = 1; i <= length; i++) { fprintf(statsfile, "%.4f", epsilon[i]); if (!(i==length)) { fprintf(statsfile, ","); } } now = time ( NULL ); tm = localtime ( &now ); strftime ( timestamp, 40, "%Y-%m-%d %X", tm ); fprintf(statsfile, "\t%s\n", timestamp); /* Print dotplot only if not noPS is given and function call asks for it */ if (!noPS && printPS) { /* Print dotplot */ sprintf(fname,"%s/iteration%i.ps", psDir, iteration); pl1 = make_plist(length, 1e-5); if (struc != NULL) { pl2 = b2plist(struc); } else { pl2 = NULL; } sprintf(title,"Iteration %i, D = %.4f", iteration, D); (void) PS_dot_plot_list_epsilon(seq, fname, pl2, pl1, epsilon, title); } free_pf_arrays(); }
/*--------------------------------------------------------------------------*/ int main(int argc, char *argv[]){ struct RNAalifold_args_info args_info; unsigned int input_type; char ffname[FILENAME_MAX_LENGTH], gfname[FILENAME_MAX_LENGTH], fname[FILENAME_MAX_LENGTH]; char *input_string, *string, *structure, *cstruc, *ParamFile, *ns_bases, *c; int n_seq, i, length, sym, r, noPS, with_sci; int endgaps, mis, circular, doAlnPS, doColor, doMEA, n_back, eval_energy, pf, istty; double min_en, real_en, sfact, MEAgamma, bppmThreshold, betaScale; char *AS[MAX_NUM_NAMES]; /* aligned sequences */ char *names[MAX_NUM_NAMES]; /* sequence names */ FILE *clust_file = stdin; pf_paramT *pf_parameters; model_detailsT md; fname[0] = ffname[0] = gfname[0] = '\0'; string = structure = cstruc = ParamFile = ns_bases = NULL; pf_parameters = NULL; endgaps = mis = pf = circular = doAlnPS = doColor = n_back = eval_energy = oldAliEn = doMEA = ribo = noPS = 0; do_backtrack = 1; dangles = 2; gquad = 0; sfact = 1.07; bppmThreshold = 1e-6; MEAgamma = 1.0; betaScale = 1.; with_sci = 0; set_model_details(&md); /* ############################################# # check the command line prameters ############################################# */ if(RNAalifold_cmdline_parser (argc, argv, &args_info) != 0) exit(1); /* temperature */ if(args_info.temp_given) temperature = args_info.temp_arg; /* structure constraint */ if(args_info.constraint_given) fold_constrained=1; /* do not take special tetra loop energies into account */ if(args_info.noTetra_given) md.special_hp = tetra_loop=0; /* set dangle model */ if(args_info.dangles_given){ if((args_info.dangles_arg != 0) && (args_info.dangles_arg != 2)) warn_user("required dangle model not implemented, falling back to default dangles=2"); else md.dangles = dangles=args_info.dangles_arg; } /* do not allow weak pairs */ if(args_info.noLP_given) md.noLP = noLonelyPairs = 1; /* do not allow wobble pairs (GU) */ if(args_info.noGU_given) md.noGU = noGU = 1; /* do not allow weak closing pairs (AU,GU) */ if(args_info.noClosingGU_given) md.noGUclosure = no_closingGU = 1; /* gquadruplex support */ if(args_info.gquad_given) md.gquad = gquad = 1; /* sci computation */ if(args_info.sci_given) with_sci = 1; /* do not convert DNA nucleotide "T" to appropriate RNA "U" */ /* set energy model */ if(args_info.energyModel_given) energy_set = args_info.energyModel_arg; /* take another energy parameter set */ if(args_info.paramFile_given) ParamFile = strdup(args_info.paramFile_arg); /* Allow other pairs in addition to the usual AU,GC,and GU pairs */ if(args_info.nsp_given) ns_bases = strdup(args_info.nsp_arg); /* set pf scaling factor */ if(args_info.pfScale_given) sfact = args_info.pfScale_arg; /* assume RNA sequence to be circular */ if(args_info.circ_given) circular=1; /* do not produce postscript output */ if(args_info.noPS_given) noPS = 1; /* partition function settings */ if(args_info.partfunc_given){ pf = 1; if(args_info.partfunc_arg != -1) do_backtrack = args_info.partfunc_arg; } /* MEA (maximum expected accuracy) settings */ if(args_info.MEA_given){ pf = doMEA = 1; if(args_info.MEA_arg != -1) MEAgamma = args_info.MEA_arg; } if(args_info.betaScale_given) betaScale = args_info.betaScale_arg; /* set the bppm threshold for the dotplot */ if(args_info.bppmThreshold_given) bppmThreshold = MIN2(1., MAX2(0.,args_info.bppmThreshold_arg)); /* set cfactor */ if(args_info.cfactor_given) cv_fact = args_info.cfactor_arg; /* set nfactor */ if(args_info.nfactor_given) nc_fact = args_info.nfactor_arg; if(args_info.endgaps_given) endgaps = 1; if(args_info.mis_given) mis = 1; if(args_info.color_given) doColor=1; if(args_info.aln_given) doAlnPS=1; if(args_info.old_given) oldAliEn = 1; if(args_info.stochBT_given){ n_back = args_info.stochBT_arg; do_backtrack = 0; pf = 1; init_rand(); } if(args_info.stochBT_en_given){ n_back = args_info.stochBT_en_arg; do_backtrack = 0; pf = 1; eval_energy = 1; init_rand(); } if(args_info.ribosum_file_given){ RibosumFile = strdup(args_info.ribosum_file_arg); ribo = 1; } if(args_info.ribosum_scoring_given){ RibosumFile = NULL; ribo = 1; } if(args_info.layout_type_given) rna_plot_type = args_info.layout_type_arg; /* alignment file name given as unnamed option? */ if(args_info.inputs_num == 1){ clust_file = fopen(args_info.inputs[0], "r"); if (clust_file == NULL) { fprintf(stderr, "can't open %s\n", args_info.inputs[0]); } } /* free allocated memory of command line data structure */ RNAalifold_cmdline_parser_free (&args_info); /* ############################################# # begin initializing ############################################# */ if(circular && gquad){ nrerror("G-Quadruplex support is currently not available for circular RNA structures"); } make_pair_matrix(); if (circular && noLonelyPairs) warn_user("depending on the origin of the circular sequence, " "some structures may be missed when using --noLP\n" "Try rotating your sequence a few times\n"); if (ParamFile != NULL) read_parameter_file(ParamFile); if (ns_bases != NULL) { nonstandards = space(33); c=ns_bases; i=sym=0; if (*c=='-') { sym=1; c++; } while (*c!='\0') { if (*c!=',') { nonstandards[i++]=*c++; nonstandards[i++]=*c; if ((sym)&&(*c!=*(c-1))) { nonstandards[i++]=*c; nonstandards[i++]=*(c-1); } } c++; } } istty = isatty(fileno(stdout))&&isatty(fileno(stdin)); /* ######################################################## # handle user input from 'stdin' if necessary ######################################################## */ if(fold_constrained){ if(istty){ print_tty_constraint_full(); print_tty_input_seq_str(""); } input_type = get_input_line(&input_string, VRNA_INPUT_NOSKIP_COMMENTS); if(input_type & VRNA_INPUT_QUIT){ return 0;} else if((input_type & VRNA_INPUT_MISC) && (strlen(input_string) > 0)){ cstruc = strdup(input_string); free(input_string); } else warn_user("constraints missing"); } if (istty && (clust_file == stdin)) print_tty_input_seq_str("Input aligned sequences in clustalw or stockholm format\n(enter a line starting with \"//\" to indicate the end of your input)"); n_seq = read_clustal(clust_file, AS, names); if (n_seq==0) nrerror("no sequences found"); if (clust_file != stdin) fclose(clust_file); /* ######################################################## # done with 'stdin' handling, now init everything properly ######################################################## */ length = (int) strlen(AS[0]); structure = (char *)space((unsigned) length+1); if(fold_constrained && cstruc != NULL) strncpy(structure, cstruc, length); if (endgaps) for (i=0; i<n_seq; i++) mark_endgaps(AS[i], '~'); /* ######################################################## # begin actual calculations ######################################################## */ if (circular) { int i; double s = 0; min_en = circalifold((const char **)AS, structure); for (i=0; AS[i]!=NULL; i++) s += energy_of_circ_structure(AS[i], structure, -1); real_en = s/i; } else { float *ens = (float *)space(2*sizeof(float)); min_en = alifold((const char **)AS, structure); if(md.gquad) energy_of_ali_gquad_structure((const char **)AS, structure, n_seq, ens); else energy_of_alistruct((const char **)AS, structure, n_seq, ens); real_en = ens[0]; free(ens); } string = (mis) ? consens_mis((const char **) AS) : consensus((const char **) AS); printf("%s\n%s", string, structure); if(istty){ if(with_sci){ float sci = min_en; float e_mean = 0; for (i=0; AS[i]!=NULL; i++){ char *seq = get_ungapped_sequence(AS[i]); char *str = (char *)space(sizeof(char) * (strlen(seq) + 1)); e_mean += fold(seq, str); free(seq); free(str); } e_mean /= i; sci /= e_mean; printf( "\n minimum free energy = %6.2f kcal/mol (%6.2f + %6.2f)" "\n SCI = %2.4f\n", min_en, real_en, min_en-real_en, sci); } else printf("\n minimum free energy = %6.2f kcal/mol (%6.2f + %6.2f)\n", min_en, real_en, min_en - real_en); } else { if(with_sci){ float sci = min_en; float e_mean = 0; for (i=0; AS[i]!=NULL; i++){ char *seq = get_ungapped_sequence(AS[i]); char *str = (char *)space(sizeof(char) * (strlen(seq) + 1)); e_mean += fold(seq, str); free(seq); free(str); } e_mean /= i; sci /= e_mean; printf(" (%6.2f = %6.2f + %6.2f) [%2.4f]\n", min_en, real_en, min_en-real_en, sci); } else printf(" (%6.2f = %6.2f + %6.2f) \n", min_en, real_en, min_en-real_en ); } strcpy(ffname, "alirna.ps"); strcpy(gfname, "alirna.g"); if (!noPS) { char **A; A = annote(structure, (const char**) AS); if(md.gquad){ if (doColor) (void) PS_rna_plot_a_gquad(string, structure, ffname, A[0], A[1]); else (void) PS_rna_plot_a_gquad(string, structure, ffname, NULL, A[1]); } else { if (doColor) (void) PS_rna_plot_a(string, structure, ffname, A[0], A[1]); else (void) PS_rna_plot_a(string, structure, ffname, NULL, A[1]); } free(A[0]); free(A[1]); free(A); } if (doAlnPS) PS_color_aln(structure, "aln.ps", (const char const **) AS, (const char const **) names); /* free mfe arrays */ free_alifold_arrays(); if (pf) { float energy, kT; char * mfe_struc; mfe_struc = strdup(structure); kT = (betaScale*((temperature+K0)*GASCONST))/1000.; /* in Kcal */ pf_scale = exp(-(sfact*min_en)/kT/length); if (length>2000) fprintf(stderr, "scaling factor %f\n", pf_scale); fflush(stdout); if (cstruc!=NULL) strncpy(structure, cstruc, length+1); pf_parameters = get_boltzmann_factors_ali(n_seq, temperature, betaScale, md, pf_scale); energy = alipf_fold_par((const char **)AS, structure, NULL, pf_parameters, do_backtrack, fold_constrained, circular); if (n_back>0) { /*stochastic sampling*/ for (i=0; i<n_back; i++) { char *s; double prob=1.; s = alipbacktrack(&prob); printf("%s ", s); if (eval_energy ) printf("%6g %.2f ",prob, -1*(kT*log(prob)-energy)); printf("\n"); free(s); } } if (do_backtrack) { printf("%s", structure); if (!istty) printf(" [%6.2f]\n", energy); else printf("\n"); } if ((istty)||(!do_backtrack)) printf(" free energy of ensemble = %6.2f kcal/mol\n", energy); printf(" frequency of mfe structure in ensemble %g\n", exp((energy-min_en)/kT)); if (do_backtrack) { FILE *aliout; cpair *cp; char *cent; double dist; FLT_OR_DBL *probs = export_ali_bppm(); plist *pl, *mfel; assign_plist_from_pr(&pl, probs, length, bppmThreshold); assign_plist_from_db(&mfel, mfe_struc, 0.95*0.95); if (!circular){ float *ens; cent = get_centroid_struct_pr(length, &dist, probs); ens=(float *)space(2*sizeof(float)); energy_of_alistruct((const char **)AS, cent, n_seq, ens); /*cent_en = energy_of_struct(string, cent);*/ /*ali*/ printf("%s %6.2f {%6.2f + %6.2f}\n",cent,ens[0]-ens[1],ens[0],(-1)*ens[1]); free(cent); free(ens); } if(doMEA){ float mea, *ens; plist *pl2; assign_plist_from_pr(&pl2, probs, length, 1e-4/(1+MEAgamma)); mea = MEA(pl2, structure, MEAgamma); ens = (float *)space(2*sizeof(float)); if(circular) energy_of_alistruct((const char **)AS, structure, n_seq, ens); else ens[0] = energy_of_structure(string, structure, 0); printf("%s {%6.2f MEA=%.2f}\n", structure, ens[0], mea); free(ens); free(pl2); } if (fname[0]!='\0') { strcpy(ffname, fname); strcat(ffname, "_ali.out"); } else strcpy(ffname, "alifold.out"); aliout = fopen(ffname, "w"); if (!aliout) { fprintf(stderr, "can't open %s skipping output\n", ffname); } else { print_aliout(AS, pl, bppmThreshold, n_seq, mfe_struc, aliout); } fclose(aliout); if (fname[0]!='\0') { strcpy(ffname, fname); strcat(ffname, "_dp.ps"); } else strcpy(ffname, "alidot.ps"); cp = make_color_pinfo(AS,pl, bppmThreshold, n_seq, mfel); (void) PS_color_dot_plot(string, cp, ffname); free(cp); free(pl); free(mfel); } free(mfe_struc); free_alipf_arrays(); free(pf_parameters); } if (cstruc!=NULL) free(cstruc); (void) fflush(stdout); free(string); free(structure); for (i=0; AS[i]; i++) { free(AS[i]); free(names[i]); } return 0; }