int main(int argc, char *argv[]) { struct RNAcofold_args_info args_info; unsigned int input_type; char *string, *input_string; char *structure, *cstruc, *rec_sequence, *orig_sequence, *rec_id, **rec_rest; char fname[FILENAME_MAX_LENGTH], ffname[FILENAME_MAX_LENGTH]; char *ParamFile; char *ns_bases, *c; char *Concfile; int i, length, l, sym, r, cl; double min_en; double kT, sfact, betaScale; int pf, istty; int noconv, noPS; int doT; /*compute dimere free energies etc.*/ int doC; /*toggle to compute concentrations*/ int doQ; /*toggle to compute prob of base being paired*/ int cofi; /*toggle concentrations stdin / file*/ plist *prAB; plist *prAA; /*pair probabilities of AA dimer*/ plist *prBB; plist *prA; plist *prB; plist *mfAB; plist *mfAA; /*pair mfobabilities of AA dimer*/ plist *mfBB; plist *mfA; plist *mfB; double *ConcAandB; unsigned int rec_type, read_opt; pf_paramT *pf_parameters; model_detailsT md; /* ############################################# # init variables and parameter options ############################################# */ dangles = 2; sfact = 1.07; bppmThreshold = 1e-5; noconv = 0; noPS = 0; do_backtrack = 1; pf = 0; doT = 0; doC = 0; doQ = 0; cofi = 0; betaScale = 1.; gquad = 0; ParamFile = NULL; pf_parameters = NULL; string = NULL; Concfile = NULL; structure = NULL; cstruc = NULL; ns_bases = NULL; rec_type = read_opt = 0; rec_id = rec_sequence = orig_sequence = NULL; rec_rest = NULL; set_model_details(&md); /* ############################################# # check the command line prameters ############################################# */ if(RNAcofold_cmdline_parser (argc, argv, &args_info) != 0) exit(1); /* temperature */ if(args_info.temp_given) temperature = args_info.temp_arg; /* structure constraint */ if(args_info.constraint_given) fold_constrained=1; /* do not take special tetra loop energies into account */ if(args_info.noTetra_given) md.special_hp = tetra_loop=0; /* set dangle model */ if(args_info.dangles_given){ if((args_info.dangles_arg < 0) || (args_info.dangles_arg > 3)) warn_user("required dangle model not implemented, falling back to default dangles=2"); else md.dangles = dangles = args_info.dangles_arg; } /* do not allow weak pairs */ if(args_info.noLP_given) md.noLP = noLonelyPairs = 1; /* do not allow wobble pairs (GU) */ if(args_info.noGU_given) md.noGU = noGU = 1; /* do not allow weak closing pairs (AU,GU) */ if(args_info.noClosingGU_given) md.noGUclosure = no_closingGU = 1; /* gquadruplex support */ if(args_info.gquad_given) md.gquad = gquad = 1; /* enforce canonical base pairs in any case? */ if(args_info.canonicalBPonly_given) md.canonicalBPonly = canonicalBPonly = 1; /* do not convert DNA nucleotide "T" to appropriate RNA "U" */ if(args_info.noconv_given) noconv = 1; /* set energy model */ if(args_info.energyModel_given) energy_set = args_info.energyModel_arg; /* */ if(args_info.noPS_given) noPS = 1; /* take another energy parameter set */ if(args_info.paramFile_given) ParamFile = strdup(args_info.paramFile_arg); /* Allow other pairs in addition to the usual AU,GC,and GU pairs */ if(args_info.nsp_given) ns_bases = strdup(args_info.nsp_arg); /* set pf scaling factor */ if(args_info.pfScale_given) sfact = args_info.pfScale_arg; if(args_info.all_pf_given) doT = pf = 1; /* concentrations from stdin */ if(args_info.concentrations_given) doC = doT = pf = 1; /* set the bppm threshold for the dotplot */ if(args_info.bppmThreshold_given) bppmThreshold = MIN2(1., MAX2(0.,args_info.bppmThreshold_arg)); /* concentrations in file */ if(args_info.betaScale_given) betaScale = args_info.betaScale_arg; if(args_info.concfile_given){ Concfile = strdup(args_info.concfile_arg); doC = cofi = doT = pf = 1; } /* partition function settings */ if(args_info.partfunc_given){ pf = 1; if(args_info.partfunc_arg != -1) do_backtrack = args_info.partfunc_arg; } /* free allocated memory of command line data structure */ RNAcofold_cmdline_parser_free (&args_info); /* ############################################# # begin initializing ############################################# */ if(pf && gquad){ nrerror("G-Quadruplex support is currently not available for partition function computations"); } if (ParamFile != NULL) read_parameter_file(ParamFile); if (ns_bases != NULL) { nonstandards = space(33); c=ns_bases; i=sym=0; if (*c=='-') { sym=1; c++; } while (*c!='\0') { if (*c!=',') { nonstandards[i++]=*c++; nonstandards[i++]=*c; if ((sym)&&(*c!=*(c-1))) { nonstandards[i++]=*c; nonstandards[i++]=*(c-1); } } c++; } } istty = isatty(fileno(stdout))&&isatty(fileno(stdin)); /* print user help if we get input from tty */ if(istty){ printf("Use '&' to connect 2 sequences that shall form a complex.\n"); if(fold_constrained){ print_tty_constraint(VRNA_CONSTRAINT_DOT | VRNA_CONSTRAINT_X | VRNA_CONSTRAINT_ANG_BRACK | VRNA_CONSTRAINT_RND_BRACK); print_tty_input_seq_str("Input sequence (upper or lower case) followed by structure constraint\n"); } else print_tty_input_seq(); } /* set options we wanna pass to read_record */ if(istty) read_opt |= VRNA_INPUT_NOSKIP_BLANK_LINES; if(!fold_constrained) read_opt |= VRNA_INPUT_NO_REST; /* ############################################# # main loop: continue until end of file ############################################# */ while( !((rec_type = read_record(&rec_id, &rec_sequence, &rec_rest, read_opt)) & (VRNA_INPUT_ERROR | VRNA_INPUT_QUIT))){ /* ######################################################## # init everything according to the data we've read ######################################################## */ if(rec_id){ if(!istty) printf("%s\n", rec_id); (void) sscanf(rec_id, ">%" XSTR(FILENAME_ID_LENGTH) "s", fname); } else fname[0] = '\0'; cut_point = -1; rec_sequence = tokenize(rec_sequence); /* frees input_string and sets cut_point */ length = (int) strlen(rec_sequence); structure = (char *) space((unsigned) length+1); /* parse the rest of the current dataset to obtain a structure constraint */ if(fold_constrained){ cstruc = NULL; int cp = cut_point; unsigned int coptions = (rec_id) ? VRNA_CONSTRAINT_MULTILINE : 0; coptions |= VRNA_CONSTRAINT_DOT | VRNA_CONSTRAINT_X | VRNA_CONSTRAINT_ANG_BRACK | VRNA_CONSTRAINT_RND_BRACK; getConstraint(&cstruc, (const char **)rec_rest, coptions); cstruc = tokenize(cstruc); if(cut_point != cp) nrerror("cut point in sequence and structure constraint differs"); cl = (cstruc) ? (int)strlen(cstruc) : 0; if(cl == 0) warn_user("structure constraint is missing"); else if(cl < length) warn_user("structure constraint is shorter than sequence"); else if(cl > length) nrerror("structure constraint is too long"); if(cstruc) strncpy(structure, cstruc, sizeof(char)*(cl+1)); } /* convert DNA alphabet to RNA if not explicitely switched off */ if(!noconv) str_DNA2RNA(rec_sequence); /* store case-unmodified sequence */ orig_sequence = strdup(rec_sequence); /* convert sequence to uppercase letters only */ str_uppercase(rec_sequence); if(istty){ if (cut_point == -1) printf("length = %d\n", length); else printf("length1 = %d\nlength2 = %d\n", cut_point-1, length-cut_point+1); } /* ######################################################## # begin actual computations ######################################################## */ if (doC) { FILE *fp; if (cofi) { /* read from file */ fp = fopen(Concfile, "r"); if (fp==NULL) { fprintf(stderr, "could not open concentration file %s", Concfile); nrerror("\n"); } ConcAandB = read_concentrations(fp); fclose(fp); } else { printf("Please enter concentrations [mol/l]\n format: ConcA ConcB\n return to end\n"); ConcAandB = read_concentrations(stdin); } } /*compute mfe of AB dimer*/ min_en = cofold(rec_sequence, structure); assign_plist_from_db(&mfAB, structure, 0.95); { char *pstring, *pstruct; if (cut_point == -1) { pstring = strdup(orig_sequence); pstruct = strdup(structure); } else { pstring = costring(orig_sequence); pstruct = costring(structure); } printf("%s\n%s", pstring, pstruct); if (istty) printf("\n minimum free energy = %6.2f kcal/mol\n", min_en); else printf(" (%6.2f)\n", min_en); (void) fflush(stdout); if (!noPS) { char annot[512] = ""; if (fname[0]!='\0') { strcpy(ffname, fname); strcat(ffname, "_ss.ps"); } else { strcpy(ffname, "rna.ps"); } if (cut_point >= 0) sprintf(annot, "1 %d 9 0 0.9 0.2 omark\n%d %d 9 1 0.1 0.2 omark\n", cut_point-1, cut_point+1, length+1); if(gquad){ if (!noPS) (void) PS_rna_plot_a_gquad(pstring, pstruct, ffname, annot, NULL); } else { if (!noPS) (void) PS_rna_plot_a(pstring, pstruct, ffname, annot, NULL); } } free(pstring); free(pstruct); } if (length>2000) free_co_arrays(); /*compute partition function*/ if (pf) { cofoldF AB, AA, BB; FLT_OR_DBL *probs; if (dangles==1) { dangles=2; /* recompute with dangles as in pf_fold() */ min_en = energy_of_structure(rec_sequence, structure, 0); dangles=1; } kT = (betaScale*((temperature+K0)*GASCONST))/1000.; /* in Kcal */ pf_scale = exp(-(sfact*min_en)/kT/length); if (length>2000) fprintf(stderr, "scaling factor %f\n", pf_scale); pf_parameters = get_boltzmann_factors(temperature, betaScale, md, pf_scale); if (cstruc!=NULL) strncpy(structure, cstruc, length+1); AB = co_pf_fold_par(rec_sequence, structure, pf_parameters, do_backtrack, fold_constrained); if (do_backtrack) { char *costruc; costruc = (char *) space(sizeof(char)*(strlen(structure)+2)); if (cut_point<0) printf("%s", structure); else { strncpy(costruc, structure, cut_point-1); strcat(costruc, "&"); strcat(costruc, structure+cut_point-1); printf("%s", costruc); } if (!istty) printf(" [%6.2f]\n", AB.FAB); else printf("\n");/*8.6.04*/ } if ((istty)||(!do_backtrack)) printf(" free energy of ensemble = %6.2f kcal/mol\n", AB.FAB); printf(" frequency of mfe structure in ensemble %g", exp((AB.FAB-min_en)/kT)); printf(" , delta G binding=%6.2f\n", AB.FcAB - AB.FA - AB.FB); probs = export_co_bppm(); assign_plist_from_pr(&prAB, probs, length, bppmThreshold); /* if (doQ) make_probsum(length,fname); */ /*compute prob of base paired*/ /* free_co_arrays(); */ if (doT) { /* cofold of all dimers, monomers */ int Blength, Alength; char *Astring, *Bstring, *orig_Astring, *orig_Bstring; char *Newstring; char Newname[30]; char comment[80]; if (cut_point<0) { printf("Sorry, i cannot do that with only one molecule, please give me two or leave it\n"); free(mfAB); free(prAB); continue; } if (dangles==1) dangles=2; Alength=cut_point-1; /*length of first molecule*/ Blength=length-cut_point+1; /*length of 2nd molecule*/ Astring=(char *)space(sizeof(char)*(Alength+1));/*Sequence of first molecule*/ Bstring=(char *)space(sizeof(char)*(Blength+1));/*Sequence of second molecule*/ strncat(Astring,rec_sequence,Alength); strncat(Bstring,rec_sequence+Alength,Blength); orig_Astring=(char *)space(sizeof(char)*(Alength+1));/*Sequence of first molecule*/ orig_Bstring=(char *)space(sizeof(char)*(Blength+1));/*Sequence of second molecule*/ strncat(orig_Astring,orig_sequence,Alength); strncat(orig_Bstring,orig_sequence+Alength,Blength); /* compute AA dimer */ AA=do_partfunc(Astring, Alength, 2, &prAA, &mfAA, pf_parameters); /* compute BB dimer */ BB=do_partfunc(Bstring, Blength, 2, &prBB, &mfBB, pf_parameters); /*free_co_pf_arrays();*/ /* compute A monomer */ do_partfunc(Astring, Alength, 1, &prA, &mfA, pf_parameters); /* compute B monomer */ do_partfunc(Bstring, Blength, 1, &prB, &mfB, pf_parameters); compute_probabilities(AB.F0AB, AB.FA, AB.FB, prAB, prA, prB, Alength); compute_probabilities(AA.F0AB, AA.FA, AA.FA, prAA, prA, prA, Alength); compute_probabilities(BB.F0AB, BB.FA, BB.FA, prBB, prA, prB, Blength); printf("Free Energies:\nAB\t\tAA\t\tBB\t\tA\t\tB\n%.6f\t%6f\t%6f\t%6f\t%6f\n", AB.FcAB, AA.FcAB, BB.FcAB, AB.FA, AB.FB); if (doC) { do_concentrations(AB.FcAB, AA.FcAB, BB.FcAB, AB.FA, AB.FB, ConcAandB); free(ConcAandB);/*freeen*/ } if (fname[0]!='\0') { strcpy(ffname, fname); strcat(ffname, "_dp5.ps"); } else strcpy(ffname, "dot5.ps"); /*output of the 5 dot plots*/ /*AB dot_plot*/ /*write Free Energy into comment*/ sprintf(comment,"\n%%Heterodimer AB FreeEnergy= %.9f\n", AB.FcAB); /*reset cut_point*/ cut_point=Alength+1; /*write New name*/ strcpy(Newname,"AB"); strcat(Newname,ffname); (void)PS_dot_plot_list(orig_sequence, Newname, prAB, mfAB, comment); /*AA dot_plot*/ sprintf(comment,"\n%%Homodimer AA FreeEnergy= %.9f\n",AA.FcAB); /*write New name*/ strcpy(Newname,"AA"); strcat(Newname,ffname); /*write AA sequence*/ Newstring=(char*)space((2*Alength+1)*sizeof(char)); strcpy(Newstring,orig_Astring); strcat(Newstring,orig_Astring); (void)PS_dot_plot_list(Newstring, Newname, prAA, mfAA, comment); free(Newstring); /*BB dot_plot*/ sprintf(comment,"\n%%Homodimer BB FreeEnergy= %.9f\n",BB.FcAB); /*write New name*/ strcpy(Newname,"BB"); strcat(Newname,ffname); /*write BB sequence*/ Newstring=(char*)space((2*Blength+1)*sizeof(char)); strcpy(Newstring,orig_Bstring); strcat(Newstring,orig_Bstring); /*reset cut_point*/ cut_point=Blength+1; (void)PS_dot_plot_list(Newstring, Newname, prBB, mfBB, comment); free(Newstring); /*A dot plot*/ /*reset cut_point*/ cut_point=-1; sprintf(comment,"\n%%Monomer A FreeEnergy= %.9f\n",AB.FA); /*write New name*/ strcpy(Newname,"A"); strcat(Newname,ffname); /*write BB sequence*/ (void)PS_dot_plot_list(orig_Astring, Newname, prA, mfA, comment); /*B monomer dot plot*/ sprintf(comment,"\n%%Monomer B FreeEnergy= %.9f\n",AB.FB); /*write New name*/ strcpy(Newname,"B"); strcat(Newname,ffname); /*write BB sequence*/ (void)PS_dot_plot_list(orig_Bstring, Newname, prB, mfB, comment); free(Astring); free(Bstring); free(orig_Astring); free(orig_Bstring); free(prAB); free(prAA); free(prBB); free(prA); free(prB); free(mfAB); free(mfAA); free(mfBB); free(mfA); free(mfB); } /*end if(doT)*/ free(pf_parameters); }/*end if(pf)*/ if (do_backtrack) { if (fname[0]!='\0') { strcpy(ffname, fname); strcat(ffname, "_dp.ps"); } else strcpy(ffname, "dot.ps"); if (!doT) { if (pf) { (void) PS_dot_plot_list(rec_sequence, ffname, prAB, mfAB, "doof"); free(prAB);} free(mfAB); } } if (!doT) free_co_pf_arrays(); (void) fflush(stdout); /* clean up */ if(cstruc) free(cstruc); if(rec_id) free(rec_id); free(rec_sequence); free(orig_sequence); free(structure); /* free the rest of current dataset */ if(rec_rest){ for(i=0;rec_rest[i];i++) free(rec_rest[i]); free(rec_rest); } rec_id = rec_sequence = orig_sequence = structure = cstruc = NULL; rec_rest = NULL; /* print user help for the next round if we get input from tty */ if(istty){ printf("Use '&' to connect 2 sequences that shall form a complex.\n"); if(fold_constrained){ print_tty_constraint(VRNA_CONSTRAINT_DOT | VRNA_CONSTRAINT_X | VRNA_CONSTRAINT_ANG_BRACK | VRNA_CONSTRAINT_RND_BRACK); print_tty_input_seq_str("Input sequence (upper or lower case) followed by structure constraint\n"); } else print_tty_input_seq(); } } return EXIT_SUCCESS; }
/*--------------------------------------------------------------------------*/ int main(int argc, char *argv[]){ struct RNAalifold_args_info args_info; unsigned int input_type; char ffname[FILENAME_MAX_LENGTH], gfname[FILENAME_MAX_LENGTH], fname[FILENAME_MAX_LENGTH]; char *input_string, *string, *structure, *cstruc, *ParamFile, *ns_bases, *c; int n_seq, i, length, sym, r, noPS, with_sci; int endgaps, mis, circular, doAlnPS, doColor, doMEA, n_back, eval_energy, pf, istty; double min_en, real_en, sfact, MEAgamma, bppmThreshold, betaScale; char *AS[MAX_NUM_NAMES]; /* aligned sequences */ char *names[MAX_NUM_NAMES]; /* sequence names */ FILE *clust_file = stdin; pf_paramT *pf_parameters; model_detailsT md; fname[0] = ffname[0] = gfname[0] = '\0'; string = structure = cstruc = ParamFile = ns_bases = NULL; pf_parameters = NULL; endgaps = mis = pf = circular = doAlnPS = doColor = n_back = eval_energy = oldAliEn = doMEA = ribo = noPS = 0; do_backtrack = 1; dangles = 2; gquad = 0; sfact = 1.07; bppmThreshold = 1e-6; MEAgamma = 1.0; betaScale = 1.; with_sci = 0; set_model_details(&md); /* ############################################# # check the command line prameters ############################################# */ if(RNAalifold_cmdline_parser (argc, argv, &args_info) != 0) exit(1); /* temperature */ if(args_info.temp_given) temperature = args_info.temp_arg; /* structure constraint */ if(args_info.constraint_given) fold_constrained=1; /* do not take special tetra loop energies into account */ if(args_info.noTetra_given) md.special_hp = tetra_loop=0; /* set dangle model */ if(args_info.dangles_given){ if((args_info.dangles_arg != 0) && (args_info.dangles_arg != 2)) warn_user("required dangle model not implemented, falling back to default dangles=2"); else md.dangles = dangles=args_info.dangles_arg; } /* do not allow weak pairs */ if(args_info.noLP_given) md.noLP = noLonelyPairs = 1; /* do not allow wobble pairs (GU) */ if(args_info.noGU_given) md.noGU = noGU = 1; /* do not allow weak closing pairs (AU,GU) */ if(args_info.noClosingGU_given) md.noGUclosure = no_closingGU = 1; /* gquadruplex support */ if(args_info.gquad_given) md.gquad = gquad = 1; /* sci computation */ if(args_info.sci_given) with_sci = 1; /* do not convert DNA nucleotide "T" to appropriate RNA "U" */ /* set energy model */ if(args_info.energyModel_given) energy_set = args_info.energyModel_arg; /* take another energy parameter set */ if(args_info.paramFile_given) ParamFile = strdup(args_info.paramFile_arg); /* Allow other pairs in addition to the usual AU,GC,and GU pairs */ if(args_info.nsp_given) ns_bases = strdup(args_info.nsp_arg); /* set pf scaling factor */ if(args_info.pfScale_given) sfact = args_info.pfScale_arg; /* assume RNA sequence to be circular */ if(args_info.circ_given) circular=1; /* do not produce postscript output */ if(args_info.noPS_given) noPS = 1; /* partition function settings */ if(args_info.partfunc_given){ pf = 1; if(args_info.partfunc_arg != -1) do_backtrack = args_info.partfunc_arg; } /* MEA (maximum expected accuracy) settings */ if(args_info.MEA_given){ pf = doMEA = 1; if(args_info.MEA_arg != -1) MEAgamma = args_info.MEA_arg; } if(args_info.betaScale_given) betaScale = args_info.betaScale_arg; /* set the bppm threshold for the dotplot */ if(args_info.bppmThreshold_given) bppmThreshold = MIN2(1., MAX2(0.,args_info.bppmThreshold_arg)); /* set cfactor */ if(args_info.cfactor_given) cv_fact = args_info.cfactor_arg; /* set nfactor */ if(args_info.nfactor_given) nc_fact = args_info.nfactor_arg; if(args_info.endgaps_given) endgaps = 1; if(args_info.mis_given) mis = 1; if(args_info.color_given) doColor=1; if(args_info.aln_given) doAlnPS=1; if(args_info.old_given) oldAliEn = 1; if(args_info.stochBT_given){ n_back = args_info.stochBT_arg; do_backtrack = 0; pf = 1; init_rand(); } if(args_info.stochBT_en_given){ n_back = args_info.stochBT_en_arg; do_backtrack = 0; pf = 1; eval_energy = 1; init_rand(); } if(args_info.ribosum_file_given){ RibosumFile = strdup(args_info.ribosum_file_arg); ribo = 1; } if(args_info.ribosum_scoring_given){ RibosumFile = NULL; ribo = 1; } if(args_info.layout_type_given) rna_plot_type = args_info.layout_type_arg; /* alignment file name given as unnamed option? */ if(args_info.inputs_num == 1){ clust_file = fopen(args_info.inputs[0], "r"); if (clust_file == NULL) { fprintf(stderr, "can't open %s\n", args_info.inputs[0]); } } /* free allocated memory of command line data structure */ RNAalifold_cmdline_parser_free (&args_info); /* ############################################# # begin initializing ############################################# */ if(circular && gquad){ nrerror("G-Quadruplex support is currently not available for circular RNA structures"); } make_pair_matrix(); if (circular && noLonelyPairs) warn_user("depending on the origin of the circular sequence, " "some structures may be missed when using --noLP\n" "Try rotating your sequence a few times\n"); if (ParamFile != NULL) read_parameter_file(ParamFile); if (ns_bases != NULL) { nonstandards = space(33); c=ns_bases; i=sym=0; if (*c=='-') { sym=1; c++; } while (*c!='\0') { if (*c!=',') { nonstandards[i++]=*c++; nonstandards[i++]=*c; if ((sym)&&(*c!=*(c-1))) { nonstandards[i++]=*c; nonstandards[i++]=*(c-1); } } c++; } } istty = isatty(fileno(stdout))&&isatty(fileno(stdin)); /* ######################################################## # handle user input from 'stdin' if necessary ######################################################## */ if(fold_constrained){ if(istty){ print_tty_constraint_full(); print_tty_input_seq_str(""); } input_type = get_input_line(&input_string, VRNA_INPUT_NOSKIP_COMMENTS); if(input_type & VRNA_INPUT_QUIT){ return 0;} else if((input_type & VRNA_INPUT_MISC) && (strlen(input_string) > 0)){ cstruc = strdup(input_string); free(input_string); } else warn_user("constraints missing"); } if (istty && (clust_file == stdin)) print_tty_input_seq_str("Input aligned sequences in clustalw or stockholm format\n(enter a line starting with \"//\" to indicate the end of your input)"); n_seq = read_clustal(clust_file, AS, names); if (n_seq==0) nrerror("no sequences found"); if (clust_file != stdin) fclose(clust_file); /* ######################################################## # done with 'stdin' handling, now init everything properly ######################################################## */ length = (int) strlen(AS[0]); structure = (char *)space((unsigned) length+1); if(fold_constrained && cstruc != NULL) strncpy(structure, cstruc, length); if (endgaps) for (i=0; i<n_seq; i++) mark_endgaps(AS[i], '~'); /* ######################################################## # begin actual calculations ######################################################## */ if (circular) { int i; double s = 0; min_en = circalifold((const char **)AS, structure); for (i=0; AS[i]!=NULL; i++) s += energy_of_circ_structure(AS[i], structure, -1); real_en = s/i; } else { float *ens = (float *)space(2*sizeof(float)); min_en = alifold((const char **)AS, structure); if(md.gquad) energy_of_ali_gquad_structure((const char **)AS, structure, n_seq, ens); else energy_of_alistruct((const char **)AS, structure, n_seq, ens); real_en = ens[0]; free(ens); } string = (mis) ? consens_mis((const char **) AS) : consensus((const char **) AS); printf("%s\n%s", string, structure); if(istty){ if(with_sci){ float sci = min_en; float e_mean = 0; for (i=0; AS[i]!=NULL; i++){ char *seq = get_ungapped_sequence(AS[i]); char *str = (char *)space(sizeof(char) * (strlen(seq) + 1)); e_mean += fold(seq, str); free(seq); free(str); } e_mean /= i; sci /= e_mean; printf( "\n minimum free energy = %6.2f kcal/mol (%6.2f + %6.2f)" "\n SCI = %2.4f\n", min_en, real_en, min_en-real_en, sci); } else printf("\n minimum free energy = %6.2f kcal/mol (%6.2f + %6.2f)\n", min_en, real_en, min_en - real_en); } else { if(with_sci){ float sci = min_en; float e_mean = 0; for (i=0; AS[i]!=NULL; i++){ char *seq = get_ungapped_sequence(AS[i]); char *str = (char *)space(sizeof(char) * (strlen(seq) + 1)); e_mean += fold(seq, str); free(seq); free(str); } e_mean /= i; sci /= e_mean; printf(" (%6.2f = %6.2f + %6.2f) [%2.4f]\n", min_en, real_en, min_en-real_en, sci); } else printf(" (%6.2f = %6.2f + %6.2f) \n", min_en, real_en, min_en-real_en ); } strcpy(ffname, "alirna.ps"); strcpy(gfname, "alirna.g"); if (!noPS) { char **A; A = annote(structure, (const char**) AS); if(md.gquad){ if (doColor) (void) PS_rna_plot_a_gquad(string, structure, ffname, A[0], A[1]); else (void) PS_rna_plot_a_gquad(string, structure, ffname, NULL, A[1]); } else { if (doColor) (void) PS_rna_plot_a(string, structure, ffname, A[0], A[1]); else (void) PS_rna_plot_a(string, structure, ffname, NULL, A[1]); } free(A[0]); free(A[1]); free(A); } if (doAlnPS) PS_color_aln(structure, "aln.ps", (const char const **) AS, (const char const **) names); /* free mfe arrays */ free_alifold_arrays(); if (pf) { float energy, kT; char * mfe_struc; mfe_struc = strdup(structure); kT = (betaScale*((temperature+K0)*GASCONST))/1000.; /* in Kcal */ pf_scale = exp(-(sfact*min_en)/kT/length); if (length>2000) fprintf(stderr, "scaling factor %f\n", pf_scale); fflush(stdout); if (cstruc!=NULL) strncpy(structure, cstruc, length+1); pf_parameters = get_boltzmann_factors_ali(n_seq, temperature, betaScale, md, pf_scale); energy = alipf_fold_par((const char **)AS, structure, NULL, pf_parameters, do_backtrack, fold_constrained, circular); if (n_back>0) { /*stochastic sampling*/ for (i=0; i<n_back; i++) { char *s; double prob=1.; s = alipbacktrack(&prob); printf("%s ", s); if (eval_energy ) printf("%6g %.2f ",prob, -1*(kT*log(prob)-energy)); printf("\n"); free(s); } } if (do_backtrack) { printf("%s", structure); if (!istty) printf(" [%6.2f]\n", energy); else printf("\n"); } if ((istty)||(!do_backtrack)) printf(" free energy of ensemble = %6.2f kcal/mol\n", energy); printf(" frequency of mfe structure in ensemble %g\n", exp((energy-min_en)/kT)); if (do_backtrack) { FILE *aliout; cpair *cp; char *cent; double dist; FLT_OR_DBL *probs = export_ali_bppm(); plist *pl, *mfel; assign_plist_from_pr(&pl, probs, length, bppmThreshold); assign_plist_from_db(&mfel, mfe_struc, 0.95*0.95); if (!circular){ float *ens; cent = get_centroid_struct_pr(length, &dist, probs); ens=(float *)space(2*sizeof(float)); energy_of_alistruct((const char **)AS, cent, n_seq, ens); /*cent_en = energy_of_struct(string, cent);*/ /*ali*/ printf("%s %6.2f {%6.2f + %6.2f}\n",cent,ens[0]-ens[1],ens[0],(-1)*ens[1]); free(cent); free(ens); } if(doMEA){ float mea, *ens; plist *pl2; assign_plist_from_pr(&pl2, probs, length, 1e-4/(1+MEAgamma)); mea = MEA(pl2, structure, MEAgamma); ens = (float *)space(2*sizeof(float)); if(circular) energy_of_alistruct((const char **)AS, structure, n_seq, ens); else ens[0] = energy_of_structure(string, structure, 0); printf("%s {%6.2f MEA=%.2f}\n", structure, ens[0], mea); free(ens); free(pl2); } if (fname[0]!='\0') { strcpy(ffname, fname); strcat(ffname, "_ali.out"); } else strcpy(ffname, "alifold.out"); aliout = fopen(ffname, "w"); if (!aliout) { fprintf(stderr, "can't open %s skipping output\n", ffname); } else { print_aliout(AS, pl, bppmThreshold, n_seq, mfe_struc, aliout); } fclose(aliout); if (fname[0]!='\0') { strcpy(ffname, fname); strcat(ffname, "_dp.ps"); } else strcpy(ffname, "alidot.ps"); cp = make_color_pinfo(AS,pl, bppmThreshold, n_seq, mfel); (void) PS_color_dot_plot(string, cp, ffname); free(cp); free(pl); free(mfel); } free(mfe_struc); free_alipf_arrays(); free(pf_parameters); } if (cstruc!=NULL) free(cstruc); (void) fflush(stdout); free(string); free(structure); for (i=0; AS[i]; i++) { free(AS[i]); free(names[i]); } return 0; }
/*---------------------------------------------------------------------------*/ int PS_rna_plot(char *string, char *structure, char *ssfile) { return PS_rna_plot_a(string, structure, ssfile, NULL, NULL); }
int main(int argc, char *argv[]) { char *string; char *structure=NULL; char *cstruc=NULL; char *ns_bases=NULL; char *c; int n_seq; int i; int length; int sym; int endgaps = 0; int mis = 0; double min_en; double real_en; double sfact = 1.07; int pf = 0; int istty; char *AS[MAX_NUM_NAMES]; /* aligned sequences */ char *names[MAX_NUM_NAMES]; /* sequence names */ AjPSeqset seq = NULL; AjPFile confile = NULL; AjPFile alifile = NULL; AjPFile paramfile = NULL; AjPFile outf = NULL; AjPFile essfile = NULL; AjPFile dotfile = NULL; AjPStr constring = NULL; float eT = 0.; AjBool eGU; AjBool eclose; AjBool lonely; AjPStr ensbases = NULL; AjBool etloop; AjPStr eenergy = NULL; char ewt = '\0'; float escale = 0.; AjPStr edangles = NULL; char edangle = '\0'; ajint len; AjPSeq tseq = NULL; AjPStr tname = NULL; int circ = 0; int doAlnPS = 0; int doColor = 0; embInitPV("vrnaalifoldpf",argc,argv,"VIENNA",VERSION); constring = ajStrNew(); seq = ajAcdGetSeqset("sequence"); confile = ajAcdGetInfile("constraintfile"); paramfile = ajAcdGetInfile("paramfile"); eT = ajAcdGetFloat("temperature"); eGU = ajAcdGetBoolean("gu"); eclose = ajAcdGetBoolean("closegu"); lonely = ajAcdGetBoolean("lp"); ensbases = ajAcdGetString("nsbases"); etloop = ajAcdGetBoolean("tetraloop"); eenergy = ajAcdGetListSingle("energy"); escale = ajAcdGetFloat("scale"); edangles = ajAcdGetListSingle("dangles"); mis = !!ajAcdGetBoolean("most"); endgaps = !!ajAcdGetBoolean("endgaps"); nc_fact = (double) ajAcdGetFloat("nspenalty"); cv_fact = (double) ajAcdGetFloat("covariance"); outf = ajAcdGetOutfile("outfile"); essfile = ajAcdGetOutfile("ssoutfile"); alifile = ajAcdGetOutfile("alignoutfile"); circ = !!ajAcdGetBoolean("circular"); doColor = !!ajAcdGetBoolean("colour"); dotfile = ajAcdGetOutfile("dotoutfile"); do_backtrack = 1; pf = 1; string = NULL; istty = 0; dangles = 2; temperature = (double) eT; noGU = (eGU) ? 0 : 1; no_closingGU = (eclose) ? 0 : 1; noLonelyPairs = (lonely) ? 0 : 1; ns_bases = (ajStrGetLen(ensbases)) ? MAJSTRGETPTR(ensbases) : NULL; tetra_loop = !!etloop; ewt = *ajStrGetPtr(eenergy); if(ewt == '0') energy_set = 0; else if(ewt == '1') energy_set = 1; else if(ewt == '2') energy_set = 2; sfact = (double) escale; edangle = *ajStrGetPtr(edangles); if(edangle == '0') dangles = 0; else if(edangle == '1') dangles = 1; else if(edangle == '2') dangles = 2; else if(edangle == '3') dangles = 3; if(paramfile) read_parameter_file(paramfile); if (ns_bases != NULL) { nonstandards = space(33); c=ns_bases; i=sym=0; if (*c=='-') { sym=1; c++; } while (*c!='\0') { if (*c!=',') { nonstandards[i++]=*c++; nonstandards[i++]=*c; if ((sym)&&(*c!=*(c-1))) { nonstandards[i++]=*c; nonstandards[i++]=*(c-1); } } c++; } } if(alifile) doAlnPS = 1; if(confile) vienna_GetConstraints(confile,&constring); n_seq = ajSeqsetGetSize(seq); if(n_seq > MAX_NUM_NAMES - 1) ajFatal("[e]RNAalifold is restricted to %d sequences\n", MAX_NUM_NAMES - 1); if (n_seq==0) ajFatal("No sequences found"); for(i=0;i<n_seq;++i) { tseq = (AjPSeq) ajSeqsetGetseqSeq(seq,i); ajSeqGapStandard(tseq, '-'); tname = (AjPStr) ajSeqsetGetseqNameS(seq,i); len = ajSeqGetLen(tseq); AS[i] = (char *) space(len+1); names[i] = (char *) space(ajStrGetLen(tname)+1); strcpy(AS[i],ajSeqGetSeqC(tseq)); strcpy(names[i],ajStrGetPtr(tname)); } AS[n_seq] = NULL; names[n_seq] = NULL; if (endgaps) for (i=0; i<n_seq; i++) mark_endgaps(AS[i], '~'); length = (int) strlen(AS[0]); structure = (char *) space((unsigned) length+1); if(confile) { fold_constrained = 1; strcpy(structure,ajStrGetPtr(constring)); } if (circ && noLonelyPairs) ajWarn( "warning, depending on the origin of the circular sequence, " "some structures may be missed when using -noLP\n" "Try rotating your sequence a few times\n"); if (circ) min_en = circalifold((const char **)AS, structure); else min_en = alifold(AS, structure); { int i; double s=0; extern int eos_debug; eos_debug=-1; /* shut off warnings about nonstandard pairs */ for (i=0; AS[i]!=NULL; i++) if (circ) s += energy_of_circ_struct(AS[i], structure); else s += energy_of_struct(AS[i], structure); real_en = s/i; } string = (mis) ? consens_mis((const char **) AS) : consensus((const char **) AS); ajFmtPrintF(outf,"%s\n%s", string, structure); ajFmtPrintF(outf," (%6.2f = %6.2f + %6.2f) \n", min_en, real_en, min_en-real_en ); if (length<=2500) { char **A; A = annote(structure, (const char**) AS); if (doColor) (void) PS_rna_plot_a(string, structure, essfile, A[0], A[1]); else (void) PS_rna_plot_a(string, structure, essfile, NULL, A[1]); free(A[0]); free(A[1]);free(A); } else ajWarn("INFO: structure too long, not doing xy_plot\n"); if (doAlnPS) PS_color_aln(structure, alifile, AS, names); { /* free mfe arrays but preserve base_pair for PS_dot_plot */ struct bond *bp; bp = base_pair; base_pair = space(16); free_alifold_arrays(); /* free's base_pair */ free_alipf_arrays(); base_pair = bp; } if (pf) { double energy, kT; pair_info *pi; char * mfe_struc; mfe_struc = strdup(structure); kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */ pf_scale = exp(-(sfact*min_en)/kT/length); if (length>2000) ajWarn("scaling factor %f\n", pf_scale); /* init_alipf_fold(length); */ if (confile) strncpy(structure, ajStrGetPtr(constring), length+1); energy = (circ) ? alipf_circ_fold(AS, structure, &pi) : alipf_fold(AS, structure, &pi); if (do_backtrack) { ajFmtPrintF(outf,"%s", structure); ajFmtPrintF(outf," [%6.2f]\n", energy); } if ((istty)||(!do_backtrack)) ajFmtPrintF(outf," free energy of ensemble = %6.2f kcal/mol\n", energy); ajFmtPrintF(outf," frequency of mfe structure in ensemble %g\n", exp((energy-min_en)/kT)); if (do_backtrack) { FILE *aliout; cpair *cp; short *ptable; int k; ptable = make_pair_table(mfe_struc); ajFmtPrintF(outf,"\n# Alignment section\n\n"); aliout = ajFileGetFileptr(outf); fprintf(aliout, "%d sequences; length of alignment %d\n", n_seq, length); fprintf(aliout, "alifold output\n"); for (k=0; pi[k].i>0; k++) { pi[k].comp = (ptable[pi[k].i] == pi[k].j) ? 1:0; print_pi(pi[k], aliout); } fprintf(aliout, "%s\n", structure); free(ptable); cp = make_color_pinfo(pi); (void) PS_color_dot_plot(string, cp, dotfile); free(cp); free(mfe_struc); free(pi); } } if (cstruc!=NULL) free(cstruc); free(base_pair); (void) fflush(stdout); free(string); free(structure); for (i=0; AS[i]; i++) { free(AS[i]); free(names[i]); } ajSeqsetDel(&seq); ajStrDel(&constring); ajStrDel(&eenergy); ajStrDel(&edangles); ajStrDel(&ensbases); ajFileClose(&confile); ajFileClose(¶mfile); ajFileClose(&outf); ajFileClose(&essfile); ajFileClose(&alifile); ajFileClose(&dotfile); embExit(); return 0; }