PRIVATE double mfe_cost(const char *string, char *structure, const char *target) { #if TDIST Tree *T1; char *xstruc; #endif double energy, distance; if (strlen(string)!=strlen(target)) { fprintf(stderr, "%s\n%s\n", string, target); nrerror("unequal length in mfe_cost"); } energy = fold(string, structure); #if TDIST if (T0 == NULL) { xstruc = expand_Full(target); T0=make_tree(xstruc); free(xstruc); } xstruc = expand_Full(structure); T1=make_tree(xstruc); distance = tree_edit_distance(T0,T1); free(xstruc); free_tree(T1); #else distance = (double) bp_distance(target, structure); #endif cost2 = energy_of_struct(string, target) - energy; return (double) distance; }
/*
 * Print the refolding path stored in the file-level `path` array.
 *
 * Starting from `struc`, the moves in `path` are sorted with
 * compare_moves_when and applied one by one to a private copy of the
 * structure.  After each move the structure is printed together with its
 * energy (recomputed via energy_of_struct) and the energy stored with the
 * move (path[].E, scaled by 1/100).
 *
 * A move with negative i deletes the pair (-i,-j); otherwise the pair (i,j)
 * is inserted.  Positions in `path` are 1-based.
 */
void print_path(char *seq, char *struc)
{
  const size_t n = strlen(struc);
  char *cur = (char *) space((n + 1) * sizeof(char));
  int step;

  memcpy(cur, struc, n + 1);
  printf("%s\n%s %6.2f\n", seq, cur, energy_of_struct(seq, cur));

  qsort(path, BP_dist, sizeof(move_t), compare_moves_when);

  for (step = 0; step < BP_dist; step++) {
    const int a = path[step].i;
    const int b = path[step].j;

    if (a < 0) {
      /* delete the pair */
      cur[-a - 1] = '.';
      cur[-b - 1] = '.';
    } else {
      /* insert the pair */
      cur[a - 1] = '(';
      cur[b - 1] = ')';
    }

    printf("%s %6.2f - %6.2f\n", cur, energy_of_struct(seq, cur),
           path[step].E / 100.0);
  }

  free(cur);
}
void ini_or_reset_rl(void) { /* if there is no ringList-tree make a new one */ if (wurzl == NULL) { ini_ringlist(); /* start structure */ struc2tree(GAV.startform); GSV.currE = GSV.startE = energy_of_struct(GAV.farbe, GAV.startform); /* stop structure(s) */ if ( GTV.stop ) { int i; qsort(GAV.stopform, GSV.maxS, sizeof(char *), comp_struc); for (i = 0; i< GSV.maxS; i++) GAV.sE[i] = energy_of_struct(GAV.farbe_full, GAV.stopform[i]); } else { if(GTV.noLP) noLonelyPairs=1; initialize_fold(GSV.len); /* fold sequence to get Minimum free energy structure (Mfe) */ GAV.sE[0] = fold(GAV.farbe_full, GAV.stopform[0]); free_arrays(); /* revaluate energy of Mfe (maye differ if --logML=logarthmic */ GAV.sE[0] = energy_of_struct(GAV.farbe_full, GAV.stopform[0]); } GSV.stopE = GAV.sE[0]; ini_nbList(strlen(GAV.farbe_full)*strlen(GAV.farbe_full)); } else { /* reset ringlist-tree to start conditions */ reset_ringlist(); if(GTV.start) struc2tree(GAV.startform); else { GSV.currE = GSV.startE; } } }
/*
 * Cost function based on the partition function.
 *
 * With PF enabled: returns the energy of `target` on `string` minus the
 * value returned by pf_fold() minus the file-level `final_cost`.
 * pf_fold() receives `structure` as its second argument.
 *
 * Without PF: reports the missing feature through nrerror(); the trailing
 * `return 0` is only reached if nrerror() returns.
 */
PRIVATE double pf_cost(const char *string, char *structure, const char *target)
{
#if PF
  const double ensemble_en = pf_fold(string, structure);
  const double target_en   = energy_of_struct(string, target);

  return (double) (target_en - ensemble_en - final_cost);
#else
  nrerror("this version not linked with pf_fold");
  return 0;
#endif
}
int main(int argc, char *argv[]) { char *modelDir=NULL; /* Directory with model files */ struct svm_model* decision_model; /* SVM classification model */ /* Command line options */ int reverse=0; /* Scan reverse complement */ int showVersion=0; /* Shows version and exits */ int showHelp=0; /* Show short help and exits */ int from=-1; /* Scan slice from-to */ int to=-1; FILE *clust_file=stdin; /* Input file */ FILE *out=stdout; /* Output file */ struct aln *AS[MAX_NUM_NAMES]; struct aln *window[MAX_NUM_NAMES]; char *tmpAln[MAX_NUM_NAMES]; int n_seq; /* number of input sequences */ int length; /* length of alignment/window */ int z_score_type; int decision_model_type; char *structure=NULL; char *singleStruc,*gapStruc, *output,*woGapsSeq; char strand[8]; char warningString[2000]; char warningString_regression[2000]; char *string=NULL; double singleMFE,sumMFE,singleZ,sumZ,z,sci,id,decValue,prob,comb,entropy,GC; double min_en, real_en; int i,j,k,l,ll,r,countAln,nonGaps,singleGC; int (*readFunction)(FILE *clust,struct aln *alignedSeqs[]); char** lines=NULL; int directions[3]={FORWARD,0,0}; int currDirection; struct gengetopt_args_info args; double meanMFE_fwd=0; double consensusMFE_fwd=0; double sci_fwd=0; double z_fwd=0; int strandGuess; int avoid_shuffle=0; double strandProb,strandDec; if (cmdline_parser (argc, argv, &args) != 0){ usage(); exit(EXIT_FAILURE); } if (args.help_given){ help(); exit(EXIT_SUCCESS); } if (args.version_given){ version(); exit(EXIT_SUCCESS); } if (args.outfile_given){ out = fopen(args.outfile_arg, "w"); if (out == NULL){ fprintf(stderr, "ERROR: Can't open output file %s\n", args.outfile_arg); exit(1); } } /* Strand prediction implies both strands scored */ if (args.predict_strand_flag){ args.both_strands_flag=1; } if (args.forward_flag && !args.reverse_flag){ directions[0]=FORWARD; directions[1]=directions[2]=0; } if (!args.forward_flag && args.reverse_flag){ directions[0]=REVERSE; directions[1]=directions[2]=0; } if ((args.forward_flag && 
args.reverse_flag) || args.both_strands_flag){ directions[0]=FORWARD; directions[1]=REVERSE; } if (args.window_given){ if (sscanf(args.window_arg,"%d-%d",&from,&to)!=2){ nrerror("ERROR: Invalid --window/-w command. " "Use it like '--window 100-200'\n"); } printf("from:%d,to:%d\n",from,to); } if (args.inputs_num>=1){ clust_file = fopen(args.inputs[0], "r"); if (clust_file == NULL){ fprintf(stderr, "ERROR: Can't open input file %s\n", args.inputs[0]); exit(1); } } /* Global RNA package variables */ do_backtrack = 1; dangles=2; switch(checkFormat(clust_file)){ case CLUSTAL: readFunction=&read_clustal; break; case MAF: readFunction=&read_maf; break; case 0: nrerror("ERROR: Unknown alignment file format. Use Clustal W or MAF format.\n"); } /* Set z-score type (mono/dinucleotide) here */ z_score_type = 2; if (args.mononucleotide_given) z_score_type = 0; /* now let's decide which decision model to take */ /* decision_model_type = 1 for normal model used in RNAz 1.0 */ /* decision_model_type = 2 for normal model using dinucelotide background */ /* decision_model_type = 3 for structural model using dinucelotide background */ decision_model_type = 2; if (args.mononucleotide_given) decision_model_type = 1; if (args.locarnate_given) decision_model_type = 3; if ((args.mononucleotide_given) && args.locarnate_given){ z_score_type=2; nrerror("ERROR: Structural decision model only trained with dinucleotide background model.\n"); } if (args.no_shuffle_given) avoid_shuffle = 1; decision_model=get_decision_model(NULL, decision_model_type); /* Initialize Regression Models for mononucleotide */ /* Not needed if we score with dinucleotides */ if (z_score_type == 0) regression_svm_init(); countAln=0; while ((n_seq=readFunction(clust_file, AS))!=0){ if (n_seq ==1){ nrerror("ERROR: You need at least two sequences in the alignment.\n"); } countAln++; length = (int) strlen(AS[0]->seq); /* if a slice is specified by the user */ if ((from!=-1 || to!=-1) && (countAln==1)){ if 
((from>=to)||(from<=0)||(to>length)){ nrerror("ERROR: Invalid window range given.\n"); } sliceAln((const struct aln**)AS, (struct aln **)window, from, to); length=to-from+1; } else { /* take complete alignment */ /* window=AS does not work..., deep copy seems not necessary here*/ from=1; to=length; sliceAln((const struct aln **)AS, (struct aln **)window, 1, length); } /* Convert all Us to Ts for RNAalifold. There is a slight difference in the results. During training we used alignments with Ts, so we use Ts here as well. */ for (i=0;i<n_seq;i++){ j=0; while (window[i]->seq[j]){ window[i]->seq[j]=toupper(window[i]->seq[j]); if (window[i]->seq[j]=='U') window[i]->seq[j]='T'; ++j; } } k=0; while ((currDirection=directions[k++])!=0){ if (currDirection==REVERSE){ revAln((struct aln **)window); strcpy(strand,"reverse"); } else { strcpy(strand,"forward"); } structure = (char *) space((unsigned) length+1); for (i=0;window[i]!=NULL;i++){ tmpAln[i]=window[i]->seq; } tmpAln[i]=NULL; min_en = alifold(tmpAln, structure); free_alifold_arrays(); comb=combPerPair(window,structure); sumZ=0.0; sumMFE=0.0; GC=0.0; output=(char *)space(sizeof(char)*(length+160)*(n_seq+1)*3); strcpy(warningString,""); strcpy(warningString_regression,""); for (i=0;i<n_seq;i++){ singleStruc = space(strlen(window[i]->seq)+1); woGapsSeq = space(strlen(window[i]->seq)+1); j=0; nonGaps=0; singleGC=0; while (window[i]->seq[j]){ /* Convert all Ts to Us for RNAfold. There is a difference between the results. With U in the function call, we get the results as RNAfold gives on the command line. Since this variant was also used during training, we use it here as well. */ if (window[i]->seq[j]=='T') window[i]->seq[j]='U'; if (window[i]->seq[j]=='C') singleGC++; if (window[i]->seq[j]=='G') singleGC++; if (window[i]->seq[j]!='-'){ nonGaps++; woGapsSeq[strlen(woGapsSeq)]=window[i]->seq[j]; woGapsSeq[strlen(woGapsSeq)]='\0'; } ++j; } /* z-score is calculated here! 
*/ singleMFE = fold(woGapsSeq, singleStruc); free_arrays(); /* z-score type may be overwritten. If it is out of training bounds, we switch to shuffling if allowed (avoid_shuffle). */ int z_score_type_orig = z_score_type; singleZ=mfe_zscore(woGapsSeq,singleMFE, &z_score_type, avoid_shuffle, warningString_regression); GC+=(double) singleGC/nonGaps; sumZ+=singleZ; sumMFE+=singleMFE; if (window[1]->strand!='?' && !args.window_given){ sprintf(output+strlen(output), ">%s %d %d %c %d\n", window[i]->name,window[i]->start, window[i]->length,window[i]->strand, window[i]->fullLength); } else { sprintf(output+strlen(output),">%s\n",window[i]->name); } gapStruc= (char *) space(sizeof(char)*(strlen(window[i]->seq)+1)); l=ll=0; while (window[i]->seq[l]!='\0'){ if (window[i]->seq[l]!='-'){ gapStruc[l]=singleStruc[ll]; l++; ll++; } else { gapStruc[l]='-'; l++; } } char ch; ch = 'R'; if (z_score_type == 1 || z_score_type == 3) ch = 'S'; sprintf(output+strlen(output),"%s\n%s ( %6.2f, z-score = %6.2f, %c)\n", window[i]->seq,gapStruc,singleMFE,singleZ,ch); z_score_type = z_score_type_orig; free(woGapsSeq); free(singleStruc); } { int i; double s=0; extern int eos_debug; eos_debug=-1; /* shut off warnings about nonstandard pairs */ for (i=0; window[i]!=NULL; i++) s += energy_of_struct(window[i]->seq, structure); real_en = s/i; } string = consensus((const struct aln**) window); sprintf(output+strlen(output), ">consensus\n%s\n%s (%6.2f = %6.2f + %6.2f) \n", string, structure, min_en, real_en, min_en-real_en ); free(string); id=meanPairID((const struct aln**)window); entropy=NormShannonEntropy((const struct aln**)window); z=sumZ/n_seq; GC=(double)GC/n_seq; if (sumMFE==0){ /*Set SCI to 0 in the weird case of no structure in single sequences*/ sci=0; } else { sci=min_en/(sumMFE/n_seq); } decValue=999; prob=0; classify(&prob,&decValue,decision_model,id,n_seq,z,sci,entropy,decision_model_type); if (args.cutoff_given){ if (prob<args.cutoff_arg){ continue; } } 
warning(warningString,id,n_seq,z,sci,entropy,(struct aln **)window,decision_model_type); fprintf(out,"\n############################ RNAz "PACKAGE_VERSION" ##############################\n\n"); fprintf(out," Sequences: %u\n", n_seq); if (args.window_given){ fprintf(out," Slice: %u to %u\n",from,to); } fprintf(out," Columns: %u\n",length); fprintf(out," Reading direction: %s\n",strand); fprintf(out," Mean pairwise identity: %6.2f\n", id); fprintf(out," Shannon entropy: %2.5f\n", entropy); fprintf(out," G+C content: %2.5f\n", GC); fprintf(out," Mean single sequence MFE: %6.2f\n", sumMFE/n_seq); fprintf(out," Consensus MFE: %6.2f\n",min_en); fprintf(out," Energy contribution: %6.2f\n",real_en); fprintf(out," Covariance contribution: %6.2f\n",min_en-real_en); fprintf(out," Combinations/Pair: %6.2f\n",comb); fprintf(out," Mean z-score: %6.2f\n",z); fprintf(out," Structure conservation index: %6.2f\n",sci); if (decision_model_type == 1) { fprintf(out," Background model: mononucleotide\n"); fprintf(out," Decision model: sequence based alignment quality\n"); } if (decision_model_type == 2) { fprintf(out," Background model: dinucleotide\n"); fprintf(out," Decision model: sequence based alignment quality\n"); } if (decision_model_type == 3) { fprintf(out," Background model: dinucleotide\n"); fprintf(out," Decision model: structural RNA alignment quality\n"); } fprintf(out," SVM decision value: %6.2f\n",decValue); fprintf(out," SVM RNA-class probability: %6f\n",prob); if (prob>0.5){ fprintf(out," Prediction: RNA\n"); } else { fprintf(out," Prediction: OTHER\n"); } fprintf(out,"%s",warningString_regression); fprintf(out,"%s",warningString); fprintf(out,"\n######################################################################\n\n"); fprintf(out,"%s",output); fflush(out); free(structure); free(output); if (currDirection==FORWARD && args.predict_strand_flag){ meanMFE_fwd=sumMFE/n_seq; consensusMFE_fwd=min_en; sci_fwd=sci; z_fwd=z; } if (currDirection==REVERSE && 
args.predict_strand_flag){ if (predict_strand(sci_fwd-sci, meanMFE_fwd-(sumMFE/n_seq), consensusMFE_fwd-min_en, z_fwd-z, n_seq, id, &strandGuess, &strandProb, &strandDec, NULL)){ if (strandGuess==1){ fprintf(out, "\n# Strand winner: forward (%.2f)\n",strandProb); } else { fprintf(out, "\n# Strand winner: reverse (%.2f)\n",1-strandProb); } } else { fprintf(out, "\n# WARNING: No strand prediction (values out of range)\n"); } } } freeAln((struct aln **)AS); freeAln((struct aln **)window); } if (args.inputs_num>=1){ fclose(clust_file); } cmdline_parser_free (&args); if (countAln==0){ nrerror("ERROR: Empty alignment file\n"); } svm_destroy_model(decision_model); regression_svm_free(); return 0; }
void print_stats(FILE* statsfile, char* seq, char* struc, int length, int iteration, int count_df_evaluations, double D, double prev_D, double norm, int printPS) { plist *pl, *pl1,*pl2; char fname[100]; char title[100]; char* ss; double MEAgamma, mea, mea_en; char* output; int i,j; static char timestamp[40]; const struct tm *tm; time_t now; ss = (char *) space((unsigned) length+1); memset(ss,'.',length); init_pf_fold(length); pf_fold_pb(seq, NULL); for (i = 1; i < length; i++) { for (j = i+1; j<= length; j++) { p_pp[i][j]=p_pp[j][i]=pr[iindx[i]-j]; } } get_pair_prob_vector(p_pp, p_unpaired, length, 1); fprintf (stderr, "\nITERATION: %i\n", iteration); fprintf(stderr, "DISCREPANCY: %.4f\n", D); fprintf(stderr, "NORM: %.2f\n", norm); if (prev_D > -1.0) { fprintf(stderr, "IMPROVEMENT: %.4f%%\n\n", (1-(D/prev_D))*100); } fprintf(statsfile, "%i\t%.4f\t%.4f\t%i\t", iteration, D, norm, count_df_evaluations); for (MEAgamma=1e-5; MEAgamma<1e+6; MEAgamma*=10 ) { pl = make_plist(length, 1e-4/(1+MEAgamma)); mea = MEA(pl, ss, MEAgamma); mea_en = energy_of_struct(seq, ss); fprintf(statsfile,"%s,%.2e;", ss, MEAgamma); free(pl); } fprintf(statsfile, "\t"); // Stochastic backtracking fprintf(stderr, "Sampling structures...\n"); if (sample_structure) { char* best_structure; char* curr_structure; double x; double curr_energy = 0.0; double min_energy = +1.0; int curr_distance = 0; int min_distance = 999999; best_structure = (char *) space((unsigned) length+1); for (i=1; i<=10000; i++) { curr_structure = pbacktrack_pb(seq); curr_energy = energy_of_struct(seq, curr_structure); curr_distance = 0.0; //fprintf(stderr, "%s%.2f ", curr_structure, curr_energy); for (j = 1; j <= length; j++) { if (q_unpaired[j] > -0.5) { x = (curr_structure[j-1] == '.') ? 
1.0 : 0.0; curr_distance += abs(x-q_unpaired[j]); } } if (curr_distance < min_distance) { min_distance = curr_distance; min_energy = curr_energy; strcpy(best_structure, curr_structure); } if (curr_distance == min_distance) { if (curr_energy < min_energy) { min_energy = curr_energy; strcpy(best_structure, curr_structure); } } //fprintf(stderr, "%i\n", curr_distance); free(curr_structure); } //fprintf(stderr, "\n%s %.2f %i\n", best_structure, min_energy, min_distance); fprintf(statsfile, "\t%s\t%.2f\t%i\t", best_structure, min_energy, min_distance); } else { fprintf(statsfile, "NA\tNA\tNA\t"); } for (i = 1; i <= length; i++) { fprintf(statsfile, "%.4f", epsilon[i]); if (!(i==length)) { fprintf(statsfile, ","); } } now = time ( NULL ); tm = localtime ( &now ); strftime ( timestamp, 40, "%Y-%m-%d %X", tm ); fprintf(statsfile, "\t%s\n", timestamp); /* Print dotplot only if not noPS is given and function call asks for it */ if (!noPS && printPS) { /* Print dotplot */ sprintf(fname,"%s/iteration%i.ps", psDir, iteration); pl1 = make_plist(length, 1e-5); if (struc != NULL) { pl2 = b2plist(struc); } else { pl2 = NULL; } sprintf(title,"Iteration %i, D = %.4f", iteration, D); (void) PS_dot_plot_list_epsilon(seq, fname, pl2, pl1, epsilon, title); } free_pf_arrays(); }
int main(int argc, char *argv[]) { char *string/*, *line*/; char *structure=NULL, *cstruc=NULL; /*char fname[13], ffname[20], gfname[20];*/ /*char *ParamFile=NULL;*/ char *ns_bases=NULL, *c; int i, length, l, sym/*, r*/; double energy, min_en; double kT, sfact=1.07; int pf=0, noPS=0, istty; int noconv=0; int circ=0; AjPSeq seq = NULL; AjPFile confile = NULL; AjPFile paramfile = NULL; AjPFile outf = NULL; AjPFile essfile = NULL; AjPFile dotfilea = NULL; AjPFile dotfileb = NULL; AjPStr seqstring = NULL; AjPStr constring = NULL; AjPStr seqname = NULL; float eT = 0.; AjBool eGU; AjBool ecirc = ajFalse; AjBool eclose; AjBool lonely; AjBool convert; AjPStr ensbases = NULL; AjBool etloop; AjPStr eenergy = NULL; char ewt = '\0'; float escale = 0.; AjPStr edangles = NULL; char edangle = '\0'; ajint len; embInitPV("vrnafold",argc,argv,"VIENNA",VERSION); seqstring = ajStrNew(); constring = ajStrNew(); seqname = ajStrNew(); seq = ajAcdGetSeq("sequence"); confile = ajAcdGetInfile("constraintfile"); paramfile = ajAcdGetInfile("paramfile"); eT = ajAcdGetFloat("temperature"); ecirc = ajAcdGetBoolean("circular"); eGU = ajAcdGetBoolean("gu"); eclose = ajAcdGetBoolean("closegu"); lonely = ajAcdGetBoolean("lp"); convert = ajAcdGetBoolean("convert"); ensbases = ajAcdGetString("nsbases"); etloop = ajAcdGetBoolean("tetraloop"); eenergy = ajAcdGetListSingle("energy"); escale = ajAcdGetFloat("scale"); edangles = ajAcdGetListSingle("dangles"); outf = ajAcdGetOutfile("outfile"); essfile = ajAcdGetOutfile("ssoutfile"); /* dotfilea = ajAcdGetOutfile("adotoutfile"); dotfileb = ajAcdGetOutfile("bdotoutfile"); */ do_backtrack = 2; pf = 0; string = NULL; istty = 0; temperature = (double) eT; circ = !!ecirc; noGU = (eGU) ? 0 : 1; no_closingGU = (eclose) ? 0 : 1; noLonelyPairs = (lonely) ? 0 : 1; noconv = (convert) ? 0 : 1; ns_bases = (ajStrGetLen(ensbases)) ? 
MAJSTRGETPTR(ensbases) : NULL; tetra_loop = !!etloop; ewt = *ajStrGetPtr(eenergy); if(ewt == '0') energy_set = 0; else if(ewt == '1') energy_set = 1; else if(ewt == '2') energy_set = 2; sfact = (double) escale; edangle = *ajStrGetPtr(edangles); if(edangle == '0') dangles = 0; else if(edangle == '1') dangles = 1; else if(edangle == '2') dangles = 2; else if(edangle == '3') dangles = 3; if(circ && noLonelyPairs) { ajWarn("Depending on the origin of the circular sequence\n" "some structures may be missed when using -noLP\nTry " "rotating your sequence a few times\n"); } if(paramfile) read_parameter_file(paramfile); if (ns_bases != NULL) { nonstandards = space(33); c=ns_bases; i=sym=0; if (*c=='-') { sym=1; c++; } while (*c!='\0') { if (*c!=',') { nonstandards[i++]=*c++; nonstandards[i++]=*c; if ((sym)&&(*c!=*(c-1))) { nonstandards[i++]=*c; nonstandards[i++]=*(c-1); } } c++; } } if(confile) vienna_GetConstraints(confile,&constring); string = NULL; structure = NULL; length = ajSeqGetLen(seq); string = (char *) space(length+1); strcpy(string,ajSeqGetSeqC(seq)); len = ajStrGetLen(constring); structure = (char *) space(length+1); if(len) { fold_constrained = 1; strcpy(structure,ajStrGetPtr(constring)); } for (l = 0; l < length; l++) { string[l] = toupper(string[l]); if (!noconv && string[l] == 'T') string[l] = 'U'; } /* initialize_fold(length); */ if (circ) min_en = circfold(string, structure); else min_en = fold(string, structure); ajFmtPrintF(outf,"%s\n%s", string, structure); if (istty) printf("\n minimum free energy = %6.2f kcal/mol\n", min_en); else ajFmtPrintF(outf," (%6.2f)\n", min_en); if (!noPS) { if (length<2000) (void) PS_rna_plot(string, structure, essfile); else ajWarn("Structure too long, not doing xy_plot\n"); } if (length>=2000) free_arrays(); if (pf) { char *pf_struc; pf_struc = (char *) space((unsigned) length+1); if (dangles==1) { dangles=2; /* recompute with dangles as in pf_fold() */ min_en = (circ) ? 
energy_of_circ_struct(string, structure) : energy_of_struct(string, structure); dangles=1; } kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */ pf_scale = exp(-(sfact*min_en)/kT/length); if (length>2000) ajWarn("scaling factor %f\n", pf_scale); (circ) ? init_pf_circ_fold(length) : init_pf_fold(length); if (cstruc!=NULL) strncpy(pf_struc, cstruc, length+1); energy = (circ) ? pf_circ_fold(string, pf_struc) : pf_fold(string, pf_struc); if (do_backtrack) { ajFmtPrintF(outf,"%s", pf_struc); ajFmtPrintF(outf," [%6.2f]\n", energy); } if ((istty)||(!do_backtrack)) ajFmtPrintF(outf," free energy of ensemble = %6.2f kcal/mol\n", energy); if (do_backtrack) { plist *pl1,*pl2; char *cent; double dist, cent_en; cent = centroid(length, &dist); cent_en = (circ) ? energy_of_circ_struct(string, cent) : energy_of_struct(string, cent); ajFmtPrintF(outf,"%s {%6.2f d=%.2f}\n", cent, cent_en, dist); free(cent); pl1 = make_plist(length, 1e-5); pl2 = b2plist(structure); (void) PS_dot_plot_list(string, dotfilea, pl1, pl2, ""); free(pl2); if (do_backtrack==2) { pl2 = stackProb(1e-5); PS_dot_plot_list(string, dotfileb, pl1, pl2, "Probabilities for stacked pairs (i,j)(i+1,j-1)"); free(pl2); } free(pl1); free(pf_struc); } ajFmtPrintF(outf," frequency of mfe structure in ensemble %g; ", exp((energy-min_en)/kT)); if (do_backtrack) ajFmtPrintF(outf,"ensemble diversity %-6.2f", mean_bp_dist(length)); ajFmtPrintF(outf,"\n"); free_pf_arrays(); } if (cstruc!=NULL) free(cstruc); free(string); free(structure); ajStrDel(&seqstring); ajStrDel(&constring); ajStrDel(&seqname); ajStrDel(&ensbases); ajStrDel(&eenergy); ajStrDel(&edangles); ajSeqDel(&seq); ajFileClose(&confile); ajFileClose(¶mfile); ajFileClose(&outf); ajFileClose(&essfile); /* ajFileClose(&dotfilea); ajFileClose(&dotfileb); */ if (length<2000) free_arrays(); embExit(); return 0; }
int main(int argc, char *argv[]) { char *string/*, *line*/; char *structure=NULL, *cstruc=NULL; /*char fname[53], ffname[60]; */ /*char *ParamFile=NULL; */ char *ns_bases=NULL, *c; char *Concfile; int i, length, l, sym/*, r*/; double min_en; double kT, sfact=1.07; int pf=0, istty; int noconv=0; int doT=0; /*compute dimere free energies etc.*/ int doC=0; /*toggle to compute concentrations*/ int doQ=0; /*toggle to compute prob of base being paired*/ int cofi=0; /*toggle concentrations stdin / file*/ struct plist *prAB; struct plist *prAA; /*pair probabilities of AA dimer*/ struct plist *prBB; struct plist *prA; struct plist *prB; struct plist *mfAB; struct plist *mfAA; /*pair mfobabilities of AA dimer*/ struct plist *mfBB; struct plist *mfA; struct plist *mfB; double *ConcAandB; AjPSeq seq1 = NULL; AjPFile confile1 = NULL; AjPSeq seq2 = NULL; AjPFile confile2 = NULL; AjPFile concfile = NULL; AjPFile paramfile = NULL; AjPFile outf = NULL; AjPFile essfile = NULL; AjPFile dotfile = NULL; AjPFile aoutf = NULL; AjPFile aaoutf = NULL; AjPFile boutf = NULL; AjPFile bboutf = NULL; AjPFile aboutf = NULL; AjPStr seqstring1 = NULL; AjPStr constring1 = NULL; AjPStr constring2 = NULL; float eT = 0.; AjBool eGU; AjBool eclose; AjBool lonely; AjBool convert; AjPStr ensbases = NULL; AjBool etloop; AjPStr eenergy = NULL; char ewt = '\0'; float escale = 0.; AjPStr edangles = NULL; char edangle = '\0'; /* AjBool dimers; */ /* AjBool paired; */ embInitPV("vrnacofold",argc,argv,"VIENNA",VERSION); seqstring1 = ajStrNew(); constring1 = ajStrNew(); constring2 = ajStrNew(); seq1 = ajAcdGetSeq("asequence"); confile1 = ajAcdGetInfile("aconstraintfile"); seq2 = ajAcdGetSeq("bsequence"); confile2 = ajAcdGetInfile("bconstraintfile"); paramfile = ajAcdGetInfile("paramfile"); eT = ajAcdGetFloat("temperature"); eGU = ajAcdGetBoolean("gu"); eclose = ajAcdGetBoolean("closegu"); lonely = ajAcdGetBoolean("lp"); convert = ajAcdGetBoolean("convert"); ensbases = ajAcdGetString("nsbases"); etloop = 
ajAcdGetBoolean("tetraloop"); eenergy = ajAcdGetListSingle("energy"); escale = ajAcdGetFloat("scale"); edangles = ajAcdGetListSingle("dangles"); /* dimers = ajAcdGetBoolean("dimers"); */ /* paired = ajAcdGetBoolean("paired"); */ outf = ajAcdGetOutfile("outfile"); essfile = ajAcdGetOutfile("ssoutfile"); /* concfile = ajAcdGetInfile("concentrationfile"); */ /* dotfile = ajAcdGetOutfile("dotoutfile"); */ /* aoutf = ajAcdGetOutfile("aoutfile"); aaoutf = ajAcdGetOutfile("aaoutfile"); boutf = ajAcdGetOutfile("boutfile"); bboutf = ajAcdGetOutfile("bboutfile"); aboutf = ajAcdGetOutfile("aboutfile"); */ do_backtrack = 1; pf = 0; doT = 0; doC = 0; cofi = 0; doQ = 0; string = NULL; Concfile = NULL; istty = 0; temperature = (double) eT; noGU = (eGU) ? 0 : 1; no_closingGU = (eclose) ? 0 : 1; noLonelyPairs = (lonely) ? 0 : 1; noconv = (convert) ? 0 : 1; ns_bases = (ajStrGetLen(ensbases)) ? MAJSTRGETPTR(ensbases) : NULL; tetra_loop = !!etloop; ewt = *ajStrGetPtr(eenergy); if(ewt == '0') energy_set = 0; else if(ewt == '1') energy_set = 1; else if(ewt == '2') energy_set = 2; sfact = (double) escale; edangle = *ajStrGetPtr(edangles); if(edangle == '0') dangles = 0; else if(edangle == '1') dangles = 1; else if(edangle == '2') dangles = 2; else if(edangle == '3') dangles = 3; if(paramfile) read_parameter_file(paramfile); if (ns_bases != NULL) { nonstandards = space(33); c=ns_bases; i=sym=0; if (*c=='-') { sym=1; c++; } while (*c!='\0') { if (*c!=',') { nonstandards[i++]=*c++; nonstandards[i++]=*c; if ((sym)&&(*c!=*(c-1))) { nonstandards[i++]=*c; nonstandards[i++]=*(c-1); } } c++; } } cut_point = -1; ajFmtPrintS(&seqstring1,"%s&%s",ajSeqGetSeqC(seq1),ajSeqGetSeqC(seq2)); string = tokenize(MAJSTRGETPTR(seqstring1)); /* frees line */ length = (int) strlen(string); if (doC) { ConcAandB = read_concentrations(concfile); } structure = (char *) space((unsigned) length+1); if(confile1) { vienna_GetConstraints(confile1,&constring1); vienna_GetConstraints(confile2,&constring2); 
ajStrAppendK(&constring1,'&'); ajStrAppendS(&constring1,constring2); cstruc = tokenize(MAJSTRGETPTR(constring1)); if (cstruc!=NULL) strncpy(structure, cstruc, length); else ajFatal("Constraints missing\n"); } for (l = 0; l < length; l++) { string[l] = toupper(string[l]); if (!noconv && string[l] == 'T') string[l] = 'U'; } /*compute mfe of AB dimer*/ min_en = cofold(string, structure); mfAB=(struct plist *) space(sizeof(struct plist) * (length+1)); mfAB=get_mfe_plist(mfAB); if (cut_point == -1) ajFmtPrintF(outf,"%s\n%s", string, structure); /*no cofold*/ else { char *pstring, *pstruct; pstring = costring(string); pstruct = costring(structure); ajFmtPrintF(outf,"%s\n%s", pstring, pstruct); free(pstring); free(pstruct); } ajFmtPrintF(outf," (%6.2f)\n", min_en); if (length<2000) (void) PS_rna_plot(string, structure, essfile); else { ajWarn("Structure too long, not doing xy_plot\n"); free_co_arrays(); } /*compute partition function*/ if (pf) { cofoldF AB, AA, BB; if (dangles==1) { dangles=2; /* recompute with dangles as in pf_fold() */ min_en = energy_of_struct(string, structure); dangles=1; } kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */ pf_scale = exp(-(sfact*min_en)/kT/length); if (length>2000) ajWarn("scaling factor %f\n", pf_scale); init_co_pf_fold(length); if (cstruc!=NULL) strncpy(structure, cstruc, length+1); AB = co_pf_fold(string, structure); if (do_backtrack) { char *costruc; costruc = (char *) space(sizeof(char)*(strlen(structure)+2)); if (cut_point<0) ajFmtPrintF(outf,"%s", structure); else { strncpy(costruc, structure, cut_point-1); strcat(costruc, "&"); strcat(costruc, structure+cut_point-1); ajFmtPrintF(outf,"%s", costruc); } ajFmtPrintF(outf," [%6.2f]\n", AB.FAB); } if ((istty)||(!do_backtrack)) ajFmtPrintF(outf," free energy of ensemble = %6.2f kcal/mol\n", AB.FAB); ajFmtPrintF(outf," frequency of mfe structure in ensemble %g", exp((AB.FAB-min_en)/kT)); ajFmtPrintF(outf," , delta G binding=%6.2f\n", AB.FcAB - AB.FA - AB.FB); prAB=(struct plist 
*) space(sizeof(struct plist) * (2*length)); prAB=get_plist(prAB, length,0.00001); /* if (doQ) make_probsum(length,fname); */ /*compute prob of base paired*/ /* free_co_arrays(); */ if (doT) { /* cofold of all dimers, monomers */ int Blength, Alength; char *Astring, *Bstring; char *Newstring; /*char Newname[30];*/ char comment[80]; if (cut_point<0) { free(mfAB); free(prAB); ajFatal("Sorry, I cannot do that with only one molecule, " "please give me two\n"); } if (dangles==1) dangles=2; Alength=cut_point-1; /*length of first molecule*/ Blength=length-cut_point+1; /*length of 2nd molecule*/ /*Sequence of first molecule*/ Astring=(char *)space(sizeof(char)*(Alength+1)); /*Sequence of second molecule*/ Bstring=(char *)space(sizeof(char)*(Blength+1)); strncat(Astring,string,Alength); strncat(Bstring,string+Alength,Blength); /* compute AA dimer */ prAA=(struct plist *) space(sizeof(struct plist) * (4*Alength)); mfAA=(struct plist *) space(sizeof(struct plist) * (Alength+1)); AA=do_partfunc(Astring, Alength, 2, &prAA, &mfAA); /* compute BB dimer */ prBB=(struct plist *) space(sizeof(struct plist) * (4*Blength)); mfBB=(struct plist *) space(sizeof(struct plist) * (Blength+1)); BB=do_partfunc(Bstring, Blength, 2, &prBB, &mfBB); /*free_co_pf_arrays();*/ /* compute A monomer */ prA=(struct plist *) space(sizeof(struct plist) * (2*Alength)); mfA=(struct plist *) space(sizeof(struct plist) * (Alength+1)); do_partfunc(Astring, Alength, 1, &prA, &mfA); /* compute B monomer */ prB=(struct plist *) space(sizeof(struct plist) * (2*Blength)); mfB=(struct plist *) space(sizeof(struct plist) * (Blength+1)); do_partfunc(Bstring, Blength, 1, &prB, &mfB); compute_probabilities(AB.F0AB, AB.FA, AB.FB, prAB, prA, prB, Alength); compute_probabilities(AA.F0AB, AA.FA, AA.FA, prAA, prA, prA, Alength); compute_probabilities(BB.F0AB, BB.FA, BB.FA, prBB, prA, prB, Blength); ajFmtPrintF(outf,"Free Energies:\nAB\t\tAA\t\tBB\t\tA\t\tB\n%.6f" "\t%6f\t%6f\t%6f\t%6f\n", AB.FcAB, AA.FcAB, BB.FcAB, AB.FA, 
AB.FB); if (doC) { do_concentrations(AB.FcAB, AA.FcAB, BB.FcAB, AB.FA, AB.FB, ConcAandB, outf); free(ConcAandB);/*freeen*/ } /*AB dot_plot*/ /*write Free Energy into comment*/ sprintf(comment,"\n%%Heterodimer AB FreeEnergy= %.9f\n", AB.FcAB); /*reset cut_point*/ cut_point=Alength+1; (void)PS_dot_plot_list(string, aboutf, prAB, mfAB, comment); /*AA dot_plot*/ sprintf(comment,"\n%%Homodimer AA FreeEnergy= %.9f\n",AA.FcAB); /*write AA sequence*/ Newstring=(char*)space((2*Alength+1)*sizeof(char)); strcpy(Newstring,Astring); strcat(Newstring,Astring); (void)PS_dot_plot_list(Newstring, aaoutf, prAA, mfAA, comment); free(Newstring); /*BB dot_plot*/ sprintf(comment,"\n%%Homodimer BB FreeEnergy= %.9f\n",BB.FcAB); /*write BB sequence*/ Newstring=(char*)space((2*Blength+1)*sizeof(char)); strcpy(Newstring,Bstring); strcat(Newstring,Bstring); /*reset cut_point*/ cut_point=Blength+1; (void)PS_dot_plot_list(Newstring, bboutf, prBB, mfBB, comment); free(Newstring); /*A dot plot*/ /*reset cut_point*/ cut_point=-1; sprintf(comment,"\n%%Monomer A FreeEnergy= %.9f\n",AB.FA); /*write A sequence*/ (void)PS_dot_plot_list(Astring, aoutf, prA, mfA, comment); /*B monomer dot plot*/ sprintf(comment,"\n%%Monomer B FreeEnergy= %.9f\n",AB.FB); /*write B sequence*/ (void)PS_dot_plot_list(Bstring, boutf, prB, mfB, comment); free(Astring); free(Bstring); free(prAB); free(prAA); free(prBB); free(prA); free(prB); free(mfAB); free(mfAA); free(mfBB); free(mfA); free(mfB); } /*end if(doT)*/ }/*end if(pf)*/ if (do_backtrack) { if (!doT) { if (pf) { (void) PS_dot_plot_list(string, dotfile, prAB, mfAB, "doof"); free(prAB); } free(mfAB); } } if (!doT) free_co_pf_arrays(); if (cstruc!=NULL) free(cstruc); free(string); free(structure); ajStrDel(&seqstring1); ajStrDel(&constring1); ajStrDel(&constring2); ajSeqDel(&seq1); ajSeqDel(&seq2); ajStrDel(&ensbases); ajStrDel(&eenergy); ajStrDel(&edangles); ajFileClose(&confile1); ajFileClose(&confile2); ajFileClose(¶mfile); ajFileClose(&outf); ajFileClose(&essfile); 
if (length<2000) free_co_arrays(); embExit(); return 0; }
path_t* get_path(char *seq, char *s1, char* s2, int maxkeep, int *num_entry) { int E, d; path_t *route; E = find_saddle(seq, s1, s2, maxkeep); *num_entry = BP_dist+1; route = (path_t *)space((BP_dist+1)*sizeof(path_t)); qsort(path, BP_dist, sizeof(move_t), compare_moves_when); if (path_fwd) { /* memorize start of path */ route[0].s = (char*)space((strlen(s1)+1)*sizeof(char)); strcpy(route[0].s, s1); route[0].en = energy_of_struct(seq, s1); for (d=0; d<BP_dist; d++) { int i,j; route[d+1].s = (char*)space((strlen(route[d].s)+1)*sizeof(char)); strcpy(route[d+1].s, route[d].s); i = path[d].i; j=path[d].j; if (i<0) { /* delete */ route[d+1].s[(-i)-1] = route[d+1].s[(-j)-1] = '.'; } else { route[d+1].s[i-1] = '('; route[d+1].s[j-1] = ')'; } route[d+1].en = path[d].E/100.0; } } else { /* memorize start of path */ route[BP_dist].s = (char*)space((strlen(s2)+1)*sizeof(char)); strcpy(route[BP_dist].s, s2); route[BP_dist].en = energy_of_struct(seq, s2); for (d=0; d<BP_dist; d++) { int i,j; route[BP_dist-d-1].s = (char*)space((strlen(route[BP_dist-d].s)+1)*sizeof(char)); strcpy(route[BP_dist-d-1].s, route[BP_dist-d].s); i = path[d].i; j=path[d].j; if (i<0) { /* delete */ route[BP_dist-d-1].s[(-i)-1] = route[BP_dist-d-1].s[(-j)-1] = '.'; } else { route[BP_dist-d-1].s[i-1] = '('; route[BP_dist-d-1].s[j-1] = ')'; } route[BP_dist-d-1].en = path[d].E/100.0; } } #if _DEBUG_FINDPATH_ fprintf(stderr, "\n%s\n%s\n%s\n\n", seq, s1, s2); for (d=0; d<=BP_dist; d++) fprintf(stderr, "%s %6.2f\n", route[d].s, route[d].en); fprintf(stderr, "%d\n", *num_entry); #endif free(path); path_fwd = 0; return (route); }
int main(int argc, char *argv[]) { char *string, *line; char *structure=NULL, *cstruc=NULL; char fname[13], ffname[20], gfname[20]; char *ParamFile=NULL; char *ns_bases=NULL, *c; int i, length, l, sym, r; double energy, min_en; double kT, sfact=1.07; int pf=0, noPS=0, istty; int noconv=0; int circ=0; do_backtrack = 1; string=NULL; for (i=1; i<argc; i++) { if (argv[i][0]=='-') switch ( argv[i][1] ) { case 'T': if (argv[i][2]!='\0') usage(); if(i==argc-1) usage(); r=sscanf(argv[++i], "%lf", &temperature); if (!r) usage(); break; case 'p': pf=1; if (argv[i][2]!='\0') (void) sscanf(argv[i]+2, "%d", &do_backtrack); break; case 'n': if ( strcmp(argv[i], "-noGU")==0) noGU=1; if ( strcmp(argv[i], "-noCloseGU")==0) no_closingGU=1; if ( strcmp(argv[i], "-noLP")==0) noLonelyPairs=1; if ( strcmp(argv[i], "-noPS")==0) noPS=1; if ( strcmp(argv[i], "-nsp") ==0) { if (i==argc-1) usage(); ns_bases = argv[++i]; } if ( strcmp(argv[i], "-noconv")==0) noconv=1; break; case '4': tetra_loop=0; break; case 'e': if(i==argc-1) usage(); r=sscanf(argv[++i],"%d", &energy_set); if (!r) usage(); break; case 'C': fold_constrained=1; break; case 'c': if ( strcmp(argv[i], "-circ")==0) circ=1; break; case 'S': if(i==argc-1) usage(); r=sscanf(argv[++i],"%lf", &sfact); if (!r) usage(); break; case 'd': dangles=0; if (argv[i][2]!='\0') { r=sscanf(argv[i]+2, "%d", &dangles); if (r!=1) usage(); } break; case 'P': if (i==argc-1) usage(); ParamFile = argv[++i]; break; default: usage(); } } if (circ && noLonelyPairs) fprintf(stderr, "warning, depending on the origin of the circular sequence, some structures may be missed when using -noLP\nTry rotating your sequence a few times\n"); if (ParamFile != NULL) read_parameter_file(ParamFile); if (ns_bases != NULL) { nonstandards = space(33); c=ns_bases; i=sym=0; if (*c=='-') { sym=1; c++; } while (*c!='\0') { if (*c!=',') { nonstandards[i++]=*c++; nonstandards[i++]=*c; if ((sym)&&(*c!=*(c-1))) { nonstandards[i++]=*c; nonstandards[i++]=*(c-1); } } c++; } } istty = 
isatty(fileno(stdout))&&isatty(fileno(stdin)); if ((fold_constrained)&&(istty)) { printf("Input constraints using the following notation:\n"); printf("| : paired with another base\n"); printf(". : no constraint at all\n"); printf("x : base must not pair\n"); printf("< : base i is paired with a base j<i\n"); printf("> : base i is paired with a base j>i\n"); printf("matching brackets ( ): base i pairs base j\n"); } do { /* main loop: continue until end of file */ if (istty) { printf("\nInput string (upper or lower case); @ to quit\n"); printf("%s%s\n", scale1, scale2); } fname[0]='\0'; if ((line = get_line(stdin))==NULL) break; /* skip comment lines and get filenames */ while ((*line=='*')||(*line=='\0')||(*line=='>')) { if (*line=='>') (void) sscanf(line, ">%12s", fname); printf("%s\n", line); free(line); if ((line = get_line(stdin))==NULL) break; } if ((line ==NULL) || (strcmp(line, "@") == 0)) break; string = (char *) space(strlen(line)+1); (void) sscanf(line,"%s",string); free(line); length = (int) strlen(string); structure = (char *) space((unsigned) length+1); if (fold_constrained) { cstruc = get_line(stdin); if (cstruc!=NULL) strncpy(structure, cstruc, length); else fprintf(stderr, "constraints missing\n"); } for (l = 0; l < length; l++) { string[l] = toupper(string[l]); if (!noconv && string[l] == 'T') string[l] = 'U'; } if (istty) printf("length = %d\n", length); /* initialize_fold(length); */ if (circ) min_en = circfold(string, structure); else min_en = fold(string, structure); printf("%s\n%s", string, structure); if (istty) printf("\n minimum free energy = %6.2f kcal/mol\n", min_en); else printf(" (%6.2f)\n", min_en); (void) fflush(stdout); if (fname[0]!='\0') { strcpy(ffname, fname); strcat(ffname, "_ss.ps"); strcpy(gfname, fname); strcat(gfname, "_ss.g"); } else { strcpy(ffname, "rna.ps"); strcpy(gfname, "rna.g"); } if (!noPS) { if (length<2000) (void) PS_rna_plot(string, structure, ffname); else fprintf(stderr,"INFO: structure too long, not doing 
xy_plot\n"); } if (length>2000) free_arrays(); if (pf) { char *pf_struc; pf_struc = (char *) space((unsigned) length+1); if (dangles==1) { dangles=2; /* recompute with dangles as in pf_fold() */ min_en = (circ) ? energy_of_circ_struct(string, structure) : energy_of_struct(string, structure); dangles=1; } kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */ pf_scale = exp(-(sfact*min_en)/kT/length); if (length>2000) fprintf(stderr, "scaling factor %f\n", pf_scale); (circ) ? init_pf_circ_fold(length) : init_pf_fold(length); if (cstruc!=NULL) strncpy(pf_struc, cstruc, length+1); energy = (circ) ? pf_circ_fold(string, pf_struc) : pf_fold(string, pf_struc); if (do_backtrack) { printf("%s", pf_struc); if (!istty) printf(" [%6.2f]\n", energy); else printf("\n"); } if ((istty)||(!do_backtrack)) printf(" free energy of ensemble = %6.2f kcal/mol\n", energy); if (do_backtrack) { plist *pl1,*pl2; char *cent; double dist, cent_en; cent = centroid(length, &dist); cent_en = (circ) ? energy_of_circ_struct(string, cent) :energy_of_struct(string, cent); printf("%s {%6.2f d=%.2f}\n", cent, cent_en, dist); free(cent); if (fname[0]!='\0') { strcpy(ffname, fname); strcat(ffname, "_dp.ps"); } else strcpy(ffname, "dot.ps"); pl1 = make_plist(length, 1e-5); pl2 = b2plist(structure); (void) PS_dot_plot_list(string, ffname, pl1, pl2, ""); free(pl2); if (do_backtrack==2) { pl2 = stackProb(1e-5); if (fname[0]!='\0') { strcpy(ffname, fname); strcat(ffname, "_dp2.ps"); } else strcpy(ffname, "dot2.ps"); PS_dot_plot_list(string, ffname, pl1, pl2, "Probabilities for stacked pairs (i,j)(i+1,j-1)"); free(pl2); } free(pl1); free(pf_struc); } printf(" frequency of mfe structure in ensemble %g; ", exp((energy-min_en)/kT)); if (do_backtrack) printf("ensemble diversity %-6.2f", mean_bp_dist(length)); printf("\n"); free_pf_arrays(); } if (cstruc!=NULL) free(cstruc); (void) fflush(stdout); free(string); free(structure); } while (1); return 0; }
/*
 * Upper-case `aln` in place, replace every 'T' by 'U', and copy all
 * characters that are not the gap symbol '-' into `out`.
 * `out` must hold at least strlen(aln)+1 bytes; it is NUL-terminated.
 */
static void strand_normalize_and_ungap(char *aln, char *out)
{
  size_t k = 0;

  for (; *aln != '\0'; ++aln) {
    /* <ctype.h> functions need an unsigned char value */
    *aln = (char) toupper((unsigned char) *aln);
    if (*aln == 'T') *aln = 'U';
    if (*aln != '-') out[k++] = *aln;
  }
  out[k] = '\0';
}

/*
  PURPOSE: Sets the predictor variables

  PARAMS:  char *window:          Window of alignment for which strand is predicted
           char *window_reverse:  Reverse complement of window
           int length:            Length of alignment window
           int n_seq:             Number of sequences in alignment window
           double *deltaSCI:      1st predictor
           double *deltaMeanMFE:  2nd predictor
           double *deltaConsMFE:  3rd predictor
           double *deltaZ:        4th predictor
           char *modelDir:        Directory with SVM models

  RETURNS: nothing; the four predictors are written through the output
           pointers.  Each is the value for `window` minus the value for
           `window_reverse`.

  SIDE EFFECTS: the rows of both windows are upper-cased and have T replaced
           by U in place; the global eos_debug is set to -1.
*/
void get_strand_predictors(
    char *window[], char *window_reverse[], int length, int n_seq,
    double *deltaSCI, double *deltaMeanMFE, double *deltaConsMFE,
    double *deltaZ, char *modelDir)
{
  int i;
  char *structure=NULL, *structure_reverse=NULL;
  char *singleStruc, *woGapsSeq, *singleStruc_reverse, *woGapsSeq_reverse;
  double singleMFE,sumMFE,singleZ,sumZ,z,sci;
  double singleMFE_reverse,sumMFE_reverse,singleZ_reverse,sumZ_reverse,z_reverse,sci_reverse;
  double min_en;
  double min_en_reverse;
  int z_score_type;
  char warningString_strand[2000];

  sumZ=0;
  sumMFE=0;
  sumZ_reverse=0;
  sumMFE_reverse=0;

  /* RNAstrand is only trained on mononucleotide shuffled sequences !!! */
  z_score_type = 0;

  /* Init regression SVM for mean and standard deviation of MFE */
  regression_svm_init(modelDir);

  structure = (char *) space((unsigned) length+1);
  structure_reverse = (char *) space((unsigned) length+1);
  min_en = alifold(window, structure);
  min_en_reverse = alifold(window_reverse, structure_reverse);

  /* get z-score, mean mfe, average single mfe and sci*/
  for (i=0;i<n_seq;i++) {
    singleStruc = space(strlen(window[i])+1);
    woGapsSeq = space(strlen(window[i])+1);
    singleStruc_reverse = space(strlen(window_reverse[i])+1);
    woGapsSeq_reverse = space(strlen(window_reverse[i])+1);

    /* single pass per row instead of a strlen() for every appended base */
    strand_normalize_and_ungap(window[i], woGapsSeq);
    strand_normalize_and_ungap(window_reverse[i], woGapsSeq_reverse);

    singleMFE = fold(woGapsSeq, singleStruc);
    singleZ=mfe_zscore(woGapsSeq,singleMFE,z_score_type,0,warningString_strand);
    singleMFE_reverse = fold(woGapsSeq_reverse, singleStruc_reverse);
    singleZ_reverse=mfe_zscore(woGapsSeq_reverse,singleMFE_reverse,z_score_type,0,warningString_strand);

    sumZ+=singleZ;
    sumMFE+=singleMFE;
    sumZ_reverse+=singleZ_reverse;
    sumMFE_reverse+=singleMFE_reverse;

    free(woGapsSeq);
    free(singleStruc);
    free(woGapsSeq_reverse);
    free(singleStruc_reverse);
  }

  {
    /* The original code also averaged energy_of_struct() over the rows of
       `window` here, but never used the result; only the switch below is
       kept because it changes global state. */
    extern int eos_debug;
    eos_debug=-1; /* shut off warnings about nonstandard pairs */
  }

  z=sumZ/n_seq;
  sci=min_en/(sumMFE/n_seq);
  z_reverse=sumZ_reverse/n_seq;
  sci_reverse=min_en_reverse/(sumMFE_reverse/n_seq);

  *deltaSCI = sci - sci_reverse;
  *deltaMeanMFE = sumMFE/n_seq - sumMFE_reverse/n_seq;
  *deltaConsMFE = min_en - min_en_reverse;
  *deltaZ = z - z_reverse;

  /* the consensus structures were only needed as alifold() output buffers */
  free(structure);
  free(structure_reverse);

  /* Free svm regression model */
  regression_svm_free();

  return;
}
int main(int argc, char *argv[]) { char *string; char *structure=NULL; char *cstruc=NULL; char *ns_bases=NULL; char *c; int n_seq; int i; int length; int sym; int endgaps = 0; int mis = 0; double min_en; double real_en; double sfact = 1.07; int pf = 0; int istty; char *AS[MAX_NUM_NAMES]; /* aligned sequences */ char *names[MAX_NUM_NAMES]; /* sequence names */ AjPSeqset seq = NULL; AjPFile confile = NULL; AjPFile alifile = NULL; AjPFile paramfile = NULL; AjPFile outf = NULL; AjPFile essfile = NULL; AjPFile dotfile = NULL; AjPStr constring = NULL; float eT = 0.; AjBool eGU; AjBool eclose; AjBool lonely; AjPStr ensbases = NULL; AjBool etloop; AjPStr eenergy = NULL; char ewt = '\0'; float escale = 0.; AjPStr edangles = NULL; char edangle = '\0'; ajint len; AjPSeq tseq = NULL; AjPStr tname = NULL; int circ = 0; int doAlnPS = 0; int doColor = 0; embInitPV("vrnaalifoldpf",argc,argv,"VIENNA",VERSION); constring = ajStrNew(); seq = ajAcdGetSeqset("sequence"); confile = ajAcdGetInfile("constraintfile"); paramfile = ajAcdGetInfile("paramfile"); eT = ajAcdGetFloat("temperature"); eGU = ajAcdGetBoolean("gu"); eclose = ajAcdGetBoolean("closegu"); lonely = ajAcdGetBoolean("lp"); ensbases = ajAcdGetString("nsbases"); etloop = ajAcdGetBoolean("tetraloop"); eenergy = ajAcdGetListSingle("energy"); escale = ajAcdGetFloat("scale"); edangles = ajAcdGetListSingle("dangles"); mis = !!ajAcdGetBoolean("most"); endgaps = !!ajAcdGetBoolean("endgaps"); nc_fact = (double) ajAcdGetFloat("nspenalty"); cv_fact = (double) ajAcdGetFloat("covariance"); outf = ajAcdGetOutfile("outfile"); essfile = ajAcdGetOutfile("ssoutfile"); alifile = ajAcdGetOutfile("alignoutfile"); circ = !!ajAcdGetBoolean("circular"); doColor = !!ajAcdGetBoolean("colour"); dotfile = ajAcdGetOutfile("dotoutfile"); do_backtrack = 1; pf = 1; string = NULL; istty = 0; dangles = 2; temperature = (double) eT; noGU = (eGU) ? 0 : 1; no_closingGU = (eclose) ? 0 : 1; noLonelyPairs = (lonely) ? 0 : 1; ns_bases = (ajStrGetLen(ensbases)) ? 
MAJSTRGETPTR(ensbases) : NULL; tetra_loop = !!etloop; ewt = *ajStrGetPtr(eenergy); if(ewt == '0') energy_set = 0; else if(ewt == '1') energy_set = 1; else if(ewt == '2') energy_set = 2; sfact = (double) escale; edangle = *ajStrGetPtr(edangles); if(edangle == '0') dangles = 0; else if(edangle == '1') dangles = 1; else if(edangle == '2') dangles = 2; else if(edangle == '3') dangles = 3; if(paramfile) read_parameter_file(paramfile); if (ns_bases != NULL) { nonstandards = space(33); c=ns_bases; i=sym=0; if (*c=='-') { sym=1; c++; } while (*c!='\0') { if (*c!=',') { nonstandards[i++]=*c++; nonstandards[i++]=*c; if ((sym)&&(*c!=*(c-1))) { nonstandards[i++]=*c; nonstandards[i++]=*(c-1); } } c++; } } if(alifile) doAlnPS = 1; if(confile) vienna_GetConstraints(confile,&constring); n_seq = ajSeqsetGetSize(seq); if(n_seq > MAX_NUM_NAMES - 1) ajFatal("[e]RNAalifold is restricted to %d sequences\n", MAX_NUM_NAMES - 1); if (n_seq==0) ajFatal("No sequences found"); for(i=0;i<n_seq;++i) { tseq = (AjPSeq) ajSeqsetGetseqSeq(seq,i); ajSeqGapStandard(tseq, '-'); tname = (AjPStr) ajSeqsetGetseqNameS(seq,i); len = ajSeqGetLen(tseq); AS[i] = (char *) space(len+1); names[i] = (char *) space(ajStrGetLen(tname)+1); strcpy(AS[i],ajSeqGetSeqC(tseq)); strcpy(names[i],ajStrGetPtr(tname)); } AS[n_seq] = NULL; names[n_seq] = NULL; if (endgaps) for (i=0; i<n_seq; i++) mark_endgaps(AS[i], '~'); length = (int) strlen(AS[0]); structure = (char *) space((unsigned) length+1); if(confile) { fold_constrained = 1; strcpy(structure,ajStrGetPtr(constring)); } if (circ && noLonelyPairs) ajWarn( "warning, depending on the origin of the circular sequence, " "some structures may be missed when using -noLP\n" "Try rotating your sequence a few times\n"); if (circ) min_en = circalifold((const char **)AS, structure); else min_en = alifold(AS, structure); { int i; double s=0; extern int eos_debug; eos_debug=-1; /* shut off warnings about nonstandard pairs */ for (i=0; AS[i]!=NULL; i++) if (circ) s += 
energy_of_circ_struct(AS[i], structure); else s += energy_of_struct(AS[i], structure); real_en = s/i; } string = (mis) ? consens_mis((const char **) AS) : consensus((const char **) AS); ajFmtPrintF(outf,"%s\n%s", string, structure); ajFmtPrintF(outf," (%6.2f = %6.2f + %6.2f) \n", min_en, real_en, min_en-real_en ); if (length<=2500) { char **A; A = annote(structure, (const char**) AS); if (doColor) (void) PS_rna_plot_a(string, structure, essfile, A[0], A[1]); else (void) PS_rna_plot_a(string, structure, essfile, NULL, A[1]); free(A[0]); free(A[1]);free(A); } else ajWarn("INFO: structure too long, not doing xy_plot\n"); if (doAlnPS) PS_color_aln(structure, alifile, AS, names); { /* free mfe arrays but preserve base_pair for PS_dot_plot */ struct bond *bp; bp = base_pair; base_pair = space(16); free_alifold_arrays(); /* free's base_pair */ free_alipf_arrays(); base_pair = bp; } if (pf) { double energy, kT; pair_info *pi; char * mfe_struc; mfe_struc = strdup(structure); kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */ pf_scale = exp(-(sfact*min_en)/kT/length); if (length>2000) ajWarn("scaling factor %f\n", pf_scale); /* init_alipf_fold(length); */ if (confile) strncpy(structure, ajStrGetPtr(constring), length+1); energy = (circ) ? alipf_circ_fold(AS, structure, &pi) : alipf_fold(AS, structure, &pi); if (do_backtrack) { ajFmtPrintF(outf,"%s", structure); ajFmtPrintF(outf," [%6.2f]\n", energy); } if ((istty)||(!do_backtrack)) ajFmtPrintF(outf," free energy of ensemble = %6.2f kcal/mol\n", energy); ajFmtPrintF(outf," frequency of mfe structure in ensemble %g\n", exp((energy-min_en)/kT)); if (do_backtrack) { FILE *aliout; cpair *cp; short *ptable; int k; ptable = make_pair_table(mfe_struc); ajFmtPrintF(outf,"\n# Alignment section\n\n"); aliout = ajFileGetFileptr(outf); fprintf(aliout, "%d sequences; length of alignment %d\n", n_seq, length); fprintf(aliout, "alifold output\n"); for (k=0; pi[k].i>0; k++) { pi[k].comp = (ptable[pi[k].i] == pi[k].j) ? 
1:0; print_pi(pi[k], aliout); } fprintf(aliout, "%s\n", structure); free(ptable); cp = make_color_pinfo(pi); (void) PS_color_dot_plot(string, cp, dotfile); free(cp); free(mfe_struc); free(pi); } } if (cstruc!=NULL) free(cstruc); free(base_pair); (void) fflush(stdout); free(string); free(structure); for (i=0; AS[i]; i++) { free(AS[i]); free(names[i]); } ajSeqsetDel(&seq); ajStrDel(&constring); ajStrDel(&eenergy); ajStrDel(&edangles); ajStrDel(&ensbases); ajFileClose(&confile); ajFileClose(¶mfile); ajFileClose(&outf); ajFileClose(&essfile); ajFileClose(&alifile); ajFileClose(&dotfile); embExit(); return 0; }
int main(int argc, char *argv[]) { char *string, *line; char *structure=NULL, *cstruc=NULL; char fname[53], ffname[60]; char *ParamFile=NULL; char *ns_bases=NULL, *c; char *Concfile; int i, length, l, sym, r; double min_en; double kT, sfact=1.07; int pf=0, istty; int noconv=0; int doT=0; /*compute dimere free energies etc.*/ int doC=0; /*toggle to compute concentrations*/ int doQ=0; /*toggle to compute prob of base being paired*/ int cofi=0; /*toggle concentrations stdin / file*/ struct plist *prAB; struct plist *prAA; /*pair probabilities of AA dimer*/ struct plist *prBB; struct plist *prA; struct plist *prB; struct plist *mfAB; struct plist *mfAA; /*pair mfobabilities of AA dimer*/ struct plist *mfBB; struct plist *mfA; struct plist *mfB; double *ConcAandB; do_backtrack = 1; string=NULL; Concfile=NULL; for (i=1; i<argc; i++) { if (argv[i][0]=='-') switch ( argv[i][1] ) { case 'T': if (argv[i][2]!='\0') usage(); if(i==argc-1) usage(); r=sscanf(argv[++i], "%lf", &temperature); if (!r) usage(); break; case 'p': pf=1; if (argv[i][2]!='\0') (void) sscanf(argv[i]+2, "%d", &do_backtrack); break; case 'n': if ( strcmp(argv[i], "-noGU")==0) noGU=1; if ( strcmp(argv[i], "-noCloseGU")==0) no_closingGU=1; if ( strcmp(argv[i], "-noLP")==0) noLonelyPairs=1; if ( strcmp(argv[i], "-nsp") ==0) { if (i==argc-1) usage(); ns_bases = argv[++i]; } if ( strcmp(argv[i], "-noconv")==0) noconv=1; break; case '4': tetra_loop=0; break; case 'e': if(i==argc-1) usage(); r=sscanf(argv[++i],"%d", &energy_set); if (!r) usage(); break; case 'C': fold_constrained=1; break; case 'S': if(i==argc-1) usage(); r=sscanf(argv[++i],"%lf", &sfact); if (!r) usage(); break; case 'd': dangles=0; if (argv[i][2]!='\0') { r=sscanf(argv[i]+2, "%d", &dangles); if (r!=1) usage(); } break; case 'P': if (i==argc-1) usage(); ParamFile = argv[++i]; break; case 'a': doT=1; pf=1; break; case 'c':/*concentrations from stdin*/ doC=1; doT=1; pf=1; break; case 'f':/*concentrations in file*/ if (i==argc-1) usage(); Concfile = 
argv[++i]; doC=1; cofi=1; doT=1; pf=1; break; case 'q': pf=1; doQ=1; break; default: usage(); } } if (ParamFile != NULL) read_parameter_file(ParamFile); if (ns_bases != NULL) { nonstandards = space(33); c=ns_bases; i=sym=0; if (*c=='-') { sym=1; c++; } while (*c!='\0') { if (*c!=',') { nonstandards[i++]=*c++; nonstandards[i++]=*c; if ((sym)&&(*c!=*(c-1))) { nonstandards[i++]=*c; nonstandards[i++]=*(c-1); } } c++; } } istty = isatty(fileno(stdout))&&isatty(fileno(stdin)); if ((fold_constrained)&&(istty)) { printf("Input constraints using the following notation:\n"); printf("| : paired with another base\n"); printf(". : no constraint at all\n"); printf("x : base must not pair\n"); printf("< : base i is paired with a base j<i\n"); printf("> : base i is paired with a base j>i\n"); printf("matching brackets ( ): base i pairs base j\n"); } do { /* main loop: continue until end of file */ cut_point = -1; if (istty) { printf("\nInput sequence(s); @ to quit\n"); printf("Use '&' as spearator between 2 sequences that shall form a complex.\n"); printf("%s\n", scale); } fname[0]='\0'; if ((line = get_line(stdin))==NULL) break; /* skip comment lines and get filenames */ while ((*line=='*')||(*line=='\0')||(*line=='>')) { if (*line=='>') (void) sscanf(line, ">%51s", fname); printf("%s\n", line); free(line); if ((line = get_line(stdin))==NULL) break; } if ((line ==NULL) || (strcmp(line, "@") == 0)) break; string = tokenize(line); /* frees line */ length = (int) strlen(string); if (doC) { FILE *fp; if (cofi) { /* read from file */ fp = fopen(Concfile, "r"); if (fp==NULL) { fprintf(stderr, "could not open concentration file %s", Concfile); nrerror("\n"); } ConcAandB = read_concentrations(fp); fclose(fp); } else { printf("Please enter concentrations [mol/l]\n format: ConcA ConcB\n return to end\n"); ConcAandB = read_concentrations(stdin); } } structure = (char *) space((unsigned) length+1); if (fold_constrained) { cstruc = tokenize(get_line(stdin)); if (cstruc!=NULL) 
strncpy(structure, cstruc, length); else fprintf(stderr, "constraints missing\n"); } for (l = 0; l < length; l++) { string[l] = toupper(string[l]); if (!noconv && string[l] == 'T') string[l] = 'U'; } if (istty) { if (cut_point == -1) printf("length = %d\n", length); else printf("length1 = %d\nlength2 = %d\n", cut_point-1, length-cut_point+1); } /*compute mfe of AB dimer*/ min_en = cofold(string, structure); mfAB=(struct plist *) space(sizeof(struct plist) * (length+1)); mfAB=get_mfe_plist(mfAB); if (cut_point == -1) printf("%s\n%s", string, structure); /*no cofold*/ else { char *pstring, *pstruct; pstring = costring(string); pstruct = costring(structure); printf("%s\n%s", pstring, pstruct); free(pstring); free(pstruct); } if (istty) printf("\n minimum free energy = %6.2f kcal/mol\n", min_en); else printf(" (%6.2f)\n", min_en); (void) fflush(stdout); if (fname[0]!='\0') { strcpy(ffname, fname); strcat(ffname, "_ss.ps"); } else { strcpy(ffname, "rna.ps"); } if (length<2000) (void) PS_rna_plot(string, structure, ffname); else { fprintf(stderr,"INFO: structure too long, not doing xy_plot\n"); free_co_arrays(); } /*compute partition function*/ if (pf) { cofoldF AB, AA, BB; if (dangles==1) { dangles=2; /* recompute with dangles as in pf_fold() */ min_en = energy_of_struct(string, structure); dangles=1; } kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */ pf_scale = exp(-(sfact*min_en)/kT/length); if (length>2000) fprintf(stderr, "scaling factor %f\n", pf_scale); init_co_pf_fold(length); if (cstruc!=NULL) strncpy(structure, cstruc, length+1); AB = co_pf_fold(string, structure); if (do_backtrack) { char *costruc; costruc = (char *) space(sizeof(char)*(strlen(structure)+2)); if (cut_point<0) printf("%s", structure); else { strncpy(costruc, structure, cut_point-1); strcat(costruc, "&"); strcat(costruc, structure+cut_point-1); printf("%s", costruc); } if (!istty) printf(" [%6.2f]\n", AB.FAB); else printf("\n");/*8.6.04*/ } if ((istty)||(!do_backtrack)) printf(" free 
energy of ensemble = %6.2f kcal/mol\n", AB.FAB); printf(" frequency of mfe structure in ensemble %g", exp((AB.FAB-min_en)/kT)); printf(" , delta G binding=%6.2f\n", AB.FcAB - AB.FA - AB.FB); prAB=(struct plist *) space(sizeof(struct plist) * (2*length)); prAB=get_plist(prAB, length,0.00001); /* if (doQ) make_probsum(length,fname); */ /*compute prob of base paired*/ /* free_co_arrays(); */ if (doT) { /* cofold of all dimers, monomers */ int Blength, Alength; char *Astring, *Bstring; char *Newstring; char Newname[30]; char comment[80]; if (cut_point<0) { printf("Sorry, i cannot do that with only one molecule, please give me two or leave it\n"); free(mfAB); free(prAB); continue; } if (dangles==1) dangles=2; Alength=cut_point-1; /*length of first molecule*/ Blength=length-cut_point+1; /*length of 2nd molecule*/ Astring=(char *)space(sizeof(char)*(Alength+1));/*Sequence of first molecule*/ Bstring=(char *)space(sizeof(char)*(Blength+1));/*Sequence of second molecule*/ strncat(Astring,string,Alength); strncat(Bstring,string+Alength,Blength); /* compute AA dimer */ prAA=(struct plist *) space(sizeof(struct plist) * (4*Alength)); mfAA=(struct plist *) space(sizeof(struct plist) * (Alength+1)); AA=do_partfunc(Astring, Alength, 2, &prAA, &mfAA); /* compute BB dimer */ prBB=(struct plist *) space(sizeof(struct plist) * (4*Blength)); mfBB=(struct plist *) space(sizeof(struct plist) * (Blength+1)); BB=do_partfunc(Bstring, Blength, 2, &prBB, &mfBB); /*free_co_pf_arrays();*/ /* compute A monomer */ prA=(struct plist *) space(sizeof(struct plist) * (2*Alength)); mfA=(struct plist *) space(sizeof(struct plist) * (Alength+1)); do_partfunc(Astring, Alength, 1, &prA, &mfA); /* compute B monomer */ prB=(struct plist *) space(sizeof(struct plist) * (2*Blength)); mfB=(struct plist *) space(sizeof(struct plist) * (Blength+1)); do_partfunc(Bstring, Blength, 1, &prB, &mfB); compute_probabilities(AB.F0AB, AB.FA, AB.FB, prAB, prA, prB, Alength); compute_probabilities(AA.F0AB, AA.FA, AA.FA, 
prAA, prA, prA, Alength); compute_probabilities(BB.F0AB, BB.FA, BB.FA, prBB, prA, prB, Blength); printf("Free Energies:\nAB\t\tAA\t\tBB\t\tA\t\tB\n%.6f\t%6f\t%6f\t%6f\t%6f\n", AB.FcAB, AA.FcAB, BB.FcAB, AB.FA, AB.FB); if (doC) { do_concentrations(AB.FcAB, AA.FcAB, BB.FcAB, AB.FA, AB.FB, ConcAandB); free(ConcAandB);/*freeen*/ } if (fname[0]!='\0') { strcpy(ffname, fname); strcat(ffname, "_dp5.ps"); } else strcpy(ffname, "dot5.ps"); /*output of the 5 dot plots*/ /*AB dot_plot*/ /*write Free Energy into comment*/ sprintf(comment,"\n%%Heterodimer AB FreeEnergy= %.9f\n", AB.FcAB); /*reset cut_point*/ cut_point=Alength+1; /*write New name*/ strcpy(Newname,"AB"); strcat(Newname,ffname); (void)PS_dot_plot_list(string, Newname, prAB, mfAB, comment); /*AA dot_plot*/ sprintf(comment,"\n%%Homodimer AA FreeEnergy= %.9f\n",AA.FcAB); /*write New name*/ strcpy(Newname,"AA"); strcat(Newname,ffname); /*write AA sequence*/ Newstring=(char*)space((2*Alength+1)*sizeof(char)); strcpy(Newstring,Astring); strcat(Newstring,Astring); (void)PS_dot_plot_list(Newstring, Newname, prAA, mfAA, comment); free(Newstring); /*BB dot_plot*/ sprintf(comment,"\n%%Homodimer BB FreeEnergy= %.9f\n",BB.FcAB); /*write New name*/ strcpy(Newname,"BB"); strcat(Newname,ffname); /*write BB sequence*/ Newstring=(char*)space((2*Blength+1)*sizeof(char)); strcpy(Newstring,Bstring); strcat(Newstring,Bstring); /*reset cut_point*/ cut_point=Blength+1; (void)PS_dot_plot_list(Newstring, Newname, prBB, mfBB, comment); free(Newstring); /*A dot plot*/ /*reset cut_point*/ cut_point=-1; sprintf(comment,"\n%%Monomer A FreeEnergy= %.9f\n",AB.FA); /*write New name*/ strcpy(Newname,"A"); strcat(Newname,ffname); /*write BB sequence*/ (void)PS_dot_plot_list(Astring, Newname, prA, mfA, comment); /*B monomer dot plot*/ sprintf(comment,"\n%%Monomer B FreeEnergy= %.9f\n",AB.FB); /*write New name*/ strcpy(Newname,"B"); strcat(Newname,ffname); /*write BB sequence*/ (void)PS_dot_plot_list(Bstring, Newname, prB, mfB, comment); 
free(Astring); free(Bstring); free(prAB); free(prAA); free(prBB); free(prA); free(prB); free(mfAB); free(mfAA); free(mfBB); free(mfA); free(mfB); } /*end if(doT)*/ }/*end if(pf)*/ if (do_backtrack) { if (fname[0]!='\0') { strcpy(ffname, fname); strcat(ffname, "_dp.ps"); } else strcpy(ffname, "dot.ps"); if (!doT) { if (pf) { (void) PS_dot_plot_list(string, ffname, prAB, mfAB, "doof"); free(prAB); } free(mfAB); } } if (!doT) free_co_pf_arrays(); if (cstruc!=NULL) free(cstruc); (void) fflush(stdout); free(string); free(structure); } while (1); return 0; }
/*
 * qsort() comparator for an array of structure strings (char *).
 *
 * Each element is evaluated with energy_of_struct() against the sequence
 * GAV.farbe_full; the energy is multiplied by 100 and truncated to int
 * before comparing, so structures are ordered by ascending energy at that
 * resolution.
 *
 * Returns a negative, zero or positive value in the usual qsort() sense.
 */
static int comp_struc(const void *A, const void *B)
{
  char *struc_a = *(char **)A;
  char *struc_b = *(char **)B;
  int centi_a;
  int centi_b;

  centi_a = (int)(100 * energy_of_struct(GAV.farbe_full, struc_a));
  centi_b = (int)(100 * energy_of_struct(GAV.farbe_full, struc_b));

  return centi_a - centi_b;
}
/*--------------------------------------------------------------------------*/ int main(int argc, char *argv[]) { char *string1=NULL, *string2=NULL, *temp, *line; char *structure=NULL, *cstruc=NULL; char fname[53], my_contrib[10], *up_out; char *ParamFile=NULL; char *ns_bases=NULL, *c; int i, length1,length2,length, l, sym, r; double energy, min_en; double kT, sfact=1.07; int pf, istty; int noconv=0; double Zu, Zup; /* variables for output */ pu_contrib *unstr_out, *unstr_short; FLT_OR_DBL **inter_out; char *title; /* commandline parameters */ int w; /* length of region of interaction */ int incr3; /* add x unpaired bases after 3'end of short RNA*/ int incr5; /* add x unpaired bases after 5'end of short RNA*/ int unstr; /* length of unpaired region for output*/ int upmode; /* output mode for pf_unpaired and pf_up()*/ upmode = 0; unstr = 4; incr3=0; incr5=0; w=25; do_backtrack = 1; pf=1; /* partition function has to be calculated */ length1=length2=0; up_out=NULL; title=NULL; unstr_out=NULL; inter_out=NULL; my_contrib[0] = 'S'; my_contrib[1] = '\0'; for (i=1; i<argc; i++) { if (argv[i][0]=='-') switch ( argv[i][1] ) { case 'T': if (argv[i][2]!='\0') usage(); if(i==argc-1) usage(); r=sscanf(argv[++i], "%lf", &temperature); if (!r) usage(); break; case 'w': /* -w maximal length of unstructured region */ if(i==argc-1) usage(); r=sscanf(argv[++i],"%d", &w); if (!r) usage(); break; case 'n': if ( strcmp(argv[i], "-noGU")==0) noGU=1; if ( strcmp(argv[i], "-noCloseGU")==0) no_closingGU=1; if ( strcmp(argv[i], "-noLP")==0) noLonelyPairs=1; if ( strcmp(argv[i], "-nsp") ==0) { if (i==argc-1) usage(); ns_bases = argv[++i]; } if ( strcmp(argv[i], "-noconv")==0) noconv=1; break; case '4': tetra_loop=0; break; case 'e': if(i==argc-1) usage(); r=sscanf(argv[++i],"%d", &energy_set); if (!r) usage(); break; case 'C': fold_constrained=1; break; case 'S': if(i==argc-1) usage(); r=sscanf(argv[++i],"%lf", &sfact); if (!r) usage(); break; case 'd': dangles=0; if (argv[i][2]!='\0') { 
r=sscanf(argv[i]+2, "%d", &dangles); if (r!=1) usage(); } break; case 'o': upmode=1; /* output mode 0: non, 1:only pr_unpaired, 2: pr_unpaired + pr_up */ if (argv[i][2]!='\0') { r=sscanf(argv[i]+2, "%d", &upmode); if (r!=1) usage(); } break; case 'u': /* -u length of unstructured region in pr_unpaired output makes only sense in combination with -o1 or -o2 */ if(i==argc-1) usage(); r=sscanf(argv[++i],"%d", &unstr); if (!r) usage(); break; /* incr5 and incr3 are only for the longer (target) sequence */ /* increments w (length of the unpaired region) to incr5+w+incr3*/ /* the longer sequence is given in 5'(= position 1) to */ /* 3' (=position n) direction */ /* incr5 adds incr5 residues to the 5' end of w */ case '5': if(i==argc-1) usage(); r=sscanf(argv[++i],"%d", &incr5); if (!r) usage(); break; /* incr3 adds incr3 residues to the 3' end of w */ case '3': if(i==argc-1) usage(); r=sscanf(argv[++i],"%d", &incr3); if (!r) usage(); break; case 'P': if (i==argc-1) usage(); ParamFile = argv[++i]; break; case 'x': if(i==argc-1) usage(); r=sscanf(argv[++i], "%s", my_contrib); if (!r) usage(); break; default: usage(); } } if (ParamFile != NULL) read_parameter_file(ParamFile); if (ns_bases != NULL) { nonstandards = space(33); c=ns_bases; i=sym=0; if (*c=='-') { sym=1; c++; } while (*c!='\0') { if (*c!=',') { nonstandards[i++]=*c++; nonstandards[i++]=*c; if ((sym)&&(*c!=*(c-1))) { nonstandards[i++]=*c; nonstandards[i++]=*(c-1); } } c++; } } istty = isatty(fileno(stdout))&&isatty(fileno(stdin)); if ((fold_constrained)&&(istty)) { printf("Input constraints using the following notation:\n"); printf("| : paired with another base\n"); printf(". 
: no constraint at all\n"); printf("x : base must not pair\n"); printf("< : base i is paired with a base j<i\n"); printf("> : base i is paired with a base j>i\n"); printf("matching brackets ( ): base i pairs base j\n"); } do { /* main loop: continue until end of file */ cut_point=-1; if (istty) { printf("\nInput string (upper or lower case); @ to quit\n"); printf("Use '&' to connect 2 sequences that shall form a complex.\n"); printf("%s%s\n", scale1, scale2); } fname[0]='\0'; if ((line = get_line(stdin))==NULL) break; /* skip comment lines and get filenames */ while ((*line=='*')||(*line=='\0')||(*line=='>')) { if (*line=='>') (void) sscanf(line, ">%51s", fname); free(line); if ((line = get_line(stdin))==NULL) break; } if ((line == NULL) || (strcmp(line, "@") == 0)) break; tokenize(line,&string1,&string2); if(upmode != 0){ if(cut_point == -1 && upmode == 2) { nrerror("only one sequence - can not cofold one sequence!"); } } else { if(cut_point == -1){ upmode=1; } else { upmode=2; } } if(string1 != NULL) length1 = (int) strlen(string1); if(string2 != NULL) length2 = (int) strlen(string2); else length2=0; /* write longer seq in string1 and and shorter one in string2 */ if(length1 < length2) { length=length1; length1=length2; length2=length; temp=(char *) space(strlen(string1)+1); (void) sscanf(string1,"%s",temp); string1 = (char *) xrealloc (string1,sizeof(char)*length1+1); (void) sscanf(string2,"%s",string1); string2 = (char *) xrealloc(string2,sizeof(char)*length2+1); (void) sscanf(temp,"%s",string2); free(temp); } structure = (char *) space((unsigned) length1+1); if (fold_constrained) { cstruc = get_line(stdin); if (cstruc!=NULL) strncpy(structure, cstruc, length1); else fprintf(stderr, "constraints missing\n"); } for (l = 0; l < length1; l++) { string1[l] = toupper(string1[l]); if (!noconv && string1[l] == 'T') string1[l] = 'U'; } for (l = 0; l < length2; l++) { string2[l] = toupper(string2[l]); if (!noconv && string2[l] == 'T') string2[l] = 'U'; } if (istty) 
printf("length1 = %d\n", length1); /* initialize_fold(length); */ update_fold_params(); printf("\n%s", string1); min_en = fold(string1, structure); if (istty) { printf("\n minimum free energy = %6.2f kcal/mol\n", min_en); } else printf(" (%6.2f)\n", min_en); (void) fflush(stdout); /* parse cml parameters for the filename*/ if(upmode > 0) { char wuadd[10]; up_out = (char*) space(sizeof(char)*53); /* create the name of the output file */ if(fname[0]!='\0' && up_out[0] =='\0' ){ if(strlen(fname)< 30){ strcpy(up_out, fname); } else { strncpy(up_out, fname,30); } } else if(fname[0]=='\0' && up_out[0] == '\0'){ char defaultn[10] = "RNA"; sprintf(up_out,"%s",defaultn); } sprintf(wuadd,"%d",w); strcat(up_out, "_w"); strcat(up_out, wuadd); strcat(up_out, "u"); sprintf(wuadd,"%d",unstr); strcat(up_out, wuadd); strcat(up_out, "_up.out"); printf("RNAup output in file: %s\n",up_out); /* create the title for the output file */ if (title == NULL) { char wuadd[10]; title = (char*) space(sizeof(char)*60); if(fname[0]!='\0'){ if(strlen(fname)< 30){ strcpy(title, fname); } else { strncpy(title, fname,30); } } else if (fname[0]=='\0'){ char defaultn[10]= "RNAup"; sprintf(title,"%s",defaultn); } sprintf(wuadd,"%d",unstr); strcat(title," u="); strcat(title, wuadd); sprintf(wuadd,"%d",w); strcat(title," w="); strcat(title, wuadd); sprintf(wuadd,"%d",length1); strcat(title," n="); strcat(title, wuadd); } } else { nrerror("no output format given: use [-o[1|2]] to select output format"); } if (pf) { if (dangles==1) { dangles=2; /* recompute with dangles as in pf_fold() */ min_en = energy_of_struct(string1, structure); dangles=1; } kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */ if(upmode != 0){ int wplus; wplus=w+incr3+incr5; /* calculate prob. 
unstructured for the shorter seq */ if(upmode == 3) { min_en = fold(string2, structure); pf_scale = exp(-(sfact*min_en)/kT/length2); if (length2>2000) fprintf(stderr, "scaling factor %f\n", pf_scale); init_pf_fold(length2); if (cstruc!=NULL) strncpy(structure, cstruc, length2+1); energy = pf_fold(string2, structure); if(wplus > length2){ wplus = length2;} /* for the shorter seq */ unstr_short = pf_unstru(string2, structure, wplus); free_pf_unstru(); free_pf_arrays(); /* for arrays for pf_fold(...) */ } /* calculate prob. unstructured for the longer seq */ wplus=w+incr3+incr5; min_en = fold(string1, structure); pf_scale = exp(-(sfact*min_en)/kT/length1); if (length1>2000) fprintf(stderr, "scaling factor %f\n", pf_scale); init_pf_fold(length1); if (cstruc!=NULL) strncpy(structure, cstruc, length1+1); energy = pf_fold(string1, structure); unstr_out = pf_unstru(string1, structure, wplus); free_pf_unstru(); free_pf_arrays(); /* for arrays for pf_fold(...) */ /* calculate the interaction between the two sequences */ if(upmode > 1 && cut_point > -1){ inter_out = pf_interact(string1,string2,unstr_out,w, incr3, incr5); if(Up_plot(unstr_out,inter_out,length1,up_out,unstr,my_contrib)==0){ nrerror("Up_plot: no output values assigned"); } } else if(cut_point == -1 && upmode > 1) { /* no second seq given */ nrerror("only one sequence given - cannot cofold one sequence!"); } else { /* plot only the results for prob unstructured */ if(Up_plot(unstr_out,NULL,length1,up_out,unstr,my_contrib)==0){ nrerror("Up_plot: no output values assigned"); } } } else { nrerror("no output format given: use [-o[1|2]] to select output format"); } if (do_backtrack) { printf("%s", structure); if (!istty) printf(" [%6.2f]\n", energy); else printf("\n"); } if ((istty)||(!do_backtrack)) printf(" free energy of ensemble = %6.2f kcal/mol\n", energy); energy = pf_fold(string1, structure); printf(" frequency of mfe structure in ensemble %g; " "ensemble diversity %-6.2f\n", exp((energy-min_en)/kT), 
mean_bp_dist(length1)); free_pf_arrays(); } if (cstruc!=NULL) free(cstruc); (void) fflush(stdout); if (string1!=NULL) free(string1); if (string2!=NULL) free(string2); free(structure); if(up_out != NULL) free(up_out); up_out=NULL; if(title != NULL) free(title); title=NULL; if(upmode == 1) free_pf_two(unstr_out,NULL); if(upmode > 1) free_pf_two(unstr_out,inter_out); if(upmode == 3)free_pf_two(unstr_short,NULL); free_arrays(); /* for arrays for fold(...) */ } while (1); return 0; }