/**
 * Builds a consensus partition from the partitions in SPV with a pivot scheme.
 *
 * Step 1: for every ordered pair (i, j) of elements 1..n, accumulate the
 *         fraction of input partitions in which i and j are co-clustered.
 * Step 2: repeatedly let ChooseRandom() select a pivot among the elements
 *         that have not been handled yet; every still-unhandled element whose
 *         co-clustering fraction with the pivot exceeds `cutoff` is merged
 *         into the pivot's block and removed from the pool.
 *
 * The loop ends when ChooseRandom() returns -1.
 *
 * @param SPV  input partitions; SPV[0] is read unconditionally, so the
 *             vector must not be empty.
 * @return the consensus partition over the n elements of SPV[0].
 */
SetPartition CCPivot::_Run(const SetPartitionVector &SPV)
{
    const unsigned int k = SPV.size();
    const unsigned int n = SPV[0].GetN();

    // Starting partition; MakeLast() presumably yields all-singletons -- TODO confirm.
    SetPartition consensus(n);
    consensus.MakeLast();

    // Co-clustering frequencies, 1-based (row/column 0 stay unused).
    Matrix<double> mat(n + 1, n + 1);
    mat.FillWithValue(0.0);
    const double share = 1.0 / (double)k;
    for (unsigned int p = 0; p < k; ++p) {
        for (int i = 1; i <= n; ++i) {
            for (int j = 1; j <= n; ++j) {
                if (SPV[p].CoClustered(i, j)) {
                    mat(i, j) += share;
                }
            }
        }
    }

    // Pool of elements that have not been assigned to a pivot yet.
    set<int> pending;
    for (int e = 1; e <= n; ++e) {
        pending.insert(e);
    }

    for (;;) {
        int pivot = ChooseRandom(pending, SPV[0]);
        if (pivot == -1) {
            break;
        }
        pending.erase(pivot);

        // Cluster every remaining element that agrees often enough with the pivot.
        for (int e = 1; e <= n; ++e) {
            if (pending.count(e) != 0 && mat(pivot, e) > cutoff) {
                pending.erase(e);
                consensus.MergeBlocks(consensus.WhichBlock(e),
                                      consensus.WhichBlock(pivot));
            }
        }
    }

    return consensus;
}
int main(int argc, char** argv) { if(argc > 3) { image_height=atoi(argv[1]); image_width=atoi(argv[1]); if(atoi(argv[2]) == 1) create_image(); image = (char*)malloc(sizeof(char)*image_height*image_width); meet=(ConsensusGrid*)malloc(sizeof(ConsensusGrid)*image_height*image_width); basis = (int*)malloc(sizeof(int)*image_height*image_width); read_raw_image("image.raw",image,image_height,image_width); for(int i=0; i<image_height; i++) { for(int j = 0; j<image_width; j++) { if(atoi(argv[3]) == 1) meet[i*image_width+j]=consensus_parallel(i, j, image,image_height,image_width); else meet[i*image_width+j]=consensus(i, j, image,image_height,image_width); } } printf("Calculating list\n"); if(atoi(argv[3]) == 1) calculate_list_parallel(); else calculate_list(); printf("Calculating basis\n"); if(atoi(argv[3]) == 1) calculate_basis_parallel(); else calculate_basis(); cout << "Basis count = " << basis_count << endl; } else { cout << "Insufficient arguments\n" << endl; cout << "First argument = Image Size" << endl; cout << "Second argument = 1 to create image, 0 not to create image" << endl; cout << "Third argument = 1 for parallel, 0 for not parallel\n" << endl; } }
int main_consensus(int argc, char *argv[]) { args_t *args = (args_t*) calloc(1,sizeof(args_t)); args->argc = argc; args->argv = argv; static struct option loptions[] = { {"sample",1,0,'s'}, {"iupac-codes",0,0,'i'}, {"haplotype",1,0,'H'}, {"output",1,0,'o'}, {"fasta-ref",1,0,'f'}, {"mask",1,0,'m'}, {"chain",1,0,'c'}, {0,0,0,0} }; char c; while ((c = getopt_long(argc, argv, "h?s:1iH:f:o:m:c:",loptions,NULL)) >= 0) { switch (c) { case 's': args->sample = optarg; break; case 'o': args->output_fname = optarg; break; case 'i': args->output_iupac = 1; break; case 'f': args->ref_fname = optarg; break; case 'm': args->mask_fname = optarg; break; case 'c': args->chain_fname = optarg; break; case 'H': args->haplotype = optarg[0] - '0'; if ( args->haplotype <=0 ) error("Expected positive integer with --haplotype\n"); break; default: usage(args); break; } } if ( optind>=argc ) usage(args); args->fname = argv[optind]; if ( !args->ref_fname && !isatty(fileno((FILE *)stdin)) ) args->ref_fname = "-"; if ( !args->ref_fname ) usage(args); init_data(args); consensus(args); destroy_data(args); free(args); return 0; }
/**
 * Timer callback running one iteration of the agent algorithm.
 *
 * The four processing stages are executed in a fixed order, then the call
 * blocks until this agent's TDMA slot and finally publishes the latest
 * estimated statistics.
 *
 * @param timer_event  timer information supplied by ROS (not read here).
 */
void AgentCore::algorithmCallback(const ros::TimerEvent &timer_event) {
  // --- processing stages -------------------------------------------------
  consensus();  // also clears the received statistics container
  control();    // also publishes virtual agent pose and path
  guidance();
  dynamics();   // also publishes agent pose and path

  // --- wait for our transmission window ----------------------------------
  // The slot offset is agent dependent: slot length times agent id.
  waitForSlotTDMA(slot_tdma_*agent_id_);

  // --- publish the last estimated statistics in the proper TDMA slot -----
  formation_control::FormationStatisticsStamped stats_msg;
  stats_msg.header.frame_id = agent_virtual_frame_;
  stats_msg.header.stamp = ros::Time::now();
  stats_msg.agent_id = agent_id_;
  stats_msg.stats = estimated_statistics_;
  stats_publisher_.publish(stats_msg);

  std::stringstream log_line;
  log_line << "Estimated statistics published.";
  console(__func__, log_line, DEBUG);
}
/**
 * Computes, for every cell of `data`, the consensus() of a window around it.
 *
 * For cell (row, col) the window starts at (col - w/2, row - w/2) when the
 * index is at least w/2 and at the cell's own index otherwise; it is w cells
 * wide/high, clipped at the right/bottom border of `data`.
 * NOTE(review): for indices below w/2 the window starts at the index itself
 * rather than at 0 -- confirm this is intended.
 *
 * @param data      input matrix
 * @param w         window edge length
 * @param fallback  value forwarded unchanged to consensus() for every window
 * @return matrix of element type T with the size of `data`
 */
template<class T>
cv::Mat filter::trends(const cv::Mat &data, size_t w, const T &fallback) {
  const size_t rows = data.rows;
  const size_t cols = data.cols;
  const size_t half = w / 2;

  cv::Mat trend = cv::Mat_<T>(data.size());

  for (size_t row = 0; row < rows; ++row) {
    T *out = trend.ptr<T>(row);

    // Vertical extent only depends on the row, so compute it once per row.
    const size_t top = (row >= half) ? row - half : row;
    const size_t height = (top + w <= rows) ? w : rows - top;

    for (size_t col = 0; col < cols; ++col) {
      const size_t left = (col >= half) ? col - half : col;
      const size_t width = (left + w <= cols) ? w : cols - left;

      // Header-only view on the window; no pixel data is copied.
      cv::Mat patch(data, cv::Rect(left, top, width, height));
      out[col] = consensus(patch, fallback);
    }
  }

  return trend;
}
/**
 * Computes, for every cell of `data`, the consensus() of a window around it,
 * passing the cell's own value as the second argument of consensus().
 *
 * For cell (row, col) the window starts at (col - w/2, row - w/2) when the
 * index is at least w/2 and at the cell's own index otherwise; it is w cells
 * wide/high, clipped at the right/bottom border of `data`.
 * NOTE(review): for indices below w/2 the window starts at the index itself
 * rather than at 0 -- confirm this is intended.
 *
 * @param data  input matrix, read as elements of type T
 * @param w     window edge length
 * @return matrix of element type T with the size of `data`
 */
template<class T>
cv::Mat filter::trends(const cv::Mat &data, size_t w) {
  const size_t rows = data.rows;
  const size_t cols = data.cols;
  const size_t half = w / 2;

  cv::Mat result = cv::Mat_<T>(data.size());

  for (size_t row = 0; row < rows; ++row) {
    // Vertical extent only depends on the row, so compute it once per row.
    const size_t top = (row >= half) ? row - half : row;
    const size_t height = (top + w <= rows) ? w : rows - top;

    for (size_t col = 0; col < cols; ++col) {
      const size_t left = (col >= half) ? col - half : col;
      const size_t width = (left + w <= cols) ? w : cols - left;

      // Header-only view on the window; no pixel data is copied.
      cv::Mat patch(data, cv::Rect(left, top, width, height));

      const T &own = data.at<T>(row, col);
      result.at<T>(row, col) = consensus(patch, own);
    }
  }

  return result;
}
/*
 *  Fill the sequence dependent parts of a fold compound.
 *
 *  All optional sub-structures are reset to NULL first. Depending on
 *  vc->type the function then prepares either a single sequence (the cut
 *  point marker is spliced out, encodings and, unless only evaluation is
 *  requested, pair type arrays are created) or an alignment (consensus
 *  sequence, pscore arrays and the per-sequence encodings are created).
 *  Finally the index arrays are built and add_params() attaches the energy
 *  parameters.
 *
 *  vc       fold compound; vc->type and vc->sequence (single) or
 *           vc->sequences, vc->n_seq and vc->length (alignment) are read
 *  md_p     model details; min_loop_size may be reset to 0 when a cut point
 *           is found
 *  options  VRNA_OPTION_* flags, forwarded to add_params()
 *  aux      WITH_PTYPE / WITH_PTYPE_COMPAT selection flags
 */
PRIVATE void
set_fold_compound(vrna_fold_compound_t *vc,
                  vrna_md_t            *md_p,
                  unsigned int         options,
                  unsigned int         aux){

  unsigned int  n, idx;
  int           cut;            /* cut point for cofold */
  char          *copy, *spliced;

  cut = -1;

  /* some default init values */
  vc->params        = NULL;
  vc->exp_params    = NULL;
  vc->matrices      = NULL;
  vc->exp_matrices  = NULL;
  vc->hc            = NULL;
  vc->auxdata       = NULL;
  vc->free_auxdata  = NULL;

  if(vc->type == VRNA_VC_TYPE_SINGLE){

    /* work on a copy: splice out the '&' of concatenated sequences and
       obtain the cut point; this is also fine for plain single sequences */
    copy    = strdup(vc->sequence);
    spliced = vrna_cut_point_remove(copy, &cut);

    vc->cutpoint = cut;

    /* has to happen before the sequence gets encoded with md_p below */
    if((cut > 0) && (md_p->min_loop_size == TURN))
      md_p->min_loop_size = 0;  /* is it safe to set this here? */

    /* the spliced sequence replaces the one stored so far */
    free(vc->sequence);
    vc->sequence = spliced;

    n           = strlen(spliced);
    vc->length  = n;

    vc->sequence_encoding   = vrna_seq_encode(spliced, md_p);
    vc->sequence_encoding2  = vrna_seq_encode_simple(spliced, md_p);

    /* pair types are only required when something is actually folded */
    vc->ptype           = NULL;
    vc->ptype_pf_compat = NULL;
    if(!(options & VRNA_OPTION_EVAL_ONLY)){
      if(aux & WITH_PTYPE)
        vc->ptype = vrna_ptypes(vc->sequence_encoding2, md_p);

      /* backward compatibility ptypes */
      if(aux & WITH_PTYPE_COMPAT)
        vc->ptype_pf_compat = get_ptypes(vc->sequence_encoding2, md_p, 1);
    }

    vc->sc = NULL;

    free(copy);

  } else if(vc->type == VRNA_VC_TYPE_ALIGNMENT){

    /* alignment length is taken as is (kept as a round trip via n) */
    n           = vc->length;
    vc->length  = n;

    vc->cons_seq  = consensus((const char **)vc->sequences);
    vc->S_cons    = vrna_seq_encode_simple(vc->cons_seq, md_p);

    vc->pscore    = vrna_alloc(sizeof(int)*((n*(n+1))/2+2));

    /* backward compatibility ptypes */
    if(aux & WITH_PTYPE_COMPAT)
      vc->pscore_pf_compat = vrna_alloc(sizeof(int)*((n*(n+1))/2+2));
    else
      vc->pscore_pf_compat = NULL;

    /* propagate the setting to the compound and to the global variable */
    vc->oldAliEn  = md_p->oldAliEn;
    oldAliEn      = vc->oldAliEn;

    /* one slot per sequence plus a terminating NULL entry */
    vc->S   = vrna_alloc((vc->n_seq+1) * sizeof(short *));
    vc->S5  = vrna_alloc((vc->n_seq+1) * sizeof(short *));
    vc->S3  = vrna_alloc((vc->n_seq+1) * sizeof(short *));
    vc->a2s = vrna_alloc((vc->n_seq+1) * sizeof(unsigned short *));
    vc->Ss  = vrna_alloc((vc->n_seq+1) * sizeof(char *));

    for(idx = 0; idx < vc->n_seq; idx++)
      vrna_aln_encode(vc->sequences[idx],
                      &(vc->S[idx]),
                      &(vc->S5[idx]),
                      &(vc->S3[idx]),
                      &(vc->Ss[idx]),
                      &(vc->a2s[idx]),
                      md_p);

    vc->S5[vc->n_seq]   = NULL;
    vc->S3[vc->n_seq]   = NULL;
    vc->a2s[vc->n_seq]  = NULL;
    vc->Ss[vc->n_seq]   = NULL;
    vc->S[vc->n_seq]    = NULL;

    vc->scs = NULL;
  }
  /* any other type: nothing sequence specific to set up */

  vc->iindx = vrna_idx_row_wise(vc->length);
  vc->jindx = vrna_idx_col_wise(vc->length);

  /* now come the energy parameters */
  add_params(vc, md_p, options);
}
int PS_color_aln(const char *structure, const char *filename, const char *seqs[], const char *names[]) { /* produce PS sequence alignment color-annotated by consensus structure */ int N,i,j,k,x,y,tmp,columnWidth; char *tmpBuffer,*ssEscaped,*ruler, *cons; char c; float fontWidth, fontHeight, imageHeight, imageWidth,tmpColumns; int length, maxName, maxNum, currPos; float lineStep,blockStep,consStep,ssStep,rulerStep,nameStep,numberStep; float maxConsBar,startY,namesX,seqsX, currY; float score,barHeight,xx,yy; int match,block; FILE *outfile; short *pair_table; char * colorMatrix[6][3] = { {"0.0 1", "0.0 0.6", "0.0 0.2"}, /* red */ {"0.16 1","0.16 0.6", "0.16 0.2"}, /* ochre */ {"0.32 1","0.32 0.6", "0.32 0.2"}, /* turquoise */ {"0.48 1","0.48 0.6", "0.48 0.2"}, /* green */ {"0.65 1","0.65 0.6", "0.65 0.2"}, /* blue */ {"0.81 1","0.81 0.6", "0.81 0.2"} /* violet */ }; const char *alnPlotHeader = "%%!PS-Adobe-3.0 EPSF-3.0\n" "%%%%BoundingBox: %i %i %i %i\n" "%%%%EndComments\n" "%% draws Vienna RNA like colored boxes\n" "/box { %% x1 y1 x2 y2 hue saturation\n" " gsave\n" " dup 0.3 mul 1 exch sub sethsbcolor\n" " exch 3 index sub exch 2 index sub rectfill\n" " grestore\n" "} def\n" "%% draws a box in current color\n" "/box2 { %% x1 y1 x2 y2\n" " exch 3 index sub exch 2 index sub rectfill\n" "} def\n" "/string { %% (Text) x y\n" " 6 add\n" " moveto\n" " show\n" "} def\n" "0 %i translate\n" "1 -1 scale\n" "/Courier findfont\n" "[10 0 0 -10 0 0] makefont setfont\n"; outfile = fopen(filename, "w"); if (outfile == NULL) { fprintf(stderr, "can't open file %s - not doing alignment plot\n", filename); return 0; } columnWidth=60; /* Display long alignments in blocks of this size */ fontWidth=6; /* Font metrics */ fontHeight=6.5; lineStep=fontHeight+2; /* distance between lines */ blockStep=3.5*fontHeight; /* distance between blocks */ consStep=fontHeight*0.5; /* distance between alignment and conservation curve */ ssStep=2; /* distance between secondary structure line and sequences 
*/ rulerStep=2; /* distance between sequences and ruler */ nameStep=3*fontWidth; /* distance between names and sequences */ numberStep=fontWidth; /* distance between sequeces and numbers */ maxConsBar=2.5*fontHeight; /* Height of conservation curve */ startY=2; /* "y origin" */ namesX=fontWidth; /* "x origin" */ /* Number of columns of the alignment */ length=strlen(seqs[0]); /* Allocate memory for various strings, length*2 is (more than) enough for all of them */ tmpBuffer = (char *) space((unsigned) length*2); ssEscaped=(char *) space((unsigned) length*2); ruler=(char *) space((unsigned) length*2); pair_table=make_pair_table(structure); /* Get length of longest name and count sequences in alignment*/ for (i=maxName=N=0; names[i] != NULL; i++) { N++; tmp=strlen(names[i]); if (tmp>maxName) maxName=tmp; } /* x-coord. where sequences start */ seqsX=namesX+maxName*fontWidth+nameStep; /* calculate number of digits of the alignment length */ snprintf(tmpBuffer,length, "%i",length); maxNum=strlen(tmpBuffer); /* Calculate bounding box */ tmpColumns=columnWidth; if (length<columnWidth){ tmpColumns=length; } imageWidth=ceil(namesX+(maxName+tmpColumns+maxNum)*fontWidth+2*nameStep+fontWidth+numberStep); imageHeight=startY+ceil((float)length/columnWidth)*((N+2)*lineStep+blockStep+consStep+ssStep+rulerStep); /* Write postscript header including correct bounding box */ fprintf(outfile,alnPlotHeader,0,0,(int)imageWidth,(int)imageHeight,(int)imageHeight); /* Create ruler and secondary structure lines */ i=0; /* Init all with dots */ for (i=0;i<(length);i++){ ruler[i]='.'; } i=0; for (i=0;i<length;i++){ /* Write number every 10th position, leave out block breaks */ if ((i+1)%10==0 && (i+1)%columnWidth!=0){ snprintf(tmpBuffer,length,"%i",i+1); strncpy(ruler+i,tmpBuffer,strlen(tmpBuffer)); } } ruler[length]='\0'; /* Draw color annotation first */ /* Repeat for all pairs */ for (i=1; i<=length; i++) { if ((j=pair_table[i])>i) { /* Repeat for open and closing position */ for 
(k=0;k<2;k++){ int pairings, nonpair, s, col; int ptype[8] = {0,0,0,0,0,0,0,0}; char *color; col = (k==0)?i-1:j-1; block=ceil((float)(col+1)/columnWidth); xx=seqsX+(col-(block-1)*columnWidth)*fontWidth; /* Repeat for each sequence */ for (s=pairings=nonpair=0; s<N; s++) { ptype[BP_pair[ENCODE(seqs[s][i-1])][ENCODE(seqs[s][j-1])]]++; } for (pairings=0,s=1; s<=7; s++) { if (ptype[s]) pairings++; } nonpair=ptype[0]; if (nonpair <=2) { color = colorMatrix[pairings-1][nonpair]; for (s=0; s<N; s++) { yy=startY+(block-1)*(lineStep*(N+2)+blockStep+consStep+rulerStep)+ssStep*(block)+(s+1)*lineStep; /* Color according due color information in pi-array, only if base pair is possible */ if (BP_pair[ENCODE(seqs[s][i-1])][ENCODE(seqs[s][j-1])]) { fprintf(outfile, "%.1f %.1f %.1f %.1f %s box\n", xx,yy-1,xx+fontWidth,yy+fontHeight+1,color); } } } } } } free(pair_table); /* Process rest of the output in blocks of columnWidth */ currY=startY; currPos=0; cons = consensus(seqs); while (currPos<length) { /* Display secondary structure line */ fprintf(outfile,"0 setgray\n"); strncpy(tmpBuffer,structure+currPos,columnWidth); tmpBuffer[columnWidth]='\0'; x=0;y=0; while ((c=tmpBuffer[x])){ if (c=='.'){ ssEscaped[y++]='.'; } else { ssEscaped[y++]='\\'; ssEscaped[y++]=c; } x++; } ssEscaped[y]='\0'; fprintf(outfile, "(%s) %.1f %.1f string\n", ssEscaped,seqsX,currY); currY+=ssStep+lineStep; /* Display names, sequences and numbers */ for (i=0; i<N; i++) { strncpy(tmpBuffer,seqs[i]+currPos,columnWidth); tmpBuffer[columnWidth]='\0'; match=0; for (j=0;j<(currPos+strlen(tmpBuffer));j++){ if (seqs[i][j] != '-') match++; } fprintf(outfile, "(%s) %.1f %.1f string\n", names[i],namesX,currY); fprintf(outfile, "(%s) %.1f %.1f string\n", tmpBuffer,seqsX,currY); fprintf(outfile, "(%i) %.1f %.1f string\n", match,seqsX+fontWidth*(strlen(tmpBuffer))+numberStep,currY); currY+=lineStep; } currY+=rulerStep; strncpy(tmpBuffer,ruler+currPos,columnWidth); tmpBuffer[columnWidth]='\0'; fprintf(outfile, "(%s) %.1f 
%.1f string\n", tmpBuffer,seqsX,currY); currY+=lineStep; currY+=consStep; /*Display conservation bar*/ fprintf(outfile,"0.6 setgray\n"); for (i=currPos;(i<currPos+columnWidth && i<length);i++){ match=0; for (j=0;j<N;j++){ if (cons[i] == seqs[j][i]) match++; if (cons[i]=='U' && seqs[j][i]=='T') match++; if (cons[i]=='T' && seqs[j][i]=='U') match++; } score=(float)(match-1)/(N-1); if (cons[i] == '-' || cons[i] == '_' || cons[i] == '.'){ score=0; } barHeight=maxConsBar*score; if (barHeight==0){ barHeight=1; } xx=seqsX+(i-(columnWidth*currPos/columnWidth))*fontWidth; fprintf(outfile,"%.1f %.1f %.1f %.1f box2\n", xx, currY+maxConsBar-barHeight, xx+fontWidth, currY+maxConsBar); } currY+=blockStep; currPos+=columnWidth; } free(cons); fprintf(outfile,"showpage\n"); fclose(outfile); free(tmpBuffer); free(ssEscaped);free(ruler); return 0; }
int main(int argc, char *argv[]) { char *modelDir=NULL; /* Directory with model files */ struct svm_model* decision_model; /* SVM classification model */ /* Command line options */ int reverse=0; /* Scan reverse complement */ int showVersion=0; /* Shows version and exits */ int showHelp=0; /* Show short help and exits */ int from=-1; /* Scan slice from-to */ int to=-1; FILE *clust_file=stdin; /* Input file */ FILE *out=stdout; /* Output file */ struct aln *AS[MAX_NUM_NAMES]; struct aln *window[MAX_NUM_NAMES]; char *tmpAln[MAX_NUM_NAMES]; int n_seq; /* number of input sequences */ int length; /* length of alignment/window */ int z_score_type; int decision_model_type; char *structure=NULL; char *singleStruc,*gapStruc, *output,*woGapsSeq; char strand[8]; char warningString[2000]; char warningString_regression[2000]; char *string=NULL; double singleMFE,sumMFE,singleZ,sumZ,z,sci,id,decValue,prob,comb,entropy,GC; double min_en, real_en; int i,j,k,l,ll,r,countAln,nonGaps,singleGC; int (*readFunction)(FILE *clust,struct aln *alignedSeqs[]); char** lines=NULL; int directions[3]={FORWARD,0,0}; int currDirection; struct gengetopt_args_info args; double meanMFE_fwd=0; double consensusMFE_fwd=0; double sci_fwd=0; double z_fwd=0; int strandGuess; int avoid_shuffle=0; double strandProb,strandDec; if (cmdline_parser (argc, argv, &args) != 0){ usage(); exit(EXIT_FAILURE); } if (args.help_given){ help(); exit(EXIT_SUCCESS); } if (args.version_given){ version(); exit(EXIT_SUCCESS); } if (args.outfile_given){ out = fopen(args.outfile_arg, "w"); if (out == NULL){ fprintf(stderr, "ERROR: Can't open output file %s\n", args.outfile_arg); exit(1); } } /* Strand prediction implies both strands scored */ if (args.predict_strand_flag){ args.both_strands_flag=1; } if (args.forward_flag && !args.reverse_flag){ directions[0]=FORWARD; directions[1]=directions[2]=0; } if (!args.forward_flag && args.reverse_flag){ directions[0]=REVERSE; directions[1]=directions[2]=0; } if ((args.forward_flag && 
args.reverse_flag) || args.both_strands_flag){ directions[0]=FORWARD; directions[1]=REVERSE; } if (args.window_given){ if (sscanf(args.window_arg,"%d-%d",&from,&to)!=2){ nrerror("ERROR: Invalid --window/-w command. " "Use it like '--window 100-200'\n"); } printf("from:%d,to:%d\n",from,to); } if (args.inputs_num>=1){ clust_file = fopen(args.inputs[0], "r"); if (clust_file == NULL){ fprintf(stderr, "ERROR: Can't open input file %s\n", args.inputs[0]); exit(1); } } /* Global RNA package variables */ do_backtrack = 1; dangles=2; switch(checkFormat(clust_file)){ case CLUSTAL: readFunction=&read_clustal; break; case MAF: readFunction=&read_maf; break; case 0: nrerror("ERROR: Unknown alignment file format. Use Clustal W or MAF format.\n"); } /* Set z-score type (mono/dinucleotide) here */ z_score_type = 2; if (args.mononucleotide_given) z_score_type = 0; /* now let's decide which decision model to take */ /* decision_model_type = 1 for normal model used in RNAz 1.0 */ /* decision_model_type = 2 for normal model using dinucelotide background */ /* decision_model_type = 3 for structural model using dinucelotide background */ decision_model_type = 2; if (args.mononucleotide_given) decision_model_type = 1; if (args.locarnate_given) decision_model_type = 3; if ((args.mononucleotide_given) && args.locarnate_given){ z_score_type=2; nrerror("ERROR: Structural decision model only trained with dinucleotide background model.\n"); } if (args.no_shuffle_given) avoid_shuffle = 1; decision_model=get_decision_model(NULL, decision_model_type); /* Initialize Regression Models for mononucleotide */ /* Not needed if we score with dinucleotides */ if (z_score_type == 0) regression_svm_init(); countAln=0; while ((n_seq=readFunction(clust_file, AS))!=0){ if (n_seq ==1){ nrerror("ERROR: You need at least two sequences in the alignment.\n"); } countAln++; length = (int) strlen(AS[0]->seq); /* if a slice is specified by the user */ if ((from!=-1 || to!=-1) && (countAln==1)){ if 
((from>=to)||(from<=0)||(to>length)){ nrerror("ERROR: Invalid window range given.\n"); } sliceAln((const struct aln**)AS, (struct aln **)window, from, to); length=to-from+1; } else { /* take complete alignment */ /* window=AS does not work..., deep copy seems not necessary here*/ from=1; to=length; sliceAln((const struct aln **)AS, (struct aln **)window, 1, length); } /* Convert all Us to Ts for RNAalifold. There is a slight difference in the results. During training we used alignments with Ts, so we use Ts here as well. */ for (i=0;i<n_seq;i++){ j=0; while (window[i]->seq[j]){ window[i]->seq[j]=toupper(window[i]->seq[j]); if (window[i]->seq[j]=='U') window[i]->seq[j]='T'; ++j; } } k=0; while ((currDirection=directions[k++])!=0){ if (currDirection==REVERSE){ revAln((struct aln **)window); strcpy(strand,"reverse"); } else { strcpy(strand,"forward"); } structure = (char *) space((unsigned) length+1); for (i=0;window[i]!=NULL;i++){ tmpAln[i]=window[i]->seq; } tmpAln[i]=NULL; min_en = alifold(tmpAln, structure); free_alifold_arrays(); comb=combPerPair(window,structure); sumZ=0.0; sumMFE=0.0; GC=0.0; output=(char *)space(sizeof(char)*(length+160)*(n_seq+1)*3); strcpy(warningString,""); strcpy(warningString_regression,""); for (i=0;i<n_seq;i++){ singleStruc = space(strlen(window[i]->seq)+1); woGapsSeq = space(strlen(window[i]->seq)+1); j=0; nonGaps=0; singleGC=0; while (window[i]->seq[j]){ /* Convert all Ts to Us for RNAfold. There is a difference between the results. With U in the function call, we get the results as RNAfold gives on the command line. Since this variant was also used during training, we use it here as well. */ if (window[i]->seq[j]=='T') window[i]->seq[j]='U'; if (window[i]->seq[j]=='C') singleGC++; if (window[i]->seq[j]=='G') singleGC++; if (window[i]->seq[j]!='-'){ nonGaps++; woGapsSeq[strlen(woGapsSeq)]=window[i]->seq[j]; woGapsSeq[strlen(woGapsSeq)]='\0'; } ++j; } /* z-score is calculated here! 
*/ singleMFE = fold(woGapsSeq, singleStruc); free_arrays(); /* z-score type may be overwritten. If it is out of training bounds, we switch to shuffling if allowed (avoid_shuffle). */ int z_score_type_orig = z_score_type; singleZ=mfe_zscore(woGapsSeq,singleMFE, &z_score_type, avoid_shuffle, warningString_regression); GC+=(double) singleGC/nonGaps; sumZ+=singleZ; sumMFE+=singleMFE; if (window[1]->strand!='?' && !args.window_given){ sprintf(output+strlen(output), ">%s %d %d %c %d\n", window[i]->name,window[i]->start, window[i]->length,window[i]->strand, window[i]->fullLength); } else { sprintf(output+strlen(output),">%s\n",window[i]->name); } gapStruc= (char *) space(sizeof(char)*(strlen(window[i]->seq)+1)); l=ll=0; while (window[i]->seq[l]!='\0'){ if (window[i]->seq[l]!='-'){ gapStruc[l]=singleStruc[ll]; l++; ll++; } else { gapStruc[l]='-'; l++; } } char ch; ch = 'R'; if (z_score_type == 1 || z_score_type == 3) ch = 'S'; sprintf(output+strlen(output),"%s\n%s ( %6.2f, z-score = %6.2f, %c)\n", window[i]->seq,gapStruc,singleMFE,singleZ,ch); z_score_type = z_score_type_orig; free(woGapsSeq); free(singleStruc); } { int i; double s=0; extern int eos_debug; eos_debug=-1; /* shut off warnings about nonstandard pairs */ for (i=0; window[i]!=NULL; i++) s += energy_of_struct(window[i]->seq, structure); real_en = s/i; } string = consensus((const struct aln**) window); sprintf(output+strlen(output), ">consensus\n%s\n%s (%6.2f = %6.2f + %6.2f) \n", string, structure, min_en, real_en, min_en-real_en ); free(string); id=meanPairID((const struct aln**)window); entropy=NormShannonEntropy((const struct aln**)window); z=sumZ/n_seq; GC=(double)GC/n_seq; if (sumMFE==0){ /*Set SCI to 0 in the weird case of no structure in single sequences*/ sci=0; } else { sci=min_en/(sumMFE/n_seq); } decValue=999; prob=0; classify(&prob,&decValue,decision_model,id,n_seq,z,sci,entropy,decision_model_type); if (args.cutoff_given){ if (prob<args.cutoff_arg){ continue; } } 
warning(warningString,id,n_seq,z,sci,entropy,(struct aln **)window,decision_model_type); fprintf(out,"\n############################ RNAz "PACKAGE_VERSION" ##############################\n\n"); fprintf(out," Sequences: %u\n", n_seq); if (args.window_given){ fprintf(out," Slice: %u to %u\n",from,to); } fprintf(out," Columns: %u\n",length); fprintf(out," Reading direction: %s\n",strand); fprintf(out," Mean pairwise identity: %6.2f\n", id); fprintf(out," Shannon entropy: %2.5f\n", entropy); fprintf(out," G+C content: %2.5f\n", GC); fprintf(out," Mean single sequence MFE: %6.2f\n", sumMFE/n_seq); fprintf(out," Consensus MFE: %6.2f\n",min_en); fprintf(out," Energy contribution: %6.2f\n",real_en); fprintf(out," Covariance contribution: %6.2f\n",min_en-real_en); fprintf(out," Combinations/Pair: %6.2f\n",comb); fprintf(out," Mean z-score: %6.2f\n",z); fprintf(out," Structure conservation index: %6.2f\n",sci); if (decision_model_type == 1) { fprintf(out," Background model: mononucleotide\n"); fprintf(out," Decision model: sequence based alignment quality\n"); } if (decision_model_type == 2) { fprintf(out," Background model: dinucleotide\n"); fprintf(out," Decision model: sequence based alignment quality\n"); } if (decision_model_type == 3) { fprintf(out," Background model: dinucleotide\n"); fprintf(out," Decision model: structural RNA alignment quality\n"); } fprintf(out," SVM decision value: %6.2f\n",decValue); fprintf(out," SVM RNA-class probability: %6f\n",prob); if (prob>0.5){ fprintf(out," Prediction: RNA\n"); } else { fprintf(out," Prediction: OTHER\n"); } fprintf(out,"%s",warningString_regression); fprintf(out,"%s",warningString); fprintf(out,"\n######################################################################\n\n"); fprintf(out,"%s",output); fflush(out); free(structure); free(output); if (currDirection==FORWARD && args.predict_strand_flag){ meanMFE_fwd=sumMFE/n_seq; consensusMFE_fwd=min_en; sci_fwd=sci; z_fwd=z; } if (currDirection==REVERSE && 
args.predict_strand_flag){ if (predict_strand(sci_fwd-sci, meanMFE_fwd-(sumMFE/n_seq), consensusMFE_fwd-min_en, z_fwd-z, n_seq, id, &strandGuess, &strandProb, &strandDec, NULL)){ if (strandGuess==1){ fprintf(out, "\n# Strand winner: forward (%.2f)\n",strandProb); } else { fprintf(out, "\n# Strand winner: reverse (%.2f)\n",1-strandProb); } } else { fprintf(out, "\n# WARNING: No strand prediction (values out of range)\n"); } } } freeAln((struct aln **)AS); freeAln((struct aln **)window); } if (args.inputs_num>=1){ fclose(clust_file); } cmdline_parser_free (&args); if (countAln==0){ nrerror("ERROR: Empty alignment file\n"); } svm_destroy_model(decision_model); regression_svm_free(); return 0; }
int main(int argc, char *argv[]){ struct RNALalifold_args_info args_info; char *string, *structure, *ParamFile, *ns_bases, *c; char ffname[80], gfname[80], fname[80]; int n_seq, i, length, sym, r, maxdist; int mis, pf, istty; float cutoff; double min_en, real_en, sfact; char *AS[MAX_NUM_NAMES]; /* aligned sequences */ char *names[MAX_NUM_NAMES]; /* sequence names */ FILE *clust_file = stdin; string = structure = ParamFile = ns_bases = NULL; mis = pf = 0; maxdist = 70; do_backtrack = 1; dangles = 2; sfact = 1.07; cutoff = 0.0005; /* ############################################# # check the command line parameters ############################################# */ if(RNALalifold_cmdline_parser (argc, argv, &args_info) != 0) exit(1); /* temperature */ if(args_info.temp_given) temperature = args_info.temp_arg; /* structure constraint */ if(args_info.noTetra_given) tetra_loop=0; /* set dangle model */ if(args_info.dangles_given) dangles = args_info.dangles_arg; /* do not allow weak pairs */ if(args_info.noLP_given) noLonelyPairs = 1; /* do not allow wobble pairs (GU) */ if(args_info.noGU_given) noGU = 1; /* do not allow weak closing pairs (AU,GU) */ if(args_info.noClosingGU_given) no_closingGU = 1; /* set energy model */ if(args_info.energyModel_given) energy_set = args_info.energyModel_arg; /* take another energy parameter set */ if(args_info.paramFile_given) ParamFile = strdup(args_info.paramFile_arg); /* Allow other pairs in addition to the usual AU,GC,and GU pairs */ if(args_info.nsp_given) ns_bases = strdup(args_info.nsp_arg); /* set pf scaling factor */ if(args_info.pfScale_given) sfact = args_info.pfScale_arg; /* partition function settings */ if(args_info.partfunc_given){ pf = 1; if(args_info.partfunc_arg != -1) do_backtrack = args_info.partfunc_arg; } /* set cfactor */ if(args_info.cfactor_given) cv_fact = args_info.cfactor_arg; /* set nfactor */ if(args_info.nfactor_given) nc_fact = args_info.nfactor_arg; /* set the maximum base pair span */ 
if(args_info.span_given) maxdist = args_info.span_arg; /* set the pair probability cutoff */ if(args_info.cutoff_given) cutoff = args_info.cutoff_arg; /* calculate most informative sequence */ if(args_info.mis_given) mis = 1; /* check unnamed options a.k.a. filename of input alignment */ if(args_info.inputs_num == 1){ clust_file = fopen(args_info.inputs[0], "r"); if(clust_file == NULL){ fprintf(stderr, "can't open %s\n", args_info.inputs[0]); } } else{ RNALalifold_cmdline_parser_print_help(); exit(1); } /* free allocated memory of command line data structure */ RNALalifold_cmdline_parser_free (&args_info); /* ############################################# # begin initializing ############################################# */ if (ParamFile != NULL) read_parameter_file(ParamFile); if (ns_bases != NULL) { nonstandards = space(33); c=ns_bases; i=sym=0; if (*c=='-') { sym=1; c++; } while (*c!='\0') { if (*c!=',') { nonstandards[i++]=*c++; nonstandards[i++]=*c; if ((sym)&&(*c!=*(c-1))) { nonstandards[i++]=*c; nonstandards[i++]=*(c-1); } } c++; } } istty = isatty(fileno(stdout))&&isatty(fileno(stdin)); if (istty && (clust_file == stdin)) { print_tty_input_seq_str("Input aligned sequences in clustalw format"); } n_seq = read_clustal(clust_file, AS, names); if (clust_file != stdin) fclose(clust_file); if (n_seq==0) nrerror("no sequences found"); length = (int) strlen(AS[0]); if (length<maxdist) { fprintf(stderr, "Alignment length < window size: setting L=%d\n",length); maxdist=length; } structure = (char *) space((unsigned) length+1); /* ############################################# # begin calculations ############################################# */ update_fold_params(); if(!pf) min_en = aliLfold(AS, structure, maxdist); { eos_debug=-1; /* shut off warnings about nonstandard pairs */ /* for (i=0; AS[i]!=NULL; i++) s += energy_of_struct(AS[i], structure); real_en = s/i;*/ } string = (mis) ? 
consens_mis((const char **) AS) : consensus((const char **) AS); printf("%s\n%s\n", string, structure); /* if (istty) printf("\n minimum free energy = %6.2f kcal/mol (%6.2f + %6.2f)\n", min_en, real_en, min_en - real_en); else printf(" (%6.2f = %6.2f + %6.2f) \n", min_en, real_en, min_en-real_en ); */ strcpy(ffname, "alirna.ps"); strcpy(gfname, "alirna.g"); /* if (length<=2500) { char *A; A = annote(structure, (const char**) AS); (void) PS_rna_plot_a(string, structure, ffname, NULL, A); free(A); } else fprintf(stderr,"INFO: structure too long, not doing xy_plot\n"); */ /* {*/ /* free mfe arrays but preserve base_pair for PS_dot_plot */ /* struct bond *bp; bp = base_pair; base_pair = space(16); free_alifold_arrays(); / * frees base_pair * / base_pair = bp; }*/ if (pf) { double energy, kT; plist *pl; char * mfe_struc; mfe_struc = strdup(structure); kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */ pf_scale = -1;/*exp(-(sfact*min_en)/kT/length);*/ if (length>2000) fprintf(stderr, "scaling factor %f\n", pf_scale); fflush(stdout); /* init_alipf_fold(length); */ /* energy = alipfW_fold(AS, structure, &pl, maxdist, cutoff); */ if (do_backtrack) { printf("%s", structure); /*if (!istty) printf(" [%6.2f]\n", energy); else */ printf("\n"); } /*if ((istty)||(!do_backtrack)) printf(" free energy of ensemble = %6.2f kcal/mol\n", energy); useless!!*/ /* printf(" frequency of mfe structure in ensemble %g\n", exp((energy-min_en)/kT));*/ if (do_backtrack) { FILE *aliout; cpair *cp; strcpy(ffname, "alifold.out"); aliout = fopen(ffname, "w"); if (!aliout) { fprintf(stderr, "can't open %s skipping output\n", ffname); } else { fprintf(aliout, "%d sequence; length of alignment %d\n", n_seq, length); fprintf(aliout, "alifold output\n"); fprintf(aliout, "%s\n", structure); } strcpy(ffname, "alidotL.ps"); cp = make_color_pinfo2(AS,pl,n_seq); (void) PS_color_dot_plot_turn(string, cp, ffname, maxdist); free(cp); } free(mfe_struc); free(pl); } free(base_pair); (void) fflush(stdout); 
free(string); free(structure); for (i=0; AS[i]; i++) { free(AS[i]); free(names[i]); } return 0; }
/*--------------------------------------------------------------------------*/ int main(int argc, char *argv[]){ struct RNAalifold_args_info args_info; unsigned int input_type; char ffname[FILENAME_MAX_LENGTH], gfname[FILENAME_MAX_LENGTH], fname[FILENAME_MAX_LENGTH]; char *input_string, *string, *structure, *cstruc, *ParamFile, *ns_bases, *c; int n_seq, i, length, sym, r, noPS, with_sci; int endgaps, mis, circular, doAlnPS, doColor, doMEA, n_back, eval_energy, pf, istty; double min_en, real_en, sfact, MEAgamma, bppmThreshold, betaScale; char *AS[MAX_NUM_NAMES]; /* aligned sequences */ char *names[MAX_NUM_NAMES]; /* sequence names */ FILE *clust_file = stdin; pf_paramT *pf_parameters; model_detailsT md; fname[0] = ffname[0] = gfname[0] = '\0'; string = structure = cstruc = ParamFile = ns_bases = NULL; pf_parameters = NULL; endgaps = mis = pf = circular = doAlnPS = doColor = n_back = eval_energy = oldAliEn = doMEA = ribo = noPS = 0; do_backtrack = 1; dangles = 2; gquad = 0; sfact = 1.07; bppmThreshold = 1e-6; MEAgamma = 1.0; betaScale = 1.; with_sci = 0; set_model_details(&md); /* ############################################# # check the command line prameters ############################################# */ if(RNAalifold_cmdline_parser (argc, argv, &args_info) != 0) exit(1); /* temperature */ if(args_info.temp_given) temperature = args_info.temp_arg; /* structure constraint */ if(args_info.constraint_given) fold_constrained=1; /* do not take special tetra loop energies into account */ if(args_info.noTetra_given) md.special_hp = tetra_loop=0; /* set dangle model */ if(args_info.dangles_given){ if((args_info.dangles_arg != 0) && (args_info.dangles_arg != 2)) warn_user("required dangle model not implemented, falling back to default dangles=2"); else md.dangles = dangles=args_info.dangles_arg; } /* do not allow weak pairs */ if(args_info.noLP_given) md.noLP = noLonelyPairs = 1; /* do not allow wobble pairs (GU) */ if(args_info.noGU_given) md.noGU = noGU = 1; /* do not 
allow weak closing pairs (AU,GU) */ if(args_info.noClosingGU_given) md.noGUclosure = no_closingGU = 1; /* gquadruplex support */ if(args_info.gquad_given) md.gquad = gquad = 1; /* sci computation */ if(args_info.sci_given) with_sci = 1; /* do not convert DNA nucleotide "T" to appropriate RNA "U" */ /* set energy model */ if(args_info.energyModel_given) energy_set = args_info.energyModel_arg; /* take another energy parameter set */ if(args_info.paramFile_given) ParamFile = strdup(args_info.paramFile_arg); /* Allow other pairs in addition to the usual AU,GC,and GU pairs */ if(args_info.nsp_given) ns_bases = strdup(args_info.nsp_arg); /* set pf scaling factor */ if(args_info.pfScale_given) sfact = args_info.pfScale_arg; /* assume RNA sequence to be circular */ if(args_info.circ_given) circular=1; /* do not produce postscript output */ if(args_info.noPS_given) noPS = 1; /* partition function settings */ if(args_info.partfunc_given){ pf = 1; if(args_info.partfunc_arg != -1) do_backtrack = args_info.partfunc_arg; } /* MEA (maximum expected accuracy) settings */ if(args_info.MEA_given){ pf = doMEA = 1; if(args_info.MEA_arg != -1) MEAgamma = args_info.MEA_arg; } if(args_info.betaScale_given) betaScale = args_info.betaScale_arg; /* set the bppm threshold for the dotplot */ if(args_info.bppmThreshold_given) bppmThreshold = MIN2(1., MAX2(0.,args_info.bppmThreshold_arg)); /* set cfactor */ if(args_info.cfactor_given) cv_fact = args_info.cfactor_arg; /* set nfactor */ if(args_info.nfactor_given) nc_fact = args_info.nfactor_arg; if(args_info.endgaps_given) endgaps = 1; if(args_info.mis_given) mis = 1; if(args_info.color_given) doColor=1; if(args_info.aln_given) doAlnPS=1; if(args_info.old_given) oldAliEn = 1; if(args_info.stochBT_given){ n_back = args_info.stochBT_arg; do_backtrack = 0; pf = 1; init_rand(); } if(args_info.stochBT_en_given){ n_back = args_info.stochBT_en_arg; do_backtrack = 0; pf = 1; eval_energy = 1; init_rand(); } if(args_info.ribosum_file_given){ RibosumFile = 
strdup(args_info.ribosum_file_arg); ribo = 1; } if(args_info.ribosum_scoring_given){ RibosumFile = NULL; ribo = 1; } if(args_info.layout_type_given) rna_plot_type = args_info.layout_type_arg; /* alignment file name given as unnamed option? */ if(args_info.inputs_num == 1){ clust_file = fopen(args_info.inputs[0], "r"); if (clust_file == NULL) { fprintf(stderr, "can't open %s\n", args_info.inputs[0]); } } /* free allocated memory of command line data structure */ RNAalifold_cmdline_parser_free (&args_info); /* ############################################# # begin initializing ############################################# */ if(circular && gquad){ nrerror("G-Quadruplex support is currently not available for circular RNA structures"); } make_pair_matrix(); if (circular && noLonelyPairs) warn_user("depending on the origin of the circular sequence, " "some structures may be missed when using --noLP\n" "Try rotating your sequence a few times\n"); if (ParamFile != NULL) read_parameter_file(ParamFile); if (ns_bases != NULL) { nonstandards = space(33); c=ns_bases; i=sym=0; if (*c=='-') { sym=1; c++; } while (*c!='\0') { if (*c!=',') { nonstandards[i++]=*c++; nonstandards[i++]=*c; if ((sym)&&(*c!=*(c-1))) { nonstandards[i++]=*c; nonstandards[i++]=*(c-1); } } c++; } } istty = isatty(fileno(stdout))&&isatty(fileno(stdin)); /* ######################################################## # handle user input from 'stdin' if necessary ######################################################## */ if(fold_constrained){ if(istty){ print_tty_constraint_full(); print_tty_input_seq_str(""); } input_type = get_input_line(&input_string, VRNA_INPUT_NOSKIP_COMMENTS); if(input_type & VRNA_INPUT_QUIT){ return 0;} else if((input_type & VRNA_INPUT_MISC) && (strlen(input_string) > 0)){ cstruc = strdup(input_string); free(input_string); } else warn_user("constraints missing"); } if (istty && (clust_file == stdin)) print_tty_input_seq_str("Input aligned sequences in clustalw or stockholm format\n(enter a 
line starting with \"//\" to indicate the end of your input)"); n_seq = read_clustal(clust_file, AS, names); if (n_seq==0) nrerror("no sequences found"); if (clust_file != stdin) fclose(clust_file); /* ######################################################## # done with 'stdin' handling, now init everything properly ######################################################## */ length = (int) strlen(AS[0]); structure = (char *)space((unsigned) length+1); if(fold_constrained && cstruc != NULL) strncpy(structure, cstruc, length); if (endgaps) for (i=0; i<n_seq; i++) mark_endgaps(AS[i], '~'); /* ######################################################## # begin actual calculations ######################################################## */ if (circular) { int i; double s = 0; min_en = circalifold((const char **)AS, structure); for (i=0; AS[i]!=NULL; i++) s += energy_of_circ_structure(AS[i], structure, -1); real_en = s/i; } else { float *ens = (float *)space(2*sizeof(float)); min_en = alifold((const char **)AS, structure); if(md.gquad) energy_of_ali_gquad_structure((const char **)AS, structure, n_seq, ens); else energy_of_alistruct((const char **)AS, structure, n_seq, ens); real_en = ens[0]; free(ens); } string = (mis) ? 
consens_mis((const char **) AS) : consensus((const char **) AS); printf("%s\n%s", string, structure); if(istty){ if(with_sci){ float sci = min_en; float e_mean = 0; for (i=0; AS[i]!=NULL; i++){ char *seq = get_ungapped_sequence(AS[i]); char *str = (char *)space(sizeof(char) * (strlen(seq) + 1)); e_mean += fold(seq, str); free(seq); free(str); } e_mean /= i; sci /= e_mean; printf( "\n minimum free energy = %6.2f kcal/mol (%6.2f + %6.2f)" "\n SCI = %2.4f\n", min_en, real_en, min_en-real_en, sci); } else printf("\n minimum free energy = %6.2f kcal/mol (%6.2f + %6.2f)\n", min_en, real_en, min_en - real_en); } else { if(with_sci){ float sci = min_en; float e_mean = 0; for (i=0; AS[i]!=NULL; i++){ char *seq = get_ungapped_sequence(AS[i]); char *str = (char *)space(sizeof(char) * (strlen(seq) + 1)); e_mean += fold(seq, str); free(seq); free(str); } e_mean /= i; sci /= e_mean; printf(" (%6.2f = %6.2f + %6.2f) [%2.4f]\n", min_en, real_en, min_en-real_en, sci); } else printf(" (%6.2f = %6.2f + %6.2f) \n", min_en, real_en, min_en-real_en ); } strcpy(ffname, "alirna.ps"); strcpy(gfname, "alirna.g"); if (!noPS) { char **A; A = annote(structure, (const char**) AS); if(md.gquad){ if (doColor) (void) PS_rna_plot_a_gquad(string, structure, ffname, A[0], A[1]); else (void) PS_rna_plot_a_gquad(string, structure, ffname, NULL, A[1]); } else { if (doColor) (void) PS_rna_plot_a(string, structure, ffname, A[0], A[1]); else (void) PS_rna_plot_a(string, structure, ffname, NULL, A[1]); } free(A[0]); free(A[1]); free(A); } if (doAlnPS) PS_color_aln(structure, "aln.ps", (const char const **) AS, (const char const **) names); /* free mfe arrays */ free_alifold_arrays(); if (pf) { float energy, kT; char * mfe_struc; mfe_struc = strdup(structure); kT = (betaScale*((temperature+K0)*GASCONST))/1000.; /* in Kcal */ pf_scale = exp(-(sfact*min_en)/kT/length); if (length>2000) fprintf(stderr, "scaling factor %f\n", pf_scale); fflush(stdout); if (cstruc!=NULL) strncpy(structure, cstruc, length+1); 
pf_parameters = get_boltzmann_factors_ali(n_seq, temperature, betaScale, md, pf_scale); energy = alipf_fold_par((const char **)AS, structure, NULL, pf_parameters, do_backtrack, fold_constrained, circular); if (n_back>0) { /*stochastic sampling*/ for (i=0; i<n_back; i++) { char *s; double prob=1.; s = alipbacktrack(&prob); printf("%s ", s); if (eval_energy ) printf("%6g %.2f ",prob, -1*(kT*log(prob)-energy)); printf("\n"); free(s); } } if (do_backtrack) { printf("%s", structure); if (!istty) printf(" [%6.2f]\n", energy); else printf("\n"); } if ((istty)||(!do_backtrack)) printf(" free energy of ensemble = %6.2f kcal/mol\n", energy); printf(" frequency of mfe structure in ensemble %g\n", exp((energy-min_en)/kT)); if (do_backtrack) { FILE *aliout; cpair *cp; char *cent; double dist; FLT_OR_DBL *probs = export_ali_bppm(); plist *pl, *mfel; assign_plist_from_pr(&pl, probs, length, bppmThreshold); assign_plist_from_db(&mfel, mfe_struc, 0.95*0.95); if (!circular){ float *ens; cent = get_centroid_struct_pr(length, &dist, probs); ens=(float *)space(2*sizeof(float)); energy_of_alistruct((const char **)AS, cent, n_seq, ens); /*cent_en = energy_of_struct(string, cent);*/ /*ali*/ printf("%s %6.2f {%6.2f + %6.2f}\n",cent,ens[0]-ens[1],ens[0],(-1)*ens[1]); free(cent); free(ens); } if(doMEA){ float mea, *ens; plist *pl2; assign_plist_from_pr(&pl2, probs, length, 1e-4/(1+MEAgamma)); mea = MEA(pl2, structure, MEAgamma); ens = (float *)space(2*sizeof(float)); if(circular) energy_of_alistruct((const char **)AS, structure, n_seq, ens); else ens[0] = energy_of_structure(string, structure, 0); printf("%s {%6.2f MEA=%.2f}\n", structure, ens[0], mea); free(ens); free(pl2); } if (fname[0]!='\0') { strcpy(ffname, fname); strcat(ffname, "_ali.out"); } else strcpy(ffname, "alifold.out"); aliout = fopen(ffname, "w"); if (!aliout) { fprintf(stderr, "can't open %s skipping output\n", ffname); } else { print_aliout(AS, pl, bppmThreshold, n_seq, mfe_struc, aliout); } fclose(aliout); if 
(fname[0]!='\0') { strcpy(ffname, fname); strcat(ffname, "_dp.ps"); } else strcpy(ffname, "alidot.ps"); cp = make_color_pinfo(AS,pl, bppmThreshold, n_seq, mfel); (void) PS_color_dot_plot(string, cp, ffname); free(cp); free(pl); free(mfel); } free(mfe_struc); free_alipf_arrays(); free(pf_parameters); } if (cstruc!=NULL) free(cstruc); (void) fflush(stdout); free(string); free(structure); for (i=0; AS[i]; i++) { free(AS[i]); free(names[i]); } return 0; }
int main (int argc, char** argv) { Sint optindex, c; unsigned char depictsw=0; unsigned char wurst=1; unsigned char gnuplot=0; Uint i, j, noofseqs=0, nooffreqs=0, noofqueries=0; Uint noofhits=100; Uint substrlen = 10; Uint minseeds = 5; Uint maxmatches = 10000; char *seq, *vec, *bin; imbissinfo *imbiss; void *space = NULL; double *scores = NULL; int swscores[2]={3,-2}; char *pveclistfile=NULL; char *alphabetfile=NULL; char *inputfile=NULL; char *batchfile = NULL; char *subfile=NULL; char *reportfile = NULL; int (*handler) (void *, Matchtype *, IntSequence **, Uint, Uint, void *) = allscores; double (*filter) (void *, Matchtype *, IntSequence *, IntSequence *, Uint *, Uint, Uint, void *) = swconstfilter; Matchtype* (*select) (void *, Matchtype *, Uint k, IntSequence *, IntSequence **, void *) = selectSW; stringset_t **fn, **freq, *queryurl, **queries=NULL; Suffixarray *arr = NULL; IntSequence **sequences = NULL; IntSequence *input = NULL; FAlphabet *alphabet = NULL; PairSint *matches=NULL; Uint percent=0; time_t startsuf, endsuf; double difsuf, difmatch, difrank; #ifdef MEMMAN_H Spacetable spacetab; initmemoryblocks(&spacetab, 100000); space = &spacetab; #endif while(1) { c=getopt_long(argc, argv, "SAghFGBLM:D:r:m:x:n:p:b:s:a:q:l:c:dvw", long_options, &optindex); if (c==-1) break; switch(c) { case 'r': reportfile=optarg; break; case 'v': verbose_flag=1; break; case 'd': depictsw = 1; break; case 's': pveclistfile = optarg; break; case 'a': alphabetfile = optarg; break; case 'q': inputfile = optarg; noofqueries = 1; break; case 'l': substrlen = atoi(optarg); break; case 'c': minseeds = atoi(optarg); break; case 'b': batchfile = optarg; break; case 'p': percent = atoi(optarg); break; case 'x': subfile = optarg; break; case 'n': noofhits = atoi(optarg); break; case 'w': wurst = 0; break; case 'B': filter = scorefilter; select = selectBlastScore; break; case 'S': filter = scorefilter; select = selectScore; break; case 'A': filter = swconstfilter; select = selectSW; 
break; case 'F': filter = scorefilter; select = selectScoreSWconst; break; case 'G': filter = scorefilter; select = selectBlastScoreSWconst; break; case 'M': swscores[0]=atoi(optarg); break; case 'L': handler = latexscores; break; case 'D': swscores[1]=atoi(optarg); break; case 'g': gnuplot = 1; break; case 'm': maxmatches=atoi(optarg); break; case 'h': default: usage(argv[0]); exit (EXIT_FAILURE); } } if (pveclistfile==NULL || (inputfile == NULL && batchfile==NULL) || alphabetfile == NULL) { usage(argv[0]); exit (EXIT_FAILURE); } imbiss = ALLOCMEMORY(space, NULL, imbissinfo, 1); imbiss->reportfile = reportfile; imbiss->swscores = swscores; imbiss->noofhits = noofhits; imbiss->minseeds = minseeds; imbiss->wurst = wurst; /*read batch file*/ if (batchfile) { queries = readcsv(space, batchfile, "", &noofqueries); } /*read substitution matrix*/ if (subfile) { freq=readcsv(space, subfile,",", &nooffreqs); scores = ALLOCMEMORY(space, NULL, double, ((nooffreqs-1)*(nooffreqs-1)) ); for(i=1; i < nooffreqs; i++) { for(j=1; j < nooffreqs; j++) { if(strcmp(SETSTR(freq[i],j),"inf")==0){ MATRIX2D(scores, nooffreqs-1, i, j)=0; }else{ MATRIX2D(scores, nooffreqs-1, i, j)=atof(SETSTR(freq[i],j)); } } } } /*read alphabet*/ if (alphabetfile != NULL) { alphabet = loadCSValphabet(space, alphabetfile); sortMapdomain(space, alphabet); } /*load sequence database*/ fn=readcsv(space, pveclistfile, "", &noofseqs); sequences = ALLOCMEMORY(space, NULL, IntSequence *, noofseqs); for(i=0; i < noofseqs; i++) { sequences[i] = loadSequence(space, SETSTR(fn[i],0)); } for (i=0; i < noofseqs; i++) { destructStringset(space, fn[i]); } FREEMEMORY(space, fn); /*construct the suffix array*/ time (&startsuf); arr = constructSufArr(space, sequences, noofseqs, NULL); constructLcp(space, arr); time (&endsuf); difsuf = difftime (endsuf, startsuf); /*do search*/ for (i=0; i < noofqueries; i++) { /*get query form batchfile*/ if (queries) { inputfile = SETSTR(queries[i],0); } /*typically only used with batchfile*/ 
if (percent != 0) { substrlen = ((double)((double)input->length/100)*(double) percent); } input = loadSequence(space, inputfile); //seq = printSequence (space, input, 60); printf(">IMBISS order delivered\n"); //printf("%s\n",seq); printf("%s\n", input->url); //FREEMEMORY(space, seq); time (&startsuf); matches=sufSubstring(space, arr, input->sequence, input->length, substrlen); time (&endsuf); difmatch = difftime (endsuf, startsuf); /*get prob vector url for salami/wurst*/ //printf("%.*s\n", 5, input->url + 58); vec = malloc(sizeof(char)*66); sprintf(vec, "/smallfiles/public/no_backup/bm/pdb_all_vec_6mer_struct/%5s.vec\0", input->url+56); bin = malloc(sizeof(char)*54); sprintf(bin, "/smallfiles/public/no_backup/bm/pdb_all_bin/%5s.bin\0", input->url+56); queryurl = initStringset(space); addString(space, queryurl, bin, strlen(bin)); addString(space, queryurl, vec, strlen(vec)); getimbissblast(space, input, sequences, noofseqs, alphabet, imbiss); imbiss->query = queryurl; imbiss->substrlen = substrlen; imbiss->alphabet = alphabet; /*if a substition file was given ...*/ if (subfile) { imbiss->sub = createsubmatrix(scores, imbiss->score, nooffreqs-1); } /*match 'n' report*/ time (&startsuf); imbiss->consensus = ALLOCMEMORY(space, NULL, Uint, (input->length-substrlen)); memset(imbiss->consensus, 0, (sizeof(Uint)*(input->length-substrlen))); rankSufmatch(space, arr, matches, input->length-substrlen, maxmatches, substrlen, sequences, noofseqs, filter, select, handler, input, imbiss, scores, depictsw); if (gnuplot) { consensus (space, imbiss->consensus, input->length-substrlen, input, substrlen, imbiss); } time (&endsuf); difrank = difftime (endsuf, startsuf); printf ("Building the suffixtree has taken %f seconds.\n", difsuf); printf ("Match the suffixtree has taken %f seconds.\n", difmatch); printf ("Rank the suffixtree has taken %f seconds.\n", difrank); /*partial cleanup*/ //destructStringset(space, queryurl); destructSequence(space, input); if(subfile) { 
FREEMEMORY(space, imbiss->sub); } FREEMEMORY(space, imbiss->consensus); FREEMEMORY(space, imbiss->score); FREEMEMORY(space, matches); free(bin); free(vec); } /*final cleanup*/ for (i=0; i < noofseqs; i++) { destructSequence(space, sequences[i]); } FREEMEMORY(space, sequences); destructSufArr(space, arr); #ifdef MEMMAN_H activeblocks(space); #endif printf("Goodbye.\n"); return EXIT_SUCCESS; }
int main(int argc, char *argv[]) { char *string; char *structure=NULL; char *cstruc=NULL; char *ns_bases=NULL; char *c; int n_seq; int i; int length; int sym; int endgaps = 0; int mis = 0; double min_en; double real_en; double sfact = 1.07; int pf = 0; int istty; char *AS[MAX_NUM_NAMES]; /* aligned sequences */ char *names[MAX_NUM_NAMES]; /* sequence names */ AjPSeqset seq = NULL; AjPFile confile = NULL; AjPFile alifile = NULL; AjPFile paramfile = NULL; AjPFile outf = NULL; AjPFile essfile = NULL; AjPFile dotfile = NULL; AjPStr constring = NULL; float eT = 0.; AjBool eGU; AjBool eclose; AjBool lonely; AjPStr ensbases = NULL; AjBool etloop; AjPStr eenergy = NULL; char ewt = '\0'; float escale = 0.; AjPStr edangles = NULL; char edangle = '\0'; ajint len; AjPSeq tseq = NULL; AjPStr tname = NULL; int circ = 0; int doAlnPS = 0; int doColor = 0; embInitPV("vrnaalifoldpf",argc,argv,"VIENNA",VERSION); constring = ajStrNew(); seq = ajAcdGetSeqset("sequence"); confile = ajAcdGetInfile("constraintfile"); paramfile = ajAcdGetInfile("paramfile"); eT = ajAcdGetFloat("temperature"); eGU = ajAcdGetBoolean("gu"); eclose = ajAcdGetBoolean("closegu"); lonely = ajAcdGetBoolean("lp"); ensbases = ajAcdGetString("nsbases"); etloop = ajAcdGetBoolean("tetraloop"); eenergy = ajAcdGetListSingle("energy"); escale = ajAcdGetFloat("scale"); edangles = ajAcdGetListSingle("dangles"); mis = !!ajAcdGetBoolean("most"); endgaps = !!ajAcdGetBoolean("endgaps"); nc_fact = (double) ajAcdGetFloat("nspenalty"); cv_fact = (double) ajAcdGetFloat("covariance"); outf = ajAcdGetOutfile("outfile"); essfile = ajAcdGetOutfile("ssoutfile"); alifile = ajAcdGetOutfile("alignoutfile"); circ = !!ajAcdGetBoolean("circular"); doColor = !!ajAcdGetBoolean("colour"); dotfile = ajAcdGetOutfile("dotoutfile"); do_backtrack = 1; pf = 1; string = NULL; istty = 0; dangles = 2; temperature = (double) eT; noGU = (eGU) ? 0 : 1; no_closingGU = (eclose) ? 0 : 1; noLonelyPairs = (lonely) ? 0 : 1; ns_bases = (ajStrGetLen(ensbases)) ? 
MAJSTRGETPTR(ensbases) : NULL; tetra_loop = !!etloop; ewt = *ajStrGetPtr(eenergy); if(ewt == '0') energy_set = 0; else if(ewt == '1') energy_set = 1; else if(ewt == '2') energy_set = 2; sfact = (double) escale; edangle = *ajStrGetPtr(edangles); if(edangle == '0') dangles = 0; else if(edangle == '1') dangles = 1; else if(edangle == '2') dangles = 2; else if(edangle == '3') dangles = 3; if(paramfile) read_parameter_file(paramfile); if (ns_bases != NULL) { nonstandards = space(33); c=ns_bases; i=sym=0; if (*c=='-') { sym=1; c++; } while (*c!='\0') { if (*c!=',') { nonstandards[i++]=*c++; nonstandards[i++]=*c; if ((sym)&&(*c!=*(c-1))) { nonstandards[i++]=*c; nonstandards[i++]=*(c-1); } } c++; } } if(alifile) doAlnPS = 1; if(confile) vienna_GetConstraints(confile,&constring); n_seq = ajSeqsetGetSize(seq); if(n_seq > MAX_NUM_NAMES - 1) ajFatal("[e]RNAalifold is restricted to %d sequences\n", MAX_NUM_NAMES - 1); if (n_seq==0) ajFatal("No sequences found"); for(i=0;i<n_seq;++i) { tseq = (AjPSeq) ajSeqsetGetseqSeq(seq,i); ajSeqGapStandard(tseq, '-'); tname = (AjPStr) ajSeqsetGetseqNameS(seq,i); len = ajSeqGetLen(tseq); AS[i] = (char *) space(len+1); names[i] = (char *) space(ajStrGetLen(tname)+1); strcpy(AS[i],ajSeqGetSeqC(tseq)); strcpy(names[i],ajStrGetPtr(tname)); } AS[n_seq] = NULL; names[n_seq] = NULL; if (endgaps) for (i=0; i<n_seq; i++) mark_endgaps(AS[i], '~'); length = (int) strlen(AS[0]); structure = (char *) space((unsigned) length+1); if(confile) { fold_constrained = 1; strcpy(structure,ajStrGetPtr(constring)); } if (circ && noLonelyPairs) ajWarn( "warning, depending on the origin of the circular sequence, " "some structures may be missed when using -noLP\n" "Try rotating your sequence a few times\n"); if (circ) min_en = circalifold((const char **)AS, structure); else min_en = alifold(AS, structure); { int i; double s=0; extern int eos_debug; eos_debug=-1; /* shut off warnings about nonstandard pairs */ for (i=0; AS[i]!=NULL; i++) if (circ) s += 
energy_of_circ_struct(AS[i], structure); else s += energy_of_struct(AS[i], structure); real_en = s/i; } string = (mis) ? consens_mis((const char **) AS) : consensus((const char **) AS); ajFmtPrintF(outf,"%s\n%s", string, structure); ajFmtPrintF(outf," (%6.2f = %6.2f + %6.2f) \n", min_en, real_en, min_en-real_en ); if (length<=2500) { char **A; A = annote(structure, (const char**) AS); if (doColor) (void) PS_rna_plot_a(string, structure, essfile, A[0], A[1]); else (void) PS_rna_plot_a(string, structure, essfile, NULL, A[1]); free(A[0]); free(A[1]);free(A); } else ajWarn("INFO: structure too long, not doing xy_plot\n"); if (doAlnPS) PS_color_aln(structure, alifile, AS, names); { /* free mfe arrays but preserve base_pair for PS_dot_plot */ struct bond *bp; bp = base_pair; base_pair = space(16); free_alifold_arrays(); /* free's base_pair */ free_alipf_arrays(); base_pair = bp; } if (pf) { double energy, kT; pair_info *pi; char * mfe_struc; mfe_struc = strdup(structure); kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */ pf_scale = exp(-(sfact*min_en)/kT/length); if (length>2000) ajWarn("scaling factor %f\n", pf_scale); /* init_alipf_fold(length); */ if (confile) strncpy(structure, ajStrGetPtr(constring), length+1); energy = (circ) ? alipf_circ_fold(AS, structure, &pi) : alipf_fold(AS, structure, &pi); if (do_backtrack) { ajFmtPrintF(outf,"%s", structure); ajFmtPrintF(outf," [%6.2f]\n", energy); } if ((istty)||(!do_backtrack)) ajFmtPrintF(outf," free energy of ensemble = %6.2f kcal/mol\n", energy); ajFmtPrintF(outf," frequency of mfe structure in ensemble %g\n", exp((energy-min_en)/kT)); if (do_backtrack) { FILE *aliout; cpair *cp; short *ptable; int k; ptable = make_pair_table(mfe_struc); ajFmtPrintF(outf,"\n# Alignment section\n\n"); aliout = ajFileGetFileptr(outf); fprintf(aliout, "%d sequences; length of alignment %d\n", n_seq, length); fprintf(aliout, "alifold output\n"); for (k=0; pi[k].i>0; k++) { pi[k].comp = (ptable[pi[k].i] == pi[k].j) ? 
1:0; print_pi(pi[k], aliout); } fprintf(aliout, "%s\n", structure); free(ptable); cp = make_color_pinfo(pi); (void) PS_color_dot_plot(string, cp, dotfile); free(cp); free(mfe_struc); free(pi); } } if (cstruc!=NULL) free(cstruc); free(base_pair); (void) fflush(stdout); free(string); free(structure); for (i=0; AS[i]; i++) { free(AS[i]); free(names[i]); } ajSeqsetDel(&seq); ajStrDel(&constring); ajStrDel(&eenergy); ajStrDel(&edangles); ajStrDel(&ensbases); ajFileClose(&confile); ajFileClose(¶mfile); ajFileClose(&outf); ajFileClose(&essfile); ajFileClose(&alifile); ajFileClose(&dotfile); embExit(); return 0; }
int main(int argc, char** argv) { if(argc > 3) { double t1, t2, t3; struct timeval t_s1, t_e1, t_s2, t_e2, t_s3, t_e3; image_height=atoi(argv[1]); image_width=atoi(argv[1]); if(atoi(argv[2]) == 1) create_image(); image = (char*)malloc(sizeof(char)*image_height*image_width); meet=(ConsensusGrid*)malloc(sizeof(ConsensusGrid)*image_height*image_width); basis = (int*)malloc(sizeof(int)*image_height*image_width); read_raw_image("image.raw",image,image_height,image_width); cout << "Calculating consensus..." << endl << endl; for(int i=0; i<image_height; i++) { for(int j = 0; j<image_width; j++) { if(atoi(argv[3]) == 1) { gettimeofday(&t_s1, NULL); meet[i*image_width+j]=consensus_parallel(i, j, image,image_height,image_width); gettimeofday(&t_e1, NULL); t1 = (((double)t_e1.tv_sec-(double)t_s1.tv_sec)*1000) + ((double)t_e1.tv_usec - (double)t_s1.tv_usec)/1000; } else { gettimeofday(&t_s1, NULL); meet[i*image_width+j]=consensus(i, j, image,image_height,image_width); gettimeofday(&t_e1, NULL); t1 = (((double)t_e1.tv_sec-(double)t_s1.tv_sec)*1000) + ((double)t_e1.tv_usec - (double)t_s1.tv_usec)/1000; } } } cout << "Calculating list..." << endl << endl; if(atoi(argv[3]) == 1) { gettimeofday(&t_s2, NULL); calculate_list_parallel(); gettimeofday(&t_e2, NULL); t2 = (((double)t_e2.tv_sec-(double)t_s2.tv_sec)*1000) + ((double)t_e2.tv_usec - (double)t_s2.tv_usec)/1000; } else { gettimeofday(&t_s2, NULL); calculate_list(); gettimeofday(&t_e2, NULL); t2 = (((double)t_e2.tv_sec-(double)t_s2.tv_sec)*1000) + ((double)t_e2.tv_usec - (double)t_s2.tv_usec)/1000; } cout << "Calculating basis..." 
<< endl << endl; if(atoi(argv[3]) == 1) { gettimeofday(&t_s3, NULL); calculate_basis(); gettimeofday(&t_e3, NULL); t3 = (((double)t_e3.tv_sec-(double)t_s3.tv_sec)*1000) + ((double)t_e3.tv_usec - (double)t_s3.tv_usec)/1000; } else { gettimeofday(&t_s3, NULL); calculate_basis(); gettimeofday(&t_e3, NULL); t3 = (((double)t_e3.tv_sec-(double)t_s3.tv_sec)*1000) + ((double)t_e3.tv_usec - (double)t_s3.tv_usec)/1000; } cout << "Basis count = " << basis_count << endl; cout << endl << "Time for Parallel Consensus Calculation = " << t1 << " msec" << endl; cout << "Time for Parallel List Calculation = " << t2 << " msec" << endl; cout << "Time for Basis Calculation = " << t3 << " msec" << endl; } else { cout << "Insufficient arguments\n" << endl; cout << "First argument = Image Size" << endl; cout << "Second argument = 1 to create image, 0 not to create image" << endl; cout << "Third argument = 1 for parallel, 0 for not parallel\n" << endl; } }
int main(int argc, char** argv) { if(argc > 1) { int* basis_serial; int* basis_parallel; int basis_count1, basis_count2; image_height=atoi(argv[1]); image_width=atoi(argv[1]); create_image(); image = (char*)malloc(sizeof(char)*image_height*image_width); meet=(ConsensusGrid*)malloc(sizeof(ConsensusGrid)*image_height*image_width); basis = (int*)malloc(sizeof(int)*image_height*image_width); read_raw_image("image.raw",image,image_height,image_width); struct timeval t_s, t_e; gettimeofday(&t_s, NULL); /// For serial implementation for(int i=0; i<image_height; i++) { for(int j = 0; j<image_width; j++) { meet[i*image_width+j]=consensus(i, j, image,image_height,image_width); } } printf("Calculating list for serial\n"); calculate_list(); printf("Calculating basis for serial\n"); calculate_basis(); gettimeofday(&t_e, NULL); double t1 = (((double)t_e.tv_sec-(double)t_s.tv_sec)*1000) + ((double)t_e.tv_usec - (double)t_s.tv_usec)/1000; basis_serial = basis; basis_count1 = basis_count; cout << "Serial Basis count = " << basis_count << endl << endl;; reset_values(); /// For parallel implementation struct timeval t_s2, t_e2; gettimeofday(&t_s2, NULL); for(int i=0; i<image_height; i++) { for(int j = 0; j<image_width; j++) { meet[i*image_width+j]=consensus_parallel(i, j, image,image_height,image_width); } } printf("Calculating list for parallel\n"); calculate_list(); printf("Calculating basis for parallel\n"); calculate_basis(); gettimeofday(&t_e2, NULL); double t2 = (((double)t_e2.tv_sec-(double)t_s2.tv_sec)*1000) + ((double)t_e2.tv_usec - (double)t_s2.tv_usec)/1000; basis_parallel = basis; basis_count2 = basis_count; cout << "Parallel Basis count = " << basis_count << endl << endl; Assert_Output(basis_serial, basis_parallel, basis_count1, basis_count2); } else { cout << "Insufficient arguments\n" << endl; cout << "First argument = Image Size" << endl; } }