bool processmodel(const string & inputmodelfile, int inputmodeltype, const string & outputmodelfile, int outputmodeltype, const string & corpusfile, PatternSetModel * constrainbymodel, IndexedCorpus * corpus, PatternModelOptions & options, bool continued, bool expand, int firstsentence, bool ignoreerrors, string inputmodelfile2, ClassDecoder * classdecoder, ClassEncoder * classencoder, bool print, bool report, bool nocoverage, bool histogram , bool query, string dorelations, bool doinstantiate, bool info, bool printreverseindex, int cooc, double coocthreshold, bool flexfromskip, const vector<string> & querypatterns) { if (!(print || report || histogram || query || info || cooc || printreverseindex || (dorelations != "") || (!querypatterns.empty()) || (!outputmodelfile.empty()) )) { cerr << "Ooops... You didn't really give me anything to do...that can't be right.. Please study the usage options (-h) again! Did you perhaps forget a --print or --outputmodel? " << endl; return false; } ModelType * inputmodel; string outputqualifier = ""; if ((outputmodeltype == UNINDEXEDPATTERNMODEL) || (outputmodeltype == UNINDEXEDPATTERNPOINTERMODEL)) { outputqualifier += " unindexed"; } if ((outputmodeltype == INDEXEDPATTERNPOINTERMODEL) || (outputmodeltype == UNINDEXEDPATTERNPOINTERMODEL)) { outputqualifier += " pointer"; } if (inputmodelfile.empty()) { //train model from scratch inputmodel = new ModelType(corpus); cerr << "Training" << outputqualifier << " model on " << corpusfile <<endl; inputmodel->train(corpusfile, options, constrainbymodel, NULL, continued,firstsentence,ignoreerrors); if (constrainbymodel) { cerr << "Unloading constraint model" << endl; delete constrainbymodel; constrainbymodel = NULL; } if (options.DOSKIPGRAMS) { if ((inputmodeltype == UNINDEXEDPATTERNMODEL) || (inputmodeltype == UNINDEXEDPATTERNPOINTERMODEL)) { cerr << "WARNING: Can't compute skipgrams non-exhaustively on unindexed model" << endl; if (flexfromskip) cerr << "WARNING: Can't compute flexgrams 
from skipgrams on unindexed model" << endl; } else { if (!inputmodelfile2.empty()) cerr << "WARNING: Can not compute skipgrams constrained by " << inputmodelfile2 << "!" << endl; if (!inputmodel->hasskipgrams) { cerr << "Computing skipgrams" << endl; inputmodel->trainskipgrams(options); } if (flexfromskip) { cerr << "Computing flexgrams from skipgrams" << corpusfile <<endl; int found = inputmodel->computeflexgrams_fromskipgrams(); cerr << found << " flexgrams found" << corpusfile <<endl; } } } } else { //load model cerr << "Loading pattern model " << inputmodelfile << " as" << outputqualifier << " model..."<<endl; inputmodel = new ModelType(inputmodelfile, options, (PatternModelInterface*) constrainbymodel, corpus); if ((corpus != NULL) && (inputmodel->hasskipgrams)) { cerr << "Filtering skipgrams..." << endl; inputmodel->pruneskipgrams(options.MINTOKENS, options.MINSKIPTYPES); } if ((!corpusfile.empty()) && (expand)) { cerr << "Expanding model on " << corpusfile <<endl; inputmodel->train(corpusfile, options, constrainbymodel,NULL, continued,firstsentence,ignoreerrors); if (constrainbymodel) { cerr << "Unloading constraint model" << endl; delete constrainbymodel; constrainbymodel = NULL; } } else if (options.DOSKIPGRAMS) { if (constrainbymodel) { cerr << "Unloading constraint model" << endl; delete constrainbymodel; constrainbymodel = NULL; } cerr << "Computing skipgrams" << endl; if (!inputmodelfile2.empty()) cerr << "WARNING: Can not compute skipgrams constrained by " << inputmodelfile2 << "!" 
<< endl; inputmodel->trainskipgrams(options); if (flexfromskip) { cerr << "Computing flexgrams from skipgrams" << corpusfile <<endl; int found = inputmodel->computeflexgrams_fromskipgrams(); cerr << found << " flexgrams found" << corpusfile <<endl; } } else { if (constrainbymodel) { cerr << "Unloading constraint model" << endl; delete constrainbymodel; constrainbymodel = NULL; } } } if (!outputmodelfile.empty()) { cerr << "Writing model to " << outputmodelfile << endl; inputmodel->write(outputmodelfile); } viewmodel<ModelType>(*inputmodel, classdecoder, classencoder, print, report, nocoverage, histogram, query, dorelations, doinstantiate, info, printreverseindex, cooc, coocthreshold); if (!querypatterns.empty()) { processquerypatterns<ModelType>(*inputmodel, classencoder, classdecoder, querypatterns, dorelations, doinstantiate); } delete inputmodel; return true; }
/**
 * Write the model file plus the isoform-level and gene-level result tables.
 *
 * Files produced (names built from buffers declared outside this function):
 *   - "<statName>.model"   : written through model.write().
 *   - "<imdName>.iso_res"  : four tab-separated rows with one column per
 *                            transcript 1..M: transcript IDs, counts (2
 *                            decimals), tau values, gene IDs.
 *   - "<imdName>.gene_res" : four tab-separated rows with one column per gene
 *                            0..m-1: gene IDs, summed counts, summed tau
 *                            values, comma-separated transcript IDs.
 *
 * tau[i] is theta[i] / eel[i] for every transcript with eel[i] >= EPSILON
 * (0 otherwise), normalised so the values sum to 1.
 *
 * Failure handling goes through general_assert: no positive tau mass, an
 * output path that does not fit in the local buffer, or an output file that
 * cannot be opened.
 *
 * @param model  Model to be saved.
 * @param counts Per-transcript counts; indices 1..M are read.
 */
void writeResults(ModelType& model, double* counts) {
	double denom;
	char outF[STRLEN];
	FILE *fo;
	int len;

	sprintf(modelF, "%s.model", statName);
	model.write(modelF);

	//calculate tau values
	double *tau = new double[M + 1];
	memset(tau, 0, sizeof(double) * (M + 1));

	denom = 0.0;
	for (int i = 1; i <= M; i++)
		if (eel[i] >= EPSILON) {
			tau[i] = theta[i] / eel[i];
			denom += tau[i];
		}

	general_assert(denom > 0, "No alignable reads?!");

	for (int i = 1; i <= M; i++) {
		tau[i] /= denom;
	}

	//isoform level results
	// Bounded formatting: an over-long imdName must not overflow outF.
	len = snprintf(outF, sizeof(outF), "%s.iso_res", imdName);
	general_assert(len >= 0 && len < static_cast<int>(sizeof(outF)), "Output file name is too long!");
	fo = fopen(outF, "w");
	// fopen returns NULL on failure; writing through it would crash.
	general_assert(fo != NULL, (string("Cannot open ") + outF + " for writing!").c_str());
	for (int i = 1; i <= M; i++) {
		const Transcript& transcript = transcripts.getTranscriptAt(i);
		fprintf(fo, "%s%c", transcript.getTranscriptID().c_str(), (i < M ? '\t' : '\n'));
	}
	for (int i = 1; i <= M; i++)
		fprintf(fo, "%.2f%c", counts[i], (i < M ? '\t' : '\n'));
	for (int i = 1; i <= M; i++)
		fprintf(fo, "%.15g%c", tau[i], (i < M ? '\t' : '\n'));
	for (int i = 1; i <= M; i++) {
		const Transcript& transcript = transcripts.getTranscriptAt(i);
		fprintf(fo, "%s%c", transcript.getGeneID().c_str(), (i < M ? '\t' : '\n'));
	}
	fclose(fo);

	//gene level results
	len = snprintf(outF, sizeof(outF), "%s.gene_res", imdName);
	general_assert(len >= 0 && len < static_cast<int>(sizeof(outF)), "Output file name is too long!");
	fo = fopen(outF, "w");
	general_assert(fo != NULL, (string("Cannot open ") + outF + " for writing!").c_str());
	for (int i = 0; i < m; i++) {
		const string& gene_id = transcripts.getTranscriptAt(gi.spAt(i)).getGeneID();
		fprintf(fo, "%s%c", gene_id.c_str(), (i < m - 1 ? '\t' : '\n'));
	}
	for (int i = 0; i < m; i++) {
		double sumC = 0.0; // sum of counts
		// Transcripts of gene i occupy the half-open index range [b, e).
		int b = gi.spAt(i), e = gi.spAt(i + 1);
		for (int j = b; j < e; j++) sumC += counts[j];
		fprintf(fo, "%.2f%c", sumC, (i < m - 1 ? '\t' : '\n'));
	}
	for (int i = 0; i < m; i++) {
		double sumT = 0.0; // sum of tau values
		int b = gi.spAt(i), e = gi.spAt(i + 1);
		for (int j = b; j < e; j++) sumT += tau[j];
		fprintf(fo, "%.15g%c", sumT, (i < m - 1 ? '\t' : '\n'));
	}
	for (int i = 0; i < m; i++) {
		int b = gi.spAt(i), e = gi.spAt(i + 1);
		for (int j = b; j < e; j++) {
			// ',' between transcripts of one gene, '\t' between genes, '\n' after the last gene.
			fprintf(fo, "%s%c", transcripts.getTranscriptAt(j).getTranscriptID().c_str(), (j < e - 1 ? ',' : (i < m - 1 ? '\t' :'\n')));
		}
	}
	fclose(fo);

	delete[] tau;

	if (verbose) { printf("Expression Results are written!\n"); }
}
/**
 * Save the model to "<statName>.model" and hand the result tables off to
 * writeResultsEM.
 *
 * @param model  Model to be saved via model.write().
 * @param counts Not read by this function.
 *               NOTE(review): countvs[0] is passed to writeResultsEM instead;
 *               presumably callers pass that same array here - confirm.
 */
void writeResults(ModelType& model, double* counts) {
	// Build the model file name in modelF (declared outside this function),
	// then let the model serialise itself there.
	sprintf(modelF, "%s.model", statName);
	model.write(modelF);

	// All table output is delegated to writeResultsEM.
	writeResultsEM(M, refName, imdName, transcripts, theta, eel, countvs[0]);
}