/**
 * Train or load a pattern model of type ModelType, optionally write it to
 * disk, and then run the requested views and queries on it.
 *
 * Flow:
 *  - If none of the output actions (print, report, histogram, query, info,
 *    cooc, printreverseindex, dorelations, querypatterns, outputmodelfile)
 *    is requested, an error is printed and false is returned immediately.
 *  - If inputmodelfile is empty, a new model is constructed on `corpus` and
 *    trained on corpusfile; otherwise the model is loaded from inputmodelfile
 *    and, depending on the arguments, expanded on corpusfile or extended
 *    with skipgrams.
 *  - The model is written to outputmodelfile when that name is non-empty,
 *    passed to viewmodel(), and queried with querypatterns when given.
 *
 * Ownership: once past the initial argument check, a non-NULL
 * constrainbymodel is deleted by this function. Only the local copy of the
 * pointer is reset, so the caller must not use its own pointer afterwards.
 * The model allocated here is deleted before returning.
 *
 * @return false when there was nothing to do, true otherwise.
 */
bool processmodel(const string & inputmodelfile, int inputmodeltype, const string & outputmodelfile, int outputmodeltype, const string & corpusfile,   PatternSetModel * constrainbymodel, IndexedCorpus * corpus, PatternModelOptions & options, bool continued, bool expand, int firstsentence, bool ignoreerrors, string inputmodelfile2, ClassDecoder * classdecoder,  ClassEncoder * classencoder, bool print, bool report, bool nocoverage, bool histogram , bool query, string dorelations, bool doinstantiate, bool info, bool printreverseindex, int cooc, double coocthreshold, bool flexfromskip, const vector<string> & querypatterns) {
    // Bail out early when no output action of any kind was requested.
    if (!(print || report || histogram || query || info || cooc || printreverseindex || (dorelations != "") || (!querypatterns.empty()) || (!outputmodelfile.empty()) )) {
        cerr << "Ooops... You didn't really give me anything to do...that can't be right.. Please study the usage options (-h) again! Did you perhaps forget a --print or --outputmodel? " << endl;
        return false;
    }

    ModelType * inputmodel;

    // Qualifier used purely in the log messages below.
    string outputqualifier = "";
    if ((outputmodeltype == UNINDEXEDPATTERNMODEL) || (outputmodeltype == UNINDEXEDPATTERNPOINTERMODEL)) {
        outputqualifier += " unindexed";
    }
    if ((outputmodeltype == INDEXEDPATTERNPOINTERMODEL) || (outputmodeltype == UNINDEXEDPATTERNPOINTERMODEL)) {
        outputqualifier += " pointer";
    }

    if (inputmodelfile.empty()) {
        //train model from scratch

        inputmodel = new ModelType(corpus);

        cerr << "Training" << outputqualifier << " model on  " << corpusfile <<endl;
        inputmodel->train(corpusfile, options, constrainbymodel, NULL, continued,firstsentence,ignoreerrors);
        if (constrainbymodel) {
            cerr << "Unloading constraint model" << endl;
            delete constrainbymodel;
            constrainbymodel = NULL;
        }

        if (options.DOSKIPGRAMS) {
            if ((inputmodeltype == UNINDEXEDPATTERNMODEL) || (inputmodeltype == UNINDEXEDPATTERNPOINTERMODEL)) {
                cerr << "WARNING: Can't compute skipgrams non-exhaustively on unindexed model" << endl;
                if (flexfromskip) cerr << "WARNING: Can't compute flexgrams from skipgrams on unindexed model" << endl;
            }  else {
                if (!inputmodelfile2.empty()) cerr << "WARNING: Can not compute skipgrams constrained by " << inputmodelfile2 << "!" << endl;
                // Only compute skipgrams when training did not already produce them.
                if (!inputmodel->hasskipgrams) {
                    cerr << "Computing skipgrams" << endl;
                    inputmodel->trainskipgrams(options);
                }
                if (flexfromskip) {
                    // The corpus file name used to be streamed straight after
                    // these messages without a separator, garbling the output.
                    cerr << "Computing flexgrams from skipgrams" << endl;
                    int found = inputmodel->computeflexgrams_fromskipgrams();
                    cerr << found << " flexgrams found" << endl;
                }
            }
        }

    } else {
        //load model

        cerr << "Loading pattern model " << inputmodelfile << " as" << outputqualifier << " model..."<<endl;
        inputmodel = new ModelType(inputmodelfile, options, (PatternModelInterface*) constrainbymodel, corpus);
        if ((corpus != NULL) && (inputmodel->hasskipgrams)) {
            cerr << "Filtering skipgrams..." << endl;
            inputmodel->pruneskipgrams(options.MINTOKENS, options.MINSKIPTYPES);
        }

        // Expansion needs the constraint model during training, so the
        // constraint model is released only after that step; in every other
        // case it is released before any further work is done.
        const bool doexpand = (!corpusfile.empty()) && (expand);
        if (doexpand) {
            cerr << "Expanding model on  " << corpusfile <<endl;
            inputmodel->train(corpusfile, options, constrainbymodel,NULL,  continued,firstsentence,ignoreerrors);
        }

        if (constrainbymodel) {
            cerr << "Unloading constraint model" << endl;
            delete constrainbymodel;
            constrainbymodel = NULL;
        }

        if ((!doexpand) && options.DOSKIPGRAMS) {
            cerr << "Computing skipgrams" << endl;
            if (!inputmodelfile2.empty()) cerr << "WARNING: Can not compute skipgrams constrained by " << inputmodelfile2 << "!" << endl;
            inputmodel->trainskipgrams(options);
            if (flexfromskip) {
                cerr << "Computing flexgrams from skipgrams" << endl;
                int found = inputmodel->computeflexgrams_fromskipgrams();
                cerr << found << " flexgrams found" << endl;
            }
        }
    }


    if (!outputmodelfile.empty()) {
        cerr << "Writing model to " << outputmodelfile << endl;
        inputmodel->write(outputmodelfile);
    }
    viewmodel<ModelType>(*inputmodel, classdecoder, classencoder, print, report, nocoverage, histogram, query, dorelations, doinstantiate, info, printreverseindex, cooc, coocthreshold);

    if (!querypatterns.empty()) {
        processquerypatterns<ModelType>(*inputmodel,  classencoder, classdecoder, querypatterns, dorelations, doinstantiate);
    }

    delete inputmodel;

    return true;
}
Exemple #2
0
/**
 * Write the model file plus the isoform-level and gene-level result files.
 *
 * The model is written to "<statName>.model". Normalised tau values are
 * derived as theta[i] / eel[i] for every i in 1..M whose eel[i] is at least
 * EPSILON (all other entries stay 0) and then divided by their sum.
 * "<imdName>.iso_res" and "<imdName>.gene_res" each receive four
 * tab-separated rows: ids, counts, tau values, and the gene ids (isoform
 * file) or comma-separated transcript ids per gene (gene file).
 *
 * Relies on names defined outside this block: modelF, statName, imdName, M,
 * m, theta, eel, transcripts, gi, verbose, EPSILON, STRLEN, general_assert.
 * NOTE(review): general_assert is assumed to stop execution when its
 * condition is false - confirm against its definition.
 *
 * @param model   model object; only its write() method is used here
 * @param counts  per-transcript counts, read at indices 1..M
 */
void writeResults(ModelType& model, double* counts) {
	double denom;
	char outF[STRLEN];
	FILE *fo;
	int len;

	// modelF is declared outside this block, so its capacity is not known here.
	sprintf(modelF, "%s.model", statName);
	model.write(modelF);

	//calculate tau values
	double *tau = new double[M + 1];
	memset(tau, 0, sizeof(double) * (M + 1));

	denom = 0.0;
	for (int i = 1; i <= M; i++) {
		// Entries with a too-small eel[i] are skipped and keep tau[i] == 0.
		if (eel[i] >= EPSILON) {
			tau[i] = theta[i] / eel[i];
			denom += tau[i];
		}
	}

	general_assert(denom > 0, "No alignable reads?!");

	for (int i = 1; i <= M; i++) {
		tau[i] /= denom;
	}

	//isoform level results
	// Bounded formatting: a too-long imdName must not overflow outF or
	// silently produce a truncated file name.
	len = snprintf(outF, sizeof(outF), "%s.iso_res", imdName);
	general_assert(len >= 0 && len < static_cast<int>(sizeof(outF)), "The .iso_res output file name is too long!");
	fo = fopen(outF, "w");
	// fprintf on a NULL stream is undefined behaviour, so fail loudly instead.
	general_assert(fo != NULL, "Cannot open the .iso_res output file for writing!");
	for (int i = 1; i <= M; i++) {
		const Transcript& transcript = transcripts.getTranscriptAt(i);
		fprintf(fo, "%s%c", transcript.getTranscriptID().c_str(), (i < M ? '\t' : '\n'));
	}
	for (int i = 1; i <= M; i++)
		fprintf(fo, "%.2f%c", counts[i], (i < M ? '\t' : '\n'));
	for (int i = 1; i <= M; i++)
		fprintf(fo, "%.15g%c", tau[i], (i < M ? '\t' : '\n'));
	for (int i = 1; i <= M; i++) {
		const Transcript& transcript = transcripts.getTranscriptAt(i);
		fprintf(fo, "%s%c", transcript.getGeneID().c_str(), (i < M ? '\t' : '\n'));
	}
	fclose(fo);

	//gene level results
	len = snprintf(outF, sizeof(outF), "%s.gene_res", imdName);
	general_assert(len >= 0 && len < static_cast<int>(sizeof(outF)), "The .gene_res output file name is too long!");
	fo = fopen(outF, "w");
	general_assert(fo != NULL, "Cannot open the .gene_res output file for writing!");
	for (int i = 0; i < m; i++) {
		const string& gene_id = transcripts.getTranscriptAt(gi.spAt(i)).getGeneID();
		fprintf(fo, "%s%c", gene_id.c_str(), (i < m - 1 ? '\t' : '\n'));
	}
	// Gene i covers the transcript index range [gi.spAt(i), gi.spAt(i + 1)).
	for (int i = 0; i < m; i++) {
		double sumC = 0.0; // sum of counts
		int b = gi.spAt(i), e = gi.spAt(i + 1);
		for (int j = b; j < e; j++) sumC += counts[j];
		fprintf(fo, "%.2f%c", sumC, (i < m - 1 ? '\t' : '\n'));
	}
	for (int i = 0; i < m; i++) {
		double sumT = 0.0; // sum of tau values
		int b = gi.spAt(i), e = gi.spAt(i + 1);
		for (int j = b; j < e; j++) sumT += tau[j];
		fprintf(fo, "%.15g%c", sumT, (i < m - 1 ? '\t' : '\n'));
	}
	for (int i = 0; i < m; i++) {
		int b = gi.spAt(i), e = gi.spAt(i + 1);
		for (int j = b; j < e; j++) {
			// Transcripts of one gene are comma-separated; genes are tab-separated.
			fprintf(fo, "%s%c", transcripts.getTranscriptAt(j).getTranscriptID().c_str(), (j < e - 1 ? ',' : (i < m - 1 ? '\t' :'\n')));
		}
	}
	fclose(fo);

	delete[] tau;

	if (verbose) { printf("Expression Results are written!\n"); }
}
Exemple #3
0
// Writes the model to "<statName>.model" and then delegates the writing of
// the result files to writeResultsEM().
// Note: the `counts` parameter is not referenced in this body; countvs[0]
// is passed to writeResultsEM() instead.
// modelF, statName, M, refName, imdName, transcripts, theta, eel and countvs
// are defined outside this block.
void writeResults(ModelType& model, double* counts) {
  // Build the model file name in the externally declared buffer modelF.
  sprintf(modelF, "%s.model", statName);
  model.write(modelF);
  writeResultsEM(M, refName, imdName, transcripts, theta, eel, countvs[0]);
}