/**
 * Positive fraction of histogram cell (i,j).
 *
 * @param i  first index into posHist / negHist
 * @param j  second index into posHist / negHist
 * @return   posHist(i,j) / (posHist(i,j) + negHist(i,j)), or 0 when the two
 *           entries add up to less than 5.
 */
double parzenWindowEstimator::getHist(const int i, const int j)
{
    // Cells whose combined count is below 5 report 0 instead of a ratio.
    return ( posHist(i,j)+negHist(i,j) < 5 )
        ? 0
        : posHist(i,j)/(posHist(i,j)+negHist(i,j));
}
/**
 * Evaluates the one-dimensional estimate at x[0].
 *
 * For every row of posHist whose entry in column 0 is non-negative, the
 * positive fraction posHist/(posHist+negHist) of that row (0 when the sum is
 * not positive) is multiplied by
 * gauss(ext(0,0)+row*binWidth[0], sigm[0], x[0]) and accumulated.
 *
 * @param x  query point; only x[0] is read
 * @return   the accumulated sum (0 if no row contributes)
 */
double parzenWindowEstimator1D::getF_X(const std::vector<double> x)
{
    double total = 0;

    for (size_t row = 0; row < posHist.rows(); row++)
    {
        // Written as a negation so the test is the exact complement of ">= 0".
        if ( !(posHist(row,0)>=0) )
            continue;

        // Weight of this row: share of positives, or 0 for an empty row.
        double weight = 0;
        if ( (posHist(row,0)+negHist(row,0))>0 )
            weight = posHist(row,0)/(posHist(row,0)+negHist(row,0));

        total += weight * gauss(ext(0,0)+row*binWidth[0],sigm[0],x[0]);
    }

    return total;
}
/**
 * Evaluates the two-dimensional estimate at (x[0], x[1]).
 *
 * For every cell (row,col) of posHist whose entry is non-negative, the
 * positive fraction posHist/(posHist+negHist) of that cell (0 when the sum is
 * not positive) is multiplied by
 * gauss2D(ext(0,0)+row*binWidth[0], ext(1,0)+col*binWidth[1],
 *         sigm[0], sigm[1], x[0], x[1]) and accumulated.
 *
 * @param x  query point; x[0] and x[1] are read
 * @return   the accumulated sum (0 if no cell contributes)
 */
double parzenWindowEstimator2D::getF_X(const std::vector<double> x)
{
    double total = 0;

    for (size_t row = 0; row < posHist.rows(); row++)
    {
        for (size_t col = 0; col < posHist.cols(); col++)
        {
            // Written as a negation so the test is the exact complement of ">= 0".
            if ( !(posHist(row,col)>=0) )
                continue;

            // Weight of this cell: share of positives, or 0 for an empty cell.
            double weight = 0;
            if ( (posHist(row,col)+negHist(row,col))>0 )
                weight = posHist(row,col)/(posHist(row,col)+negHist(row,col));

            total += weight * gauss2D(ext(0,0)+row*binWidth[0],
                                      ext(1,0)+col*binWidth[1],
                                      sigm[0],sigm[1],
                                      x[0],x[1]);
        }
    }

    return total;
}
/**
 * Adds one sample to the positive histogram.
 *
 * getIndexes(x, ...) is asked for the cell indices of x. When it succeeds the
 * sample is logged to stdout and posHist at those indices is incremented by 1.
 *
 * @param x  sample; x[0] and x[1] are read for the log line
 * @return   true if getIndexes succeeded and the histogram was updated,
 *           false otherwise (nothing is modified or printed)
 */
bool parzenWindowEstimator::addSample(const std::vector<double> x)
{
    std::vector<int> cell;

    // Nothing to do when no cell could be determined for x.
    if ( !getIndexes(x,cell) )
        return false;

    printf("adding sample %i %i from %g %g\n",cell[0],cell[1],x[0],x[1]);
    posHist(cell[0],cell[1]) += 1;
    return true;
}
int Application::main(int argc,char *argv[]) { // Process command line BOOM::CommandLine cmd(argc,argv,""); if(cmd.numArgs()!=5) throw string( "\ncompute-signal-likelihoods <*.model> <pos-examples.fasta> \n\ <neg-examples.fasta> <consensuses> <#bins>\n\ \n\ example:\n\ compute-signal-likelihoods tag.model tag.fasta contigs.fasta TAG,TGA,TAA\n\ \n\ \n"); BOOM::String modelFilename=cmd.arg(0); BOOM::String posExamples=cmd.arg(1); BOOM::String negExamples=cmd.arg(2); BOOM::String consensusArg=cmd.arg(3); int numBins=cmd.arg(4).asInt(); if(!filestemRegex.search(modelFilename)) throw BOOM::String("Couldn't parse filestem from filename: ")+ modelFilename; BOOM::String filestem=filestemRegex[1]; // Load the model alphabet=DnaAlphabet::global(); GarbageIgnorer GC; SignalSensor *model=SignalSensor::load(modelFilename,GC); BOOM::Vector<BOOM::String> *consensuses=consensusArg.getFields(","); int numConsensuses=consensuses->size(); for(int i=0 ; i<numConsensuses ; ++i) model->addConsensus((*consensuses)[i]); // Load the examples BOOM::Vector<double> *posScores=loadPosExamples(posExamples,*model); BOOM::Vector<double> *negScores=loadPosExamples(negExamples,*model); // Report summary stats BOOM::SummaryStats posStats(*posScores); BOOM::SummaryStats backgroundStats(*negScores); cout<<"positives: "<<posStats.getMean()<<"+/-"<<posStats.getStdDev()<< " ("<<posStats.getMin()<<"-"<<posStats.getMax()<<")"<<endl; cout<<"background: "<<backgroundStats.getMean()<<"+/-" <<backgroundStats.getStdDev()<<" ("<<backgroundStats.getMin() <<"-"<<backgroundStats.getMax()<<")"<<endl; writeHistogramFile(*posScores,filestem+".pos-hist"); writeHistogramFile(*negScores,filestem+".neg-hist"); // Construct histograms double minScore=POSITIVE_INFINITY, maxScore=NEGATIVE_INFINITY; getExtrema(*posScores,minScore,maxScore); getExtrema(*negScores,minScore,maxScore); Histogram<double> posHist(minScore,maxScore,numBins,0.01); Histogram<double> backgroundHist(minScore,maxScore,numBins,1); 
posHist.addCounts(*posScores); backgroundHist.addCounts(*negScores); backgroundHist.addCounts(*posScores); // Compute log-likelihood ratios and write output file computeRatios(filestem,posHist,backgroundHist,minScore,maxScore, numBins); /* posHist.divideBy(backgroundHist); posHist.useLogs(); BOOM::String outfile=filestem+".isp"; posHist.save(outfile); */ return 0; }