/**
 * Runs the predictor over the test samples and prints, for every sample, the
 * true feature values followed by the predicted ones. Finishes by printing the
 * fraction of categorical features that were predicted exactly.
 *
 * Only categorical predictions are scored; continuous predictions are printed
 * but do not contribute to the reported accuracy.
 *
 * @param p predictor under test; its predict() is called once per test sample
 */
void testPredictor(Predictor p){
    //Load sample structure
    SampleStructure sampleStructure = getTestSampleStructure();

    //Load test data
    Array<Sample> testData = getTestSampleData();

    // number of categorical predictions that differ from the true value
    unsigned errors = 0;

    for(unsigned s = 0; s < testData.length; s++){
        Sample predictedSample = p.predict(testData[s]);

        // true values: continuous features first, then categorical ones
        std::cout << "SAMPLE " << static_cast<double>(s) << ": ";
        for(unsigned f = 0; f < sampleStructure.contvarCount; f++){
            std::cout << testData[s].contvars[f] << ", ";
        }
        for(unsigned f = 0; f < sampleStructure.catvarCount; f++){
            // printed via double so the value is streamed as a number
            std::cout << static_cast<double>(testData[s].catvars[f]) << ", ";
        }

        // predicted values, in the same feature order
        std::cout << "\nPREDICTIONS: ";
        for(unsigned f = 0; f < sampleStructure.contvarCount; f++){
            contvar_t prediction = predictedSample.contvars[f];
            std::cout << static_cast<double>(prediction) << ", ";
        }
        for(unsigned f = 0; f < sampleStructure.catvarCount; f++){
            catvar_t prediction = predictedSample.catvars[f];
            std::cout << static_cast<double>(prediction) << ", ";
            if(prediction != testData[s].catvars[f]){
                errors++;
            }
        }
        std::cout << "\n";
    }

    // The denominator is computed in double so the product cannot wrap around,
    // and it is checked for zero so that an empty test set (or a structure
    // without categorical features) does not print "nan" as the accuracy.
    const double comparisons = static_cast<double>(testData.length)
                             * static_cast<double>(sampleStructure.catvarCount);
    if(comparisons > 0.0){
        std::cout << "Catvar Accuracy: " << (1.0 - static_cast<double>(errors) / comparisons) << ".\n";
    } else {
        std::cout << "Catvar Accuracy: n/a (no categorical predictions were made).\n";
    }
}
void PredictorRegistryTest::testNext() { ContextTracker* pointer = static_cast<ContextTracker*>((void*)0xdeadbeef); registry->setContextTracker(pointer); PredictorRegistry::Iterator it = registry->iterator(); Predictor* predictor = 0; while (it.hasNext()) { predictor = it.next(); } // since we've iterated till the end of the predictors list, predictor // is now pointing to the DummyPredictor, so let's test we got the // dummy prediction back Prediction prediction = predictor->predict(20, 0); CPPUNIT_ASSERT(predictor != 0); size_t expected_size = 18; CPPUNIT_ASSERT_EQUAL(expected_size, prediction.size()); CPPUNIT_ASSERT_EQUAL(Suggestion("foo1", 0.99), prediction.getSuggestion(0)); CPPUNIT_ASSERT_EQUAL(Suggestion("foobar6", 0.74), prediction.getSuggestion(17)); }
/**
 * Exercises TraditionalFRaC end to end: builds error models for the given
 * predictor, then prints the NS value returned by calculateNS() for every
 * sample of the normal test set and of the anomalous test set, each followed
 * by the sample itself.
 *
 * @param p predictor whose predictions are fed to the error models and to
 *          the NS computation
 */
void testFrac(Predictor p){
    std::cout << "Testing FRaC.\n";

    SampleStructure st = getTestSampleStructure();
    Array<Sample> evaluationData = getEvaluationSampleData();

    // Factories that build one error model per feature from paired
    // true/predicted values.
    ErrorModelCreators emc;

    // continuous features -> Gaussian error model
    emc.rc = [](SampleStructure* st, unsigned index, contvar_t* trueData, contvar_t* predictedData, unsigned length){
        return static_cast<ContinuousErrorModel*>(new Gaussian(trueData, predictedData, length));
    };

    // categorical features -> surprisal matrix sized by the feature's category count
    emc.cc = [](SampleStructure* st, unsigned index, catvar_t* trueData, catvar_t* predictedData, unsigned length){
        return static_cast<CategoricalErrorModel*>(new SurprisalMatrix(st->catvarSizes[index], trueData, predictedData, length));
    };

    // binary features -> treated as a categorical feature with 2 categories
    emc.bcc = [](SampleStructure* st, unsigned index, binvar_t* trueData, binvar_t* predictedData, unsigned length){
        Array<catvar_t> trueConv = Array<binvar_t>(trueData, length).map<catvar_t>([](binvar_t bv){return static_cast<catvar_t>(bv);});
        Array<catvar_t> predConv = Array<binvar_t>(predictedData, length).map<catvar_t>([](binvar_t bv){return static_cast<catvar_t>(bv);});

        // Wrap the matrix built from the converted data. Previously that
        // matrix was constructed and then dropped, and the adapter received a
        // default-constructed SurprisalMatrix instead.
        BinaryErrorModel* model = new BinaryErrorModelFromCategoricalErrorModel<SurprisalMatrix>(
            SurprisalMatrix(2, trueConv.data, predConv.data, length));

        // The temporary conversion buffers are released right after the matrix
        // has been constructed, in the same order as before.
        trueConv.freeMemory();
        predConv.freeMemory();

        return model;
    };

    //Note (original author): error models should be built on validation data
    //that was not used for training. Here they are built from the samples
    //returned by getEvaluationSampleData().
    ErrorModelCollection e = ErrorModelCollection(st, emc, evaluationData, p);

    TraditionalFRaC frac(st, p, e);

    Array<Sample> testData = getTestSampleData();
    for(unsigned i = 0; i < testData.length; i++){
        double ns = frac.calculateNS(testData[i], p.predict(testData[i]));
        std::cout << "Normal Sample " << i << ": NS = " << ns << "\n";
        std::cout << StructuredSample(st, testData[i]) << "\n";
    }

    Array<Sample> anomTestData = getAnomalousSampleData();
    for(unsigned i = 0; i < anomTestData.length; i++){
        // Predict from the anomalous sample itself. Indexing testData here
        // scored each anomalous sample against the prediction for an unrelated
        // sample and could read past the end of testData.
        double ns = frac.calculateNS(anomTestData[i], p.predict(anomTestData[i]));
        std::cout << "Anomalous Sample " << i << ": NS = " << ns << "\n";
        std::cout << StructuredSample(st, anomTestData[i]) << "\n";
    }
}
/** * program main entry * * @param argc number of program arguments * @param argv array of program arguments of length argc */ int main(int argc, char **argv){ try { // set overall logging style el::Loggers::reconfigureAllLoggers(el::ConfigurationType::Format, std::string("# %level : %msg")); // TODO setup log file el::Loggers::reconfigureAllLoggers(el::ConfigurationType::ToFile, std::string("false")); el::Loggers::reconfigureAllLoggers(el::ConfigurationType::ToStandardOutput, std::string("true")); // set additional logging flags el::Loggers::addFlag(el::LoggingFlag::DisableApplicationAbortOnFatalLog); el::Loggers::addFlag(el::LoggingFlag::LogDetailedCrashReason); el::Loggers::addFlag(el::LoggingFlag::AllowVerboseIfModuleNotSpecified); #if INTARNA_LOG_COLORING el::Loggers::addFlag(el::LoggingFlag::ColoredTerminalOutput); #endif // setup logging with given parameters START_EASYLOGGINGPP(argc, argv); // check if log file set and update all loggers before going on if (el::Helpers::commandLineArgs() != NULL && el::Helpers::commandLineArgs()->hasParamWithValue(el::base::consts::kDefaultLogFileParam)) { // default all to file el::Loggers::reconfigureAllLoggers(el::ConfigurationType::ToStandardOutput, std::string("false")); el::Loggers::reconfigureAllLoggers(el::ConfigurationType::ToFile, std::string("true")); // enforec error out to standard output el::Loggers::reconfigureAllLoggers(el::Level::Error, el::ConfigurationType::ToStandardOutput, std::string("true")); el::Loggers::reconfigureAllLoggers(el::Level::Error, el::ConfigurationType::ToFile, std::string("false")); } // parse command line parameters CommandLineParsing parameters; { VLOG(1) <<"parsing arguments"<<"..."; int retCode = parameters.parse( argc, argv ); if (retCode != CommandLineParsing::ReturnCode::KEEP_GOING) { return retCode; } } #if INTARNA_MULITHREADING // OMP shared variables to enable exception forwarding from within OMP parallelized for loop bool threadAborted = false; std::exception_ptr 
exceptionPtrDuringOmp = NULL; std::stringstream exceptionInfoDuringOmp; #endif // number of already reported interactions to enable IntaRNA v1 separator output size_t reportedInteractions = 0; // storage to avoid accessibility recomputation (init NULL) std::vector< ReverseAccessibility * > queryAcc(parameters.getQuerySequences().size(), NULL); // compute all query accessibilities to enable parallelization #if INTARNA_MULITHREADING // parallelize this loop if possible; if not -> parallelize the query-loop # pragma omp parallel for schedule(dynamic) num_threads( parameters.getThreads() ) shared(queryAcc,reportedInteractions,exceptionPtrDuringOmp,exceptionInfoDuringOmp) #endif for (size_t qi=0; qi<queryAcc.size(); qi++) { // get accessibility handler #if INTARNA_MULITHREADING #pragma omp flush (threadAborted) // explicit try-catch-block due to missing OMP exception forwarding if (!threadAborted) { try { // get query accessibility handler #pragma omp critical(intarna_omp_logOutput) #endif VLOG(1) <<"computing accessibility for query '"<<parameters.getQuerySequences().at(qi).getId()<<"'..."; Accessibility * queryAccOrig = parameters.getQueryAccessibility(qi); INTARNA_CHECK_NOT_NULL(queryAccOrig,"query initialization failed"); // reverse indexing of target sequence for the computation queryAcc[qi] = new ReverseAccessibility(*queryAccOrig); // check if we have to warn about ambiguity if (queryAccOrig->getSequence().isAmbiguous()) { #if INTARNA_MULITHREADING #pragma omp critical(intarna_omp_logOutput) #endif LOG(INFO) <<"Sequence '"<<queryAccOrig->getSequence().getId() <<"' contains ambiguous nucleotide encodings. 
These positions are ignored for interaction computation."; } #if INTARNA_MULITHREADING ////////////////////// exception handling /////////////////////////// } catch (std::exception & e) { // ensure exception handling for first failed thread only #pragma omp critical(intarna_omp_exception) { if (!threadAborted) { // store exception information exceptionPtrDuringOmp = std::make_exception_ptr(e); exceptionInfoDuringOmp <<" #thread "<<omp_get_thread_num() <<" #query "<<qi <<" : "<<e.what(); // trigger abortion of all threads threadAborted = true; #pragma omp flush (threadAborted) } } // omp critical(intarna_omp_exception) } catch (...) { // ensure exception handling for first failed thread only #pragma omp critical(intarna_omp_exception) { if (!threadAborted) { // store exception information exceptionPtrDuringOmp = std::current_exception(); exceptionInfoDuringOmp <<" #thread "<<omp_get_thread_num() <<" #query "<<qi; // trigger abortion of all threads threadAborted = true; #pragma omp flush (threadAborted) } } // omp critical(intarna_omp_exception) } } // if not threadAborted #endif } // check which loop to parallelize const bool parallelizeTargetLoop = parameters.getTargetSequences().size() > 1; const bool parallelizeQueryLoop = !parallelizeTargetLoop && parameters.getQuerySequences().size() > 1; const bool parallelizeWindowsLoop = !parallelizeTargetLoop && !parallelizeQueryLoop; // run prediction for all pairs of sequences // first: iterate over all target sequences #if INTARNA_MULITHREADING // parallelize this loop if possible; if not -> parallelize the query-loop # pragma omp parallel for schedule(dynamic) num_threads( parameters.getThreads() ) shared(queryAcc,reportedInteractions,exceptionPtrDuringOmp,exceptionInfoDuringOmp) if(parallelizeTargetLoop) #endif for ( size_t targetNumber = 0; targetNumber < parameters.getTargetSequences().size(); ++targetNumber ) { #if INTARNA_MULITHREADING #pragma omp flush (threadAborted) // explicit try-catch-block due to missing OMP 
exception forwarding if (!threadAborted) { try { // get target accessibility handler #pragma omp critical(intarna_omp_logOutput) #endif { VLOG(1) <<"computing accessibility for target '"<<parameters.getTargetSequences().at(targetNumber).getId()<<"'..."; } // VRNA not completely threadsafe ... Accessibility * targetAcc = parameters.getTargetAccessibility(targetNumber); INTARNA_CHECK_NOT_NULL(targetAcc,"target initialization failed"); // check if we have to warn about ambiguity if (targetAcc->getSequence().isAmbiguous()) { #if INTARNA_MULITHREADING #pragma omp critical(intarna_omp_logOutput) #endif { LOG(INFO) <<"Sequence '"<<targetAcc->getSequence().getId() <<"' contains ambiguous IUPAC nucleotide encodings. These positions are ignored for interaction computation and replaced by 'N'.";} } // second: iterate over all query sequences #if INTARNA_MULITHREADING // this parallelization should only be enabled if the outer target-loop is not parallelized # pragma omp parallel for schedule(dynamic) num_threads( parameters.getThreads() ) shared(queryAcc,reportedInteractions,exceptionPtrDuringOmp,exceptionInfoDuringOmp,targetAcc,targetNumber) if(parallelizeQueryLoop) #endif for ( size_t queryNumber = 0; queryNumber < parameters.getQuerySequences().size(); ++queryNumber ) { #if INTARNA_MULITHREADING #pragma omp flush (threadAborted) // explicit try-catch-block due to missing OMP exception forwarding if (!threadAborted) { try { #endif // sanity check assert( queryAcc.at(queryNumber) != NULL ); // get energy computation handler for both sequences InteractionEnergy* energy = parameters.getEnergyHandler( *targetAcc, *(queryAcc.at(queryNumber)) ); INTARNA_CHECK_NOT_NULL(energy,"energy initialization failed"); // get output/storage handler OutputHandler * output = parameters.getOutputHandler( *energy ); INTARNA_CHECK_NOT_NULL(output,"output handler initialization failed"); // check if we have to add separator for IntaRNA v1 output if (reportedInteractions > 0 && 
dynamic_cast<OutputHandlerIntaRNA1*>(output) != NULL) { dynamic_cast<OutputHandlerIntaRNA1*>(output)->addSeparator( true ); } // setup collecting output handler to ensure // k-best output per query-target combination // and not per region combination if not requested OutputHandlerInteractionList bestInteractions( (parameters.reportBestPerRegion() ? std::numeric_limits<size_t>::max() : 1 ) * parameters.getOutputConstraint().reportMax ); // run prediction for all range combinations BOOST_FOREACH(const IndexRange & tRange, parameters.getTargetRanges(*energy, targetNumber)) { BOOST_FOREACH(const IndexRange & qRange, parameters.getQueryRanges(*energy, queryNumber)) { // get windows for both ranges std::vector<IndexRange> queryWindows = qRange.overlappingWindows(parameters.getWindowWidth(), parameters.getWindowOverlap()); std::vector<IndexRange> targetWindows = tRange.overlappingWindows(parameters.getWindowWidth(), parameters.getWindowOverlap()); // iterate over all window combinations #if INTARNA_MULITHREADING // this parallelization should only be enabled if neither the outer target-loop nor the inner query-loop are parallelized # pragma omp parallel for schedule(dynamic) collapse(2) num_threads( parameters.getThreads() ) shared(queryAcc,reportedInteractions,exceptionPtrDuringOmp,exceptionInfoDuringOmp,targetAcc,targetNumber,queryNumber,queryWindows,targetWindows, bestInteractions, energy) if(parallelizeWindowsLoop) #endif for (int qNumWindow = 0; qNumWindow < queryWindows.size(); ++qNumWindow) { for (int tNumWindow = 0; tNumWindow < targetWindows.size(); ++tNumWindow) { #if INTARNA_MULITHREADING #pragma omp flush (threadAborted) // explicit try-catch-block due to missing OMP exception forwarding if (!threadAborted) { try { #endif IndexRange qWindow = queryWindows.at(qNumWindow); IndexRange tWindow = targetWindows.at(tNumWindow); #if INTARNA_MULITHREADING #pragma omp critical(intarna_omp_logOutput) #endif { VLOG(1) <<"predicting interactions for" <<" target 
"<<targetAcc->getSequence().getId() <<" (range " <<(tWindow+1)<<")" <<" and" <<" query "<<queryAcc.at(queryNumber)->getSequence().getId() <<" (range " <<(qWindow+1)<<")" #if INTARNA_MULITHREADING #if INTARNA_IN_DEBUG_MODE <<" in thread "<<omp_get_thread_num() #endif #endif <<" ..."; } // get interaction prediction handler Predictor * predictor = parameters.getPredictor( *energy, bestInteractions ); INTARNA_CHECK_NOT_NULL(predictor,"predictor initialization failed"); // run prediction for this window combination predictor->predict( tWindow , queryAcc.at(queryNumber)->getReversedIndexRange(qWindow) , parameters.getOutputConstraint() ); // garbage collection INTARNA_CLEANUP(predictor); #if INTARNA_MULITHREADING ////////////////////// exception handling /////////////////////////// } catch (std::exception & e) { // ensure exception handling for first failed thread only #pragma omp critical(intarna_omp_exception) { if (!threadAborted) { // store exception information exceptionPtrDuringOmp = std::make_exception_ptr(e); exceptionInfoDuringOmp <<" #thread "<<omp_get_thread_num() <<" #target "<<targetNumber <<" #query " <<queryNumber <<" : "<<e.what(); // trigger abortion of all threads threadAborted = true; #pragma omp flush (threadAborted) } } // omp critical(intarna_omp_exception) } catch (...) 
{ // ensure exception handling for first failed thread only #pragma omp critical(intarna_omp_exception) { if (!threadAborted) { // store exception information exceptionPtrDuringOmp = std::current_exception(); exceptionInfoDuringOmp <<" #thread "<<omp_get_thread_num() <<" #target "<<targetNumber <<" #query " <<queryNumber; // trigger abortion of all threads threadAborted = true; #pragma omp flush (threadAborted) } } // omp critical(intarna_omp_exception) } } // if not threadAborted #endif }} // window combinations } // target ranges } // query ranges #if INTARNA_MULITHREADING #pragma omp critical(intarna_omp_outputHandlerUpdate) #endif {// update final output handler BOOST_FOREACH( const Interaction * inter, bestInteractions) { // forward all reported interactions for all regions to final output handler output->add(*inter); }} #if INTARNA_MULITHREADING #pragma omp atomic update #endif reportedInteractions += output->reported(); // garbage collection INTARNA_CLEANUP(output); INTARNA_CLEANUP(energy); #if INTARNA_MULITHREADING ////////////////////// exception handling /////////////////////////// } catch (std::exception & e) { // ensure exception handling for first failed thread only #pragma omp critical(intarna_omp_exception) { if (!threadAborted) { // store exception information exceptionPtrDuringOmp = std::make_exception_ptr(e); exceptionInfoDuringOmp <<" #thread "<<omp_get_thread_num() <<" #target "<<targetNumber <<" #query " <<queryNumber <<" : "<<e.what(); // trigger abortion of all threads threadAborted = true; #pragma omp flush (threadAborted) } } // omp critical(intarna_omp_exception) } catch (...) 
{ // ensure exception handling for first failed thread only #pragma omp critical(intarna_omp_exception) { if (!threadAborted) { // store exception information exceptionPtrDuringOmp = std::current_exception(); exceptionInfoDuringOmp <<" #thread "<<omp_get_thread_num() <<" #target "<<targetNumber <<" #query " <<queryNumber; // trigger abortion of all threads threadAborted = true; #pragma omp flush (threadAborted) } } // omp critical(intarna_omp_exception) } } // if not threadAborted #endif } // for queries // write accessibility to file if needed parameters.writeTargetAccessibility( *targetAcc ); // garbage collection INTARNA_CLEANUP(targetAcc); #if INTARNA_MULITHREADING ////////////////////// exception handling /////////////////////////// } catch (std::exception & e) { // ensure exception handling for first failed thread only #pragma omp critical(intarna_omp_exception) { if (!threadAborted) { // store exception information exceptionPtrDuringOmp = std::make_exception_ptr(e); exceptionInfoDuringOmp <<" #thread "<<omp_get_thread_num() <<" #target "<<targetNumber <<" : "<<e.what(); // trigger abortion of all threads threadAborted = true; #pragma omp flush (threadAborted) } } // omp critical(intarna_omp_exception) } catch (...) { // ensure exception handling for first failed thread only #pragma omp critical(intarna_omp_exception) { if (!threadAborted) { // store exception information exceptionPtrDuringOmp = std::current_exception(); exceptionInfoDuringOmp <<" #thread "<<omp_get_thread_num() <<" #target "<<targetNumber; // trigger abortion of all threads threadAborted = true; #pragma omp flush (threadAborted) } } // omp critical(intarna_omp_exception) } } // if not threadAborted
void DejavuPredictorTest::testPredict() { *stream << "polly wants a cracker "; ct->update(); // get pointer to dejavu predictor Predictor* predictor = predictorRegistry->iterator().next(); { *stream << "polly "; Prediction expected; CPPUNIT_ASSERT_EQUAL(expected, predictor->predict(SIZE, 0)); ct->update(); } { *stream << "wants "; Prediction expected; CPPUNIT_ASSERT_EQUAL(expected, predictor->predict(SIZE, 0)); ct->update(); } { *stream << "a "; Prediction expected; expected.addSuggestion(Suggestion("cracker", 1.0)); CPPUNIT_ASSERT_EQUAL(expected, predictor->predict(SIZE, 0)); ct->update(); } *stream << "soda "; ct->update(); { *stream << "polly "; Prediction expected; CPPUNIT_ASSERT_EQUAL(expected, predictor->predict(SIZE, 0)); ct->update(); } { *stream << "wants "; Prediction expected; CPPUNIT_ASSERT_EQUAL(expected, predictor->predict(SIZE, 0)); ct->update(); } { *stream << "a "; Prediction expected; expected.addSuggestion(Suggestion("cracker", 1.0)); expected.addSuggestion(Suggestion("soda", 1.0)); CPPUNIT_ASSERT_EQUAL(expected, predictor->predict(SIZE, 0)); ct->update(); } *stream << "cake "; ct->update(); { *stream << "polly "; Prediction expected; CPPUNIT_ASSERT_EQUAL(expected, predictor->predict(SIZE, 0)); ct->update(); } { *stream << "wants "; Prediction expected; CPPUNIT_ASSERT_EQUAL(expected, predictor->predict(SIZE, 0)); ct->update(); } { *stream << "a "; Prediction expected; expected.addSuggestion(Suggestion("cake", 1.0)); expected.addSuggestion(Suggestion("cracker", 1.0)); expected.addSuggestion(Suggestion("soda", 1.0)); CPPUNIT_ASSERT_EQUAL(expected, predictor->predict(SIZE, 0)); ct->update(); } *stream << "crumble "; ct->update(); { // test filter const char* filter[] = { "cra", "so", 0 }; *stream << "polly wants a "; Prediction expected; expected.addSuggestion(Suggestion("cracker", 1.0)); expected.addSuggestion(Suggestion("soda", 1.0)); CPPUNIT_ASSERT_EQUAL(expected, predictor->predict(SIZE, filter)); ct->update(); } *stream << "break "; 
ct->update(); { // test filter const char* filter[] = { "r", 0 }; *stream << "polly wants a c"; Prediction expected; expected.addSuggestion(Suggestion("cracker", 1.0)); expected.addSuggestion(Suggestion("crumble", 1.0)); CPPUNIT_ASSERT_EQUAL(expected, predictor->predict(SIZE, filter)); ct->update(); } *stream << "uddle "; ct->update(); }