void Eval::doEval(int high, int low) { XenOption* opt = XenOption::getInstance(); StaticData* sD = StaticData::getInstance(); if (opt->getSVocab()->getFileName().compare("") == 0) { sD->getVocabs()->getPtrSourceVoc()->initialize(sD->getSourceCorps()->getPtrInCorp()); } else { sD->getVocabs()->getPtrSourceVoc()->initialize(opt->getSVocab()); } pool threadPool(opt->getThreads()); int pc = low; if (pc == 0) { pc = opt->getStep(); } std::vector<std::string> parts; while (pc <= high) { EvalMap::iterator found = ptrDist->find(pc); if (found == ptrDist->end()) { std::string partName = sD->getXenResult()->getXenFile()->getDirName() + "/" + sD->getXenResult()->getXenFile()->getPrefix() + "-" + XenCommon::toString(pc) + "pc.gz"; XenIO::writeXRpart(sD->getXenResult(), pc, partName); boost::shared_ptr<Corpus> c = boost::make_shared<Corpus>(); c->initialize(partName, "xx"); threadPool.schedule( boost::bind(taskEval, pc, c, sD->getVocabs()->getPtrSourceVoc(), sD->getDevCorp(), ptrDist)); parts.push_back(partName); } pc += opt->getStep(); } threadPool.wait(); for (int i = 0; i < parts.size(); i++) XenIO::delFile(parts[i]); std::cout << "Evaluation done." << std::endl; }
int SimplePPL::launch() { XenOption* opt = XenOption::getInstance(); StaticData* sD = StaticData::getInstance(); sD->getSourceCorps()->getPtrInCorp()->initialize(opt->getInSData(), opt->getSLang()); sD->getSourceCorps()->getPtrOutCorp()->initialize(opt->getOutSData(), opt->getSLang()); if (opt->getSVocab()->getFileName().compare("") == 0) { if (opt->getFullVocab()) sD->getVocabs()->getPtrSourceVoc()->initialize(sD->getSourceCorps()->getPtrInCorp(), sD->getSourceCorps()->getPtrOutCorp()); else sD->getVocabs()->getPtrSourceVoc()->initialize(sD->getSourceCorps()->getPtrInCorp()); } else sD->getVocabs()->getPtrSourceVoc()->initialize(opt->getSVocab()); if (opt->getInSLM()->getFileName().compare("") == 0) { sD->getSourceLMs()->getPtrInLM()->initialize(sD->getSourceCorps()->getPtrInCorp(), sD->getVocabs()->getPtrSourceVoc()); sD->getSourceLMs()->getPtrInLM()->createLM(); sD->getSourceLMs()->getPtrInLM()->writeLM(); } else { sD->getSourceLMs()->getPtrInLM()->initialize(opt->getInSLM(), sD->getVocabs()->getPtrSourceVoc()); sD->getSourceLMs()->getPtrInLM()->loadLM(); } if (!boost::filesystem::exists(sD->getSourceLMs()->getPtrInLM()->getFileName())) { std::cout << "Error: LM file " + sD->getSourceLMs()->getPtrInLM()->getFileName() + " does not exists!" << std::endl; return 1; } sD->getSourcePPLs()->getPtrInPPL()->initialize(sD->getSourceCorps()->getPtrOutCorp(), sD->getSourceLMs()->getPtrInLM()); sD->getSourcePPLs()->getPtrInPPL()->calcPPLCorpus(); if (opt->getWFile()->getFileName().compare("") != 0) sD->getWeightsFile()->initialize(opt->getWFile()); for (unsigned int i = 0; i < sD->getSourcePPLs()->getPtrInPPL()->getSize(); i++) { double res = sD->getSourcePPLs()->getPtrInPPL()->getPPL(i); if (opt->getWFile()->getFileName().compare("") != 0) res = res * sD->getWeightsFile()->getWeight(i); sD->getScHold()->getPtrScores()->addScore(res); } sD->getScHold()->getPtrScores()->calibrate(); if (opt->getInv()) { sD->getScHold()->getPtrScores()->inverse(); } if (opt->getTLang().compare("") == 0) { std::cout << "NB Scores: " + XenCommon::toString(sD->getScHold()->getPtrScores()->getSize()) + " NB Source corp (unclean): " + XenCommon::toString(sD->getSourceCorps()->getPtrOutCorp()->getSize()) << std::endl; XenIO::cleanCorpusMono(sD->getSourceCorps()->getPtrOutCorp(), sD->getScHold()->getPtrScores()); std::cout << "NB Scores: " + XenCommon::toString(sD->getScHold()->getPtrScores()->getSize()) + " NB Source corp (clean): " + XenCommon::toString(sD->getSourceCorps()->getPtrOutCorp()->getSize()) << std::endl; XenIO::writeMonoOutput(sD->getSourceCorps()->getPtrOutCorp(), sD->getScHold()->getPtrScores()); } else { boost::shared_ptr<Corpus> ptrOTCorp = boost::make_shared<Corpus>(); ptrOTCorp->initialize(opt->getOutTData(), opt->getTLang()); std::cout << "NB Scores: " + XenCommon::toString(sD->getScHold()->getPtrScores()->getSize()) + " NB Source corp (unclean): " + XenCommon::toString(sD->getSourceCorps()->getPtrOutCorp()->getSize()) + " NB Target corp (unclean): " + XenCommon::toString(ptrOTCorp->getSize()) << std::endl; XenIO::cleanCorpusBi(sD->getSourceCorps()->getPtrOutCorp(), ptrOTCorp, sD->getScHold()->getPtrScores()); std::cout << "NB Scores: " + XenCommon::toString(sD->getScHold()->getPtrScores()->getSize()) + " NB Source corp (clean): " + XenCommon::toString(sD->getSourceCorps()->getPtrOutCorp()->getSize()) + " NB Target corp (clean): " + XenCommon::toString(ptrOTCorp->getSize()) << std::endl; XenIO::writeBiOutput(sD->getSourceCorps()->getPtrOutCorp(), ptrOTCorp, sD->getScHold()->getPtrScores()); } return 0; }