示例#1
0
/**
 * @brief Runs the simple perplexity scoring pipeline on the source side.
 *
 * Steps, in order: initialize the "in" and "out" source corpora, the source
 * vocabulary and the "in" language model; compute the perplexities of the
 * "out" corpus against that model; turn them into (optionally weighted)
 * scores; calibrate and optionally invert the scores; finally clean the
 * corpus/corpora and write the mono- or bilingual output.
 *
 * @return 0 on success, 1 when the language model file does not exist.
 */
int SimplePPL::launch() {
    XenOption* opt = XenOption::getInstance();
    StaticData* sD = StaticData::getInstance();

    // Both source corpora share the source language setting.
    sD->getSourceCorps()->getPtrInCorp()->initialize(opt->getInSData(), opt->getSLang());
    sD->getSourceCorps()->getPtrOutCorp()->initialize(opt->getOutSData(), opt->getSLang());

    // Source vocabulary: use the configured vocabulary file when one is set;
    // otherwise initialize it from the "in" corpus alone, or from both
    // corpora when the full-vocabulary option is enabled.
    const bool hasVocabFile = !opt->getSVocab()->getFileName().empty();
    if (hasVocabFile) {
        sD->getVocabs()->getPtrSourceVoc()->initialize(opt->getSVocab());
    }
    else if (opt->getFullVocab()) {
        sD->getVocabs()->getPtrSourceVoc()->initialize(sD->getSourceCorps()->getPtrInCorp(), sD->getSourceCorps()->getPtrOutCorp());
    }
    else {
        sD->getVocabs()->getPtrSourceVoc()->initialize(sD->getSourceCorps()->getPtrInCorp());
    }

    // "In" language model: load the configured LM file when one is set;
    // otherwise create it from the "in" corpus and write it out.
    const bool hasLMFile = !opt->getInSLM()->getFileName().empty();
    if (hasLMFile) {
        sD->getSourceLMs()->getPtrInLM()->initialize(opt->getInSLM(), sD->getVocabs()->getPtrSourceVoc());
        sD->getSourceLMs()->getPtrInLM()->loadLM();
    }
    else {
        sD->getSourceLMs()->getPtrInLM()->initialize(sD->getSourceCorps()->getPtrInCorp(), sD->getVocabs()->getPtrSourceVoc());
        sD->getSourceLMs()->getPtrInLM()->createLM();
        sD->getSourceLMs()->getPtrInLM()->writeLM();
    }

    // NOTE(review): this existence check runs after loadLM()/writeLM();
    // confirm that loadLM() copes with a missing file before we get here.
    const bool lmFileExists = boost::filesystem::exists(sD->getSourceLMs()->getPtrInLM()->getFileName());
    if (!lmFileExists) {
        std::cout << "Error: LM file " << sD->getSourceLMs()->getPtrInLM()->getFileName() << " does not exists!" << std::endl;
        return 1;
    }

    // Perplexities of the "out" corpus against the "in" language model.
    sD->getSourcePPLs()->getPtrInPPL()->initialize(sD->getSourceCorps()->getPtrOutCorp(), sD->getSourceLMs()->getPtrInLM());
    sD->getSourcePPLs()->getPtrInPPL()->calcPPLCorpus();

    // The weights file is optional and only initialized when configured.
    if (!opt->getWFile()->getFileName().empty()) {
        sD->getWeightsFile()->initialize(opt->getWFile());
    }

    // One score per perplexity entry, multiplied by the weight at the same
    // index when a weights file is configured.
    for (unsigned int idx = 0; idx < sD->getSourcePPLs()->getPtrInPPL()->getSize(); ++idx) {
        double score = sD->getSourcePPLs()->getPtrInPPL()->getPPL(idx);

        if (!opt->getWFile()->getFileName().empty()) {
            score *= sD->getWeightsFile()->getWeight(idx);
        }

        sD->getScHold()->getPtrScores()->addScore(score);
    }

    sD->getScHold()->getPtrScores()->calibrate();
    if (opt->getInv()) {
        sD->getScHold()->getPtrScores()->inverse();
    }

    // With a target language set, the target side of the "out" data is
    // cleaned and written together with the source side; otherwise the
    // output is monolingual.
    if (!opt->getTLang().empty()) {
        boost::shared_ptr<Corpus> ptrOTCorp = boost::make_shared<Corpus>();
        ptrOTCorp->initialize(opt->getOutTData(), opt->getTLang());

        std::cout << "NB Scores: " << XenCommon::toString(sD->getScHold()->getPtrScores()->getSize())
                  << " NB Source corp (unclean): " << XenCommon::toString(sD->getSourceCorps()->getPtrOutCorp()->getSize())
                  << " NB Target corp (unclean): " << XenCommon::toString(ptrOTCorp->getSize())
                  << std::endl;

        XenIO::cleanCorpusBi(sD->getSourceCorps()->getPtrOutCorp(), ptrOTCorp, sD->getScHold()->getPtrScores());

        std::cout << "NB Scores: " << XenCommon::toString(sD->getScHold()->getPtrScores()->getSize())
                  << " NB Source corp (clean): " << XenCommon::toString(sD->getSourceCorps()->getPtrOutCorp()->getSize())
                  << " NB Target corp (clean): " << XenCommon::toString(ptrOTCorp->getSize())
                  << std::endl;

        XenIO::writeBiOutput(sD->getSourceCorps()->getPtrOutCorp(), ptrOTCorp, sD->getScHold()->getPtrScores());
    }
    else {
        std::cout << "NB Scores: " << XenCommon::toString(sD->getScHold()->getPtrScores()->getSize())
                  << " NB Source corp (unclean): " << XenCommon::toString(sD->getSourceCorps()->getPtrOutCorp()->getSize())
                  << std::endl;

        XenIO::cleanCorpusMono(sD->getSourceCorps()->getPtrOutCorp(), sD->getScHold()->getPtrScores());

        std::cout << "NB Scores: " << XenCommon::toString(sD->getScHold()->getPtrScores()->getSize())
                  << " NB Source corp (clean): " << XenCommon::toString(sD->getSourceCorps()->getPtrOutCorp()->getSize())
                  << std::endl;

        XenIO::writeMonoOutput(sD->getSourceCorps()->getPtrOutCorp(), sD->getScHold()->getPtrScores());
    }

    return 0;
}
示例#2
0
文件: eval.cpp 项目: gp-huang/XenC
/**
 * @brief Evaluates every result part between @p low and @p high that has no
 *        entry in ptrDist yet.
 *
 * Starting at @p low (or at the configured step when @p low is 0) and moving
 * up by the configured step while the value is <= @p high, each value missing
 * from ptrDist gets a part file written by XenIO::writeXRpart, loaded as a
 * Corpus, and handed to taskEval on a thread pool. Once all tasks have
 * finished, the temporary part files are deleted.
 *
 * @param high Upper bound (inclusive) of the values to evaluate.
 * @param low  Lower bound; 0 means "start at the configured step".
 */
void Eval::doEval(int high, int low) {
    XenOption* opt = XenOption::getInstance();
    StaticData* sD = StaticData::getInstance();

    // Source vocabulary: taken from the configured vocabulary file when one
    // is set, otherwise initialized from the "in" source corpus.
    if (opt->getSVocab()->getFileName().compare("") == 0) { sD->getVocabs()->getPtrSourceVoc()->initialize(sD->getSourceCorps()->getPtrInCorp()); }
    else { sD->getVocabs()->getPtrSourceVoc()->initialize(opt->getSVocab()); }

    // Read the step once; it drives both the start value and the increment.
    const int step = opt->getStep();

    int pc = low;
    if (pc == 0) { pc = step; }

    // A non-positive step would make the loop below never terminate (step 0)
    // or run into signed overflow (negative step). Only reject it when the
    // loop would actually be entered.
    if (pc <= high && step <= 0) {
        std::cerr << "Error: evaluation step must be positive (got " << step << ")." << std::endl;
        return;
    }

    pool threadPool(opt->getThreads());

    // Part files written for this run; removed after all tasks complete.
    std::vector<std::string> parts;

    // NOTE(review): ptrDist is read here while already scheduled tasks hold
    // the same ptrDist; confirm taskEval synchronizes its accesses to it.
    while (pc <= high) {
        if (ptrDist->find(pc) == ptrDist->end()) {
            std::string partName = sD->getXenResult()->getXenFile()->getDirName() + "/" + sD->getXenResult()->getXenFile()->getPrefix() + "-" + XenCommon::toString(pc) + "pc.gz";
            XenIO::writeXRpart(sD->getXenResult(), pc, partName);

            boost::shared_ptr<Corpus> c = boost::make_shared<Corpus>();
            c->initialize(partName, "xx");

            threadPool.schedule(
                    boost::bind(taskEval, pc, c, sD->getVocabs()->getPtrSourceVoc(), sD->getDevCorp(),
                                ptrDist));

            parts.push_back(partName);
        }
        pc += step;
    }

    // Part files must stay on disk until every scheduled task is done.
    threadPool.wait();

    // Index with the vector's own size type to avoid a signed/unsigned
    // comparison against parts.size().
    for (std::vector<std::string>::size_type i = 0; i < parts.size(); ++i)
        XenIO::delFile(parts[i]);

    std::cout << "Evaluation done." << std::endl;
}