void MosesDecoder::initialize(StaticData& staticData, const std::string& source, size_t sentenceid, float bleuObjectiveWeight, float bleuScoreWeight, bool avgRefLength, bool chartDecoding) { m_sentence = new Sentence(); stringstream in(source + "\n"); const std::vector<FactorType> &inputFactorOrder = staticData.GetInputFactorOrder(); m_sentence->Read(in,inputFactorOrder); // set weight of BleuScoreFeature //cerr << "Reload Bleu feature weight: " << bleuObjectiveWeight*bleuScoreWeight << " (" << bleuObjectiveWeight << "*" << bleuScoreWeight << ")" << endl; staticData.ReLoadBleuScoreFeatureParameter(bleuObjectiveWeight*bleuScoreWeight); m_bleuScoreFeature->SetCurrSourceLength((*m_sentence).GetSize()); if (chartDecoding) m_bleuScoreFeature->SetCurrNormSourceLength((*m_sentence).GetSize()-2); else m_bleuScoreFeature->SetCurrNormSourceLength((*m_sentence).GetSize()); if (avgRefLength) m_bleuScoreFeature->SetCurrAvgRefLength(sentenceid); else m_bleuScoreFeature->SetCurrShortestRefLength(sentenceid); m_bleuScoreFeature->SetCurrReferenceNgrams(sentenceid); }
void Eval::doEval(int high, int low) { XenOption* opt = XenOption::getInstance(); StaticData* sD = StaticData::getInstance(); if (opt->getSVocab()->getFileName().compare("") == 0) { sD->getVocabs()->getPtrSourceVoc()->initialize(sD->getSourceCorps()->getPtrInCorp()); } else { sD->getVocabs()->getPtrSourceVoc()->initialize(opt->getSVocab()); } pool threadPool(opt->getThreads()); int pc = low; if (pc == 0) { pc = opt->getStep(); } std::vector<std::string> parts; while (pc <= high) { EvalMap::iterator found = ptrDist->find(pc); if (found == ptrDist->end()) { std::string partName = sD->getXenResult()->getXenFile()->getDirName() + "/" + sD->getXenResult()->getXenFile()->getPrefix() + "-" + XenCommon::toString(pc) + "pc.gz"; XenIO::writeXRpart(sD->getXenResult(), pc, partName); boost::shared_ptr<Corpus> c = boost::make_shared<Corpus>(); c->initialize(partName, "xx"); threadPool.schedule( boost::bind(taskEval, pc, c, sD->getVocabs()->getPtrSourceVoc(), sD->getDevCorp(), ptrDist)); parts.push_back(partName); } pc += opt->getStep(); } threadPool.wait(); for (int i = 0; i < parts.size(); i++) XenIO::delFile(parts[i]); std::cout << "Evaluation done." << std::endl; }
int SimplePPL::launch() { XenOption* opt = XenOption::getInstance(); StaticData* sD = StaticData::getInstance(); sD->getSourceCorps()->getPtrInCorp()->initialize(opt->getInSData(), opt->getSLang()); sD->getSourceCorps()->getPtrOutCorp()->initialize(opt->getOutSData(), opt->getSLang()); if (opt->getSVocab()->getFileName().compare("") == 0) { if (opt->getFullVocab()) sD->getVocabs()->getPtrSourceVoc()->initialize(sD->getSourceCorps()->getPtrInCorp(), sD->getSourceCorps()->getPtrOutCorp()); else sD->getVocabs()->getPtrSourceVoc()->initialize(sD->getSourceCorps()->getPtrInCorp()); } else sD->getVocabs()->getPtrSourceVoc()->initialize(opt->getSVocab()); if (opt->getInSLM()->getFileName().compare("") == 0) { sD->getSourceLMs()->getPtrInLM()->initialize(sD->getSourceCorps()->getPtrInCorp(), sD->getVocabs()->getPtrSourceVoc()); sD->getSourceLMs()->getPtrInLM()->createLM(); sD->getSourceLMs()->getPtrInLM()->writeLM(); } else { sD->getSourceLMs()->getPtrInLM()->initialize(opt->getInSLM(), sD->getVocabs()->getPtrSourceVoc()); sD->getSourceLMs()->getPtrInLM()->loadLM(); } if (!boost::filesystem::exists(sD->getSourceLMs()->getPtrInLM()->getFileName())) { std::cout << "Error: LM file " + sD->getSourceLMs()->getPtrInLM()->getFileName() + " does not exists!" << std::endl; return 1; } sD->getSourcePPLs()->getPtrInPPL()->initialize(sD->getSourceCorps()->getPtrOutCorp(), sD->getSourceLMs()->getPtrInLM()); sD->getSourcePPLs()->getPtrInPPL()->calcPPLCorpus(); if (opt->getWFile()->getFileName().compare("") != 0) sD->getWeightsFile()->initialize(opt->getWFile()); for (unsigned int i = 0; i < sD->getSourcePPLs()->getPtrInPPL()->getSize(); i++) { double res = sD->getSourcePPLs()->getPtrInPPL()->getPPL(i); if (opt->getWFile()->getFileName().compare("") != 0) res = res * sD->getWeightsFile()->getWeight(i); sD->getScHold()->getPtrScores()->addScore(res); } sD->getScHold()->getPtrScores()->calibrate(); if (opt->getInv()) { sD->getScHold()->getPtrScores()->inverse(); } if (opt->getTLang().compare("") == 0) { std::cout << "NB Scores: " + XenCommon::toString(sD->getScHold()->getPtrScores()->getSize()) + " NB Source corp (unclean): " + XenCommon::toString(sD->getSourceCorps()->getPtrOutCorp()->getSize()) << std::endl; XenIO::cleanCorpusMono(sD->getSourceCorps()->getPtrOutCorp(), sD->getScHold()->getPtrScores()); std::cout << "NB Scores: " + XenCommon::toString(sD->getScHold()->getPtrScores()->getSize()) + " NB Source corp (clean): " + XenCommon::toString(sD->getSourceCorps()->getPtrOutCorp()->getSize()) << std::endl; XenIO::writeMonoOutput(sD->getSourceCorps()->getPtrOutCorp(), sD->getScHold()->getPtrScores()); } else { boost::shared_ptr<Corpus> ptrOTCorp = boost::make_shared<Corpus>(); ptrOTCorp->initialize(opt->getOutTData(), opt->getTLang()); std::cout << "NB Scores: " + XenCommon::toString(sD->getScHold()->getPtrScores()->getSize()) + " NB Source corp (unclean): " + XenCommon::toString(sD->getSourceCorps()->getPtrOutCorp()->getSize()) + " NB Target corp (unclean): " + XenCommon::toString(ptrOTCorp->getSize()) << std::endl; XenIO::cleanCorpusBi(sD->getSourceCorps()->getPtrOutCorp(), ptrOTCorp, sD->getScHold()->getPtrScores()); std::cout << "NB Scores: " + XenCommon::toString(sD->getScHold()->getPtrScores()->getSize()) + " NB Source corp (clean): " + XenCommon::toString(sD->getSourceCorps()->getPtrOutCorp()->getSize()) + " NB Target corp (clean): " + XenCommon::toString(ptrOTCorp->getSize()) << std::endl; XenIO::writeBiOutput(sD->getSourceCorps()->getPtrOutCorp(), ptrOTCorp, sD->getScHold()->getPtrScores()); } return 0; }
int main(int argc, char* argv[]) { po::options_description desc("XenC options", 200); Options opt; try { desc.add_options() ("source,s", po::value<std::string>(&opt.sLang)->required(), "source language (fr, en, ...)") ("target,t", po::value<std::string>(&opt.tLang)->default_value(""), "target language (if relevant)") ("in-stext,i", po::value<std::string>(&opt.inSData)->required(), "in-domain source text filename (plain text or gzipped file)") ("out-stext,o", po::value<std::string>(&opt.outSData)->required(), "out-of-domain source text filename (plain text or gzipped file)") ("mode,m", po::value<int>(&opt.mode)->required()->default_value(2), "filtering mode (1, 2, 3 or 4). Default is 2 (monolingual cross-entropy)") ("eval,e", po::value<bool>(&opt.eval)->zero_tokens()->default_value(false), "add this switch to evaluate a filtered file after computation. Eval is always done on source language") ("best-point,b", po::value<bool>(&opt.bp)->zero_tokens()->default_value(false), "add this switch to determinate the best point of a filtered file (eval option is implicit)") ("dev,d", po::value<std::string>(&opt.dev)->default_value(""), "source language dev file for eval or best point (all modes), if different from in-domain text") ("in-ttext", po::value<std::string>(&opt.inTData)->default_value(""), "in-domain target text filename, if target language (plain text or gzipped file)") ("out-ttext", po::value<std::string>(&opt.outTData)->default_value(""), "out-of-domain target text filename, if target language (plain text or gzipped file)") ("mono", po::value<bool>(&opt.mono)->zero_tokens()->default_value(false), "switch to force monolingual mode (if no target language)") ("stem", po::value<bool>(&opt.stem)->zero_tokens()->default_value(false), "switch to activate stem models computation and scoring from stem files") ("in-sstem", po::value<std::string>(&opt.inSStem)->default_value(""), "in-domain source stem filename (plain text or gzipped file)") ("in-tstem", po::value<std::string>(&opt.inTStem)->default_value(""), "in-domain target stem filename (plain text or gzipped file)") ("out-sstem", po::value<std::string>(&opt.outSStem)->default_value(""), "out-of-domain source stem filename (plain text or gzipped file)") ("out-tstem", po::value<std::string>(&opt.outTStem)->default_value(""), "out-of-domain target stem filename (plain text or gzipped file)") ("in-ptable", po::value<std::string>(&opt.iPTable)->default_value(""), "in-domain phrase table filename used in mode 4 scoring") ("out-ptable", po::value<std::string>(&opt.oPTable)->default_value(""), "out-of-domain phrase table filename used in mode 4 scoring") ("local", po::value<bool>(&opt.local)->zero_tokens()->default_value(false), "add a 7th score (local cross-entropy regarding the source phrase)") ("mean", po::value<bool>(&opt.mean)->zero_tokens()->default_value(false), "mean score from 3 OOD sample LMs instead of 1 in mode 2 & 3 (3 times slower + EXPERIMENTAL)") ("sim", po::value<bool>(&opt.sim)->zero_tokens()->default_value(false), "add similarity measures to score computing (EXPERIMENTAL, mode 2 only)") ("sim-only", po::value<bool>(&opt.simOnly)->zero_tokens()->default_value(false), "use only similarity measures (no cross-entropy)") ("vector-size", po::value<int>(&opt.vecSize)->default_value(150), "size of vector for similarity scores, default is 150 (WARNING: the more the slower)") ("step", po::value<int>(&opt.step)->default_value(10), "percentage steps for evaluation. Default is 10 (100%, 90%, ...)") ("s-vocab", po::value<std::string>(&opt.sVocab)->default_value(""), "source language vocab filename for LMs estimation. Default is in-domain source text vocab") ("t-vocab", po::value<std::string>(&opt.tVocab)->default_value(""), "target language vocab filename for LMs estimation. Default is in-domain target text vocab") ("full-vocab", po::value<bool>(&opt.fullVoc)->zero_tokens()->default_value(false), "use in-domain + out-of-domain vocabularies instead of in-domain only") ("in-slm", po::value<std::string>(&opt.inSLM)->default_value(""), "in-domain source language model (LM). Will be estimated if not present") ("out-slm", po::value<std::string>(&opt.outSLM)->default_value(""), "out-of-domain source language model (LM). Will be estimated if not present") ("in-tlm", po::value<std::string>(&opt.inTLM)->default_value(""), "in-domain target language model (LM). Will be estimated if not present") ("out-tlm", po::value<std::string>(&opt.outTLM)->default_value(""), "out-of-domain target language model (LM). Will be estimated if not present") ("order", po::value<int>(&opt.order)->default_value(4), "order for LMs. Default is 4") ("discount", po::value<int>(&opt.discount)->default_value(0), "discounting method for LM estimation. Default is modified KneserNey (0). 1 is GoodTuring, 2 is WittenBell.") ("to-lower", po::value<bool>(&opt.toLower)->default_value(false), "maps vocabulary to lower case for LM estimation. Useful for ASR. Default is false.") ("no-unkisword", po::value<bool>(&opt.noUnkIsWord)->default_value(false), "DO NOT consider <unk> and its probability as a word. Default is false, with respect to common practice.") ("bin-lm", po::value<int>(&opt.binLM)->default_value(1), "whether you want to estimate arpa.gz (0) or binary (1) LMs. Default is 1 (binary)") ("w-file", po::value<std::string>(&opt.wFile)->default_value(""), "filename for weighting the final score (one value per line)") ("log", po::value<bool>(&opt.log)->zero_tokens()->default_value(false), "switch to consider weights in w-file as log values") ("rev", po::value<bool>(&opt.rev)->zero_tokens()->default_value(false), "switch to require descending order sorted output") ("inv", po::value<bool>(&opt.inv)->zero_tokens()->default_value(false), "switch to require inversed calibrated scores (1 - score)") ("threads", po::value<int>(&opt.threads)->default_value(2), "number of threads to run for various operations (eval, sim, ...). Default is 2") ("sorted-only", po::value<bool>(&opt.sortOnly)->zero_tokens()->default_value(false), "switch to save space & time by only outputing the sorted scores file") ("help,h", "displays this help message") ("version,v", "displays program version"); po::variables_map vm; try { po::store(po::parse_command_line(argc, argv, desc), vm); if (vm.count("help") || argc == 1) { std::cout << "XenC version " + version + " PUBLIC RELEASE. Copyright 2013, Anthony Rousseau, LIUM, University of Le Mans, France." << std::endl << std::endl; std::cout << desc << std::endl; std::cout << "Filtering modes:" << std::endl << std::endl; std::cout << "For all modes (excepted 4), you must provide at least a source language, and in-domain and out-of-domain bitexts. Bitexts MUST NOT contain tabs." << std::endl; std::cout << "For every text file used, max words per line is 16384 and max chars per line is max words * 16." << std::endl << std::endl; std::cout << "Also, if no vocabularies and no language models are provided, they will be generated with the following parameters:" << std::endl; std::cout << "\t- vocabs:\tvocabularies will be created from words of in-domain bitexts." << std::endl; std::cout << "\t- LMs:\t\torder 4, modified kn-int smoothing, 0-0-0-0 cut-offs, sblm (binary) output format." << std::endl << std::endl; std::cout << "\t1:" << std::endl; std::cout << "\tSimple source language perplexity filtering. (Gao & al. 2002)" << std::endl; std::cout << "\tWill sort the out-of-domain bitext sentences (ascending order)" << std::endl; std::cout << "\tbased on perplexity scores given by a in-domain language model." << std::endl << std::endl; std::cout << "\t2:" << std::endl; std::cout << "\tSource language cross-entropy (Xen) difference filtering. (Moore & Lewis 2010)" << std::endl; std::cout << "\tWill sort the out-of-domain bitext sentences (ascending order)" << std::endl; std::cout << "\tbased on (in-source Xen - out-source Xen)." << std::endl << std::endl; std::cout << "\t3:" << std::endl; std::cout << "\tBilingual cross-entropy difference filtering. (Axelrod & al. 2011)" << std::endl; std::cout << "\tWill sort the out-of-domain bitext sentences (ascending order)" << std::endl; std::cout << "\tbased on (in-source Xen - out-source Xen) + (in-target Xen - out-target Xen)." << std::endl << std::endl; std::cout << "\t4:" << std::endl; std::cout << "\tPhrase-table scoring mode. (EXPERIMENTAL)" << std::endl; std::cout << "\tAdds the cross-entropy score of each phrase pair" << std::endl; std::cout << "\tin a phrase-table as a sixth feature of the table." << std::endl << std::endl; std::cout << "\tYou must provide:" << std::endl; std::cout << "\t\t- in-domain and out-of-domain phrase tables." << std::endl; std::cout << "\t\t- source and target vocabularies." << std::endl << std::endl; return 0; } if (vm.count("version")) { std::cout << "XenC version " + version + " PUBLIC RELEASE. Copyright 2013, Anthony Rousseau, LIUM, University of Le Mans, France." << std::endl; return 0; } po::notify(vm); } catch (po::error& e) { std::cout << desc << std::endl; std::cout << e.what() << std::endl; } } catch (std::exception& e) { std::cout << desc << std::endl; std::cout << e.what() << std::endl; } opt.pc = 0; opt.inToks = 0; opt.outToks = 0; if (opt.dev.compare("") == 0) { if (boost::filesystem::exists(opt.inSData)) { opt.dev = opt.inSData; } else { std::cerr << "You must at least specify a source in-domain corpus." << std::endl; return 1; } } // ----------------------------------------------------- // Create singletons & mode XenOption* xOpt = XenOption::getInstance(&opt); StaticData* sD = StaticData::getInstance(); boost::shared_ptr<Mode> mode; switch (xOpt->getMode()) { case 1: mode = boost::make_shared<SimplePPL>(); break; case 2: mode = boost::make_shared<MonoXEntropy>(); break; case 3: mode = boost::make_shared<BiXEntropy>(); break; case 4: mode = boost::make_shared<PTScoring>(); break; default: break; } // ----------------------------------------------------- opt.outName = getOutName(xOpt); std::string sC = sanityCheck(xOpt); // Check if all mandatory are here // ----------------------------------------------------- // LAST CHECK BEFORE OPERATIONS if (sC.compare("0") == 0) { std::cout << "Source language: " << opt.sLang << std::endl; if (!opt.mono) { std::cout << "Target language: " << opt.tLang << std::endl; } if (opt.mode != 4) { std::cout << "In-domain source data: " << opt.inSData << std::endl; std::cout << "Out-of-domain source data: " << opt.outSData << std::endl; if (opt.stem) { std::cout << "In-domain source stem file: " << opt.inSStem << std::endl; std::cout << "Out-of-domain source stem file: " << opt.outSStem << std::endl; } if (!opt.mono) { std::cout << "In-domain target data: " << opt.inTData << std::endl; std::cout << "Out-of-domain target data: " << opt.outTData << std::endl; if (opt.stem) { std::cout << "In-domain target stem file: " << opt.inTStem << std::endl; std::cout << "Out-of-domain target stem file: " << opt.outTStem << std::endl; } } } else { std::cout << "Source vocabulary: " << opt.sVocab << std::endl; std::cout << "Target vocabulary: " << opt.tVocab << std::endl; std::cout << "In-domain phrase table: " << opt.iPTable << std::endl; std::cout << "Out-of-domain phrase table: " << opt.oPTable << std::endl; std::cout << "Output phrase table: " << opt.outName << std::endl; } std::cout << "Mode: " << opt.mode << std::endl; } else { std::cerr << std::endl << sC << std::endl; sD->deleteInstance(); xOpt->deleteInstance(); return 1; } // ----------------------------------------------------- try { // Normal mode if (!xOpt->getEval()&& !xOpt->getBp()) { int ret = mode->launch(); if (ret == 0) { xOpt->deleteInstance(); sD->deleteInstance(); return 0; } else { std::cerr << "Something went wrong." << std::endl; xOpt->deleteInstance(); sD->deleteInstance(); return 1; } } // Eval or BP else { std::string sortedName = xOpt->getOutName() + ".sorted.gz"; std::string distName = xOpt->getOutName() + ".dist"; std::string bpName = xOpt->getOutName() + ".bp"; std::cout << "Sorted output used: " + sortedName << std::endl; // ----------------------------------------------------- // Proceed to normal mode if not done before if (!boost::filesystem::exists(sortedName)) { int ret = mode->launch(); if (ret != 0) { std::cerr << "Something went wrong." << std::endl; xOpt->deleteInstance(); sD->deleteInstance(); return 1; } } // ----------------------------------------------------- boost::shared_ptr<XenFile> sorted = boost::make_shared<XenFile>(); sorted->initialize(sortedName); sD->getXenResult()->initialize(sorted); sD->getDevCorp()->initialize(xOpt->getDev(), xOpt->getSLang()); // Eval if (xOpt->getEval()) { boost::shared_ptr<Eval> ptrEval = boost::make_shared<Eval>(); ptrEval->doEval(100, 0); int oldStep = xOpt->getStep(); xOpt->setStep(2); ptrEval->doEval(8, 0); xOpt->setStep(oldStep); XenIO::writeEval(ptrEval->getDist(), distName); } // BP else if (xOpt->getBp()) { boost::shared_ptr<Eval> ptrEval; if (boost::filesystem::exists(distName.c_str())) ptrEval = boost::make_shared<Eval>(distName); else { ptrEval = boost::make_shared<Eval>(); ptrEval->doEval(100, 0); int oldStep = xOpt->getStep(); xOpt->setStep(2); ptrEval->doEval(8, 0); xOpt->setStep(oldStep); XenIO::writeEval(ptrEval->getDist(), distName); } ptrEval->doBP(); XenIO::writeEval(ptrEval->getDist(), bpName); } else { return 1; } } } catch (XenCommon::XenCEption &e) { throw; } xOpt->deleteInstance(); sD->deleteInstance(); return 0; }
// // Constructor // VMDumper::VMDumper(const UINT_32 iInstructions, const VMInstruction * aInstructions, const StaticText & oSyscalls, const StaticData & oStaticData, const StaticText & oStaticText, const HashTable & oHashTable) { INT_32 iSyscallsDataSize = 0; if (oSyscalls.iUsedDataOffsetsSize == 0) { iSyscallsDataSize = 0; } else { const TextDataIndex & oTMP = oSyscalls.aDataOffsets[oSyscalls.iUsedDataOffsetsSize - 1]; iSyscallsDataSize = oTMP.offset + oTMP.length + 1; } INT_32 iStaticTextDataSize = 0; if (oStaticText.iUsedDataOffsetsSize == 0) { iStaticTextDataSize = 0; } else { const TextDataIndex & oTMP = oStaticText.aDataOffsets[oStaticText.iUsedDataOffsetsSize - 1]; iStaticTextDataSize = oTMP.offset + oTMP.length + 1; } const INT_32 iCodeSize = sizeof(VMInstruction) * iInstructions; const INT_32 iSyscallsIndexSize = sizeof(TextDataIndex) * oSyscalls.iUsedDataOffsetsSize; const INT_32 iStaticDataIndexSize = sizeof(StaticDataVar) * oStaticData.iUsedDataSize; const INT_32 iStaticTextIndexSize = sizeof(TextDataIndex) * oStaticText.iUsedDataOffsetsSize; const INT_32 iStaticDataBitIndexSize = sizeof(UINT_64) + oStaticData.GetBitIndex() -> GetUsedSize(); const INT_32 iCallsHashTableSize = sizeof(HashElement) * (1 << oHashTable.iPower); // Header iVMExecutableSize = AlignSegment(sizeof(VMExecutable)) + // Code segment AlignSegment(iCodeSize) + // Static syscalls segment AlignSegment(iSyscallsDataSize) + // Static syscalls index segment AlignSegment(iSyscallsIndexSize) + // Static Data segment AlignSegment(iStaticDataIndexSize) + // Static Text segment AlignSegment(iStaticTextDataSize) + // Static Text index segment AlignSegment(iStaticTextIndexSize) + // Bit index AlignSegment(iStaticDataBitIndexSize) + // Calls segment AlignSegment(iCallsHashTableSize); CHAR_P vRawData = (CHAR_P)malloc(iVMExecutableSize); // Make valgind happy memset(vRawData, '-', iVMExecutableSize); oVMExecutable = (VMExecutable *)vRawData; oVMExecutable -> magic[0] = 'C'; oVMExecutable -> magic[1] = 'T'; oVMExecutable -> magic[2] = 'P'; oVMExecutable -> magic[3] = 'P'; for(UINT_32 iI = 0; iI < 8; ++iI) { oVMExecutable -> version[iI] = 0; } oVMExecutable -> version[0] = 2; oVMExecutable -> entry_point = 0; oVMExecutable -> code_offset = AlignSegment(sizeof(VMExecutable)); oVMExecutable -> code_size = iCodeSize; // Syscalls segment // Aligned Not yet aligned oVMExecutable -> syscalls_offset = oVMExecutable -> code_offset + AlignSegment(iCodeSize); oVMExecutable -> syscalls_data_size = iSyscallsDataSize; // // Aligned Not yet aligned oVMExecutable -> syscalls_index_offset = oVMExecutable -> syscalls_offset + AlignSegment(iSyscallsDataSize); oVMExecutable -> syscalls_index_size = iSyscallsIndexSize; // Data segment // Aligned Not yet aligned oVMExecutable -> static_data_offset = oVMExecutable -> syscalls_index_offset + AlignSegment(iSyscallsIndexSize); oVMExecutable -> static_data_data_size = iStaticDataIndexSize; // Text segment // Aligned Not yet aligned oVMExecutable -> static_text_offset = oVMExecutable -> static_data_offset + AlignSegment(iStaticDataIndexSize); oVMExecutable -> static_text_data_size = iStaticTextDataSize; // // Aligned Not yet aligned oVMExecutable -> static_text_index_offset = oVMExecutable -> static_text_offset + AlignSegment(iStaticTextDataSize); oVMExecutable -> static_text_index_size = iStaticTextIndexSize; // Version 2.2+ // Offset of static data bit index oVMExecutable -> static_data_bit_index_offset = oVMExecutable -> static_text_index_offset + AlignSegment(iStaticTextIndexSize); // Offset of static data bit inde oVMExecutable -> static_data_bit_index_size = iStaticDataBitIndexSize; // Platform-dependent data (byte order) oVMExecutable -> platform = 0x4142434445464748ull; oVMExecutable -> ieee754double = 15839800103804824402926068484019465486336.0; oVMExecutable -> crc = 0; // Version 2.4+ // Calls hash table // Aligned Not yet aligned oVMExecutable -> calls_hash_table_offset = oVMExecutable -> static_data_bit_index_offset + AlignSegment(iStaticDataBitIndexSize); oVMExecutable -> calls_hash_table_size = iCallsHashTableSize; oVMExecutable -> calls_hash_table_power = oHashTable.iPower; // Copy code segment memcpy(vRawData + oVMExecutable -> code_offset, aInstructions, oVMExecutable -> code_size); // Copy syscalls, if need if (oVMExecutable -> syscalls_data_size != 0) { memcpy(vRawData + oVMExecutable -> syscalls_offset, oSyscalls.sData, oVMExecutable -> syscalls_data_size); // Copy static text index segment memcpy(vRawData + oVMExecutable -> syscalls_index_offset, oSyscalls.aDataOffsets, oVMExecutable -> syscalls_index_size); } // Copy static data segment, if need if (oVMExecutable -> static_data_data_size != 0) { // Copy static data segment memcpy(vRawData + oVMExecutable -> static_data_offset, oStaticData.aData, oVMExecutable -> static_data_data_size); } // Copy static text segment, if need if (oVMExecutable -> static_text_data_size != 0) { memcpy(vRawData + oVMExecutable -> static_text_offset, oStaticText.sData, oVMExecutable -> static_text_data_size); // Copy static text index segment memcpy(vRawData + oVMExecutable -> static_text_index_offset, oStaticText.aDataOffsets, oVMExecutable -> static_text_index_size); } // Copy static data bit index, if need if (oVMExecutable -> static_data_bit_index_size != 0) { memcpy(vRawData + oVMExecutable -> static_data_bit_index_offset, oStaticData.GetBitIndex() -> GetIndexData(), oVMExecutable -> static_data_bit_index_size); } // Version 2.4+ // Copy Calls hash table, if need if (oVMExecutable -> calls_hash_table_size != 0) { memcpy(vRawData + oVMExecutable -> calls_hash_table_offset, oHashTable.aElements, oVMExecutable -> calls_hash_table_size); } // Calculate CRC of file oVMExecutable -> crc = crc32((UCCHAR_P)oVMExecutable, iVMExecutableSize); }