int main() { std::unordered_map< std::string, int > dictionary; //std::ifstream input("../data/corpus/europarl.lowercased.en", std::ios::in); int phrases[][3] = { { 0, 1, 2 }, { 0, 2, 3 }, { 2, 3, 1 }, }; /*for (std::string line; std::getline(input, line); ) { std::istringstream iss(line); Phrase phrase; for (std::string word; iss >> word; ) phrase.push_back(getId(dictionary, word)); phrases.push_back(phrase); if (phrases.size() == 1000) break; }*/ std::vector< Phrase > phs; for (int i = 0; i < 3; ++i) phs.push_back(Phrase(phrases[i], phrases[i+1])); LanguageModel model(learn_ngram_language_model(phs, 4, 2, 0.0001)); for (int i = 0; i < 4; ++i) { for (int j= 0; j < 4; ++j) { Phrase phrase; phrase.push_back(i); phrase.push_back(j); printf("%d %d %lf\n", i, j, model.get_probability(phrase)); } Phrase phrase; phrase.push_back(i); printf("%d %lf\n", i, model.get_probability(phrase)); } Phrase phrase; phrase.push_back(2); phrase.push_back(3); phrase.push_back(1); printf("2 3 1 %lf\n", model.get_probability(phrase)); model.save("lmodel.bin"); if (!(model == load_ngram_language_model("lmodel.bin"))) throw 42; }
/**
 * Recursively flattens the children of an XML node into a phrase and records
 * labelled spans in a syntax tree.
 *
 * For every child of @p parentNode, in document order:
 *  - if the child has a non-empty node name, its "label" attribute is read and
 *    the function recurses into it, so words of nested nodes are appended
 *    first;
 *  - the child's own value() text is escaped, tokenized, and each token is
 *    appended to @p output as a newly allocated Word whose first constructor
 *    argument is its position in @p output;
 *  - if a non-empty label was read, the span of positions appended while
 *    processing this child is added to @p tree under the label wrapped in
 *    square brackets.
 *
 * @param output      phrase that receives the words; appended to, never cleared
 * @param tree        syntax tree that receives the labelled spans
 * @param parentNode  XML node whose children are processed
 * @param params      passed through unchanged to SyntaxTree::Add
 */
void AlignedSentenceSyntax::XMLParse(Phrase &output,
                                     SyntaxTree &tree,
                                     const pugi::xml_node &parentNode,
                                     const Parameter &params)
{
  // Index of the current child; only referenced by the debug trace below.
  int childNum = 0;
  for (pugi::xml_node childNode = parentNode.first_child(); childNode; childNode = childNode.next_sibling()) {
    string nodeName = childNode.name();

    // span label
    string label;
    // First word position that belongs to this child (including descendants).
    int startPos = static_cast<int>(output.size());

    if (!nodeName.empty()) {
      pugi::xml_attribute attribute = childNode.attribute("label");
      label = attribute.as_string();

      // recursively call this function. For proper recursive trees
      XMLParse(output, tree, childNode, params);
    }

    // fill phrase vector
    string text = childNode.value();
    Escape(text);
    //cerr << childNum << " " << label << "=" << text << endl;

    std::vector<string> toks;
    Moses::Tokenize(toks, text);

    for (size_t i = 0; i < toks.size(); ++i) {
      const string &tok = toks[i];
      // NOTE(review): raw allocation; presumably the Phrase (or its owner)
      // deletes its words -- confirm.
      Word *word = new Word(output.size(), tok);
      output.push_back(word);
    }

    // is it a labelled span?
    // Last word position covered by this child; equals startPos - 1 when the
    // child contributed no words.
    int endPos = static_cast<int>(output.size()) - 1;

    // fill syntax labels
    if (!label.empty()) {
      label = "[" + label + "]";
      tree.Add(startPos, endPos, label, params);
    }

    ++childNum;
  }
}