void LabelDeEn(const Phrase &source, ostream &out) { Ranges ranges; // find ranges to label for (int start = 0; start < source.size(); ++start) { for (int end = start; end < source.size(); ++end) { if (IsA(source, start, -1, 1, "VAFIN") && IsA(source, end, +1, 1, "VVINF VVPP") && !Contains(source, start, end, 1, "VAFIN VVINF VVPP VVFIN")) { Range range(start, end, "reorder-label"); ranges.push_back(range); } else if ((start == 0 || IsA(source, start, -1, 1, "$,")) && IsA(source, end, +1, 0, "zu") && IsA(source, end, +2, 1, "VVINF") && !Contains(source, start, end, 1, "$,")) { Range range(start, end, "reorder-label"); ranges.push_back(range); } } } OutputWithLabels(source, ranges, out); }
/**
 * Collects per-sentence BLEU statistics for one hypothesis/reference pair.
 *
 * An n-gram tree is built from every start position of the reference and of
 * the hypothesis; CountNGrams() then fills the statistics' m_counts from the
 * two trees.
 *
 * @param hyp hypothesis sentence
 * @param ref reference sentence
 * @return statistics object constructed as BleuStats(hyp.size(), 0) with
 *         m_counts populated by CountNGrams()
 */
BleuStats ComputeBleuStats(const Phrase &hyp, const Phrase& ref)
{
  // Reference side.
  const size_t refLength = ref.size();
  NGramTree referenceNGrams;
  for (size_t begin = 0; begin != refLength; ++begin) {
    BuildNGramTree(ref, referenceNGrams, begin, refLength, 0);
  }

  // Hypothesis side.
  const size_t hypLength = hyp.size();
  BleuStats stats(hypLength, 0);
  NGramTree hypothesisNGrams;
  for (size_t begin = 0; begin != hypLength; ++begin) {
    BuildNGramTree(hyp, hypothesisNGrams, begin, hypLength, 0);
  }

  CountNGrams(hypothesisNGrams, referenceNGrams, 0, stats.m_counts);
  return stats;
}
/**
 * Walks the children of @p parentNode, appending their text as words to
 * @p output and registering every labelled element as a span in @p tree.
 *
 * For each child:
 *  - if it has a name (i.e. it is an element), its "label" attribute is read
 *    and the function recurses into it, so nested spans contribute their
 *    words first;
 *  - its value() text is escaped, tokenized and appended as new Word objects
 *    whose position is the current size of @p output;
 *  - if a non-empty label was read, "[label]" is added to @p tree covering
 *    the words appended while processing this child.
 *
 * NOTE(review): the Word objects are allocated with new and stored as raw
 * pointers; ownership is presumably taken by the Phrase - confirm.
 *
 * @param output     phrase being filled (appended to, never cleared)
 * @param tree       syntax tree receiving the labelled spans
 * @param parentNode XML node whose children are processed
 * @param params     forwarded unchanged to SyntaxTree::Add and to recursion
 */
void AlignedSentenceSyntax::XMLParse(Phrase &output,
                                     SyntaxTree &tree,
                                     const pugi::xml_node &parentNode,
                                     const Parameter &params)
{
  for (pugi::xml_node childNode = parentNode.first_child();
       childNode;
       childNode = childNode.next_sibling()) {
    const std::string nodeName = childNode.name();

    // Span label; stays empty for unnamed (text) nodes.
    std::string label;
    const int startPos = output.size();

    if (!nodeName.empty()) {
      pugi::xml_attribute attribute = childNode.attribute("label");
      label = attribute.as_string();

      // Recurse so that properly nested trees are handled.
      XMLParse(output, tree, childNode, params);
    }

    // Append this node's own text to the phrase.
    std::string text = childNode.value();
    Escape(text);

    std::vector<std::string> toks;
    Moses::Tokenize(toks, text);

    for (size_t i = 0; i < toks.size(); ++i) {
      Word *word = new Word(output.size(), toks[i]);
      output.push_back(word);
    }

    // Last word appended for this child. If nothing was appended this is
    // startPos - 1, which is passed on unchanged (as in the original code).
    const int endPos = output.size() - 1;

    // Record the labelled span.
    if (!label.empty()) {
      label = "[" + label + "]";
      tree.Add(startPos, endPos, label, params);
    }
  }
}
size_t Found(const Phrase &source, int pos, int factor, const std::string &str) { const size_t MAX_RANGE = 10; vector<string> soughts = Moses::Tokenize(str, " "); vector<string> puncts = Moses::Tokenize(". : , ;", " "); size_t maxEnd = std::min(source.size(), (size_t) pos + MAX_RANGE); for (size_t i = pos + 1; i < maxEnd; ++i) { const Word &word = source[i]; bool found; found = Found(word, factor, puncts); if (found) { return std::numeric_limits<size_t>::max(); } found = Found(word, factor, soughts); if (found) { return i; } } return std::numeric_limits<size_t>::max(); }
/**
 * Tokenizes @p line and stores one newly allocated Word per token in @p vec.
 *
 * @p vec is resized to the number of tokens and every slot is overwritten;
 * each Word is constructed from its position and token string.
 *
 * NOTE(review): Words are created with new and stored as raw pointers;
 * any pointers previously held in @p vec are overwritten without being
 * freed - presumably callers pass an empty phrase. Confirm.
 *
 * @param vec  phrase to fill
 * @param line text to tokenize
 */
void AlignedSentence::PopulateWordVec(Phrase &vec, const std::string &line)
{
  std::vector<std::string> tokens;
  Moses::Tokenize(tokens, line);

  const size_t numTokens = tokens.size();
  vec.resize(numTokens);

  size_t position = 0;
  while (position < vec.size()) {
    vec[position] = new Word(position, tokens[position]);
    ++position;
  }
}
double IBMModelOne::distance( const Phrase& hu, const Phrase& en ) const { double val = log(1.0+hu.size()) / en.size() ; double huRatio = 1.0 / hu.size(); for ( int enPos=0; enPos<en.size(); ++enPos ) { double sum = 0; const Word& enWord = en[enPos]; for ( int huPos=0; huPos<hu.size(); ++huPos ) { sum += lookup( hu[huPos], enWord ); } massert( sum>0 ); val -= log(sum); } throw "unimplemented"; }
void ClientXMLDocument::phraseToXML(const Phrase & phrase) { int pint(0); string pmode(""); if (cl("-inv").optionflag) { outs << " <SENTENCE Type=\"" << phrase.back().second << "\" Inv=\"" << cl("-inv").optionarg << "\">" << endl; } else { outs << " <SENTENCE Type=\"" << phrase.back().second << "\">" << endl; } for(unsigned i=0; i<phrase.size()-1; i++) { if(phrase[i+1].first == TOKEN_PHRASE_DELIMITER) { pint = 2; switch (phrase[i+1].second[0]) { case ':' : pmode="."; break; default : pmode="?"; break; } } if(phrase[i+1].first == TOKEN_SENTENCE_DELIMITER) { pint = 5; switch (phrase[i+1].second[0]) { case '?' : pmode="?"; break; default : pmode="."; break; } } outs << " <WORD Orth=\"" << phrase[i].second << "\" PInt=\"" <<pint << "\" PMode=\"" << pmode << "\"></WORD>" << endl; if(pint) { pint = 0; pmode=""; i++; } } outs << " </SENTENCE>" << endl; }
void EnPhrasalVerb(const Phrase &source, int revision, ostream &out) { Ranges ranges; // find ranges to label for (int start = 0; start < source.size(); ++start) { size_t end = std::numeric_limits<size_t>::max(); if (IsA(source, start, 0, 0, "ask asked asking")) { end = Found(source, start, 0, "out"); } else if (IsA(source, start, 0, 0, "back backed backing")) { end = Found(source, start, 0, "up"); } else if (IsA(source, start, 0, 0, "blow blown blew")) { end = Found(source, start, 0, "up"); } else if (IsA(source, start, 0, 0, "break broke broken")) { end = Found(source, start, 0, "down up in"); } else if (IsA(source, start, 0, 0, "bring brought bringing")) { end = Found(source, start, 0, "down up in"); } else if (IsA(source, start, 0, 0, "call called calling")) { end = Found(source, start, 0, "back up off"); } else if (IsA(source, start, 0, 0, "check checked checking")) { end = Found(source, start, 0, "out in"); } else if (IsA(source, start, 0, 0, "cheer cheered cheering")) { end = Found(source, start, 0, "up"); } else if (IsA(source, start, 0, 0, "clean cleaned cleaning")) { end = Found(source, start, 0, "up"); } else if (IsA(source, start, 0, 0, "cross crossed crossing")) { end = Found(source, start, 0, "out"); } else if (IsA(source, start, 0, 0, "cut cutting")) { end = Found(source, start, 0, "down off out"); } else if (IsA(source, start, 0, 0, "do did done")) { end = Found(source, start, 0, "over up"); } else if (IsA(source, start, 0, 0, "drop dropped dropping")) { end = Found(source, start, 0, "off"); } else if (IsA(source, start, 0, 0, "figure figured figuring")) { end = Found(source, start, 0, "out"); } else if (IsA(source, start, 0, 0, "fill filled filling")) { end = Found(source, start, 0, "in out up"); } else if (IsA(source, start, 0, 0, "find found finding")) { end = Found(source, start, 0, "out"); } else if (IsA(source, start, 0, 0, "get got getting gotten")) { end = Found(source, start, 0, "across over back"); } else if (IsA(source, start, 0, 0, "give 
given gave giving")) { end = Found(source, start, 0, "away back out up"); } else if (IsA(source, start, 0, 0, "hand handed handing")) { end = Found(source, start, 0, "down in over"); } else if (IsA(source, start, 0, 0, "hold held holding")) { end = Found(source, start, 0, "back up"); } else if (IsA(source, start, 0, 0, "keep kept keeping")) { end = Found(source, start, 0, "from up"); } else if (IsA(source, start, 0, 0, "let letting")) { end = Found(source, start, 0, "down in"); } else if (IsA(source, start, 0, 0, "look looked looking")) { end = Found(source, start, 0, "over up"); } else if (IsA(source, start, 0, 0, "make made making")) { end = Found(source, start, 0, "up"); } else if (IsA(source, start, 0, 0, "mix mixed mixing")) { end = Found(source, start, 0, "up"); } else if (IsA(source, start, 0, 0, "pass passed passing")) { end = Found(source, start, 0, "out up"); } else if (IsA(source, start, 0, 0, "pay payed paying")) { end = Found(source, start, 0, "back"); } else if (IsA(source, start, 0, 0, "pick picked picking")) { end = Found(source, start, 0, "out"); } else if (IsA(source, start, 0, 0, "point pointed pointing")) { end = Found(source, start, 0, "out"); } else if (IsA(source, start, 0, 0, "put putting")) { end = Found(source, start, 0, "down off out together on"); } else if (IsA(source, start, 0, 0, "send sending")) { end = Found(source, start, 0, "back"); } else if (IsA(source, start, 0, 0, "set setting")) { end = Found(source, start, 0, "up"); } else if (IsA(source, start, 0, 0, "sort sorted sorting")) { end = Found(source, start, 0, "out"); } else if (IsA(source, start, 0, 0, "switch switched switching")) { end = Found(source, start, 0, "off on"); } else if (IsA(source, start, 0, 0, "take took taking")) { end = Found(source, start, 0, "apart back off out"); } else if (IsA(source, start, 0, 0, "tear torn tearing")) { end = Found(source, start, 0, "up"); } else if (IsA(source, start, 0, 0, "think thought thinking")) { end = Found(source, start, 0, 
"over"); } else if (IsA(source, start, 0, 0, "thrown threw thrown throwing")) { end = Found(source, start, 0, "away"); } else if (IsA(source, start, 0, 0, "turn turned turning")) { end = Found(source, start, 0, "down off on"); } else if (IsA(source, start, 0, 0, "try tried trying")) { end = Found(source, start, 0, "on out"); } else if (IsA(source, start, 0, 0, "use used using")) { end = Found(source, start, 0, "up"); } else if (IsA(source, start, 0, 0, "warm warmed warming")) { end = Found(source, start, 0, "up"); } else if (IsA(source, start, 0, 0, "work worked working")) { end = Found(source, start, 0, "out"); } // found range to label if (end != std::numeric_limits<size_t>::max() && end > start + 1) { bool add = true; if (revision == 1 && Exist(source, start + 1, end - 1, 1, "VB VBD VBG VBN VBP VBZ")) { // there's a verb in between add = false; } if (add) { Range range(start + 1, end - 1, "reorder-label"); ranges.push_back(range); } } } OutputWithLabels(source, ranges, out); }
void Demidify::disectPhrase(Song *song, size_t trackNo, int prog_base, int max_prog_delta) { if ((*song)[trackNo]->size() == 0) return; Phrase *phrase = (*(*song)[trackNo])[0]->phrase(); if (!phrase) return; if (verbose >= 2) { out << " | +- Disecting Phrase '" << phrase->title() << "' which has " << phrase->size() << " events and is in Track " << trackNo << ".\n"; } // STEP ZERO // Remove program changes and other fun from the Phrase. if (pullTrackParameters && phrase->size()) { if (verbose >= 2) out << " | +- Pulling Track parameters.\n"; PhraseEdit pe; pe.reset(phrase); size_t pos = 0; bool culled = false; MidiEvent e; do { if (pos < pe.size()) { e = pe[pos]; switch (e.data.status) { case MidiCommand_ProgramChange: (*song)[trackNo]->params()->setProgram(e.data.data1); (*song)[trackNo]->filter()->setChannel(e.data.channel); (*song)[trackNo]->filter()->setPort(e.data.port); pe.erase(pos); culled = true; break; case MidiCommand_ControlChange: switch (e.data.data1) { case MidiControl_BankSelectMSB: (*song)[trackNo]->params()->setBankMSB (e.data.data2); pe.erase(pos); culled = true; break; case MidiControl_BankSelectLSB: (*song)[trackNo]->params()->setBankLSB (e.data.data2); pe.erase(pos); culled = true; break; case MidiControl_PanMSB: (*song)[trackNo]->params()->setPan (e.data.data2); pe.erase(pos); culled = true; break; case MidiControl_ReverbDepth: (*song)[trackNo]->params()->setReverb (e.data.data2); pe.erase(pos); culled = true; break; case MidiControl_ChorusDepth: (*song)[trackNo]->params()->setChorus (e.data.data2); pe.erase(pos); culled = true; break; default: ++pos; } break; default: ++pos; break; } } } while (pos < pe.size() && e.data.status != MidiCommand_NoteOn); if (culled) { if (verbose >= 2) out << " | | |\n" << " | | +- Culled some Track information\n"; Phrase *newPhrase = pe.createPhrase(song->phraseList()); replacePhrase(song, phrase, newPhrase); phrase = newPhrase; } } // STEP ONE: // Remove the original MidiImport Part from the Track. 
if (verbose >= 2) out << " | +- Removing original Part\n"; Part *part = (*((*song)[trackNo]))[0]; (*song)[trackNo]->remove(part); delete part; if (phrase->size() == 0) return; // STEP TWO: // Break the Phrase up into smaller Phrase chunks. if (verbose >= 2) out << " | +- Breaking up Phrase\n"; Clock startTime = (*phrase)[0].time; // start time of Phrase Clock endTime; // end time of Phrase for (size_t pos = 0; pos < phrase->size(); ++pos) { if ((*phrase)[pos].time > endTime) endTime = (*phrase)[pos].time; if ((*phrase)[pos].data.status == MidiCommand_NoteOn && (*phrase)[pos].offTime > endTime) endTime = (*phrase)[pos].offTime; } size_t pos = 0; size_t noParts = 0; for (Clock partStart = (startTime / partSize) * partSize; partStart < endTime && pos < phrase->size(); partStart += partSize) { if (progress) progress->progress(prog_base + pos*max_prog_delta/phrase->size()); PhraseEdit pe; MidiEvent e, lastE; do { e = (*phrase)[pos]; if (e.time < partStart + partSize) { MidiEvent e2 = e; e2.time -= partStart; if (e2.data.status == MidiCommand_NoteOn) e2.offTime -= partStart; pe.insert(e2); ++pos; lastE = e; } } while (pos < phrase->size() && e.time < partStart + partSize); if (pe.size()) { if (verbose >= 3) out << " | | +- Created Phrase between " << partStart << "-" << partStart+lastE.time << " with " << pe.size() << " events\n"; // If there is an identical Phrase already in this Track, // use that, otherwise create a new Phrase. 
Phrase *newPhrase = 0; for (size_t plpos = 0; plpos < song->phraseList()->size(); ++plpos) { if (identical(&pe, (*song->phraseList())[plpos])) { newPhrase = (*song->phraseList())[plpos]; break; } } if (newPhrase == 0) { newPhrase = pe.createPhrase(song->phraseList()); } Part *newPart = new Part; newPart->setStart(partStart); newPart->setEnd(partStart+partSize); newPart->setPhrase(newPhrase); (*song)[trackNo]->insert(newPart); ++noParts; } else { if (verbose >= 3) out << " | | +- No Phrase at " << partStart << "\n"; } } if (verbose >= 2) out << " | | | +- split into " << noParts << " Parts\n"; // STEP THREE: // Look at the Part we've instered. Can any be reduced to repeats in Parts? if (compactParts) { if (verbose >= 2) out << " | +- Reducing repeated Parts\n"; reduceParts(song, trackNo); } // STEP FOUR: // Look at the pattern of Phrases (ignoring the repeated ones). // Can we make any bigger Phrases? if (aggressive) { if (verbose >= 2) out << " | +- Extending Parts/Phrases (there are " << (*song)[trackNo]->size() << " Parts)\n"; size_t pos = 0; while (pos < (*song)[trackNo]->size() - 4) { if (!matchParts(song, trackNo, pos)) ++pos; } // Try the repeat thing again? if (compactParts) reduceParts(song, trackNo); } // STEP FIVE: // Now remove the original Phrase. if (verbose >= 2) out << " | +- Removing original Phrase\n"; song->phraseList()->erase(phrase); if (verbose >= 2) out << " | +- Phrase disection done\n"; }