Exemplo n.º 1
0
// Collects every [start, end] word span of `source` that matches one of two
// patterns, tags it "reorder-label", and hands the sentence plus the collected
// ranges to OutputWithLabels.
//
// The patterns are expressed through IsA/Contains, which are defined
// elsewhere. Judging by the call sites, the third IsA argument looks like an
// offset relative to the given position and the fourth like a factor index
// (TODO confirm against their definitions):
//   1. IsA "VAFIN" at start-1, IsA "VVINF VVPP" at end+1, and the span does
//      not Contain "VAFIN VVINF VVPP VVFIN";
//   2. start is 0 or IsA "$," at start-1, IsA "zu" at end+1, IsA "VVINF" at
//      end+2, and the span does not Contain "$,".
void LabelDeEn(const Phrase &source, ostream &out)
{
  Ranges ranges;

  for (int start = 0; start < source.size(); ++start) {
    for (int end = start; end < source.size(); ++end) {
      // Pattern 1
      bool shouldLabel =
        IsA(source, start, -1, 1, "VAFIN")
        && IsA(source, end, +1, 1, "VVINF VVPP")
        && !Contains(source, start, end, 1, "VAFIN VVINF VVPP VVFIN");

      // Pattern 2 is only consulted when pattern 1 did not match
      if (!shouldLabel) {
        shouldLabel =
          (start == 0 || IsA(source, start, -1, 1, "$,"))
          && IsA(source, end, +1, 0, "zu")
          && IsA(source, end, +2, 1, "VVINF")
          && !Contains(source, start, end, 1, "$,");
      }

      if (!shouldLabel) {
        continue;
      }

      ranges.push_back(Range(start, end, "reorder-label"));
    }
  }

  OutputWithLabels(source, ranges, out);
}
Exemplo n.º 2
0
// Returns the BleuStats for one hypothesis/reference pair.
//
// An NGramTree is filled for each side by calling BuildNGramTree once per
// start position, then CountNGrams compares the two trees and writes into the
// m_counts member of the result. The result is constructed from the
// hypothesis length and 0.
BleuStats ComputeBleuStats(const Phrase &hyp, const Phrase& ref)
{
  // Reference side
  NGramTree refNGrams;
  const size_t refLen = ref.size();
  for (size_t start = 0; start < refLen; ++start) {
    BuildNGramTree(ref, refNGrams, start, refLen, 0);
  }

  // Hypothesis side
  const size_t hypLen = hyp.size();
  BleuStats stats(hypLen, 0);

  NGramTree hypNGrams;
  size_t start = 0;
  while (start < hypLen) {
    BuildNGramTree(hyp, hypNGrams, start, hypLen, 0);
    ++start;
  }

  CountNGrams(hypNGrams, refNGrams, 0, stats.m_counts);
  return stats;
}
// Walks the children of parentNode in document order. For each child:
//   - if it has a non-empty node name, its "label" attribute is read and the
//     function recurses into it, so the words of its subtree are appended to
//     `output` first;
//   - its own value() is escaped, tokenized and appended to `output` as newly
//     allocated Word objects, each constructed with its index in `output`;
//   - if a non-empty label was read, "[label]" is added to `tree` for the
//     span running from the size of `output` before the child was processed
//     to the last word currently in `output`.
void AlignedSentenceSyntax::XMLParse(Phrase &output,
                                     SyntaxTree &tree,
                                     const pugi::xml_node &parentNode,
                                     const Parameter &params)
{
  pugi::xml_node childNode = parentNode.first_child();
  while (childNode) {
    // index the first word contributed by this child would get
    int startPos = output.size();

    // named node: pick up its span label and descend into it
    string label;
    const string nodeName = childNode.name();
    if (!nodeName.empty()) {
      label = childNode.attribute("label").as_string();
      XMLParse(output, tree, childNode, params);
    }

    // words carried directly by this node
    string text = childNode.value();
    Escape(text);

    std::vector<string> toks;
    Moses::Tokenize(toks, text);

    for (std::vector<string>::const_iterator tok = toks.begin();
         tok != toks.end(); ++tok) {
      // output.size() is read before the push, i.e. it is the new word's index
      output.push_back(new Word(output.size(), *tok));
    }

    // last word appended so far (one less than startPos if nothing was added)
    int endPos = output.size() - 1;

    // labelled span
    if (!label.empty()) {
      label = "[" + label + "]";
      tree.Add(startPos, endPos, label, params);
    }

    childNode = childNode.next_sibling();
  }
}
Exemplo n.º 4
0
size_t Found(const Phrase &source, int pos, int factor, const std::string &str)
{
  const size_t MAX_RANGE = 10;

  vector<string> soughts = Moses::Tokenize(str, " ");
  vector<string> puncts = Moses::Tokenize(". : , ;", " ");


  size_t maxEnd = std::min(source.size(), (size_t) pos + MAX_RANGE);
  for (size_t i = pos + 1; i < maxEnd; ++i) {
	const Word &word = source[i];
	bool found;

	found = Found(word, factor, puncts);
	if (found) {
		return std::numeric_limits<size_t>::max();
	}

	found = Found(word, factor, soughts);
	if (found) {
		return i;
	}
  }

  return std::numeric_limits<size_t>::max();
}
Exemplo n.º 5
0
// Tokenizes `line` and resizes `vec` to hold one entry per token. Each entry
// is set to a newly allocated Word constructed from the token's index and
// text. Pointers already stored in `vec` are overwritten, not deleted.
void AlignedSentence::PopulateWordVec(Phrase &vec, const std::string &line)
{
	std::vector<string> tokens;
	Moses::Tokenize(tokens, line);

	const size_t numTokens = tokens.size();
	vec.resize(numTokens);

	size_t idx = 0;
	while (idx < numTokens) {
		vec[idx] = new Word(idx, tokens[idx]);
		++idx;
	}
}
Exemplo n.º 6
0
double IBMModelOne::distance( const Phrase& hu, const Phrase& en ) const
{
  double val = log(1.0+hu.size()) / en.size() ;

  double huRatio = 1.0 / hu.size();

  for ( int enPos=0; enPos<en.size(); ++enPos )
  {
    double sum = 0;
    const Word& enWord = en[enPos];

    for ( int huPos=0; huPos<hu.size(); ++huPos )
    {
      sum += lookup( hu[huPos], enWord );
    }

    massert( sum>0 );

    val -= log(sum);
  }

  throw "unimplemented";
}
Exemplo n.º 7
0
// Writes `phrase` to `outs` as one <SENTENCE> element holding a <WORD>
// element per word.
//
// The last entry of `phrase` is never written as a word; its `second` member
// supplies the Type attribute of the sentence. When the "-inv" option flag is
// set, its argument is written as an additional Inv attribute.
//
// For each entry, the entry after it decides the prosody attributes:
//   - TOKEN_PHRASE_DELIMITER:   PInt 2, PMode "." if the delimiter text starts
//                               with ':', otherwise "?";
//   - TOKEN_SENTENCE_DELIMITER: PInt 5, PMode "?" if the delimiter text starts
//                               with '?', otherwise ".";
//   - anything else:            PInt 0 and an empty PMode.
// A delimiter consumed this way is skipped and not written as a word itself.
//
// An empty phrase produces no output, since there is no final entry to take
// the sentence type from. Without this guard the function would call back()
// on an empty container and the unsigned loop bound size()-1 would wrap
// around, indexing far past the end.
void ClientXMLDocument::phraseToXML(const Phrase & phrase)
{
	if (phrase.size() == 0) {
		return;
	}

	if (cl("-inv").optionflag) {
		outs << "  <SENTENCE Type=\"" << phrase.back().second << "\" Inv=\"" << cl("-inv").optionarg << "\">" << endl;
	}
	else {
		outs << "  <SENTENCE Type=\"" << phrase.back().second << "\">" << endl;
	}

	// i + 1 < size() keeps phrase[i+1] in range and cannot underflow
	for (unsigned i = 0; i + 1 < phrase.size(); i++) {
		// prosody defaults for a word not followed by a delimiter
		int pint(0);
		string pmode("");

		if (phrase[i+1].first == TOKEN_PHRASE_DELIMITER) {
			pint = 2;
			switch (phrase[i+1].second[0]) {
			case ':' : pmode="."; break;
			default : pmode="?"; break;
			}
		}
		if (phrase[i+1].first == TOKEN_SENTENCE_DELIMITER) {
			pint = 5;
			switch (phrase[i+1].second[0]) {
			case '?' : pmode="?"; break;
			default : pmode="."; break;
			}
		}

		outs << "   <WORD Orth=\"" << phrase[i].second << "\" PInt=\"" << pint
		     << "\" PMode=\"" << pmode << "\"></WORD>" << endl;

		// the delimiter has been folded into this word: step over it
		if (pint) {
			i++;
		}
	}

	outs << "  </SENTENCE>" << endl;
}
Exemplo n.º 8
0
void EnPhrasalVerb(const Phrase &source, int revision, ostream &out)
{
  Ranges ranges;

  // find ranges to label
  for (int start = 0; start < source.size(); ++start) {
	size_t end = std::numeric_limits<size_t>::max();

	if (IsA(source, start, 0, 0, "ask asked asking")) {
		end = Found(source, start, 0, "out");
    }
	else if (IsA(source, start, 0, 0, "back backed backing")) {
		end = Found(source, start, 0, "up");
	}
	else if (IsA(source, start, 0, 0, "blow blown blew")) {
		end = Found(source, start, 0, "up");
	}
	else if (IsA(source, start, 0, 0, "break broke broken")) {
		end = Found(source, start, 0, "down up in");
	}
	else if (IsA(source, start, 0, 0, "bring brought bringing")) {
		end = Found(source, start, 0, "down up in");
	}
	else if (IsA(source, start, 0, 0, "call called calling")) {
		end = Found(source, start, 0, "back up off");
	}
	else if (IsA(source, start, 0, 0, "check checked checking")) {
		end = Found(source, start, 0, "out in");
	}
	else if (IsA(source, start, 0, 0, "cheer cheered cheering")) {
		end = Found(source, start, 0, "up");
	}
	else if (IsA(source, start, 0, 0, "clean cleaned cleaning")) {
		end = Found(source, start, 0, "up");
	}
	else if (IsA(source, start, 0, 0, "cross crossed crossing")) {
		end = Found(source, start, 0, "out");
	}
	else if (IsA(source, start, 0, 0, "cut cutting")) {
		end = Found(source, start, 0, "down off out");
	}
	else if (IsA(source, start, 0, 0, "do did done")) {
		end = Found(source, start, 0, "over up");
	}
	else if (IsA(source, start, 0, 0, "drop dropped dropping")) {
		end = Found(source, start, 0, "off");
	}
	else if (IsA(source, start, 0, 0, "figure figured figuring")) {
		end = Found(source, start, 0, "out");
	}
	else if (IsA(source, start, 0, 0, "fill filled filling")) {
		end = Found(source, start, 0, "in out up");
	}
	else if (IsA(source, start, 0, 0, "find found finding")) {
		end = Found(source, start, 0, "out");
	}
	else if (IsA(source, start, 0, 0, "get got getting gotten")) {
		end = Found(source, start, 0, "across over back");
	}
	else if (IsA(source, start, 0, 0, "give given gave giving")) {
		end = Found(source, start, 0, "away back out up");
	}
	else if (IsA(source, start, 0, 0, "hand handed handing")) {
		end = Found(source, start, 0, "down in over");
	}
	else if (IsA(source, start, 0, 0, "hold held holding")) {
		end = Found(source, start, 0, "back up");
	}
	else if (IsA(source, start, 0, 0, "keep kept keeping")) {
		end = Found(source, start, 0, "from up");
	}
	else if (IsA(source, start, 0, 0, "let letting")) {
		end = Found(source, start, 0, "down in");
	}
	else if (IsA(source, start, 0, 0, "look looked looking")) {
		end = Found(source, start, 0, "over up");
	}
	else if (IsA(source, start, 0, 0, "make made making")) {
		end = Found(source, start, 0, "up");
	}
	else if (IsA(source, start, 0, 0, "mix mixed mixing")) {
		end = Found(source, start, 0, "up");
	}
	else if (IsA(source, start, 0, 0, "pass passed passing")) {
		end = Found(source, start, 0, "out up");
	}
	else if (IsA(source, start, 0, 0, "pay payed paying")) {
		end = Found(source, start, 0, "back");
	}
	else if (IsA(source, start, 0, 0, "pick picked picking")) {
		end = Found(source, start, 0, "out");
	}
	else if (IsA(source, start, 0, 0, "point pointed pointing")) {
		end = Found(source, start, 0, "out");
	}
	else if (IsA(source, start, 0, 0, "put putting")) {
		end = Found(source, start, 0, "down off out together on");
	}
	else if (IsA(source, start, 0, 0, "send sending")) {
		end = Found(source, start, 0, "back");
	}
	else if (IsA(source, start, 0, 0, "set setting")) {
		end = Found(source, start, 0, "up");
	}
	else if (IsA(source, start, 0, 0, "sort sorted sorting")) {
		end = Found(source, start, 0, "out");
	}
	else if (IsA(source, start, 0, 0, "switch switched switching")) {
		end = Found(source, start, 0, "off on");
	}
	else if (IsA(source, start, 0, 0, "take took taking")) {
		end = Found(source, start, 0, "apart back off out");
	}
	else if (IsA(source, start, 0, 0, "tear torn tearing")) {
		end = Found(source, start, 0, "up");
	}
	else if (IsA(source, start, 0, 0, "think thought thinking")) {
		end = Found(source, start, 0, "over");
	}
	else if (IsA(source, start, 0, 0, "thrown threw thrown throwing")) {
		end = Found(source, start, 0, "away");
	}
	else if (IsA(source, start, 0, 0, "turn turned turning")) {
		end = Found(source, start, 0, "down off on");
	}
	else if (IsA(source, start, 0, 0, "try tried trying")) {
		end = Found(source, start, 0, "on out");
	}
	else if (IsA(source, start, 0, 0, "use used using")) {
		end = Found(source, start, 0, "up");
	}
	else if (IsA(source, start, 0, 0, "warm warmed warming")) {
		end = Found(source, start, 0, "up");
	}
	else if (IsA(source, start, 0, 0, "work worked working")) {
		end = Found(source, start, 0, "out");
	}

	// found range to label
	if (end != std::numeric_limits<size_t>::max() &&
			end > start + 1) {
		bool add = true;
		if (revision == 1 && Exist(source,
									start + 1,
									end - 1,
									1,
									"VB VBD VBG VBN VBP VBZ")) {
			// there's a verb in between
			add = false;
		}

		if (add) {
			Range range(start + 1, end - 1, "reorder-label");
			ranges.push_back(range);
		}
	}
  }

  OutputWithLabels(source, ranges, out);
}
Exemplo n.º 9
0
// Splits the Phrase used by the first Part of Track `trackNo` into a series
// of Parts, each partSize long.
//
// Returns immediately if the Track has no Parts or its first Part has no
// Phrase. Otherwise:
//   0. (if pullTrackParameters) program change, bank select, pan, reverb and
//      chorus events found before the first NoteOn are copied into the
//      Track's params/filter and removed from the Phrase;
//   1. the original Part is removed from the Track and deleted; if the Phrase
//      is empty at this point the function returns;
//   2. the Phrase is cut on partSize boundaries; each non-empty chunk becomes
//      a new Part, reusing an identical Phrase from the PhraseList if one
//      exists;
//   3. (if compactParts) reduceParts() is run on the Track;
//   4. (if aggressive) matchParts() is applied along the Track, followed by
//      reduceParts() again if compactParts is set;
//   5. the original Phrase is erased from the Song's PhraseList.
//
// prog_base and max_prog_delta are used only to compute the value passed to
// progress->progress(): prog_base plus the fraction of events consumed so
// far, scaled by max_prog_delta.
void Demidify::disectPhrase(Song *song, size_t trackNo,
                            int prog_base, int max_prog_delta)
{
    if ((*song)[trackNo]->size() == 0) return;

    Phrase *phrase = (*(*song)[trackNo])[0]->phrase();
    if (!phrase) return;

    if (verbose >= 2)
    {
        out << "    |    +- Disecting Phrase '" << phrase->title()
            << "' which has "
            << phrase->size() << " events and is in Track " << trackNo
            << ".\n";
    }

    // STEP ZERO
    // Remove program changes and other fun from the Phrase.

    if (pullTrackParameters && phrase->size())
    {
        if (verbose >= 2) out << "    |    +- Pulling Track parameters.\n";
        PhraseEdit pe;
        pe.reset(phrase);
        size_t    pos = 0;
        bool      culled = false;
        MidiEvent e;
        // An erased event leaves pos unchanged, so the next event is examined
        // at the same index; pos only advances past events that are kept.
        do
        {
            if (pos < pe.size())
            {
                e = pe[pos];
                switch (e.data.status)
                {
                    case MidiCommand_ProgramChange:
                        (*song)[trackNo]->params()->setProgram(e.data.data1);
                        (*song)[trackNo]->filter()->setChannel(e.data.channel);
                        (*song)[trackNo]->filter()->setPort(e.data.port);
                        pe.erase(pos);
                        culled = true;
                        break;
                    case MidiCommand_ControlChange:
                        switch (e.data.data1)
                        {
                            case MidiControl_BankSelectMSB:
                                (*song)[trackNo]->params()->setBankMSB
                                    (e.data.data2);
                                pe.erase(pos);
                                culled = true;
                                break;
                            case MidiControl_BankSelectLSB:
                                (*song)[trackNo]->params()->setBankLSB
                                    (e.data.data2);
                                pe.erase(pos);
                                culled = true;
                                break;
                            case MidiControl_PanMSB:
                                (*song)[trackNo]->params()->setPan
                                    (e.data.data2);
                                pe.erase(pos);
                                culled = true;
                                break;
                            case MidiControl_ReverbDepth:
                                (*song)[trackNo]->params()->setReverb
                                    (e.data.data2);
                                pe.erase(pos);
                                culled = true;
                                break;
                            case MidiControl_ChorusDepth:
                                (*song)[trackNo]->params()->setChorus
                                    (e.data.data2);
                                pe.erase(pos);
                                culled = true;
                                break;
                            default:
                                ++pos;
                        }
                        break;
                    default:
                        ++pos;
                        break;
                }
            }
        }
        // Stop at the end of the events or once a NoteOn has been examined.
        while (pos < pe.size() && e.data.status != MidiCommand_NoteOn);
        if (culled)
        {
            if (verbose >= 2)
                out << "    |    |    |\n"
                    << "    |    |    +- Culled some Track information\n";
            // Continue with the trimmed copy in place of the imported Phrase.
            Phrase *newPhrase = pe.createPhrase(song->phraseList());
            replacePhrase(song, phrase, newPhrase);
            phrase = newPhrase;
        }
    }

    // STEP ONE:
    // Remove the original MidiImport Part from the Track.

    if (verbose >= 2) out << "    |    +- Removing original Part\n";
    Part *part = (*((*song)[trackNo]))[0];
    (*song)[trackNo]->remove(part);
    delete part;

    if (phrase->size() == 0) return;

    // STEP TWO:
    // Break the Phrase up into smaller Phrase chunks.

    if (verbose >= 2) out << "    |    +- Breaking up Phrase\n";

    Clock startTime = (*phrase)[0].time; // start time of Phrase
    Clock endTime;                       // end time of Phrase
    // endTime becomes the latest event time, or note-off time for NoteOns.
    for (size_t pos = 0; pos < phrase->size(); ++pos)
    {
        if ((*phrase)[pos].time > endTime)
            endTime = (*phrase)[pos].time;
        if ((*phrase)[pos].data.status == MidiCommand_NoteOn
            && (*phrase)[pos].offTime > endTime)
            endTime = (*phrase)[pos].offTime;
    }

    size_t pos = 0;
    size_t noParts = 0;
    // partStart begins at startTime divided, then multiplied, by partSize,
    // and advances one partSize per iteration.
    for (Clock partStart = (startTime / partSize) * partSize;
         partStart < endTime && pos < phrase->size();
         partStart += partSize)
    {
        if (progress)
            progress->progress(prog_base + pos*max_prog_delta/phrase->size());

        // Collect the events falling before the end of this window, with
        // their times made relative to partStart.
        PhraseEdit pe;
        MidiEvent  e, lastE;
        do
        {
            e = (*phrase)[pos];
            if (e.time < partStart + partSize)
            {
                MidiEvent e2 = e;
                e2.time    -= partStart;
                if (e2.data.status == MidiCommand_NoteOn)
                    e2.offTime -= partStart;
                pe.insert(e2);
                ++pos;
                lastE = e;
            }
        }
        while (pos < phrase->size() && e.time < partStart + partSize);

        if (pe.size())
        {
            if (verbose >= 3)
                out << "    |    |    +- Created Phrase between "
                    << partStart << "-" << partStart+lastE.time << " with "
                    << pe.size() << " events\n";

            // If there is an identical Phrase already in this Track,
            // use that, otherwise create a new Phrase.

            Phrase *newPhrase = 0;

            for (size_t plpos = 0; plpos < song->phraseList()->size(); ++plpos)
            {
                if (identical(&pe, (*song->phraseList())[plpos]))
                {
                    newPhrase = (*song->phraseList())[plpos];
                    break;
                }
            }

            if (newPhrase == 0)
            {
                newPhrase = pe.createPhrase(song->phraseList());
            }

            Part *newPart = new Part;
            newPart->setStart(partStart);
            newPart->setEnd(partStart+partSize);
            newPart->setPhrase(newPhrase);

            (*song)[trackNo]->insert(newPart);

            ++noParts;
        }
        else
        {
            if (verbose >= 3)
                out << "    |    |    +- No Phrase at "
                    << partStart << "\n";
        }
    }
    if (verbose >= 2)
        out << "    |    |    |    +- split into " << noParts
            << " Parts\n";

    // STEP THREE:
    // Look at the Parts we've inserted. Can any be reduced to repeats in Parts?

    if (compactParts)
    {
        if (verbose >= 2) out << "    |    +- Reducing repeated Parts\n";
        reduceParts(song, trackNo);
    }

    // STEP FOUR:
    // Look at the pattern of Phrases (ignoring the repeated ones).
    // Can we make any bigger Phrases?

    if (aggressive)
    {
        if (verbose >= 2)
            out << "    |    +- Extending Parts/Phrases (there are "
                << (*song)[trackNo]->size() << " Parts)\n";
        size_t pos = 0;
        // Written as pos + 4 < size so that a Track with fewer than four
        // Parts skips the loop; "size - 4" would wrap around in unsigned
        // arithmetic and let pos run far past the last Part.
        // The size is re-read every iteration, and pos advances only when
        // matchParts() returns false.
        while (pos + 4 < (*song)[trackNo]->size())
        {
            if (!matchParts(song, trackNo, pos)) ++pos;
        }
        // Try the repeat thing again?
        if (compactParts) reduceParts(song, trackNo);
    }


    // STEP FIVE:
    // Now remove the original Phrase.

    if (verbose >= 2) out << "    |    +- Removing original Phrase\n";
    song->phraseList()->erase(phrase);


    if (verbose >= 2) out << "    |    +- Phrase disection done\n";
}