Ejemplo n.º 1
0
void ScfgRuleWriter::WriteUnpairedFormat(const ScfgRule &rule,
    std::ostream &sourceSS,
    std::ostream &targetSS)
{
  const std::vector<Symbol> &sourceRHS = rule.GetSourceRHS();
  const std::vector<Symbol> &targetRHS = rule.GetTargetRHS();

  // Write the source side of the rule to sourceSS.
  int i = 0;
  for (std::vector<Symbol>::const_iterator p(sourceRHS.begin());
       p != sourceRHS.end(); ++p, ++i) {
    WriteSymbol(*p, sourceSS);
    sourceSS << " ";
  }
  if (m_options.conditionOnTargetLhs) {
    WriteSymbol(rule.GetTargetLHS(), sourceSS);
  } else {
    WriteSymbol(rule.GetSourceLHS(), sourceSS);
  }

  // Write the target side of the rule to targetSS.
  i = 0;
  for (std::vector<Symbol>::const_iterator p(targetRHS.begin());
       p != targetRHS.end(); ++p, ++i) {
    WriteSymbol(*p, targetSS);
    targetSS << " ";
  }
  WriteSymbol(rule.GetTargetLHS(), targetSS);
}
Ejemplo n.º 2
0
void ScfgRuleWriter::Write(const ScfgRule &rule)
{
  std::ostringstream sourceSS;
  std::ostringstream targetSS;

  if (m_options.unpairedExtractFormat) {
    WriteUnpairedFormat(rule, sourceSS, targetSS);
  } else {
    WriteStandardFormat(rule, sourceSS, targetSS);
  }

  // Write the rule to the forward and inverse extract files.
  m_fwd << sourceSS.str() << " ||| " << targetSS.str() << " |||";
  m_inv << targetSS.str() << " ||| " << sourceSS.str() << " |||";

  const Alignment &alignment = rule.GetAlignment();
  for (Alignment::const_iterator p = alignment.begin();
       p != alignment.end(); ++p) {
    m_fwd << " " << p->first << "-" << p->second;
    m_inv << " " << p->second << "-" << p->first;
  }

  // Write a count of 1 and an empty NT length column to the forward extract
  // file.
  // TODO Add option to write NT length?
  m_fwd << " ||| 1 ||| |||";
  if (m_options.pcfg) {
    // Write the PCFG score.
    m_fwd << " " << std::exp(rule.GetPcfgScore());
  }
  m_fwd << std::endl;

  // Write a count of 1 to the inverse extract file.
  m_inv << " ||| 1" << std::endl;
}
Ejemplo n.º 3
0
void ScfgRuleWriter::WriteStandardFormat(const ScfgRule &rule)
{
  const std::vector<Symbol> &sourceRHS = rule.GetSourceRHS();
  const std::vector<Symbol> &targetRHS = rule.GetTargetRHS();

  std::map<int, int> sourceToTargetNTMap;
  std::map<int, int> targetToSourceNTMap;

  const Alignment &alignment = rule.GetAlignment();

  for (Alignment::const_iterator p(alignment.begin());
       p != alignment.end(); ++p) {
    if (sourceRHS[p->first].GetType() == NonTerminal) {
      assert(targetRHS[p->second].GetType() == NonTerminal);
      sourceToTargetNTMap[p->first] = p->second;
      targetToSourceNTMap[p->second] = p->first;
    }
  }

  std::ostringstream sourceSS;
  std::ostringstream targetSS;

  // Write the source side of the rule to sourceSS.
  int i = 0;
  for (std::vector<Symbol>::const_iterator p(sourceRHS.begin());
       p != sourceRHS.end(); ++p, ++i) {
    WriteSymbol(*p, sourceSS);
    if (p->GetType() == NonTerminal) {
      int targetIndex = sourceToTargetNTMap[i];
      WriteSymbol(targetRHS[targetIndex], sourceSS);
    }
    sourceSS << " ";
  }
  WriteSymbol(rule.GetSourceLHS(), sourceSS);

  // Write the target side of the rule to targetSS.
  i = 0;
  for (std::vector<Symbol>::const_iterator p(targetRHS.begin());
       p != targetRHS.end(); ++p, ++i) {
    if (p->GetType() == NonTerminal) {
      int sourceIndex = targetToSourceNTMap[i];
      WriteSymbol(sourceRHS[sourceIndex], targetSS);
    }
    WriteSymbol(*p, targetSS);
    targetSS << " ";
  }
  WriteSymbol(rule.GetTargetLHS(), targetSS);

  // Write the rule to the forward and inverse extract files.
  m_fwd << sourceSS.str() << " ||| " << targetSS.str() << " |||";
  m_inv << targetSS.str() << " ||| " << sourceSS.str() << " |||";
  for (Alignment::const_iterator p(alignment.begin());
       p != alignment.end(); ++p) {
    m_fwd << " " << p->first << "-" << p->second;
    m_inv << " " << p->second << "-" << p->first;
  }
  m_fwd << " ||| 1" << std::endl;
  m_inv << " ||| 1" << std::endl;
}
Ejemplo n.º 4
0
void ScfgRuleWriter::WriteUnpairedFormat(const ScfgRule &rule)
{
  const std::vector<Symbol> &sourceRHS = rule.GetSourceRHS();
  const std::vector<Symbol> &targetRHS = rule.GetTargetRHS();
  const Alignment &alignment = rule.GetAlignment();

  std::ostringstream sourceSS;
  std::ostringstream targetSS;

  // Write the source side of the rule to sourceSS.
  int i = 0;
  for (std::vector<Symbol>::const_iterator p(sourceRHS.begin());
       p != sourceRHS.end(); ++p, ++i) {
    WriteSymbol(*p, sourceSS);
    sourceSS << " ";
  }
  WriteSymbol(rule.GetSourceLHS(), sourceSS);

  // Write the target side of the rule to targetSS.
  i = 0;
  for (std::vector<Symbol>::const_iterator p(targetRHS.begin());
       p != targetRHS.end(); ++p, ++i) {
    WriteSymbol(*p, targetSS);
    targetSS << " ";
  }
  WriteSymbol(rule.GetTargetLHS(), targetSS);

  // Write the rule to the forward and inverse extract files.
  m_fwd << sourceSS.str() << " ||| " << targetSS.str() << " |||";
  m_inv << targetSS.str() << " ||| " << sourceSS.str() << " |||";
  for (Alignment::const_iterator p(alignment.begin());
       p != alignment.end(); ++p) {
    m_fwd << " " << p->first << "-" << p->second;
    m_inv << " " << p->second << "-" << p->first;
  }
  m_fwd << " ||| 1" << std::endl;
  m_inv << " ||| 1" << std::endl;
}
Ejemplo n.º 5
0
void ScfgRuleWriter::Write(const ScfgRule &rule, bool printEndl)
{
  std::ostringstream sourceSS;
  std::ostringstream targetSS;

  if (m_options.unpairedExtractFormat) {
    WriteUnpairedFormat(rule, sourceSS, targetSS);
  } else {
    WriteStandardFormat(rule, sourceSS, targetSS);
  }

  // Write the rule to the forward and inverse extract files.
  m_fwd << sourceSS.str() << " ||| " << targetSS.str() << " |||";
  m_inv << targetSS.str() << " ||| " << sourceSS.str() << " |||";

  const Alignment &alignment = rule.GetAlignment();
  for (Alignment::const_iterator p = alignment.begin();
       p != alignment.end(); ++p) {
    m_fwd << " " << p->first << "-" << p->second;
    m_inv << " " << p->second << "-" << p->first;
  }

  // Write a count of 1.
  m_fwd << " ||| 1";
  m_inv << " ||| 1";

  // Write the PCFG score (if requested).
  if (m_options.pcfg) {
    m_fwd << " ||| " << std::exp(rule.GetPcfgScore());
  }

  if (printEndl) {
    m_fwd << std::endl;
    m_inv << std::endl;
  }
}
Ejemplo n.º 6
0
void ScfgRuleWriter::WriteStandardFormat(const ScfgRule &rule,
    std::ostream &sourceSS,
    std::ostream &targetSS)
{
  const std::vector<Symbol> &sourceRHS = rule.GetSourceRHS();
  const std::vector<Symbol> &targetRHS = rule.GetTargetRHS();

  std::map<int, int> sourceToTargetNTMap;
  std::map<int, int> targetToSourceNTMap;

  const Alignment &alignment = rule.GetAlignment();

  for (Alignment::const_iterator p(alignment.begin());
       p != alignment.end(); ++p) {
    if (sourceRHS[p->first].GetType() == NonTerminal) {
      assert(targetRHS[p->second].GetType() == NonTerminal);
      sourceToTargetNTMap[p->first] = p->second;
      targetToSourceNTMap[p->second] = p->first;
    }
  }

  // Write the source side of the rule to sourceSS.
  int i = 0;
  for (std::vector<Symbol>::const_iterator p(sourceRHS.begin());
       p != sourceRHS.end(); ++p, ++i) {
    WriteSymbol(*p, sourceSS);
    if (p->GetType() == NonTerminal) {
      int targetIndex = sourceToTargetNTMap[i];
      WriteSymbol(targetRHS[targetIndex], sourceSS);
    }
    sourceSS << " ";
  }
  if (m_options.conditionOnTargetLhs) {
    WriteSymbol(rule.GetTargetLHS(), sourceSS);
  } else {
    WriteSymbol(rule.GetSourceLHS(), sourceSS);
  }

  // Write the target side of the rule to targetSS.
  i = 0;
  for (std::vector<Symbol>::const_iterator p(targetRHS.begin());
       p != targetRHS.end(); ++p, ++i) {
    if (p->GetType() == NonTerminal) {
      int sourceIndex = targetToSourceNTMap[i];
      WriteSymbol(sourceRHS[sourceIndex], targetSS);
    }
    WriteSymbol(*p, targetSS);
    targetSS << " ";
  }
  WriteSymbol(rule.GetTargetLHS(), targetSS);
}
Ejemplo n.º 7
0
void ScfgRuleWriter::Write(const ScfgRule &rule, size_t lineNum, bool printEndl)
{
  std::ostringstream sourceSS;
  std::ostringstream targetSS;

  if (m_options.unpairedExtractFormat) {
    WriteUnpairedFormat(rule, sourceSS, targetSS);
  } else {
    WriteStandardFormat(rule, sourceSS, targetSS);
  }

  // Write the rule to the forward and inverse extract files.
  if (m_options.t2s) {
    // If model is tree-to-string then flip the source and target.
    m_fwd << targetSS.str() << " ||| " << sourceSS.str() << " |||";
    m_inv << sourceSS.str() << " ||| " << targetSS.str() << " |||";
  } else {
    m_fwd << sourceSS.str() << " ||| " << targetSS.str() << " |||";
    m_inv << targetSS.str() << " ||| " << sourceSS.str() << " |||";
  }

  const Alignment &alignment = rule.GetAlignment();
  for (Alignment::const_iterator p = alignment.begin();
       p != alignment.end(); ++p) {
    if (m_options.t2s) {
      // If model is tree-to-string then flip the source and target.
      m_fwd << " " << p->second << "-" << p->first;
      m_inv << " " << p->first << "-" << p->second;
    } else {
      m_fwd << " " << p->first << "-" << p->second;
      m_inv << " " << p->second << "-" << p->first;
    }
  }

  if (m_options.includeSentenceId) {
    if (m_options.t2s) {
      m_inv << " ||| " << lineNum;
    } else {
      m_fwd << " ||| " << lineNum;
    }
  }

  // Write a count of 1.
  m_fwd << " ||| 1";
  m_inv << " ||| 1";

  // Write the PCFG score (if requested).
  if (m_options.pcfg) {
    m_fwd << " ||| " << std::exp(rule.GetPcfgScore());
  }

  m_fwd << " |||";

  if (m_options.sourceLabels && rule.HasSourceLabels()) {
    m_fwd << " {{SourceLabels";
    rule.PrintSourceLabels(m_fwd);
    m_fwd << "}}";
  }

  if (printEndl) {
    m_fwd << std::endl;
    m_inv << std::endl;
  }
}