Пример #1
0
std::string SymForceAligner::alignSentenceStr(std::string e, std::string f) {
    Alignment a = alignSentence(e, f);

    std::stringstream ss;
    for(Alignment::iterator it = a.begin(); it != a.end(); it++) {
        if(it != a.begin())
            ss << " ";
        ss << it->first << "-" << it->second;
    }
    return ss.str();
}
Пример #2
0
std::string SymForceAligner::getAlignmentsStr() {
    std::stringstream ss;
    for(Alignments::iterator it1 = m_alignments.begin(); it1 != m_alignments.end(); it1++) {
        Alignment a = *it1;
        for(Alignment::iterator it2 = a.begin(); it2 != a.end(); it2++) {
            if(it2 != a.begin())
                ss << " ";
            ss << it2->first << "-" << it2->second;
        }
        ss << std::endl;
    }
    return ss.str();
}
Пример #3
0
// Combines two directional alignments according to m_mode.
//
// Both inputs are first flattened into fixed-size int arrays indexed by
// (point.second + 1) and holding (point.first + 1); every other slot stays 0.
// The largest index written is tracked per array and handed to the c*
// helpers together with the array.
//
// Src2Trg returns a_ unchanged and Trg2Src returns invert(b_). Intersection
// and Union delegate to cIntersection / cUnion. The Grow* modes set
// m_diagonal / m_final / m_bothuncovered and then call cGrow. Any other
// m_mode value also ends in cGrow, with those flags left as they were.
//
// NOTE(review): the arrays hold MAX_M / MAX_N entries and the indices taken
// from the alignment points are not range-checked here -- this assumes every
// point.second + 1 is below the respective bound; confirm against callers.
Alignment SymForceAligner::symmetrize(Alignment& a_, Alignment& b_) {
    int linksA[MAX_M];
    int linksB[MAX_N];
    std::fill_n(linksA, MAX_M, 0);
    std::fill_n(linksB, MAX_N, 0);

    // Flatten a_ and remember the highest index used.
    int maxA = 0;
    for(Alignment::iterator pt = a_.begin(); pt != a_.end(); ++pt) {
        linksA[pt->second+1] = pt->first+1;
        if(pt->second+1 > maxA)
            maxA = pt->second+1;
    }

    // Same for b_.
    int maxB = 0;
    for(Alignment::iterator pt = b_.begin(); pt != b_.end(); ++pt) {
        linksB[pt->second+1] = pt->first+1;
        if(pt->second+1 > maxB)
            maxB = pt->second+1;
    }

    switch(m_mode) {
      case Src2Trg:
        return a_;
      case Trg2Src:
        return invert(b_);
      case Intersection:
        return cIntersection(linksA, maxA, linksB, maxB);
      case Union:
        return cUnion(linksA, maxA, linksB, maxB);

      // The grow family differs only in the heuristic flags; each case sets
      // them and then falls out of the switch to the shared cGrow call.
      case Grow:
        m_diagonal = false;
        m_final = false;
        m_bothuncovered = false;
        break;
      case GrowDiag:
        m_diagonal = true;
        m_final = false;
        m_bothuncovered = false;
        break;
      case GrowDiagFinal:
        m_diagonal = true;
        m_final = true;
        m_bothuncovered = false;
        break;
      case GrowDiagFinalAnd:
        m_diagonal = true;
        m_final = true;
        m_bothuncovered = true;
        break;
    }

    return cGrow(linksA, maxA, linksB, maxB);
}
Пример #4
0
// Builds the alignment graph for one sentence pair.
//
//   t  parse tree of the target sentence; copied via CopyParseTree()
//   s  source words; one SOURCE node is created per word, in order
//   a  word alignment; each point links source node `first` to target
//      tree leaf `second` (leaves as returned by GetTargetTreeLeaves)
//
// NOTE(review): alignment indices are used without range checks -- assumes
// every point lies within s.size() and the number of tree leaves.
AlignmentGraph::AlignmentGraph(const ParseTree *t,
                               const std::vector<std::string> &s,
                               const Alignment &a)
{
  // Target side: take a private copy of the parse tree (CopyParseTree is
  // expected to register the copied nodes in m_targetNodes).
  m_root = CopyParseTree(t);

  // Source side: one node per word.
  typedef std::vector<std::string>::size_type WordIndex;
  const WordIndex wordCount = s.size();
  m_sourceNodes.reserve(wordCount);
  for (WordIndex w = 0; w < wordCount; ++w) {
    m_sourceNodes.push_back(new Node(s[w], SOURCE));
  }

  // Wire each aligned source word to its target leaf, in both directions.
  std::vector<Node *> leaves;
  GetTargetTreeLeaves(m_root, leaves);
  Alignment::const_iterator link = a.begin();
  while (link != a.end()) {
    Node *sourceNode = m_sourceNodes[link->first];
    Node *leaf = leaves[link->second];
    sourceNode->AddParent(leaf);
    leaf->AddChild(sourceNode);
    ++link;
  }

  // Source words without any alignment point still need a place in the graph.
  AttachUnalignedSourceWords();

  // Populate node spans: each source node propagates its sentence position.
  int position = 0;
  for (std::vector<Node *>::const_iterator node = m_sourceNodes.begin();
       node != m_sourceNodes.end(); ++node) {
    (*node)->PropagateIndex(position);
    ++position;
  }

  // Complement spans are computed last, starting from the root.
  CalcComplementSpans(m_root);
}
// Builds the alignment graph for one sentence pair.
//
//   t  parse tree of the target sentence; copied via copyParseTree(), which
//      also receives m_targetNodes
//   s  source words; one SOURCE node is created per word, in order
//   a  word alignment; each point links source node `first` to target
//      tree leaf `second` (leaves as returned by getTargetTreeLeaves)
//
// NOTE(review): alignment indices are used without range checks -- assumes
// every point lies within s.size() and the number of tree leaves.
AlignmentGraph::AlignmentGraph(const ParseTree * t,
                               const std::vector<std::string> & s,
                               const Alignment & a)
{
    // Target side: private copy of the parse tree.
    m_root = copyParseTree(t, m_targetNodes);

    // Source side: one node per word.
    typedef std::vector<std::string>::size_type WordIndex;
    const WordIndex wordCount = s.size();
    m_sourceNodes.reserve(wordCount);
    for (WordIndex w = 0; w < wordCount; ++w)
    {
        m_sourceNodes.push_back(new Node(s[w], SOURCE));
    }

    // Wire each aligned source word to its target leaf, in both directions.
    std::vector<Node *> leaves;
    getTargetTreeLeaves(m_root, leaves);

    Alignment::const_iterator link = a.begin();
    while (link != a.end())
    {
        Node * sourceNode = m_sourceNodes[link->first];
        Node * leaf = leaves[link->second];
        sourceNode->addParent(leaf);
        leaf->addChild(sourceNode);
        ++link;
    }
}
Пример #6
0
// Returns a new alignment containing every point of `a` with its two
// components swapped: (first, second) becomes (second, first).
Alignment invert(Alignment a) {
    Alignment flipped;
    Alignment::iterator pt = a.begin();
    while(pt != a.end()) {
        flipped.insert(AlignmentPoint(pt->second, pt->first));
        ++pt;
    }
    return flipped;
}
// Precomputes span information for one word-aligned sentence pair.
//
//   sourceSize  number of source words (stored in m_countF)
//   targetSize  number of target words (stored in m_countE)
//   alignment   alignment points; `first` is used as a source index and
//               `second` as a target index
//
// The constructor fills:
//   * m_alignedToT[e]: source positions aligned to target word e;
//   * m_minAndMaxAlignedToSourceSpan[(startF,endF)]: min/max target position
//     aligned to any word of the source span, or (INT_MAX, -1) if none;
//   * m_minAndMaxAlignedToTargetSpan[(startE,endE)]: the same, seen from the
//     target side;
// and calls InsertPhraseVertices() for every phrase pair that is consistent
// with the alignment. The source side of such a pair may be extended over
// neighbouring unaligned source words.
//
// NOTE(review): alignment points are used as indices without range checks --
// assumes first < sourceSize and second < targetSize; confirm with callers.
PhraseOrientation::PhraseOrientation(int sourceSize,
                                     int targetSize,
                                     const Alignment &alignment)
  : m_countF(sourceSize)
  , m_countE(targetSize)
{
  // Per-word alignment tables, one (initially empty) list per word.
  std::vector<std::vector<int> > alignedToS;
  for (int fi=0; fi<m_countF; ++fi) {
    alignedToS.push_back(std::vector<int>());
  }
  for (int ei=0; ei<m_countE; ++ei) {
    m_alignedToT.push_back(std::vector<int>());
  }
  // alignedCountS[f]: number of alignment points touching source word f.
  std::vector<int> alignedCountS(m_countF,0);

  for (Alignment::const_iterator a=alignment.begin(); a!=alignment.end(); ++a) {
    m_alignedToT[a->second].push_back(a->first);
    alignedCountS[a->first]++;
    alignedToS[a->first].push_back(a->second);
  }

  // Min/max aligned target position for every source span. For a fixed
  // startF the span only grows to the right, so the running min/max is
  // extended with the links of the newly added word endF instead of
  // rescanning the whole span for every endF.
  for (int startF=0; startF<m_countF; ++startF) {
    int minE = std::numeric_limits<int>::max();
    int maxE = -1;
    for (int endF=startF; endF<m_countF; ++endF) {
      for (size_t i=0; i<alignedToS[endF].size(); ++i) {
        const int ei = alignedToS[endF][i];
        if (ei<minE) {
          minE = ei;
        }
        if (ei>maxE) {
          maxE = ei;
        }
      }

      m_minAndMaxAlignedToSourceSpan[ std::pair<int,int>(startF,endF) ] = std::pair<int,int>(minE,maxE);
    }
  }

  // Check alignments for target phrase startE...endE: loop over continuous
  // phrases which are compatible with the word alignments. As above, the
  // per-span state (minF, maxF, usedF) is accumulated while endE advances.
  for (int startE=0; startE<m_countE; ++startE) {
    int minF = std::numeric_limits<int>::max();
    int maxF = -1;
    // usedF[f]: links of source word f NOT yet covered by startE..endE.
    std::vector< int > usedF = alignedCountS;

    for (int endE=startE; endE<m_countE; ++endE) {
      for (size_t i=0; i<m_alignedToT[endE].size(); ++i) {
        const int fi = m_alignedToT[endE][i];
        if (fi<minF) {
          minF = fi;
        }
        if (fi>maxF) {
          maxF = fi;
        }
        usedF[fi]--;
      }

      m_minAndMaxAlignedToTargetSpan[ std::pair<int,int>(startE,endE) ] = std::pair<int,int>(minF,maxF);

      if (maxF < 0) {
        continue; // target span is not aligned to any source word at all
      }

      // A source word inside minF..maxF that still has uncovered links is
      // aligned to a target word outside startE..endE: no consistent phrase.
      bool out_of_bounds = false;
      for (int fi=minF; fi<=maxF && !out_of_bounds; ++fi) {
        if (usedF[fi]>0) {
          out_of_bounds = true;
        }
      }
      if (out_of_bounds) {
        continue;
      }

      // start point of source phrase may retreat over unaligned words
      for (int startF=minF;
           (startF>=0 &&
            (startF==minF || alignedCountS[startF]==0));
           startF--) {
        // end point of source phrase may advance over unaligned words
        for (int endF=maxF;
             (endF<m_countF &&
              (endF==maxF || alignedCountS[endF]==0));
             endF++) {
          // at this point we have extracted a phrase
          InsertPhraseVertices(m_topLeft, m_topRight, m_bottomLeft, m_bottomRight,
                               startF, startE, endF, endE);
        }
      }
    }
  }
}