// Combines two directional alignments according to m_mode.
//
// Both inputs are first flattened into lookup tables that are passed to the
// cIntersection / cUnion / cGrow helpers:
//   a[second + 1] = first + 1 for every point of a_, m = largest index written
//   b[second + 1] = first + 1 for every point of b_, n = largest index written
// Entries that are never written stay 0.
//
// Src2Trg returns a_ unchanged and Trg2Src returns invert(b_). The Grow*
// modes set m_diagonal / m_final / m_bothuncovered before calling cGrow. Any
// mode not listed in the switch calls cGrow with the flags left as they are.
//
// NOTE(review): indices into a/b are not range-checked against MAX_M / MAX_N;
// presumably callers guarantee sentence lengths below those limits -- confirm.
Alignment SymForceAligner::symmetrize(Alignment& a_, Alignment& b_)
{
  int a[MAX_M];
  int b[MAX_N];
  std::fill_n(a, MAX_M, 0);
  std::fill_n(b, MAX_N, 0);

  int m = 0;
  for (Alignment::iterator link = a_.begin(); link != a_.end(); ++link) {
    const int slot = link->second + 1;
    a[slot] = link->first + 1;
    if (slot > m) {
      m = slot;
    }
  }

  int n = 0;
  for (Alignment::iterator link = b_.begin(); link != b_.end(); ++link) {
    const int slot = link->second + 1;
    b[slot] = link->first + 1;
    if (slot > n) {
      n = slot;
    }
  }

  switch (m_mode) {
    // Modes that do not involve the grow heuristic return immediately.
    case Src2Trg:
      return a_;
    case Trg2Src:
      return invert(b_);
    case Intersection:
      return cIntersection(a, m, b, n);
    case Union:
      return cUnion(a, m, b, n);

    // Grow variants only differ in the flags they set; all of them end up in
    // the shared cGrow call below.
    case Grow:
      m_diagonal = false;
      m_final = false;
      m_bothuncovered = false;
      break;
    case GrowDiag:
      m_diagonal = true;
      m_final = false;
      m_bothuncovered = false;
      break;
    case GrowDiagFinal:
      m_diagonal = true;
      m_final = true;
      m_bothuncovered = false;
      break;
    case GrowDiagFinalAnd:
      m_diagonal = true;
      m_final = true;
      m_bothuncovered = true;
      break;
  }

  return cGrow(a, m, b, n);
}
std::string SymForceAligner::alignSentenceStr(std::string e, std::string f) { Alignment a = alignSentence(e, f); std::stringstream ss; for(Alignment::iterator it = a.begin(); it != a.end(); it++) { if(it != a.begin()) ss << " "; ss << it->first << "-" << it->second; } return ss.str(); }
std::string SymForceAligner::getAlignmentsStr() { std::stringstream ss; for(Alignments::iterator it1 = m_alignments.begin(); it1 != m_alignments.end(); it1++) { Alignment a = *it1; for(Alignment::iterator it2 = a.begin(); it2 != a.end(); it2++) { if(it2 != a.begin()) ss << " "; ss << it2->first << "-" << it2->second; } ss << std::endl; } return ss.str(); }
// Builds the alignment graph for one sentence pair.
//
//   t - target-side parse tree; it is copied via CopyParseTree and the copy
//       becomes m_root
//   s - source words; one SOURCE node is created per word, in order
//   a - word alignment; each point links source node [first] with target tree
//       leaf [second]
//
// NOTE(review): the indices taken from `a` are used unchecked; presumably the
// caller guarantees they are within the sentence / leaf ranges -- confirm.
AlignmentGraph::AlignmentGraph(const ParseTree *t,
                               const std::vector<std::string> &s,
                               const Alignment &a)
{
  // Work on a private copy of the parse tree.
  m_root = CopyParseTree(t);

  // One SOURCE node per source word.
  m_sourceNodes.reserve(s.size());
  for (std::vector<std::string>::size_type w = 0; w < s.size(); ++w) {
    m_sourceNodes.push_back(new Node(s[w], SOURCE));
  }

  // Link source nodes and parse tree leaves in both directions, following
  // the word alignment.
  std::vector<Node *> targetTreeLeaves;
  GetTargetTreeLeaves(m_root, targetTreeLeaves);
  Alignment::const_iterator link = a.begin();
  while (link != a.end()) {
    Node *const sourceNode = m_sourceNodes[link->first];
    Node *const leafNode = targetTreeLeaves[link->second];
    sourceNode->AddParent(leafNode);
    leafNode->AddChild(sourceNode);
    ++link;
  }

  // Source words without any alignment point still need a place in the graph.
  AttachUnalignedSourceWords();

  // Populate node spans: every source node propagates its sentence position.
  int position = 0;
  for (std::vector<Node *>::const_iterator node = m_sourceNodes.begin();
       node != m_sourceNodes.end(); ++node) {
    (*node)->PropagateIndex(position);
    ++position;
  }

  // Complement spans are computed last, once the spans are in place.
  CalcComplementSpans(m_root);
}
// Builds the alignment graph for one sentence pair.
//
//   t - target-side parse tree; copied via copyParseTree (which also receives
//       m_targetNodes), the copy becomes m_root
//   s - source words; one SOURCE node is created per word, in order
//   a - word alignment; each point links source node [first] with target tree
//       leaf [second]
//
// NOTE(review): the indices taken from `a` are used unchecked; presumably the
// caller guarantees they are within the sentence / leaf ranges -- confirm.
AlignmentGraph::AlignmentGraph(const ParseTree * t,
                               const std::vector<std::string> & s,
                               const Alignment & a)
{
  m_root = copyParseTree(t, m_targetNodes);

  // One SOURCE node per source word.
  m_sourceNodes.reserve(s.size());
  for (std::vector<std::string>::size_type w = 0; w < s.size(); ++w) {
    m_sourceNodes.push_back(new Node(s[w], SOURCE));
  }

  // Link source nodes and parse tree leaves in both directions, following
  // the word alignment.
  std::vector<Node *> targetTreeLeaves;
  getTargetTreeLeaves(m_root, targetTreeLeaves);
  Alignment::const_iterator link = a.begin();
  while (link != a.end()) {
    Node * const sourceNode = m_sourceNodes[link->first];
    Node * const leafNode = targetTreeLeaves[link->second];
    sourceNode->addParent(leafNode);
    leafNode->addChild(sourceNode);
    ++link;
  }
}
// Returns a new alignment in which every point (first, second) of `a` is
// replaced by (second, first).
Alignment invert(Alignment a)
{
  Alignment flipped;
  Alignment::iterator point = a.begin();
  while (point != a.end()) {
    flipped.insert(AlignmentPoint(point->second, point->first));
    ++point;
  }
  return flipped;
}
// Precomputes alignment span tables for a sentence pair and registers the
// phrase pairs that are consistent with the word alignment.
//
//   sourceSize - number of source words (stored in m_countF)
//   targetSize - number of target words (stored in m_countE)
//   alignment  - alignment points; `first` is used as a source index and
//                `second` as a target index
//
// Filled by this constructor:
//   m_alignedToT[e]                        source indices linked to target e
//   m_minAndMaxAlignedToSourceSpan[(s,e)]  min/max target index linked to any
//                                          source word in s..e
//   m_minAndMaxAlignedToTargetSpan[(s,e)]  min/max source index linked to any
//                                          target word in s..e
// A span without links is stored as (numeric_limits<int>::max(), -1).
//
// NOTE(review): alignment indices are used unchecked; presumably the caller
// guarantees first < sourceSize and second < targetSize -- confirm.
PhraseOrientation::PhraseOrientation(int sourceSize,
                                     int targetSize,
                                     const Alignment &alignment)
  : m_countF(sourceSize)
  , m_countE(targetSize)
{
  typedef std::pair<int,int> Span;

  // Per-word link tables: one (initially empty) list per source word and one
  // per target word.
  std::vector<std::vector<int> > alignedToS;
  for (int f = 0; f < m_countF; ++f) {
    alignedToS.push_back(std::vector<int>());
  }
  for (int e = 0; e < m_countE; ++e) {
    m_alignedToT.push_back(std::vector<int>());
  }

  // alignedCountS[f] = number of alignment points that touch source word f.
  std::vector<int> alignedCountS(m_countF,0);
  for (Alignment::const_iterator link = alignment.begin();
       link != alignment.end(); ++link) {
    alignedToS[link->first].push_back(link->second);
    m_alignedToT[link->second].push_back(link->first);
    ++alignedCountS[link->first];
  }

  // Min/max aligned target index for every contiguous source span.
  for (int startF = 0; startF < m_countF; ++startF) {
    for (int endF = startF; endF < m_countF; ++endF) {
      int minE = std::numeric_limits<int>::max();
      int maxE = -1;
      for (int fi = startF; fi <= endF; ++fi) {
        const std::vector<int> &targets = alignedToS[fi];
        for (std::vector<int>::const_iterator t = targets.begin();
             t != targets.end(); ++t) {
          if (*t < minE) {
            minE = *t;
          }
          if (*t > maxE) {
            maxE = *t;
          }
        }
      }
      m_minAndMaxAlignedToSourceSpan[ Span(startF,endF) ] = Span(minE,maxE);
    }
  }

  // Walk over every contiguous target span startE..endE and find the source
  // phrases that are compatible with the word alignment.
  for (int startE = 0; startE < m_countE; ++startE) {
    for (int endE = startE; endE < m_countE; ++endE) {
      int minF = std::numeric_limits<int>::max();
      int maxF = -1;

      // usedF[f] starts at the total link count of f and loses one for every
      // link that falls inside the target span; whatever remains positive is
      // linked to a target word outside the span.
      std::vector<int> usedF(alignedCountS);
      for (int ei = startE; ei <= endE; ++ei) {
        for (size_t i = 0; i < m_alignedToT[ei].size(); ++i) {
          const int fi = m_alignedToT[ei][i];
          if (fi < minF) {
            minF = fi;
          }
          if (fi > maxF) {
            maxF = fi;
          }
          --usedF[fi];
        }
      }
      m_minAndMaxAlignedToTargetSpan[ Span(startE,endE) ] = Span(minF,maxF);

      // Target span not aligned to any source word at all: nothing to extract.
      if (maxF < 0) {
        continue;
      }

      // Reject the span if a source word in minF..maxF is also aligned to a
      // target word outside startE..endE.
      bool out_of_bounds = false;
      for (int fi = minF; fi <= maxF; ++fi) {
        if (usedF[fi] > 0) {
          out_of_bounds = true;
          break;
        }
      }
      if (out_of_bounds) {
        continue;
      }

      // The start of the source phrase may retreat over unaligned words ...
      for (int startF = minF;
           startF >= 0 && (startF == minF || alignedCountS[startF] == 0);
           --startF) {
        // ... and its end may advance over unaligned words.
        for (int endF = maxF;
             endF < m_countF && (endF == maxF || alignedCountS[endF] == 0);
             ++endF) {
          // Every combination reached here is an extracted phrase pair.
          InsertPhraseVertices(m_topLeft, m_topRight, m_bottomLeft, m_bottomRight,
                               startF, startE, endF, endE);
        }
      }
    }
  }
}