// O(n*w) void Aho_Corasick::Trie::buildForwards(const vector <Entry> &dict) { for (int i=0; i<dict.size(); i++) { addPhrase(i, dict[i]); } }
/*
 * Reads lines with getLine() until it returns 0, turns each line into a
 * Phrase with parseLine(), stores it through addPhrase(), and finally prints
 * and releases the collected phrases.
 *
 * argc and argv are not used.
 *
 * Returns 0 on normal completion, 1 if the initial array cannot be allocated.
 */
int main(int argc, char** argv) {
    int size = ARR_DEFAULT;
    Phrase * phrases = (Phrase*)malloc(size*sizeof(Phrase));
    char * line;
    /* Number of phrases stored so far. It must start at 0: it is passed to
     * addPhrase() as the insert position and later to printPhrases() and
     * freeBullshit() as the element count. */
    int index = 0;

    if (phrases == 0) {
        return 1;
    }

    while((line = getLine()) != 0) {
        Phrase * p = parseLine(line);
        /* addPhrase() returns the array pointer to keep using and receives
         * &size, so it presumably grows the array when needed (confirm). */
        phrases = addPhrase(phrases, p, index, &size);
        free(p);
        index++;
    }

    /* NOTE(review): `line` is always 0 here, so the disabled call below would
     * free nothing. The buffers returned by getLine() inside the loop are
     * never freed in this function; whether parseLine() keeps or copies them
     * is not visible from here. */
    //free(line);

    /* NOTE(review): sorting is disabled. `phrases` is an array of Phrase, so
     * if this is re-enabled the element size should presumably be
     * sizeof(Phrase), not sizeof(Phrase*). */
    //qsort(phrases, index, sizeof(Phrase*), phraseComp);

    printPhrases(phrases, index);
    freeBullshit(phrases, index);
    return 0;
}
void ExtractTask::extract(SentenceAlignment &sentence) { int countE = sentence.target.size(); int countF = sentence.source.size(); HPhraseVector inboundPhrases; HSentenceVertices inTopLeft; HSentenceVertices inTopRight; HSentenceVertices inBottomLeft; HSentenceVertices inBottomRight; HSentenceVertices outTopLeft; HSentenceVertices outTopRight; HSentenceVertices outBottomLeft; HSentenceVertices outBottomRight; HSentenceVertices::const_iterator it; bool relaxLimit = m_options.isHierModel(); bool buildExtraStructure = m_options.isPhraseModel() || m_options.isHierModel(); // check alignments for target phrase startE...endE // loop over extracted phrases which are compatible with the word-alignments for(int startE=0; startE<countE; startE++) { for(int endE=startE; (endE<countE && (relaxLimit || endE<startE+m_options.maxPhraseLength)); endE++) { int minF = 9999; int maxF = -1; vector< int > usedF = sentence.alignedCountS; for(int ei=startE; ei<=endE; ei++) { for(size_t i=0; i<sentence.alignedToT[ei].size(); i++) { int fi = sentence.alignedToT[ei][i]; if (fi<minF) { minF = fi; } if (fi>maxF) { maxF = fi; } usedF[ fi ]--; } } if (maxF >= 0 && // aligned to any source words at all (relaxLimit || maxF-minF < m_options.maxPhraseLength)) { // source phrase within limits // check if source words are aligned to out of bound target words bool out_of_bounds = false; for(int fi=minF; fi<=maxF && !out_of_bounds; fi++) if (usedF[fi]>0) { // cout << "ouf of bounds: " << fi << "\n"; out_of_bounds = true; } // cout << "doing if for ( " << minF << "-" << maxF << ", " << startE << "," << endE << ")\n"; if (!out_of_bounds) { // start point of source phrase may retreat over unaligned for(int startF=minF; (startF>=0 && (relaxLimit || startF>maxF-m_options.maxPhraseLength) && // within length limit (startF==minF || sentence.alignedCountS[startF]==0)); // unaligned startF--) // end point of source phrase may advance over unaligned for(int endF=maxF; (endF<countF && (relaxLimit || 
endF<startF+m_options.maxPhraseLength) && // within length limit (endF - startF + 1 > m_options.minPhraseLength) && // within length limit (endF==maxF || sentence.alignedCountS[endF]==0)); // unaligned endF++) { // at this point we have extracted a phrase if(buildExtraStructure) { // phrase || hier if(endE-startE < m_options.maxPhraseLength && endF-startF < m_options.maxPhraseLength) { // within limit inboundPhrases.push_back(HPhrase(HPhraseVertex(startF,startE), HPhraseVertex(endF,endE))); insertPhraseVertices(inTopLeft, inTopRight, inBottomLeft, inBottomRight, startF, startE, endF, endE); } else insertPhraseVertices(outTopLeft, outTopRight, outBottomLeft, outBottomRight, startF, startE, endF, endE); } else { string orientationInfo = ""; if(m_options.isWordModel()) { REO_POS wordPrevOrient, wordNextOrient; bool connectedLeftTopP = isAligned( sentence, startF-1, startE-1 ); bool connectedRightTopP = isAligned( sentence, endF+1, startE-1 ); bool connectedLeftTopN = isAligned( sentence, endF+1, endE+1 ); bool connectedRightTopN = isAligned( sentence, startF-1, endE+1 ); wordPrevOrient = getOrientWordModel(sentence, m_options.isWordType(), connectedLeftTopP, connectedRightTopP, startF, endF, startE, endE, countF, 0, 1, &ge, <); wordNextOrient = getOrientWordModel(sentence, m_options.isWordType(), connectedLeftTopN, connectedRightTopN, endF, startF, endE, startE, 0, countF, -1, <, &ge); orientationInfo += getOrientString(wordPrevOrient, m_options.isWordType()) + " " + getOrientString(wordNextOrient, m_options.isWordType()); if(m_options.isAllModelsOutputFlag()) " | | "; } addPhrase(sentence, startE, endE, startF, endF, orientationInfo); } } } } } } }