/**
 * Translates a single lattice edge with the transfer rules.
 *
 * A new tmil::Transferer is created for the call and loaded with the rules
 * from rulesFile_. The edge is converted to a zsyntree, translated, and the
 * resulting tree is put back into the lattice (tagged with tags_) followed
 * by its target forms. The source and target trees are traced on std::cerr.
 *
 * @param lattice  lattice that holds the edge and receives the results
 * @param edge     edge to be translated
 */
void TransfererRunner::processEdge(Lattice& lattice, Lattice::EdgeDescriptor edge) {
    std::cerr << "PROCESSING:";

    // NOTE(review): the factory is handed over as a raw pointer; presumably
    // the Transferer takes ownership of it - confirm, otherwise it leaks.
    tmil::FileParsingScriptFactory* factory = new tmil::FileParsingScriptFactory;

    boost::shared_ptr<tmil::Transferer> transferer(
        new tmil::Transferer(
            factory,
            lattice.getAnnotationItemManager().getZObjectsHolderPtr(),
            lattice.getAnnotationItemManager().getSymbolFactory()));

    transferer->include(rulesFile_.string().c_str());

    // Source side: lattice edge -> zsyntree.
    EdgeToZsyntreeConverter edgeConverter(lattice);
    zsyntree* sourceTree = edgeConverter.convertEdgeToZsyntree(edge);
    std::cerr << " GOT SOURCE:" << sourceTree->zsyntree_to_string() << std::endl;

    // Target side: run the transfer rules on the source tree.
    zsyntree* translatedTree = transferer->doTranslate(sourceTree, NULL, NULL);
    std::cerr << " GOT TARGET:" << translatedTree->zsyntree_to_string() << std::endl;

    // Store the translated tree first, then the forms derived from it.
    putZsyntreeIntoLattice(
        lattice,
        lattice.getLayerTagManager().createTagCollection(tags_),
        translatedTree);

    putTargetForms_(lattice, translatedTree, transferer);
}
/**
 * Creates a converter bound to the given lattice.
 *
 * Caches the symbol factory and the zobjects holder taken from the lattice's
 * annotation item manager, the singleton tag collections for the "lexeme",
 * "form" and "parse-terminal" layers, and the singleton mask for the
 * "bilexicon" layer.
 *
 * Every initializer reads from the constructor argument rather than from the
 * `lattice` member, so none of them depends on the order in which the
 * members are declared in the class.
 *
 * @param latticeArg  lattice the converter will read edges from
 */
EdgeToZsyntreeConverter::EdgeToZsyntreeConverter(Lattice& latticeArg)
    : lattice(latticeArg),
      sym_fac(latticeArg.getAnnotationItemManager().getSymbolFactory()),
      holder(latticeArg.getAnnotationItemManager().getZObjectsHolderPtr()),
      lexemeTag_(latticeArg.getLayerTagManager().createSingletonTagCollection("lexeme")),
      formTag_(latticeArg.getLayerTagManager().createSingletonTagCollection("form")),
      parseTerminalTag_(
          latticeArg.getLayerTagManager().createSingletonTagCollection("parse-terminal")),
      equivMask_(latticeArg.getLayerTagManager().getSingletonMask("bilexicon")) {
}
/**
 * Adds part-of-speech edges on top of the "form" edges of the lattice.
 *
 * Every non-discarded edge selected by the "form" mask yields a new edge in
 * the "parse" layer whose category is the form's part of speech, with the
 * attributes "discard" and "head" both set to "0" and the form edge as its
 * only sub-edge. At most one such edge is added per pair
 * (begin index, part of speech); later forms with the same pair are skipped.
 *
 * @param lattice  lattice that is read and extended in place
 */
void RuleMatcher::addPosEdges(Lattice &lattice) {
    typedef std::multimap<Lattice::VertexDescriptor, std::string> PosEdgesMap;

    LayerTagMask mask = lattice.getLayerTagManager().getMask(
        lattice.getLayerTagManager().createSingletonTagCollection("form"));

    // Parts of speech already emitted, keyed by the begin index of the edge.
    PosEdgesMap posEdgesMap;

    Lattice::EdgesSortedBySourceIterator edgeIterator = lattice.edgesSortedBySource(mask);
    while (edgeIterator.hasNext()) {
        Lattice::EdgeDescriptor edge = edgeIterator.next();

        if (lattice::isDiscarded(lattice, edge))
            continue;  // skip discarded forms

        int start = lattice.getEdgeBeginIndex(edge);
        int end = start + lattice.getEdgeLength(edge);
        std::string partOfSpeech = lattice::getPartOfSpeech(lattice, edge);

        // Has this part of speech been emitted for this begin index already?
        bool alreadyAdded = false;
        std::pair<PosEdgesMap::iterator, PosEdgesMap::iterator> sameStart =
            posEdgesMap.equal_range(start);
        for (PosEdgesMap::iterator it = sameStart.first; it != sameStart.second; ++it) {
            if (it->second == partOfSpeech) {
                alreadyAdded = true;
                break;
            }
        }
        if (alreadyAdded)
            continue;

        AnnotationItem ai(partOfSpeech);
        lattice.getAnnotationItemManager().setValue(ai, "discard", "0");
        lattice.getAnnotationItemManager().setValue(ai, "head", "0");

        Lattice::EdgeSequence::Builder seqBuilder(lattice);
        seqBuilder.addEdge(edge);

        lattice.addEdge(
            start,
            end,
            ai,
            lattice.getLayerTagManager().createSingletonTagCollection("parse"),
            seqBuilder.build());

        posEdgesMap.insert(
            std::pair<Lattice::VertexDescriptor, std::string>(start, partOfSpeech));
    }
}
/**
 * Adds one target-side edge (a form or a token) spanning the given edge.
 *
 * When `surf` is a pair it is treated as a form: the first element is the
 * text and the second the category, and the edge is tagged with
 * targetFormTags_. Otherwise `surf` itself is the text, the category is "T"
 * and the edge is tagged with targetTokenTags_. The annotation item gets a
 * "SurfacePosition" attribute taken from formsCounter_, which is then
 * incremented. The new edge runs between the source and target vertices of
 * `edge` and has `edge` as its only sub-edge.
 *
 * @param lattice  lattice that receives the new edge
 * @param edge     edge the new edge is built over
 * @param surf     surface value: either a (text, category) pair or a bare text
 */
void TransfererRunner::putTargetForm_(Lattice& lattice, Lattice::EdgeDescriptor edge, zvalue surf) {
    // could be a form or a token
    const bool surfIsForm = ZPAIRP(surf);

    std::string edgeCategory;
    if (surfIsForm) {
        edgeCategory = zvalue_to_string(ZPAIRC(surf)->getSecond());
    } else {
        edgeCategory = "T";
    }

    zvalue surfaceText = surfIsForm ? ZPAIRC(surf)->getFirst() : surf;

    LayerTagCollection edgeTags = lattice.getLayerTagManager().createTagCollection(
        surfIsForm ? targetFormTags_ : targetTokenTags_);

    AnnotationItem item(edgeCategory, StringFrag(zvalue_to_string(surfaceText)));
    lattice.getAnnotationItemManager().setValue(item, "SurfacePosition", formsCounter_++);

    // The new edge is built over the given edge only.
    Lattice::EdgeSequence::Builder sequenceBuilder(lattice);
    sequenceBuilder.addEdge(edge);

    Lattice::VertexDescriptor sourceVertex = lattice.getEdgeSource(edge);
    Lattice::VertexDescriptor targetVertex = lattice.getEdgeTarget(edge);

    lattice.addEdge(sourceVertex, targetVertex, item, edgeTags, sequenceBuilder.build());
}
/**
 * Serializes the top edges of the lattice into the '<'-delimited sentence
 * string.
 *
 * Layout of the result:
 *  - "<<s<0<0<sb<>" at the front, only when startVertex is 0;
 *  - one entry per visited vertex that has top edges, built from the first
 *    of them: "<<t" when it carries the "form" or "token" tag, "<<g"
 *    otherwise, then "<begin<end". For "parse" edges the category follows,
 *    then the escaped edge text (or the category again when the text is
 *    empty). For all other edges "<TOKEN" and the escaped edge text follow.
 *    After that, "<base<morphology" is appended for every non-discarded top
 *    edge at that vertex, and the entry is closed with ">";
 *  - "<<s<V<V<se<>" at the back, where V is the vertex the walk stopped at.
 *
 * Vertices without top edges are skipped one by one; after an entry the walk
 * continues from the end index of the edge that produced it.
 *
 * @param lattice      lattice to read
 * @param langCode     language code passed on to lattice::getTopEdges
 * @param startVertex  raw character index the walk starts from
 * @return the serialized sentence string
 */
std::string RuleMatcher::generateSentenceString(Lattice &lattice, std::string langCode, int startVertex) {
    std::stringstream ss;

    if (startVertex == 0)
        ss << "<<s<0<0<sb<>";

    Lattice::VertexDescriptor vertex = lattice.getVertexForRawCharIndex(startVertex);
    while (vertex < lattice.getLastVertex()) {
        std::list<Lattice::EdgeDescriptor> edges =
            lattice::getTopEdges(lattice, langCode, vertex);
        if (edges.empty()) {
            ++vertex;
            continue;
        }

        // The first top edge decides the entry type, span and orth.
        Lattice::EdgeDescriptor edge = edges.front();
        LayerTagCollection tags = lattice.getEdgeLayerTags(edge);

        if (lattice.getLayerTagManager().isThere("form", tags)
            || lattice.getLayerTagManager().isThere("token", tags)) {
            ss << "<<t";
        } else {
            ss << "<<g";
        }

        Lattice::VertexDescriptor start = lattice.getEdgeBeginIndex(edge);
        Lattice::VertexDescriptor end = start + lattice.getEdgeLength(edge);
        ss << "<" << start;
        ss << "<" << end;

        // @todo: setting orth for 'parse' edges needs to be fixed
        // (the edge text is used instead of the annotation item category)
        const std::string orth = lattice.getEdgeText(edge);

        if (lattice.getLayerTagManager().isThere("parse", tags)) {
            AnnotationItem annotationItem = lattice.getEdgeAnnotationItem(edge);
            const std::string category =
                lattice.getAnnotationItemManager().getCategory(annotationItem);

            ss << "<" << category;
            if (orth != "") {
                ss << "<" << util::escapeSpecialChars(orth);
            } else {
                // no text available: the category doubles as the orth
                ss << "<" << category;
            }
        } else {
            ss << "<" << "TOKEN";
            ss << "<" << util::escapeSpecialChars(orth);
        }

        // One (base, morphology) pair per non-discarded top edge.
        for (std::list<Lattice::EdgeDescriptor>::iterator edgeIt = edges.begin();
             edgeIt != edges.end(); ++edgeIt) {
            if (lattice::isDiscarded(lattice, *edgeIt))
                continue;  // skip discarded edges

            // @todo: setting base will not work for 'parse' edges
            std::string base = lattice::getBase(lattice, *edgeIt);
            std::string morphology = lattice::getMorphologyString(lattice, *edgeIt);

            ss << "<";
            ss << util::escapeSpecialChars(base);
            ss << "<";
            ss << util::escapeSpecialChars(morphology);
        }

        ss << ">";
        vertex = end;
    }

    ss << "<<s<" << vertex << "<" << vertex << "<se<>";
    return ss.str();
}