/**
 * Translates the tree built from one lattice edge and writes the result
 * back into the lattice.
 *
 * A transferer is created on top of the lattice's z-objects holder and
 * symbol factory and loaded with the rules file (rulesFile_). The edge is
 * converted to a zsyntree, translated, and the resulting tree is put into
 * the lattice under the tags_ layer tags; putTargetForms_ is then called
 * for the translated tree. The source and target trees are dumped to
 * std::cerr along the way.
 *
 * @param lattice lattice that contains the edge and receives the results
 * @param edge    edge to be converted and translated
 */
void TransfererRunner::processEdge(Lattice& lattice, Lattice::EdgeDescriptor edge) {
    std::cerr << "PROCESSING:";

    tmil::FileParsingScriptFactory* factory = new tmil::FileParsingScriptFactory;

    // Build the transferer directly inside the smart pointer.
    boost::shared_ptr<tmil::Transferer> transferer(
        new tmil::Transferer(
            factory,
            lattice.getAnnotationItemManager().getZObjectsHolderPtr(),
            lattice.getAnnotationItemManager().getSymbolFactory()));

    transferer->include(rulesFile_.string().c_str());

    // Source side: lattice edge -> zsyntree.
    EdgeToZsyntreeConverter edgeConverter(lattice);
    zsyntree* sourceTree = edgeConverter.convertEdgeToZsyntree(edge);
    std::cerr << "  GOT SOURCE:" << sourceTree->zsyntree_to_string() << std::endl;

    // Target side: run the transfer rules over the source tree.
    zsyntree* translatedTree = transferer->doTranslate(sourceTree, NULL, NULL);
    std::cerr << "  GOT TARGET:" << translatedTree->zsyntree_to_string() << std::endl;

    putZsyntreeIntoLattice(
        lattice,
        lattice.getLayerTagManager().createTagCollection(tags_),
        translatedTree);

    putTargetForms_(lattice, translatedTree, transferer);
}
// Example #2
// 0
/**
 * Creates a converter bound to the given lattice.
 *
 * Caches, from the lattice's managers, the symbol factory, the z-objects
 * holder pointer, the singleton tag collections "lexeme", "form" and
 * "parse-terminal", and the singleton mask for "bilexicon".
 *
 * Every initializer reads from the constructor argument rather than from
 * the `lattice` member, so none of them depends on the order in which the
 * members are declared in the class (members are initialized in declaration
 * order, not in the order written here).
 *
 * @param latticeArg lattice the converter will operate on
 */
EdgeToZsyntreeConverter::EdgeToZsyntreeConverter(Lattice& latticeArg)
    :lattice(latticeArg),
     sym_fac(latticeArg.getAnnotationItemManager().getSymbolFactory()),
     holder(latticeArg.getAnnotationItemManager().getZObjectsHolderPtr()),
     lexemeTag_(latticeArg.getLayerTagManager().createSingletonTagCollection("lexeme")),
     formTag_(latticeArg.getLayerTagManager().createSingletonTagCollection("form")),
     parseTerminalTag_(latticeArg.getLayerTagManager().createSingletonTagCollection("parse-terminal")),
     equivMask_(latticeArg.getLayerTagManager().getSingletonMask("bilexicon")) {
}
// Example #3
// 0
/**
 * Adds one "parse" edge per distinct part of speech found among the
 * non-discarded "form" edges that begin at the same index.
 *
 * The "form" edges are visited in source order. For each one that is not
 * discarded, its part of speech is looked up; if no edge with that part of
 * speech has been added yet for the same begin index, a new edge is added
 * to the lattice that
 *   - spans the same range as the form edge,
 *   - has the part of speech as its category, with "discard" and "head"
 *     both set to "0",
 *   - carries the "parse" layer tag,
 *   - has the form edge as its only constituent.
 *
 * @param lattice lattice that is read and extended in place
 */
void RuleMatcher::addPosEdges(Lattice &lattice) {
    typedef std::multimap<Lattice::VertexDescriptor, std::string> PosEdgesMap;

    LayerTagMask formMask = lattice.getLayerTagManager().getMask(
            lattice.getLayerTagManager().createSingletonTagCollection("form"));

    // (begin index, part of speech) pairs for which an edge was already added
    PosEdgesMap posEdgesMap;

    Lattice::EdgesSortedBySourceIterator edgeIterator =
        lattice.edgesSortedBySource(formMask);
    while (edgeIterator.hasNext()) {
        Lattice::EdgeDescriptor edge = edgeIterator.next();
        int start = lattice.getEdgeBeginIndex(edge);
        int end = start + lattice.getEdgeLength(edge);
        if (lattice::isDiscarded(lattice, edge))
            continue; // skip discarded forms

        std::string partOfSpeech = lattice::getPartOfSpeech(lattice, edge);

        // Has this part of speech been seen at this begin index before?
        bool alreadyAdded = false;
        std::pair<PosEdgesMap::iterator, PosEdgesMap::iterator> sameStart =
            posEdgesMap.equal_range(start);
        for (PosEdgesMap::iterator it = sameStart.first;
                it != sameStart.second; ++it) {
            if (it->second == partOfSpeech) {
                alreadyAdded = true;
                break;
            }
        }
        if (alreadyAdded)
            continue;

        AnnotationItem ai(partOfSpeech);
        lattice.getAnnotationItemManager().setValue(ai, "discard", "0");
        lattice.getAnnotationItemManager().setValue(ai, "head", "0");

        Lattice::EdgeSequence::Builder seqBuilder(lattice);
        seqBuilder.addEdge(edge);

        lattice.addEdge(
                start,
                end,
                ai,
                lattice.getLayerTagManager().createSingletonTagCollection("parse"),
                seqBuilder.build());

        posEdgesMap.insert(PosEdgesMap::value_type(start, partOfSpeech));
    }
}
/**
 * Adds a target-side edge for one surface value, parallel to the given edge.
 *
 * The surface value is either a pair (a form), whose first element is the
 * text and whose second element is the category, or a plain value (a token),
 * which is the text itself and gets the category "T". The new edge runs
 * between the source and target vertices of the given edge, has that edge
 * as its only constituent, is tagged with targetFormTags_ or
 * targetTokenTags_ accordingly, and stores the current value of
 * formsCounter_ (incremented afterwards) as its "SurfacePosition".
 *
 * @param lattice lattice to which the new edge is added
 * @param edge    edge the new edge is built upon
 * @param surf    surface value: a (text, category) pair or a bare text value
 */
void TransfererRunner::putTargetForm_(Lattice& lattice, Lattice::EdgeDescriptor edge, zvalue surf) {

    // A pair means a form; anything else is treated as a token.
    const bool surfIsForm = ZPAIRP(surf);

    // Token defaults, overridden below when a form was passed in.
    std::string categoryName("T");
    zvalue surfaceText = surf;
    if (surfIsForm) {
        categoryName = zvalue_to_string(ZPAIRC(surf)->getSecond());
        surfaceText = ZPAIRC(surf)->getFirst();
    }

    LayerTagCollection layerTags =
        lattice.getLayerTagManager().createTagCollection(
            surfIsForm ? targetFormTags_ : targetTokenTags_);

    AnnotationItem item(categoryName, StringFrag(zvalue_to_string(surfaceText)));
    lattice.getAnnotationItemManager().setValue(
        item, "SurfacePosition", formsCounter_++);

    // The new edge has the original edge as its single constituent.
    Lattice::EdgeSequence::Builder sequenceBuilder(lattice);
    sequenceBuilder.addEdge(edge);

    Lattice::VertexDescriptor sourceVertex = lattice.getEdgeSource(edge);
    Lattice::VertexDescriptor targetVertex = lattice.getEdgeTarget(edge);

    lattice.addEdge(sourceVertex, targetVertex, item, layerTags, sequenceBuilder.build());
}
// Example #5
// 0
/**
 * Serializes the top edges of the lattice into the sentence string.
 *
 * Output layout, as produced by the code below:
 *   - "<<s<0<0<sb<>" is emitted first, but only when startVertex is 0;
 *   - for every vertex that has top edges, one entry is written for the
 *     first of them: "<<t" (edge tagged "form" or "token") or "<<g"
 *     (otherwise), then "<begin", "<end", a category field and a text field,
 *     followed by "<base<morphology" for every non-discarded top edge at
 *     that vertex, and a closing ">";
 *   - "<<s<V<V<se<>" is emitted last, where V is the vertex at which the
 *     walk stopped.
 *
 * Vertices without top edges are skipped one at a time; otherwise the walk
 * continues from the end of the edge that was written.
 *
 * @param lattice     lattice to read from
 * @param langCode    language code forwarded to lattice::getTopEdges
 * @param startVertex raw character index at which to start
 * @return the serialized sentence string
 */
std::string RuleMatcher::generateSentenceString(Lattice &lattice,
        std::string langCode, int startVertex) {
    std::stringstream ss;
    if (startVertex == 0)
        ss << "<<s<0<0<sb<>";

    Lattice::VertexDescriptor vertex = lattice.getVertexForRawCharIndex(startVertex);
    while (vertex < lattice.getLastVertex()) {
        std::list<Lattice::EdgeDescriptor> edges = lattice::getTopEdges(
                lattice, langCode, vertex);

        if (edges.empty()) {
            ++vertex;
            continue;
        }

        Lattice::EdgeDescriptor edge = edges.front();

        LayerTagCollection tags = lattice.getEdgeLayerTags(edge);
        if (lattice.getLayerTagManager().isThere("form", tags) ||
            lattice.getLayerTagManager().isThere("token", tags)) {
            ss << "<<t";
        } else {
            ss << "<<g";
        }

        Lattice::VertexDescriptor start = lattice.getEdgeBeginIndex(edge);
        Lattice::VertexDescriptor end = start + lattice.getEdgeLength(edge);
        ss << "<" << start;
        ss << "<" << end;

        //@todo: setting orth for 'parse' edges needs to be fixed
        // (the edge text is used for now instead of the category)
        std::string orth = lattice.getEdgeText(edge);
        if (lattice.getLayerTagManager().isThere("parse", tags)) {
            AnnotationItem annotationItem = lattice.getEdgeAnnotationItem(edge);
            std::string category =
                lattice.getAnnotationItemManager().getCategory(annotationItem);
            ss << "<" << category;
            // fall back to the category when the edge has no text
            if (!orth.empty()) {
                ss << "<" << util::escapeSpecialChars(orth);
            } else {
                ss << "<" << category;
            }
        } else {
            ss << "<" << "TOKEN";
            ss << "<" << util::escapeSpecialChars(orth);
        }

        for (std::list<Lattice::EdgeDescriptor>::iterator edgeIt = edges.begin();
                edgeIt != edges.end(); ++edgeIt) {
            if (lattice::isDiscarded(lattice, *edgeIt))
                continue; // skip discarded edges
            //@todo: setting base will not work for 'parse' edges
            std::string base = lattice::getBase(lattice, *edgeIt);
            std::string morphology = lattice::getMorphologyString(
                    lattice, *edgeIt);
            ss << "<";
            ss << util::escapeSpecialChars(base);
            ss << "<";
            ss << util::escapeSpecialChars(morphology);
        }
        ss << ">";
        vertex = end;
    }
    ss << "<<s<" << vertex << "<" << vertex << "<se<>";

    return ss.str();
}