Beispiel #1
0
/**
 * Appends to @p tokenRep one BoWToken for each distinct
 * (normalized form, macro category) reading stored on @p vertex.
 *
 * The morphosyntactic data attached to the vertex is sorted in place with
 * ltNormProperty(m_macroAccessor) before being scanned. Consecutive entries
 * that share the same normalized form and macro category are collapsed, and a
 * reading whose lemma and category already appear in @p tokenRep is not added
 * again.
 *
 * @param vertex   vertex of @p graph whose token and data are read
 * @param graph    linguistic analysis graph holding the vertex properties
 * @param tokenRep destination list of (BoWToken*, plain-word flag) pairs;
 *                 the BoWToken objects are allocated here with new
 *                 (NOTE(review): presumably owned by tokenRep afterwards -
 *                 confirm against LTR_Token's destructor)
 * @param offset   value added to the token position when building the BoWToken
 *
 * Does nothing when the vertex carries no token or no morphosyntactic data,
 * or when @p tokenRep is null.
 */
void LTRTextBuilder::updateLTR_TokenFromVertex(
    const LinguisticGraphVertex& vertex,
    const LinguisticGraph& graph,
    LTR_Token* tokenRep,
    uint64_t offset) const {

    // get data from the result of the linguistic analysis
    Token* fullToken = get(vertex_token, graph, vertex);
    MorphoSyntacticData* data = get(vertex_data, graph, vertex);

    // A vertex may carry no token/data (other visitors of the same graph test
    // these pointers against 0); dereferencing them would crash.
    if (fullToken == 0 || data == 0 || tokenRep == 0) {
        return;
    }

    const FsaStringsPool& sp = (Common::MediaticData::MediaticData::single().stringsPool(m_language));

    // Sorting is done in place, i.e. the vertex data itself is reordered.
    sort(data->begin(),data->end(),ltNormProperty(m_macroAccessor));

    // lastNorm/lastMacro start at 0, so a leading entry whose normalized form
    // and macro category both compare equal to 0 is skipped as well.
    StringsPoolIndex norm(0),lastNorm(0);
    LinguisticCode macro(0),lastMacro(0);
    for (MorphoSyntacticData::const_iterator elemItr=data->begin();
         elemItr!=data->end(); ++elemItr) {
        norm = elemItr->normalizedForm;
        macro = m_macroAccessor->readValue(elemItr->properties);
        if (norm == lastNorm && macro == lastMacro) {
            // same reading as the previous entry: nothing new to add
            continue;
        }
        lastNorm=norm;
        lastMacro=macro;
        LimaString normStr= sp[norm];

        // test if the same word was not already met at this position:
        // selectionFlag stays true only if no element of tokenRep has both
        // the same lemma and the same category
        bool selectionFlag = true;
        LTR_Token::const_iterator itTok = tokenRep->begin();
        while (selectionFlag && (itTok != tokenRep->end())) {
            selectionFlag = (itTok->first->getLemma() != normStr) ||
                            (itTok->first->getCategory() != macro);
            ++itTok;
        }
        if (selectionFlag) {
            // test if the current token is a plain word
            bool plainWordFlag =
                this->isWordToSelect(normStr, macro,m_microAccessor->readValue(elemItr->properties));
            BoWToken* bowToken = new BoWToken(normStr,macro,
                                              fullToken->position() + offset,
                                              fullToken->length());
            bowToken->setInflectedForm(fullToken->stringForm());
            tokenRep->push_back(make_pair(bowToken, plainWordFlag));
        }
    }
}
Beispiel #2
0
/**
 * Writes one <vertex id="..."> element describing vertex @p v to m_ostream.
 *
 * Inside the element, the token attached to the vertex (if any) is written
 * between <token> tags, followed by the morphosyntactic data attached to the
 * vertex (if any). Both are serialized through their own outputXml() method,
 * using m_propertyCodeManager and m_stringsPool.
 *
 * @param v vertex being dumped
 * @param g graph from which the vertex properties are read
 */
void DumpXMLVisitor::discover_vertex(LinguisticGraphVertex v,
                                     const LinguisticGraph& g)
{
  // opening tag, always emitted
  m_ostream << "<vertex id=\"" << v << "\">" << std::endl;

  // token part: only present when the vertex holds a token
  if (Token* vertexToken = get(vertex_token,g,v))
  {
    m_ostream << "  <token>" << std::endl;
    vertexToken->outputXml(m_ostream,m_propertyCodeManager,m_stringsPool);
    m_ostream << "  </token>" << std::endl;
  }

  // morphosyntactic part: only present when the vertex holds data
  if (MorphoSyntacticData* vertexData = get(vertex_data,g,v))
  {
    vertexData->outputXml(m_ostream,m_propertyCodeManager,m_stringsPool);
  }

  // closing tag, always emitted
  m_ostream << "</vertex>" << std::endl;
}
Beispiel #3
0
namespace {

/**
 * Returns a copy of @p raw in which the characters that cannot appear
 * literally inside a double-quoted XML attribute value ('&', '<' and '"')
 * are replaced by the corresponding predefined entities.
 */
std::string escapeXmlAttributeValue(const std::string& raw)
{
    std::string escaped;
    escaped.reserve(raw.size());
    for (std::string::size_type i = 0; i < raw.size(); ++i)
    {
        switch (raw[i])
        {
        case '&':
            escaped += "&amp;";
            break;
        case '<':
            escaped += "&lt;";
            break;
        case '"':
            escaped += "&quot;";
            break;
        default:
            escaped += raw[i];
            break;
        }
    }
    return escaped;
}

} // anonymous namespace

/**
 * Writes the XML description of vertex @p v to @p xmlStream.
 *
 * The first and last vertices of the analysis (as given by
 * syntacticData->iterator()) and vertices that carry no token are written as
 * an empty <vertex id="..."/> element. For any other vertex the element
 * contains, in this order:
 *  - a <chains> list built from the vertex_chain_id property (type "0" for
 *    NO_CHAIN_TYPE, "N" for NOMINAL, "V" otherwise), when not empty;
 *  - a <dependents> list with one <dep> per outgoing edge of the matching
 *    vertex in the dependency graph, when there is at least one;
 *  - the morphosyntactic data of the vertex, when present;
 *  - a <ref> holding the id registered for the token in @p tokens, when the
 *    token is registered there.
 *
 * @param v                   vertex to dump
 * @param graph               graph from which vertex properties are read
 * @param offsetBegin         passed to getPosition() together with the token position
 * @param syntacticData       source of the boundary vertices and dependency graph;
 *                            must not be null
 * @param xmlStream           destination stream
 * @param tokens              maps each token to the id written in <ref>
 * @param alreadyDumpedTokens flag per token id, set to true for the token of
 *                            this vertex; grown if it is too short for the id
 */
void SyntacticAnalysisXmlLogger::outputVertex(const LinguisticGraphVertex v,
        const LinguisticGraph& graph,
        const uint64_t offsetBegin,
        const SyntacticData* syntacticData,
        std::ostream& xmlStream,
        std::map< LinguisticAnalysisStructure::Token*, uint64_t >& tokens,
        std::vector< bool >& alreadyDumpedTokens) const
{
    if (v == syntacticData->iterator()->firstVertex() ||
            v == syntacticData->iterator()->lastVertex())
    {
        xmlStream << "<vertex id=\"" << v << "\" />" << std::endl;
        return;
    }

    Token* token = get(vertex_token, graph, v);
    if (token == 0)
    {
        // Nothing to describe without a token: emit the same empty element
        // as for the boundary vertices instead of dereferencing null.
        xmlStream << "<vertex id=\"" << v << "\" />" << std::endl;
        return;
    }

    // Look the token up without dereferencing end() when it is not registered.
    const std::map< LinguisticAnalysisStructure::Token*, uint64_t >::const_iterator tokenIdIt =
        tokens.find(token);
    const bool hasTokenId = (tokenIdIt != tokens.end());

    // The surface form is arbitrary text (it can be a quote, '&' or '<'),
    // so it must be escaped before being placed in an attribute value.
    xmlStream << "<vertex id=\"" << v << "\" form=\""
              << escapeXmlAttributeValue(limastring2utf8stdstring(token->stringForm()))
              << "\" pos=\"" << getPosition(token->position(),offsetBegin) << "\" ";
    const VertexChainIdProp& chains = get(vertex_chain_id, graph,v);
    xmlStream << " >" << std::endl;
    if (chains.size() > 0)
    {
        xmlStream << "<chains>" << std::endl;
        for (VertexChainIdProp::const_iterator itChains = chains.begin();
                itChains != chains.end(); ++itChains)
        {
            const ChainIdStruct& ids = (*itChains);
            xmlStream << "<chain type=\"";
            if (ids.chainType() == Common::MediaticData::NO_CHAIN_TYPE)
                xmlStream << "0";
            else if (ids.chainType() == Common::MediaticData::NOMINAL)
                xmlStream << "N";
            else
                xmlStream << "V";
            xmlStream << "\" id=\"" << (ids.chainId()) << "\" />" << std::endl;
        }
        xmlStream << "</chains>" << std::endl;
    }

    const DependencyGraph* depGraph = syntacticData->dependencyGraph();
    DependencyGraphVertex depV = syntacticData->depVertexForTokenVertex(v);
    if (out_degree(depV, *depGraph) > 0)
    {
        xmlStream << "<dependents>" << std::endl;

        // The relation-type property map does not depend on the edge:
        // fetch it once for the whole loop.
        CEdgeDepRelTypePropertyMap relTypeMap = get(edge_deprel_type, *depGraph);

        DependencyGraphOutEdgeIt depIt, depIt_end;
        boost::tie(depIt, depIt_end) = out_edges(depV, *depGraph);
        for (; depIt != depIt_end; ++depIt)
        {
            DependencyGraphVertex depTargV = target(*depIt, *depGraph);
            LinguisticGraphVertex targV = syntacticData-> tokenVertexForDepVertex(depTargV);
            xmlStream << "<dep v=\"" << targV;
            std::string relName=static_cast<const Common::MediaticData::LanguageData&>(Common::MediaticData::MediaticData::single().mediaData(m_language)).getSyntacticRelationName(relTypeMap[*depIt]);
            if (relName.empty())
            {
                // relation type without a registered name
                relName="UNKNOWN";
            }
            xmlStream << "\" t=\"" << relName << "\" />" << std::endl;
        }
        xmlStream << "</dependents>" << std::endl;
    }

    const FsaStringsPool& sp=Common::MediaticData::MediaticData::single().stringsPool(m_language);

    MorphoSyntacticData* word = get(vertex_data, graph, v);
    if (word != 0)
    {
        word->outputXml(xmlStream,*m_propertyCodeManager,sp);
    }

    if (hasTokenId)
    {
        const uint64_t tokenId = tokenIdIt->second;
        xmlStream << "<ref>" << tokenId << "</ref>" << std::endl;
        // Grow the flag vector rather than writing past its end.
        if (tokenId >= alreadyDumpedTokens.size())
        {
            alreadyDumpedTokens.resize(
                static_cast< std::vector< bool >::size_type >(tokenId) + 1, false);
        }
        alreadyDumpedTokens[tokenId] = true;
    }
    xmlStream << "</vertex>" << std::endl;

}