void LTRTextBuilder::updateLTR_TokenFromVertex( const LinguisticGraphVertex& vertex, const LinguisticGraph& graph, LTR_Token* tokenRep, uint64_t offset) const { // get data from the result of the linguistic analysis Token* fullToken = get(vertex_token, graph, vertex); MorphoSyntacticData* data = get(vertex_data, graph, vertex); const FsaStringsPool& sp = (Common::MediaticData::MediaticData::single().stringsPool(m_language)); sort(data->begin(),data->end(),ltNormProperty(m_macroAccessor)); StringsPoolIndex norm(0),lastNorm(0); LinguisticCode macro(0),lastMacro(0); for (MorphoSyntacticData::const_iterator elemItr=data->begin(); elemItr!=data->end(); elemItr++) { norm = elemItr->normalizedForm; macro = m_macroAccessor->readValue(elemItr->properties); if (norm == lastNorm && macro == lastMacro) { continue; } else { lastNorm=norm; lastMacro=macro; LimaString normStr= sp[norm]; // test if the same word was not already met at this position bool selectionFlag = true; LTR_Token::const_iterator itTok = tokenRep->begin(); while (selectionFlag && (itTok != tokenRep->end())) { selectionFlag = (itTok->first->getLemma() != normStr) || (itTok->first->getCategory() != macro); itTok ++; } if (selectionFlag) { // test if the current token is a plain word bool plainWordFlag = this->isWordToSelect(normStr, macro,m_microAccessor->readValue(elemItr->properties)); BoWToken* bowToken = new BoWToken(normStr,macro, fullToken->position() + offset, fullToken->length()); bowToken->setInflectedForm(fullToken->stringForm()); tokenRep->push_back(make_pair(bowToken, plainWordFlag)); } } } }
/**
 * Writes one <vertex> XML element for @p v on the visitor's output stream.
 *
 * The element contains a <token> section when the vertex has a token,
 * followed by the XML of its morphosyntactic data when present.
 *
 * @param v vertex being discovered
 * @param g graph the vertex properties are read from
 */
void DumpXMLVisitor::discover_vertex(LinguisticGraphVertex v,
                                     const LinguisticGraph& g)
{
  m_ostream << "<vertex id=\"" << v << "\">" << std::endl;

  // Token section, only for vertices that carry a token.
  if (Token* vertexToken = get(vertex_token, g, v))
  {
    m_ostream << " <token>" << std::endl;
    vertexToken->outputXml(m_ostream, m_propertyCodeManager, m_stringsPool);
    m_ostream << " </token>" << std::endl;
  }

  // Morphosyntactic readings, only when attached to the vertex.
  if (MorphoSyntacticData* morphoData = get(vertex_data, g, v))
  {
    morphoData->outputXml(m_ostream, m_propertyCodeManager, m_stringsPool);
  }

  m_ostream << "</vertex>" << std::endl;
}
void SyntacticAnalysisXmlLogger::outputVertex(const LinguisticGraphVertex v, const LinguisticGraph& graph, const uint64_t offsetBegin, const SyntacticData* syntacticData, std::ostream& xmlStream, std::map< LinguisticAnalysisStructure::Token*, uint64_t >& tokens, std::vector< bool >& alreadyDumpedTokens) const { if (v == syntacticData->iterator()->firstVertex() || v == syntacticData->iterator()->lastVertex()) { xmlStream << "<vertex id=\"" << v << "\" />" << std::endl; return; } Token* token = get(vertex_token, graph, v); uint64_t tokenId = (*(tokens.find(token))).second; // bool alreadyDumped = alreadyDumpedTokens[tokenId]; xmlStream << "<vertex id=\"" << v << "\" form=\"" << limastring2utf8stdstring(token->stringForm()) << "\" pos=\"" << getPosition(token->position(),offsetBegin) << "\" "; const VertexChainIdProp& chains = get(vertex_chain_id, graph,v); xmlStream << " >" << std::endl; if (chains.size() > 0) { xmlStream << "<chains>" << std::endl; VertexChainIdProp::const_iterator itChains, itChains_end; itChains = chains.begin(); itChains_end = chains.end(); for (; itChains != itChains_end; itChains++) { const ChainIdStruct& ids = (*itChains); xmlStream << "<chain type=\""; if (ids.chainType() == Common::MediaticData::NO_CHAIN_TYPE) xmlStream << "0"; else if (ids.chainType() == Common::MediaticData::NOMINAL) xmlStream << "N"; else xmlStream << "V"; xmlStream << "\" id=\"" << (ids.chainId()) << "\" />" << std::endl; } xmlStream << "</chains>" << std::endl; } const DependencyGraph* depGraph = syntacticData->dependencyGraph(); DependencyGraphVertex depV = syntacticData->depVertexForTokenVertex(v); if (out_degree(depV, *depGraph) > 0) { xmlStream << "<dependents>" << std::endl; DependencyGraphOutEdgeIt depIt, depIt_end; boost::tie(depIt, depIt_end) = out_edges(depV, *depGraph); for (; depIt != depIt_end; depIt++) { DependencyGraphVertex depTargV = target(*depIt, *depGraph); LinguisticGraphVertex targV = syntacticData-> tokenVertexForDepVertex(depTargV); // 
CEdgeDepChainIdPropertyMap chainsMap = get(edge_depchain_id, *depGraph); CEdgeDepRelTypePropertyMap relTypeMap = get(edge_deprel_type, *depGraph); xmlStream << "<dep v=\"" << targV; // xmlStream << "\" c=\"" << chainsMap[*depIt]; std::string relName=static_cast<const Common::MediaticData::LanguageData&>(Common::MediaticData::MediaticData::single().mediaData(m_language)).getSyntacticRelationName(relTypeMap[*depIt]); if (relName.empty()) { relName="UNKNOWN"; } xmlStream << "\" t=\"" << relName << "\" />" << std::endl; } xmlStream << "</dependents>" << std::endl; } const FsaStringsPool& sp=Common::MediaticData::MediaticData::single().stringsPool(m_language); MorphoSyntacticData* word = get(vertex_data, graph, v); word->outputXml(xmlStream,*m_propertyCodeManager,sp); xmlStream << "<ref>" << tokenId << "</ref>" << std::endl; alreadyDumpedTokens[tokenId] = true; xmlStream << "</vertex>" << std::endl; }