std::string SemanticRelationsXmlLogger:: vertexStringForSemanticAnnotation(const std::string& vertexRole, const AnnotationGraphVertex& vertex, const VertexTokenPropertyMap& tokenMap, AnnotationData* annotationData, uint64_t offset) const { ostringstream oss; // get id of the corresponding vertex in analysis graph LinguisticGraphVertex v; if (!annotationData->hasIntAnnotation(vertex,Common::Misc::utf8stdstring2limastring(m_graph))) { // SEMLOGINIT; // LDEBUG << *itv << " has no " << m_graph << " annotation. Skeeping it." << LENDL; return ""; } v = annotationData->intAnnotation(vertex,Common::Misc::utf8stdstring2limastring(m_graph)); LinguisticAnalysisStructure::Token* vToken = tokenMap[v]; // LDEBUG << "SemanticRelationsXmlLogger tokenMap[" << v << "] = " << vToken << LENDL; if (vToken == 0) { SEMLOGINIT; LERROR << "Vertex " << v << " has no entry in the analysis graph token map. This should not happen !!" << LENDL; return ""; } // get annotation : element in relation can be an entity => get entity type // otherwise, its type is "token" std::string type("token"); std::set< uint32_t > matches = annotationData->matches(m_graph,v,"annot"); for (std::set< uint32_t >::const_iterator it = matches.begin(); it != matches.end(); it++) { if (annotationData->hasAnnotation(*it,Common::Misc::utf8stdstring2limastring("SpecificEntity"))) { const SpecificEntityAnnotation* annot = 0; try { annot = annotationData->annotation(*it,Common::Misc::utf8stdstring2limastring("SpecificEntity")) .pointerValue<SpecificEntityAnnotation>(); } catch (const boost::bad_any_cast& e) { SEMLOGINIT; LERROR << "This annotation is not a SemanticAnnotation" << LENDL; continue; } type=Common::Misc::limastring2utf8stdstring(Common::LinguisticData::LinguisticData::single().getEntityName(annot->getType())); break; } } oss << " <" << vertexRole << " type=\"" << type << "\"" << " pos=\"" << offset+vToken->position() << "\"" << " len=\"" << vToken->length() << "\"" << " string=\"" << vToken->stringForm() << "\"" << "/>" << endl; return oss.str(); }
void SpecificEntitiesXmlLogger:: outputEntity(std::ostream& out, LinguisticGraphVertex v, const SpecificEntityAnnotation* annot, const VertexTokenPropertyMap& tokenMap, uint64_t offset) const { LinguisticAnalysisStructure::Token* vToken = tokenMap[v]; // LDEBUG << "SpecificEntitiesXmlLogger tokenMap[" << v << "] = " << vToken; if (vToken == 0) { SELOGINIT; LERROR << "Vertex " << v << " has no entry in the analysis graph token map. This should not happen !!"; } else { if (m_compactFormat) { // same format as RecognizerResultLogger out << "<entity>" //<< "<pos>" << offset+annot->getPosition() << "</pos>" << "<pos>" << offset+annot->getPosition() << "</pos>" << "<len>" << annot->getLength() << "</len>" //<< "<typeNum>" << (*m).getType() << "</typeNum>" << "<type>" << Common::MediaticData::MediaticData::single().getEntityName(annot->getType()).toUtf8().data() << "</type>" << "<string>"<< Common::Misc::transcodeToXmlEntities(vToken->stringForm()).toUtf8().data() << "</string>" << "<norm>"; const Automaton::EntityFeatures& features=annot->getFeatures(); for (Automaton::EntityFeatures::const_iterator featureItr=features.begin(),features_end=features.end(); featureItr!=features_end; featureItr++) { out << "<" << featureItr->getName() << ">" << Common::Misc::limastring2utf8stdstring(Common::Misc::transcodeToXmlEntities(Common::Misc::utf8stdstring2limastring(featureItr->getValueString()))) << "</" << featureItr->getName() << ">"; } out << "</norm>" << "</entity>" << endl; } else { // recuperer le vertex morph en question dans le graphe morph // recuperer la chaine, la position et la longueur pour ce vertex morph out << "<specific_entity>" << endl; out << " <string>" << Common::Misc::transcodeToXmlEntities(vToken->stringForm()).toUtf8().data() << "</string>" << endl; out << " <position>" << vToken->position()<< "</position>" << endl; out << " <length>" << vToken->length() << "</length>" << endl; out << " <type>" << Common::MediaticData::MediaticData::single().getEntityName(annot->getType()) << "</type>" << endl; out << "</specific_entity>" << endl; } } }