/**
 * Writes the XML description of one specific entity to @p out.
 *
 * Two layouts are produced depending on m_compactFormat:
 *  - compact: a single-line <entity> element holding the position
 *    (@p offset + annotation position), the annotation length, the entity
 *    type name, the XML-escaped token string and one child element per
 *    entity feature inside <norm>;
 *  - verbose: a multi-line <specific_entity> element holding the XML-escaped
 *    token string, the token position and length, and the entity type name.
 *
 * Nothing is written (an error is logged instead) when @p v has no token in
 * @p tokenMap or when @p annot is null.
 *
 * @param out      stream receiving the XML
 * @param v        vertex of the analysis graph carrying the entity
 * @param annot    annotation describing the entity
 * @param tokenMap map giving the token attached to each vertex
 * @param offset   value added to the annotation position (compact format only)
 */
void SpecificEntitiesXmlLogger::
outputEntity(std::ostream& out, 
             LinguisticGraphVertex v,
             const SpecificEntityAnnotation* annot,
             const VertexTokenPropertyMap& tokenMap,
             uint64_t offset) const
{
  LinguisticAnalysisStructure::Token* vToken = tokenMap[v];
  if (vToken == 0)
  {
    SELOGINIT;
    LERROR << "Vertex " << v << " has no entry in the analysis graph token map. This should not happen !!";
    return;
  }
  // Both output formats dereference the annotation: refuse a null one instead
  // of crashing.
  if (annot == 0)
  {
    SELOGINIT;
    LERROR << "Vertex " << v << " has no specific entity annotation. Nothing to output.";
    return;
  }

  if (m_compactFormat)
  {
    // same format as RecognizerResultLogger
    out << "<entity>"
        << "<pos>" << offset+annot->getPosition() << "</pos>"
        << "<len>" << annot->getLength() << "</len>"
        << "<type>"
        << Common::MediaticData::MediaticData::single().getEntityName(annot->getType()).toUtf8().data()
        << "</type>"
        << "<string>" << Common::Misc::transcodeToXmlEntities(vToken->stringForm()).toUtf8().data() << "</string>"
        << "<norm>";

    // One child element per feature: the feature name is used verbatim as the
    // element name, only the value goes through XML entity transcoding.
    const Automaton::EntityFeatures& features = annot->getFeatures();
    for (Automaton::EntityFeatures::const_iterator featureItr = features.begin(),
           features_end = features.end();
         featureItr != features_end; ++featureItr)
    {
      out << "<" << featureItr->getName() << ">"
          << Common::Misc::limastring2utf8stdstring(
               Common::Misc::transcodeToXmlEntities(
                 Common::Misc::utf8stdstring2limastring(featureItr->getValueString())))
          << "</" << featureItr->getName() << ">";
    }

    out << "</norm>"
        << "</entity>"
        << std::endl;
    return;
  }

  // Verbose format: the string, position and length are those of the token
  // attached to the vertex.
  // NOTE(review): offset is not added to the position in this format, unlike
  // the compact one -- confirm this is intended.
  out << "<specific_entity>" << std::endl;
  out << "  <string>" << Common::Misc::transcodeToXmlEntities(vToken->stringForm()).toUtf8().data() << "</string>" << std::endl;
  out << "  <position>" << vToken->position() << "</position>" << std::endl;
  out << "  <length>" << vToken->length() << "</length>" << std::endl;
  // Convert the type name to UTF-8 explicitly, exactly as the compact format
  // does, so both formats serialize it the same way.
  out << "  <type>"
      << Common::MediaticData::MediaticData::single().getEntityName(annot->getType()).toUtf8().data()
      << "</type>" << std::endl;
  out << "</specific_entity>" << std::endl;
}
// Example #2
// 0
std::string SemanticRelationsXmlLogger::
vertexStringForSemanticAnnotation(const std::string& vertexRole, 
                                  const AnnotationGraphVertex& vertex,
                                  const VertexTokenPropertyMap& tokenMap,
                                  AnnotationData* annotationData,
                                  uint64_t offset) const
{
  ostringstream oss;

  // get id of the corresponding vertex in analysis graph
  LinguisticGraphVertex v;
  if (!annotationData->hasIntAnnotation(vertex,Common::Misc::utf8stdstring2limastring(m_graph)))
  {
    // SEMLOGINIT;
    // LDEBUG << *itv << " has no " << m_graph << " annotation. Skeeping it." << LENDL;
    return "";
  }
  v = annotationData->intAnnotation(vertex,Common::Misc::utf8stdstring2limastring(m_graph));
  LinguisticAnalysisStructure::Token* vToken = tokenMap[v];
  //       LDEBUG << "SemanticRelationsXmlLogger tokenMap[" << v << "] = " << vToken << LENDL;
  if (vToken == 0)
  {
    SEMLOGINIT;
    LERROR << "Vertex " << v << " has no entry in the analysis graph token map. This should not happen !!" << LENDL;
    return "";
  }

  // get annotation : element in relation can be an entity => get entity type
  // otherwise, its type is "token"
  std::string type("token");

  std::set< uint32_t > matches = annotationData->matches(m_graph,v,"annot");
  for (std::set< uint32_t >::const_iterator it = matches.begin();
       it != matches.end(); it++)
  {
    if (annotationData->hasAnnotation(*it,Common::Misc::utf8stdstring2limastring("SpecificEntity"))) {
      const SpecificEntityAnnotation* annot = 0;
      try {
        annot = annotationData->annotation(*it,Common::Misc::utf8stdstring2limastring("SpecificEntity"))
          .pointerValue<SpecificEntityAnnotation>();
      }
      catch (const boost::bad_any_cast& e) {
        SEMLOGINIT;
        LERROR << "This annotation is not a SemanticAnnotation" << LENDL;
        continue;
      }
      type=Common::Misc::limastring2utf8stdstring(Common::LinguisticData::LinguisticData::single().getEntityName(annot->getType()));
      break;
    }
  }

  oss << "  <" << vertexRole 
      << " type=\"" << type << "\"" 
      << " pos=\"" << offset+vToken->position() << "\"" 
      << " len=\"" << vToken->length() << "\"" 
      << " string=\"" << vToken->stringForm() << "\"" 
      << "/>" << endl;
  return oss.str();
}