LimaStatusCode SpecificEntitiesXmlLogger::process(
  AnalysisContent& analysis) const
{
  SELOGINIT;
  LDEBUG << "SpecificEntitiesXmlLogger::process";
  TimeUtils::updateCurrentTime();

  AnnotationData* annotationData = static_cast< AnnotationData* >(analysis.getData("AnnotationData"));
  if (annotationData == 0) {
    SELOGINIT;
    LERROR << "no annotationData ! abort";
    return MISSING_DATA;
  }
  
  
  LinguisticAnalysisStructure::AnalysisGraph* graphp = static_cast<LinguisticAnalysisStructure::AnalysisGraph*>(analysis.getData(m_graph));
  if (graphp == 0) {
    SELOGINIT;
    LERROR << "no graph "<< m_graph <<" ! abort";
    return MISSING_DATA;
  }
  const LinguisticAnalysisStructure::AnalysisGraph& graph = *graphp;
  LinguisticGraph* lingGraph = const_cast<LinguisticGraph*>(graph.getGraph());
  VertexTokenPropertyMap tokenMap = get(vertex_token, *lingGraph);
  
  LinguisticMetaData* metadata=static_cast<LinguisticMetaData*>(analysis.getData("LinguisticMetaData"));
  if (metadata == 0) {
      SELOGINIT;
      LERROR << "no LinguisticMetaData ! abort";
      return MISSING_DATA;
  }

  DumperStream* dstream=initialize(analysis);
  ostream& out=dstream->out();

  uint64_t offset(0);
  try {
    offset=atoi(metadata->getMetaData("StartOffset").c_str());
  }
  catch (LinguisticProcessingException& ) {
    // do nothing: not set in analyzeText (only in analyzeXmlDocuments)
  }

  uint64_t offsetIndexingNode(0);
  try {
    offsetIndexingNode=atoi(metadata->getMetaData("StartOffsetIndexingNode").c_str());
  }
  catch (LinguisticProcessingException& ) {
    // do nothing: not set in analyzeText (only in analyzeXmlDocuments)
  }

  std::string docId("");
  try {
    docId=metadata->getMetaData("DocId");
  }
  catch (LinguisticProcessingException& ) {
    // do nothing: not set in analyzeText (only in analyzeXmlDocuments)
  }

  if (m_compactFormat) {
    out << "<entities docid=\"" << docId
    << "\" offsetNode=\"" << offsetIndexingNode 
    << "\" offset=\"" << offset
    << "\">" << endl;
  }
  else {
    out << "<specific_entities>" << endl;
  }
//   SELOGINIT;

  if (m_followGraph) {
    // instead of looking to all annotations, follow the graph (in
    // morphological graph, some vertices are not related to main graph:
    // idiomatic expressions parts and named entity parts)
    // -> this will not include nested entities

    AnalysisGraph* tokenList=static_cast<AnalysisGraph*>(analysis.getData(m_graph));
    if (tokenList==0) {
      LERROR << "graph " << m_graph << " has not been produced: check pipeline";
      return MISSING_DATA;
    }
    LinguisticGraph* graph=tokenList->getGraph();
    //const FsaStringsPool& sp=Common::MediaticData::MediaticData::single().stringsPool(m_language);
    
    std::queue<LinguisticGraphVertex> toVisit;
    std::set<LinguisticGraphVertex> visited;
    toVisit.push(tokenList->firstVertex());
    
    LinguisticGraphOutEdgeIt outItr,outItrEnd;
    while (!toVisit.empty()) {
      LinguisticGraphVertex v=toVisit.front();
      toVisit.pop();
      if (v == tokenList->lastVertex()) {
        continue;
      }
      
      for (boost::tie(outItr,outItrEnd)=out_edges(v,*graph); outItr!=outItrEnd; outItr++) 
      {
        LinguisticGraphVertex next=target(*outItr,*graph);
        if (visited.find(next)==visited.end())
        {
          visited.insert(next);
          toVisit.push(next);
        }
      }
      const SpecificEntityAnnotation* annot=getSpecificEntityAnnotation(v,annotationData);
      if (annot != 0) {
        outputEntity(out,v,annot,tokenMap,offset);
      }
    }
  }
  else {
    // take all annotations
    AnnotationGraphVertexIt itv, itv_end;
    boost::tie(itv, itv_end) = vertices(annotationData->getGraph());
    for (; itv != itv_end; itv++)
    {
      //     LDEBUG << "SpecificEntitiesXmlLogger on annotation vertex " << *itv;
      if (annotationData->hasAnnotation(*itv,Common::Misc::utf8stdstring2limastring("SpecificEntity")))
      {
        //       LDEBUG << "    it has SpecificEntityAnnotation";
        const SpecificEntityAnnotation* annot = 0;
        try
        {
          annot = annotationData->annotation(*itv,Common::Misc::utf8stdstring2limastring("SpecificEntity"))
          .pointerValue<SpecificEntityAnnotation>();
        }
        catch (const boost::bad_any_cast& )
        {
          SELOGINIT;
          LERROR << "This annotation is not a SpecificEntity; SE not logged";
          continue;
        }
        
        // recuperer l'id du vertex morph cree
        LinguisticGraphVertex v;
        if (!annotationData->hasIntAnnotation(*itv,Common::Misc::utf8stdstring2limastring(m_graph)))
        {
          //         SELOGINIT;
          //         LDEBUG << *itv << " has no " << m_graph << " annotation. Skeeping it.";
          continue;
        }
        v = annotationData->intAnnotation(*itv,Common::Misc::utf8stdstring2limastring(m_graph));
        outputEntity(out,v,annot,tokenMap,offset);
      }
    }
  }   
  
  //   LDEBUG << "    all vertices done";
  if (m_compactFormat) {
    out << "</entities>" << endl;
  }
  else {
    out << "</specific_entities>" << endl;
  }
  delete dstream;
  TimeUtils::logElapsedTime("SpecificEntitiesXmlLogger");
  return SUCCESS_ID;
  
}
Exemple #2
0
LimaStatusCode SemanticRelationsXmlLogger::
process(AnalysisContent& analysis) const
{
  TimeUtils::updateCurrentTime();
  
  SEMLOGINIT;
  LERROR << "SemanticRelationsXmlLogger" << LENDL;
    
  AnnotationData* annotationData = static_cast< AnnotationData* >(analysis.getData("AnnotationData"));
  
  const LinguisticAnalysisStructure::AnalysisGraph& graph = 
    *(static_cast<LinguisticAnalysisStructure::AnalysisGraph*>(analysis.getData(m_graph)));
  
  LinguisticGraph* lingGraph = const_cast<LinguisticGraph*>(graph.getGraph());
  VertexTokenPropertyMap tokenMap = get(vertex_token, *lingGraph);
  LinguisticMetaData* metadata=static_cast<LinguisticMetaData*>(analysis.getData("LinguisticMetaData"));
  if (metadata == 0) {
      SEMLOGINIT;
      LERROR << "no LinguisticMetaData ! abort" << LENDL;
      return MISSING_DATA;
  }

  ofstream out;
  if (!openLogFile(out,metadata->getMetaData("FileName"))) {
    SEMLOGINIT;
    LERROR << "Can't open log file " << LENDL;
    return UNKNOWN_ERROR;
  }

  uint64_t offset(0);
  try {
    offset=atoi(metadata->getMetaData("StartOffset").c_str());
  }
  catch (LinguisticProcessingException& e) {
    // do nothing: not set in analyzeText (only in analyzeXmlDocuments)
  }

  uint64_t offsetIndexingNode(0);
  try {
    offsetIndexingNode=atoi(metadata->getMetaData("StartOffsetIndexingNode").c_str());
  }
  catch (LinguisticProcessingException& e) {
    // do nothing: not set in analyzeText (only in analyzeXmlDocuments)
  }

  std::string docId("");
  try {
    docId=metadata->getMetaData("DocId");
  }
  catch (LinguisticProcessingException& e) {
    // do nothing: not set in analyzeText (only in analyzeXmlDocuments)
  }

  out << "<relations docid=\"" << docId
      << "\" offsetNode=\"" << offsetIndexingNode 
      << "\">" << endl;

//   LDEBUG << "SemanticRelationsXmlLogger on graph " << m_graph << LENDL;
  
  //look at all vertices for annotations
  AnnotationGraphVertexIt itv, itv_end;
  boost::tie(itv, itv_end) = vertices(annotationData->getGraph());
  for (; itv != itv_end; itv++)
  {
    LDEBUG << "SemanticRelationsXmlLogger on annotation vertex " << *itv << LENDL;
    if (annotationData->hasAnnotation(*itv,Common::Misc::utf8stdstring2limastring("SemanticAnnotation")))
    {
//       LDEBUG << "    it has SemanticRelationAnnotation" << LENDL;
      const SemanticAnnotation* annot = 0;
      try
      {
        annot = annotationData->annotation(*itv,Common::Misc::utf8stdstring2limastring("SemanticAnnotation"))
          .pointerValue<SemanticAnnotation>();
      }
      catch (const boost::bad_any_cast& e)
      {
        SEMLOGINIT;
        LERROR << "This annotation is not a SemanticRelation" << LENDL;
        continue;
      }

      // output
      out << "<annotation type=\"" << annot->getType() << "\">" << endl
          << vertexStringForSemanticAnnotation("vertex",*itv,tokenMap,annotationData,offset)
          << "</annotation>" << endl;
    }
  }

  // look at all edges for relations
  AnnotationGraphEdgeIt it,it_end;
  const AnnotationGraph& annotGraph=annotationData->getGraph();
  boost::tie(it, it_end) = edges(annotGraph);
  for (; it != it_end; it++) {
    LDEBUG << "SemanticRelationsXmlLogger on annotation edge " 
           << source(*it,annotGraph) << "->" << target(*it,annotationData->getGraph()) << LENDL;
    if (annotationData->hasAnnotation(*it,Common::Misc::utf8stdstring2limastring("SemanticRelation")))
    {
      SEMLOGINIT;
      LDEBUG << "found semantic relation" << LENDL;
      const SemanticRelationAnnotation* annot = 0;
      try
      {
        annot = annotationData->annotation(*it,Common::Misc::utf8stdstring2limastring("SemanticRelation"))
          .pointerValue<SemanticRelationAnnotation>();
      }
      catch (const boost::bad_any_cast& e)
      {
        SEMLOGINIT;
        LERROR << "This annotation is not a SemanticAnnotation" << LENDL;
        continue;
      }

      //output
      out << "<relation type=\"" << annot->type() << "\">" << endl
          << vertexStringForSemanticAnnotation("source",source(*it,annotGraph),tokenMap,annotationData,offset)
          << vertexStringForSemanticAnnotation("target",target(*it,annotGraph),tokenMap,annotationData,offset)
          << "</relation>" << endl;
      
    }
  }

//   LDEBUG << "    all vertices done" << LENDL;
  out << "</relations>" << endl;
  out.close();

  TimeUtils::logElapsedTime("SemanticRelationsXmlLogger");
  return SUCCESS_ID;
}