/**
 * @brief Dumps the specific entities attached to an analysis as XML.
 *
 * Reads "AnnotationData", the analysis graph registered under m_graph and
 * "LinguisticMetaData" from @p analysis, then writes one XML element per
 * specific entity (through outputEntity) on the stream returned by
 * initialize(). The root element is <entities ...> when m_compactFormat is
 * set and <specific_entities> otherwise.
 *
 * Two enumeration strategies exist:
 *  - m_followGraph: breadth-first walk of the analysis graph from its first
 *    vertex; only vertices reachable in that graph are considered, so nested
 *    entities are not reported;
 *  - otherwise: every vertex of the annotation graph carrying a
 *    "SpecificEntity" annotation and an integer annotation named m_graph.
 *
 * @param analysis container holding the data produced by previous process units
 * @return MISSING_DATA if one of the required data is absent, SUCCESS_ID otherwise
 */
LimaStatusCode SpecificEntitiesXmlLogger::process(
  AnalysisContent& analysis) const
{
  SELOGINIT;
  LDEBUG << "SpecificEntitiesXmlLogger::process";
  TimeUtils::updateCurrentTime();

  // All required data are checked before the output stream is created, so
  // that no early return can happen while the stream is owned by this function.
  AnnotationData* annotationData =
    static_cast< AnnotationData* >(analysis.getData("AnnotationData"));
  if (annotationData == 0)
  {
    LERROR << "no annotationData ! abort";
    return MISSING_DATA;
  }

  LinguisticAnalysisStructure::AnalysisGraph* graphp =
    static_cast<LinguisticAnalysisStructure::AnalysisGraph*>(analysis.getData(m_graph));
  if (graphp == 0)
  {
    LERROR << "no graph "<< m_graph <<" ! abort";
    return MISSING_DATA;
  }
  const LinguisticAnalysisStructure::AnalysisGraph& graph = *graphp;
  LinguisticGraph* lingGraph = const_cast<LinguisticGraph*>(graph.getGraph());
  VertexTokenPropertyMap tokenMap = get(vertex_token, *lingGraph);

  LinguisticMetaData* metadata =
    static_cast<LinguisticMetaData*>(analysis.getData("LinguisticMetaData"));
  if (metadata == 0)
  {
    LERROR << "no LinguisticMetaData ! abort";
    return MISSING_DATA;
  }

  // Owned by this function: released by the delete at the end.
  DumperStream* dstream = initialize(analysis);
  ostream& out = dstream->out();

  // The three metadata below are optional: they are not set in analyzeText
  // (only in analyzeXmlDocuments), hence the silently ignored exceptions.
  uint64_t offset(0);
  try
  {
    offset = atoi(metadata->getMetaData("StartOffset").c_str());
  }
  catch (LinguisticProcessingException& )
  {
    // keep the default value
  }

  uint64_t offsetIndexingNode(0);
  try
  {
    offsetIndexingNode = atoi(metadata->getMetaData("StartOffsetIndexingNode").c_str());
  }
  catch (LinguisticProcessingException& )
  {
    // keep the default value
  }

  std::string docId("");
  try
  {
    docId = metadata->getMetaData("DocId");
  }
  catch (LinguisticProcessingException& )
  {
    // keep the default value
  }

  if (m_compactFormat)
  {
    out << "<entities docid=\"" << docId
        << "\" offsetNode=\"" << offsetIndexingNode
        << "\" offset=\"" << offset
        << "\">" << endl;
  }
  else
  {
    out << "<specific_entities>" << endl;
  }

  if (m_followGraph)
  {
    // Instead of looking at all annotations, follow the graph (in the
    // morphological graph, some vertices are not related to the main graph:
    // idiomatic expression parts and named entity parts).
    // -> this will not include nested entities.
    // The graph validated above is reused: fetching it a second time only
    // added an error path that returned without releasing the stream.
    const LinguisticGraphVertex lastVertex = graphp->lastVertex();

    std::queue<LinguisticGraphVertex> toVisit;
    std::set<LinguisticGraphVertex> visited;
    const LinguisticGraphVertex firstVertex = graphp->firstVertex();
    visited.insert(firstVertex); // never enqueue the start vertex twice
    toVisit.push(firstVertex);

    LinguisticGraphOutEdgeIt outItr, outItrEnd;
    while (!toVisit.empty())
    {
      const LinguisticGraphVertex v = toVisit.front();
      toVisit.pop();
      if (v == lastVertex)
      {
        // the last vertex is neither expanded nor reported
        continue;
      }

      for (boost::tie(outItr, outItrEnd) = out_edges(v, *lingGraph);
           outItr != outItrEnd; ++outItr)
      {
        const LinguisticGraphVertex next = target(*outItr, *lingGraph);
        // insert() tells whether the vertex was already known
        if (visited.insert(next).second)
        {
          toVisit.push(next);
        }
      }

      const SpecificEntityAnnotation* annot =
        getSpecificEntityAnnotation(v, annotationData);
      if (annot != 0)
      {
        outputEntity(out, v, annot, tokenMap, offset);
      }
    }
  }
  else
  {
    // take all annotations
    AnnotationGraphVertexIt itv, itv_end;
    boost::tie(itv, itv_end) = vertices(annotationData->getGraph());
    for (; itv != itv_end; ++itv)
    {
      if (!annotationData->hasAnnotation(*itv, Common::Misc::utf8stdstring2limastring("SpecificEntity")))
      {
        continue;
      }

      const SpecificEntityAnnotation* annot = 0;
      try
      {
        annot = annotationData->annotation(*itv, Common::Misc::utf8stdstring2limastring("SpecificEntity"))
          .pointerValue<SpecificEntityAnnotation>();
      }
      catch (const boost::bad_any_cast& )
      {
        LERROR << "This annotation is not a SpecificEntity; SE not logged";
        continue;
      }

      // retrieve the id of the vertex created in the m_graph graph;
      // annotation vertices without such a link are skipped
      if (!annotationData->hasIntAnnotation(*itv, Common::Misc::utf8stdstring2limastring(m_graph)))
      {
        continue;
      }
      const LinguisticGraphVertex v =
        annotationData->intAnnotation(*itv, Common::Misc::utf8stdstring2limastring(m_graph));
      outputEntity(out, v, annot, tokenMap, offset);
    }
  }

  if (m_compactFormat)
  {
    out << "</entities>" << endl;
  }
  else
  {
    out << "</specific_entities>" << endl;
  }

  delete dstream;
  TimeUtils::logElapsedTime("SpecificEntitiesXmlLogger");
  return SUCCESS_ID;
}
/**
 * @brief Dumps semantic annotations and semantic relations as XML.
 *
 * Reads "AnnotationData", the analysis graph registered under m_graph and
 * "LinguisticMetaData" from @p analysis, opens the log file derived from the
 * "FileName" metadata (through openLogFile) and writes a <relations> element
 * containing:
 *  - one <annotation> element per annotation-graph vertex carrying a
 *    "SemanticAnnotation" annotation;
 *  - one <relation> element per annotation-graph edge carrying a
 *    "SemanticRelation" annotation, with its source and target vertices.
 *
 * @param analysis container holding the data produced by previous process units
 * @return MISSING_DATA if one of the required data is absent, UNKNOWN_ERROR if
 *         the log file cannot be opened, SUCCESS_ID otherwise
 */
LimaStatusCode SemanticRelationsXmlLogger::process(
  AnalysisContent& analysis) const
{
  TimeUtils::updateCurrentTime();
  SEMLOGINIT;
  // entry trace only: this is not an error condition
  LDEBUG << "SemanticRelationsXmlLogger" << LENDL;

  // Required data are validated before being dereferenced.
  AnnotationData* annotationData =
    static_cast< AnnotationData* >(analysis.getData("AnnotationData"));
  if (annotationData == 0)
  {
    LERROR << "no annotationData ! abort" << LENDL;
    return MISSING_DATA;
  }

  LinguisticAnalysisStructure::AnalysisGraph* graphp =
    static_cast<LinguisticAnalysisStructure::AnalysisGraph*>(analysis.getData(m_graph));
  if (graphp == 0)
  {
    LERROR << "no graph " << m_graph << " ! abort" << LENDL;
    return MISSING_DATA;
  }
  const LinguisticAnalysisStructure::AnalysisGraph& graph = *graphp;
  LinguisticGraph* lingGraph = const_cast<LinguisticGraph*>(graph.getGraph());
  VertexTokenPropertyMap tokenMap = get(vertex_token, *lingGraph);

  LinguisticMetaData* metadata =
    static_cast<LinguisticMetaData*>(analysis.getData("LinguisticMetaData"));
  if (metadata == 0)
  {
    LERROR << "no LinguisticMetaData ! abort" << LENDL;
    return MISSING_DATA;
  }

  ofstream out;
  if (!openLogFile(out, metadata->getMetaData("FileName")))
  {
    LERROR << "Can't open log file " << LENDL;
    return UNKNOWN_ERROR;
  }

  // The three metadata below are optional: they are not set in analyzeText
  // (only in analyzeXmlDocuments), hence the silently ignored exceptions.
  uint64_t offset(0);
  try
  {
    offset = atoi(metadata->getMetaData("StartOffset").c_str());
  }
  catch (LinguisticProcessingException& )
  {
    // keep the default value
  }

  uint64_t offsetIndexingNode(0);
  try
  {
    offsetIndexingNode = atoi(metadata->getMetaData("StartOffsetIndexingNode").c_str());
  }
  catch (LinguisticProcessingException& )
  {
    // keep the default value
  }

  std::string docId("");
  try
  {
    docId = metadata->getMetaData("DocId");
  }
  catch (LinguisticProcessingException& )
  {
    // keep the default value
  }

  out << "<relations docid=\"" << docId
      << "\" offsetNode=\"" << offsetIndexingNode
      << "\">" << endl;

  const AnnotationGraph& annotGraph = annotationData->getGraph();

  // look at all vertices for annotations
  AnnotationGraphVertexIt itv, itv_end;
  boost::tie(itv, itv_end) = vertices(annotGraph);
  for (; itv != itv_end; ++itv)
  {
    LDEBUG << "SemanticRelationsXmlLogger on annotation vertex " << *itv << LENDL;
    if (!annotationData->hasAnnotation(*itv, Common::Misc::utf8stdstring2limastring("SemanticAnnotation")))
    {
      continue;
    }

    const SemanticAnnotation* annot = 0;
    try
    {
      annot = annotationData->annotation(*itv, Common::Misc::utf8stdstring2limastring("SemanticAnnotation"))
        .pointerValue<SemanticAnnotation>();
    }
    catch (const boost::bad_any_cast& )
    {
      // the stored value does not hold the type expected for vertices
      LERROR << "This annotation is not a SemanticAnnotation" << LENDL;
      continue;
    }

    // output
    out << "<annotation type=\"" << annot->getType() << "\">" << endl
        << vertexStringForSemanticAnnotation("vertex", *itv, tokenMap, annotationData, offset)
        << "</annotation>" << endl;
  }

  // look at all edges for relations
  AnnotationGraphEdgeIt it, it_end;
  boost::tie(it, it_end) = edges(annotGraph);
  for (; it != it_end; ++it)
  {
    LDEBUG << "SemanticRelationsXmlLogger on annotation edge "
           << source(*it, annotGraph) << "->"
           << target(*it, annotGraph) << LENDL;
    if (!annotationData->hasAnnotation(*it, Common::Misc::utf8stdstring2limastring("SemanticRelation")))
    {
      continue;
    }
    LDEBUG << "found semantic relation" << LENDL;

    const SemanticRelationAnnotation* annot = 0;
    try
    {
      annot = annotationData->annotation(*it, Common::Misc::utf8stdstring2limastring("SemanticRelation"))
        .pointerValue<SemanticRelationAnnotation>();
    }
    catch (const boost::bad_any_cast& )
    {
      // the stored value does not hold the type expected for edges
      LERROR << "This annotation is not a SemanticRelation" << LENDL;
      continue;
    }

    // output
    out << "<relation type=\"" << annot->type() << "\">" << endl
        << vertexStringForSemanticAnnotation("source", source(*it, annotGraph), tokenMap, annotationData, offset)
        << vertexStringForSemanticAnnotation("target", target(*it, annotGraph), tokenMap, annotationData, offset)
        << "</relation>" << endl;
  }

  out << "</relations>" << endl;
  out.close();

  TimeUtils::logElapsedTime("SemanticRelationsXmlLogger");
  return SUCCESS_ID;
}