/**
 * Dumps the semantic annotations (carried by annotation-graph vertices) and
 * the semantic relations (carried by annotation-graph edges) as an XML
 * <relations> document in the log file opened through openLogFile().
 *
 * @param analysis analysis content from which "AnnotationData", the graph
 *        registered under m_graph and "LinguisticMetaData" are read
 * @return MISSING_DATA when one of these three data is absent,
 *         UNKNOWN_ERROR when the log file cannot be opened,
 *         SUCCESS_ID otherwise
 */
LimaStatusCode SemanticRelationsXmlLogger::process(AnalysisContent& analysis) const
{
  TimeUtils::updateCurrentTime();
  SEMLOGINIT;
  // Plain entry trace: logged at debug level, not as an error.
  LDEBUG << "SemanticRelationsXmlLogger" << LENDL;

  AnnotationData* annotationData =
    static_cast<AnnotationData*>(analysis.getData("AnnotationData"));
  if (annotationData == 0)
  {
    LERROR << "no annotationData ! abort" << LENDL;
    return MISSING_DATA;
  }

  LinguisticAnalysisStructure::AnalysisGraph* graphp =
    static_cast<LinguisticAnalysisStructure::AnalysisGraph*>(analysis.getData(m_graph));
  if (graphp == 0)
  {
    LERROR << "no graph " << m_graph << " ! abort" << LENDL;
    return MISSING_DATA;
  }
  const LinguisticAnalysisStructure::AnalysisGraph& graph = *graphp;
  LinguisticGraph* lingGraph = const_cast<LinguisticGraph*>(graph.getGraph());
  VertexTokenPropertyMap tokenMap = get(vertex_token, *lingGraph);

  LinguisticMetaData* metadata =
    static_cast<LinguisticMetaData*>(analysis.getData("LinguisticMetaData"));
  if (metadata == 0)
  {
    LERROR << "no LinguisticMetaData ! abort" << LENDL;
    return MISSING_DATA;
  }

  ofstream out;
  if (!openLogFile(out, metadata->getMetaData("FileName")))
  {
    LERROR << "Can't open log file " << LENDL;
    return UNKNOWN_ERROR;
  }

  // The three metadata below are optional: they are not set in analyzeText
  // (only in analyzeXmlDocuments), so a missing value keeps the default.
  uint64_t offset(0);
  try
  {
    offset = atoi(metadata->getMetaData("StartOffset").c_str());
  }
  catch (const LinguisticProcessingException&)
  {
    // keep default
  }

  uint64_t offsetIndexingNode(0);
  try
  {
    offsetIndexingNode = atoi(metadata->getMetaData("StartOffsetIndexingNode").c_str());
  }
  catch (const LinguisticProcessingException&)
  {
    // keep default
  }

  std::string docId("");
  try
  {
    docId = metadata->getMetaData("DocId");
  }
  catch (const LinguisticProcessingException&)
  {
    // keep default
  }

  out << "<relations docid=\"" << docId
      << "\" offsetNode=\"" << offsetIndexingNode
      << "\">" << endl;

  const AnnotationGraph& annotGraph = annotationData->getGraph();

  // First pass: every annotation vertex holding a "SemanticAnnotation".
  AnnotationGraphVertexIt itv, itv_end;
  boost::tie(itv, itv_end) = vertices(annotGraph);
  for (; itv != itv_end; ++itv)
  {
    LDEBUG << "SemanticRelationsXmlLogger on annotation vertex " << *itv << LENDL;
    if (!annotationData->hasAnnotation(*itv, Common::Misc::utf8stdstring2limastring("SemanticAnnotation")))
    {
      continue;
    }

    const SemanticAnnotation* annot = 0;
    try
    {
      annot = annotationData->annotation(*itv, Common::Misc::utf8stdstring2limastring("SemanticAnnotation"))
        .pointerValue<SemanticAnnotation>();
    }
    catch (const boost::bad_any_cast&)
    {
      // The stored value is not of the expected type: skip this vertex.
      LERROR << "This annotation is not a SemanticAnnotation" << LENDL;
      continue;
    }

    out << "<annotation type=\"" << annot->getType() << "\">" << endl
        << vertexStringForSemanticAnnotation("vertex", *itv, tokenMap, annotationData, offset)
        << "</annotation>" << endl;
  }

  // Second pass: every annotation edge holding a "SemanticRelation".
  AnnotationGraphEdgeIt it, it_end;
  boost::tie(it, it_end) = edges(annotGraph);
  for (; it != it_end; ++it)
  {
    LDEBUG << "SemanticRelationsXmlLogger on annotation edge "
           << source(*it, annotGraph) << "->"
           << target(*it, annotGraph) << LENDL;
    if (!annotationData->hasAnnotation(*it, Common::Misc::utf8stdstring2limastring("SemanticRelation")))
    {
      continue;
    }
    LDEBUG << "found semantic relation" << LENDL;

    const SemanticRelationAnnotation* annot = 0;
    try
    {
      annot = annotationData->annotation(*it, Common::Misc::utf8stdstring2limastring("SemanticRelation"))
        .pointerValue<SemanticRelationAnnotation>();
    }
    catch (const boost::bad_any_cast&)
    {
      // The stored value is not of the expected type: skip this edge.
      LERROR << "This annotation is not a SemanticRelation" << LENDL;
      continue;
    }

    out << "<relation type=\"" << annot->type() << "\">" << endl
        << vertexStringForSemanticAnnotation("source", source(*it, annotGraph), tokenMap, annotationData, offset)
        << vertexStringForSemanticAnnotation("target", target(*it, annotGraph), tokenMap, annotationData, offset)
        << "</relation>" << endl;
  }

  out << "</relations>" << endl;
  out.close();

  TimeUtils::logElapsedTime("SemanticRelationsXmlLogger");
  return SUCCESS_ID;
}
/**
 * Dumps the specific entities found in the analysis as XML on the stream
 * returned by initialize().
 *
 * The root element is <entities ...> when m_compactFormat is set and
 * <specific_entities> otherwise. Entities are collected either by walking
 * the graph registered under m_graph (m_followGraph set) or by scanning
 * every vertex of the annotation graph.
 *
 * @param analysis analysis content from which "AnnotationData", the graph
 *        registered under m_graph and "LinguisticMetaData" are read
 * @return MISSING_DATA when one of these data is absent, SUCCESS_ID otherwise
 */
LimaStatusCode SpecificEntitiesXmlLogger::process(AnalysisContent& analysis) const
{
  SELOGINIT;
  LDEBUG << "SpecificEntitiesXmlLogger::process";
  TimeUtils::updateCurrentTime();

  AnnotationData* annotationData =
    static_cast<AnnotationData*>(analysis.getData("AnnotationData"));
  if (annotationData == 0)
  {
    LERROR << "no annotationData ! abort";
    return MISSING_DATA;
  }

  LinguisticAnalysisStructure::AnalysisGraph* graphp =
    static_cast<LinguisticAnalysisStructure::AnalysisGraph*>(analysis.getData(m_graph));
  if (graphp == 0)
  {
    LERROR << "no graph "<< m_graph <<" ! abort";
    return MISSING_DATA;
  }
  const LinguisticAnalysisStructure::AnalysisGraph& graph = *graphp;
  LinguisticGraph* lingGraph = const_cast<LinguisticGraph*>(graph.getGraph());
  VertexTokenPropertyMap tokenMap = get(vertex_token, *lingGraph);

  LinguisticMetaData* metadata =
    static_cast<LinguisticMetaData*>(analysis.getData("LinguisticMetaData"));
  if (metadata == 0)
  {
    LERROR << "no LinguisticMetaData ! abort";
    return MISSING_DATA;
  }

  // Owned by this function: must be deleted on every exit path below.
  DumperStream* dstream = initialize(analysis);
  ostream& out = dstream->out();

  // The three metadata below are optional: they are not set in analyzeText
  // (only in analyzeXmlDocuments), so a missing value keeps the default.
  uint64_t offset(0);
  try
  {
    offset = atoi(metadata->getMetaData("StartOffset").c_str());
  }
  catch (const LinguisticProcessingException&)
  {
    // keep default
  }

  uint64_t offsetIndexingNode(0);
  try
  {
    offsetIndexingNode = atoi(metadata->getMetaData("StartOffsetIndexingNode").c_str());
  }
  catch (const LinguisticProcessingException&)
  {
    // keep default
  }

  std::string docId("");
  try
  {
    docId = metadata->getMetaData("DocId");
  }
  catch (const LinguisticProcessingException&)
  {
    // keep default
  }

  if (m_compactFormat)
  {
    out << "<entities docid=\"" << docId
        << "\" offsetNode=\"" << offsetIndexingNode
        << "\" offset=\"" << offset
        << "\">" << endl;
  }
  else
  {
    out << "<specific_entities>" << endl;
  }

  if (m_followGraph)
  {
    // Instead of looking at all annotations, follow the graph (in the
    // morphological graph, some vertices are not related to the main graph:
    // idiomatic expression parts and named entity parts).
    // -> this will not include nested entities
    AnalysisGraph* tokenList = static_cast<AnalysisGraph*>(analysis.getData(m_graph));
    if (tokenList == 0)
    {
      LERROR << "graph " << m_graph << " has not been produced: check pipeline";
      // Release the stream before bailing out; the original leaked it here.
      delete dstream;
      return MISSING_DATA;
    }
    LinguisticGraph* followedGraph = tokenList->getGraph();

    // Breadth-first walk from the first vertex; the last vertex is neither
    // expanded nor dumped.
    std::queue<LinguisticGraphVertex> pending;
    std::set<LinguisticGraphVertex> seen;
    pending.push(tokenList->firstVertex());

    LinguisticGraphOutEdgeIt outItr, outItrEnd;
    while (!pending.empty())
    {
      LinguisticGraphVertex current = pending.front();
      pending.pop();
      if (current == tokenList->lastVertex())
      {
        continue;
      }

      for (boost::tie(outItr, outItrEnd) = out_edges(current, *followedGraph);
           outItr != outItrEnd; ++outItr)
      {
        LinguisticGraphVertex next = target(*outItr, *followedGraph);
        if (seen.find(next) == seen.end())
        {
          seen.insert(next);
          pending.push(next);
        }
      }

      const SpecificEntityAnnotation* annot =
        getSpecificEntityAnnotation(current, annotationData);
      if (annot != 0)
      {
        outputEntity(out, current, annot, tokenMap, offset);
      }
    }
  }
  else
  {
    // Take all annotations: scan every vertex of the annotation graph.
    AnnotationGraphVertexIt itv, itv_end;
    boost::tie(itv, itv_end) = vertices(annotationData->getGraph());
    for (; itv != itv_end; ++itv)
    {
      if (!annotationData->hasAnnotation(*itv, Common::Misc::utf8stdstring2limastring("SpecificEntity")))
      {
        continue;
      }

      const SpecificEntityAnnotation* annot = 0;
      try
      {
        annot = annotationData->annotation(*itv, Common::Misc::utf8stdstring2limastring("SpecificEntity"))
          .pointerValue<SpecificEntityAnnotation>();
      }
      catch (const boost::bad_any_cast&)
      {
        LERROR << "This annotation is not a SpecificEntity; SE not logged";
        continue;
      }

      // Retrieve the id of the morphological vertex that was created; an
      // annotation vertex with no integer annotation named after m_graph is
      // skipped.
      if (!annotationData->hasIntAnnotation(*itv, Common::Misc::utf8stdstring2limastring(m_graph)))
      {
        continue;
      }
      LinguisticGraphVertex v =
        annotationData->intAnnotation(*itv, Common::Misc::utf8stdstring2limastring(m_graph));
      outputEntity(out, v, annot, tokenMap, offset);
    }
  }

  if (m_compactFormat)
  {
    out << "</entities>" << endl;
  }
  else
  {
    out << "</specific_entities>" << endl;
  }

  delete dstream;
  TimeUtils::logElapsedTime("SpecificEntitiesXmlLogger");
  return SUCCESS_ID;
}
/**
 * Dumps every "Coreferent" annotation of the annotation graph as a
 * <reference> element inside a <coreferences> XML document, written to the
 * log file opened through openLogFile().
 *
 * @param analysis analysis content from which "AnnotationData", the graph
 *        registered under m_graph and "LinguisticMetaData" are read
 * @return MISSING_DATA when one of these three data is absent,
 *         UNKNOWN_ERROR when the log file cannot be opened,
 *         SUCCESS_ID otherwise
 */
LimaStatusCode CorefSolvingNormalizedXmlLogger::process(AnalysisContent& analysis) const
{
  TimeUtils::updateCurrentTime();

  AnnotationData* annotationData =
    static_cast<AnnotationData*>(analysis.getData("AnnotationData"));
  if (annotationData == 0)
  {
    COREFSOLVERLOGINIT;
    LERROR << "no annotationData ! abort" << LENDL;
    return MISSING_DATA;
  }

  LinguisticAnalysisStructure::AnalysisGraph* graphp =
    static_cast<LinguisticAnalysisStructure::AnalysisGraph*>(analysis.getData(m_graph));
  if (graphp == 0)
  {
    COREFSOLVERLOGINIT;
    LERROR << "no graph " << m_graph << " ! abort" << LENDL;
    return MISSING_DATA;
  }
  const LinguisticAnalysisStructure::AnalysisGraph& graph = *graphp;

  LinguisticMetaData* metadata =
    static_cast<LinguisticMetaData*>(analysis.getData("LinguisticMetaData"));
  if (metadata == 0)
  {
    COREFSOLVERLOGINIT;
    LERROR << "no LinguisticMetaData ! abort" << LENDL;
    return MISSING_DATA;
  }

  ofstream out;
  if (!openLogFile(out, metadata->getMetaData("FileName")))
  {
    COREFSOLVERLOGINIT;
    LERROR << "Can't open log file " << LENDL;
    return UNKNOWN_ERROR;
  }

  out << "<coreferences>" << endl;

  AnnotationGraphVertexIt itv, itv_end;
  boost::tie(itv, itv_end) = vertices(annotationData->getGraph());
  for (; itv != itv_end; ++itv)
  {
    if (!annotationData->hasAnnotation(*itv, utf8stdstring2limastring("Coreferent")))
    {
      continue;
    }

    CoreferentAnnotation* annot = 0;
    try
    {
      annot = annotationData->annotation(*itv, utf8stdstring2limastring("Coreferent"))
        .pointerValue<CoreferentAnnotation>();
    }
    catch (const boost::bad_any_cast&)
    {
      COREFSOLVERLOGINIT;
      LERROR << "One annotation on vertex " << *itv
             << " you are trying to cast is not a Coreference; Coreference not logged" << LENDL;
      // Diagnostic dump of annotation names for ids 0..18.
      // NOTE(review): the bound 19 is hard-coded in the original; confirm
      // that annotationName() accepts every id in that range.
      for (int i = 0; i < 19 ; i++)
      {
        LERROR << "annot "<< i << " : "
               << limastring2utf8stdstring(annotationData->annotationName(i)) << LENDL ;
      }
      continue;
    }

    LinguisticProcessing::LinguisticAnalysisStructure::Token* token =
      get(vertex_token, *graph.getGraph(), annot->morphVertex());
    if (token == 0)
    {
      COREFSOLVERLOGINIT;
      LERROR << "Vertex " << *itv
             << " has no entry in the analysis graph token map. This should not happen !!" << LENDL;
      continue;
    }

    // Look for an antecedent among the targets of the outgoing edges. The
    // result is currently not written (see the disabled block below), but
    // the scan still reports badly typed "Coreferent" annotations.
    CoreferentAnnotation* antecedent = 0;
    AnnotationGraphOutEdgeIt it, it_end;
    boost::tie(it, it_end) =
      boost::out_edges(static_cast<AnnotationGraphVertex>(*itv), annotationData->getGraph());
    for (; it != it_end; ++it)
    {
      if (!annotationData->hasAnnotation(target(*it, annotationData->getGraph()),
                                         utf8stdstring2limastring("Coreferent")))
      {
        continue;
      }
      try
      {
        antecedent = annotationData->annotation(target(*it, annotationData->getGraph()),
                                                utf8stdstring2limastring("Coreferent"))
          .pointerValue<CoreferentAnnotation>();
      }
      catch (const boost::bad_any_cast&)
      {
        COREFSOLVERLOGINIT;
        LERROR << "One annotation on vertex you are trying to cast resulting from an edge out of "
               << *itv << " is not a Coreference; Coreference not logged" << LENDL;
        continue;
      }
    }

    // token was read from the same property map entry, so it is reused for
    // the position instead of repeating the lookup.
    out << " <reference>\n"
        << " <pos>" << token->position() << "</pos>\n"
        << " <len>" << token->stringForm().length() << "</len>\n"
        << " <string>"<< limastring2utf8stdstring(transcodeToXmlEntities(token->stringForm())) << "</string>\n"
        << " <npId>" << annot->id() << "</npId>\n"
        << " <posVertex>" << annot->morphVertex() << "</posVertex>\n";
    // Antecedent output is deliberately disabled in the original (the
    // hasAntecedent condition was replaced by false); kept as is.
    if (false)
    {
      out << " <npRef>" << antecedent->id() << "</npRef>\n";
      out << " <refPosVertex>" << antecedent->morphVertex() << "</refPosVertex>\n";
    }
    out << " <categ>" << annot->categ() << "</categ>\n"
        << " </reference>\n"
        << endl;
  }

  out << "</coreferences>" << endl;
  out.close();

  TimeUtils::logElapsedTime("CorefSolvingNormalizedXmlLogger");
  return SUCCESS_ID;
}