/**
 * @brief Tests whether the entity annotation attached to a graph vertex has a
 *        type belonging to the entity group of this transition.
 *
 * The annotation vertex matching (graph id, @p v, "annot") is looked up in the
 * "AnnotationData" of @p analysis; the annotation named m_entityAnnotation is
 * then read on that vertex and the group id of its type is compared with
 * m_entityGroupId.
 *
 * @param graph    analysis graph the vertex belongs to (only its id is used)
 * @param v        vertex of @p graph to test
 * @param analysis analysis content providing the "AnnotationData"
 * @return true if the group ids are equal; false if they differ, or if the
 *         annotation data, the matching annotation vertex or the entity
 *         annotation itself is missing.
 */
bool EntityGroupTransition::
compare(const LinguisticAnalysisStructure::AnalysisGraph& graph,
        const LinguisticGraphVertex& v,
        AnalysisContent& analysis,
        const LinguisticAnalysisStructure::Token* /*token*/,
        const LinguisticAnalysisStructure::MorphoSyntacticData* /*data*/) const
{
  // Every path of this function logs, so the logger is set up once here.
  AULOGINIT;

  // should compare to vertex ?
  AnnotationData* annotationData =
    static_cast< AnnotationData* >(analysis.getData("AnnotationData"));
  if (annotationData == 0)
  {
    LDEBUG << "EntityGroupTransition::compare: no annotation graph available !";
    return false;
  }

  // find annotationGraphVertex matching the vertex of the current graph
  std::set<AnnotationGraphVertex> matches =
    annotationData->matches(graph.getGraphId(), v, "annot");
  if (matches.empty())
  {
    // The message used to read "annotation (...) available", which stated the
    // opposite of what happened on this path.
    LDEBUG << "EntityGroupTransition::compare: no annotation ("
           << graph.getGraphId() << ", " << v << ", \"annot\") available";
    return false;
  }

  // Only the first matching annotation vertex is considered.
  AnnotationGraphVertex annotVertex = *(matches.begin());
  if (!annotationData->hasAnnotation(annotVertex, m_entityAnnotation))
  {
    LDEBUG << "EntityGroupTransition::compare: No " << m_entityAnnotation
           << " annotation available on " << v;
    return false;
  }

  const SpecificEntityAnnotation* se =
    annotationData->annotation(annotVertex, m_entityAnnotation).
      pointerValue<SpecificEntityAnnotation>();
  if (se == 0)
  {
    // Defensive: never dereference a null annotation pointer.
    LDEBUG << "EntityGroupTransition::compare: null " << m_entityAnnotation
           << " annotation on " << v;
    return false;
  }

  Common::MediaticData::EntityType type = se->getType();
  LDEBUG << "EntityGroupTransition::compare: type = " << type
         << ", groupId = " << type.getGroupId();
  LDEBUG << "EntityGroupTransition::compare: m_entityGroupId = " << m_entityGroupId;
  LDEBUG << "EntityGroupTransition::compare: tests m_entityGroupId == type.getGroupId() = "
         << (m_entityGroupId == type.getGroupId());

  return (m_entityGroupId == type.getGroupId());
}
/**
 * @brief Collects the tokens of the analysis graph, records those carrying a
 *        "SpecificEntity" annotation, and stores the co-references found
 *        between the recorded entities in a new "CoreferenceData".
 *
 * @param analysis analysis content; must provide "LinguisticMetaData",
 *                 "AnalysisGraph" and "AnnotationData". A new
 *                 "CoreferenceData" is registered on it with setData().
 * @return MISSING_DATA if one of the required data is absent,
 *         SUCCESS_ID otherwise.
 */
LimaStatusCode EntityTracker::process(AnalysisContent& analysis) const
{
  TimeUtils::updateCurrentTime();
  SELOGINIT;

  // The metadata is not used below; its presence is only checked.
  LinguisticMetaData* metadata=static_cast<LinguisticMetaData*>(analysis.getData("LinguisticMetaData"));
  if (metadata == 0)
  {
    LERROR << "no LinguisticMetaData ! abort" << LENDL;
    return MISSING_DATA;
  }

  AnalysisGraph* anagraph=static_cast<AnalysisGraph*>(analysis.getData("AnalysisGraph"));
  if (anagraph==0)
  {
    LERROR << "no graph 'AnaGraph' available !" << LENDL;
    return MISSING_DATA;
  }

  AnnotationData* annotationData = static_cast< AnnotationData* >(analysis.getData("AnnotationData"));
  if (annotationData==0)
  {
    LERROR << "no annotation graph available !" << LENDL;
    return MISSING_DATA;
  }

  // add new data to store co-references
  // (the pointer is handed over to analysis through setData)
  CoreferenceData* corefData = new CoreferenceData;
  analysis.setData("CoreferenceData",corefData);

  CoreferenceEngine ref;
  LinguisticGraph* graph=anagraph->getGraph();
  LinguisticGraphVertex lastVertex=anagraph->lastVertex();
  LinguisticGraphVertex firstVertex=anagraph->firstVertex();

  std::queue<LinguisticGraphVertex> toVisit;
  std::set<LinguisticGraphVertex> visited;
  LinguisticGraphOutEdgeIt outItr,outItrEnd;

  // Breadth-first traversal from the first vertex. Neither the first vertex
  // nor the last vertex of the graph is processed: the former is skipped
  // through the `first` flag, the latter is dropped as soon as it is dequeued.
  // (The former `last` flag could never become true and has been removed.)
  toVisit.push(firstVertex);
  bool first=true;
  while (!toVisit.empty())
  {
    LinguisticGraphVertex v=toVisit.front();
    toVisit.pop();
    if (v == lastVertex)
    {
      continue;
    }

    // schedule the successors that have not been seen yet
    for (boost::tie(outItr,outItrEnd)=out_edges(v,*graph); outItr!=outItrEnd; outItr++)
    {
      LinguisticGraphVertex next=target(*outItr,*graph);
      if (visited.find(next)==visited.end())
      {
        visited.insert(next);
        toVisit.push(next);
      }
    }

    if (first)
    {
      first=false;
      continue;
    }

    // check if vertex corresponds to a specific entity
    std::set< AnnotationGraphVertex > matches = annotationData->matches("AnalysisGraph",v,"annot");
    for (std::set< AnnotationGraphVertex >::const_iterator it = matches.begin();
         it != matches.end(); it++)
    {
      AnnotationGraphVertex vx=*it;
      // NOTE(review): the token is looked up in the linguistic graph with the
      // annotation vertex `vx`, not with `v` — confirm this is intended.
      Token* t=get(vertex_token,*graph,vx);
      if (t == 0)
      {
        // No token on this vertex: nothing can be stored. The original code
        // dereferenced the null pointer in storeAnnot() in this situation.
        continue;
      }

      /* store every token */
      ref.storeAllToken(*t);

      /* additionally store the tokens annotated as specific entities */
      if (annotationData->hasAnnotation(vx, Common::Misc::utf8stdstring2limastring("SpecificEntity")))
      {
        ref.storeAnnot(*t);
      }
    }
  }

  /* search for co-references between the named entities detected above */
  // Bind the container once: taking begin() and end() from two separate
  // getAnnotations() calls is undefined if the getter returns by value.
  const std::vector<Token>& annotations = ref.getAnnotations();
  std::vector<Token>::const_iterator it1=annotations.begin(),
                                     it1_end=annotations.end();
  for (; it1 != it1_end; it1++)
  {
    const std::vector<Token> coreferences = ref.searchCoreference(*it1);
    if (coreferences.size() > 0)
    {
      corefData->push_back(coreferences);
    }
    // NOTE(review): second call kept from the original, result unused; drop
    // it once searchCoreference() is confirmed to have no side effect.
    ref.searchCoreference(*it1);
  }

  return SUCCESS_ID;
}
bool CreateIdiomaticAlternative::operator()(Automaton::RecognizerMatch& result, AnalysisContent& analysis) const { #ifdef DEBUG_LP MORPHOLOGINIT; LDEBUG << "CreateIdiomaticAlternative, match is " << result; LDEBUG << " expression is " << (result.isContiguous()?"":"non") << " contiguous and" << (result.isContextual()?" non":"") << " absolute"; #endif if (result.empty()) return false; const LinguisticAnalysisStructure::AnalysisGraph& graph = *(result.getGraph()); AnnotationData* annotationData = static_cast< AnnotationData* >(analysis.getData("AnnotationData")); if (annotationData->dumpFunction("IdiomExpr") == 0) { annotationData->dumpFunction("IdiomExpr", new DumpIdiomaticExpressionAnnotation()); } RecognizerData* recoData=static_cast<RecognizerData*>(analysis.getData("RecognizerData")); std::set<LinguisticGraphVertex> addedVertices; // initialize the vertices to clear if (result.isContiguous()) { // MORPHOLOGINIT; // LDEBUG << "contiguous idiomatic expression found: " // << result.concatString(); // only one part : terms in expression are adjacent -> easy part // check if there is an overlap first if (recoData->matchOnRemovedVertices(result)) { // ignore current idiomatic expression, continue MORPHOLOGINIT; LWARN << "idiomatic expression ignored: " << Common::Misc::limastring2utf8stdstring(result.concatString()) << ": overlapping with a previous one"; return false; } // create the new token std::pair<Token*,MorphoSyntacticData*> newToken = createAlternativeToken(result); if (newToken.second->empty()) { // ignore current idiomatic expression, continue MORPHOLOGINIT; LERROR << "CreateIdiomaticAlternative::operator() Got empty morphosyntactic data. 
Abort"; delete newToken.first; delete newToken.second; return false; } // add the vertex LinguisticGraphVertex idiomaticVertex = addAlternativeVertex(newToken.first, newToken.second, const_cast<LinguisticGraph*>(graph.getGraph())); AnnotationGraphVertex agv = annotationData->createAnnotationVertex(); annotationData->addMatching("AnalysisGraph", idiomaticVertex, "annot", agv); annotationData->annotate(agv, Common::Misc::utf8stdstring2limastring("AnalysisGraph"), idiomaticVertex); IdiomaticExpressionAnnotation annot(result); GenericAnnotation ga(annot); annotationData->annotate(agv, Common::Misc::utf8stdstring2limastring("IdiomExpr"), ga); addedVertices.insert(idiomaticVertex); //create the alternative with this only vertex createBeginAlternative(result.front().getVertex(), idiomaticVertex,const_cast<LinguisticGraph&>(*graph.getGraph())); attachEndOfAlternative(idiomaticVertex, result.back().getVertex(),const_cast<LinguisticGraph&>(*graph.getGraph())); // if expression is not contextual, only keep alternative if (! 
result.isContextual()) { recoData->storeVerticesToRemove(result,const_cast<LinguisticGraph*>(graph.getGraph())); removeEdges(const_cast<LinguisticGraph&>(*graph.getGraph()), result, analysis); //recoData->setNextVertex(idiomaticVertex); // if match was on single token, use next vertices (to avoid loops) if (result.size() > 1) { recoData->setNextVertex(idiomaticVertex); } else { LinguisticGraphOutEdgeIt outItr,outItrEnd; boost::tie(outItr,outItrEnd) = out_edges(idiomaticVertex,*(graph.getGraph())); for (;outItr!=outItrEnd;outItr++) { recoData->setNextVertex(target(*outItr, *(graph.getGraph()))); } } } } else { // several parts : tough case // MORPHOLOGINIT; // LDEBUG << "non contiguous idiomatic expression found: " // << result.concatString(); // check if there is an overlap first if (recoData->matchOnRemovedVertices(result)) { // ignore current idiomatic expression, continue MORPHOLOGINIT; LWARN << "idiomatic expression ignored: " << Common::Misc::limastring2utf8stdstring(result.concatString()) << ": overlapping with a previous one"; return false; } // create the new token pair<Token*,MorphoSyntacticData*> newToken = createAlternativeToken(result); if (newToken.second->empty()) { // ignore current idiomatic expression, continue MORPHOLOGINIT; LERROR << "CreateIdiomaticAlternative::operator() Got empty morphosyntactic data. 
Abort"; delete newToken.first; delete newToken.second; return false; } // add the vertex LinguisticGraphVertex idiomaticVertex = addAlternativeVertex(newToken.first,newToken.second,const_cast<LinguisticGraph*>(graph.getGraph())); addedVertices.insert(idiomaticVertex); AnnotationGraphVertex agv = annotationData->createAnnotationVertex(); annotationData->addMatching("AnalysisGraph", idiomaticVertex, "annot", agv); annotationData->annotate(agv, Common::Misc::utf8stdstring2limastring("AnalysisGraph"), idiomaticVertex); IdiomaticExpressionAnnotation annot(result); GenericAnnotation ga(annot); annotationData->annotate(agv, Common::Misc::utf8stdstring2limastring("IdiomExpr"), ga); //create the alternative with this vertex and duplicate of other vertices deque<LinguisticGraphVertex> idiomAlternative; LinguisticGraphVertex headVertex=result.getHead(); #ifdef DEBUG_LP LDEBUG << "headVertex = " << headVertex; if (headVertex!=0) { LDEBUG << "=> " << Common::Misc::limastring2utf8stdstring(get(vertex_token,*graph.getGraph(),headVertex)->stringForm()); } #endif bool foundHead=false; bool keeping = false; std::pair< LinguisticGraphVertex, LinguisticGraphVertex > idiomPartBounds; std::set< std::pair< LinguisticGraphVertex, LinguisticGraphVertex > > edgesToRemove; RecognizerMatch::const_iterator matchItr=result.begin(); for (; matchItr!=result.end(); matchItr++) { if (!matchItr->isKept()) { if (keeping) { RecognizerMatch::const_iterator prevItr = matchItr - 1; idiomPartBounds.second = prevItr->getVertex(); keeping = false; #ifdef DEBUG_LP LDEBUG << "adding " << idiomPartBounds.first << " -> " << idiomPartBounds.second << " in edgesToRemove"; #endif edgesToRemove.insert(idiomPartBounds); } // duplicate this vertex #ifdef DEBUG_LP LDEBUG << "duplication of vertex " << matchItr->getVertex();; #endif Token* token=get(vertex_token,*graph.getGraph(),matchItr->getVertex()); MorphoSyntacticData* data = new MorphoSyntacticData(*get(vertex_data,*graph.getGraph(),matchItr->getVertex())); 
LinguisticGraphVertex dupVx = add_vertex(const_cast<LinguisticGraph&>(*graph.getGraph())); put(vertex_token,const_cast<LinguisticGraph&>(*graph.getGraph()),dupVx,token); put(vertex_data,const_cast<LinguisticGraph&>(*graph.getGraph()),dupVx,data); idiomAlternative.push_back(dupVx); AnnotationGraphVertex agv = annotationData->createAnnotationVertex(); annotationData->addMatching("AnalysisGraph", dupVx, "annot", agv); annotationData->annotate(agv, Common::Misc::utf8stdstring2limastring("AnalysisGraph"), dupVx); std::set< LinguisticGraphVertex > annotMatches = annotationData->matches("AnalysisGraph",matchItr->getVertex(),"annot"); for (std::set< LinguisticGraphVertex >::const_iterator annotIt(annotMatches.begin()); annotIt != annotMatches.end(); annotIt++) { std::set< std::string > excepted; excepted.insert("AnalysisGraph"); annotationData->cloneAnnotations(*annotIt, agv, excepted); } addedVertices.insert(dupVx); // verticesToRemove.insert(matchItr->getVertex()); } else { if (!keeping) { idiomPartBounds.first = matchItr->getVertex(); keeping = true; } #ifdef DEBUG_LP LDEBUG << "kept vertex " << matchItr->getVertex(); #endif if (matchItr->getVertex()==headVertex) { foundHead=true; #ifdef DEBUG_LP LDEBUG << "add head vertex " << idiomaticVertex; #endif idiomAlternative.push_back(idiomaticVertex); } } } if (!foundHead) { MORPHOLOGINIT; LWARN << "head token has not been found in non contiguous expression. 
" << "Idiomatic token is placed first"; idiomAlternative.push_front(idiomaticVertex); } if (keeping) { RecognizerMatch::const_iterator prevItr = matchItr - 1; idiomPartBounds.second = prevItr->getVertex(); keeping = false; #ifdef DEBUG_LP LDEBUG << "adding " << idiomPartBounds.first << " -> " << idiomPartBounds.second << " in edgesToRemove"; #endif edgesToRemove.insert(idiomPartBounds); } // link alternatives #ifdef DEBUG_LP LDEBUG << "idiomAlternative has " << idiomAlternative.size() << " vertex"; #endif createBeginAlternative(result.front().getVertex(), idiomAlternative.front(),const_cast<LinguisticGraph&>(*graph.getGraph())); { deque<LinguisticGraphVertex>::const_iterator idItr=idiomAlternative.begin(); LinguisticGraphVertex lastIdiomVx=*idItr; idItr++; while (idItr!=idiomAlternative.end()) { LinguisticGraphEdge newEdge; bool ok; boost::tie(newEdge, ok) = add_edge(lastIdiomVx,*idItr,const_cast<LinguisticGraph&>(*graph.getGraph())); #ifdef DEBUG_LP LDEBUG << "added new edge in alternatives linking: " << newEdge.m_source << " -> " << newEdge.m_target; #endif lastIdiomVx=*idItr; idItr++; } } attachEndOfAlternative(idiomAlternative.back(), result.back().getVertex(),const_cast<LinguisticGraph&>(*graph.getGraph())); // if expression is not contextual, only keep alternative if (! 
result.isContextual()) { #ifdef DEBUG_LP LDEBUG << "expression is not contextual, only keep alternative"; #endif std::set< std::pair< LinguisticGraphVertex, LinguisticGraphVertex > >::const_iterator edgesToRemoveIt, edgesToRemoveIt_end; edgesToRemoveIt = edgesToRemove.begin(); edgesToRemoveIt_end = edgesToRemove.end(); for (; edgesToRemoveIt != edgesToRemoveIt_end; edgesToRemoveIt++) { #ifdef DEBUG_LP LDEBUG << "Removing edge " << (*edgesToRemoveIt).first << " -> " << (*edgesToRemoveIt).second; #endif removeEdges(const_cast<LinguisticGraph&>(*graph.getGraph()), result, analysis); } // recoData->storeVerticesToRemove(result,*graph); // no need to check size: if several parts, more than one vertex recoData->setNextVertex(idiomaticVertex); } } RecognizerMatch::const_iterator matchItr=result.begin(); for (; matchItr!=result.end(); matchItr++) { recoData->clearUnreachableVertices( analysis, (*matchItr).getVertex()); } // recoData->clearUnreachableVertices( analysis, result.front().getVertex(), result.back().getVertex(), storedEdges); return true; }