Пример #1
0
// Dumps the analysis graph registered under m_graphId to an XML log file.
//
// The log file is opened through openLogFile() from the "FileName" entry of
// the LinguisticMetaData stored in the analysis content.
//
// Returns MISSING_DATA when the metadata or the graph is absent,
// CANNOT_OPEN_FILE_ERROR when the log file cannot be opened and SUCCESS_ID
// once the graph has been dumped.
LimaStatusCode FullTokenXmlLogger::process(
  AnalysisContent& analysis) const
{
  TimeUtils::updateCurrentTime();
  LinguisticMetaData* metadata=static_cast<LinguisticMetaData*>(analysis.getData("LinguisticMetaData"));
  if (metadata == 0)
  {
    DICTIONARYLOGINIT;
    LERROR << "no LinguisticMetaData ! abort" << LENDL;
    return MISSING_DATA;
  }

  // The graph is dereferenced by dump(): check it before touching the
  // filesystem so that no empty log file is left behind when it is missing.
  AnalysisGraph* tokenList=static_cast<AnalysisGraph*>(analysis.getData(m_graphId));
  if (tokenList == 0)
  {
    MORPHOLOGINIT;
    LERROR << "no graph '" << m_graphId << "' available ! abort" << LENDL;
    return MISSING_DATA;
  }

  std::ofstream fout;
  if (!openLogFile(fout,metadata->getMetaData("FileName"))) {
    MORPHOLOGINIT;
    LERROR << "Error: cannot open log file" << LENDL;
    return CANNOT_OPEN_FILE_ERROR;
  }

  dump(fout, *tokenList);
  fout.close();
  TimeUtils::logElapsedTime("FullTokenXmlLogger");
  return SUCCESS_ID;
}
Пример #2
0
// Transfers the relations accumulated in m_relations to the annotation graph.
//
// For each stored (vertex1, vertex2, type) triple, the annotation vertices
// matching both linguistic vertices are looked up (in the graph named by
// RecognizerData::getGraphId()). If no "SemanticRelation" annotation exists
// between them one is created; otherwise the new type is merged into the
// comma-separated, sorted, duplicate-free type list of the existing one.
//
// Returns false, leaving m_relations untouched, when AnnotationData or
// RecognizerData is missing from the analysis content. Returns true otherwise,
// after clearing m_relations. Relations whose vertices have no matching
// annotation vertex are skipped with a warning.
bool SemanticRelationData::addRelations(AnalysisContent& analysis)
{
#ifdef DEBUG_LP
  SEMANTICANALYSISLOGINIT;
#endif
  auto annotationData = static_cast< AnnotationData* >(
    analysis.getData("AnnotationData"));
  if (annotationData == 0)
  {
    SEMANTICANALYSISLOGINIT;
    LERROR << "SemanticRelationData::addRelations no AnnotationData available";
    return false;
  }

  // Register the dump function once so that the annotations can be dumped.
  if (annotationData->dumpFunction("SemanticRelation") == 0)
  {
    annotationData->dumpFunction("SemanticRelation", 
                                 new DumpSemanticRelation());
  }
  auto recoData=static_cast<RecognizerData*>(
    analysis.getData("RecognizerData"));
  if (recoData == 0)
  {
    SEMANTICANALYSISLOGINIT;
    LERROR << "SemanticRelationData::addRelations no RecognizerData available";
    return false;
  }

  for (auto i = m_relations.begin(); i != m_relations.end(); ++i) 
  {
    LinguisticGraphVertex vertex1 = i->get<0>();
    LinguisticGraphVertex vertex2 = i->get<1>();

    auto matchesVtx1 = annotationData->matches(recoData->getGraphId(),
                                               vertex1,
                                               "annot");
    auto matchesVtx2 = annotationData->matches(recoData->getGraphId(),
                                               vertex2,
                                               "annot");

    // Dereferencing begin() of an empty set is undefined behaviour: a vertex
    // without annotation counterpart cannot carry a relation, skip it.
    if (matchesVtx1.empty() || matchesVtx2.empty())
    {
      SEMANTICANALYSISLOGINIT;
      LWARN << "SemanticRelationData::addRelations no annotation vertex matching"
            << vertex1 << "or" << vertex2 << ": relation skipped";
      continue;
    }
    const auto source = *(matchesVtx1.begin());
    const auto target = *(matchesVtx2.begin());

    if (!annotationData->hasAnnotation(source, 
                                       target,
                                       "SemanticRelation"))
    {
      SemanticRelationAnnotation annot(i->get<2>());
      GenericAnnotation ga(annot);
      annotationData->annotate(source, 
                               target,
                               "SemanticRelation", 
                               ga);
    }
    else
    {
      // A relation already links the two vertices: merge the new type into
      // its comma-separated type list.
      auto annot = annotationData->annotation(source, 
                               target,
                               "SemanticRelation").pointerValue<SemanticRelationAnnotation>();
      SEMANTICANALYSISLOGINIT;
      LWARN << "SemanticRelationData::addRelations There is already a SemanticRelation between" 
            << source << "and" << target << annot->type();
      LWARN << "Adding new type" << i->get<2>();
      QString type = QString::fromUtf8(annot->type().c_str());
      QStringList typeList = type.split(',');
      typeList << i->get<2>().c_str();
      typeList.sort();
      typeList.removeDuplicates();
      annot->type(typeList.join(',').toUtf8().constData());
      LWARN << "Adding type is now" << annot->type();
    }
  }
  m_relations.clear();
  return true;
}
Пример #3
0
// Writes the analysis graph registered under m_graphId to a Graphviz dot
// file named after the "FileName" metadata followed by m_outputSuffix.
//
// Returns MISSING_DATA when the metadata or the graph is absent, SUCCESS_ID
// otherwise.
LimaStatusCode DotGraphWriter::process(AnalysisContent& analysis) const
{
  TimeUtils::updateCurrentTime();

  AnalysisGraph* const graphData =
    static_cast<AnalysisGraph*>(analysis.getData(m_graphId));
  LinguisticMetaData* const metaData =
    static_cast<LinguisticMetaData*>(analysis.getData("LinguisticMetaData"));

  // Metadata is reported first, then the graph.
  if (metaData == 0)
  {
    PTLOGINIT;
    LERROR << "no LinguisticMetaData ! abort";
    return MISSING_DATA;
  }
  if (graphData == 0)
  {
    PTLOGINIT;
    LERROR << "no AnalysisGraph named " << m_graphId << " ! ";
    return MISSING_DATA;
  }

  // Configure the writer with the n-gram matrices and the dot options, then
  // let it produce the file.
  PosTagger::PosTaggingGraphWriter dotWriter(graphData->getGraph(),
                                             m_language,
                                             m_trigramMatrix,
                                             m_bigramMatrix);
  dotWriter.setOptions(m_graphDotOptions, m_nodeDotOptions, m_edgeDotOptions);

  string dotFileName = metaData->getMetaData("FileName") + m_outputSuffix;
  dotWriter.writeToDotFile(dotFileName, m_vertexDisplay);

  TimeUtils::logElapsedTime("DotGraphWriter");
  return SUCCESS_ID;
}
Пример #4
0
// Logs the coreference chains held by "CoreferenceData" as XML.
//
// The log file is opened through openLogFile() from the "FileName" metadata.
// Each element of the coreference data becomes one <entity> element whose
// "mentions" attribute is the number of tokens it holds and whose children
// are the string forms of these tokens.
//
// Returns MISSING_DATA when the metadata or the coreference data is absent,
// UNKNOWN_ERROR when the log file cannot be opened, SUCCESS_ID otherwise.
LimaStatusCode EntityTrackerXmlLogger::process(
  AnalysisContent& analysis) const
{
  SELOGINIT;
  LDEBUG << "EntityTrackerXmlLogger::process";
  TimeUtils::updateCurrentTime();

  LinguisticMetaData* metadata=static_cast<LinguisticMetaData*>(analysis.getData("LinguisticMetaData"));
  if (metadata == 0) {
      LERROR << "no LinguisticMetaData ! abort";
      return MISSING_DATA;
  }

  CoreferenceData* corefData=static_cast<CoreferenceData*>(analysis.getData("CoreferenceData"));
  if (corefData == 0) {
      LERROR << "no CoreferenceData ! abort";
      return MISSING_DATA;
  }

  ofstream out;
  if (!openLogFile(out,metadata->getMetaData("FileName"))) {
    LERROR << "Can't open log file '" << metadata->getMetaData("FileName") << "'";
    return UNKNOWN_ERROR;
  }

  out << "<coreference>" << endl;
  for (CoreferenceData::const_iterator it=corefData->begin(), 
         it_end=corefData->end(); it != it_end; ++it)
  {
    out << "<entity mentions=\"" << (*it).size() << "\">" << endl;
    for (vector<Token>::const_iterator it2=(*it).begin(), it2_end=(*it).end();
         it2 != it2_end; ++it2)
    {
      out << "  <entity_mention>" 
          << limastring2utf8stdstring((*it2).stringForm())
          <<"</entity_mention>";
    }
    // Close the element opened above (an opening tag was emitted here before,
    // which produced malformed XML).
    out << "</entity>" <<endl;
  }
  // Close the root element so that the document is well formed.
  out << "</coreference>" << endl;
  out.close();

  return SUCCESS_ID;
}
// Builds the linear text representation (LTR_Text) of the "PosGraph" graph
// and writes it, in binary form, to the analysis handler named m_handler.
//
// Returns MISSING_DATA when the metadata, the graph, the sentence boundaries,
// the handler container or the handler itself is not available, SUCCESS_ID
// otherwise.
LimaStatusCode LinearTextRepresentationDumper::process(
    AnalysisContent& analysis) const {

    DUMPERLOGINIT;
    // get metadata
    LinguisticMetaData* metadata=dynamic_cast<LinguisticMetaData*>(analysis.getData("LinguisticMetaData"));
    if (metadata == 0) {
        LERROR << "LinearTextRepresentationDumper::process: no LinguisticMetaData ! abort" << LENDL;
        return MISSING_DATA;
    }
    // get the analysis graph
    AnalysisGraph* anaGraph = dynamic_cast<AnalysisGraph*>(analysis.getData("PosGraph"));
    if (anaGraph == 0) {
        LERROR << "LinearTextRepresentationDumper::process: no AnalysisGraph ! abort" << LENDL;
        return MISSING_DATA;
    }
    // get sentence boundaries
    SegmentationData* sb = dynamic_cast<SegmentationData*>(analysis.getData("SentenceBoundaries"));
    if (sb == 0) {
        LERROR << "LinearTextRepresentationDumper::process: no SentenceBounds ! abort" << LENDL;
        return MISSING_DATA;
    }
    // build LTRText
    LTR_Text textRep;
    LTRTextBuilder builder(m_language, m_stopList);
    builder.buildLTRTextFrom(
        *(anaGraph->getGraph()),
        sb,
        anaGraph->lastVertex(),
        &textRep,
        metadata->getStartOffset());
    // write LTR_Text
    LDEBUG << "handler will be: " << m_handler << LENDL;
    // The handler container is dereferenced below: make sure it is there.
    AnalysisHandlerContainer* h = static_cast<AnalysisHandlerContainer*>(analysis.getData("AnalysisHandlerContainer"));
    if (h == 0) {
      LERROR << "LinearTextRepresentationDumper::process: no AnalysisHandlerContainer ! abort" << LENDL;
      return MISSING_DATA;
    }
    AbstractTextualAnalysisHandler* handler = static_cast<AbstractTextualAnalysisHandler*>(h->getHandler(m_handler));
    if (handler == 0) {
      LERROR << "LinearTextRepresentationDumper::process: handler " << m_handler << " has not been given to the core client" << LENDL;
      return MISSING_DATA;
    }
    // Stream the binary representation through the handler.
    handler->startAnalysis();
    HandlerStreamBuf hsb(handler);
    ostream out(&hsb);
    LDEBUG << textRep << LENDL;
    textRep.binaryWriteOn(out);
    out.flush();
    handler->endAnalysis();
    return SUCCESS_ID;
}
// Proposes spelling alternatives for the tokens of "AnalysisGraph" that have
// no morphosyntactic data yet.
//
// Every vertex of the graph is visited; when it carries a token whose
// MorphoSyntacticData is empty, m_d->setEnchantSpellingAlternatives() is
// called on it.
//
// Returns MISSING_DATA when the graph is absent, SUCCESS_ID otherwise.
LimaStatusCode EnchantSpellingAlternatives::process(AnalysisContent& analysis) const
{
  TimeUtils::updateCurrentTime();
  MORPHOLOGINIT;
  LINFO << "MorphologicalAnalysis: starting process EnchantSpellingAlternatives";
  
  FsaStringsPool& sp=Common::MediaticData::MediaticData::changeable().stringsPool(m_d->m_language);
  AnalysisGraph* tokenList=static_cast<AnalysisGraph*>(analysis.getData("AnalysisGraph"));
  if (tokenList==0)
  {
    LERROR << "no graph 'AnalysisGraph' available !";
    return MISSING_DATA;
  }
  LinguisticGraph* g=tokenList->getGraph();
  VertexDataPropertyMap dataMap=get(vertex_data,*g);
  VertexTokenPropertyMap tokenMap=get(vertex_token,*g);
  LinguisticGraphVertexIt it,itEnd;
  for (boost::tie(it,itEnd)=vertices(*g) ; it != itEnd ; ++it)
  {
    LDEBUG << "EnchantSpellingAlternatives::process processing vertex " << *it;
    Token* currentToken=tokenMap[*it];
    MorphoSyntacticData* msd=dataMap[*it];
    
    // Vertices without token or without data holder are skipped; the data
    // pointer is checked too because it is dereferenced right here.
    if (currentToken!=0 && msd!=0 && msd->empty())
    {
      m_d->setEnchantSpellingAlternatives(
        currentToken,
        msd,
        sp);
    }
  }
  LINFO << "MorphologicalAnalysis: ending process EnchantSpellingAlternatives";
  return SUCCESS_ID;
}
Пример #7
0
// Adds orthographic alternatives to the tokens of "AnalysisGraph".
//
// For every vertex carrying both a token and morphosyntactic data:
//  - the accented alternatives listed by the token's dictionary entry are
//    turned into alternatives with createAlternative();
//  - then setOrthographicAlternatives() is called with the character chart.
// In confident mode (m_confidentMode), each step is skipped as soon as the
// token already has linguistic data.
//
// Returns MISSING_DATA when the graph is absent, SUCCESS_ID otherwise.
LimaStatusCode OrthographicAlternatives::process(
  AnalysisContent& analysis) const
{

  TimeUtils::updateCurrentTime();
  MORPHOLOGINIT;
  LINFO << "MorphologicalAnalysis: starting process OrthographicAlternatives";

  StringsPool& sp=Common::LinguisticData::LinguisticData::changeable().stringsPool(m_language);
  AnalysisGraph* tokenList=static_cast<AnalysisGraph*>(analysis.getData("AnalysisGraph"));
  if (tokenList==0)
  {
    LERROR << "no graph 'AnalysisGraph' available !";
    return MISSING_DATA;
  }
  LinguisticGraph* g=tokenList->getGraph();
  LinguisticGraphVertexIt it,itEnd;
  VertexDataPropertyMap dataMap=get(vertex_data,*g);
  VertexTokenPropertyMap tokenMap=get(vertex_token,*g);
  boost::tie(it,itEnd)=vertices(*g);
  for (;it!=itEnd;++it)
  {
    LDEBUG << "processing vertex " << *it;
    MorphoSyntacticData* currentTokenData=dataMap[*it];
    Token* tok=tokenMap[*it];
    // Both the token and its data are dereferenced below: skip vertices
    // lacking either of them.
    if (currentTokenData==0 || tok==0) continue;

    // if in confidentMode and token has already ling infos, skip
    if ( m_confidentMode && (currentTokenData->size()>0) ) continue;

    // set orthographic alternatives given by dictionary
    // using the alternatives directly given by the morphosyntactic data
    LDEBUG << "processing alternatives from dico";
    DictionaryEntry* entry=tok->dictionaryEntry();
    if (entry!=0)
    {
      entry->reset();
      if (entry->hasAccented()) {
        // an empty string marks the end of the accented forms
        LimaString oa = entry->nextAccented();
        while ( oa.size() > 0 )
        {
          createAlternative(tok,currentTokenData,oa,m_dictionary,sp);
          oa = entry->nextAccented();
        }
      }
    }

    // if in confidentMode and token has already ling infos, skip
    if (m_confidentMode && (currentTokenData->size() > 0) ) continue;

    // if no ling infos, then lower and unmark string
    LDEBUG << "set unmark alternatives";
    setOrthographicAlternatives(
      tok,
      currentTokenData,
      m_dictionary,
      m_charChart,
      sp);
  }
  LINFO << "MorphologicalAnalysis: ending process OrthographicAlternatives";
  TimeUtils::logElapsedTime("OrthographicAlternatives");
  return SUCCESS_ID;
}
Пример #8
0
// Loads specific entities from the XML input file of the analysis into the
// graph registered under m_graph.
//
// A temporary "RecognizerData" (with its result data) is installed in the
// analysis content while the file is parsed and removed afterwards. A parse
// failure is only logged.
//
// Returns MISSING_DATA when the graph is absent, SUCCESS_ID otherwise.
LimaStatusCode SpecificEntitiesLoader::
process(AnalysisContent& analysis) const
{
  AnalysisGraph* const targetGraph =
    static_cast<AnalysisGraph*>(analysis.getData(m_graph));
  if (targetGraph == 0)
  {
    LOGINIT("LP::SpecificEntities");
    LERROR << "no graph '" << m_graph << "' available !";
    return MISSING_DATA;
  }

  // Same setup as ApplyRecognizer, so that CreateSpecificEntity actions can
  // be used by the XML handler.
  RecognizerData* const recognizerData = new RecognizerData;
  analysis.setData("RecognizerData", recognizerData);
  recognizerData->setResultData(new RecognizerResultData(m_graph));

  try
  {
    SpecificEntitiesLoader::XMLHandler xmlHandler(m_language, analysis, targetGraph);
    m_parser->setContentHandler(&xmlHandler);
    m_parser->setErrorHandler(&xmlHandler);

    QFile inputFile(getInputFile(analysis).c_str());
    // An unreadable file and a failed parse are reported the same way.
    if (!inputFile.open(QIODevice::ReadOnly | QIODevice::Text)
        || !m_parser->parse( QXmlInputSource(&inputFile)))
    {
      throw XMLException();
    }
  }
  catch (const XMLException& )
  {
    LOGINIT("LP::SpecificEntities");
    LERROR << "Error: failed to parse XML input file";
  }

  // The recognizer data is internal to this process unit: drop it.
  recognizerData->deleteResultData();
  analysis.removeData("RecognizerData");

  return SUCCESS_ID;
}
// Builds the linear text representation (LTR_Text) of the "PosGraph" graph
// and writes it, in binary form, to the file named after the "FileName"
// metadata followed by m_outputSuffix.
//
// Missing sentence boundaries are tolerated: a null pointer is handed to the
// builder.
//
// Returns MISSING_DATA when the metadata or the graph is absent, SUCCESS_ID
// otherwise. Throws runtime_error when the output file cannot be opened.
LimaStatusCode LinearTextRepresentationLogger::process(
    AnalysisContent& analysis) const {

    DUMPERLOGINIT;
    // get metadata
    LinguisticMetaData* metadata=dynamic_cast<LinguisticMetaData*>(analysis.getData("LinguisticMetaData"));
    if (metadata == 0) {
        LERROR << "no LinguisticMetaData ! abort";
        return MISSING_DATA;
    }
    // get the analysis graph
    AnalysisGraph* anaGraph = dynamic_cast<AnalysisGraph*>(analysis.getData("PosGraph"));
    if (anaGraph == 0) {
        LERROR << "no AnalysisGraph ! abort";
        return MISSING_DATA;
    }
    // get sentence boundaries
    SegmentationData* sb = dynamic_cast<SegmentationData*>(analysis.getData("SentenceBoundaries"));
    if (sb == 0) {
      // The message used to name LinearTextRepresentationDumper, which made
      // the logs point at the wrong process unit.
      LDEBUG << "LinearTextRepresentationLogger::process: no SentenceBounds available: ignored";
      // sentence bounds ignored: null pointer passed to LTRTextBuilder will be handled there
    }
    // build LTRText
    LTR_Text textRep;
    LTRTextBuilder builder(m_language, m_stopList);
    builder.buildLTRTextFrom(
        *(anaGraph->getGraph()),
        sb,
        anaGraph->firstVertex(),
        anaGraph->lastVertex(),
        &textRep,
        metadata->getStartOffset());

    // write LTR_Text
    string textFileName = metadata->getMetaData("FileName");
    string outputFile = textFileName + m_outputSuffix;
    ofstream out(outputFile.c_str(), std::ofstream::binary);
    if (!out.good()) {
        throw runtime_error("can't open file " + outputFile);
    }
    textRep.binaryWriteOn(out);
    out.flush();
    out.close();
    return SUCCESS_ID;
}
Пример #10
0
// Records a semantic relation of type m_semanticRelationType between vertex1
// and vertex2 in the "SemanticRelationData" of the analysis content, creating
// and registering that data on first use. The graph argument is not used.
//
// Returns whatever SemanticRelationData::relation() returns.
bool CreateSemanticRelation::
operator()(const LinguisticAnalysisStructure::AnalysisGraph& anagraph,
           const LinguisticGraphVertex& vertex1,
           const LinguisticGraphVertex& vertex2,
           AnalysisContent& analysis ) const
{
  LIMA_UNUSED(anagraph);

  SemanticRelationData* relationStore =
    static_cast<SemanticRelationData*>(analysis.getData("SemanticRelationData"));
  if (relationStore == 0)
  {
    // First relation of this analysis: install the container.
    relationStore = new SemanticRelationData();
    analysis.setData("SemanticRelationData", relationStore);
  }

  const bool stored =
    relationStore->relation(vertex1, vertex2, m_semanticRelationType);
  return stored;
}
Пример #11
0
void SegmentFeatureEntity::
update(const AnalysisContent& analysis) {
  m_annotationData = static_cast<const AnnotationData*>(analysis.getData("AnnotationData"));
  if (m_annotationData==0)
  {
    LOGINIT("LP::Segmentation");
    LERROR << "no annotation graph available !" << LENDL;
  }
}
Пример #12
0
void SegmentFeatureRank::
update(const AnalysisContent& analysis) {
  // store information from segmentation data to know at which segment we are
  const AnalysisData* data=analysis.getData(m_data);
  if (data==0) {
    return;
  }
  m_segmData=static_cast<const SegmentationData*>(data);
}
Пример #13
0
// Dumps the event templates stored under m_eventData to the dumper stream.
//
// The data is converted to Events with
// EventTemplateData::convertToEvents() and written on the stream returned by
// AbstractTextualAnalysisDumper::initialize(). Nothing is written when
// m_eventData is empty or when no data of that name exists (the latter is
// logged as an error).
//
// Returns MISSING_DATA when the annotation data is absent or when the data
// is not an EventTemplateData, SUCCESS_ID otherwise. The dumper stream is
// released on every path.
LimaStatusCode EventTemplateDataDumper::process(AnalysisContent& analysis) const
{
  LOGINIT("LP::EventAnalysis");
  LDEBUG << "EventTemplateDataDumper::process" << LENDL;
  TimeUtils::updateCurrentTime();

  // initialize output
  DumperStream* dstream=AbstractTextualAnalysisDumper::initialize(analysis);
  ostream& out=dstream->out();

  const AnnotationData* annotationData = static_cast< AnnotationData* >(analysis.getData("AnnotationData"));
  if (annotationData==0)
  {
    LERROR << "no annotation graph available !" << LENDL;
    delete dstream; // was leaked on this early return
    return MISSING_DATA;
  }
  
  if (! m_eventData.empty()) {
    const AnalysisData* data =analysis.getData(m_eventData);
    if (data!=0) {
      // see if the data is of type Events
      const EventTemplateData* eventData=dynamic_cast<const EventTemplateData*>(data);
      if (eventData==0) {
        LERROR << "data '" << m_eventData << "' is neither of type EventData nor Events" << LENDL;
        delete dstream; // was leaked on this early return
        return MISSING_DATA;
      }
      // NOTE(review): ownership of the Events returned by convertToEvents()
      // is not visible from here; it is not released in this function.
      Events *events=eventData->convertToEvents(annotationData);
      events->write(out);
    }
    else {
      LERROR << "no data of name " << m_eventData << LENDL;
    }
  }
  
  delete dstream;
  TimeUtils::logElapsedTime("EventTemplateDataDumper");
  return SUCCESS_ID;
}
Пример #14
0
// Flushes the relations collected in "SemanticRelationData" to the
// annotation graph through SemanticRelationData::addRelations().
//
// Returns false when no such data exists in the analysis content, the result
// of addRelations() otherwise.
bool SaveSemanticRelation::operator()(AnalysisContent& analysis ) const
{
  SemanticRelationData* const pending =
    static_cast<SemanticRelationData*>(analysis.getData("SemanticRelationData"));
  // Short-circuit: addRelations() is only reached with a valid container.
  return (pending != 0) && pending->addRelations(analysis);
}
Пример #15
0
void SegmentFeatureInSegment::
update(const AnalysisContent& analysis) 
{
  const AnalysisData* data=analysis.getData(m_segmentData);
  if (data==0) {
    LOGINIT("LP::Segmentation");
    LERROR << SegmentFeatureInSegment_ID << ": No data " << m_segmentData << LENDL;
    m_data=0;
  }
  else {
    m_data=static_cast<const SegmentationData*>(data);
  }
}
Пример #16
0
// Writes the word sense annotations found on "PosGraph" to an XML file named
// after the "FileName" metadata followed by m_outputSuffix; the actual
// formatting is delegated to dump().
//
// Returns MISSING_DATA when the metadata or the graph is absent, SUCCESS_ID
// otherwise. Throws runtime_error when the output file cannot be opened. The
// output file is opened before the graph is looked up.
LimaStatusCode WordSenseXmlLogger::process(
  AnalysisContent& analysis) const
{
  TimeUtils::updateCurrentTime();

  LinguisticMetaData* const metaData =
    static_cast<LinguisticMetaData*>(analysis.getData("LinguisticMetaData"));
  if (metaData == 0)
  {
    LOGINIT("WordSenseDisambiguator");
    LERROR << "no LinguisticMetaData ! abort";
    return MISSING_DATA;
  }

  string outputPath = metaData->getMetaData("FileName");
  outputPath = outputPath + m_outputSuffix;
  ofstream out(outputPath.c_str(), std::ofstream::binary);
  if (!out.good())
  {
    throw runtime_error("can't open file " + outputPath);
  }

  AnalysisGraph* const posGraph =
    static_cast<AnalysisGraph*>(analysis.getData("PosGraph"));
  if (posGraph == 0)
  {
    LOGINIT("WordSenseDisambiguator");
    LERROR << "no AnalysisGraph ! abort";
    return MISSING_DATA;
  }

  // The annotation data is handed over as is (possibly null).
  AnnotationData* const annotations =
    static_cast<AnnotationData*>(analysis.getData("AnnotationData"));
  dump(out, posGraph, annotations);

  out.flush();
  out.close();
  TimeUtils::logElapsedTime("WordSenseDisambiguatorXmlLogger");
  return SUCCESS_ID;
}
Пример #17
0
// Flushes the relations collected in "SemanticRelationData" to the
// annotation graph through SemanticRelationData::addRelations().
//
// Returns false when no such data exists in the analysis content, the result
// of addRelations() otherwise.
bool SaveSemanticRelation::operator()(AnalysisContent& analysis ) const
{
#ifdef DEBUG_LP
  SEMLOGINIT;
  LDEBUG << "SaveSemanticRelation::operator()";
#endif
  SemanticRelationData* const pending =
    static_cast<SemanticRelationData*>(analysis.getData("SemanticRelationData"));
  // Short-circuit: addRelations() is only reached with a valid container.
  return (pending != 0) && pending->addRelations(analysis);
}
Пример #18
0
void SegmentFeatureEntityInData::
update(const AnalysisContent& analysis) {

  // get result data
  const AnalysisData* resultData=analysis.getData(m_dataName);
  if (resultData == 0) {
    LOGINIT("LP::Segmentation");
    LERROR << "no data " << m_data << "in AnalysisContent" << LENDL;
  }
  m_data=dynamic_cast<const ApplyRecognizer::RecognizerResultData*>(resultData);
  if (m_data == 0) {
    LOGINIT("LP::Segmentation");
    LERROR << "data " << m_data << "in AnalysisContent is not a RecognizerResultData" << LENDL;
  }
  
}
Пример #19
0
void SegmentFeaturePosition::
update(const AnalysisContent& analysis) {
  // update offset from metadata
  const LinguisticMetaData* metadata=static_cast<const LinguisticMetaData*>(analysis.getData("LinguisticMetaData"));
  if (metadata == 0) {
    LOGINIT("LP::Segmentation");
    LWARN << "no LinguisticMetaData ! abort" << LENDL;
  }
  else {
    try {
      m_offset=atoi(metadata->getMetaData("StartOffset").c_str());
    }
    catch (LinguisticProcessingException& ) {
      // do nothing: not set in analyzeText (only in analyzeXmlDocuments)
    }
  }
}
Пример #20
0
// Tells whether vertex v carries an entity annotation (m_entityAnnotation)
// whose type belongs to the entity group m_entityGroupId.
//
// The annotation vertex is the first one matching v, through "annot", in the
// annotation graph. The token and data arguments are not used.
//
// Returns false when the annotation data is absent, when no annotation
// vertex matches v or when the matching vertex has no entity annotation;
// otherwise returns whether the group ids are equal.
bool EntityGroupTransition::
compare(const LinguisticAnalysisStructure::AnalysisGraph& graph,
        const LinguisticGraphVertex& v,
        AnalysisContent& analysis,
        const LinguisticAnalysisStructure::Token* /*token*/,
        const LinguisticAnalysisStructure::MorphoSyntacticData* /*data*/) const
{
  // should compare to vertex ?
  AnnotationData* annotationData = static_cast< AnnotationData* >(analysis.getData("AnnotationData"));
  if (annotationData==0) {
    AULOGINIT;
    LDEBUG << "EntityGroupTransition::compare: no annotation graph available !";
    return false;
  }

  // find annotationGraphVertex matching the vertex of the current graph
  std::set<AnnotationGraphVertex> matches = annotationData->matches(graph.getGraphId(), v, "annot");
  if (matches.empty())
  {
    AULOGINIT;
    // the message used to claim that the annotation was "available"
    LDEBUG << "EntityGroupTransition::compare: no annotation ("<<graph.getGraphId()<<", "<<v<<", \"annot\") available";
    return false;
  }
  AnnotationGraphVertex annotVertex = *(matches.begin());

  if (!annotationData->hasAnnotation(annotVertex, m_entityAnnotation))
  {
    AULOGINIT;
    LDEBUG << "EntityGroupTransition::compare: No " << m_entityAnnotation << " annotation available on " << v;
    return false;
  }
  
  const SpecificEntityAnnotation* se =
    annotationData->annotation(annotVertex, m_entityAnnotation).
    pointerValue<SpecificEntityAnnotation>();
  Common::MediaticData::EntityType type = se->getType();
  AULOGINIT;
  LDEBUG << "EntityGroupTransition::compare: type = " << type << ", groupId = " << type.getGroupId();
  LDEBUG << "EntityGroupTransition::compare: m_entityGroupId = " << m_entityGroupId;
  LDEBUG << "EntityGroupTransition::compare: tests m_entityGroupId == type.getGroupId() = " << (m_entityGroupId == type.getGroupId());
  return( m_entityGroupId == type.getGroupId() );
}
// Stores the relation m_relation between v1 and v2 in the "SyntacticData" of
// the analysis content, for later use as a selectional constraint.
//
// Returns false, storing nothing, when either vertex is the first or last
// vertex of the graph or when the syntactic data is absent; true otherwise.
bool StoreForDisambiguation::operator()(
  const LinguisticAnalysisStructure::AnalysisGraph& graph,
  const LinguisticGraphVertex& v1,
  const LinguisticGraphVertex& v2,
  AnalysisContent& analysis ) const
{
/*
  Critical Function : comment logging messages
*/
  // no relation involves the boundary vertices of the graph
  if (v1 == graph.firstVertex() || v1 == graph.lastVertex() ||
    v2 == graph.firstVertex() || v2 == graph.lastVertex() )
  {
    //     LDEBUG << "SecondUngovernedBy: false" << LENDL;
    return false;
  }

  SAPLOGINIT;
  SyntacticData* syntacticData=static_cast<SyntacticData*>(analysis.getData("SyntacticData"));
  if (syntacticData == 0)
  {
    // the data is dereferenced below: refuse instead of crashing
    LERROR << "StoreForDisambiguation: no SyntacticData available" << LENDL;
    return false;
  }

  LDEBUG << "StoreForDisambiguation " << v1 << ", " << v2 << ", " << m_relation << LENDL;
  syntacticData->storeRelationForSelectionalConstraint(v1, v2, m_relation);
  return true;
}
Пример #22
0
// Writes the SpecificEntity annotations of the analysis as XML on the dumper
// stream returned by initialize().
//
// Two output layouts exist: with m_compactFormat the root element is
// <entities> and carries the document id and offsets read from the metadata;
// otherwise it is <specific_entities>. Two collection strategies exist: with
// m_followGraph the graph named m_graph is traversed from its first vertex,
// otherwise every vertex of the annotation graph is examined.
//
// Returns MISSING_DATA when the annotation data, the graph m_graph or the
// linguistic metadata is absent, SUCCESS_ID otherwise.
LimaStatusCode SpecificEntitiesXmlLogger::process(
  AnalysisContent& analysis) const
{
  SELOGINIT;
  LDEBUG << "SpecificEntitiesXmlLogger::process";
  TimeUtils::updateCurrentTime();

  AnnotationData* annotationData = static_cast< AnnotationData* >(analysis.getData("AnnotationData"));
  if (annotationData == 0) {
    SELOGINIT;
    LERROR << "no annotationData ! abort";
    return MISSING_DATA;
  }
  
  
  LinguisticAnalysisStructure::AnalysisGraph* graphp = static_cast<LinguisticAnalysisStructure::AnalysisGraph*>(analysis.getData(m_graph));
  if (graphp == 0) {
    SELOGINIT;
    LERROR << "no graph "<< m_graph <<" ! abort";
    return MISSING_DATA;
  }
  const LinguisticAnalysisStructure::AnalysisGraph& graph = *graphp;
  // the token property map is what outputEntity() receives to reach the tokens
  LinguisticGraph* lingGraph = const_cast<LinguisticGraph*>(graph.getGraph());
  VertexTokenPropertyMap tokenMap = get(vertex_token, *lingGraph);
  
  LinguisticMetaData* metadata=static_cast<LinguisticMetaData*>(analysis.getData("LinguisticMetaData"));
  if (metadata == 0) {
      SELOGINIT;
      LERROR << "no LinguisticMetaData ! abort";
      return MISSING_DATA;
  }

  // the stream is owned by this function and deleted before the final return
  DumperStream* dstream=initialize(analysis);
  ostream& out=dstream->out();

  // Each metadata entry below keeps its default value when reading it raises
  // a LinguisticProcessingException.
  uint64_t offset(0);
  try {
    offset=atoi(metadata->getMetaData("StartOffset").c_str());
  }
  catch (LinguisticProcessingException& ) {
    // do nothing: not set in analyzeText (only in analyzeXmlDocuments)
  }

  uint64_t offsetIndexingNode(0);
  try {
    offsetIndexingNode=atoi(metadata->getMetaData("StartOffsetIndexingNode").c_str());
  }
  catch (LinguisticProcessingException& ) {
    // do nothing: not set in analyzeText (only in analyzeXmlDocuments)
  }

  std::string docId("");
  try {
    docId=metadata->getMetaData("DocId");
  }
  catch (LinguisticProcessingException& ) {
    // do nothing: not set in analyzeText (only in analyzeXmlDocuments)
  }

  // opening tag of the root element; docId and offsetIndexingNode are only
  // used by the compact format
  if (m_compactFormat) {
    out << "<entities docid=\"" << docId
    << "\" offsetNode=\"" << offsetIndexingNode 
    << "\" offset=\"" << offset
    << "\">" << endl;
  }
  else {
    out << "<specific_entities>" << endl;
  }
//   SELOGINIT;

  if (m_followGraph) {
    // instead of looking to all annotations, follow the graph (in
    // morphological graph, some vertices are not related to main graph:
    // idiomatic expressions parts and named entity parts)
    // -> this will not include nested entities

    // NOTE(review): this looks up the same entry (m_graph) that was already
    // checked above as graphp; the early return below does not delete
    // dstream.
    AnalysisGraph* tokenList=static_cast<AnalysisGraph*>(analysis.getData(m_graph));
    if (tokenList==0) {
      LERROR << "graph " << m_graph << " has not been produced: check pipeline";
      return MISSING_DATA;
    }
    // this local pointer shadows the reference named graph declared above
    LinguisticGraph* graph=tokenList->getGraph();
    //const FsaStringsPool& sp=Common::MediaticData::MediaticData::single().stringsPool(m_language);
    
    // traversal from the first vertex: toVisit is the FIFO work list,
    // visited holds the vertices already queued
    std::queue<LinguisticGraphVertex> toVisit;
    std::set<LinguisticGraphVertex> visited;
    toVisit.push(tokenList->firstVertex());
    
    LinguisticGraphOutEdgeIt outItr,outItrEnd;
    while (!toVisit.empty()) {
      LinguisticGraphVertex v=toVisit.front();
      toVisit.pop();
      // the last vertex is neither expanded nor output
      if (v == tokenList->lastVertex()) {
        continue;
      }
      
      // queue every successor that has not been queued yet
      for (boost::tie(outItr,outItrEnd)=out_edges(v,*graph); outItr!=outItrEnd; outItr++) 
      {
        LinguisticGraphVertex next=target(*outItr,*graph);
        if (visited.find(next)==visited.end())
        {
          visited.insert(next);
          toVisit.push(next);
        }
      }
      // output the entity carried by the current vertex, if any
      const SpecificEntityAnnotation* annot=getSpecificEntityAnnotation(v,annotationData);
      if (annot != 0) {
        outputEntity(out,v,annot,tokenMap,offset);
      }
    }
  }
  else {
    // take all annotations
    AnnotationGraphVertexIt itv, itv_end;
    boost::tie(itv, itv_end) = vertices(annotationData->getGraph());
    for (; itv != itv_end; itv++)
    {
      //     LDEBUG << "SpecificEntitiesXmlLogger on annotation vertex " << *itv;
      if (annotationData->hasAnnotation(*itv,Common::Misc::utf8stdstring2limastring("SpecificEntity")))
      {
        //       LDEBUG << "    it has SpecificEntityAnnotation";
        const SpecificEntityAnnotation* annot = 0;
        try
        {
          annot = annotationData->annotation(*itv,Common::Misc::utf8stdstring2limastring("SpecificEntity"))
          .pointerValue<SpecificEntityAnnotation>();
        }
        catch (const boost::bad_any_cast& )
        {
          SELOGINIT;
          LERROR << "This annotation is not a SpecificEntity; SE not logged";
          continue;
        }
        
        // retrieve the id of the vertex created in graph m_graph; annotation
        // vertices without such an id are skipped
        LinguisticGraphVertex v;
        if (!annotationData->hasIntAnnotation(*itv,Common::Misc::utf8stdstring2limastring(m_graph)))
        {
          //         SELOGINIT;
          //         LDEBUG << *itv << " has no " << m_graph << " annotation. Skeeping it.";
          continue;
        }
        v = annotationData->intAnnotation(*itv,Common::Misc::utf8stdstring2limastring(m_graph));
        outputEntity(out,v,annot,tokenMap,offset);
      }
    }
  }   
  
  //   LDEBUG << "    all vertices done";
  // closing tag matching the root element opened above
  if (m_compactFormat) {
    out << "</entities>" << endl;
  }
  else {
    out << "</specific_entities>" << endl;
  }
  delete dstream;
  TimeUtils::logElapsedTime("SpecificEntitiesXmlLogger");
  return SUCCESS_ID;
  
}
Пример #23
0
// Builds an EventData from the entities found in the segments of type
// m_segmentType and registers it in the analysis content as "EventData".
//
// Entities are collected per entity type with getEntitiesFromSegment(); for
// each type, the first collected entity is then flagged as the main one.
//
// Returns MISSING_DATA when the annotation data, the segmentation data named
// m_segmData (absent or of another type) or the graph it refers to is
// missing, SUCCESS_ID otherwise.
LimaStatusCode SimpleEventBuilder::process(AnalysisContent& analysis) const
{
  EVENTANALYZERLOGINIT;
  TimeUtils::updateCurrentTime();
  LDEBUG << "start SimpleEventBuilder" << LENDL;

  // get annotation data (for entities)
  AnnotationData* annotationData = static_cast< AnnotationData* >(analysis.getData("AnnotationData"));
  if (annotationData==0)
  {
    LERROR << "no annotation graph available !" << LENDL;
    return MISSING_DATA;
  }
    
  // get segmentation data
  AnalysisData* data=analysis.getData(m_segmData);
  if (data==0) {
    LERROR << "Missing data '" << m_segmData << "'" << LENDL;
    return MISSING_DATA;
  }
  // dynamic_cast so that the type check below is effective: a static_cast of
  // a non-null pointer never yields null
  SegmentationData* segmData=dynamic_cast<SegmentationData*>(data);
  if (segmData==0)
  {
    LERROR << "Failed to interpret data '" << m_segmData << "' as SegmentationData" << LENDL;
    return MISSING_DATA;
  }

  // get graph on which the segmentation data relies
  string graphId=segmData->getGraphId();
  AnalysisGraph* graph=static_cast<AnalysisGraph*>(analysis.getData(graphId));
  if (graph==0) {
    LERROR << "Cannot get graph '" << graphId << "' (from segmentation data)" << LENDL;
    return MISSING_DATA;
  }
  
  // the new data is handed over to the analysis content
  EventData* eventData=new EventData;
  LDEBUG << "set new data EventData of type EventData" << LENDL;
  analysis.setData("EventData", eventData);

  // get entities
  map<Common::MediaticData::EntityType,vector<Entity> >& entities=eventData->getEntities();
  for (std::vector<Segment>::const_iterator it=(segmData->getSegments()).begin(),it_end=(segmData->getSegments()).end();it!=it_end;++it) {
    if ((*it).getType()==m_segmentType) {
      LDEBUG << "in segment " << m_segmentType << " [" << (*it).getPosBegin() << "," << (*it).getLength() << "]" << LENDL;
      // get entities in this segment
      getEntitiesFromSegment(entities,graph,(*it).getFirstVertex(),(*it).getLastVertex(),annotationData);
      LDEBUG << "found " << entities.size() << " entities" << LENDL;
    }
    else {
      LDEBUG << "ignored segment " << (*it).getType() << LENDL;
    }
  }

  // choose main entities : take first
  for (map<Common::MediaticData::EntityType,vector<Entity> >::iterator it=entities.begin(),
    it_end=entities.end();it!=it_end;++it) {
    if ((*it).second.size()!=0) {
      LDEBUG << "set main for entity of type " << (*it).first << " at pos " << (*it).second[0].getPosition() << LENDL;
      (*it).second[0].setMain(true);
    }
  }


  TimeUtils::logElapsedTime("SimpleEventBuilder");
  return SUCCESS_ID;
}
Пример #24
0
// Logs the segments of the segmentation data named m_data as XML.
//
// The log file is opened through openLogFile() from the "FileName" metadata.
// One <segment> element (position, length, type) is written per segment
// inside a <segmentation> root; the root is written empty when no data of
// that name exists.
//
// Returns MISSING_DATA when the metadata is absent, UNKNOWN_ERROR when the
// log file cannot be opened, SUCCESS_ID otherwise.
LimaStatusCode SegmentationDataXmlLogger::process(
  AnalysisContent& analysis) const
{
  LOGINIT("LP::Segmentation");
  LDEBUG << "SegmentationDataXmlLogger::process" << LENDL;
  TimeUtils::updateCurrentTime();

  LinguisticMetaData* metadata=static_cast<LinguisticMetaData*>(analysis.getData("LinguisticMetaData"));
  if (metadata == 0) {
      LERROR << "no LinguisticMetaData ! abort" << LENDL;
      return MISSING_DATA;
  }

  // open output file
  ofstream out;
  if (!openLogFile(out,metadata->getMetaData("FileName"))) {
    LERROR << "Can't open log file '" << metadata->getMetaData("FileName") << "'" << LENDL;
    return UNKNOWN_ERROR;
  }

  // log 
  out << "<segmentation>" << endl;
  const AnalysisData* data =analysis.getData(m_data);
  if (data!=0) {
    const SegmentationData* segData=static_cast<const SegmentationData*>(data);
    // bound by reference: the segments are only read, no copy is needed
    const vector<Segment>& seg=segData->getSegments();
    for (vector<Segment>::const_iterator it=seg.begin(), it_end=seg.end(); it!=it_end; ++it) {
      out 
        << "<segment>" 
        << "<pos>" << (*it).getPosBegin() << "</pos>" 
        << "<len>" << (*it).getLength() << "</len>"
        << "<type>" << (*it).getType() << "</type>"
        << "</segment>" 
        << endl;
    }
  }
  else {
    LDEBUG << "no SegmentationData of name " << m_data << LENDL;
  }
  
  out << "</segmentation>" << endl;
  out.close();

  TimeUtils::logElapsedTime("SegmentationDataXmlLogger");
  return SUCCESS_ID;
}
Пример #25
0
/**
 * Computes the feature value of vertex v, trying three sources in turn:
 *  1. the entity name of a "SpecificEntity" annotation attached to an
 *     annotation vertex matching v (if several match, the last one visited
 *     wins);
 *  2. otherwise the normalized form of the first morphosyntactic element
 *     of v;
 *  3. if that normalized form is the empty string, the token string form.
 *
 * @param graph    analysis graph containing v
 * @param v        vertex whose feature value is requested
 * @param analysis analysis content providing "AnnotationData"
 * @return the value found, or "NAN" when there is neither a specific entity
 *         annotation nor any morphosyntactic element for v
 */
std::string FeatureLemmaSpecificEntity::
getValue(const LinguisticAnalysisStructure::AnalysisGraph* graph, 
         LinguisticGraphVertex v,
         AnalysisContent &analysis
) const
{
  std::string mxvalue("NAN");
  Common::AnnotationGraphs::AnnotationData *annot = static_cast<  Common::AnnotationGraphs::AnnotationData* >(analysis.getData("AnnotationData"));

  // without annotation data there can be no specific entity: go straight
  // to the lemma fallback instead of dereferencing a null pointer
  if (annot != 0)
  {
    // converted once instead of twice per matching vertex
    const LimaString specificEntityName = Common::Misc::utf8stdstring2limastring("SpecificEntity");

    const std::set< AnnotationGraphVertex > matches = annot->matches(graph->getGraphId(),v,"annot");
    for (std::set< AnnotationGraphVertex >::const_iterator it = matches.begin(); it != matches.end(); ++it)
    {
      if (!annot->hasAnnotation(*it, specificEntityName))
      {
        continue;
      }
      AnnotationGraphVertex vx=*it;
      const SpecificEntityAnnotation* se = annot->annotation(vx, specificEntityName).
      pointerValue<SpecificEntityAnnotation>();
      if (se == 0)
      {
        // annotation present but holding no object: nothing to read
        continue;
      }

      LimaString str= Common::MediaticData::MediaticData::single().getEntityName(se->getType());
      mxvalue=Common::Misc::limastring2utf8stdstring(str);
    }
  }

  // replace NAN values by lemmas
  if (mxvalue == "NAN") {
    MorphoSyntacticData* data=get(vertex_data,*(graph->getGraph()),v);
    // take first element only; null or empty data leaves the value at "NAN"
    if (data != 0 && data->begin() != data->end()) {
      MorphoSyntacticData::const_iterator first=data->begin();
      mxvalue = Common::Misc::limastring2utf8stdstring(Common::MediaticData::MediaticData::single().stringsPool(m_language)[(*first).normalizedForm]);
    }
  }

  // replace empty lemma values by tokens
  if (mxvalue == "" ) {
    Token* token=get(vertex_token,*(graph->getGraph()),v);
    if (token != 0) {
      mxvalue = Common::Misc::limastring2utf8stdstring(token->stringForm());
    }
  }

  return mxvalue;
}
Пример #26
0
/**
 * Finds paragraph boundaries: locates the occurrences of
 * m_paragraphSeparator in the analysed text, then walks the graph m_graph
 * breadth-first and, each time a token position passes the next separator
 * position, adds a "paragraph" Segment to the SegmentationData stored in the
 * analysis under "ParagraphBoundaries".
 *
 * @param analysis analysis content providing the graph named m_graph and
 *        the "Text" data; receives the "ParagraphBoundaries" data
 * @return MISSING_DATA when the graph or the text is absent, SUCCESS_ID
 *         otherwise (including when the text contains no separator)
 */
LimaStatusCode ParagraphBoundariesFinder::process(
  AnalysisContent& analysis) const
{
  TimeUtils::updateCurrentTime();
  SENTBOUNDLOGINIT;
  LINFO << "start finding paragraph founds";
  
  // find paragraphs in text (positions of double carriage returns),
  // then find corresponding vertices in graph

  AnalysisGraph* graph=static_cast<AnalysisGraph*>(analysis.getData(m_graph));
  if (graph==0) {
    LERROR << "no graph '" << m_graph << "' available !";
    return MISSING_DATA;
  }

  LimaStringText* text=static_cast<LimaStringText*>(analysis.getData("Text"));
  if (text==0) {
    // previously dereferenced unchecked
    LERROR << "no text available !";
    return MISSING_DATA;
  }

  // NOTE(review): the analysis presumably takes ownership of this object
  // through setData - confirm against AnalysisContent
  SegmentationData* boundaries=new SegmentationData(m_graph);
  analysis.setData("ParagraphBoundaries",boundaries);

  // matches any character that does not belong to the separator; built once
  // instead of on every loop iteration
  const QRegExp notSeparator(QString(QLatin1String("[^%1]")).arg(m_paragraphSeparator));

  std::vector<uint64_t> paragraphPositions;
  int i=text->indexOf(m_paragraphSeparator,0);
  while (i!=-1) {
    paragraphPositions.push_back(static_cast<uint64_t>(i));
    // goto next char that is not a carriage return
    const int nextStart=text->indexOf(notSeparator,i+1);
    if (nextStart==-1) {
      // Only separator characters remain up to the end of the text.
      // Passing -1 on to indexOf would restart the search from the last
      // character and, with a one-character separator, find the same
      // separator again forever.
      break;
    }
    i=text->indexOf(m_paragraphSeparator,nextStart);
  }

  if (paragraphPositions.empty()) {
    LWARN << "no paragraph found";
    return SUCCESS_ID;
  }

  // find vertices related to positions in graph
  LinguisticGraph* lingGraph=graph->getGraph();
  const LinguisticGraphVertex firstVertex=graph->firstVertex();
  const LinguisticGraphVertex lastVertex=graph->lastVertex();

  uint64_t parNum=0;
  std::deque<LinguisticGraphVertex> toVisit;
  std::set<LinguisticGraphVertex> visited;

  LinguisticGraphVertex beginParagraph=firstVertex;

  toVisit.push_back(firstVertex);
  visited.insert(firstVertex);

  while (!toVisit.empty())
  {
    LinguisticGraphVertex currentVertex=toVisit.front();
    toVisit.pop_front();

    if (currentVertex == lastVertex) { // end of the graph
      continue;  // may be other nodes to test in queue
    }

    if (currentVertex != firstVertex) {
      Token* t = get(vertex_token,*lingGraph,currentVertex);
      uint64_t position=t->position();
      // the first token located after the separator closes the paragraph
      if (position >= (paragraphPositions[parNum]+1)) {
        boundaries->add(Segment("paragraph",beginParagraph,currentVertex,graph));
        beginParagraph=currentVertex;
        parNum++;
        if (parNum >= paragraphPositions.size()) {
          // every separator position has been consumed
          break;
        }
      }
    }

    // store following nodes to test
    LinguisticGraphOutEdgeIt outEdge,outEdge_end;
    boost::tie(outEdge,outEdge_end)=out_edges(currentVertex,*lingGraph);

    for (; outEdge!=outEdge_end; ++outEdge) {
      LinguisticGraphVertex next=target(*outEdge,*lingGraph);
      // insert() reports whether the vertex was new: one lookup, not two
      if (visited.insert(next).second) {
        toVisit.push_back(next);
      }
    }
  }
  
  TimeUtils::logElapsedTime("ParagraphBoundariesFinder");
  return SUCCESS_ID;
}
Пример #27
0
/**
 * Parses the XML file /tmp/mm-lp.morphoSyntacticalAnalysis-changed.tmp with
 * an ExampleLoader::XMLHandler, then visits every vertex of the "PosGraph"
 * linguistic graph: when the MICRO tag recorded by the handler for the
 * token position differs from the first MICRO value stored in the graph,
 * the properties of the first morphosyntactic element of the vertex are
 * overwritten with the handler's value. Progress is traced on standard
 * output.
 *
 * @param analysis analysis content providing the "PosGraph" analysis graph
 * @return MISSING_DATA when "PosGraph" or its linguistic graph is absent,
 *         SUCCESS_ID otherwise.
 *         NOTE(review): a failure to open or parse the XML file is only
 *         logged and still yields SUCCESS_ID - confirm this best-effort
 *         behaviour is intended.
 */
LimaStatusCode ExampleLoader::process(AnalysisContent& analysis) const
{
  // get linguistic graph; check the analysis graph itself before using it
  AnalysisGraph* anaGraph=static_cast<AnalysisGraph*>(analysis.getData("PosGraph"));
  LinguisticGraph* lingGraph=(anaGraph==0) ? 0 : anaGraph->getGraph();
  if (lingGraph==0)
  {
    PROCESSORSLOGINIT;
    LERROR << "no graph 'PosGraph' available !";
    return MISSING_DATA;
  }

  try{
    ExampleLoader::XMLHandler handler(m_language,analysis,anaGraph);
    m_parser->setContentHandler(&handler);
    m_parser->setErrorHandler(&handler);
    QFile file("/tmp/mm-lp.morphoSyntacticalAnalysis-changed.tmp");
    if (!file.open(QIODevice::ReadOnly | QIODevice::Text))
      throw XMLException();
    if (!m_parser->parse( QXmlInputSource(&file)))
    {
      throw XMLException();
    }

    // language-level lookups do not depend on the vertex: resolve them once
    const Common::PropertyCode::PropertyCodeManager& codeManager=static_cast<const Common::MediaticData::LanguageData&>(Common::MediaticData::MediaticData::single().mediaData(m_language)).getPropertyCodeManager();
    const auto& microManager=codeManager.getPropertyManager("MICRO");
    const Common::PropertyCode::PropertyAccessor& microAccessor=codeManager.getPropertyAccessor("MICRO");

    LinguisticGraph::vertex_iterator vxItr,vxItrEnd;
    boost::tie(vxItr,vxItrEnd) = boost::vertices(*lingGraph);
    for (;vxItr!=vxItrEnd;++vxItr){
      MorphoSyntacticData* morphoData=get(vertex_data,*lingGraph, *vxItr);
      Token* ft=get(vertex_token,*lingGraph,*vxItr);
      if (ft==0 || morphoData==0){
        // vertex without token or without morphosyntactic data: nothing to compare
        continue;
      }

      // tag read from the XML file for this token position
      const QString tag=QString::fromStdString(microManager.getPropertySymbolicValue(handler.m_tagIndex[ft->position()]));
      // tag currently stored in the graph
      const QString graphTag=QString::fromStdString(microManager.getPropertySymbolicValue(morphoData->firstValue(microAccessor)));

      cout << " la premiere categorie de  " << ft->stringForm() << " est " << graphTag << endl;

      // if the map value differs from the graph node at this position,
      // replace the node value with the map value; at(0) needs a non-empty
      // element list
      if (tag==graphTag || morphoData->begin()==morphoData->end()){
        continue;
      }

      const QString tagBefore=QString::fromStdString(microManager.getPropertySymbolicValue(morphoData->at(0).properties));

      cout << "le token a la position " << ft->position() << " passe de " << morphoData->at(0).properties  << endl;
      morphoData->at(0).properties=handler.m_tagIndex[ft->position()];
      cout << " a la position " << morphoData->at(0).properties << endl;

      const QString tagAfter=QString::fromStdString(microManager.getPropertySymbolicValue(morphoData->at(0).properties));

      cout << "Et la chaîne passe de " << tagBefore << " à " << tagAfter << endl;

      put(vertex_data, *lingGraph, *vxItr, morphoData);
      cout << " a la position " << morphoData->at(0).properties << endl;
    }
  }
  catch (const XMLException& ){
    PROCESSORSLOGINIT;
    LERROR << "Error: failed to parse XML input file";
  }
  return SUCCESS_ID;
}
Пример #28
0
/**
 * Dumps the syntactic analysis of every sentence as XML: writes an XML
 * declaration, then a <syntactic_analysis_dump> element whose content is
 * produced by dumpLimaData for each segment of "SentenceBoundaries".
 *
 * All required analysis data are validated before the output stream is
 * created, so that no early return can leak the DumperStream.
 *
 * @param analysis analysis content providing "LinguisticMetaData",
 *        "SyntacticData", "PosGraph" and "SentenceBoundaries"
 * @return MISSING_DATA when any of these data is absent, SUCCESS_ID
 *         otherwise
 */
LimaStatusCode SyntacticAnalysisXmlLogger::process(
    AnalysisContent& analysis) const
{
    TimeUtils::updateCurrentTime();
    SALOGINIT;

    LinguisticMetaData* metadata=static_cast<LinguisticMetaData*>(analysis.getData("LinguisticMetaData"));
    if (metadata == 0) {
        LERROR << "no LinguisticMetaData ! abort" << LENDL;
        return MISSING_DATA;
    }

    const SyntacticData* syntacticData=static_cast<const SyntacticData*>(analysis.getData("SyntacticData"));
    if (syntacticData==0)
    {
        LERROR << "no SyntacticData ! abort" << LENDL;
        return MISSING_DATA;
    }

    AnalysisGraph* anagraph=static_cast<AnalysisGraph*>(analysis.getData("PosGraph"));
    if (anagraph==0)
    {
        LERROR << "no AnalysisGraph ! abort" << LENDL;
        return MISSING_DATA;
    }

    SegmentationData* sb=static_cast<SegmentationData*>(analysis.getData("SentenceBoundaries"));
    if (sb==0)
    {
        LERROR << "no SentenceBounds ! abort" << LENDL;
        return MISSING_DATA;
    }

    // created only once every input is known to be present; deleted at the
    // end of this function
    DumperStream* dstream=initialize(analysis);
    std::ostream& outputStream=dstream->out();

    outputStream << "<?xml version='1.0' encoding='UTF-8'?>" << std::endl;
    outputStream << "<syntactic_analysis_dump>" << std::endl;

    // one dump per sentence segment
    const std::vector<Segment>& sentences=sb->getSegments();
    for (std::vector<Segment>::const_iterator sentence=sentences.begin();
         sentence!=sentences.end();
         ++sentence)
    {
        LinguisticGraphVertex beginSentence=sentence->getFirstVertex();
        LinguisticGraphVertex endSentence=sentence->getLastVertex();

        dumpLimaData(outputStream,
                     beginSentence,
                     endSentence,
                     anagraph,
                     syntacticData);
    }

    outputStream << "</syntactic_analysis_dump>" << std::endl;
    delete dstream;
    TimeUtils::logElapsedTime("SyntacticAnalysisXmlLogger");
    return SUCCESS_ID;
}
Пример #29
0
/**
 * Logs the semantic annotations and relations of the annotation graph as
 * XML: a <relations> element (with docid and offsetNode attributes)
 * containing one <annotation> per annotation vertex carrying a
 * "SemanticAnnotation" and one <relation> per annotation edge carrying a
 * "SemanticRelation".
 *
 * The metadata entries "StartOffset", "StartOffsetIndexingNode" and "DocId"
 * are optional: when getMetaData throws LinguisticProcessingException for
 * one of them, the default (0 or the empty string) is kept.
 *
 * @param analysis analysis content providing "AnnotationData", the graph
 *        named m_graph and "LinguisticMetaData"
 * @return MISSING_DATA when one of these data is absent, UNKNOWN_ERROR when
 *         the log file cannot be opened, SUCCESS_ID otherwise
 */
LimaStatusCode SemanticRelationsXmlLogger::
process(AnalysisContent& analysis) const
{
  TimeUtils::updateCurrentTime();
  
  SEMLOGINIT;
  // plain trace message: debug level, not error level
  LDEBUG << "SemanticRelationsXmlLogger" << LENDL;

  LinguisticMetaData* metadata=static_cast<LinguisticMetaData*>(analysis.getData("LinguisticMetaData"));
  if (metadata == 0) {
    LERROR << "no LinguisticMetaData ! abort" << LENDL;
    return MISSING_DATA;
  }

  AnnotationData* annotationData = static_cast< AnnotationData* >(analysis.getData("AnnotationData"));
  if (annotationData == 0) {
    LERROR << "no AnnotationData ! abort" << LENDL;
    return MISSING_DATA;
  }

  // check the pointer before turning it into a reference
  LinguisticAnalysisStructure::AnalysisGraph* anaGraph =
    static_cast<LinguisticAnalysisStructure::AnalysisGraph*>(analysis.getData(m_graph));
  if (anaGraph == 0) {
    LERROR << "no graph '" << m_graph << "' available ! abort" << LENDL;
    return MISSING_DATA;
  }
  const LinguisticAnalysisStructure::AnalysisGraph& graph = *anaGraph;

  LinguisticGraph* lingGraph = const_cast<LinguisticGraph*>(graph.getGraph());
  VertexTokenPropertyMap tokenMap = get(vertex_token, *lingGraph);

  ofstream out;
  if (!openLogFile(out,metadata->getMetaData("FileName"))) {
    LERROR << "Can't open log file " << LENDL;
    return UNKNOWN_ERROR;
  }

  uint64_t offset(0);
  try {
    offset=atoi(metadata->getMetaData("StartOffset").c_str());
  }
  catch (const LinguisticProcessingException& ) {
    // do nothing: not set in analyzeText (only in analyzeXmlDocuments)
  }

  uint64_t offsetIndexingNode(0);
  try {
    offsetIndexingNode=atoi(metadata->getMetaData("StartOffsetIndexingNode").c_str());
  }
  catch (const LinguisticProcessingException& ) {
    // do nothing: not set in analyzeText (only in analyzeXmlDocuments)
  }

  std::string docId("");
  try {
    docId=metadata->getMetaData("DocId");
  }
  catch (const LinguisticProcessingException& ) {
    // do nothing: not set in analyzeText (only in analyzeXmlDocuments)
  }

  out << "<relations docid=\"" << docId
      << "\" offsetNode=\"" << offsetIndexingNode 
      << "\">" << endl;

  // annotation names, converted once instead of on every vertex / edge
  const auto semanticAnnotationName = Common::Misc::utf8stdstring2limastring("SemanticAnnotation");
  const auto semanticRelationName = Common::Misc::utf8stdstring2limastring("SemanticRelation");

  const AnnotationGraph& annotGraph=annotationData->getGraph();

  //look at all vertices for annotations
  AnnotationGraphVertexIt itv, itv_end;
  boost::tie(itv, itv_end) = vertices(annotGraph);
  for (; itv != itv_end; ++itv)
  {
    LDEBUG << "SemanticRelationsXmlLogger on annotation vertex " << *itv << LENDL;
    if (!annotationData->hasAnnotation(*itv,semanticAnnotationName))
    {
      continue;
    }

    const SemanticAnnotation* annot = 0;
    try
    {
      annot = annotationData->annotation(*itv,semanticAnnotationName)
        .pointerValue<SemanticAnnotation>();
    }
    catch (const boost::bad_any_cast& )
    {
      LERROR << "This annotation is not a SemanticAnnotation" << LENDL;
      continue;
    }
    if (annot == 0)
    {
      // annotation present but holding no object: nothing to output
      LERROR << "Null SemanticAnnotation on annotation vertex " << *itv << LENDL;
      continue;
    }

    // output
    out << "<annotation type=\"" << annot->getType() << "\">" << endl
        << vertexStringForSemanticAnnotation("vertex",*itv,tokenMap,annotationData,offset)
        << "</annotation>" << endl;
  }

  // look at all edges for relations
  AnnotationGraphEdgeIt it,it_end;
  boost::tie(it, it_end) = edges(annotGraph);
  for (; it != it_end; ++it) {
    LDEBUG << "SemanticRelationsXmlLogger on annotation edge " 
           << source(*it,annotGraph) << "->" << target(*it,annotGraph) << LENDL;
    if (!annotationData->hasAnnotation(*it,semanticRelationName))
    {
      continue;
    }

    LDEBUG << "found semantic relation" << LENDL;
    const SemanticRelationAnnotation* annot = 0;
    try
    {
      annot = annotationData->annotation(*it,semanticRelationName)
        .pointerValue<SemanticRelationAnnotation>();
    }
    catch (const boost::bad_any_cast& )
    {
      LERROR << "This annotation is not a SemanticRelation" << LENDL;
      continue;
    }
    if (annot == 0)
    {
      // annotation present but holding no object: nothing to output
      LERROR << "Null SemanticRelation on annotation edge "
             << source(*it,annotGraph) << "->" << target(*it,annotGraph) << LENDL;
      continue;
    }

    //output
    out << "<relation type=\"" << annot->type() << "\">" << endl
        << vertexStringForSemanticAnnotation("source",source(*it,annotGraph),tokenMap,annotationData,offset)
        << vertexStringForSemanticAnnotation("target",target(*it,annotGraph),tokenMap,annotationData,offset)
        << "</relation>" << endl;
  }

  out << "</relations>" << endl;
  out.close();

  TimeUtils::logElapsedTime("SemanticRelationsXmlLogger");
  return SUCCESS_ID;
}
Пример #30
0
/**
 * Returns the entity type name of the "SpecificEntity" annotation attached
 * to an annotation vertex matching v. If several matching vertices carry
 * such an annotation, the last one visited wins.
 *
 * @param graph    analysis graph containing v
 * @param v        vertex whose feature value is requested
 * @param analysis analysis content providing "AnnotationData"
 * @return the entity name, or "NAN" when there is no annotation data or no
 *         specific entity annotation for v
 */
std::string FeatureSpecificEntity::
getValue(const LinguisticAnalysisStructure::AnalysisGraph* graph, 
         LinguisticGraphVertex v,
         AnalysisContent &analysis
        ) const
{
  std::string typeName("NAN");
  Common::AnnotationGraphs::AnnotationData *annot = static_cast<  Common::AnnotationGraphs::AnnotationData* >(analysis.getData("AnnotationData"));
  if (annot == 0)
  {
    // no annotation data at all: no specific entity can be attached to v
    return typeName;
  }

  // converted once instead of twice per matching vertex
  const LimaString specificEntityName = Common::Misc::utf8stdstring2limastring("SpecificEntity");

  const std::set< AnnotationGraphVertex > matches = annot->matches(graph->getGraphId(),v,"annot");
  for (std::set< AnnotationGraphVertex >::const_iterator it = matches.begin(); it != matches.end(); ++it)
  {
    if (!annot->hasAnnotation(*it, specificEntityName))
    {
      continue;
    }
    AnnotationGraphVertex vx=*it;
    const SpecificEntityAnnotation* se = annot->annotation(vx, specificEntityName).
    pointerValue<SpecificEntityAnnotation>();
    if (se == 0)
    {
      // annotation present but holding no object: nothing to read
      continue;
    }

    LimaString str= Common::MediaticData::MediaticData::single().getEntityName(se->getType());
    typeName=Common::Misc::limastring2utf8stdstring(str);
  }
  return typeName;
}