Example #1
0
LimaStatusCode SyntacticAnalysisXmlLogger::process(
    AnalysisContent& analysis) const
{
    TimeUtils::updateCurrentTime();

    LinguisticMetaData* metadata=static_cast<LinguisticMetaData*>(analysis.getData("LinguisticMetaData"));
    if (metadata == 0) {
        SALOGINIT;
        LERROR << "no LinguisticMetaData ! abort" << LENDL;
        return MISSING_DATA;
    }

    DumperStream* dstream=initialize(analysis);
    std::ostream& outputStream=dstream->out();
    /*std::ofstream outputStream;
    if (!openLogFile(outputStream,metadata->getMetaData("FileName")))
    {
      SALOGINIT;
      LERROR << "Can't open log file " << LENDL;
      return CANNOT_OPEN_FILE_ERROR;
    }*/

    SALOGINIT;

    const SyntacticData* syntacticData=static_cast<const SyntacticData*>(analysis.getData("SyntacticData"));
    if (syntacticData==0)
    {
        LERROR << "no SyntacticData ! abort" << LENDL;
        return MISSING_DATA;
    }

    AnalysisGraph* anagraph=static_cast<AnalysisGraph*>(analysis.getData("PosGraph"));
    if (anagraph==0)
    {
        LERROR << "no AnalysisGraph ! abort" << LENDL;
        return MISSING_DATA;
    }
    SegmentationData* sb=static_cast<SegmentationData*>(analysis.getData("SentenceBoundaries"));
    if (sb==0)
    {
        LERROR << "no SentenceBounds ! abort" << LENDL;
        return MISSING_DATA;
    }
//  LinguisticGraph* graph=anagraph->getGraph();

    std::set< std::pair<size_t, size_t> > alreadyDumped;

    outputStream << "<?xml version='1.0' encoding='UTF-8'?>" << std::endl;
    outputStream << "<syntactic_analysis_dump>" << std::endl;


    // ??OME2 SegmentationData::iterator sbItr=sb->begin();
    std::vector<Segment>::iterator sbItr=(sb->getSegments()).begin();

    while (sbItr!=(sb->getSegments()).end())
    {
        LinguisticGraphVertex beginSentence=sbItr->getFirstVertex();
        LinguisticGraphVertex endSentence=sbItr->getLastVertex();

        dumpLimaData(outputStream,
                     beginSentence,
                     endSentence,
                     anagraph,
                     syntacticData);

        sbItr++;
    }

    outputStream << "</syntactic_analysis_dump>" << std::endl;
    delete dstream;
    TimeUtils::logElapsedTime("SyntacticAnalysisXmlLogger");
    return SUCCESS_ID;
}
Example #2
0
LimaStatusCode EasyXmlDumper::process(AnalysisContent& analysis) const
{
  TimeUtils::updateCurrentTime();
  DUMPERLOGINIT;

  LinguisticMetaData* metadata = static_cast<LinguisticMetaData*>(analysis.getData("LinguisticMetaData"));
  if (metadata == 0) {
    LERROR << "EasyXmlDumper::process no LinguisticMetaData ! abort";
      return MISSING_DATA;
  }
  string filename = metadata->getMetaData("FileName");
  LDEBUG << "EasyXmlDumper::process Filename: " << filename;

  LDEBUG << "handler will be: " << m_handler;
//   MediaId langid = static_cast<const  Common::MediaticData::LanguageData&>(Common::MediaticData::MediaticData::single().mediaData(metadata->getMetaData("Lang"))).getMedia();
  AnalysisHandlerContainer* h = static_cast<AnalysisHandlerContainer*>(analysis.getData("AnalysisHandlerContainer"));
  AbstractTextualAnalysisHandler* handler = static_cast<AbstractTextualAnalysisHandler*>(h->getHandler(m_handler));
  if (handler==0)
  {
    LERROR << "EasyXmlDumper::process: handler " << m_handler << " has not been given to the core client";
    return MISSING_DATA;
  }
  
  AnalysisGraph* graph = static_cast<AnalysisGraph*>(analysis.getData(m_graph));
  if (graph == 0)
  {
    graph = new AnalysisGraph(m_graph,m_language,true,true);
    analysis.setData(m_graph,graph);
  }

  SyntacticData* syntacticData = static_cast<SyntacticData*>(analysis.getData("SyntacticData"));
  if (syntacticData == 0)
  {
    syntacticData = new SyntacticAnalysis::SyntacticData(static_cast<AnalysisGraph*>(analysis.getData(m_graph)),0);
    syntacticData->setupDependencyGraph();
    analysis.setData("SyntacticData",syntacticData);
  }

  AnnotationData* annotationData = static_cast< AnnotationData* >(analysis.getData("AnnotationData"));
  if (annotationData == 0)
  {
    annotationData = new AnnotationData();
    if (static_cast<AnalysisGraph*>(analysis.getData("AnalysisGraph")) != 0)
    {
      static_cast<AnalysisGraph*>(analysis.getData("AnalysisGraph"))->populateAnnotationGraph(annotationData, "AnalysisGraph");
    }
    analysis.setData("AnnotationData",annotationData);
  }

  handler->startAnalysis();
  HandlerStreamBuf hsb(handler);
  std::ostream outputStream(&hsb);

  LDEBUG << "EasyXmlDumper:: process before printing heading";
  AnalysisGraph* anaGraph = static_cast<AnalysisGraph*>(analysis.getData("AnalysisGraph"));
  AnalysisGraph* posGraph = static_cast<AnalysisGraph*>(analysis.getData("PosGraph"));
  if (anaGraph != 0 && posGraph != 0)
  {
    LDEBUG << "EasyXmlDumper:: begin of posgraph";
    std::vector< bool > alreadyDumpedTokens;
    std::map< LinguisticAnalysisStructure::Token*, uint64_t > fullTokens;
    LinguisticGraphVertexIt i, i_end;
    uint64_t id = 0;
    alreadyDumpedTokens.resize(num_vertices(*posGraph->getGraph()));
    for (boost::tie(i, i_end) = vertices(*posGraph->getGraph()); i != i_end; ++i)
    {
      LDEBUG << "EasyXmlDumper:: examine posgraph for " << id;
      alreadyDumpedTokens[id] = false;
      fullTokens[get(vertex_token, *posGraph->getGraph(), *i)] = id;
      id++;
    }
    /* No need for sentence boundaries in Easy input
    LinguisticGraphVertex sentenceBegin = sb->getStartVertex();
    SegmentationData::iterator sbItr = sb->begin();
    LinguisticGraphVertex sentenceBegin = sb->getStartVertex();
    SegmentationData::iterator sbItr = sb->begin();
    */
    LinguisticGraphVertex sentenceBegin = posGraph->firstVertex();
    LinguisticGraphVertex sentenceEnd = posGraph->lastVertex();
    string sentIdPrefix;
    try {
      sentIdPrefix = metadata->getMetaData("docid");
      LDEBUG << "EasyXmlDumper:: retrieve sentence id " << sentIdPrefix;
    }catch (LinguisticProcessingException& ) {
      sentIdPrefix = "";
    }
    if(sentIdPrefix.length() <= 0)
      sentIdPrefix = "E";
    /* No need for sentence boundaries in Easy input
    while (sbItr != sb->end())
    {
      LinguisticGraphVertex sentenceEnd = *sbItr;
    */
    LDEBUG << "EasyXmlDumper:: inside posgraph while ";
    dumpLimaData(outputStream,
                  sentenceBegin,
                  sentenceEnd,
                  *anaGraph,
                  *posGraph,
                  *annotationData,
                  *syntacticData,
                  "PosGraph",
                  alreadyDumpedTokens,
                  fullTokens,
                  sentIdPrefix);
    /* No need for sentence boundaries in Easy input
      sentenceBegin = sentenceEnd;
      sbItr++;
    }
    */
    LDEBUG << "EasyXmlDumper:: end of posgraph";
  }

  return SUCCESS_ID;
}