/**
 * Writes an XML dump of the syntactic analysis, one sentence segment at a time.
 *
 * The function reads "LinguisticMetaData", "SyntacticData", "PosGraph" and
 * "SentenceBoundaries" from @p analysis, obtains an output stream through
 * initialize(), and calls dumpLimaData() for every segment between the
 * <syntactic_analysis_dump> opening and closing tags.
 *
 * @param analysis container from which the analysis data are fetched
 * @return MISSING_DATA when one of the required data entries is absent,
 *         SUCCESS_ID once the dump has been written
 */
LimaStatusCode SyntacticAnalysisXmlLogger::process(
  AnalysisContent& analysis) const
{
  TimeUtils::updateCurrentTime();
  SALOGINIT;

  LinguisticMetaData* metadata=static_cast<LinguisticMetaData*>(analysis.getData("LinguisticMetaData"));
  if (metadata == 0)
  {
    LERROR << "no LinguisticMetaData ! abort" << LENDL;
    return MISSING_DATA;
  }

  // dstream is owned by this function from here on: every exit path below
  // must delete it, otherwise the stream object is leaked.
  DumperStream* dstream=initialize(analysis);
  std::ostream& outputStream=dstream->out();

  const SyntacticData* syntacticData=static_cast<const SyntacticData*>(analysis.getData("SyntacticData"));
  if (syntacticData==0)
  {
    LERROR << "no SyntacticData ! abort" << LENDL;
    delete dstream;
    return MISSING_DATA;
  }

  AnalysisGraph* anagraph=static_cast<AnalysisGraph*>(analysis.getData("PosGraph"));
  if (anagraph==0)
  {
    LERROR << "no AnalysisGraph ! abort" << LENDL;
    delete dstream;
    return MISSING_DATA;
  }

  SegmentationData* sb=static_cast<SegmentationData*>(analysis.getData("SentenceBoundaries"));
  if (sb==0)
  {
    LERROR << "no SentenceBounds ! abort" << LENDL;
    delete dstream;
    return MISSING_DATA;
  }

  outputStream << "<?xml version='1.0' encoding='UTF-8'?>" << std::endl;
  outputStream << "<syntactic_analysis_dump>" << std::endl;

  // Dump each segment, delimited by its first and last vertex.
  for (std::vector<Segment>::iterator sbItr=(sb->getSegments()).begin();
       sbItr!=(sb->getSegments()).end();
       ++sbItr)
  {
    LinguisticGraphVertex beginSentence=sbItr->getFirstVertex();
    LinguisticGraphVertex endSentence=sbItr->getLastVertex();

    dumpLimaData(outputStream,
                 beginSentence,
                 endSentence,
                 anagraph,
                 syntacticData);
  }

  outputStream << "</syntactic_analysis_dump>" << std::endl;

  delete dstream;
  TimeUtils::logElapsedTime("SyntacticAnalysisXmlLogger");
  return SUCCESS_ID;
}
/**
 * Dumps the analysis in Easy XML format through the analysis handler whose
 * name is stored in m_handler.
 *
 * The function fetches "LinguisticMetaData" and "AnalysisHandlerContainer"
 * from @p analysis. It creates and registers the graph named by m_graph, the
 * "SyntacticData" and the "AnnotationData" when they are absent. When both
 * "AnalysisGraph" and "PosGraph" are available, it calls dumpLimaData() once
 * over the whole PosGraph (from its first to its last vertex); no sentence
 * segmentation is used.
 *
 * NOTE(review): handler->startAnalysis() is called here but no matching
 * end-of-analysis call is visible in this function — confirm whether the
 * caller or the stream buffer is expected to close the handler.
 *
 * @param analysis container from which the analysis data are fetched and in
 *                 which missing data are registered
 * @return MISSING_DATA when the metadata, the handler container or the
 *         handler itself is absent, SUCCESS_ID otherwise
 */
LimaStatusCode EasyXmlDumper::process(AnalysisContent& analysis) const
{
  TimeUtils::updateCurrentTime();
  DUMPERLOGINIT;

  LinguisticMetaData* metadata = static_cast<LinguisticMetaData*>(analysis.getData("LinguisticMetaData"));
  if (metadata == 0)
  {
    LERROR << "EasyXmlDumper::process no LinguisticMetaData ! abort";
    return MISSING_DATA;
  }
  string filename = metadata->getMetaData("FileName");
  LDEBUG << "EasyXmlDumper::process Filename: " << filename;
  LDEBUG << "handler will be: " << m_handler;

  // The container must be checked before it is dereferenced: getData() yields
  // a null pointer when the entry is absent, as the other checks here assume.
  AnalysisHandlerContainer* h = static_cast<AnalysisHandlerContainer*>(analysis.getData("AnalysisHandlerContainer"));
  if (h == 0)
  {
    LERROR << "EasyXmlDumper::process no AnalysisHandlerContainer ! abort";
    return MISSING_DATA;
  }
  AbstractTextualAnalysisHandler* handler = static_cast<AbstractTextualAnalysisHandler*>(h->getHandler(m_handler));
  if (handler == 0)
  {
    LERROR << "EasyXmlDumper::process: handler " << m_handler << " has not been given to the core client";
    return MISSING_DATA;
  }

  // Make sure the configured graph exists; register a new one otherwise.
  AnalysisGraph* graph = static_cast<AnalysisGraph*>(analysis.getData(m_graph));
  if (graph == 0)
  {
    graph = new AnalysisGraph(m_graph,m_language,true,true);
    analysis.setData(m_graph,graph);
  }

  // Same for the syntactic data, built on the graph obtained above.
  SyntacticData* syntacticData = static_cast<SyntacticData*>(analysis.getData("SyntacticData"));
  if (syntacticData == 0)
  {
    syntacticData = new SyntacticAnalysis::SyntacticData(graph,0);
    syntacticData->setupDependencyGraph();
    analysis.setData("SyntacticData",syntacticData);
  }

  // Same for the annotation data, populated from "AnalysisGraph" if present.
  AnnotationData* annotationData = static_cast< AnnotationData* >(analysis.getData("AnnotationData"));
  if (annotationData == 0)
  {
    annotationData = new AnnotationData();
    AnalysisGraph* sourceGraph = static_cast<AnalysisGraph*>(analysis.getData("AnalysisGraph"));
    if (sourceGraph != 0)
    {
      sourceGraph->populateAnnotationGraph(annotationData, "AnalysisGraph");
    }
    analysis.setData("AnnotationData",annotationData);
  }

  handler->startAnalysis();
  HandlerStreamBuf hsb(handler);
  std::ostream outputStream(&hsb);
  LDEBUG << "EasyXmlDumper:: process before printing heading";

  AnalysisGraph* anaGraph = static_cast<AnalysisGraph*>(analysis.getData("AnalysisGraph"));
  AnalysisGraph* posGraph = static_cast<AnalysisGraph*>(analysis.getData("PosGraph"));
  if (anaGraph != 0 && posGraph != 0)
  {
    LDEBUG << "EasyXmlDumper:: begin of posgraph";

    // One "already dumped" flag per PosGraph vertex, all initially false.
    std::vector< bool > alreadyDumpedTokens(num_vertices(*posGraph->getGraph()), false);
    // Maps every token of the PosGraph to its rank in the vertex enumeration.
    std::map< LinguisticAnalysisStructure::Token*, uint64_t > fullTokens;

    LinguisticGraphVertexIt i, i_end;
    uint64_t id = 0;
    for (boost::tie(i, i_end) = vertices(*posGraph->getGraph()); i != i_end; ++i)
    {
      LDEBUG << "EasyXmlDumper:: examine posgraph for " << id;
      fullTokens[get(vertex_token, *posGraph->getGraph(), *i)] = id;
      id++;
    }

    // Easy input needs no sentence boundaries: the whole graph is dumped at once.
    LinguisticGraphVertex sentenceBegin = posGraph->firstVertex();
    LinguisticGraphVertex sentenceEnd = posGraph->lastVertex();

    // Sentence identifiers are prefixed with the document id, or "E" when
    // the metadata cannot provide one.
    string sentIdPrefix;
    try
    {
      sentIdPrefix = metadata->getMetaData("docid");
      LDEBUG << "EasyXmlDumper:: retrieve sentence id " << sentIdPrefix;
    }
    catch (LinguisticProcessingException& )
    {
      sentIdPrefix = "";
    }
    if (sentIdPrefix.empty())
    {
      sentIdPrefix = "E";
    }

    LDEBUG << "EasyXmlDumper:: inside posgraph while ";
    dumpLimaData(outputStream,
                 sentenceBegin,
                 sentenceEnd,
                 *anaGraph,
                 *posGraph,
                 *annotationData,
                 *syntacticData,
                 "PosGraph",
                 alreadyDumpedTokens,
                 fullTokens,
                 sentIdPrefix);

    LDEBUG << "EasyXmlDumper:: end of posgraph";
  }

  return SUCCESS_ID;
}