std::string Constraint:: checkStringDebug(const AnalysisGraph& graph, const LinguisticGraphVertex vertex) const { string functionName; LimaString complement; if (! ConstraintFunctionManager::single(). getFunctionName(m_functionAddr, functionName, complement)) { AULOGINIT; LERROR << "constraint function " << m_functionAddr << " not availale" << LENDL; } ostringstream oss; oss << "Constraint:" << actionString() << " vertex " << vertex << " (" << Common::Misc::limastring2utf8stdstring((get(vertex_token,*(graph.getGraph()),vertex))->stringForm()) << ")" << " in constraint " << m_index << ",compl=" << Common::Misc::limastring2utf8stdstring(complement) << "\", using function " << functionName; return oss.str(); }
void FullTokenXmlLogger::dump(std::ostream& xmlStream, AnalysisGraph& tTokenList) const { //LASLOGINIT; xmlStream << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" << std::endl; xmlStream << "<!--generated by MM project on "; // const uint64_t dateLen = strlen("Tue Oct 22 13:42:36 2002"); time_t aclock; time(&aclock); /* Get time in seconds */ std::string str(ctime(&aclock)); xmlStream << str; xmlStream << "-->" << std::endl; xmlStream << "<?xml-stylesheet type=\"text/xsl\" href=\"DataStructure.xslt\"?>" << std::endl; xmlStream << "<data_structure xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\""; xmlStream << " xsi:noNamespaceSchemaLocation=\"DataStructure.xsd\">" << std::endl; // dump the graph const FsaStringsPool& sp=Common::MediaticData::MediaticData::single().stringsPool(m_language); DumpXMLVisitor vis(xmlStream,*m_propertyCodeManager,sp); breadth_first_search(*(tTokenList.getGraph()), tTokenList.firstVertex(), visitor(vis)); xmlStream << "</data_structure>" << std::endl; }
/**
 * Build the linear text representation (LTR_Text) of the "PosGraph" and
 * write it in binary form through the analysis handler named m_handler.
 *
 * @param analysis analysis content holding "LinguisticMetaData", "PosGraph",
 *                 "SentenceBoundaries" and "AnalysisHandlerContainer"
 * @return MISSING_DATA when one of these data, or the requested handler, is
 *         not available; SUCCESS_ID once the representation has been written
 */
LimaStatusCode LinearTextRepresentationDumper::process(
  AnalysisContent& analysis) const
{
  DUMPERLOGINIT;

  // get metadata
  LinguisticMetaData* metadata =
    dynamic_cast<LinguisticMetaData*>(analysis.getData("LinguisticMetaData"));
  if (metadata == 0)
  {
    LERROR << "LinearTextRepresentationDumper::process: no LinguisticMetaData ! abort" << LENDL;
    return MISSING_DATA;
  }

  // get the analysis graph
  AnalysisGraph* anaGraph =
    dynamic_cast<AnalysisGraph*>(analysis.getData("PosGraph"));
  if (anaGraph == 0)
  {
    LERROR << "LinearTextRepresentationDumper::process: no AnalysisGraph ! abort" << LENDL;
    return MISSING_DATA;
  }

  // get sentence boundaries
  SegmentationData* sb =
    dynamic_cast<SegmentationData*>(analysis.getData("SentenceBoundaries"));
  if (sb == 0)
  {
    LERROR << "LinearTextRepresentationDumper::process: no SentenceBounds ! abort" << LENDL;
    return MISSING_DATA;
  }

  // build LTRText
  LTR_Text textRep;
  LTRTextBuilder builder(m_language, m_stopList);
  builder.buildLTRTextFrom(
    *(anaGraph->getGraph()),
    sb,
    anaGraph->lastVertex(),
    &textRep,
    metadata->getStartOffset());

  // find the handler the LTR_Text is written to
  LDEBUG << "handler will be: " << m_handler << LENDL;
  AnalysisHandlerContainer* h =
    static_cast<AnalysisHandlerContainer*>(analysis.getData("AnalysisHandlerContainer"));
  if (h == 0)
  {
    // without this check the container would be dereferenced while null
    LERROR << "LinearTextRepresentationDumper::process: no AnalysisHandlerContainer ! abort" << LENDL;
    return MISSING_DATA;
  }
  AbstractTextualAnalysisHandler* handler =
    static_cast<AbstractTextualAnalysisHandler*>(h->getHandler(m_handler));
  if (handler == 0)
  {
    LERROR << "LinearTextRepresentationDumper::process: handler " << m_handler
           << " has not been given to the core client" << LENDL;
    return MISSING_DATA;
  }

  // write LTR_Text through a stream backed by the handler
  handler->startAnalysis();
  HandlerStreamBuf hsb(handler);
  ostream out(&hsb);
  LDEBUG << textRep << LENDL;
  textRep.binaryWriteOn(out);
  out.flush();
  handler->endAnalysis();
  return SUCCESS_ID;
}
/**
 * Build the linear text representation (LTR_Text) of the "PosGraph" and
 * write it in binary form to a file named after the analyzed file
 * ("FileName" metadata) followed by m_outputSuffix.
 *
 * @param analysis analysis content holding "LinguisticMetaData" and
 *                 "PosGraph"; "SentenceBoundaries" is optional
 * @return MISSING_DATA when the metadata or the graph is not available,
 *         SUCCESS_ID otherwise
 * @throws runtime_error when the output file cannot be opened
 */
LimaStatusCode LinearTextRepresentationLogger::process(
  AnalysisContent& analysis) const
{
  DUMPERLOGINIT;

  // get metadata
  LinguisticMetaData* metadata =
    dynamic_cast<LinguisticMetaData*>(analysis.getData("LinguisticMetaData"));
  if (metadata == 0)
  {
    LERROR << "no LinguisticMetaData ! abort";
    return MISSING_DATA;
  }

  // get the analysis graph
  AnalysisGraph* anaGraph =
    dynamic_cast<AnalysisGraph*>(analysis.getData("PosGraph"));
  if (anaGraph == 0)
  {
    LERROR << "no AnalysisGraph ! abort";
    return MISSING_DATA;
  }

  // get sentence boundaries
  SegmentationData* sb =
    dynamic_cast<SegmentationData*>(analysis.getData("SentenceBoundaries"));
  if (sb == 0)
  {
    // Message now names this class (it used to name the Dumper).
    LDEBUG << "LinearTextRepresentationLogger::process: no SentenceBounds available: ignored";
    // sentence bounds ignored: null pointer passed to LTRTextBuilder will be handled there
  }

  // build LTRText
  LTR_Text textRep;
  LTRTextBuilder builder(m_language, m_stopList);
  builder.buildLTRTextFrom(
    *(anaGraph->getGraph()),
    sb,
    anaGraph->firstVertex(),
    anaGraph->lastVertex(),
    &textRep,
    metadata->getStartOffset());

  // write LTR_Text
  string textFileName = metadata->getMetaData("FileName");
  string outputFile = textFileName + m_outputSuffix;
  ofstream out(outputFile.c_str(), std::ofstream::binary);
  if (!out.good())
  {
    throw runtime_error("can't open file " + outputFile);
  }
  textRep.binaryWriteOn(out);
  out.flush();
  out.close();
  return SUCCESS_ID;
}
//*********************************************************************** // main function for outputing the graph //*********************************************************************** void EasyXmlDumper::dumpLimaData(std::ostream& os, const LinguisticGraphVertex& begin, const LinguisticGraphVertex& end, const AnalysisGraph& anaGraph, const AnalysisGraph& posGraph, const AnnotationData& annotationData, const SyntacticData& syntacticData, const std::string& graphId, std::vector< bool >& alreadyDumpedTokens, std::map< LinguisticAnalysisStructure::Token*, uint64_t >& fullTokens, std::string sentIdPrefix) const { DUMPERLOGINIT; LDEBUG << "EasyXmlDumper:: dumpLimaData parameters: "; LDEBUG << "EasyXmlDumper:: begin = " << begin; LDEBUG << "EasyXmlDumper:: end = " << end; LDEBUG << "EasyXmlDumper:: posgraph first vertex = " << posGraph.firstVertex(); LDEBUG << "EasyXmlDumper:: posgraph last vertex = " << posGraph.lastVertex(); LDEBUG << "EasyXmlDumper:: graphId = " << graphId; LDEBUG << "EasyXmlDumper:: sentIdPrefix = " << sentIdPrefix; // just in case we want to check alreadt dumped tokens' array for (uint64_t i = 0; i<alreadyDumpedTokens.size(); i++) { if (alreadyDumpedTokens[i]) { LDEBUG << "EasyXmlDumper:: already_dumped_tokens[" << i << "] =" << alreadyDumpedTokens[i]; } } std::string sentIdStr = sentIdPrefix; if(find(m_sentIds.begin(), m_sentIds.end(), sentIdStr) != m_sentIds.end() || sentIdStr == "E" ) { uint64_t sentIdsuffix = 0; do{ sentIdsuffix++; std::stringstream sentIdStream; sentIdStream << sentIdPrefix << sentIdsuffix; sentIdStr = sentIdStream.str(); }while(find(m_sentIds.begin(), m_sentIds.end(), sentIdStr) != m_sentIds.end()); } LDEBUG << "EasyXmlDumper:: searching and extracting vertices and relations"; LinguisticGraph* anaGraphL = const_cast<LinguisticGraph*>(anaGraph.getGraph()); LinguisticGraph* posGraphL = const_cast<LinguisticGraph*>(posGraph.getGraph()); ConstituantAndRelationExtractor care(m_propertyCodeManager); care.visitBoostGraph(begin, 
end, *anaGraphL, *posGraphL, annotationData, syntacticData, fullTokens, alreadyDumpedTokens, m_language); LDEBUG << "EasyXmlDumper:: all found vertices and relations extracted"; care.replaceSEWithCompounds(); care.constructionDesRelationsEntrantes(); care.splitCompoundTenses(); care.constructionDesGroupes(); care.addLastFormsInGroups(); EasyDumper ed(care, m_typeMapping, m_srcTag, m_tgtTag, sentIdStr); std::stringstream sentEasyStream; ed.dump(sentEasyStream); if(sentEasyStream.str().length() > 0) { // Makes object mutable for adding sentence ID EasyXmlDumper* self = const_cast<EasyXmlDumper*>(this); self->m_sentIds.push_back(sentIdStr); os << "<E id=\"" << sentIdStr << "\">" << std::endl; os << sentEasyStream.str(); os << "</E>" << std::endl; } }
/**
 * Dump the analysis in Easy XML format through the handler named m_handler.
 *
 * The graph named m_graph, "SyntacticData" and "AnnotationData" are created
 * and registered in @p analysis when they are absent. When both
 * "AnalysisGraph" and "PosGraph" are available, the whole PosGraph (first to
 * last vertex) is dumped as a single span by dumpLimaData(), using the
 * "docid" metadata as sentence id prefix ("E" when absent or empty).
 *
 * @param analysis analysis content to read from (and to complete)
 * @return MISSING_DATA when the metadata, the handler container or the
 *         requested handler is not available, SUCCESS_ID otherwise
 */
LimaStatusCode EasyXmlDumper::process(AnalysisContent& analysis) const
{
  TimeUtils::updateCurrentTime();
  DUMPERLOGINIT;

  LinguisticMetaData* metadata =
    static_cast<LinguisticMetaData*>(analysis.getData("LinguisticMetaData"));
  if (metadata == 0)
  {
    LERROR << "EasyXmlDumper::process no LinguisticMetaData ! abort";
    return MISSING_DATA;
  }
  string filename = metadata->getMetaData("FileName");
  LDEBUG << "EasyXmlDumper::process Filename: " << filename;

  LDEBUG << "handler will be: " << m_handler;
  AnalysisHandlerContainer* h =
    static_cast<AnalysisHandlerContainer*>(analysis.getData("AnalysisHandlerContainer"));
  if (h == 0)
  {
    // without this check the container would be dereferenced while null
    LERROR << "EasyXmlDumper::process: no AnalysisHandlerContainer ! abort";
    return MISSING_DATA;
  }
  AbstractTextualAnalysisHandler* handler =
    static_cast<AbstractTextualAnalysisHandler*>(h->getHandler(m_handler));
  if (handler == 0)
  {
    LERROR << "EasyXmlDumper::process: handler " << m_handler
           << " has not been given to the core client";
    return MISSING_DATA;
  }

  // Make sure the data used below exist, creating and registering them in
  // the analysis content when they are missing.
  AnalysisGraph* graph = static_cast<AnalysisGraph*>(analysis.getData(m_graph));
  if (graph == 0)
  {
    graph = new AnalysisGraph(m_graph, m_language, true, true);
    analysis.setData(m_graph, graph);
  }

  SyntacticData* syntacticData =
    static_cast<SyntacticData*>(analysis.getData("SyntacticData"));
  if (syntacticData == 0)
  {
    syntacticData = new SyntacticAnalysis::SyntacticData(
      static_cast<AnalysisGraph*>(analysis.getData(m_graph)), 0);
    syntacticData->setupDependencyGraph();
    analysis.setData("SyntacticData", syntacticData);
  }

  AnnotationData* annotationData =
    static_cast< AnnotationData* >(analysis.getData("AnnotationData"));
  if (annotationData == 0)
  {
    annotationData = new AnnotationData();
    AnalysisGraph* analysisGraph =
      static_cast<AnalysisGraph*>(analysis.getData("AnalysisGraph"));
    if (analysisGraph != 0)
    {
      analysisGraph->populateAnnotationGraph(annotationData, "AnalysisGraph");
    }
    analysis.setData("AnnotationData", annotationData);
  }

  handler->startAnalysis();
  HandlerStreamBuf hsb(handler);
  std::ostream outputStream(&hsb);

  LDEBUG << "EasyXmlDumper:: process before printing heading";
  AnalysisGraph* anaGraph = static_cast<AnalysisGraph*>(analysis.getData("AnalysisGraph"));
  AnalysisGraph* posGraph = static_cast<AnalysisGraph*>(analysis.getData("PosGraph"));
  if (anaGraph != 0 && posGraph != 0)
  {
    LDEBUG << "EasyXmlDumper:: begin of posgraph";

    // Number the PosGraph vertices in iteration order: every token gets its
    // rank in fullTokens and starts as "not dumped yet".
    std::vector< bool > alreadyDumpedTokens;
    std::map< LinguisticAnalysisStructure::Token*, uint64_t > fullTokens;
    LinguisticGraphVertexIt i, i_end;
    uint64_t id = 0;
    alreadyDumpedTokens.resize(num_vertices(*posGraph->getGraph()));
    for (boost::tie(i, i_end) = vertices(*posGraph->getGraph()); i != i_end; ++i)
    {
      LDEBUG << "EasyXmlDumper:: examine posgraph for " << id;
      alreadyDumpedTokens[id] = false;
      fullTokens[get(vertex_token, *posGraph->getGraph(), *i)] = id;
      id++;
    }

    // No need for sentence boundaries in Easy input: the whole PosGraph is
    // dumped as one span.
    LinguisticGraphVertex sentenceBegin = posGraph->firstVertex();
    LinguisticGraphVertex sentenceEnd = posGraph->lastVertex();

    // Sentence id prefix: the "docid" metadata, or "E" when it is absent
    // (getMetaData throws) or empty.
    string sentIdPrefix;
    try
    {
      sentIdPrefix = metadata->getMetaData("docid");
      LDEBUG << "EasyXmlDumper:: retrieve sentence id " << sentIdPrefix;
    }
    catch (LinguisticProcessingException& )
    {
      sentIdPrefix = "";
    }
    if (sentIdPrefix.empty())
    {
      sentIdPrefix = "E";
    }

    LDEBUG << "EasyXmlDumper:: inside posgraph while ";
    dumpLimaData(outputStream,
                 sentenceBegin,
                 sentenceEnd,
                 *anaGraph,
                 *posGraph,
                 *annotationData,
                 *syntacticData,
                 "PosGraph",
                 alreadyDumpedTokens,
                 fullTokens,
                 sentIdPrefix);

    LDEBUG << "EasyXmlDumper:: end of posgraph";
  }

  // Push out anything still held by the stream, then pair the
  // startAnalysis() call above with its endAnalysis().
  outputStream.flush();
  handler->endAnalysis();

  return SUCCESS_ID;
}