/**
 * Copy constructor.
 *
 * Every token pointed to by @p text is duplicated with
 * <code>new LTR_Token(...)</code>, so the new object stores pointers to its
 * own freshly allocated tokens rather than sharing those of @p text.
 * The sentence bounds of @p text are then copied one by one, unchanged.
 *
 * @param text the text to duplicate
 */
LTR_Text::LTR_Text(const LTR_Text& text):
    std::vector<LTR_Token*>()
{
  // duplicate the tokens, in order
  const LTR_Text::size_type nbTokens = text.size();
  this->reserve(nbTokens);
  for (LTR_Text::size_type pos = 0; pos < nbTokens; ++pos) {
    LTR_Token* const original = text[pos];
    this->push_back(new LTR_Token(*original));
  }

  // duplicate the sentence bounds, in order
  SENTENCE_BOUNDS_T::const_iterator bound = text.m_sentenceBounds.begin();
  const SENTENCE_BOUNDS_T::const_iterator boundsEnd = text.m_sentenceBounds.end();
  while (bound != boundsEnd) {
    m_sentenceBounds.push_back(*bound);
    ++bound;
  }
}
/**
 * Builds the linear text representation of the analysed text and writes it,
 * in binary form, through the analysis handler named by m_handler.
 *
 * The following data are read from @p analysis: "LinguisticMetaData",
 * "PosGraph", "SentenceBoundaries" and "AnalysisHandlerContainer".
 *
 * @param analysis the analysis content holding the data listed above
 * @return MISSING_DATA if any of the required data, or the handler named by
 *         m_handler, is not available; SUCCESS_ID otherwise
 */
LimaStatusCode LinearTextRepresentationDumper::process(
  AnalysisContent& analysis) const
{
  DUMPERLOGINIT;

  // get metadata
  LinguisticMetaData* metadata=dynamic_cast<LinguisticMetaData*>(analysis.getData("LinguisticMetaData"));
  if (metadata == 0) {
    LERROR << "LinearTextRepresentationDumper::process: no LinguisticMetaData ! abort" << LENDL;
    return MISSING_DATA;
  }

  // get the analysis graph
  AnalysisGraph* anaGraph = dynamic_cast<AnalysisGraph*>(analysis.getData("PosGraph"));
  if (anaGraph == 0) {
    LERROR << "LinearTextRepresentationDumper::process: no AnalysisGraph ! abort" << LENDL;
    return MISSING_DATA;
  }

  // get sentence boundaries
  SegmentationData* sb = dynamic_cast<SegmentationData*>(analysis.getData("SentenceBoundaries"));
  if (sb == 0) {
    LERROR << "LinearTextRepresentationDumper::process: no SentenceBounds ! abort" << LENDL;
    return MISSING_DATA;
  }

  // build LTRText
  LTR_Text textRep;
  LTRTextBuilder builder(m_language, m_stopList);
  builder.buildLTRTextFrom(
    *(anaGraph->getGraph()),
    sb,
    anaGraph->lastVertex(),
    &textRep,
    metadata->getStartOffset());

  // write LTR_Text
  LDEBUG << "handler will be: " << m_handler << LENDL;

  // The handler container is looked up like the other analysis data and may
  // be absent: check it before dereferencing it to fetch the handler.
  AnalysisHandlerContainer* h = static_cast<AnalysisHandlerContainer*>(analysis.getData("AnalysisHandlerContainer"));
  if (h == 0) {
    LERROR << "LinearTextRepresentationDumper::process: no AnalysisHandlerContainer ! abort" << LENDL;
    return MISSING_DATA;
  }

  AbstractTextualAnalysisHandler* handler = static_cast<AbstractTextualAnalysisHandler*>(h->getHandler(m_handler));
  if (handler == 0) {
    LERROR << "LinearTextRepresentationDumper::process: handler " << m_handler << " has not been given to the core client" << LENDL;
    return MISSING_DATA;
  }

  // the binary output is produced between startAnalysis() and endAnalysis(),
  // through a stream whose buffer wraps the handler
  handler->startAnalysis();
  HandlerStreamBuf hsb(handler);
  ostream out(&hsb);
  LDEBUG << textRep << LENDL;
  textRep.binaryWriteOn(out);
  out.flush();
  handler->endAnalysis();
  return SUCCESS_ID;
}
void LTR_Text::appendLTR_Text(const LTR_Text& ltr) { LTR_Text::size_type thiSize = this->size(); // copy of tokens this->reserve(thiSize + ltr.size()); for (LTR_Text::const_iterator itTok = ltr.begin(); itTok != ltr.end(); ++ itTok) { LTR_Token* tok = new LTR_Token(**itTok); this->push_back(tok); } // add sentence bounds for (LTR_Text::SENTS_CONST_ITER_T itSentBd = ltr.beginSentenceBounds(); itSentBd != ltr.endSentenceBounds(); ++ itSentBd) { m_sentenceBounds.push_back(*itSentBd + thiSize); } }
/**
 * Builds the linear text representation of the analysed text and writes it,
 * in binary form, to a file whose name is the "FileName" metadata value
 * followed by m_outputSuffix.
 *
 * The following data are read from @p analysis: "LinguisticMetaData",
 * "PosGraph" and "SentenceBoundaries". Missing sentence boundaries are not
 * an error: a null pointer is then passed to the text builder.
 *
 * @param analysis the analysis content holding the data listed above
 * @return MISSING_DATA if the metadata or the analysis graph is not
 *         available; SUCCESS_ID otherwise
 * @throw runtime_error if the output file cannot be opened
 */
LimaStatusCode LinearTextRepresentationLogger::process(
  AnalysisContent& analysis) const
{
  DUMPERLOGINIT;

  // get metadata
  LinguisticMetaData* metadata=dynamic_cast<LinguisticMetaData*>(analysis.getData("LinguisticMetaData"));
  if (metadata == 0) {
    LERROR << "no LinguisticMetaData ! abort";
    return MISSING_DATA;
  }

  // get the analysis graph
  AnalysisGraph* anaGraph = dynamic_cast<AnalysisGraph*>(analysis.getData("PosGraph"));
  if (anaGraph == 0) {
    LERROR << "no AnalysisGraph ! abort";
    return MISSING_DATA;
  }

  // get sentence boundaries
  SegmentationData* sb = dynamic_cast<SegmentationData*>(analysis.getData("SentenceBoundaries"));
  if (sb == 0) {
    // the message names this class (it previously named the Dumper)
    LDEBUG << "LinearTextRepresentationLogger::process: no SentenceBounds available: ignored";
    // sentence bounds ignored: null pointer passed to LTRTextBuilder will be handled there
  }

  // build LTRText
  LTR_Text textRep;
  LTRTextBuilder builder(m_language, m_stopList);
  builder.buildLTRTextFrom(
    *(anaGraph->getGraph()),
    sb,
    anaGraph->firstVertex(),
    anaGraph->lastVertex(),
    &textRep,
    metadata->getStartOffset());

  // write LTR_Text
  string textFileName = metadata->getMetaData("FileName");
  string outputFile = textFileName + m_outputSuffix;
  ofstream out(outputFile.c_str(), std::ofstream::binary);
  if (!out.good()) {
    throw runtime_error("can't open file " + outputFile);
  }
  textRep.binaryWriteOn(out);
  out.flush();
  out.close();
  return SUCCESS_ID;
}