/*!
 * Gets the prefix of the current document.
 * \return document prefix, or an empty string if there is no current document
 * or the document has no prefix.
 */
QString
aDocJournal::getPrefix()
{
    QString pref = "";
    qulonglong dUid = docId();
    if ( dUid )
    {
        QSqlQuery q = db->db()->exec( QString( "SELECT pnum FROM a_journ WHERE idd=%1" ).arg( dUid ) );
        if ( q.first() )
            pref = q.value( 0 ).toString();
    }
    return pref;
}
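// Usage sketch (added illustration, not part of the original sources): combines
// getPrefix() with docId() to build a human-readable label for the current
// journal row. The helper name and the label format are hypothetical; it only
// assumes that docId() and getPrefix() are accessible to the caller.
static QString currentDocumentLabel( aDocJournal *journal )
{
    QString label;
    if ( journal && journal->docId() )
    {
        // getPrefix() returns an empty string when the a_journ row has no pnum
        label = journal->getPrefix() + QString::number( journal->docId() );
    }
    return label;
}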
/*!
 * Gets the current document. The returned document must be deleted after use:
 * \code
 * aDocument *doc = sysJournal->CurrentDocument();
 * // ... some operations on the document ...
 * delete doc;
 * doc = 0;
 * \endcode
 * \return current document, or 0 if it cannot be selected.
 */
aDocument*
aDocJournal::CurrentDocument()
{
    aCfgItem i = md->find( docType() );
    if ( !i.isNull() )
    {
        aDocument *d = new aDocument( i, db );
        // select() returning 0 is treated as success here
        if ( !d->select( docId() ) )
        {
            return d;
        }
        else
        {
            delete d;
            return 0;
        }
    }
    else
    {
        return 0;
    }
}
LimaStatusCode SpecificEntitiesXmlLogger::process(AnalysisContent& analysis) const
{
  SELOGINIT;
  LDEBUG << "SpecificEntitiesXmlLogger::process";
  TimeUtils::updateCurrentTime();

  AnnotationData* annotationData = static_cast< AnnotationData* >(analysis.getData("AnnotationData"));
  if (annotationData == 0)
  {
    SELOGINIT;
    LERROR << "no annotationData ! abort";
    return MISSING_DATA;
  }

  LinguisticAnalysisStructure::AnalysisGraph* graphp =
    static_cast<LinguisticAnalysisStructure::AnalysisGraph*>(analysis.getData(m_graph));
  if (graphp == 0)
  {
    SELOGINIT;
    LERROR << "no graph " << m_graph << " ! abort";
    return MISSING_DATA;
  }
  const LinguisticAnalysisStructure::AnalysisGraph& graph = *graphp;
  LinguisticGraph* lingGraph = const_cast<LinguisticGraph*>(graph.getGraph());
  VertexTokenPropertyMap tokenMap = get(vertex_token, *lingGraph);

  LinguisticMetaData* metadata = static_cast<LinguisticMetaData*>(analysis.getData("LinguisticMetaData"));
  if (metadata == 0)
  {
    SELOGINIT;
    LERROR << "no LinguisticMetaData ! abort";
    return MISSING_DATA;
  }

  DumperStream* dstream = initialize(analysis);
  ostream& out = dstream->out();

  uint64_t offset(0);
  try
  {
    offset = atoi(metadata->getMetaData("StartOffset").c_str());
  }
  catch (LinguisticProcessingException& )
  {
    // do nothing: not set in analyzeText (only in analyzeXmlDocuments)
  }

  uint64_t offsetIndexingNode(0);
  try
  {
    offsetIndexingNode = atoi(metadata->getMetaData("StartOffsetIndexingNode").c_str());
  }
  catch (LinguisticProcessingException& )
  {
    // do nothing: not set in analyzeText (only in analyzeXmlDocuments)
  }

  std::string docId("");
  try
  {
    docId = metadata->getMetaData("DocId");
  }
  catch (LinguisticProcessingException& )
  {
    // do nothing: not set in analyzeText (only in analyzeXmlDocuments)
  }

  if (m_compactFormat)
  {
    out << "<entities docid=\"" << docId
        << "\" offsetNode=\"" << offsetIndexingNode
        << "\" offset=\"" << offset
        << "\">" << endl;
  }
  else
  {
    out << "<specific_entities>" << endl;
  }

  // SELOGINIT;
  if (m_followGraph)
  {
    // instead of looking at all annotations, follow the graph (in the
    // morphological graph, some vertices are not related to the main graph:
    // idiomatic expression parts and named entity parts)
    // -> this will not include nested entities
    AnalysisGraph* tokenList = static_cast<AnalysisGraph*>(analysis.getData(m_graph));
    if (tokenList == 0)
    {
      LERROR << "graph " << m_graph << " has not been produced: check pipeline";
      return MISSING_DATA;
    }
    LinguisticGraph* graph = tokenList->getGraph();
    //const FsaStringsPool& sp=Common::MediaticData::MediaticData::single().stringsPool(m_language);

    // breadth-first traversal of the morphological graph
    std::queue<LinguisticGraphVertex> toVisit;
    std::set<LinguisticGraphVertex> visited;
    toVisit.push(tokenList->firstVertex());

    LinguisticGraphOutEdgeIt outItr, outItrEnd;
    while (!toVisit.empty())
    {
      LinguisticGraphVertex v = toVisit.front();
      toVisit.pop();
      if (v == tokenList->lastVertex())
      {
        continue;
      }

      for (boost::tie(outItr, outItrEnd) = out_edges(v, *graph); outItr != outItrEnd; outItr++)
      {
        LinguisticGraphVertex next = target(*outItr, *graph);
        if (visited.find(next) == visited.end())
        {
          visited.insert(next);
          toVisit.push(next);
        }
      }

      const SpecificEntityAnnotation* annot = getSpecificEntityAnnotation(v, annotationData);
      if (annot != 0)
      {
        outputEntity(out, v, annot, tokenMap, offset);
      }
    }
  }
  else
  {
    // take all annotations
    AnnotationGraphVertexIt itv, itv_end;
    boost::tie(itv, itv_end) = vertices(annotationData->getGraph());
    for (; itv != itv_end; itv++)
    {
      // LDEBUG << "SpecificEntitiesXmlLogger on annotation vertex " << *itv;
      if (annotationData->hasAnnotation(*itv, Common::Misc::utf8stdstring2limastring("SpecificEntity")))
      {
        // LDEBUG << " it has SpecificEntityAnnotation";
        const SpecificEntityAnnotation* annot = 0;
        try
        {
          annot = annotationData->annotation(*itv, Common::Misc::utf8stdstring2limastring("SpecificEntity"))
            .pointerValue<SpecificEntityAnnotation>();
        }
        catch (const boost::bad_any_cast& )
        {
          SELOGINIT;
          LERROR << "This annotation is not a SpecificEntity; SE not logged";
          continue;
        }

        // retrieve the id of the corresponding vertex in the morphological graph
        LinguisticGraphVertex v;
        if (!annotationData->hasIntAnnotation(*itv, Common::Misc::utf8stdstring2limastring(m_graph)))
        {
          // SELOGINIT;
          // LDEBUG << *itv << " has no " << m_graph << " annotation. Skipping it.";
          continue;
        }
        v = annotationData->intAnnotation(*itv, Common::Misc::utf8stdstring2limastring(m_graph));
        outputEntity(out, v, annot, tokenMap, offset);
      }
    }
  }

  // LDEBUG << " all vertices done";
  if (m_compactFormat)
  {
    out << "</entities>" << endl;
  }
  else
  {
    out << "</specific_entities>" << endl;
  }

  delete dstream;
  TimeUtils::logElapsedTime("SpecificEntitiesXmlLogger");
  return SUCCESS_ID;
}
LimaStatusCode SemanticRelationsXmlLogger::process(AnalysisContent& analysis) const
{
  TimeUtils::updateCurrentTime();
  SEMLOGINIT;
  LDEBUG << "SemanticRelationsXmlLogger::process" << LENDL;

  AnnotationData* annotationData = static_cast< AnnotationData* >(analysis.getData("AnnotationData"));
  if (annotationData == 0)
  {
    SEMLOGINIT;
    LERROR << "no annotationData ! abort" << LENDL;
    return MISSING_DATA;
  }

  const LinguisticAnalysisStructure::AnalysisGraph& graph =
    *(static_cast<LinguisticAnalysisStructure::AnalysisGraph*>(analysis.getData(m_graph)));
  LinguisticGraph* lingGraph = const_cast<LinguisticGraph*>(graph.getGraph());
  VertexTokenPropertyMap tokenMap = get(vertex_token, *lingGraph);

  LinguisticMetaData* metadata = static_cast<LinguisticMetaData*>(analysis.getData("LinguisticMetaData"));
  if (metadata == 0)
  {
    SEMLOGINIT;
    LERROR << "no LinguisticMetaData ! abort" << LENDL;
    return MISSING_DATA;
  }

  ofstream out;
  if (!openLogFile(out,metadata->getMetaData("FileName")))
  {
    SEMLOGINIT;
    LERROR << "Can't open log file " << LENDL;
    return UNKNOWN_ERROR;
  }

  uint64_t offset(0);
  try
  {
    offset = atoi(metadata->getMetaData("StartOffset").c_str());
  }
  catch (LinguisticProcessingException& )
  {
    // do nothing: not set in analyzeText (only in analyzeXmlDocuments)
  }

  uint64_t offsetIndexingNode(0);
  try
  {
    offsetIndexingNode = atoi(metadata->getMetaData("StartOffsetIndexingNode").c_str());
  }
  catch (LinguisticProcessingException& )
  {
    // do nothing: not set in analyzeText (only in analyzeXmlDocuments)
  }

  std::string docId("");
  try
  {
    docId = metadata->getMetaData("DocId");
  }
  catch (LinguisticProcessingException& )
  {
    // do nothing: not set in analyzeText (only in analyzeXmlDocuments)
  }

  out << "<relations docid=\"" << docId
      << "\" offsetNode=\"" << offsetIndexingNode
      << "\">" << endl;

  // LDEBUG << "SemanticRelationsXmlLogger on graph " << m_graph << LENDL;

  // look at all vertices for annotations
  AnnotationGraphVertexIt itv, itv_end;
  boost::tie(itv, itv_end) = vertices(annotationData->getGraph());
  for (; itv != itv_end; itv++)
  {
    LDEBUG << "SemanticRelationsXmlLogger on annotation vertex " << *itv << LENDL;
    if (annotationData->hasAnnotation(*itv,Common::Misc::utf8stdstring2limastring("SemanticAnnotation")))
    {
      // LDEBUG << " it has SemanticRelationAnnotation" << LENDL;
      const SemanticAnnotation* annot = 0;
      try
      {
        annot = annotationData->annotation(*itv,Common::Misc::utf8stdstring2limastring("SemanticAnnotation"))
          .pointerValue<SemanticAnnotation>();
      }
      catch (const boost::bad_any_cast& )
      {
        SEMLOGINIT;
        LERROR << "This annotation is not a SemanticAnnotation" << LENDL;
        continue;
      }
      // output
      out << "<annotation type=\"" << annot->getType() << "\">" << endl
          << vertexStringForSemanticAnnotation("vertex",*itv,tokenMap,annotationData,offset)
          << "</annotation>" << endl;
    }
  }

  // look at all edges for relations
  AnnotationGraphEdgeIt it, it_end;
  const AnnotationGraph& annotGraph = annotationData->getGraph();
  boost::tie(it, it_end) = edges(annotGraph);
  for (; it != it_end; it++)
  {
    LDEBUG << "SemanticRelationsXmlLogger on annotation edge "
           << source(*it,annotGraph) << "->" << target(*it,annotGraph) << LENDL;
    if (annotationData->hasAnnotation(*it,Common::Misc::utf8stdstring2limastring("SemanticRelation")))
    {
      SEMLOGINIT;
      LDEBUG << "found semantic relation" << LENDL;
      const SemanticRelationAnnotation* annot = 0;
      try
      {
        annot = annotationData->annotation(*it,Common::Misc::utf8stdstring2limastring("SemanticRelation"))
          .pointerValue<SemanticRelationAnnotation>();
      }
      catch (const boost::bad_any_cast& )
      {
        SEMLOGINIT;
        LERROR << "This annotation is not a SemanticRelation" << LENDL;
        continue;
      }
      // output
      out << "<relation type=\"" << annot->type() << "\">" << endl
          << vertexStringForSemanticAnnotation("source",source(*it,annotGraph),tokenMap,annotationData,offset)
          << vertexStringForSemanticAnnotation("target",target(*it,annotGraph),tokenMap,annotationData,offset)
          << "</relation>" << endl;
    }
  }

  // LDEBUG << " all vertices done" << LENDL;
  out << "</relations>" << endl;
  out.close();
  TimeUtils::logElapsedTime("SemanticRelationsXmlLogger");
  return SUCCESS_ID;
}
LimaStatusCode SegmentationDataXmlLogger::process(AnalysisContent& analysis) const
{
  LOGINIT("LP::Segmentation");
  LDEBUG << "SegmentationDataXmlLogger::process" << LENDL;
  TimeUtils::updateCurrentTime();

  LinguisticMetaData* metadata = static_cast<LinguisticMetaData*>(analysis.getData("LinguisticMetaData"));
  if (metadata == 0)
  {
    LOGINIT("LP::Segmentation");
    LERROR << "no LinguisticMetaData ! abort" << LENDL;
    return MISSING_DATA;
  }

  // open output file
  ofstream out;
  if (!openLogFile(out,metadata->getMetaData("FileName")))
  {
    LOGINIT("LP::Segmentation");
    LERROR << "Can't open log file '" << metadata->getMetaData("FileName") << "'" << LENDL;
    return UNKNOWN_ERROR;
  }

  // get metadata (useful for XML documents)
  // uint64_t offset(0);
  // uint64_t offsetIndexingNode(0);
  std::string docId("");
  try
  {
    // offset=atoi(metadata->getMetaData("StartOffset").c_str());
    // offsetIndexingNode=atoi(metadata->getMetaData("StartOffsetIndexingNode").c_str());
    docId = metadata->getMetaData("DocId");
  }
  catch (LinguisticProcessingException& )
  {
    // do nothing: not set in analyzeText (only in analyzeXmlDocuments)
  }

  // log
  out << "<segmentation>" << endl;
  const AnalysisData* data = analysis.getData(m_data);
  if (data != 0)
  {
    const SegmentationData* segData = static_cast<const SegmentationData*>(data);
    const vector<Segment> seg = segData->getSegments();
    for (vector<Segment>::const_iterator it = seg.begin(), it_end = seg.end(); it != it_end; it++)
    {
      out << "<segment>"
          << "<pos>" << (*it).getPosBegin() << "</pos>"
          << "<len>" << (*it).getLength() << "</len>"
          << "<type>" << (*it).getType() << "</type>"
          << "</segment>" << endl;
    }
  }
  else
  {
    LOGINIT("LP::Segmentation");
    LDEBUG << "no SegmentationData of name " << m_data << LENDL;
  }
  out << "</segmentation>" << endl;

  out.close();
  TimeUtils::logElapsedTime("SegmentationDataXmlLogger");
  return SUCCESS_ID;
}
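// Example of the output produced above for a document with two segments
// (positions, lengths and the "sentence" type are illustrative values):
//
//   <segmentation>
//   <segment><pos>0</pos><len>120</len><type>sentence</type></segment>
//   <segment><pos>121</pos><len>87</len><type>sentence</type></segment>
//   </segmentation>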
/*!
 * Gets the type of the current document.
 * \return document type in the database.
 */
int
aDocJournal::docType()
{
    return db->uidType( docId() );
}
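// Usage sketch (added illustration, not part of the original sources): docType()
// identifies the metadata item that describes the current document, in the same
// way CurrentDocument() resolves it via md->find(). The helper name and the
// assumption that the journal's metadata object is an aCfg instance are
// hypothetical.
static bool currentDocumentHasMetadata( aDocJournal *journal, aCfg *metadata )
{
    aCfgItem item = metadata->find( journal->docType() );
    return !item.isNull();
}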