示例#1
0
/*!
 *	Gets the prefix of the current document.
 *
 *	Reads the pnum column of the a_journ row whose idd equals the id
 *	returned by docId().
 *
 *\return	the prefix, or an empty string when docId() is zero or the
 *		query yields no row.
 */
QString
aDocJournal::getPrefix()
{
	QString prefix = "";
	const qulonglong documentId = docId();
	if ( !documentId )
	{
		// No current document: nothing to look up.
		return prefix;
	}

	const QString sql = QString("SELECT pnum FROM a_journ WHERE idd=%1").arg(documentId);
	QSqlQuery query = db->db()->exec(sql);
	if ( query.first() )
	{
		prefix = query.value(0).toString();
	}
	return prefix;
}
示例#2
0
/*!
 *	Gets the current document.
 *
 *	The returned object is allocated with new; the caller must delete it
 *	after use:
 *	\code
 *	aDocument *doc = sysJournal->CurrentDocument();
 *	// ... work with the document ...
 *	delete doc;
 *	doc=0;
 *	\endcode
 *
 *\return	the current document, or 0 when the metadata lookup for
 *		docType() yields a null item or when select() on docId()
 *		returns a non-zero value.
 */
aDocument*
aDocJournal::CurrentDocument()
{
	aCfgItem typeItem = md->find( docType() );
	if ( typeItem.isNull() )
	{
		return 0;
	}

	aDocument *document = new aDocument( typeItem, db );
	if ( document->select( docId() ) )
	{
		// Non-zero result from select(): discard the freshly created object.
		delete document;
		return 0;
	}
	return document;
}
/**
 * Writes the specific entities of an analysis as XML to the stream obtained
 * from initialize().
 *
 * The "AnnotationData", m_graph and "LinguisticMetaData" entries of
 * @p analysis are required. The "StartOffset", "StartOffsetIndexingNode" and
 * "DocId" metadata are optional: a LinguisticProcessingException raised while
 * reading them is ignored and the default value (0 / empty string) is kept.
 *
 * When m_compactFormat is set the root element is <entities> (with docid,
 * offsetNode and offset attributes), otherwise <specific_entities>.
 * When m_followGraph is set, only vertices reached by walking the graph from
 * its first vertex are considered; otherwise every vertex of the annotation
 * graph carrying a "SpecificEntity" annotation is considered.
 *
 * @param analysis analysis content the data is read from
 * @return MISSING_DATA when a required data entry is absent, SUCCESS_ID
 *         otherwise
 */
LimaStatusCode SpecificEntitiesXmlLogger::process(
  AnalysisContent& analysis) const
{
  SELOGINIT;
  LDEBUG << "SpecificEntitiesXmlLogger::process";
  TimeUtils::updateCurrentTime();

  AnnotationData* annotationData = static_cast< AnnotationData* >(analysis.getData("AnnotationData"));
  if (annotationData == 0) {
    SELOGINIT;
    LERROR << "no annotationData ! abort";
    return MISSING_DATA;
  }
  
  
  LinguisticAnalysisStructure::AnalysisGraph* graphp = static_cast<LinguisticAnalysisStructure::AnalysisGraph*>(analysis.getData(m_graph));
  if (graphp == 0) {
    SELOGINIT;
    LERROR << "no graph "<< m_graph <<" ! abort";
    return MISSING_DATA;
  }
  const LinguisticAnalysisStructure::AnalysisGraph& graph = *graphp;
  LinguisticGraph* lingGraph = const_cast<LinguisticGraph*>(graph.getGraph());
  VertexTokenPropertyMap tokenMap = get(vertex_token, *lingGraph);
  
  LinguisticMetaData* metadata=static_cast<LinguisticMetaData*>(analysis.getData("LinguisticMetaData"));
  if (metadata == 0) {
      SELOGINIT;
      LERROR << "no LinguisticMetaData ! abort";
      return MISSING_DATA;
  }

  // From here on this function owns dstream and must delete it on every
  // return path.
  DumperStream* dstream=initialize(analysis);
  ostream& out=dstream->out();

  uint64_t offset(0);
  try {
    offset=atoi(metadata->getMetaData("StartOffset").c_str());
  }
  catch (LinguisticProcessingException& ) {
    // do nothing: not set in analyzeText (only in analyzeXmlDocuments)
  }

  uint64_t offsetIndexingNode(0);
  try {
    offsetIndexingNode=atoi(metadata->getMetaData("StartOffsetIndexingNode").c_str());
  }
  catch (LinguisticProcessingException& ) {
    // do nothing: not set in analyzeText (only in analyzeXmlDocuments)
  }

  std::string docId("");
  try {
    docId=metadata->getMetaData("DocId");
  }
  catch (LinguisticProcessingException& ) {
    // do nothing: not set in analyzeText (only in analyzeXmlDocuments)
  }

  // Opening root element.
  if (m_compactFormat) {
    out << "<entities docid=\"" << docId
    << "\" offsetNode=\"" << offsetIndexingNode 
    << "\" offset=\"" << offset
    << "\">" << endl;
  }
  else {
    out << "<specific_entities>" << endl;
  }
//   SELOGINIT;

  if (m_followGraph) {
    // instead of looking to all annotations, follow the graph (in
    // morphological graph, some vertices are not related to main graph:
    // idiomatic expressions parts and named entity parts)
    // -> this will not include nested entities

    AnalysisGraph* tokenList=static_cast<AnalysisGraph*>(analysis.getData(m_graph));
    if (tokenList==0) {
      LERROR << "graph " << m_graph << " has not been produced: check pipeline";
      // Release the output stream before bailing out; it was leaked here
      // previously.
      delete dstream;
      return MISSING_DATA;
    }
    LinguisticGraph* graph=tokenList->getGraph();
    //const FsaStringsPool& sp=Common::MediaticData::MediaticData::single().stringsPool(m_language);
    
    // Breadth-first walk starting at the first vertex of the graph.
    std::queue<LinguisticGraphVertex> toVisit;
    std::set<LinguisticGraphVertex> visited;
    toVisit.push(tokenList->firstVertex());
    
    LinguisticGraphOutEdgeIt outItr,outItrEnd;
    while (!toVisit.empty()) {
      LinguisticGraphVertex v=toVisit.front();
      toVisit.pop();
      // The last vertex is neither expanded nor output.
      if (v == tokenList->lastVertex()) {
        continue;
      }
      
      for (boost::tie(outItr,outItrEnd)=out_edges(v,*graph); outItr!=outItrEnd; outItr++) 
      {
        LinguisticGraphVertex next=target(*outItr,*graph);
        if (visited.find(next)==visited.end())
        {
          visited.insert(next);
          toVisit.push(next);
        }
      }
      const SpecificEntityAnnotation* annot=getSpecificEntityAnnotation(v,annotationData);
      if (annot != 0) {
        outputEntity(out,v,annot,tokenMap,offset);
      }
    }
  }
  else {
    // take all annotations
    AnnotationGraphVertexIt itv, itv_end;
    boost::tie(itv, itv_end) = vertices(annotationData->getGraph());
    for (; itv != itv_end; itv++)
    {
      //     LDEBUG << "SpecificEntitiesXmlLogger on annotation vertex " << *itv;
      if (annotationData->hasAnnotation(*itv,Common::Misc::utf8stdstring2limastring("SpecificEntity")))
      {
        //       LDEBUG << "    it has SpecificEntityAnnotation";
        const SpecificEntityAnnotation* annot = 0;
        try
        {
          annot = annotationData->annotation(*itv,Common::Misc::utf8stdstring2limastring("SpecificEntity"))
          .pointerValue<SpecificEntityAnnotation>();
        }
        catch (const boost::bad_any_cast& )
        {
          SELOGINIT;
          LERROR << "This annotation is not a SpecificEntity; SE not logged";
          continue;
        }
        
        // retrieve the id of the created morphological vertex
        LinguisticGraphVertex v;
        if (!annotationData->hasIntAnnotation(*itv,Common::Misc::utf8stdstring2limastring(m_graph)))
        {
          //         SELOGINIT;
          //         LDEBUG << *itv << " has no " << m_graph << " annotation. Skeeping it.";
          continue;
        }
        v = annotationData->intAnnotation(*itv,Common::Misc::utf8stdstring2limastring(m_graph));
        outputEntity(out,v,annot,tokenMap,offset);
      }
    }
  }   
  
  //   LDEBUG << "    all vertices done";
  // Closing root element, matching the one opened above.
  if (m_compactFormat) {
    out << "</entities>" << endl;
  }
  else {
    out << "</specific_entities>" << endl;
  }
  delete dstream;
  TimeUtils::logElapsedTime("SpecificEntitiesXmlLogger");
  return SUCCESS_ID;
  
}
示例#4
0
/**
 * Writes the semantic annotations (vertices) and semantic relations (edges)
 * of the annotation graph as XML to the log file opened with openLogFile().
 *
 * The "AnnotationData", m_graph and "LinguisticMetaData" entries of
 * @p analysis are required. The "StartOffset", "StartOffsetIndexingNode" and
 * "DocId" metadata are optional: a LinguisticProcessingException raised while
 * reading them is ignored and the default value (0 / empty string) is kept.
 *
 * @param analysis analysis content the data is read from
 * @return MISSING_DATA when a required data entry is absent, UNKNOWN_ERROR
 *         when the log file cannot be opened, SUCCESS_ID otherwise
 */
LimaStatusCode SemanticRelationsXmlLogger::
process(AnalysisContent& analysis) const
{
  TimeUtils::updateCurrentTime();
  
  SEMLOGINIT;
  // Entry trace: logged at debug level (it is not an error condition).
  LDEBUG << "SemanticRelationsXmlLogger" << LENDL;
    
  AnnotationData* annotationData = static_cast< AnnotationData* >(analysis.getData("AnnotationData"));
  if (annotationData == 0) {
    // Dereferenced below when iterating the annotation graph.
    LERROR << "no annotationData ! abort" << LENDL;
    return MISSING_DATA;
  }
  
  LinguisticAnalysisStructure::AnalysisGraph* graphp =
    static_cast<LinguisticAnalysisStructure::AnalysisGraph*>(analysis.getData(m_graph));
  if (graphp == 0) {
    // Avoid dereferencing a null graph pointer.
    LERROR << "no graph " << m_graph << " ! abort" << LENDL;
    return MISSING_DATA;
  }
  const LinguisticAnalysisStructure::AnalysisGraph& graph = *graphp;
  
  LinguisticGraph* lingGraph = const_cast<LinguisticGraph*>(graph.getGraph());
  VertexTokenPropertyMap tokenMap = get(vertex_token, *lingGraph);
  LinguisticMetaData* metadata=static_cast<LinguisticMetaData*>(analysis.getData("LinguisticMetaData"));
  if (metadata == 0) {
      SEMLOGINIT;
      LERROR << "no LinguisticMetaData ! abort" << LENDL;
      return MISSING_DATA;
  }

  ofstream out;
  if (!openLogFile(out,metadata->getMetaData("FileName"))) {
    SEMLOGINIT;
    LERROR << "Can't open log file " << LENDL;
    return UNKNOWN_ERROR;
  }

  uint64_t offset(0);
  try {
    offset=atoi(metadata->getMetaData("StartOffset").c_str());
  }
  catch (LinguisticProcessingException& ) {
    // do nothing: not set in analyzeText (only in analyzeXmlDocuments)
  }

  uint64_t offsetIndexingNode(0);
  try {
    offsetIndexingNode=atoi(metadata->getMetaData("StartOffsetIndexingNode").c_str());
  }
  catch (LinguisticProcessingException& ) {
    // do nothing: not set in analyzeText (only in analyzeXmlDocuments)
  }

  std::string docId("");
  try {
    docId=metadata->getMetaData("DocId");
  }
  catch (LinguisticProcessingException& ) {
    // do nothing: not set in analyzeText (only in analyzeXmlDocuments)
  }

  out << "<relations docid=\"" << docId
      << "\" offsetNode=\"" << offsetIndexingNode 
      << "\">" << endl;

//   LDEBUG << "SemanticRelationsXmlLogger on graph " << m_graph << LENDL;
  
  //look at all vertices for annotations
  AnnotationGraphVertexIt itv, itv_end;
  boost::tie(itv, itv_end) = vertices(annotationData->getGraph());
  for (; itv != itv_end; itv++)
  {
    LDEBUG << "SemanticRelationsXmlLogger on annotation vertex " << *itv << LENDL;
    if (annotationData->hasAnnotation(*itv,Common::Misc::utf8stdstring2limastring("SemanticAnnotation")))
    {
//       LDEBUG << "    it has SemanticRelationAnnotation" << LENDL;
      const SemanticAnnotation* annot = 0;
      try
      {
        annot = annotationData->annotation(*itv,Common::Misc::utf8stdstring2limastring("SemanticAnnotation"))
          .pointerValue<SemanticAnnotation>();
      }
      catch (const boost::bad_any_cast& )
      {
        SEMLOGINIT;
        // Message names the type that was actually expected here.
        LERROR << "This annotation is not a SemanticAnnotation" << LENDL;
        continue;
      }

      // output
      out << "<annotation type=\"" << annot->getType() << "\">" << endl
          << vertexStringForSemanticAnnotation("vertex",*itv,tokenMap,annotationData,offset)
          << "</annotation>" << endl;
    }
  }

  // look at all edges for relations
  AnnotationGraphEdgeIt it,it_end;
  const AnnotationGraph& annotGraph=annotationData->getGraph();
  boost::tie(it, it_end) = edges(annotGraph);
  for (; it != it_end; it++) {
    LDEBUG << "SemanticRelationsXmlLogger on annotation edge " 
           << source(*it,annotGraph) << "->" << target(*it,annotGraph) << LENDL;
    if (annotationData->hasAnnotation(*it,Common::Misc::utf8stdstring2limastring("SemanticRelation")))
    {
      SEMLOGINIT;
      LDEBUG << "found semantic relation" << LENDL;
      const SemanticRelationAnnotation* annot = 0;
      try
      {
        annot = annotationData->annotation(*it,Common::Misc::utf8stdstring2limastring("SemanticRelation"))
          .pointerValue<SemanticRelationAnnotation>();
      }
      catch (const boost::bad_any_cast& )
      {
        SEMLOGINIT;
        // Message names the type that was actually expected here.
        LERROR << "This annotation is not a SemanticRelation" << LENDL;
        continue;
      }

      //output
      out << "<relation type=\"" << annot->type() << "\">" << endl
          << vertexStringForSemanticAnnotation("source",source(*it,annotGraph),tokenMap,annotationData,offset)
          << vertexStringForSemanticAnnotation("target",target(*it,annotGraph),tokenMap,annotationData,offset)
          << "</relation>" << endl;
      
    }
  }

//   LDEBUG << "    all vertices done" << LENDL;
  out << "</relations>" << endl;
  out.close();

  TimeUtils::logElapsedTime("SemanticRelationsXmlLogger");
  return SUCCESS_ID;
}
/**
 * Writes the segments stored under the data entry m_data as XML to the log
 * file opened with openLogFile().
 *
 * Each segment is emitted as one <segment> element holding its begin
 * position, length and type. When no data entry named m_data exists, an
 * empty <segmentation> element is written and a debug message is logged.
 *
 * @param analysis analysis content the data is read from
 * @return MISSING_DATA when "LinguisticMetaData" is absent, UNKNOWN_ERROR
 *         when the log file cannot be opened, SUCCESS_ID otherwise
 */
LimaStatusCode SegmentationDataXmlLogger::process(
  AnalysisContent& analysis) const
{
  LOGINIT("LP::Segmentation");
  LDEBUG << "SegmentationDataXmlLogger::process" << LENDL;
  TimeUtils::updateCurrentTime();

  LinguisticMetaData* metaInfo=
    static_cast<LinguisticMetaData*>(analysis.getData("LinguisticMetaData"));
  if (metaInfo == 0) {
      LOGINIT("LP::Segmentation");
      LERROR << "no LinguisticMetaData ! abort" << LENDL;
      return MISSING_DATA;
  }

  // Output file, named after the "FileName" metadata.
  ofstream logStream;
  if (!openLogFile(logStream,metaInfo->getMetaData("FileName"))) {
    LOGINIT("LP::Segmentation");
    LERROR << "Can't open log file '" << metaInfo->getMetaData("FileName") << "'" << LENDL;
    return UNKNOWN_ERROR;
  }

  // Metadata useful for XML documents. Only the document id is read; the
  // start offsets are not used by this logger.
  std::string documentId("");
  try {
    documentId=metaInfo->getMetaData("DocId");
  }
  catch (LinguisticProcessingException& ) {
    // do nothing: not set in analyzeText (only in analyzeXmlDocuments)
  }

  logStream << "<segmentation>" << endl;
  const AnalysisData* rawData =analysis.getData(m_data);
  if (rawData==0) {
    LOGINIT("LP::Segmentation");
    LDEBUG << "no SegmentationData of name " << m_data << LENDL;
  }
  else {
    const SegmentationData* segmentation=static_cast<const SegmentationData*>(rawData);
    const vector<Segment> segments=segmentation->getSegments();
    vector<Segment>::const_iterator segIt=segments.begin();
    const vector<Segment>::const_iterator segEnd=segments.end();
    for (; segIt!=segEnd; ++segIt) {
      logStream << "<segment>";
      logStream << "<pos>" << segIt->getPosBegin() << "</pos>";
      logStream << "<len>" << segIt->getLength() << "</len>";
      logStream << "<type>" << segIt->getType() << "</type>";
      logStream << "</segment>" << endl;
    }
  }

  logStream << "</segmentation>" << endl;
  logStream.close();

  TimeUtils::logElapsedTime("SegmentationDataXmlLogger");
  return SUCCESS_ID;
}
示例#6
0
/*!
 *\~english
 *	Gets current document type.
 *\~russian
 *	Получение типа текущего документа.
 *\~
 *\return \~english database document type. \~russian тип документа в базе данных.\~
 */
int
aDocJournal::docType()
{
	return db->uidType( docId() );
}