Пример #1
0
/**
 * Replays the specific entities described in an XML input file onto the
 * analysis graph named m_graph.
 *
 * A RecognizerData (with its RecognizerResultData) is registered in the
 * analysis under "RecognizerData" for the duration of the parse, as
 * ApplyRecognizer does, so that CreateSpecificEntity actions can be used;
 * it is removed again before returning.
 *
 * @param analysis analysis content providing the graph and the input file
 * @return MISSING_DATA if the graph is not available; SUCCESS_ID otherwise.
 *         A file that cannot be opened or parsed is only logged as an error.
 */
LimaStatusCode SpecificEntitiesLoader::
process(AnalysisContent& analysis) const
{
  // the graph the entities are attached to is mandatory
  AnalysisGraph* const targetGraph=
    static_cast<AnalysisGraph*>(analysis.getData(m_graph));
  if (targetGraph==nullptr)
  {
    LOGINIT("LP::SpecificEntities");
    LERROR << "no graph '" << m_graph << "' available !";
    return MISSING_DATA;
  }

  // temporary recognizer data, needed by the CreateSpecificEntity actions
  RecognizerData* const recognizerData=new RecognizerData;
  analysis.setData("RecognizerData",recognizerData);
  recognizerData->setResultData(new RecognizerResultData(m_graph));

  try
  {
    SpecificEntitiesLoader::XMLHandler xmlHandler(m_language,analysis,targetGraph);
    m_parser->setContentHandler(&xmlHandler);
    m_parser->setErrorHandler(&xmlHandler);

    QFile input(getInputFile(analysis).c_str());
    // an unreadable file and a parse failure are reported the same way
    const bool opened=input.open(QIODevice::ReadOnly | QIODevice::Text);
    if (!opened || !m_parser->parse( QXmlInputSource(&input)))
    {
      throw XMLException();
    }
  }
  catch (const XMLException& )
  {
    LOGINIT("LP::SpecificEntities");
    LERROR << "Error: failed to parse XML input file";
  }

  // the recognizer data is internal to this process unit: drop it
  recognizerData->deleteResultData();
  analysis.removeData("RecognizerData");

  return SUCCESS_ID;
}
Пример #2
0
/**
 * Records a relation of type m_semanticRelationType between two vertices in
 * the "SemanticRelationData" of the analysis, creating and registering that
 * data first when the analysis does not hold it yet.
 *
 * @param anagraph unused
 * @param vertex1  first vertex of the relation
 * @param vertex2  second vertex of the relation
 * @param analysis analysis content holding (or receiving) the relation data
 * @return the result of SemanticRelationData::relation()
 */
bool CreateSemanticRelation::
operator()(const LinguisticAnalysisStructure::AnalysisGraph& anagraph,
           const LinguisticGraphVertex& vertex1,
           const LinguisticGraphVertex& vertex2,
           AnalysisContent& analysis ) const
{
  LIMA_UNUSED(anagraph);

  SemanticRelationData* relations=
    static_cast<SemanticRelationData*>(analysis.getData("SemanticRelationData"));
  if (relations==nullptr)
  {
    // first relation of this analysis: create the container lazily
    relations=new SemanticRelationData();
    analysis.setData("SemanticRelationData",relations);
  }

  return relations->relation(vertex1,vertex2,m_semanticRelationType);
}
Пример #3
0
/**
 * Builds the "EventData" of the analysis: collects the entities found in
 * every segment of type m_segmentType of the segmentation data named
 * m_segmData, then flags the first entity of each entity type as main.
 *
 * @param analysis analysis content; must hold "AnnotationData", the
 *                 segmentation data m_segmData and the graph it refers to
 * @return MISSING_DATA if one of the required data is absent or is not a
 *         SegmentationData, SUCCESS_ID otherwise
 */
LimaStatusCode SimpleEventBuilder::process(AnalysisContent& analysis) const
{
  EVENTANALYZERLOGINIT;
  TimeUtils::updateCurrentTime();
  LDEBUG << "start SimpleEventBuilder" << LENDL;

  // get annotation data (for entities)
  AnnotationData* annotationData = static_cast< AnnotationData* >(analysis.getData("AnnotationData"));
  if (annotationData==0)
  {
    LERROR << "no annotation graph available !" << LENDL;
    return MISSING_DATA;
  }

  // get segmentation data
  AnalysisData* data=analysis.getData(m_segmData);
  if (data==0) {
    LERROR << "Missing data '" << m_segmData << "'" << LENDL;
    return MISSING_DATA;
  }
  // A static_cast of a non-null pointer can never yield null, which made the
  // check below dead code; dynamic_cast performs the type check the error
  // message announces.
  // NOTE(review): relies on AnalysisData being a polymorphic base — confirm.
  SegmentationData* segmData=dynamic_cast<SegmentationData*>(data);
  if (segmData==0)
  {
    LERROR << "Failed to interpret data '" << m_segmData << "' as SegmentationData" << LENDL;
    return MISSING_DATA;
  }

  // get graph on which the segmentation data relies
  const std::string graphId=segmData->getGraphId();
  AnalysisGraph* graph=static_cast<AnalysisGraph*>(analysis.getData(graphId));
  if (graph==0) {
    LERROR << "Cannot get graph '" << graphId << "' (from segmentation data)" << LENDL;
    return MISSING_DATA;
  }

  EventData* eventData=new EventData;
  LDEBUG << "set new data EventData of type EventData" << LENDL;
  analysis.setData("EventData", eventData);

  // gather the entities of every segment of the requested type
  std::map<Common::MediaticData::EntityType,std::vector<Entity> >& entities=eventData->getEntities();
  for (const Segment& segment : segmData->getSegments()) {
    if (segment.getType()==m_segmentType) {
      LDEBUG << "in segment " << m_segmentType << " [" << segment.getPosBegin() << "," << segment.getLength() << "]" << LENDL;
      getEntitiesFromSegment(entities,graph,segment.getFirstVertex(),segment.getLastVertex(),annotationData);
      // note: this is the size of the map, i.e. the number of entity types
      LDEBUG << "found " << entities.size() << " entities" << LENDL;
    }
    else {
      LDEBUG << "ignored segment " << segment.getType() << LENDL;
    }
  }

  // choose main entities : take the first one of each type
  for (auto& typeAndEntities : entities) {
    if (!typeAndEntities.second.empty()) {
      LDEBUG << "set main for entity of type " << typeAndEntities.first << " at pos " << typeAndEntities.second[0].getPosition() << LENDL;
      typeAndEntities.second[0].setMain(true);
    }
  }

  TimeUtils::logElapsedTime("SimpleEventBuilder");
  return SUCCESS_ID;
}
Пример #4
0
/**
 * Finds paragraph boundaries: locates the occurrences of
 * m_paragraphSeparator in the "Text" data, then walks the graph named
 * m_graph breadth-first and adds a "paragraph" segment to the
 * "ParagraphBoundaries" data each time a token lies beyond the next
 * separator position.
 *
 * @param analysis analysis content; must hold the graph m_graph and "Text"
 * @return MISSING_DATA if the graph or the text is absent, SUCCESS_ID
 *         otherwise (including when no separator is found)
 */
LimaStatusCode ParagraphBoundariesFinder::process(
  AnalysisContent& analysis) const
{
  TimeUtils::updateCurrentTime();
  SENTBOUNDLOGINIT;
  LINFO << "start finding paragraph founds";

  // find paragraphs in text (positions of double carriage returns),
  // then find corresponding vertices in graph

  AnalysisGraph* graph=static_cast<AnalysisGraph*>(analysis.getData(m_graph));
  if (graph==0) {
    LERROR << "no graph '" << m_graph << "' available !";
    return MISSING_DATA;
  }

  // the text is dereferenced below: refuse to run without it
  LimaStringText* text=static_cast<LimaStringText*>(analysis.getData("Text"));
  if (text==0) {
    LERROR << "no text available !";
    return MISSING_DATA;
  }

  SegmentationData* boundaries=new SegmentationData(m_graph);
  analysis.setData("ParagraphBoundaries",boundaries);

  std::vector<uint64_t> paragraphPositions;
  // matches the first character that does not belong to the separator
  const QRegExp notSeparator(QString(QLatin1String("[^%1]")).arg(m_paragraphSeparator));
  int i=text->indexOf(m_paragraphSeparator,0);
  while (i!=-1) {
    paragraphPositions.push_back(static_cast<uint64_t>(i));
    // goto next char that is not a carriage return
    const int currentPos=text->indexOf(notSeparator,i+1);
    if (currentPos==-1) {
      // Only separators remain. Searching again from -1 would restart at the
      // last character of the text and could find the same separator forever.
      break;
    }
    i=text->indexOf(m_paragraphSeparator,currentPos);
  }

  if (paragraphPositions.empty()) {
    LWARN << "no paragraph found";
    return SUCCESS_ID;
  }

  // find vertices related to positions in graph
  uint64_t parNum=0;
  std::deque<LinguisticGraphVertex> toVisit;
  std::set<LinguisticGraphVertex> visited;

  LinguisticGraphVertex beginParagraph=graph->firstVertex();

  toVisit.push_back(graph->firstVertex());
  visited.insert(graph->firstVertex());

  while (!toVisit.empty())
  {
    LinguisticGraphVertex currentVertex=toVisit.front();
    toVisit.pop_front();

    if (currentVertex == graph->lastVertex()) { // end of the graph
      continue;  // may be other nodes to test in queue
    }

    if (currentVertex != graph->firstVertex()) {
      Token* t = get(vertex_token,*(graph->getGraph()),currentVertex);
      uint64_t position=t->position();
      // first token located after the current separator: close the paragraph
      if (position >= (paragraphPositions[parNum]+1)) {
        boundaries->add(Segment("paragraph",beginParagraph,currentVertex,graph));
        beginParagraph=currentVertex;
        parNum++;
        if (parNum >= paragraphPositions.size()) {
          break;
        }
      }
    }

    // store following nodes to test
    LinguisticGraphOutEdgeIt outEdge,outEdge_end;
    boost::tie(outEdge,outEdge_end)=out_edges(currentVertex,*(graph->getGraph()));

    for (; outEdge!=outEdge_end; ++outEdge) {
      LinguisticGraphVertex next=target(*outEdge,*(graph->getGraph()));
      if (visited.find(next)==visited.end()) {
        toVisit.push_back(next);
        visited.insert(next);
      }
    }
  }

  TimeUtils::logElapsedTime("ParagraphBoundariesFinder");
  return SUCCESS_ID;
}
Пример #5
0
/**
 * Dumps the content of the "PosGraph" (from its first to its last vertex)
 * through the analysis handler named m_handler, using dumpLimaData().
 *
 * The graph m_graph, "SyntacticData" and "AnnotationData" are created and
 * registered in the analysis when they are missing. Nothing is dumped unless
 * both "AnalysisGraph" and "PosGraph" are present.
 *
 * @param analysis analysis content; must hold "LinguisticMetaData" and an
 *                 "AnalysisHandlerContainer" providing the handler m_handler
 * @return MISSING_DATA if the metadata, the handler container or the handler
 *         is absent, SUCCESS_ID otherwise
 */
LimaStatusCode EasyXmlDumper::process(AnalysisContent& analysis) const
{
  TimeUtils::updateCurrentTime();
  DUMPERLOGINIT;

  LinguisticMetaData* metadata = static_cast<LinguisticMetaData*>(analysis.getData("LinguisticMetaData"));
  if (metadata == 0) {
    LERROR << "EasyXmlDumper::process no LinguisticMetaData ! abort";
    return MISSING_DATA;
  }
  string filename = metadata->getMetaData("FileName");
  LDEBUG << "EasyXmlDumper::process Filename: " << filename;

  LDEBUG << "handler will be: " << m_handler;
  AnalysisHandlerContainer* h = static_cast<AnalysisHandlerContainer*>(analysis.getData("AnalysisHandlerContainer"));
  if (h == 0)
  {
    // the container is dereferenced right below: fail cleanly instead
    LERROR << "EasyXmlDumper::process: no AnalysisHandlerContainer ! abort";
    return MISSING_DATA;
  }
  AbstractTextualAnalysisHandler* handler = static_cast<AbstractTextualAnalysisHandler*>(h->getHandler(m_handler));
  if (handler==0)
  {
    LERROR << "EasyXmlDumper::process: handler " << m_handler << " has not been given to the core client";
    return MISSING_DATA;
  }

  // create the data the dump relies on when earlier units did not provide it
  AnalysisGraph* graph = static_cast<AnalysisGraph*>(analysis.getData(m_graph));
  if (graph == 0)
  {
    graph = new AnalysisGraph(m_graph,m_language,true,true);
    analysis.setData(m_graph,graph);
  }

  SyntacticData* syntacticData = static_cast<SyntacticData*>(analysis.getData("SyntacticData"));
  if (syntacticData == 0)
  {
    syntacticData = new SyntacticAnalysis::SyntacticData(graph,0);
    syntacticData->setupDependencyGraph();
    analysis.setData("SyntacticData",syntacticData);
  }

  AnnotationData* annotationData = static_cast< AnnotationData* >(analysis.getData("AnnotationData"));
  if (annotationData == 0)
  {
    annotationData = new AnnotationData();
    AnalysisGraph* graphToAnnotate = static_cast<AnalysisGraph*>(analysis.getData("AnalysisGraph"));
    if (graphToAnnotate != 0)
    {
      graphToAnnotate->populateAnnotationGraph(annotationData, "AnalysisGraph");
    }
    analysis.setData("AnnotationData",annotationData);
  }

  handler->startAnalysis();
  HandlerStreamBuf hsb(handler);
  std::ostream outputStream(&hsb);

  LDEBUG << "EasyXmlDumper:: process before printing heading";
  AnalysisGraph* anaGraph = static_cast<AnalysisGraph*>(analysis.getData("AnalysisGraph"));
  AnalysisGraph* posGraph = static_cast<AnalysisGraph*>(analysis.getData("PosGraph"));
  if (anaGraph != 0 && posGraph != 0)
  {
    LDEBUG << "EasyXmlDumper:: begin of posgraph";
    // one "already dumped" flag per PosGraph vertex, and a running index for
    // each token, both following the vertex iteration order
    std::vector< bool > alreadyDumpedTokens;
    std::map< LinguisticAnalysisStructure::Token*, uint64_t > fullTokens;
    LinguisticGraphVertexIt i, i_end;
    uint64_t id = 0;
    alreadyDumpedTokens.resize(num_vertices(*posGraph->getGraph()));
    for (boost::tie(i, i_end) = vertices(*posGraph->getGraph()); i != i_end; ++i)
    {
      LDEBUG << "EasyXmlDumper:: examine posgraph for " << id;
      alreadyDumpedTokens[id] = false;
      fullTokens[get(vertex_token, *posGraph->getGraph(), *i)] = id;
      id++;
    }

    // No need for sentence boundaries in Easy input: the whole graph is
    // dumped as a single span.
    LinguisticGraphVertex sentenceBegin = posGraph->firstVertex();
    LinguisticGraphVertex sentenceEnd = posGraph->lastVertex();

    // sentence identifiers are prefixed with the document id, or "E" when
    // the metadata has none
    string sentIdPrefix;
    try {
      sentIdPrefix = metadata->getMetaData("docid");
      LDEBUG << "EasyXmlDumper:: retrieve sentence id " << sentIdPrefix;
    }catch (const LinguisticProcessingException& ) {
      sentIdPrefix = "";
    }
    if(sentIdPrefix.empty())
      sentIdPrefix = "E";

    LDEBUG << "EasyXmlDumper:: inside posgraph while ";
    dumpLimaData(outputStream,
                  sentenceBegin,
                  sentenceEnd,
                  *anaGraph,
                  *posGraph,
                  *annotationData,
                  *syntacticData,
                  "PosGraph",
                  alreadyDumpedTokens,
                  fullTokens,
                  sentIdPrefix);
    LDEBUG << "EasyXmlDumper:: end of posgraph";
  }

  return SUCCESS_ID;
}
Пример #6
0
/**
 * Walks the "AnalysisGraph" breadth-first, hands the tokens matched through
 * the annotation data to a CoreferenceEngine (all tokens, plus those carrying
 * a "SpecificEntity" annotation), then stores every non-empty result of
 * CoreferenceEngine::searchCoreference() in a new "CoreferenceData".
 *
 * @param analysis analysis content; must hold "LinguisticMetaData",
 *                 "AnalysisGraph" and "AnnotationData"
 * @return MISSING_DATA if one of these data is absent, SUCCESS_ID otherwise
 */
LimaStatusCode EntityTracker::process(AnalysisContent& analysis) const
{
  TimeUtils::updateCurrentTime();
  SELOGINIT;

  LinguisticMetaData* metadata=static_cast<LinguisticMetaData*>(analysis.getData("LinguisticMetaData"));
  if (metadata == 0)
  {
    LERROR << "no LinguisticMetaData ! abort" << LENDL;
    return MISSING_DATA;
  }

  AnalysisGraph* anagraph=static_cast<AnalysisGraph*>(analysis.getData("AnalysisGraph"));
  if (anagraph==0)
  {
    LERROR << "no graph 'AnaGraph' available !" << LENDL;
    return MISSING_DATA;
  }

  AnnotationData* annotationData = static_cast< AnnotationData* >(analysis.getData("AnnotationData"));
  if (annotationData==0)
  {
    LERROR << "no annotation graph available !" << LENDL;
    return MISSING_DATA;
  }

  // add new data to store co-references
  CoreferenceData* corefData = new CoreferenceData;
  analysis.setData("CoreferenceData",corefData);

  CoreferenceEngine ref;
  LinguisticGraph* graph=anagraph->getGraph();
  LinguisticGraphVertex lastVertex=anagraph->lastVertex();
  LinguisticGraphVertex firstVertex=anagraph->firstVertex();

  std::queue<LinguisticGraphVertex> toVisit;
  std::set<LinguisticGraphVertex> visited;

  LinguisticGraphOutEdgeIt outItr,outItrEnd;

  // visit the vertices between the first and the last vertex; neither the
  // first popped vertex nor the last vertex of the graph is examined
  toVisit.push(firstVertex);

  bool first=true;
  while (!toVisit.empty()) {
    LinguisticGraphVertex v=toVisit.front();
    toVisit.pop();
    if (v == lastVertex) {
      continue;
    }

    for (boost::tie(outItr,outItrEnd)=out_edges(v,*graph); outItr!=outItrEnd; ++outItr)
    {
      LinguisticGraphVertex next=target(*outItr,*graph);
      if (visited.find(next)==visited.end())
      {
        visited.insert(next);
        toVisit.push(next);
      }
    }

    if (first) {
      first=false;
      continue;
    }

    // check if vertex corresponds to a specific entity
    std::set< AnnotationGraphVertex > matches = annotationData->matches("AnalysisGraph",v,"annot");
    for (std::set< AnnotationGraphVertex >::const_iterator it = matches.begin();
         it != matches.end(); ++it)
    {
      AnnotationGraphVertex vx=*it;
      // NOTE(review): vx is an annotation graph vertex but is used here to
      // look a token up in the linguistic graph — confirm this is intended.
      Token* t=get(vertex_token,*graph,vx);
      if (t == 0)
      {
        // no token to store: both calls below dereference it
        continue;
      }
      // keep track of every token
      ref.storeAllToken(*t);
      if (annotationData->hasAnnotation(vx, Common::Misc::utf8stdstring2limastring("SpecificEntity")))
      {
        ref.storeAnnot(*t);
      }
    }
  }

  // search for coreferences between the named entities detected above

  // bind the container once so that begin and end belong to the same object
  const std::vector<Token>& annotations=ref.getAnnotations();
  for (std::vector<Token>::const_iterator it1=annotations.begin(), it1_end=annotations.end();
       it1 != it1_end;
       ++it1)
  {
    std::vector<Token> vectTok = ref.searchCoreference(*it1);
    if (vectTok.size() > 0)
    {
      corefData->push_back(vectTok);
    }
    // NOTE(review): second call with a discarded result, kept in case
    // searchCoreference has side effects — confirm and remove if it has none.
    ref.searchCoreference(*it1);
  }

  return SUCCESS_ID;
}
Пример #7
0
/**
 * Loads CRF output from the input file of the analysis and turns it into
 * segments stored in the segmentation data named m_dataName (created when
 * absent).
 *
 * Each non-empty line of the file is aligned with one segment of the
 * segmentation data m_fromDataName; its type is the text following the last
 * tab of the line. Consecutive segments with the same type are merged.
 * Reading stops at the first empty line.
 *
 * @param analysis analysis content; must hold the data m_fromDataName
 * @return MISSING_DATA if m_fromDataName is absent, if one of the two data
 *         is not a SegmentationData or if the input file cannot be opened;
 *         SUCCESS_ID otherwise
 */
LimaStatusCode CRFSegmentLoader::process(AnalysisContent& analysis) const
{

    // get segmentation data on which the CRF annotation relied
    // fromData and CRF results must be aligned
    AnalysisData* data=analysis.getData(m_fromDataName);
    if (data==0) {
        LOGINIT("LP::AnalysisLoader");
        LERROR << "no data '" << m_fromDataName << "'" << LENDL;
        return MISSING_DATA;
    }
    // dynamic_cast so that the type check below can actually fail (a
    // static_cast of a non-null pointer never yields null).
    // NOTE(review): relies on AnalysisData being a polymorphic base — confirm.
    SegmentationData* fromData=dynamic_cast<SegmentationData*>(data);
    if (fromData==0) {
        LOGINIT("LP::AnalysisLoader");
        LERROR << "data "<< m_fromDataName <<" is not an object of class SegmentationData" << LENDL;
        return MISSING_DATA;
    }

    // get segmentation data or create new
    data=analysis.getData(m_dataName);
    SegmentationData* segmData=0;
    if (data==0) {
        segmData=new SegmentationData(fromData->getGraphId());
        analysis.setData(m_dataName,segmData);
    }
    else {
        segmData = dynamic_cast<SegmentationData*>(data);
        if (segmData==0) {
            LOGINIT("LP::AnalysisLoader");
            LERROR << "data "<< m_dataName <<" is not an object of class SegmentationData" << LENDL;
            return MISSING_DATA;
        }
    }

    ifstream file(getInputFile(analysis).c_str(), std::ifstream::binary);
    if (! file.good()) {
        LOGINIT("LP::AnalysisLoader");
        LERROR << "Error: failed to open input file '" << getInputFile(analysis) << "'" << LENDL;
        return MISSING_DATA;
    }

    // segmentation in file must be aligned with fromData
    const std::vector<Segment>& fromSegments=fromData->getSegments();
    std::vector<Segment>::const_iterator seg=fromSegments.begin();
    string line;
    // segment being accumulated; owned by this function until it is added
    Segment *currentSegment=0;
    while (file.good()) {
        getline(file,line);
        if (line.empty()) {
            break;
        }
        if (seg==fromSegments.end()) {
            // data are not aligned: should not occur
            LOGINIT("LP::AnalysisLoader");
            LERROR << "CRFLoader: CRF output is not aligned with data " << m_fromDataName << LENDL;
            break;
        }
        // CRF tag is the last tab-separated element of the line
        string::size_type k=line.find_last_of("\t");
        if (k==string::npos) {
            // no tag on this line: ignore it without consuming a segment
            continue;
        }
        string type(line,k+1);
        if (currentSegment==0) {
            // first segment
            currentSegment=new Segment(*seg);
            currentSegment->setType(type);
        }
        else if (type == currentSegment->getType()) {
            // contiguous segment with same type
            currentSegment->addSegment(*seg);
        }
        else {
            // add current segment, create new one
            segmData->add(*currentSegment);
            delete currentSegment;
            currentSegment=new Segment(*seg);
            currentSegment->setType(type);
        }
        ++seg;
    }
    // add last segment; add() receives the segment by dereference, as in the
    // loop above, so the local object must be released here as well
    if (currentSegment!=0) {
        segmData->add(*currentSegment);
        delete currentSegment;
        currentSegment=0;
    }
    return SUCCESS_ID;
}
/**
 * Runs the pipeline pipelineId on a text.
 *
 * Builds an AnalysisContent holding the text, the metadata, the inactive
 * units and the handlers, then processes it with the pipeline registered for
 * the language given by the "Lang" metadata.
 *
 * @param texte         text to analyze
 * @param metaData      metadata of the document; "date", "time", "location"
 *                      and "docid" get a dedicated treatment
 * @param pipelineId    identifier of the pipeline to run
 * @param handlers      analysis handlers made available to the process units
 * @param inactiveUnits names of the process units to register as inactive
 * @throws LinguisticProcessingException if the pipeline cannot be found or
 *         returns a status other than SUCCESS_ID
 */
void CoreLinguisticProcessingClient::analyze(
    const LimaString& texte,
    const std::map<std::string,std::string>& metaData,
    const std::string& pipelineId,
    const std::map<std::string, AbstractAnalysisHandler*>& handlers,
    const std::set<std::string>& inactiveUnits) const

{
  Lima::TimeUtilsController timer("CoreLinguisticProcessingClient::analyze");
  CORECLIENTLOGINIT;
  // create analysis content
  AnalysisContent analysis;
  LinguisticMetaData* metadataholder=new LinguisticMetaData(); // will be destroyed in AnalysisContent destructor
  analysis.setData("LinguisticMetaData",metadataholder);

  metadataholder->setMetaData(metaData);
  LimaStringText* lstexte=new LimaStringText(texte);
  analysis.setData("Text",lstexte);

  LINFO << "CoreLinguisticProcessingClient::analyze(";
  for( std::map<std::string,std::string>::const_iterator attrIt = metaData.begin() ;
       attrIt != metaData.end() ; ++attrIt ) {
    LINFO << "attr:" << attrIt->first << "value:" << attrIt->second << ", " ;
  }
  LINFO;

  // Look the document id up without touching the caller's map: operator[] on
  // a const_cast'ed map inserts an empty "docid" entry into a const object.
  const std::map<std::string,std::string>::const_iterator docIdIt = metaData.find("docid");
  const std::string docId = (docIdIt != metaData.end()) ? docIdIt->second : std::string();
  LINFO << "CoreLinguisticProcessingClient::analyze(" << docId << "...)";

  // add date/time/location metadata in LinguisticMetaData
  if (metaData.empty()) {
    LDEBUG << "CoreLinguisticProcessingClient::analyze: no metadata";
  }
  for (std::map<std::string,std::string>::const_iterator it=metaData.begin(),
         it_end=metaData.end(); it!=it_end; ++it) {
    const std::string& key=it->first;
    const std::string& value=it->second;
    if (key=="date") {
      try {
        // keep only the date part of an ISO date-time: 2006-12-11T12:44:00
        // (when there is no 'T', npos selects the whole string)
        const std::string::size_type tPos=value.find('T');
        const std::string date(value,0,tPos);
        QDate docDate=QDate::fromString(date.c_str(),Qt::ISODate);
        metadataholder->setDate("document",docDate);

        LDEBUG << "use '"<< date << "' as document date";
        LDEBUG << "use boost'"<< docDate.day() <<"/"<< docDate.month() <<"/"<< docDate.year() << "' as document date";
      }
      catch (std::exception& e) {
        LERROR << "Error in date conversion (date '"<< value
               << "' will be ignored): " << e.what();
      }
    }
    else if (key=="location") {
      metadataholder->setLocation("document",value);
      LDEBUG << "use '"<< value << "' as document location";
    }
    else if (key=="time") {
      try {
        QTime docTime= QTime::fromString(value.c_str(),"hh:mm:ss.z" );
        metadataholder->setTime("document",docTime);
        LDEBUG << "use '"<< value << "' as document time";
      }
      catch (std::exception& e) {
        LERROR << "Error in ptime conversion (time '"<< value
               << "' will be ignored): " << e.what();
      }
    }
    else if (key=="docid") {
      LDEBUG << "use '"<< value << "' as document id";
      metadataholder->setMetaData("DocId",value);
    }
  }
  if (docIdIt == metaData.end()) {
    // Without a "docid", the analysis still gets an empty "DocId" metadata,
    // exactly as it did when the lookup used to insert a default entry.
    LDEBUG << "use '"<< docId << "' as document id";
    metadataholder->setMetaData("DocId",docId);
  }

  // try to retreive offset
  try
  {
    const std::string& offsetStr=metadataholder->getMetaData("StartOffset");
    metadataholder->setStartOffset(atoi(offsetStr.c_str()));
  }
  catch (LinguisticProcessingException& )
  {
    metadataholder->setStartOffset(0);
  }

  const std::string& fileName=metadataholder->getMetaData("FileName");
  // get language
  const std::string& lang=metadataholder->getMetaData("Lang");
  LINFO  << "analyze file is: '" << fileName << "'";
  LINFO  << "analyze pipeline is '" << pipelineId << "'";
  LINFO  << "analyze language is '" << lang << "'";
  LDEBUG << "texte : " << texte;

  MediaId langId=MediaticData::single().getMediaId(lang);

  // get pipeline
  const MediaProcessUnitPipeline* pipeline=MediaProcessors::single().getPipelineForId(langId,pipelineId);
  if (pipeline==0)
  {
    LERROR << "can't get pipeline '" << pipelineId << "'";
    throw LinguisticProcessingException( std::string("can't get pipeline '" + pipelineId + "'") );
  }
  InactiveUnitsData* inactiveUnitsData = new InactiveUnitsData();
  for (std::set<std::string>::const_iterator it = inactiveUnits.begin(); it != inactiveUnits.end(); ++it)
  {
    inactiveUnitsData->insert(*it);
  }
  analysis.setData("InactiveUnits", inactiveUnitsData);

  // add handler to analysis
  LDEBUG << "add handler to analysis" ;
  for (auto hit = handlers.begin(); hit != handlers.end(); ++hit)
  {
    LDEBUG << "    " << (*hit).first << (*hit).second;
  }
  AnalysisHandlerContainer* h = new AnalysisHandlerContainer(const_cast<std::map<std::string, AbstractAnalysisHandler*>& >(handlers));
  LDEBUG << "set data" ;
  analysis.setData("AnalysisHandlerContainer", h);

  // process analysis
  LDEBUG << "Process pipeline..." ;
  LimaStatusCode status=pipeline->process(analysis);
  LDEBUG << "pipeline process returned status " << static_cast<int>(status) ;
  if (status!=SUCCESS_ID)
  {
    std::stringstream s_mess;
    s_mess << "analysis failed : receive status " << static_cast<int>(status) << " from pipeline. exit";
    LERROR << s_mess.str();
    throw LinguisticProcessingException( s_mess.str() );
  }
}
Пример #9
0
/**
 * Subsentence simplification step of the syntactic analysis.
 *
 * For every segment of "SentenceBoundaries", applies m_recognizer on the
 * "PosGraph" between the first and last vertex of the segment, and applies
 * it again for as long as the "SimplificationData" reports that a
 * simplification was done. "SyntacticData" and "SimplificationData" are
 * created and registered in the analysis when absent.
 *
 * @param analysis analysis content; must hold "PosGraph" and
 *                 "SentenceBoundaries"
 * @return MISSING_DATA if one of them is absent, SUCCESS_ID otherwise
 */
LimaStatusCode SyntacticAnalyzerSimplify::process(
  AnalysisContent& analysis) const
{
  Lima::TimeUtilsController timer("SyntacticAnalysis");
  SASLOGINIT;
  LINFO << "start syntactic analysis - subsentences simplification";

  AnalysisGraph* const posGraph=
    static_cast<AnalysisGraph*>(analysis.getData("PosGraph"));
  if (posGraph==nullptr)
  {
    LERROR << "no AnalysisGraph ! abort";
    return MISSING_DATA;
  }

  SegmentationData* const sentences=
    static_cast<SegmentationData*>(analysis.getData("SentenceBoundaries"));
  if (sentences==nullptr)
  {
    LERROR << "no sentence bounds ! abort";
    return MISSING_DATA;
  }

  // make sure the syntactic data exists before any rule is applied
  if (analysis.getData("SyntacticData")==nullptr)
  {
    SyntacticData* const freshSyntacticData=new SyntacticData(posGraph,0);
    freshSyntacticData->setupDependencyGraph();
    analysis.setData("SyntacticData",freshSyntacticData);
  }

  SimplificationData* simplifications=
    static_cast<SimplificationData*>(analysis.getData("SimplificationData"));
  if (simplifications==nullptr)
  {
    simplifications=new SimplificationData(posGraph);
    analysis.setData("SimplificationData",simplifications);
  }

  const std::vector<Segment>& sentenceSegments=sentences->getSegments();
  for (const Segment& sentence : sentenceSegments)
  {
    const LinguisticGraphVertex sentenceFirst=sentence.getFirstVertex();
    const LinguisticGraphVertex sentenceLast=sentence.getLastVertex();
    LDEBUG << "simplify sentence from vertex " << sentenceFirst
           << " to vertex " << sentenceLast;

    // re-apply the rules until a pass performs no simplification
    bool simplified=true;
    while (simplified)
    {
      LDEBUG << "Applying automaton on sentence from " << sentenceFirst << " to " << sentenceLast;
      simplifications->simplificationDone(false);
      simplifications->sentence(sentenceFirst);

      std::vector<Automaton::RecognizerMatch> matches;
      const bool testAllVertices=true;
      const bool stopAtFirstSuccess=true;      // stop rules search on a node at first success
      const bool onlyOneSuccessPerType=true;
      const bool stopExplorationAtFirstSuccess=true;
      m_recognizer->apply(*posGraph,
                          sentenceFirst,
                          sentenceLast,
                          analysis,
                          matches,
                          testAllVertices,
                          stopAtFirstSuccess,
                          onlyOneSuccessPerType,
                          stopExplorationAtFirstSuccess);

      simplified=simplifications->simplificationDone();
    }
  }

  LINFO << "end syntactic analysis - subsentences simplification";
  return SUCCESS_ID;
}