/*!
		 * Explicit constructor: creates and indexes a performance annotation.
		 *
		 * A new annotation of type tAnnotatorPerformanceAnnotation is created in
		 * the given CAS with begin and end offsets both 0, the component name and
		 * elapsed time are stored on it, and it is added to the CAS index
		 * repository.
		 *
		 * @param aCas           CAS in which the annotation is created and indexed
		 * @param annotatorName  value handed to setComponentName()
		 * @param elapsedMillis  value handed to setElapsedTime()
		 */
		AnnotatorPerformanceAnnotation::AnnotatorPerformanceAnnotation(CAS& aCas, const UnicodeString& annotatorName, long elapsedMillis) :
				AnnotationWrapper(aCas)
		{
			FSIndexRepository& repository = aCas.getIndexRepository();

			// The annotation carries no text span of its own, hence offsets 0..0.
			annotation = aCas.createAnnotation(tAnnotatorPerformanceAnnotation, 0, 0);
			setComponentName(annotatorName);
			setElapsedTime(elapsedMillis);

			// Index only after the feature values have been set.
			repository.addFS(annotation);
		}
		/*!
		 * Creates and indexes a sentence annotation.
		 *
		 * A new annotation of type tSentenceAnnotation is created in the given
		 * CAS with the supplied begin and end offsets, its first and last token
		 * are recorded, and it is added to the CAS index repository.
		 *
		 * @param aCas        CAS in which the annotation is created and indexed
		 * @param begin       begin offset passed to createAnnotation()
		 * @param end         end offset passed to createAnnotation()
		 * @param firstToken  value handed to setFirstToken()
		 * @param lastToken   value handed to setLastToken()
		 */
		SentenceAnnotation::SentenceAnnotation(CAS& aCas, size_t begin, size_t end, const TokenAnnotation& firstToken, const TokenAnnotation& lastToken)
			: ContextAreaAnnotation(aCas)
		{
			FSIndexRepository& repository = aCas.getIndexRepository();

			annotation = aCas.createAnnotation(tSentenceAnnotation, begin, end);
			setFirstToken(firstToken);
			setLastToken(lastToken);

			// Index only after the token references have been set.
			repository.addFS(annotation);
		}
Пример #3
0
/**
 * Checks that an engine rejects calls made after it has been destroyed.
 *
 * Sequence exercised: create engine -> process one document -> reset the CAS
 * -> destroy() -> process() again and destroy() again. The two calls made
 * after the first destroy() are both expected to return
 * UIMA_ERR_ENGINE_INVALID_CALLING_SEQUENCE.
 *
 * @param rclConsole          console used for progress output
 * @param cpszConfigFilename  engine descriptor passed to createTextAnalysisEngine()
 */
void testCallingSequence1(uima::util::ConsoleUI& rclConsole, const TCHAR* cpszConfigFilename)
/* ----------------------------------------------------------------------- */
{
    ErrorInfo errInfo;
    uima::TextAnalysisEngine* pEngine = TextAnalysisEngine::createTextAnalysisEngine(cpszConfigFilename, errInfo);

    failIfNotTrue(errInfo.getErrorId() == UIMA_ERR_NONE);
    failIfNotTrue(pEngine != NULL);
    CAS* cas = pEngine->newCAS();
    failIfNotTrue(cas != NULL);

    // UnicodeStringRef only refers to characters owned by someone else, so the
    // backing UnicodeString must be a named object that outlives every use of
    // the reference. Building the reference from a temporary would leave it
    // pointing at a destroyed buffer.
    UnicodeString ustrDocument("a");
    uima::UnicodeStringRef us(ustrDocument);
    rclConsole.formatHeader(_TEXT("testing Engine CallingSequence1"));

    // Regular sequence: process one document, reset the CAS, destroy the engine.
    cas->setDocumentText(us.getBuffer(), us.length());
    cas->getDocumentAnnotation().setLanguage("en");
    failIfNotTrue(pEngine->process(*cas) == UIMA_ERR_NONE);
    failIfNotTrue(cas->reset() == UIMA_ERR_NONE);
    failIfNotTrue(pEngine->destroy() == UIMA_ERR_NONE);

    // Processing on a destroyed engine must be rejected.
    cas->setDocumentText(us.getBuffer(), us.length());
    cas->getDocumentAnnotation().setLanguage("en");
    failIfNotTrue(pEngine->process(*cas) == UIMA_ERR_ENGINE_INVALID_CALLING_SEQUENCE);

    // So must a second destroy().
    TyErrorId deInitRC = pEngine->destroy();
    rclConsole.format("RC of deInit()", deInitRC);
    failIfNotTrue(deInitRC == UIMA_ERR_ENGINE_INVALID_CALLING_SEQUENCE);
    rclConsole.formatBool(_TEXT("testing Engine CallingSequence1 OK"),
                          true);  //lint !e944: argument for operator '!' always evaluates to False
    delete cas;
    delete pEngine;
}
Пример #4
0
/**
 * Exercises a CAS multiplier engine built from "SimpleTextSegmenter.xml".
 *
 * Verifies the operational properties reported by the engine meta data, then
 * feeds a three-sentence document through processAndOutputNewCASes() and
 * expects exactly three non-empty output CASes.
 *
 * @param rclConsole  console used for progress output
 */
void testCasMultiplier(uima::util::ConsoleUI& rclConsole)
/* ----------------------------------------------------------------------- */
{
    rclConsole.info("testCasMultiplier start.");

    ErrorInfo errInfo;

    UnicodeString filename("SimpleTextSegmenter.xml");
    UnicodeString fn = ResourceManager::resolveFilename(filename, filename);
    uima::TextAnalysisEngine* pEngine =
            TextAnalysisEngine::createTextAnalysisEngine(UnicodeStringRef(fn).asUTF8().c_str(), errInfo);
    failIfNotTrue(errInfo.getErrorId() == UIMA_ERR_NONE);
    failIfNotTrue(pEngine != NULL);

    // Test operational properties settings.
    failIfNotTrue(pEngine->getAnalysisEngineMetaData().getOperationalProperties()->getOutputsNewCASes() == true);
    failIfNotTrue(pEngine->getAnalysisEngineMetaData().getOperationalProperties()->getModifiesCas() == false);
    failIfNotTrue(
            pEngine->getAnalysisEngineMetaData().getOperationalProperties()->isMultipleDeploymentAllowed() == true);

    CAS* cas = pEngine->newCAS();
    // Same guard as the sibling tests: fail the test instead of dereferencing NULL.
    failIfNotTrue(cas != NULL);
    cas->setDocumentText(
            UnicodeString("This is the first sentence. This is the second sentence. This is the third sentence."));

    // Each output CAS is handed back to the annotator context once it has been checked.
    CASIterator iter = pEngine->processAndOutputNewCASes(*cas);
    int num = 0;
    while (iter.hasNext()) {
        num++;
        CAS& seg = iter.next();
        failIfNotTrue(seg.getDocumentText().length() > 0);
        pEngine->getAnnotatorContext().releaseCAS(seg);
    }
    failIfNotTrue(num == 3);
    delete pEngine;
    delete cas;
    rclConsole.info("testCasMultiplier finished.");
}
Пример #5
0
/**
 * Runs a single input term through the engine as if it were a whole document.
 *
 * The term is added to a DocBuffer using the given CCSID, the buffer content
 * becomes the document text of a fresh CAS, and the CAS is processed and then
 * reset. Both return codes are passed to uimaToolHandleErrorId().
 *
 * @param rclConsole    console used for output and error reporting
 * @param rclEngine     engine that creates and processes the CAS
 * @param crclCCSID     CCSID name handed to DocBuffer::addDocPart()
 * @param crclLanguage  language set on the document annotation
 * @param cpszInpTerm   the term to process; must satisfy EXISTS()
 */
void testProcessTerm(uima::util::ConsoleUI& rclConsole,
                     uima::TextAnalysisEngine& rclEngine,
                     const char* crclCCSID,
                     const uima::Language& crclLanguage,
                     const TCHAR* cpszInpTerm)
{
    failIfNotTrue(EXISTS(cpszInpTerm));
    rclConsole.format(_TEXT("Input term"), cpszInpTerm);

    DocBuffer termBuffer;
    termBuffer.addDocPart(cpszInpTerm, strlen(cpszInpTerm), crclCCSID);

    CAS* pCas = rclEngine.newCAS();
    failIfNotTrue(pCas != NULL);

    // The term is the complete "document"; nothing else is added afterwards.
    pCas->setDocumentText(termBuffer.getDocBuffer(), termBuffer.getLength());
    pCas->getDocumentAnnotation().setLanguage(crclLanguage);

    TyErrorId processRc = rclEngine.process(*pCas);
    uimaToolHandleErrorId(rclConsole,
                          processRc,
                          rclEngine.getAnnotatorContext().getLogger().getLastErrorAsCStr(),
                          _TEXT("uima::Engine::processDocument"),
                          gs_lExpectedProcessDocumentRc);

    // The iterator test is not available here, so requesting it fails the test.
    if (processRc == UIMA_ERR_NONE) {
        if (gs_bDoIterTest) {
            failIfNotTrue(false);
        }
    }

    TyErrorId resetRc = pCas->reset();
    uimaToolHandleErrorId(rclConsole,
                          resetRc,
                          rclEngine.getAnnotatorContext().getLogger().getLastErrorAsCStr(),
                          _TEXT("uima::Engine::resetDocument"));
    delete pCas;
}
Пример #6
0
/**
 * Dumps the given CAS to the annotator's output stream.
 *
 * In append mode all data of a session/collection goes into one file that is
 * already open. Otherwise the dump file is opened here and closed again before
 * returning, so it is rewritten for each document.
 *
 * @param tcas  CAS to dump
 * @return UIMA_ERR_NONE on success, the error from openOutputFile() if the
 *         file cannot be opened, or
 *         UIMA_ERR_USER_ANNOTATOR_COULD_NOT_PROCESS if the configured output
 *         style is not one of the handled values.
 */
TyErrorId
AnnotatorDump::process(CAS & tcas,
                       const ResultSpecification &
                      ) {
  // Outside append mode the file is opened per document.
  if (!iv_bAppendFile) {
    TyErrorId tyErrId = openOutputFile();
    if (tyErrId != UIMA_ERR_NONE) {
      return tyErrId;
    }
  }

  assert(iv_clOutputStream.good());

  if (iv_bDumpDocBuffer) {
    // Dumping the document buffer.
    UnicodeStringRef doc = tcas.getDocumentText();

    outputDocBuffer(doc);
  }

  // The writer is owned by the auto_ptr from the moment it is created.
  auto_ptr<CASWriterABase> apWriter;
  switch (iv_enOutputStyle) {
  case Xml:
    apWriter.reset(new uima::XMLDumpWriter(tcas, iv_bDumpDocBuffer));
    break;
  case XCas:
    apWriter.reset(new uima::XCASWriter(tcas, iv_bDumpDocBuffer));
    break;
  default:
    assert(false);
    break;
  }

  // With asserts compiled out an unknown style would otherwise fall through to
  // a NULL dereference; report an error instead and do not leave the
  // per-document file open.
  if (apWriter.get() == NULL) {
    if (!iv_bAppendFile) {
      closeOutputFile();
    }
    return (TyErrorId)UIMA_ERR_USER_ANNOTATOR_COULD_NOT_PROCESS;
  }

  apWriter->write(iv_clOutputStream);

  // Outside append mode the file is closed again after each document.
  if (!iv_bAppendFile) {
    closeOutputFile();
  }

  return(TyErrorId)UIMA_ERR_NONE;
}
Пример #7
0
/**
 * Checks that an engine accepts an empty (NULL) document and repeated
 * process() calls.
 *
 * A UnicodeStringRef constructed from NULL is used as document text. The CAS
 * is processed and reset once, then set up again and processed twice in a row
 * without a reset in between. Every call is expected to return UIMA_ERR_NONE,
 * as is the final destroy().
 *
 * @param rclConsole          console used for progress output
 * @param cpszConfigFilename  engine descriptor passed to createTextAnalysisEngine()
 */
void testCallingSequence3(uima::util::ConsoleUI& rclConsole, const TCHAR* cpszConfigFilename)
/* ----------------------------------------------------------------------- */
{
    rclConsole.formatHeader(_TEXT("testing Engine CallingSequence3"));

    ErrorInfo errInfo;
    uima::TextAnalysisEngine* pEngine = TextAnalysisEngine::createTextAnalysisEngine(cpszConfigFilename, errInfo);
    failIfNotTrue(errInfo.getErrorId() == UIMA_ERR_NONE);
    failIfNotTrue(pEngine != NULL);
    CAS* cas = pEngine->newCAS();
    failIfNotTrue(cas != NULL);

    /* test for NULL ptrs */

    UnicodeStringRef uref2(NULL);
    cas->setDocumentText(uref2.getBuffer(), uref2.length());
    cas->getDocumentAnnotation().setLanguage("en");
    failIfNotTrue(pEngine->process(*cas) == UIMA_ERR_NONE);
    failIfNotTrue(cas->reset() == UIMA_ERR_NONE);


    /* test for subsequent processes: two process() calls without a reset */
    cas->setDocumentText(uref2.getBuffer(), uref2.length());
    cas->getDocumentAnnotation().setLanguage("en");

    failIfNotTrue(pEngine->process(*cas) == UIMA_ERR_NONE);

    failIfNotTrue(pEngine->process(*cas) == UIMA_ERR_NONE);


    TyErrorId utErrorId = pEngine->destroy();
    failIfNotTrue(utErrorId == UIMA_ERR_NONE);
    delete cas;
    delete pEngine;
    rclConsole.formatBool(_TEXT("testing Engine CallingSequence3 OK"),
                          true);  //lint !e944: argument for operator '!' always evaluates to False
}
Пример #8
0
int main(int argc, char* argv[]) {
    try {
        int loglevel = -1;

        /* check  the number of command line args */
        if (argc != 3 && argc != 5) {
            tell();
            return 1;
        }

        if (argc == 5) {
            if (!strcmp(argv[3], "-l")) {
                loglevel = atoi(argv[4]);
                if (loglevel < LogStream::EnMessage) {
                    cerr << "LogLevel less than minimum value (Message) = " << LogStream::EnMessage << endl;
                    return 1;
                }
                if (loglevel > LogStream::EnError) {
                    cerr << "LogLevel greater than maximum value (Error) = " << LogStream::EnError << endl;
                    return 1;
                }
            } else {
                cerr << "Inexpected option: " << argv[3] << endl;
                tell();
                return 1;
            }
        }

        /* Create/link up to a resource manager instance (singleton) */
        (void) ResourceManager::createInstance("UIMACPP_EXAMPLE_APPLICATION");

        if (loglevel >= 0) {
            ResourceManager::getInstance().setLoggingLevel((LogStream::EnEntryType) loglevel);
        }

        std::string a = "abc";
        UnicodeString b = "wxyz";
        UChar c = 'c';

        TyErrorId utErrorId;          // Variable to store return codes
        ErrorInfo errorInfo;          // Variable to stored detailed error info
        /* Initialize engine with filename of config-file */
        AnalysisEngine* pEngine = Framework::createAnalysisEngine(argv[1], errorInfo);
        CheckError(errorInfo);

        /* Get a new CAS */
        CAS* tcas = pEngine->newCAS();

        /* process input xcas */
        util::DirectoryWalk dirwalker(argv[2]);
        if (dirwalker.isValid()) {

            util::Filename infile(argv[2], "FilenamePlaceHolder");
            while (dirwalker.isValid()) {
                // Process all files or just the ones with matching suffix
                if (dirwalker.isFile() &&
                    dirwalker.matchesWildcardPattern("*.txt")) {
                    infile.setNewName(dirwalker.getNameWithoutPath());
                    std::string afile(infile.getAsCString());

                    //process the file
                    processFile(afile, pEngine, tcas);

                    //reset the cas
                    tcas->reset();
                }
                //get the next xcas file in the directory
                dirwalker.setToNext();
            }
        } else {
            /* If has no directory entries then probably a file */
            cout << "ExampleApplication: processing file " << argv[2] << endl;
            std::string afile(argv[2]);
            //process the cas
            processFile(afile, pEngine, tcas);
        }

        /* call collectionProcessComplete */
        utErrorId = pEngine->collectionProcessComplete();

        /* Free ressorces */
        utErrorId = pEngine->destroy();
        CheckError(utErrorId, *pEngine);

        delete tcas;
        delete pEngine;
    } catch (Exception e) {
        cout << "ExampleApplication " << e << endl;
    }
    /* If we got this far everything went OK */
    cout << "ExampleApplication: processing finished sucessfully! " << endl;

    return (0);
}
Пример #9
0
  // Look for "EnglishDocument" sofa and read it as a stream.
  //
  // The sofa data of the "EnglishDocument" view is read through its
  // SofaDataStream into a byte buffer, echoed to cout, converted to a
  // UnicodeString as UTF-8 and split on single spaces with u_strtok_r. An
  // annotation of type `annot` is created and indexed for each piece.
  //
  // Returns UIMA_ERR_USER_ANNOTATOR_COULD_NOT_PROCESS if the stream cannot be
  // opened, UIMA_ERR_NONE otherwise.
  TyErrorId process(CAS & rCas, ResultSpecification const & crResultSpecification) {
    cout << "SofaDataAnnotator: process() begins" << endl;

    /** get the CAS view of the sofa */
    CAS * tcas = rCas.getView("EnglishDocument");
    /** get the handle to the index repository */
    FSIndexRepository & indexRep = tcas->getIndexRepository();

    /** get the sofa of that view */
    SofaFS textSofa = tcas->getSofa();

    /** get the handle to the sofa data stream */
    SofaDataStream * pStream = textSofa.getSofaDataStream();
    /** open the stream */
    int rc = pStream->open();
    if (rc != 0) {
      cout << "open failed "  << rc << endl;
      // The stream object is deleted by this function on the normal path, so
      // it has to be released on the failure path as well.
      delete pStream;
      return (TyErrorId)UIMA_ERR_USER_ANNOTATOR_COULD_NOT_PROCESS;
    }
    /** get the total stream size */
    size_t streamSize = pStream->getTotalStreamSizeInBytes();

    /** read file contents into a buffer; the buffer is pre-filled with '\n',
        so the one extra byte after the data stays a newline */
    char * pBuffer = new char[streamSize+1];
    memset(pBuffer,'\n' ,streamSize+1);
    int elementsize=1;
    pStream->read(pBuffer, elementsize, streamSize);

    cout << endl;
    cout.write(pBuffer, streamSize);
    cout << endl;

    /** convert to unicode (including the extra trailing byte) */
    UnicodeString ustrInputText(pBuffer, streamSize+1, "utf-8");

    /** find tokens and annotate */
    UnicodeString delim(" ");
    UChar *myLocalSaveState;
    UChar * pInputText = (UChar*) ustrInputText.getBuffer();
    const UChar * pToken = pInputText;
    // The first call starts the tokenization; its result is replaced by the
    // loop condition below before it is used.
    const UChar * pNextToken = u_strtok_r((UChar*) pInputText, delim.getBuffer(), &myLocalSaveState);
    // NOTE(review): offsets start at 1 and each annotation ends at
    // start+tokenlength-2; confirm that this is the intended offset convention.
    int start = 1;
    int tokenlength=0;
    int nTokens = 0;
    while ( (pNextToken=u_strtok_r(NULL, delim.getBuffer(), &myLocalSaveState)) ) {
      // distance between the starts of two consecutive tokens
      tokenlength = pNextToken - pToken;
      AnnotationFS annotFS = tcas->createAnnotation(annot, start, start+tokenlength-2);
      indexRep.addFS(annotFS);
      ++nTokens;
      start += tokenlength;
      pToken = pNextToken;
    }
    /* last token: runs from the current start to the end of the stream data.
       pNextToken is NULL here, so no length is derived from it. */
    AnnotationFS annotFS = tcas->createAnnotation(annot, start, streamSize);
    indexRep.addFS(annotFS);
    ++nTokens;
    cout << endl << "   Annotated " << nTokens << " tokens." << endl << endl;

    /** close the stream */
    pStream->close();
    delete pStream;
    delete[] pBuffer;

    cout << "SofaDataAnnotator: process() ends" << endl;
    return (TyErrorId)UIMA_ERR_NONE;
  }
Пример #10
0
/**
 * Processes the hard-wired document "tdoc_001_enus_850.asc" once for every
 * document specification on the console command line.
 *
 * For each iteration the file is read, added to a DocBuffer using the given
 * CCSID, set as document text of the CAS, processed and reset. The return
 * codes of process() and reset() are passed to uimaToolHandleErrorId(). A
 * warning is issued if the console yields no document specification at all.
 *
 * @param rclConsole    console supplying the iteration and used for output
 * @param rclEngine     engine that creates and processes the CAS
 * @param crclCCSID     CCSID name handed to DocBuffer::addDocPart(); NULL is
 *                      reported through rclConsole.fatal()
 * @param crclLanguage  language set on the document annotation
 */
void testProcessDocu(uima::util::ConsoleUI& rclConsole,
                     uima::TextAnalysisEngine& rclEngine,
                     const char* crclCCSID,
                     const uima::Language& crclLanguage)
/* ----------------------------------------------------------------------- */
{
    TyErrorId utErrorId;
    string clstrInputFileContent;
    size_t uiNumOfInputDocs = 0;

    uima::DocBuffer docBuffer;
    CAS* cas = rclEngine.newCAS();
    failIfNotTrue(cas != NULL);

    /* iterate through all doc specs on command line */
    for (rclConsole.setToFirst(); rclConsole.isValid(); rclConsole.setToNext()) {
        // The command line value is not used as a file name; a hard wired
        // data file is processed instead.
        UnicodeString filename("tdoc_001_enus_850.asc");
        UnicodeString fn = ResourceManager::resolveFilename(filename, filename);
        uima::util::Filename clInputFilename(UnicodeStringRef(fn).asUTF8().c_str());

        if (!clInputFilename.isExistent()) {
            rclConsole.fatal(1, _TEXT("Input file not found"), clInputFilename.getAsCString());
        }
        if (crclCCSID == NULL) {
            rclConsole.fatal(1, _TEXT("Invalid CCSID specified - cannot load document"),
                             crclCCSID);
        }
        rclConsole.format(_TEXT("Adding Document"), clInputFilename.getAsCString());
        size_t uiSize = ftool_ReadFileToString(clInputFilename, clstrInputFileContent);

        docBuffer.addDocPart(clstrInputFileContent.data(), uiSize, crclCCSID);
        // For real file based documents we only add a term annotation for the
        // whole "document" if the appropriate switch is set
        if (gs_bDocIsTerm) {
            assert(false);
        }

        // The document text comes from docBuffer; no separate conversion of
        // the raw file content is needed.
        cas->setDocumentText(docBuffer.getDocBuffer(), docBuffer.getLength());
        cas->getDocumentAnnotation().setLanguage(crclLanguage);

        utErrorId = rclEngine.process(*cas);
        uimaToolHandleErrorId(rclConsole, utErrorId, rclEngine.getAnnotatorContext().getLogger().getLastErrorAsCStr(),
                              _TEXT("uima::Engine::processDocument"), gs_lExpectedProcessDocumentRc);

        // The iterator test is not available here, so requesting it fails the test.
        if (utErrorId == UIMA_ERR_NONE && gs_bDoIterTest) {
            failIfNotTrue(false);
        }

        utErrorId = cas->reset();
        uimaToolHandleErrorId(rclConsole, utErrorId, rclEngine.getAnnotatorContext().getLogger().getLastErrorAsCStr(),
                              _TEXT("uima::Engine::resetDocument"));
        ++uiNumOfInputDocs;
    }
    if (uiNumOfInputDocs == 0) {
        rclConsole.warning(_TEXT("No input file(s) specified"));
    }
    delete cas;
}
Пример #11
0
  // Word-by-word "translation" of the "EnglishDocument" view into a newly
  // created "GermanDocument" view.
  //
  // The English text is split on single spaces. For every word an annotation
  // of type `annot` is added to the English view, the word is passed through
  // translate(), and an annotation of type `cross` is added to the German view
  // whose `other` feature points back at the English annotation. The
  // accumulated translation becomes the German document text.
  //
  // Only the first 99 UChars of the English text are looked at, and the
  // translation stops once the 400-UChar output buffer would overflow.
  //
  // Always returns UIMA_ERR_NONE.
  TyErrorId process(CAS & rCAS, ResultSpecification const & crResultSpecification) {
    // Buffer capacities in UChars, terminator included.
    const int WORK_CAPACITY = 100;
    const int TRANSLATION_CAPACITY = 400;

    UChar *myLocalSaveState;

    // Look for english document and "translate" to German
    cout << "SofaExampleAnnotator: process() begins" << endl;

    // get English view and its text
    CAS * engTcas = rCAS.getView("EnglishDocument");
    DocumentFS docFS = engTcas->getDocumentAnnotation();
    UnicodeStringRef engText = docFS.getCoveredText();
    cout << "      English Input: " << engText << endl;

    // Create the output German text Sofa and open CAS view
    CAS * germTcas = rCAS.createView("GermanDocument");

    // Make a private, modifiable copy of the document for u_strtok_r. The copy
    // length is bounded by both the text length and the buffer size, and the
    // terminator is written explicitly because u_strncpy does not add one when
    // the source fills the requested length.
    UChar uWork[WORK_CAPACITY];
    int copyLen = static_cast<int>(engText.length());
    if (copyLen > WORK_CAPACITY - 1) {
      copyLen = WORK_CAPACITY - 1;
    }
    u_strncpy(uWork, engText.getBuffer(), copyLen);
    uWork[copyLen] = 0;

    // Setup for translated text. germBegin always equals the current length of
    // `translation` at the top of the loop below.
    int germBegin = 0;
    int germEnd = 0;
    UChar translation[TRANSLATION_CAPACITY];
    translation[0]=0;

    // get two IR handles for adding annotations to the appropriate view
    FSIndexRepository & engIndexRep = engTcas->getIndexRepository();
    FSIndexRepository & germIndexRep = germTcas->getIndexRepository();

    // Parse the English text; the delimiter is a single space
    UChar uDelim[2] = { 0x0020, 0 };
    UChar * next = u_strtok_r(uWork, uDelim, &myLocalSaveState);

    while (next) {
      // Translate word-by-word
      const UChar * gword = translate(next);
      const int gwordLen = u_strlen(gword);
      const int separatorLen = (germBegin > 0) ? 1 : 0;

      // Stop before the output buffer (terminator included) would overflow.
      if (germBegin + separatorLen + gwordLen >= TRANSLATION_CAPACITY) {
        break;
      }

      // Create annotation on source text
      AnnotationFS engAnnot =
        engTcas->createAnnotation(annot, next-uWork, (next-uWork)+u_strlen(next));
      engIndexRep.addFS(engAnnot);

      // Accumulate the total translated document
      if (separatorLen > 0) {
        // if not the first word, add space before
        u_strncat(translation, uDelim, 1);
        germBegin += 1;
      }
      u_strcat(translation, gword);

      // Create annotation on output text
      germEnd = germBegin + gwordLen;
      AnnotationFS germAnnot = germTcas->createAnnotation(cross, germBegin, germEnd);
      germIndexRep.addFS(germAnnot);
      // add link to English text
      germAnnot.setFSValue(other, engAnnot);
      germBegin = germEnd;

      next = u_strtok_r(NULL, uDelim, &myLocalSaveState);
    }
    // set documentText with accumulated translation
    germTcas->setDocumentText( translation, u_strlen(translation), true );

    cout << "   German(!) Output: " << germTcas->getDocumentText() << endl;

    cout << "SofaExampleAnnotator: process() ends" << endl;
    return (TyErrorId)UIMA_ERR_NONE;
  }
Пример #12
0
    /**
     * Rebuilds the index repositories of a CAS from a serialized list of
     * indexed feature structures.
     *
     * As read by this function, crIndexFSs is laid out as
     *   [ numViews, nBase, <nBase entries>, n1, <n1 entries>, n2, <n2 entries>, ... ]
     * where the first group goes into the base CAS index repository and each
     * following group into the repository of view 1, 2, ... (obtained through
     * getViewBySofaNum). After the base group is added, every Sofa found in
     * the CAS::INDEXID_SOFA index is registered with the CAS.
     *
     * Each group's count entry is consumed even when the count is 0, so the
     * following groups stay aligned with their views.
     *
     * @param crIndexFSs  serialized indexed FS list; needs at least the two
     *                    leading entries
     * @param rCAS        CAS whose index repositories are reset and refilled
     */
    void CASDeserializer::deserializeIndexedFSs(vector<SerializedCAS::TyNum> & crIndexFSs,
        uima::CAS & rCAS) {

      uima::internal::CASImpl & rCASImpl = uima::internal::CASImpl::promoteCAS(rCAS);
      uima::lowlevel::FSHeap & crHeap = rCASImpl.getHeap();
      uima::lowlevel::IndexRepository * crIndexRep = &rCASImpl.getIndexRepository();
      uima::lowlevel::FSHeap::TyFSHeap const & rTempFSHeap = crHeap.iv_clTemporaryHeap;
      // Upper bound used only to sanity-check the entries in debug builds.
      SerializedCAS::TyNum iMaxOffset = rTempFSHeap.getTopOfHeap();

      // The view count and the base CAS count must both be present.
      assert( crIndexFSs.size() >= 2 );

      vector<SerializedCAS::TyNum>::const_iterator cit = crIndexFSs.begin();
      vector<SerializedCAS::TyNum>::const_iterator loopit;
      vector<SerializedCAS::TyNum> perLoopIndexedFSs;

      const int numViews = *cit++;
      // Consume the base CAS count unconditionally. Leaving the iterator on a
      // zero count would make the first view read it as its own count.
      int loopSize = *cit++;

      crIndexRep->reset();

      // deserialize base CAS
      if (loopSize > 0) {
        lastSegmentUsed = 0;
        perLoopIndexedFSs.assign(cit, cit + loopSize);
        cit += loopSize;

        for (loopit = perLoopIndexedFSs.begin(); loopit != perLoopIndexedFSs.end(); ++loopit) {
          assert( *loopit < iMaxOffset );
          crIndexRep->add( *loopit );
        }
      }

      // book keeping for all Sofas
      rCAS.getBaseCas()->iv_sofaCount = 1; // reserve for initial view
      FSIndex fsIdx = crIndexRep->getIndex(CAS::INDEXID_SOFA);
      FSIterator fsIt = fsIdx.iterator();
      while (fsIt.isValid()) {
        SofaFS aSofa = (SofaFS) fsIt.get();
        if ( 0 == aSofa.getSofaID().compare(UnicodeString(CAS::NAME_DEFAULT_SOFA)) ) {
          rCAS.registerInitialSofa();
        } else {
          // only bump sofa count if not initial View
          rCAS.bumpSofaCount();
        }
        rCAS.getView(aSofa)->registerView(aSofa);
        fsIt.moveToNext();
      }

      for (int view = 1; view <= numViews; view++) {

        // Read (and consume) this view's count; skip views with nothing indexed
        assert( cit != crIndexFSs.end() );
        loopSize = *cit++;
        if (0 == loopSize) {
          continue;
        }

        CAS* tcas = rCAS.getViewBySofaNum(view);
        uima::internal::CASImpl & crTCASImpl = uima::internal::CASImpl::promoteCAS(*tcas);
        crIndexRep = &crTCASImpl.getIndexRepository();
        crIndexRep->reset();

        perLoopIndexedFSs.assign(cit, cit + loopSize);
        cit += loopSize;

        for (loopit = perLoopIndexedFSs.begin(); loopit != perLoopIndexedFSs.end(); ++loopit) {
          assert( *loopit < iMaxOffset );
          crIndexRep->add( *loopit );
        }
        tcas->pickupDocumentAnnotation();
      }

    }