Пример #1
0
/**
 * Checks that an engine refuses further work once it has been destroyed.
 *
 * Creates an engine from cpszConfigFilename, runs one process()/reset() cycle
 * on a one-character document, destroys the engine, and then expects both a
 * second process() and a second destroy() to return
 * UIMA_ERR_ENGINE_INVALID_CALLING_SEQUENCE.
 *
 * @param rclConsole          console used for the test header and result lines
 * @param cpszConfigFilename  configuration passed to createTextAnalysisEngine()
 */
void testCallingSequence1(uima::util::ConsoleUI& rclConsole, const TCHAR* cpszConfigFilename)
/* ----------------------------------------------------------------------- */
{
    ErrorInfo errInfo;
    uima::TextAnalysisEngine* pEngine = TextAnalysisEngine::createTextAnalysisEngine(cpszConfigFilename, errInfo);

    failIfNotTrue(errInfo.getErrorId() == UIMA_ERR_NONE);
    failIfNotTrue(pEngine != NULL);
    CAS* cas = pEngine->newCAS();
    failIfNotTrue(cas != NULL);

    // Keep the document text in a named UnicodeString that outlives `us`.
    // Building the reference from a temporary would leave it pointing at text
    // destroyed at the end of the declaration, while us.getBuffer() is still
    // read further down.
    UnicodeString ustrDocText("a");
    uima::UnicodeStringRef us(ustrDocText);
    rclConsole.formatHeader(_TEXT("testing Engine CallingSequence1"));

    // First pass: a regular process/reset cycle followed by destroy must succeed.
    cas->setDocumentText(us.getBuffer(), us.length());
    cas->getDocumentAnnotation().setLanguage("en");
    failIfNotTrue(pEngine->process(*cas) == UIMA_ERR_NONE);
    failIfNotTrue(cas->reset() == UIMA_ERR_NONE);
    failIfNotTrue(pEngine->destroy() == UIMA_ERR_NONE);

    // Second pass: the engine is already destroyed, so process() must be rejected.
    cas->setDocumentText(us.getBuffer(), us.length());
    cas->getDocumentAnnotation().setLanguage("en");
    failIfNotTrue(pEngine->process(*cas) == UIMA_ERR_ENGINE_INVALID_CALLING_SEQUENCE);

    // Destroying a second time must be rejected as well.
    TyErrorId deInitRC = pEngine->destroy();
    rclConsole.format("RC of deInit()", deInitRC);
    failIfNotTrue(deInitRC == UIMA_ERR_ENGINE_INVALID_CALLING_SEQUENCE);
    rclConsole.formatBool(_TEXT("testing Engine CallingSequence1 OK"),
                          true);  //lint !e944: argument for operator '!' always evaluates to False
    delete cas;
    delete pEngine;
}  //lint !e715: cpszConfigFilename (line 99) not referenced
Пример #2
0
/**
 * Checks that an engine accepts a document set from a NULL-constructed string
 * reference and accepts repeated process() calls on the same CAS.
 *
 * Creates an engine from cpszConfigFilename, processes and resets a CAS whose
 * text comes from UnicodeStringRef(NULL), then sets the same text again and
 * calls process() twice in a row before destroying the engine. Every call is
 * expected to return UIMA_ERR_NONE.
 *
 * @param rclConsole          console used for the test header and result lines
 * @param cpszConfigFilename  configuration passed to createTextAnalysisEngine()
 */
void testCallingSequence3(uima::util::ConsoleUI& rclConsole, const TCHAR* cpszConfigFilename)
/* ----------------------------------------------------------------------- */
{
    rclConsole.formatHeader(_TEXT("testing Engine CallingSequence3"));

    ErrorInfo errInfo;
    uima::TextAnalysisEngine* pEngine = TextAnalysisEngine::createTextAnalysisEngine(cpszConfigFilename, errInfo);
    failIfNotTrue(errInfo.getErrorId() == UIMA_ERR_NONE);
    failIfNotTrue(pEngine != NULL);
    CAS* cas = pEngine->newCAS();
    failIfNotTrue(cas != NULL);

    /* test for NULL ptrs */
    // NOTE(review): presumably a NULL-constructed reference yields a null
    // buffer with length 0 - confirm against UnicodeStringRef.
    UnicodeStringRef uref2(NULL);
    cas->setDocumentText(uref2.getBuffer(), uref2.length());
    cas->getDocumentAnnotation().setLanguage("en");
    failIfNotTrue(pEngine->process(*cas) == UIMA_ERR_NONE);
    failIfNotTrue(cas->reset() == UIMA_ERR_NONE);

    /* test for subsequent processes */
    cas->setDocumentText(uref2.getBuffer(), uref2.length());
    cas->getDocumentAnnotation().setLanguage("en");

    // process() is called twice without a reset() in between; both calls must succeed.
    failIfNotTrue(pEngine->process(*cas) == UIMA_ERR_NONE);
    failIfNotTrue(pEngine->process(*cas) == UIMA_ERR_NONE);

    const TyErrorId utErrorId = pEngine->destroy();
    failIfNotTrue(utErrorId == UIMA_ERR_NONE);
    delete cas;
    delete pEngine;
    rclConsole.formatBool(_TEXT("testing Engine CallingSequence3 OK"),
                          true);  //lint !e944: argument for operator '!' always evaluates to False
}
Пример #3
0
/**
 * Exercises a CAS multiplier engine built from "SimpleTextSegmenter.xml".
 *
 * Verifies the engine's operational properties (outputs new CASes, does not
 * modify the input CAS, allows multiple deployment), feeds it a three-sentence
 * document and expects processAndOutputNewCASes() to yield exactly three
 * output CASes, each with non-empty document text.
 *
 * @param rclConsole  console used for the start/finish messages
 */
void testCasMultiplier(uima::util::ConsoleUI& rclConsole)
/* ----------------------------------------------------------------------- */
{
    rclConsole.info("testCasMultiplier start.");

    ErrorInfo errInfo;

    UnicodeString filename("SimpleTextSegmenter.xml");
    UnicodeString fn = ResourceManager::resolveFilename(filename, filename);
    uima::TextAnalysisEngine* pEngine =
            TextAnalysisEngine::createTextAnalysisEngine(UnicodeStringRef(fn).asUTF8().c_str(), errInfo);
    failIfNotTrue(errInfo.getErrorId() == UIMA_ERR_NONE);
    failIfNotTrue(pEngine != NULL);


    //test operational properties settings
    failIfNotTrue(pEngine->getAnalysisEngineMetaData().getOperationalProperties()->getOutputsNewCASes() == true);
    failIfNotTrue(pEngine->getAnalysisEngineMetaData().getOperationalProperties()->getModifiesCas() == false);
    failIfNotTrue(
            pEngine->getAnalysisEngineMetaData().getOperationalProperties()->isMultipleDeploymentAllowed() == true);


    CAS* cas = pEngine->newCAS();
    // Same guard the other engine tests apply before dereferencing the CAS.
    failIfNotTrue(cas != NULL);
    cas->setDocumentText(
            UnicodeString("This is the first sentence. This is the second sentence. This is the third sentence."));

    CASIterator iter = pEngine->processAndOutputNewCASes(*cas);
    int num = 0;
    while (iter.hasNext()) {
        num++;
        CAS& seg = iter.next();
        failIfNotTrue(seg.getDocumentText().length() > 0);
        // Hand each output CAS back to the annotator context once it has been checked.
        pEngine->getAnnotatorContext().releaseCAS(seg);
    }
    // One output CAS is expected per sentence of the input document.
    failIfNotTrue(num == 3);
    // Delete the CAS before the engine that created it, matching the order
    // used by the other engine tests.
    delete cas;
    delete pEngine;
    rclConsole.info("testCasMultiplier finished.");
}
Пример #4
0
/**
 * Runs a single input term through the engine as one complete document.
 *
 * The term is placed into a DocBuffer using the given CCSID, set as the text
 * of a freshly created CAS together with the given language, processed, and
 * the CAS is then reset. The return codes of process() and reset() are both
 * passed to uimaToolHandleErrorId().
 *
 * @param rclConsole    console used for output and error reporting
 * @param rclEngine     engine that creates and processes the CAS
 * @param crclCCSID     CCSID name handed to DocBuffer::addDocPart()
 * @param crclLanguage  language set on the document annotation
 * @param cpszInpTerm   term to process; must satisfy EXISTS()
 */
void testProcessTerm(uima::util::ConsoleUI& rclConsole,
                     uima::TextAnalysisEngine& rclEngine,
                     const char* crclCCSID,
                     const uima::Language& crclLanguage,
                     const TCHAR* cpszInpTerm)
/* ----------------------------------------------------------------------- */
{
    failIfNotTrue(EXISTS(cpszInpTerm));
    rclConsole.format(_TEXT("Input term"), cpszInpTerm);

    // The whole term becomes the one and only part of the document.
    DocBuffer termBuffer;
    termBuffer.addDocPart(cpszInpTerm, strlen(cpszInpTerm), crclCCSID);

    CAS* pTermCas = rclEngine.newCAS();
    failIfNotTrue(pTermCas != NULL);

    pTermCas->setDocumentText(termBuffer.getDocBuffer(), termBuffer.getLength());
    pTermCas->getDocumentAnnotation().setLanguage(crclLanguage);

    TyErrorId processRc = rclEngine.process(*pTermCas);
    uimaToolHandleErrorId(rclConsole,
                          processRc,
                          rclEngine.getAnnotatorContext().getLogger().getLastErrorAsCStr(),
                          _TEXT("uima::Engine::processDocument"),
                          gs_lExpectedProcessDocumentRc);

    // The iterator test is disabled, so asking for it after a successful
    // process() is treated as a test failure.
    if (gs_bDoIterTest) {
        if (processRc == UIMA_ERR_NONE) {
            failIfNotTrue(false);
        }
    }

    TyErrorId resetRc = pTermCas->reset();
    uimaToolHandleErrorId(rclConsole,
                          resetRc,
                          rclEngine.getAnnotatorContext().getLogger().getLastErrorAsCStr(),
                          _TEXT("uima::Engine::resetDocument"));

    delete pTermCas;
}
Пример #5
0
/**
 * Processes the hard-wired test document once per document spec on the
 * console's argument list.
 *
 * For every console entry the file "tdoc_001_enus_850.asc" is resolved, read
 * into a string, added to the DocBuffer with the given CCSID, set as the CAS
 * document text together with the given language, processed, and the CAS is
 * reset. The return codes of process() and reset() are passed to
 * uimaToolHandleErrorId(). A warning is printed when the loop never runs.
 *
 * @param rclConsole    console providing the argument iteration and output
 * @param rclEngine     engine that creates and processes the CAS
 * @param crclCCSID     CCSID name for the document; NULL is reported as fatal
 * @param crclLanguage  language set on the document annotation
 */
void testProcessDocu(uima::util::ConsoleUI& rclConsole,
                     uima::TextAnalysisEngine& rclEngine,
                     const char* crclCCSID,
                     const uima::Language& crclLanguage)
/* ----------------------------------------------------------------------- */
{
    TyErrorId utErrorId;
    string clstrInputFileContent;
    size_t uiNumOfInputDocs = 0;

    // NOTE(review): docBuffer lives outside the loop and addDocPart() is called
    // on every iteration, so its content may accumulate across documents -
    // confirm against DocBuffer semantics.
    uima::DocBuffer docBuffer;
    CAS* cas = rclEngine.newCAS();
    failIfNotTrue(cas != NULL);

    /* iterate through all doc specs on command line */
    for (rclConsole.setToFirst(); rclConsole.isValid(); rclConsole.setToNext()) {
        // The console argument only drives the iteration; the input itself is a
        // hard-wired data file.
        UnicodeString filename("tdoc_001_enus_850.asc");
        UnicodeString fn = ResourceManager::resolveFilename(filename, filename);
        uima::util::Filename clInputFilename(UnicodeStringRef(fn).asUTF8().c_str());

        if (!clInputFilename.isExistent()) {
            rclConsole.fatal(1, _TEXT("Input file not found"), clInputFilename.getAsCString());
        }
        if (crclCCSID == NULL) /**** (!crclCCSID.isValid()) ***/
        {
            rclConsole.fatal(1, _TEXT("Invalid CCSID specified - cannot load document"),
                             crclCCSID /**crclCCSID.getName() **/);
        }
        rclConsole.format(_TEXT("Adding Document"), clInputFilename.getAsCString());
        size_t uiSize = ftool_ReadFileToString(clInputFilename, clstrInputFileContent);

        docBuffer.addDocPart(clstrInputFileContent.data(), uiSize, crclCCSID);
        // For real file based documents we only add a term annotation for the
        // whole "document" if the appropriate switch is set
        if (gs_bDocIsTerm) {
            assert(false);
        }

        // The per-iteration UnicodeString copy of the file content was removed:
        // it was only referenced by the disabled addDocPart()/addDoc()
        // calling-sequence checks and otherwise went unused.

        cas->setDocumentText(docBuffer.getDocBuffer(), docBuffer.getLength());
        cas->getDocumentAnnotation().setLanguage(crclLanguage);

        utErrorId = rclEngine.process(*cas);
        uimaToolHandleErrorId(rclConsole, utErrorId, rclEngine.getAnnotatorContext().getLogger().getLastErrorAsCStr(),
                              _TEXT("uima::Engine::processDocument"), gs_lExpectedProcessDocumentRc);

        // The iterator test is disabled, so asking for it after a successful
        // process() is treated as a test failure.
        if (utErrorId == UIMA_ERR_NONE && gs_bDoIterTest) {
            failIfNotTrue(false);
            //         iteratorTest(rclConsole, rclEngine);
        }

        utErrorId = cas->reset();
        uimaToolHandleErrorId(rclConsole, utErrorId, rclEngine.getAnnotatorContext().getLogger().getLastErrorAsCStr(),
                              _TEXT("uima::Engine::resetDocument"));
        ++uiNumOfInputDocs;
    }
    if (uiNumOfInputDocs == 0) {
        rclConsole.warning(_TEXT("No input file(s) specified"));
    }
    delete cas;
}