/*!
 * Explicit constructor.
 *
 * Creates an annotation of type tAnnotatorPerformanceAnnotation spanning
 * offsets 0..0 in the given CAS, stores the component name and the elapsed
 * time on it, and adds it to the CAS index repository.
 *
 * @param aCas           CAS in which the annotation is created and indexed
 * @param annotatorName  value handed to setComponentName()
 * @param elapsedMillis  value handed to setElapsedTime()
 */
AnnotatorPerformanceAnnotation::AnnotatorPerformanceAnnotation(
    CAS& aCas,
    const UnicodeString& annotatorName,
    long elapsedMillis)
    : AnnotationWrapper(aCas)
{
  FSIndexRepository& repository = aCas.getIndexRepository();

  annotation = aCas.createAnnotation(tAnnotatorPerformanceAnnotation, 0, 0);

  // Features are filled in before the annotation is added to the indexes.
  setComponentName(annotatorName);
  setElapsedTime(elapsedMillis);

  repository.addFS(annotation);
}
/*!
 * Explicit constructor.
 *
 * Creates an annotation of type tSentenceAnnotation covering [begin, end)
 * as passed to CAS::createAnnotation(), records the first and last token on
 * it, and adds it to the CAS index repository.
 *
 * @param aCas        CAS in which the annotation is created and indexed
 * @param begin       begin offset handed to createAnnotation()
 * @param end         end offset handed to createAnnotation()
 * @param firstToken  value handed to setFirstToken()
 * @param lastToken   value handed to setLastToken()
 */
SentenceAnnotation::SentenceAnnotation(
    CAS& aCas,
    size_t begin,
    size_t end,
    const TokenAnnotation& firstToken,
    const TokenAnnotation& lastToken)
    : ContextAreaAnnotation(aCas)
{
  FSIndexRepository& repository = aCas.getIndexRepository();

  annotation = aCas.createAnnotation(tSentenceAnnotation, begin, end);

  // Features are filled in before the annotation is added to the indexes.
  setFirstToken(firstToken);
  setLastToken(lastToken);

  repository.addFS(annotation);
}
/**
 * Checks that an engine reports UIMA_ERR_ENGINE_INVALID_CALLING_SEQUENCE
 * when process() or destroy() is called after the engine was destroyed.
 *
 * Sequence: create engine -> process -> reset CAS -> destroy -> process
 * (must fail) -> destroy again (must fail).
 *
 * @param rclConsole          console used for progress output
 * @param cpszConfigFilename  descriptor file handed to createTextAnalysisEngine()
 */
void testCallingSequence1(uima::util::ConsoleUI& rclConsole, const TCHAR* cpszConfigFilename)
/* ----------------------------------------------------------------------- */
{
  ErrorInfo errInfo;

  uima::TextAnalysisEngine* pEngine =
      TextAnalysisEngine::createTextAnalysisEngine(cpszConfigFilename, errInfo);
  failIfNotTrue(errInfo.getErrorId() == UIMA_ERR_NONE);
  failIfNotTrue(pEngine != NULL);

  CAS* cas = pEngine->newCAS();
  failIfNotTrue(cas != NULL);

  // The document text lives in a named string so that the reference built
  // from it stays valid for the whole test. Building the reference from a
  // temporary UnicodeString would leave it referring to a destroyed object.
  UnicodeString docText("a");
  uima::UnicodeStringRef us(docText);

  rclConsole.formatHeader(_TEXT("testing Engine CallingSequence1"));

  /* regular processing followed by an orderly shutdown */
  cas->setDocumentText(us.getBuffer(), us.length());
  cas->getDocumentAnnotation().setLanguage("en");
  failIfNotTrue(pEngine->process(*cas) == UIMA_ERR_NONE);
  failIfNotTrue(cas->reset() == UIMA_ERR_NONE);
  failIfNotTrue(pEngine->destroy() == UIMA_ERR_NONE);

  /* every further call on the destroyed engine must be rejected */
  cas->setDocumentText(us.getBuffer(), us.length());
  cas->getDocumentAnnotation().setLanguage("en");
  failIfNotTrue(pEngine->process(*cas) == UIMA_ERR_ENGINE_INVALID_CALLING_SEQUENCE);

  TyErrorId deInitRC = pEngine->destroy();
  rclConsole.format("RC of deInit()", deInitRC);
  failIfNotTrue(deInitRC == UIMA_ERR_ENGINE_INVALID_CALLING_SEQUENCE);

  rclConsole.formatBool(_TEXT("testing Engine CallingSequence1 OK"), true);  //lint !e944: argument for operator '!' always evaluates to False

  delete cas;
  delete pEngine;
}
/**
 * Runs the "SimpleTextSegmenter.xml" descriptor as a CAS multiplier and
 * checks its operational properties and the number of CASes it produces.
 *
 * The input document contains three sentences; the test expects exactly
 * three non-empty output CASes.
 *
 * @param rclConsole  console used for progress output
 */
void testCasMultiplier(uima::util::ConsoleUI& rclConsole)
/* ----------------------------------------------------------------------- */
{
  rclConsole.info("testCasMultiplier start.");

  uima::TextAnalysisEngine* pEngine;
  ErrorInfo errInfo;

  UnicodeString filename("SimpleTextSegmenter.xml");
  UnicodeString fn = ResourceManager::resolveFilename(filename, filename);
  pEngine = TextAnalysisEngine::createTextAnalysisEngine(UnicodeStringRef(fn).asUTF8().c_str(), errInfo);
  failIfNotTrue(errInfo.getErrorId() == UIMA_ERR_NONE);
  failIfNotTrue(pEngine != NULL);

  //test operational properties settings
  failIfNotTrue(pEngine->getAnalysisEngineMetaData().getOperationalProperties()->getOutputsNewCASes() == true);
  failIfNotTrue(pEngine->getAnalysisEngineMetaData().getOperationalProperties()->getModifiesCas() == false);
  failIfNotTrue(pEngine->getAnalysisEngineMetaData().getOperationalProperties()->isMultipleDeploymentAllowed() == true);

  CAS* cas = pEngine->newCAS();
  // Same guard as the other engine tests: do not dereference a missing CAS.
  failIfNotTrue(cas != NULL);

  cas->setDocumentText(
      UnicodeString("This is the first sentence. This is the second sentence. This is the third sentence."));

  CASIterator iter = pEngine->processAndOutputNewCASes(*cas);
  int num = 0;
  while (iter.hasNext()) {
    num++;
    CAS& seg = iter.next();
    failIfNotTrue(seg.getDocumentText().length() > 0);
    // Each output CAS is handed back to the annotator context once checked.
    pEngine->getAnnotatorContext().releaseCAS(seg);
  }
  failIfNotTrue(num == 3);

  // CAS first, engine second - the order used by the other tests.
  delete cas;
  delete pEngine;

  rclConsole.info("testCasMultiplier finished.");
}
void testProcessTerm(uima::util::ConsoleUI& rclConsole, uima::TextAnalysisEngine& rclEngine, ///const uima::CCSID & crclCCSID, const char* crclCCSID, const uima::Language& crclLanguage, const TCHAR* cpszInpTerm) /* ----------------------------------------------------------------------- */ { TyErrorId utErrorId; failIfNotTrue(EXISTS(cpszInpTerm)); rclConsole.format(_TEXT("Input term"), cpszInpTerm); DocBuffer docBuffer; docBuffer.addDocPart(cpszInpTerm, strlen(cpszInpTerm), crclCCSID); //? assert(false); CAS* cas = rclEngine.newCAS(); failIfNotTrue(cas != NULL); // For terms we always add a term annotation for the whole "document" /* since we already added a complete doc, we may not add anything else */ cas->setDocumentText(docBuffer.getDocBuffer(), docBuffer.getLength()); cas->getDocumentAnnotation().setLanguage(crclLanguage); utErrorId = rclEngine.process(*cas); uimaToolHandleErrorId(rclConsole, utErrorId, rclEngine.getAnnotatorContext().getLogger().getLastErrorAsCStr(), _TEXT("uima::Engine::processDocument"), gs_lExpectedProcessDocumentRc); if (utErrorId == UIMA_ERR_NONE && gs_bDoIterTest) { failIfNotTrue(false); // iteratorTest(rclConsole, rclEngine); } utErrorId = cas->reset(); uimaToolHandleErrorId(rclConsole, utErrorId, rclEngine.getAnnotatorContext().getLogger().getLastErrorAsCStr(), _TEXT("uima::Engine::resetDocument")); delete cas; }
/**
 * Dumps the given CAS to the annotator's output stream.
 *
 * In append mode all data of a session/collection goes into one file that
 * is managed outside this method. Otherwise the dump file is opened here
 * before writing and closed again afterwards, so it is rewritten for each
 * document.
 *
 * @param tcas  CAS to dump
 * @return the error code of openOutputFile() if opening fails,
 *         UIMA_ERR_NONE otherwise
 */
TyErrorId AnnotatorDump::process(CAS & tcas, const ResultSpecification & ) {
  const bool perDocumentFile = !iv_bAppendFile;

  if (perDocumentFile) {
    const TyErrorId openRc = openOutputFile();
    if (openRc != UIMA_ERR_NONE) {
      return openRc;
    }
  }

  assert(iv_clOutputStream.good());

  // Optionally dump the document buffer ahead of the CAS content.
  if (iv_bDumpDocBuffer) {
    UnicodeStringRef documentText = tcas.getDocumentText();
    outputDocBuffer(documentText);
  }

  // Pick the writer that matches the configured output style.
  // NOTE(review): with assertions compiled out, an unexpected style leaves
  // the writer NULL and the write() below dereferences it - confirm that
  // iv_enOutputStyle is validated before process() is reached.
  uima::CASWriterABase * selectedWriter = NULL;
  if (iv_enOutputStyle == Xml) {
    selectedWriter = new uima::XMLDumpWriter(tcas, iv_bDumpDocBuffer);
  } else if (iv_enOutputStyle == XCas) {
    selectedWriter = new uima::XCASWriter(tcas, iv_bDumpDocBuffer);
  } else {
    assert(false);
  }
  assert( EXISTS(selectedWriter) );

  // The auto_ptr deletes the writer when it goes out of scope.
  auto_ptr<CASWriterABase> apWriter( selectedWriter );
  apWriter->write(iv_clOutputStream);

  if (perDocumentFile) {
    closeOutputFile();
  }

  return (TyErrorId)UIMA_ERR_NONE;
}
/**
 * Checks that an engine accepts an empty (NULL) document text and that
 * process() may be called several times in a row on the same CAS.
 *
 * @param rclConsole          console used for progress output
 * @param cpszConfigFilename  descriptor file handed to createTextAnalysisEngine()
 */
void testCallingSequence3(uima::util::ConsoleUI& rclConsole, const TCHAR* cpszConfigFilename)
/* ----------------------------------------------------------------------- */
{
  // These locals are constructed but not referenced again in this test.
  uima::Language clLanguage(MAIN_DEFAULT_LANG);
  const char* clCCSID = MAIN_DEFAULT_CCSID_STR;
  UnicodeString us("a");
  UnicodeStringRef uref(us);

  rclConsole.formatHeader(_TEXT("testing Engine CallingSequence3"));

  ErrorInfo errInfo;
  uima::TextAnalysisEngine* pEngine =
      TextAnalysisEngine::createTextAnalysisEngine(cpszConfigFilename, errInfo);
  failIfNotTrue(errInfo.getErrorId() == UIMA_ERR_NONE);
  failIfNotTrue(pEngine != NULL);

  CAS* cas = pEngine->newCAS();
  failIfNotTrue(cas != NULL);

  /* test for NULL ptrs */
  UnicodeStringRef nullText(NULL);
  cas->setDocumentText(nullText.getBuffer(), nullText.length());
  cas->getDocumentAnnotation().setLanguage("en");
  failIfNotTrue(pEngine->process(*cas) == UIMA_ERR_NONE);
  failIfNotTrue(cas->reset() == UIMA_ERR_NONE);

  /* test for subsequent processes */
  cas->setDocumentText(nullText.getBuffer(), nullText.length());
  cas->getDocumentAnnotation().setLanguage("en");
  failIfNotTrue(pEngine->process(*cas) == UIMA_ERR_NONE);
  failIfNotTrue(pEngine->process(*cas) == UIMA_ERR_NONE);

  const TyErrorId destroyRc = pEngine->destroy();
  failIfNotTrue(destroyRc == UIMA_ERR_NONE);

  delete cas;
  delete pEngine;

  rclConsole.formatBool(_TEXT("testing Engine CallingSequence3 OK"), true);  //lint !e944: argument for operator '!' always evaluates to False
}
int main(int argc, char* argv[]) { try { int loglevel = -1; /* check the number of command line args */ if (argc != 3 && argc != 5) { tell(); return 1; } if (argc == 5) { if (!strcmp(argv[3], "-l")) { loglevel = atoi(argv[4]); if (loglevel < LogStream::EnMessage) { cerr << "LogLevel less than minimum value (Message) = " << LogStream::EnMessage << endl; return 1; } if (loglevel > LogStream::EnError) { cerr << "LogLevel greater than maximum value (Error) = " << LogStream::EnError << endl; return 1; } } else { cerr << "Inexpected option: " << argv[3] << endl; tell(); return 1; } } /* Create/link up to a resource manager instance (singleton) */ (void) ResourceManager::createInstance("UIMACPP_EXAMPLE_APPLICATION"); if (loglevel >= 0) { ResourceManager::getInstance().setLoggingLevel((LogStream::EnEntryType) loglevel); } std::string a = "abc"; UnicodeString b = "wxyz"; UChar c = 'c'; TyErrorId utErrorId; // Variable to store return codes ErrorInfo errorInfo; // Variable to stored detailed error info /* Initialize engine with filename of config-file */ AnalysisEngine* pEngine = Framework::createAnalysisEngine(argv[1], errorInfo); CheckError(errorInfo); /* Get a new CAS */ CAS* tcas = pEngine->newCAS(); /* process input xcas */ util::DirectoryWalk dirwalker(argv[2]); if (dirwalker.isValid()) { util::Filename infile(argv[2], "FilenamePlaceHolder"); while (dirwalker.isValid()) { // Process all files or just the ones with matching suffix if (dirwalker.isFile() && dirwalker.matchesWildcardPattern("*.txt")) { infile.setNewName(dirwalker.getNameWithoutPath()); std::string afile(infile.getAsCString()); //process the file processFile(afile, pEngine, tcas); //reset the cas tcas->reset(); } //get the next xcas file in the directory dirwalker.setToNext(); } } else { /* If has no directory entries then probably a file */ cout << "ExampleApplication: processing file " << argv[2] << endl; std::string afile(argv[2]); //process the cas processFile(afile, pEngine, tcas); } /* call 
collectionProcessComplete */ utErrorId = pEngine->collectionProcessComplete(); /* Free ressorces */ utErrorId = pEngine->destroy(); CheckError(utErrorId, *pEngine); delete tcas; delete pEngine; } catch (Exception e) { cout << "ExampleApplication " << e << endl; } /* If we got this far everything went OK */ cout << "ExampleApplication: processing finished sucessfully! " << endl; return (0); }
// Look for "EnglishDocument" sofa and read it as a stream TyErrorId process(CAS & rCas, ResultSpecification const & crResultSpecification) { cout << "SofaDataAnnotator: process() begins" << endl; /** get the CAS view of the sofa */ CAS * tcas = rCas.getView("EnglishDocument"); /** get the handle to the index repository */ FSIndexRepository & indexRep = tcas->getIndexRepository(); /** get the default text sofa */ SofaFS textSofa = tcas->getSofa(); /** get the handle to the sofa data stream */ SofaDataStream * pStream = textSofa.getSofaDataStream(); /** open the stream */ int rc = pStream->open(); if (rc != 0) { cout << "open failed " << rc << endl; return (TyErrorId)UIMA_ERR_USER_ANNOTATOR_COULD_NOT_PROCESS; } /** get the total stream size */ size_t streamSize = pStream->getTotalStreamSizeInBytes(); /** read file contents into a buffer */ char * pBuffer = new char[streamSize+1]; memset(pBuffer,'\n' ,streamSize+1); int elementsize=1; pStream->read(pBuffer, elementsize, streamSize); cout << endl; cout.write(pBuffer, streamSize); cout << endl; /** convert to unicode */ UnicodeString ustrInputText(pBuffer, streamSize+1, "utf-8"); /** find tokens and annotate */ UnicodeString delim(" "); UChar *myLocalSaveState; UChar * pInputText = (UChar*) ustrInputText.getBuffer(); const UChar * pToken = pInputText; const UChar * pNextToken = u_strtok_r((UChar*) pInputText, delim.getBuffer(), &myLocalSaveState); int start = 1; int tokenlength=0; int nTokens = 0; while ( (pNextToken=u_strtok_r(NULL, delim.getBuffer(), &myLocalSaveState)) ) { tokenlength = pNextToken - pToken; AnnotationFS annotFS = tcas->createAnnotation(annot, start, start+tokenlength-2); indexRep.addFS(annotFS); ++nTokens; start += tokenlength; pToken = pNextToken; } /* last token */ tokenlength = pNextToken - pToken; AnnotationFS annotFS = tcas->createAnnotation(annot, start, streamSize); indexRep.addFS(annotFS); ++nTokens; cout << endl << " Annotated " << nTokens << " tokens." 
<< endl << endl; /** close the stream */ pStream->close(); delete pStream; delete[] pBuffer; cout << "SofaDataAnnotator: process() ends" << endl; return (TyErrorId)UIMA_ERR_NONE; }
void testProcessDocu(uima::util::ConsoleUI& rclConsole, uima::TextAnalysisEngine& rclEngine, const char* crclCCSID, const uima::Language& crclLanguage) /* ----------------------------------------------------------------------- */ { TyErrorId utErrorId; string clstrInputFileContent; size_t uiNumOfInputDocs = 0; uima::DocBuffer docBuffer; CAS* cas = rclEngine.newCAS(); failIfNotTrue(cas != NULL); /* iterate through all doc specs on command line */ for (rclConsole.setToFirst(); rclConsole.isValid(); rclConsole.setToNext()) { ////uima::util::Filename clInputFilename(rclConsole.getAsCString()); //replaced with a hard wired data file UnicodeString filename("tdoc_001_enus_850.asc"); UnicodeString fn = ResourceManager::resolveFilename(filename, filename); uima::util::Filename clInputFilename(UnicodeStringRef(fn).asUTF8().c_str()); size_t uiSize; if (!clInputFilename.isExistent()) { rclConsole.fatal(1, _TEXT("Input file not found"), clInputFilename.getAsCString()); } if (crclCCSID == NULL) /**** (!crclCCSID.isValid()) ***/ { rclConsole.fatal(1, _TEXT("Invalid CCSID specified - cannot load document"), crclCCSID /**crclCCSID.getName() **/); } rclConsole.format(_TEXT("Adding Document"), clInputFilename.getAsCString()); uiSize = ftool_ReadFileToString(clInputFilename, clstrInputFileContent); docBuffer.addDocPart(clstrInputFileContent.data(), uiSize, crclCCSID); // For real file based documents we only add a term annotation for the // whole "document" if the appropriate switch is set if (gs_bDocIsTerm) { assert(false); } UnicodeString ustrInputFileContent(clstrInputFileContent.data(), uiSize, crclCCSID); /* since we already added a complete doc, we may not add anything else */ /// failIfNotTrue(rclEngine.addDocPart(ustrInputFileContent) == UIMA_ERR_ENGINE_INVALID_CALLING_SEQUENCE); /// failIfNotTrue(rclEngine.addDoc(ustrInputFileContent) == UIMA_ERR_ENGINE_INVALID_CALLING_SEQUENCE); cas->setDocumentText(docBuffer.getDocBuffer(), docBuffer.getLength()); 
cas->getDocumentAnnotation().setLanguage(crclLanguage); utErrorId = rclEngine.process(*cas); uimaToolHandleErrorId(rclConsole, utErrorId, rclEngine.getAnnotatorContext().getLogger().getLastErrorAsCStr(), _TEXT("uima::Engine::processDocument"), gs_lExpectedProcessDocumentRc); if (utErrorId == UIMA_ERR_NONE && gs_bDoIterTest) { failIfNotTrue(false); // iteratorTest(rclConsole, rclEngine); } utErrorId = cas->reset(); uimaToolHandleErrorId(rclConsole, utErrorId, rclEngine.getAnnotatorContext().getLogger().getLastErrorAsCStr(), _TEXT("uima::Engine::resetDocument")); ++uiNumOfInputDocs; } if (uiNumOfInputDocs == 0) { rclConsole.warning(_TEXT("No input file(s) specified")); } delete cas; }
/**
 * "Translates" the English document word by word into a new German view.
 *
 * Reads the text of the "EnglishDocument" view, creates the
 * "GermanDocument" view, and for every blank-delimited word
 *  - adds an annotation of type `annot` on the English text,
 *  - appends translate(word) to the German text,
 *  - adds an annotation of type `cross` on the German text whose `other`
 *    feature points back to the English annotation.
 * Finally the accumulated translation is set as the German document text.
 *
 * Only the first 99 characters of the English text are looked at, and the
 * translation is limited to 399 characters.
 *
 * @param rCAS                   CAS holding the "EnglishDocument" view
 * @param crResultSpecification  not used by this implementation
 * @return UIMA_ERR_NONE
 */
TyErrorId process(CAS & rCAS, ResultSpecification const & crResultSpecification) {
  CAS *engTcas, *germTcas;
  UChar *myLocalSaveState;

  // Look for english document and "translate" to German
  cout << "SofaExampleAnnotator: process() begins" << endl;

  // get English view and the text it covers
  engTcas = rCAS.getView("EnglishDocument");
  DocumentFS docFS = engTcas->getDocumentAnnotation();
  UnicodeStringRef engText = docFS.getCoveredText();
  cout << " English Input: " << engText << endl;

  // Create the output German text Sofa and open CAS view
  germTcas = rCAS.createView("GermanDocument");

  // make copy of document for the u_strtok_r function (100 character limit!)
  // The copy length is bounded by the text length and the work buffer is
  // always terminated explicitly: u_strncpy neither terminates a full
  // buffer nor may it read beyond the end of the source text.
  const int kWorkCapacity = 100;
  UChar uWork[kWorkCapacity];
  int copyLength = (int) engText.length();
  if (copyLength > kWorkCapacity - 1) {
    copyLength = kWorkCapacity - 1;
  }
  u_strncpy(uWork, engText.getBuffer(), copyLength);
  uWork[copyLength] = 0;

  // Setup for translated text
  const int kTranslationCapacity = 400;
  int germBegin = 0;
  int germEnd = 0;
  UChar translation[kTranslationCapacity];
  translation[0]=0;

  // get two IR handles for adding annotations to the appropriate view
  FSIndexRepository & engIndexRep = engTcas->getIndexRepository();
  FSIndexRepository & germIndexRep = germTcas->getIndexRepository();

  // Parse the English text
  UChar uDelim[2];
  UnicodeString delimUS(" ");
  u_strncpy(uDelim, delimUS.getBuffer(), 1);
  uDelim[1] = 0;
  UChar * next = u_strtok_r(uWork, uDelim, &myLocalSaveState);

  while (next) {
    // Create annotation on source text
    AnnotationFS engAnnot = engTcas->createAnnotation(annot, next-uWork, (next-uWork)+u_strlen(next));
    engIndexRep.addFS(engAnnot);

    // Translate word-by-word
    const UChar * gword = translate(next);
    const int gwordLength = u_strlen(gword);

    // Stop before the translation buffer would overflow. At the top of each
    // iteration germBegin equals the current length of `translation`. The
    // English annotation for this word has already been added above, so it
    // is left without a German counterpart.
    const int separatorLength = (germBegin > 0) ? 1 : 0;
    if (germBegin + separatorLength + gwordLength >= kTranslationCapacity) {
      break;
    }

    // Accumulate the total translated document
    if (germBegin > 0) {
      // if not the first word, add space before
      u_strncat(translation, uDelim, 1);
      germBegin += 1;
    }
    u_strcat(translation, gword);

    // Create annotation on output text
    germEnd = germBegin + gwordLength;
    AnnotationFS germAnnot = germTcas->createAnnotation(cross, germBegin, germEnd);
    germIndexRep.addFS(germAnnot);

    // add link to English text
    germAnnot.setFSValue(other, engAnnot);
    germBegin = germEnd;

    next = u_strtok_r(NULL, uDelim, &myLocalSaveState);
  }

  // set documentText with accumulated translation
  germTcas->setDocumentText( translation, u_strlen(translation), true );
  cout << " German(!) Output: " << germTcas->getDocumentText() << endl;

  cout << "SofaExampleAnnotator: process() ends" << endl;
  return (TyErrorId)UIMA_ERR_NONE;
}
void CASDeserializer::deserializeIndexedFSs(vector<SerializedCAS::TyNum> & crIndexFSs, uima::CAS & rCAS) { uima::internal::CASImpl & rCASImpl = uima::internal::CASImpl::promoteCAS(rCAS); uima::lowlevel::FSHeap & crHeap = rCASImpl.getHeap(); uima::lowlevel::IndexRepository * crIndexRep = &rCASImpl.getIndexRepository(); uima::lowlevel::FSHeap::TyFSHeap const & rTempFSHeap = crHeap.iv_clTemporaryHeap; SerializedCAS::TyNum iMaxOffset = rTempFSHeap.getTopOfHeap(); vector<SerializedCAS::TyNum>::const_iterator cit, loopit; vector<SerializedCAS::TyNum> perLoopIndexedFSs; cit = crIndexFSs.begin(); int numViews = *cit++; int loopSize = *cit; crIndexRep->reset(); // deserialize base CAS if (loopSize > 0) { lastSegmentUsed = 0; perLoopIndexedFSs.insert(perLoopIndexedFSs.end(), cit+1, cit+1+loopSize); cit += loopSize + 1; for (loopit = perLoopIndexedFSs.begin(); loopit != perLoopIndexedFSs.end(); ++loopit) { assert( *loopit < iMaxOffset ); crIndexRep->add( *loopit ); } } // book keeping for all Sofas rCAS.getBaseCas()->iv_sofaCount = 1; // reserve for initial view FSIndex fsIdx = crIndexRep->getIndex(CAS::INDEXID_SOFA); FSIterator fsIt = fsIdx.iterator(); while (fsIt.isValid()) { SofaFS aSofa = (SofaFS) fsIt.get(); if ( 0 == aSofa.getSofaID().compare(UnicodeString(CAS::NAME_DEFAULT_SOFA)) ) { rCAS.registerInitialSofa(); } else { // only bump sofa count if not initial View rCAS.bumpSofaCount(); } rCAS.getView(aSofa)->registerView(aSofa); fsIt.moveToNext(); } for (int view = 1; view <= numViews; view++) { // Check if sofa's index has anything in it loopSize = *cit; if (0 == loopSize) { cit++; continue; } CAS* tcas = rCAS.getViewBySofaNum(view); uima::internal::CASImpl & crTCASImpl = uima::internal::CASImpl::promoteCAS(*tcas); crIndexRep = &crTCASImpl.getIndexRepository(); crIndexRep->reset(); perLoopIndexedFSs.clear(); perLoopIndexedFSs.insert(perLoopIndexedFSs.end(), cit+1, cit+1+loopSize); cit += loopSize + 1; for (loopit = perLoopIndexedFSs.begin(); loopit != 
perLoopIndexedFSs.end(); ++loopit) { assert( *loopit < iMaxOffset ); crIndexRep->add( *loopit ); } tcas->pickupDocumentAnnotation(); } }