// Look for "EnglishDocument" sofa and read it as a stream TyErrorId process(CAS & rCas, ResultSpecification const & crResultSpecification) { cout << "SofaDataAnnotator: process() begins" << endl; /** get the CAS view of the sofa */ CAS * tcas = rCas.getView("EnglishDocument"); /** get the handle to the index repository */ FSIndexRepository & indexRep = tcas->getIndexRepository(); /** get the default text sofa */ SofaFS textSofa = tcas->getSofa(); /** get the handle to the sofa data stream */ SofaDataStream * pStream = textSofa.getSofaDataStream(); /** open the stream */ int rc = pStream->open(); if (rc != 0) { cout << "open failed " << rc << endl; return (TyErrorId)UIMA_ERR_USER_ANNOTATOR_COULD_NOT_PROCESS; } /** get the total stream size */ size_t streamSize = pStream->getTotalStreamSizeInBytes(); /** read file contents into a buffer */ char * pBuffer = new char[streamSize+1]; memset(pBuffer,'\n' ,streamSize+1); int elementsize=1; pStream->read(pBuffer, elementsize, streamSize); cout << endl; cout.write(pBuffer, streamSize); cout << endl; /** convert to unicode */ UnicodeString ustrInputText(pBuffer, streamSize+1, "utf-8"); /** find tokens and annotate */ UnicodeString delim(" "); UChar *myLocalSaveState; UChar * pInputText = (UChar*) ustrInputText.getBuffer(); const UChar * pToken = pInputText; const UChar * pNextToken = u_strtok_r((UChar*) pInputText, delim.getBuffer(), &myLocalSaveState); int start = 1; int tokenlength=0; int nTokens = 0; while ( (pNextToken=u_strtok_r(NULL, delim.getBuffer(), &myLocalSaveState)) ) { tokenlength = pNextToken - pToken; AnnotationFS annotFS = tcas->createAnnotation(annot, start, start+tokenlength-2); indexRep.addFS(annotFS); ++nTokens; start += tokenlength; pToken = pNextToken; } /* last token */ tokenlength = pNextToken - pToken; AnnotationFS annotFS = tcas->createAnnotation(annot, start, streamSize); indexRep.addFS(annotFS); ++nTokens; cout << endl << " Annotated " << nTokens << " tokens." 
<< endl << endl; /** close the stream */ pStream->close(); delete pStream; delete[] pBuffer; cout << "SofaDataAnnotator: process() ends" << endl; return (TyErrorId)UIMA_ERR_NONE; }
/**
 * Look for the English document and "translate" it to German, word by word.
 *
 * Tokenizes (on spaces) at most the first 99 characters of the
 * "EnglishDocument" view, annotates every token there, builds the translated
 * text in a new "GermanDocument" view, and links each German annotation to
 * its English source annotation.
 *
 * @param rCAS                   CAS providing the English view; receives the German view.
 * @param crResultSpecification  not used by this implementation.
 * @return UIMA_ERR_NONE.
 */
TyErrorId process(CAS & rCAS, ResultSpecification const & crResultSpecification) {
  // Capacities of the fixed work buffers, in UChars including the terminating NUL.
  enum { WORK_CAPACITY = 100, TRANSLATION_CAPACITY = 400 };

  cout << "SofaExampleAnnotator: process() begins" << endl;

  // get English view and its document text
  CAS * engTcas = rCAS.getView("EnglishDocument");
  DocumentFS docFS = engTcas->getDocumentAnnotation();
  UnicodeStringRef engText = docFS.getCoveredText();
  cout << " English Input: " << engText << endl;

  // Create the output German text Sofa and open CAS view
  CAS * germTcas = rCAS.createView("GermanDocument");

  // Make a bounded, NUL-terminated copy of the document for u_strtok_r
  // (100 character limit!). The copy length is clamped to the text length so
  // the source buffer is never read past its end, and the terminator is
  // written explicitly because u_strncpy does not add one when it fills the
  // whole count.
  UChar uWork[WORK_CAPACITY];
  int copyLen = engText.length();
  if (copyLen > WORK_CAPACITY - 1) {
    copyLen = WORK_CAPACITY - 1;
  }
  u_strncpy(uWork, engText.getBuffer(), copyLen);
  uWork[copyLen] = 0;

  // Setup for translated text; germBegin always equals the current length
  // of `translation`.
  int germBegin = 0;
  int germEnd = 0;
  UChar translation[TRANSLATION_CAPACITY];
  translation[0] = 0;

  // get two IR handles for adding annotations to the appropriate view
  FSIndexRepository & engIndexRep = engTcas->getIndexRepository();
  FSIndexRepository & germIndexRep = germTcas->getIndexRepository();

  // Parse the English text on single spaces
  const UChar uDelim[] = { 0x0020, 0 };
  UChar * myLocalSaveState = NULL;
  for (UChar * next = u_strtok_r(uWork, uDelim, &myLocalSaveState);
       next != NULL;
       next = u_strtok_r(NULL, uDelim, &myLocalSaveState)) {
    // Translate word-by-word.
    // NOTE(review): translate() is defined elsewhere; assumed to return a
    // non-NULL, NUL-terminated string - confirm.
    const UChar * gword = translate(next);
    const int gwordLen = u_strlen(gword);
    const int sepLen = (germBegin > 0) ? 1 : 0;  // space before all but the first word

    // Stop before the fixed translation buffer would overflow; the output is
    // truncated, in the same spirit as the input character limit above.
    if (germBegin + sepLen + gwordLen >= TRANSLATION_CAPACITY) {
      break;
    }

    // Create annotation on source text (offsets in uWork equal document
    // offsets because uWork is a prefix copy of the document)
    const int engBegin = static_cast<int>(next - uWork);
    AnnotationFS engAnnot =
        engTcas->createAnnotation(annot, engBegin, engBegin + u_strlen(next));
    engIndexRep.addFS(engAnnot);

    // Accumulate the total translated document
    if (sepLen > 0) {
      u_strncat(translation, uDelim, 1);
      germBegin += 1;
    }
    u_strcat(translation, gword);

    // Create annotation on output text
    germEnd = germBegin + gwordLen;
    AnnotationFS germAnnot = germTcas->createAnnotation(cross, germBegin, germEnd);
    germIndexRep.addFS(germAnnot);
    // add link to English text
    germAnnot.setFSValue(other, engAnnot);

    germBegin = germEnd;
  }

  // set documentText with accumulated translation
  germTcas->setDocumentText( translation, u_strlen(translation), true );
  cout << " German(!) Output: " << germTcas->getDocumentText() << endl;

  cout << "SofaExampleAnnotator: process() ends" << endl;
  return (TyErrorId)UIMA_ERR_NONE;
}