/*!
		 * Creates a new performance annotation in the given CAS and indexes it.
		 *
		 * The underlying annotation is of type tAnnotatorPerformanceAnnotation and
		 * spans the offsets 0..0. The component name and the elapsed time are
		 * stored through the corresponding setters before the annotation is added
		 * to the index repository of the CAS.
		 *
		 * @param aCas           CAS in which the annotation is created and indexed
		 * @param annotatorName  value handed to setComponentName()
		 * @param elapsedMillis  value handed to setElapsedTime()
		 */
		AnnotatorPerformanceAnnotation::AnnotatorPerformanceAnnotation(CAS& aCas, const UnicodeString& annotatorName, long elapsedMillis)
			: AnnotationWrapper(aCas)
		{
			// Fetch the repository up front, before anything is created.
			FSIndexRepository& repository = aCas.getIndexRepository();

			annotation = aCas.createAnnotation(tAnnotatorPerformanceAnnotation, 0, 0);

			// Fill in the features first, then make the annotation visible in the indexes.
			setComponentName(annotatorName);
			setElapsedTime(elapsedMillis);

			repository.addFS(annotation);
		}
		/*!
		 * Creates a new sentence annotation in the given CAS and indexes it.
		 *
		 * The underlying annotation is of type tSentenceAnnotation and covers
		 * [begin, end) as passed to CAS::createAnnotation(). The first and last
		 * token are stored through their setters before the annotation is added
		 * to the index repository of the CAS.
		 *
		 * @param aCas        CAS in which the annotation is created and indexed
		 * @param begin       begin offset handed to createAnnotation()
		 * @param end         end offset handed to createAnnotation()
		 * @param firstToken  value handed to setFirstToken()
		 * @param lastToken   value handed to setLastToken()
		 */
		SentenceAnnotation::SentenceAnnotation(CAS& aCas, size_t begin, size_t end, const TokenAnnotation& firstToken, const TokenAnnotation& lastToken)
			: ContextAreaAnnotation(aCas)
		{
			// Fetch the repository up front, before anything is created.
			FSIndexRepository& repository = aCas.getIndexRepository();

			annotation = aCas.createAnnotation(tSentenceAnnotation, begin, end);

			// Fill in the features first, then make the annotation visible in the indexes.
			setFirstToken(firstToken);
			setLastToken(lastToken);

			repository.addFS(annotation);
		}
Пример #3
0
  /**
   * Looks for the "EnglishDocument" sofa, reads it as a data stream, echoes the
   * raw bytes to stdout and adds one annotation of type `annot` per
   * space-delimited token to the index repository of that view.
   *
   * @param rCas                   CAS from which the "EnglishDocument" view is taken
   * @param crResultSpecification  not used by this implementation
   * @return UIMA_ERR_NONE on success;
   *         UIMA_ERR_USER_ANNOTATOR_COULD_NOT_PROCESS when the sofa data stream
   *         is unavailable, cannot be opened, or the text cannot be prepared
   *         for tokenisation.
   */
  TyErrorId process(CAS & rCas, ResultSpecification const & crResultSpecification) {
    cout << "SofaDataAnnotator: process() begins" << endl;

    /** get the CAS view of the sofa */
    CAS * tcas = rCas.getView("EnglishDocument");
    /** get the handle to the index repository */
    FSIndexRepository & indexRep = tcas->getIndexRepository();

    /** get the default text sofa */
    SofaFS textSofa = tcas->getSofa();

    /** get the handle to the sofa data stream */
    SofaDataStream * pStream = textSofa.getSofaDataStream();
    if (pStream == NULL) {
      cout << "no sofa data stream available" << endl;
      return static_cast<TyErrorId>(UIMA_ERR_USER_ANNOTATOR_COULD_NOT_PROCESS);
    }
    /** open the stream */
    int rc = pStream->open();
    if (rc != 0) {
      cout << "open failed "  << rc << endl;
      /* this function owns the stream object, so release it on the error path too */
      delete pStream;
      return static_cast<TyErrorId>(UIMA_ERR_USER_ANNOTATOR_COULD_NOT_PROCESS);
    }
    /** get the total stream size */
    size_t streamSize = pStream->getTotalStreamSizeInBytes();

    /** read file contents into a buffer (pre-filled with '\n', one spare byte at the end) */
    char * pBuffer = new char[streamSize+1];
    memset(pBuffer,'\n' ,streamSize+1);
    int elementsize=1;
    pStream->read(pBuffer, elementsize, streamSize);

    /** close the stream as soon as the data is in memory, so that a failure
        further down cannot leak it */
    pStream->close();
    delete pStream;
    pStream = NULL;

    cout << endl;
    cout.write(pBuffer, streamSize);
    cout << endl;

    /** convert to unicode; the byte buffer is not needed afterwards */
    UnicodeString ustrInputText(pBuffer, streamSize+1, "utf-8");
    delete[] pBuffer;
    pBuffer = NULL;

    /** find tokens and annotate */
    UnicodeString delim(" ");
    /* u_strtok_r() needs NUL-terminated strings; the plain getBuffer() accessor
       does not guarantee termination, so ask for it explicitly. */
    const UChar * pDelim = delim.getTerminatedBuffer();

    /* u_strtok_r() writes into the text, so obtain a writable buffer with room
       for the terminator instead of casting away const. */
    const int32_t textLength = ustrInputText.length();
    UChar * pInputText = ustrInputText.getBuffer(textLength + 1);
    if (pInputText == NULL) {
      cout << "could not obtain a writable text buffer" << endl;
      return static_cast<TyErrorId>(UIMA_ERR_USER_ANNOTATOR_COULD_NOT_PROCESS);
    }
    pInputText[textLength] = 0;

    UChar *myLocalSaveState;
    const UChar * pToken = pInputText;
    /* the first call only primes the tokenizer state; its result is replaced
       by the first call inside the loop condition */
    const UChar * pNextToken = u_strtok_r(pInputText, pDelim, &myLocalSaveState);
    /* NOTE(review): the offsets below (start beginning at 1, end computed as
       start+tokenlength-2) are kept exactly as they were; confirm them against
       the offset convention expected by createAnnotation(). */
    int start = 1;
    int tokenlength=0;
    int nTokens = 0;
    while ( (pNextToken=u_strtok_r(NULL, pDelim, &myLocalSaveState)) ) {
      tokenlength = pNextToken - pToken;
      AnnotationFS annotFS = tcas->createAnnotation(annot, start, start+tokenlength-2);
      indexRep.addFS(annotFS);
      ++nTokens;
      start += tokenlength;
      pToken = pNextToken;
    }
    /* last token: runs from the current start to the end of the stream.
       pNextToken is NULL here, so no length is derived from it. */
    AnnotationFS annotFS = tcas->createAnnotation(annot, start, streamSize);
    indexRep.addFS(annotFS);
    ++nTokens;

    /* hand the buffer back to the string object, keeping its original length */
    ustrInputText.releaseBuffer(textLength);

    cout << endl << "   Annotated " << nTokens << " tokens." << endl << endl;

    cout << "SofaDataAnnotator: process() ends" << endl;
    return static_cast<TyErrorId>(UIMA_ERR_NONE);
  }