Esempio n. 1
0
/*
 * Stem `word` (`len` bytes) using the Snowball stemmer passed in as `ctx`.
 *
 * ctx    - opaque pointer that is used as a `struct sb_stemmer *`.
 * word   - input bytes; reinterpreted as sb_symbol without copying.
 * len    - number of bytes in `word` (narrowed to int for the stemmer call).
 * outlen - receives sb_stemmer_length() when stemming succeeds; left
 *          untouched when it fails.
 *
 * Returns the pointer handed back by sb_stemmer_stem(), or NULL if that call
 * returned NULL.
 */
const char *__sbstemmer_Stem(void *ctx, const char *word, size_t len, size_t *outlen) {
    struct sb_stemmer *stemmer = ctx;
    const sb_symbol *result = sb_stemmer_stem(stemmer, (const sb_symbol *)word, (int)len);

    if (result == NULL) {
        return NULL;
    }

    *outlen = sb_stemmer_length(stemmer);
    return (const char *)result;
}
Esempio n. 2
0
/*
 * Token filter callback: stems the text of `current_token` and stores the
 * result as the data of `next_token`.
 *
 * Does nothing unless the context encoding is UTF-8. Any stemmer already held
 * in the filter state (`user_data`) is deleted and a fresh "english"/"UTF_8"
 * stemmer is created on every call. Failures to create the stemmer or to stem
 * the word are reported through GRN_PLUGIN_ERROR and leave `next_token`
 * untouched.
 */
static void
stem_filter(grn_ctx *ctx,
            grn_token *current_token,
            grn_token *next_token,
            void *user_data)
{
  /* TODO: Detect algorithm from the current token. */
  const char *algorithm = "english";
  const char *encoding = "UTF_8";
  grn_stem_token_filter *filter = user_data;
  grn_obj *token_data;
  const sb_symbol *stem;

  if (GRN_CTX_GET_ENCODING(ctx) != GRN_ENC_UTF8) {
    return;
  }

  token_data = grn_token_get_data(ctx, current_token);

  /* Replace whatever stemmer was left over from the previous call. */
  if (filter->stemmer) {
    sb_stemmer_delete(filter->stemmer);
  }
  filter->stemmer = sb_stemmer_new(algorithm, encoding);
  if (!filter->stemmer) {
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                     "[token-filter][stem] "
                     "failed to create stemmer: "
                     "algorithm=<%s>, encoding=<%s>",
                     algorithm, encoding);
    return;
  }

  stem = sb_stemmer_stem(filter->stemmer,
                         GRN_TEXT_VALUE(token_data),
                         GRN_TEXT_LEN(token_data));
  if (!stem) {
    GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE,
                     "[token-filter][stem] "
                     "failed to allocate memory for stemmed word: <%.*s>",
                     (int)GRN_TEXT_LEN(token_data),
                     GRN_TEXT_VALUE(token_data));
    return;
  }

  grn_token_set_data(ctx, next_token,
                     stem,
                     sb_stemmer_length(filter->stemmer));
}
bool SnowballFilter::incrementToken() {
    if (input->incrementToken()) {
        StringUtils::toUTF8(termAtt->termBuffer().get(), termAtt->termLength(), utf8Result);
        const sb_symbol* stemmed = sb_stemmer_stem(stemmer, utf8Result->result.get(), utf8Result->length);
        if (stemmed == NULL) {
            boost::throw_exception(RuntimeException(L"exception stemming word:" + termAtt->term()));
        }
        int32_t newlen = StringUtils::toUnicode(stemmed, sb_stemmer_length(stemmer), termAtt->termBuffer());
        termAtt->setTermLength(newlen);
        return true;
    } else {
        return false;
    }
}
Esempio n. 4
0
        /**
         * Returns the stem of 'word'.
         *
         * When this object holds no stemmer the word is returned unchanged.
         * If sb_stemmer_stem() returns NULL the process is aborted.
         */
        string Stemmer::stem( const StringData& word ) const {
            if ( !_stemmer ) {
                return word.toString();
            }

            const sb_symbol* input = (const sb_symbol*)word.rawData();
            const sb_symbol* stemmed = sb_stemmer_stem( _stemmer, input, word.size() );
            if ( stemmed == NULL ) {
                // out of memory
                abort();
            }

            // The stem is copied out using the length reported by the stemmer.
            const char* stemmedChars = (const char*)stemmed;
            return string( stemmedChars, sb_stemmer_length( _stemmer ) );
        }
Esempio n. 5
0
/*
 * Foreign predicate body: stem the text `in` with the stemmer selected by
 * `lang` and unify the stem, as an atom, with `out`.
 *
 * Fails if the stemmer cannot be obtained or `in` cannot be converted to
 * UTF-8 text (PL_get_nchars is asked to raise on conversion errors via
 * CVT_EXCEPTION). Returns resource_error("memory") when sb_stemmer_stem()
 * yields NULL.
 */
static foreign_t
snowball(term_t lang, term_t in, term_t out)
{ struct sb_stemmer *stemmer = NULL;
  char *text;
  size_t text_len;
  size_t stem_len;
  const sb_symbol *stem;

  if ( !get_lang_stemmer(lang, &stemmer) )
    return FALSE;

  if ( !PL_get_nchars(in, &text_len, &text,
		      CVT_ATOM|CVT_STRING|CVT_LIST|REP_UTF8|CVT_EXCEPTION) )
    return FALSE;

  stem = sb_stemmer_stem(stemmer, (const sb_symbol*)text, (int)text_len);
  if ( !stem )
    return resource_error("memory");

  stem_len = sb_stemmer_length(stemmer);

  return PL_unify_chars(out, PL_ATOM|REP_UTF8, stem_len, (const char*)stem);
}
Esempio n. 6
0
/*
 * Ruby method: stem `word` with the Snowball stemmer for `language`.
 *
 * Arguments (via rb_scan_args "11"): one required `word` and one optional
 * `language`; when `language` is nil, fuzzy_default_language is used.
 *
 * Returns a new String holding the stem, tagged with the encoding of `word`,
 * or nil when no stemmer could be created for the requested language.
 *
 * Raises ArgumentError when `word` is not a String, and NoMemoryError when
 * sb_stemmer_stem() returns NULL (libstemmer reports out-of-memory that way).
 * Previously a NULL stem was passed straight to rb_enc_str_new().
 */
VALUE fuzzy_snowball(int argc, VALUE * argv, VALUE self) {
    VALUE word, language, result = Qnil;

    rb_scan_args(argc, argv, "11", &word, &language);
    if (NIL_P(language))
        language = fuzzy_default_language;

    if (TYPE(word) != T_STRING)
        rb_raise(rb_eArgError, "invalid word, expect string");

    struct sb_stemmer *stemmer = sb_stemmer_new(CSTRING(language), "UTF_8");
    if (stemmer) {
        const sb_symbol *stem = sb_stemmer_stem(stemmer,
                                                (const sb_symbol *)RSTRING_PTR(word),
                                                (int)RSTRING_LEN(word));
        if (stem == NULL) {
            /* rb_raise does not return, so release the stemmer first. */
            sb_stemmer_delete(stemmer);
            rb_raise(rb_eNoMemError, "failed to stem word");
        }

        long stem_len = sb_stemmer_length(stemmer);
        /* The stem points into the stemmer, so copy it before deleting. */
        result = rb_enc_str_new((const char *)stem, stem_len, rb_enc_get(word));
        sb_stemmer_delete(stemmer);
    }

    return result;
}
Esempio n. 7
0
void CDOMDocument::flushToken()
{
	CIndexer *indexer = (CIndexer *)(this->userData);

	if(this->onKeyword)
	{
		(this->onKeyword)(this, this->tokenLC, this->tokenLCLen, this->indexStart, this->tokenLen, this->wordIndex, "", 0);

		for(int i=0; i<narg_stem; i++)
		{
			if(indexer->stemmer[i])
			{
				const sb_symbol *stemmed = sb_stemmer_stem(indexer->stemmer[i], (const sb_symbol *)(this->tokenLC), this->tokenLCLen);
				int stemmedLen = sb_stemmer_length(indexer->stemmer[i]);

				(this->onKeyword)(this, (char *)stemmed, stemmedLen, this->indexStart, this->tokenLen, this->wordIndex, arg_stem[i], strlen(arg_stem[i]));
			}
		}
	}
	this->wordIndex++;
}
Esempio n. 8
0
/*
 * Replace the text of every token in `inversion` with its Snowball stem.
 *
 * Each token's buffer is reused when the stem is no longer than the current
 * text; otherwise it is freed and reallocated with room for the stem plus one
 * extra byte. The inversion is reset afterwards and returned with its
 * reference count incremented.
 *
 * NOTE(review): the pointer returned by sb_stemmer_stem() is used without a
 * NULL check.
 */
Inversion*
SnowStemmer_Transform_IMP(SnowballStemmer *self, Inversion *inversion) {
    SnowballStemmerIVARS *const ivars = SnowStemmer_IVARS(self);
    struct sb_stemmer *const stemmer
        = (struct sb_stemmer*)ivars->snowstemmer;
    Token *tok;

    for (tok = Inversion_Next(inversion);
         tok != NULL;
         tok = Inversion_Next(inversion)
        ) {
        TokenIVARS *const tok_ivars = Token_IVARS(tok);
        const sb_symbol *stem
            = sb_stemmer_stem(stemmer, (sb_symbol*)tok_ivars->text,
                              tok_ivars->len);
        size_t stem_len = sb_stemmer_length(stemmer);

        /* Grow the token buffer only when the stem does not fit. */
        if (stem_len > tok_ivars->len) {
            FREEMEM(tok_ivars->text);
            tok_ivars->text = (char*)MALLOCATE(stem_len + 1);
        }

        /* Copies one byte past the stem -- presumably the terminator that
         * libstemmer stores after its output; confirm against libstemmer. */
        memcpy(tok_ivars->text, stem, stem_len + 1);
        tok_ivars->len = stem_len;
    }

    Inversion_Reset(inversion);
    return (Inversion*)INCREF(inversion);
}
/*
 * Filter callback: replace *token with its Snowball stem.
 *
 * The stemmer is created on first use; if creation fails, -1 is returned
 * (with error_r passed through to the creation helper). A NULL result from
 * sb_stemmer_stem() is treated as out of memory and is fatal. On success
 * *token points at a t_strndup() copy of the stem and 1 is returned.
 */
static int
fts_filter_stemmer_snowball_filter(struct fts_filter *filter,
                                   const char **token, const char **error_r)
{
    struct fts_filter_stemmer_snowball *sp =
        (struct fts_filter_stemmer_snowball *) filter;
    const sb_symbol *stem;
    size_t token_len;

    if (sp->stemmer == NULL &&
        fts_filter_stemmer_snowball_create_stemmer(sp, error_r) < 0)
        return -1;

    token_len = strlen(*token);
    stem = sb_stemmer_stem(sp->stemmer, (const unsigned char *)*token, token_len);
    if (stem == NULL) {
        /* the only reason why this could fail is because of
           out of memory. */
        i_fatal_status(FATAL_OUTOFMEM,
                       "sb_stemmer_stem(len=%"PRIuSIZE_T") failed: "
                       "Out of memory", token_len);
    }

    *token = t_strndup(stem, sb_stemmer_length(sp->stemmer));
    return 1;
}
Esempio n. 10
0
		std::string get(const char *word, int size, bool complete = true) {
			const sb_symbol *sb;
			std::string ret;

			int len, prev_len;
			len = prev_len = size;

			boost::mutex::scoped_lock guard(m_lock);

			do {
				sb = sb_stemmer_stem(m_stem, (const sb_symbol *)word, len);
				if (!sb)
					return ret;

				len = sb_stemmer_length(m_stem);
				if (len == prev_len)
					break;

				prev_len = len;
			} while (complete);

			ret.assign((char *)sb, len);
			return ret;
		}
Esempio n. 11
0
		/**
		 * Stems every ASCII-letter word in a text buffer.
		 *
		 * The input is scanned left to right. Runs of characters that are not
		 * ASCII letters are copied to the output unchanged. Each run of ASCII
		 * letters is lower-cased into the output and, unless its last character
		 * in the input is a lower-case 'y', replaced there by the result of
		 * sb_stemmer_stem().
		 *
		 * @param szTextBuf input text; read only, need not be NUL-terminated.
		 * @param nTextLen  in: number of bytes in szTextBuf;
		 *                  out: number of bytes written, excluding the trailing NUL.
		 * @return m_pStemmerBuffer (a member buffer that is reused and possibly
		 *         reallocated by later calls), or NULL when nTextLen is 0 or the
		 *         allocation result is NULL.
		 *
		 * NOTE(review): the pointer returned by sb_stemmer_stem() is read without
		 * a NULL check.
		 */
		char* CSnowballWrapper::stemmer(char* szTextBuf,size_t& nTextLen)
		{			
			if(nTextLen <= 0)
				return NULL;
			// Make sure the output buffer holds at least twice the input length.
			if(m_pStemmerBuffer == NULL)
			{
				m_nBufferSize = nTextLen<<1;
				m_pStemmerBuffer = new char[m_nBufferSize];
				if(m_pStemmerBuffer == NULL)
				{
					m_nBufferSize = 0;
					return NULL;
				}
			}
			else
			{
				if(m_nBufferSize < (nTextLen<<1) )
				{
					delete[] m_pStemmerBuffer;
					m_nBufferSize = nTextLen<<1;
					m_pStemmerBuffer = new char[m_nBufferSize];
					if(m_pStemmerBuffer == NULL)
					{
						m_nBufferSize = 0;
						return NULL;
					}
				}
			}

			// NOTE(review): bFlagEnglish is never reset inside the loop, and the
			// only assignment to false below looks unreachable because a word
			// consists solely of ASCII letters -- confirm.
			bool bFlagEnglish = true;
			int i;
			register char*	pWord;
			int  nWordLen;
			// pStemmedBufStart is the current write position in the output buffer.
			register char* pStemmedBufStart = m_pStemmerBuffer;
			register char *pStemmedBufEnd;
			register char *pWordStart,*pWordEnd;
			pWordStart = szTextBuf;
			char* pTextEnd = szTextBuf + nTextLen;
			while(pWordStart < pTextEnd)
			{
				
				// Copy everything up to the next ASCII letter verbatim.
				while(pWordStart < pTextEnd && !((*pWordStart >= 'a' && *pWordStart <= 'z') || (*pWordStart >= 'A' && *pWordStart <= 'Z')))//(*pWordStart==' ' || *pWordStart=='\xd' || *pWordStart=='\xa'))
				{
					*pStemmedBufStart = *pWordStart;
					pWordStart++;
					pStemmedBufStart++;
				}
				if(pWordStart >= pTextEnd)
					break;

				// Find the end of the word: [pWordStart, pWordEnd) is a run of ASCII letters.
				pWordEnd = pWordStart + 1;
				while((pWordEnd < (pTextEnd)) && ((*pWordEnd >= 'a' && *pWordEnd <= 'z') || (*pWordEnd >= 'A' && *pWordEnd <= 'Z')))//*pWordEnd!=' ' && *pWordEnd!='\xd' && *pWordEnd!='\xa' && *pWordEnd!='.' && *pWordEnd!='?' && *pWordEnd!=',')
					pWordEnd++;

				// Write a lower-cased copy of the word at the current output position;
				// pStemmedBufEnd ends up one past that copy.
				pStemmedBufEnd = pStemmedBufStart;
				for(i=0;i < pWordEnd - pWordStart;i++)
				{
					if(pWordStart[i] >= 'A' && pWordStart[i] <= 'Z')
						*pStemmedBufEnd = 'a'-'A' + pWordStart[i];
					else
					{
						if(!(pWordStart[i] >= 'a' && pWordStart[i] <= 'z'))
							bFlagEnglish = false;
						*pStemmedBufEnd = pWordStart[i];
					}
					pStemmedBufEnd++;
				}
				if(bFlagEnglish)
				{					
					if(pWordEnd != pWordStart)
					{							
						// The 'y' test looks at the original input, so a word ending
						// in upper-case 'Y' is still stemmed.
						if(*(pWordEnd-1) != 'y')
						{
							
							// Stem the lower-cased copy and overwrite it in place with the result.
							pWord = (char*)sb_stemmer_stem(m_pStemmer, (const sb_symbol*)pStemmedBufStart,(int)(pStemmedBufEnd - pStemmedBufStart) );
							nWordLen = sb_stemmer_length(m_pStemmer);								
							for(i=0;i < nWordLen;i++)
							{
								*pStemmedBufStart = pWord[i];
								pStemmedBufStart++;
							}							
						}
						else
						{
							// Keep the lower-cased copy as is.
							pStemmedBufStart = pStemmedBufEnd;
						}
					}			
				}
				else
				{
					pStemmedBufStart = pStemmedBufEnd;
				}
				pWordStart = pWordEnd;
			}
			// Terminate the output and report its length back through nTextLen.
			*pStemmedBufStart = 0;
			nTextLen = pStemmedBufStart - m_pStemmerBuffer;
			return m_pStemmerBuffer;
		}
Esempio n. 12
0
/*
 * Generated C for Stemmer.stemWord (see the Stemmer.pyx line references in
 * the comments below, lines 171-198).
 *
 * Flow, as visible in this function:
 *   - If `word` is an instance of `unicode`, it is replaced by the result of
 *     word.encode(__pyx_k9p) and remembered as such.
 *     (__pyx_k9p is defined elsewhere -- presumably the encoding name; confirm.)
 *   - If self->max_cache_size > 0, the word is looked up in self->cache.
 *     On a hit, item[0] is the result and item[1] is set to self->counter,
 *     which is then incremented. On KeyError the word is stemmed with
 *     sb_stemmer_stem(), [result, counter] is stored in the cache, the counter
 *     is incremented and self.__purgeCache() is called. Any other exception
 *     propagates.
 *   - Otherwise the word is stemmed directly without touching the cache.
 *   - If the input was unicode, result.decode(__pyx_k9p) is returned;
 *     otherwise the byte string result is returned.
 *
 * Returns a new reference, or 0 with a Python exception set on failure.
 *
 * NOTE(review): the pointer returned by sb_stemmer_stem() is not checked for
 * NULL before it is handed to PyString_FromStringAndSize().
 * NOTE(review): the paths in the comments indicate this file is generated from
 * Stemmer.pyx, so fixes presumably belong there rather than here.
 */
static PyObject *__pyx_f_7Stemmer_7Stemmer_stemWord(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
  PyObject *__pyx_v_word = 0;
  char *__pyx_v_c_word;
  PyObject *__pyx_v_was_unicode;
  PyObject *__pyx_v_cacheditem;
  PyObject *__pyx_v_result;
  PyObject *__pyx_v_length;
  PyObject *__pyx_r;
  PyObject *__pyx_1 = 0;
  int __pyx_2;
  PyObject *__pyx_3 = 0;
  PyObject *__pyx_4 = 0;
  char *__pyx_5;
  Py_ssize_t __pyx_6;
  PyObject *__pyx_7 = 0;
  PyObject *__pyx_8 = 0;
  static char *__pyx_argnames[] = {"word",0};
  if (!PyArg_ParseTupleAndKeywords(__pyx_args, __pyx_kwds, "O", __pyx_argnames, &__pyx_v_word)) return 0;
  /* Own references to the arguments and initialise all locals to None; they are released at __pyx_L0. */
  Py_INCREF(__pyx_v_self);
  Py_INCREF(__pyx_v_word);
  __pyx_v_was_unicode = Py_None; Py_INCREF(Py_None);
  __pyx_v_cacheditem = Py_None; Py_INCREF(Py_None);
  __pyx_v_result = Py_None; Py_INCREF(Py_None);
  __pyx_v_length = Py_None; Py_INCREF(Py_None);

  /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":171 */
  __pyx_1 = PyInt_FromLong(0); if (!__pyx_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 171; goto __pyx_L1;}
  Py_DECREF(__pyx_v_was_unicode);
  __pyx_v_was_unicode = __pyx_1;
  __pyx_1 = 0;

  /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":172 */
  __pyx_1 = __Pyx_GetName(__pyx_b, __pyx_n_unicode); if (!__pyx_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 172; goto __pyx_L1;}
  __pyx_2 = PyObject_IsInstance(__pyx_v_word,__pyx_1); if (__pyx_2 == -1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 172; goto __pyx_L1;}
  Py_DECREF(__pyx_1); __pyx_1 = 0;
  if (__pyx_2) {

    /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":173 */
    __pyx_1 = PyInt_FromLong(1); if (!__pyx_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 173; goto __pyx_L1;}
    Py_DECREF(__pyx_v_was_unicode);
    __pyx_v_was_unicode = __pyx_1;
    __pyx_1 = 0;

    /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":174 */
    __pyx_1 = PyObject_GetAttr(__pyx_v_word, __pyx_n_encode); if (!__pyx_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 174; goto __pyx_L1;}
    __pyx_3 = PyTuple_New(1); if (!__pyx_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 174; goto __pyx_L1;}
    Py_INCREF(__pyx_k9p);
    PyTuple_SET_ITEM(__pyx_3, 0, __pyx_k9p);
    __pyx_4 = PyObject_CallObject(__pyx_1, __pyx_3); if (!__pyx_4) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 174; goto __pyx_L1;}
    Py_DECREF(__pyx_1); __pyx_1 = 0;
    Py_DECREF(__pyx_3); __pyx_3 = 0;
    Py_DECREF(__pyx_v_word);
    __pyx_v_word = __pyx_4;
    __pyx_4 = 0;
    goto __pyx_L2;
  }
  __pyx_L2:;

  /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":176 */
  __pyx_2 = (((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->max_cache_size > 0);
  if (__pyx_2) {
    /*try:*/ {

      /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":178 */
      __pyx_1 = PyObject_GetItem(((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->cache, __pyx_v_word); if (!__pyx_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 178; goto __pyx_L4;}
      Py_DECREF(__pyx_v_cacheditem);
      __pyx_v_cacheditem = __pyx_1;
      __pyx_1 = 0;

      /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":179 */
      __pyx_3 = __Pyx_GetItemInt(__pyx_v_cacheditem, 0); if (!__pyx_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 179; goto __pyx_L4;}
      Py_DECREF(__pyx_v_result);
      __pyx_v_result = __pyx_3;
      __pyx_3 = 0;

      /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":180 */
      if (__Pyx_SetItemInt(__pyx_v_cacheditem, 1, ((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->counter) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 180; goto __pyx_L4;}

      /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":181 */
      __pyx_4 = PyInt_FromLong(1); if (!__pyx_4) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 181; goto __pyx_L4;}
      __pyx_1 = PyNumber_Add(((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->counter, __pyx_4); if (!__pyx_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 181; goto __pyx_L4;}
      Py_DECREF(__pyx_4); __pyx_4 = 0;
      Py_DECREF(((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->counter);
      ((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->counter = __pyx_1;
      __pyx_1 = 0;
    }
    goto __pyx_L5;
    /* Exception raised inside the try body: drop temporaries, then test for KeyError. */
    __pyx_L4:;
    Py_XDECREF(__pyx_3); __pyx_3 = 0;
    Py_XDECREF(__pyx_4); __pyx_4 = 0;
    Py_XDECREF(__pyx_1); __pyx_1 = 0;

    /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":182 */
    __pyx_2 = PyErr_ExceptionMatches(PyExc_KeyError);
    if (__pyx_2) {
      __Pyx_AddTraceback("Stemmer.stemWord");
      if (__Pyx_GetException(&__pyx_3, &__pyx_4, &__pyx_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 182; goto __pyx_L1;}

      /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":183 */
      __pyx_5 = PyString_AsString(__pyx_v_word); if (!__pyx_5) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 183; goto __pyx_L1;}
      __pyx_v_c_word = __pyx_5;

      /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":184 */
      __pyx_6 = PyObject_Length(__pyx_v_word); if (__pyx_6 == -1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 184; goto __pyx_L1;}
      /* NOTE(review): result of sb_stemmer_stem() is not checked for NULL. */
      __pyx_v_c_word = ((char *)sb_stemmer_stem(((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->cobj,((sb_symbol *)__pyx_v_c_word),__pyx_6));

      /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":185 */
      __pyx_7 = PyInt_FromLong(sb_stemmer_length(((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->cobj)); if (!__pyx_7) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 185; goto __pyx_L1;}
      Py_DECREF(__pyx_v_length);
      __pyx_v_length = __pyx_7;
      __pyx_7 = 0;

      /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":186 */
      __pyx_2 = PyInt_AsLong(__pyx_v_length); if (PyErr_Occurred()) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 186; goto __pyx_L1;}
      __pyx_7 = PyString_FromStringAndSize(__pyx_v_c_word,__pyx_2); if (!__pyx_7) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 186; goto __pyx_L1;}
      Py_DECREF(__pyx_v_result);
      __pyx_v_result = __pyx_7;
      __pyx_7 = 0;

      /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":187 */
      __pyx_7 = PyList_New(2); if (!__pyx_7) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 187; goto __pyx_L1;}
      Py_INCREF(__pyx_v_result);
      PyList_SET_ITEM(__pyx_7, 0, __pyx_v_result);
      Py_INCREF(((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->counter);
      PyList_SET_ITEM(__pyx_7, 1, ((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->counter);
      if (PyObject_SetItem(((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->cache, __pyx_v_word, __pyx_7) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 187; goto __pyx_L1;}
      Py_DECREF(__pyx_7); __pyx_7 = 0;

      /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":188 */
      __pyx_7 = PyInt_FromLong(1); if (!__pyx_7) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 188; goto __pyx_L1;}
      __pyx_8 = PyNumber_Add(((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->counter, __pyx_7); if (!__pyx_8) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 188; goto __pyx_L1;}
      Py_DECREF(__pyx_7); __pyx_7 = 0;
      Py_DECREF(((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->counter);
      ((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->counter = __pyx_8;
      __pyx_8 = 0;

      /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":189 */
      __pyx_7 = PyObject_GetAttr(__pyx_v_self, __pyx_n___purgeCache); if (!__pyx_7) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 189; goto __pyx_L1;}
      __pyx_8 = PyObject_CallObject(__pyx_7, 0); if (!__pyx_8) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 189; goto __pyx_L1;}
      Py_DECREF(__pyx_7); __pyx_7 = 0;
      Py_DECREF(__pyx_8); __pyx_8 = 0;
      Py_DECREF(__pyx_3); __pyx_3 = 0;
      Py_DECREF(__pyx_4); __pyx_4 = 0;
      Py_DECREF(__pyx_1); __pyx_1 = 0;
      goto __pyx_L5;
    }
    goto __pyx_L1;
    __pyx_L5:;
    goto __pyx_L3;
  }
  /*else*/ {

    /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":191 */
    __pyx_5 = PyString_AsString(__pyx_v_word); if (!__pyx_5) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 191; goto __pyx_L1;}
    __pyx_v_c_word = __pyx_5;

    /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":192 */
    __pyx_6 = PyObject_Length(__pyx_v_word); if (__pyx_6 == -1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 192; goto __pyx_L1;}
    /* NOTE(review): result of sb_stemmer_stem() is not checked for NULL. */
    __pyx_v_c_word = ((char *)sb_stemmer_stem(((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->cobj,((sb_symbol *)__pyx_v_c_word),__pyx_6));

    /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":193 */
    __pyx_7 = PyInt_FromLong(sb_stemmer_length(((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->cobj)); if (!__pyx_7) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 193; goto __pyx_L1;}
    Py_DECREF(__pyx_v_length);
    __pyx_v_length = __pyx_7;
    __pyx_7 = 0;

    /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":194 */
    __pyx_2 = PyInt_AsLong(__pyx_v_length); if (PyErr_Occurred()) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 194; goto __pyx_L1;}
    __pyx_8 = PyString_FromStringAndSize(__pyx_v_c_word,__pyx_2); if (!__pyx_8) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 194; goto __pyx_L1;}
    Py_DECREF(__pyx_v_result);
    __pyx_v_result = __pyx_8;
    __pyx_8 = 0;
  }
  __pyx_L3:;

  /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":196 */
  __pyx_2 = PyObject_IsTrue(__pyx_v_was_unicode); if (__pyx_2 < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 196; goto __pyx_L1;}
  if (__pyx_2) {
    __pyx_3 = PyObject_GetAttr(__pyx_v_result, __pyx_n_decode); if (!__pyx_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 197; goto __pyx_L1;}
    __pyx_4 = PyTuple_New(1); if (!__pyx_4) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 197; goto __pyx_L1;}
    Py_INCREF(__pyx_k9p);
    PyTuple_SET_ITEM(__pyx_4, 0, __pyx_k9p);
    __pyx_1 = PyObject_CallObject(__pyx_3, __pyx_4); if (!__pyx_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 197; goto __pyx_L1;}
    Py_DECREF(__pyx_3); __pyx_3 = 0;
    Py_DECREF(__pyx_4); __pyx_4 = 0;
    __pyx_r = __pyx_1;
    __pyx_1 = 0;
    goto __pyx_L0;
    goto __pyx_L6;
  }
  __pyx_L6:;

  /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":198 */
  Py_INCREF(__pyx_v_result);
  __pyx_r = __pyx_v_result;
  goto __pyx_L0;

  __pyx_r = Py_None; Py_INCREF(Py_None);
  goto __pyx_L0;
  /* Error exit: release temporaries, record the traceback and return 0. */
  __pyx_L1:;
  Py_XDECREF(__pyx_1);
  Py_XDECREF(__pyx_3);
  Py_XDECREF(__pyx_4);
  Py_XDECREF(__pyx_7);
  Py_XDECREF(__pyx_8);
  __Pyx_AddTraceback("Stemmer.Stemmer.stemWord");
  __pyx_r = 0;
  /* Common exit: release the local variables and the argument references taken on entry. */
  __pyx_L0:;
  Py_DECREF(__pyx_v_was_unicode);
  Py_DECREF(__pyx_v_cacheditem);
  Py_DECREF(__pyx_v_result);
  Py_DECREF(__pyx_v_length);
  Py_DECREF(__pyx_v_self);
  Py_DECREF(__pyx_v_word);
  return __pyx_r;
}
Esempio n. 13
0
size_t SbStemmerWrapper::getStemPos(const char *toStem, size_t length) {
  sb_stemmer_stem(stemmer_, (const sb_symbol*) toStem, length);
  return sb_stemmer_length(stemmer_);
}