static void
stem_file(struct sb_stemmer * stemmer, FILE * f_in, FILE * f_out)
{
#define INC 10
    int lim = INC;
    sb_symbol * b = (sb_symbol *) malloc(lim * sizeof(sb_symbol));

    while(1) {
        int ch = getc(f_in);
        if (ch == EOF) {
            free(b); return;
        }
        {
            int i = 0;
            while(1) {
                if (ch == '\n' || ch == EOF) break;
                if (i == lim) {
                    sb_symbol * newb;
		    newb = (sb_symbol *)
			    realloc(b, (lim + INC) * sizeof(sb_symbol));
		    if (newb == 0) goto error;
		    b = newb;
                    lim = lim + INC;
                }
                /* force lower case: */
                if (isupper(ch)) ch = tolower(ch);

                b[i] = ch;
		i++;
                ch = getc(f_in);
            }

            if (pretty) {
                int j;
                for (j = 0; j < i; j++) fprintf(f_out, "%c", b[j]);
                fprintf(f_out, "%s", " -> ");
            }
	    {
		const sb_symbol * stemmed = sb_stemmer_stem(stemmer, b, i);
                if (stemmed == NULL)
                {
                    fprintf(stderr, "Out of memory");
                    exit(1);
                }
                else
                {
                    int j;
                    /*for (j = 0; j < z->l; j++) */
                    for (j = 0; stemmed[j] != 0; j++)
                        fprintf(f_out, "%c", stemmed[j]);
                    fprintf(f_out, "\n");
                }
            }
        }
    }
error:
    if (b != 0) free(b);
    return;
}
static const char * stem_token_text(struct sb_stemmer *stemmer, text *token) {
    char * stemmed_token = TextDatumGetCString(PointerGetDatum(token));
    if (strcmp(stemmed_token, "") == 0) {
        return "";
    }
    const sb_symbol *stemmed = sb_stemmer_stem(stemmer,
                               (const sb_symbol *) stemmed_token,
                               strlen(stemmed_token));
    return (const char*) stemmed;
}
Example #3
0
string Words::stemGreekWord(string &word) const
{
    sb_symbol* inWord = (sb_symbol*)word.c_str();
    
    const sb_symbol * stemmed = sb_stemmer_stem(stemmerGreek, inWord, (int)word.length());
    
    if (stemmed==NULL)
        return NULL;
    else
        return string((char*)stemmed);
}
Example #4
0
const char *__sbstemmer_Stem(void *ctx, const char *word, size_t len, size_t *outlen) {
    const sb_symbol *b = (const sb_symbol *)word;
    struct sb_stemmer *sb = ctx;

    const sb_symbol *stemmed = sb_stemmer_stem(sb, b, (int)len);
    if (stemmed) {
        *outlen = sb_stemmer_length(sb);
        return (const char *)stemmed;
    }
    return NULL;
}
Example #5
0
static void
stem_filter(grn_ctx *ctx,
            grn_token *current_token,
            grn_token *next_token,
            void *user_data)
{
  grn_stem_token_filter *token_filter = user_data;
  grn_obj *data;

  if (GRN_CTX_GET_ENCODING(ctx) != GRN_ENC_UTF8) {
    return;
  }

  data = grn_token_get_data(ctx, current_token);

  if (token_filter->stemmer) {
    sb_stemmer_delete(token_filter->stemmer);
  }
  {
    /* TODO: Detect algorithm from the current token. */
    const char *algorithm = "english";
    const char *encoding = "UTF_8";
    token_filter->stemmer = sb_stemmer_new(algorithm, encoding);
    if (!token_filter->stemmer) {
      GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                       "[token-filter][stem] "
                       "failed to create stemmer: "
                       "algorithm=<%s>, encoding=<%s>",
                       algorithm, encoding);
      return;
    }
  }

  {
    const sb_symbol *stemmed;

    stemmed = sb_stemmer_stem(token_filter->stemmer,
                              GRN_TEXT_VALUE(data), GRN_TEXT_LEN(data));
    if (stemmed) {
      grn_token_set_data(ctx, next_token,
                         stemmed,
                         sb_stemmer_length(token_filter->stemmer));
    } else {
      GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE,
                       "[token-filter][stem] "
                       "failed to allocate memory for stemmed word: <%.*s>",
                       (int)GRN_TEXT_LEN(data), GRN_TEXT_VALUE(data));
      return;
    }
  }
}
bool SnowballFilter::incrementToken() {
    if (input->incrementToken()) {
        StringUtils::toUTF8(termAtt->termBuffer().get(), termAtt->termLength(), utf8Result);
        const sb_symbol* stemmed = sb_stemmer_stem(stemmer, utf8Result->result.get(), utf8Result->length);
        if (stemmed == NULL) {
            boost::throw_exception(RuntimeException(L"exception stemming word:" + termAtt->term()));
        }
        int32_t newlen = StringUtils::toUnicode(stemmed, sb_stemmer_length(stemmer), termAtt->termBuffer());
        termAtt->setTermLength(newlen);
        return true;
    } else {
        return false;
    }
}
Example #7
0
        string Stemmer::stem( const StringData& word ) const {
            if ( !_stemmer )
                return word.toString();

            const sb_symbol* sb_sym = sb_stemmer_stem( _stemmer,
                                                       (const sb_symbol*)word.rawData(),
                                                       word.size() );

            if ( sb_sym == NULL ) {
                // out of memory
                abort();
            }

            return string( (const char*)(sb_sym), sb_stemmer_length( _stemmer ) );
        }
Example #8
0
static foreign_t
snowball(term_t lang, term_t in, term_t out)
{ struct sb_stemmer *stemmer = NULL;
  char *s;
  size_t len, olen;
  const sb_symbol *stemmed;

  if ( !get_lang_stemmer(lang, &stemmer) )
    return FALSE;
  if ( !PL_get_nchars(in, &len, &s,
		      CVT_ATOM|CVT_STRING|CVT_LIST|REP_UTF8|CVT_EXCEPTION) )
    return FALSE;

  if ( !(stemmed = sb_stemmer_stem(stemmer, (const sb_symbol*)s, (int)len)) )
    return resource_error("memory");
  olen = sb_stemmer_length(stemmer);

  return PL_unify_chars(out, PL_ATOM|REP_UTF8, olen, (const char*)stemmed);
}
Example #9
0
VALUE fuzzy_snowball(int argc, VALUE * argv, VALUE self) {
    VALUE word, language, result = Qnil;

    rb_scan_args(argc, argv, "11", &word, &language);
    if (NIL_P(language))
        language = fuzzy_default_language;

    if (TYPE(word) != T_STRING)
        rb_raise(rb_eArgError, "invalid word, expect string");

    struct sb_stemmer *stemmer = sb_stemmer_new(CSTRING(language), "UTF_8");
    if (stemmer) {
        const sb_symbol *stem = sb_stemmer_stem(stemmer, RSTRING_PTR(word), RSTRING_LEN(word));
        uint32_t stem_len = sb_stemmer_length(stemmer);
        result = rb_enc_str_new(stem, stem_len, rb_enc_get(word));
        sb_stemmer_delete(stemmer);
    }

    return result;
}
Example #10
0
void CDOMDocument::flushToken()
{
	CIndexer *indexer = (CIndexer *)(this->userData);

	if(this->onKeyword)
	{
		(this->onKeyword)(this, this->tokenLC, this->tokenLCLen, this->indexStart, this->tokenLen, this->wordIndex, "", 0);

		for(int i=0; i<narg_stem; i++)
		{
			if(indexer->stemmer[i])
			{
				const sb_symbol *stemmed = sb_stemmer_stem(indexer->stemmer[i], (const sb_symbol *)(this->tokenLC), this->tokenLCLen);
				int stemmedLen = sb_stemmer_length(indexer->stemmer[i]);

				(this->onKeyword)(this, (char *)stemmed, stemmedLen, this->indexStart, this->tokenLen, this->wordIndex, arg_stem[i], strlen(arg_stem[i]));
			}
		}
	}
	this->wordIndex++;
}
Inversion*
SnowStemmer_Transform_IMP(SnowballStemmer *self, Inversion *inversion) {
    Token *token;
    SnowballStemmerIVARS *const ivars = SnowStemmer_IVARS(self);
    struct sb_stemmer *const snowstemmer
        = (struct sb_stemmer*)ivars->snowstemmer;

    while (NULL != (token = Inversion_Next(inversion))) {
        TokenIVARS *const token_ivars = Token_IVARS(token);
        const sb_symbol *stemmed_text
            = sb_stemmer_stem(snowstemmer, (sb_symbol*)token_ivars->text,
                              token_ivars->len);
        size_t len = sb_stemmer_length(snowstemmer);
        if (len > token_ivars->len) {
            FREEMEM(token_ivars->text);
            token_ivars->text = (char*)MALLOCATE(len + 1);
        }
        memcpy(token_ivars->text, stemmed_text, len + 1);
        token_ivars->len = len;
    }
    Inversion_Reset(inversion);
    return (Inversion*)INCREF(inversion);
}
Example #12
0
		std::string get(const char *word, int size, bool complete = true) {
			const sb_symbol *sb;
			std::string ret;

			int len, prev_len;
			len = prev_len = size;

			boost::mutex::scoped_lock guard(m_lock);

			do {
				sb = sb_stemmer_stem(m_stem, (const sb_symbol *)word, len);
				if (!sb)
					return ret;

				len = sb_stemmer_length(m_stem);
				if (len == prev_len)
					break;

				prev_len = len;
			} while (complete);

			ret.assign((char *)sb, len);
			return ret;
		}
static int
fts_filter_stemmer_snowball_filter(struct fts_filter *filter,
                                   const char **token, const char **error_r)
{
    struct fts_filter_stemmer_snowball *sp =
        (struct fts_filter_stemmer_snowball *) filter;
    const sb_symbol *base;

    if (sp->stemmer == NULL) {
        if (fts_filter_stemmer_snowball_create_stemmer(sp, error_r) < 0)
            return -1;
    }

    base = sb_stemmer_stem(sp->stemmer, (const unsigned char *)*token, strlen(*token));
    if (base == NULL) {
        /* the only reason why this could fail is because of
           out of memory. */
        i_fatal_status(FATAL_OUTOFMEM,
                       "sb_stemmer_stem(len=%"PRIuSIZE_T") failed: "
                       "Out of memory", strlen(*token));
    }
    *token = t_strndup(base, sb_stemmer_length(sp->stemmer));
    return 1;
}
Example #14
0
size_t SbStemmerWrapper::getStemPos(const char *toStem, size_t length) {
  sb_stemmer_stem(stemmer_, (const sb_symbol*) toStem, length);
  return sb_stemmer_length(stemmer_);
}
void Snowball(sLONG_PTR *pResult, PackagePtr pParams)
{
				C_TEXT Param1;
				ARRAY_TEXT Param2;
				ARRAY_TEXT Param3;
				C_LONGINT Param4;

				Param1.fromParamAtIndex(pParams, 1);
				Param4.fromParamAtIndex(pParams, 4);

				CUTF8String t;
				Param1.copyUTF8String(&t);
				std::string str((const char *)t.c_str());
				std::vector<std::string>words = split(str);

				struct sb_stemmer *stemmer;
				char *language = (char *)"english";

				Snowball_Language lang = (Snowball_Language)Param4.getIntValue();

				switch (lang) {
						case Snowball_Danish:
								language = (char *)"danish";
								break;
						case Snowball_Dutch:
								language = (char *)"dutch";
								break;
						case Snowball_English:
								language = (char *)"english";
								break;
						case Snowball_Finnish:
								language = (char *)"finnish";
								break;
						case Snowball_French:
								language = (char *)"french";
								break;
						case Snowball_German:
								language = (char *)"german";
								break;
						case Snowball_Hungarian:
								language = (char *)"hungarian";
								break;
						case Snowball_Italian:
								language = (char *)"italian";
								break;
						case Snowball_Norwegian:
								language = (char *)"norwegian";
								break;
						case Snowball_Portuguese:
								language = (char *)"portuguese";
								break;
						case Snowball_Romanian:
								language = (char *)"romanian";
								break;
						case Snowball_Russian:
								language = (char *)"russian";
								break;
						case Snowball_Spanish:
								language = (char *)"spanish";
								break;
						case Snowball_Swedish:
								language = (char *)"swedish";
								break;
						case Snowball_Turkish:
								language = (char *)"turkish";
								break;
						default:
								break;
				}

				char *charenc = NULL;//UTF-8

				stemmer = sb_stemmer_new(language, charenc);

				if (stemmer)
				{
								Param2.setSize(1);
								Param3.setSize(1);

								for(std::vector<std::string>::iterator it = words.begin(); it != words.end(); ++it) {
												std::string word = *it;
												sb_symbol * symbol = (sb_symbol *)word.c_str();
												int size = word.length();
												const sb_symbol *stemmed = sb_stemmer_stem(stemmer, symbol, size);
												if (stemmed)
												{
																CUTF8String w((const uint8_t *)symbol);
																CUTF8String s((const uint8_t *)stemmed);
																Param2.appendUTF8String(&w);
																Param3.appendUTF8String(&s);
												}
								}
								
								sb_stemmer_delete(stemmer);
				}

					Param2.toParamAtIndex(pParams, 2);
					Param3.toParamAtIndex(pParams, 3);
}
Example #16
0
QString RStemmer::stem(const QString& word, const QString& locale) {
    static QString prevLocale = "";
    static struct sb_stemmer* stemmer = NULL;

    // keep always the last used stemmer in memory:
    if (locale!=prevLocale) {
        if (stemmer!=NULL) {
            sb_stemmer_delete(stemmer);
            stemmer = NULL;
        }
        stemmer = sb_stemmer_new(locale.toUtf8(), "UTF_8");
        prevLocale = locale;
    }

    if (stemmer == NULL) {
        qWarning() << "No stemmer found for locale: " << locale;
        return word;
    }

    QByteArray ba = word.toUtf8();

    int lim = 10;
    sb_symbol* sbWord = (sb_symbol*)malloc(lim * sizeof(sb_symbol));
    int i = 0;
    int inlen = 0;

    while (1) {
        if (i==ba.length()) {
            break;
        }
        int ch = ba.at(i);
        if (i == lim) {
            sb_symbol * newb;
            newb = (sb_symbol*)realloc(sbWord, (lim + 10) * sizeof(sb_symbol));
            if (newb == 0) {
                Q_ASSERT(false);
                qWarning() << "RStemmer::stem: Memory allocation error.";
            }
            sbWord = newb;
            lim = lim + 10;
        }
        // Update count of utf-8 characters.
        if (ch < 0x80 || ch > 0xBF) {
            inlen += 1;
        }
        // force lower case:
        //if (isupper(ch)) {
        //    ch = tolower(ch);
        //}

        sbWord[i] = ch;
        i++;
    }

    const sb_symbol* sbStemmed = sb_stemmer_stem(stemmer, sbWord, i);
    QString stemmed;

    i=0;
    while(1) {
        if (sbStemmed[i]==0) {
            break;
        }
        stemmed += QChar(sbStemmed[i]);
        i++;
    }

    return stemmed;
}
Example #17
0
		char* CSnowballWrapper::stemmer(char* szTextBuf,size_t& nTextLen)
		{			
			if(nTextLen <= 0)
				return NULL;
			if(m_pStemmerBuffer == NULL)
			{
				m_nBufferSize = nTextLen<<1;
				m_pStemmerBuffer = new char[m_nBufferSize];
				if(m_pStemmerBuffer == NULL)
				{
					m_nBufferSize = 0;
					return NULL;
				}
			}
			else
			{
				if(m_nBufferSize < (nTextLen<<1) )
				{
					delete[] m_pStemmerBuffer;
					m_nBufferSize = nTextLen<<1;
					m_pStemmerBuffer = new char[m_nBufferSize];
					if(m_pStemmerBuffer == NULL)
					{
						m_nBufferSize = 0;
						return NULL;
					}
				}
			}

			bool bFlagEnglish = true;
			int i;
			register char*	pWord;
			int  nWordLen;
			register char* pStemmedBufStart = m_pStemmerBuffer;
			register char *pStemmedBufEnd;
			register char *pWordStart,*pWordEnd;
			pWordStart = szTextBuf;
			char* pTextEnd = szTextBuf + nTextLen;
			while(pWordStart < pTextEnd)
			{
				
				while(pWordStart < pTextEnd && !((*pWordStart >= 'a' && *pWordStart <= 'z') || (*pWordStart >= 'A' && *pWordStart <= 'Z')))//(*pWordStart==' ' || *pWordStart=='\xd' || *pWordStart=='\xa'))
				{
					*pStemmedBufStart = *pWordStart;
					pWordStart++;
					pStemmedBufStart++;
				}
				if(pWordStart >= pTextEnd)
					break;

				pWordEnd = pWordStart + 1;
				while((pWordEnd < (pTextEnd)) && ((*pWordEnd >= 'a' && *pWordEnd <= 'z') || (*pWordEnd >= 'A' && *pWordEnd <= 'Z')))//*pWordEnd!=' ' && *pWordEnd!='\xd' && *pWordEnd!='\xa' && *pWordEnd!='.' && *pWordEnd!='?' && *pWordEnd!=',')
					pWordEnd++;

				pStemmedBufEnd = pStemmedBufStart;
				for(i=0;i < pWordEnd - pWordStart;i++)
				{
					if(pWordStart[i] >= 'A' && pWordStart[i] <= 'Z')
						*pStemmedBufEnd = 'a'-'A' + pWordStart[i];
					else
					{
						if(!(pWordStart[i] >= 'a' && pWordStart[i] <= 'z'))
							bFlagEnglish = false;
						*pStemmedBufEnd = pWordStart[i];
					}
					pStemmedBufEnd++;
				}
				if(bFlagEnglish)
				{					
					if(pWordEnd != pWordStart)
					{							
						if(*(pWordEnd-1) != 'y')
						{
							
							pWord = (char*)sb_stemmer_stem(m_pStemmer, (const sb_symbol*)pStemmedBufStart,(int)(pStemmedBufEnd - pStemmedBufStart) );
							nWordLen = sb_stemmer_length(m_pStemmer);								
							for(i=0;i < nWordLen;i++)
							{
								*pStemmedBufStart = pWord[i];
								pStemmedBufStart++;
							}							
						}
						else
						{
							pStemmedBufStart = pStemmedBufEnd;
						}
					}			
				}
				else
				{
					pStemmedBufStart = pStemmedBufEnd;
				}
				pWordStart = pWordEnd;
			}
			*pStemmedBufStart = 0;
			nTextLen = pStemmedBufStart - m_pStemmerBuffer;
			return m_pStemmerBuffer;
		}
Example #18
0
static PyObject *__pyx_f_7Stemmer_7Stemmer_stemWord(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
  PyObject *__pyx_v_word = 0;
  char *__pyx_v_c_word;
  PyObject *__pyx_v_was_unicode;
  PyObject *__pyx_v_cacheditem;
  PyObject *__pyx_v_result;
  PyObject *__pyx_v_length;
  PyObject *__pyx_r;
  PyObject *__pyx_1 = 0;
  int __pyx_2;
  PyObject *__pyx_3 = 0;
  PyObject *__pyx_4 = 0;
  char *__pyx_5;
  Py_ssize_t __pyx_6;
  PyObject *__pyx_7 = 0;
  PyObject *__pyx_8 = 0;
  static char *__pyx_argnames[] = {"word",0};
  if (!PyArg_ParseTupleAndKeywords(__pyx_args, __pyx_kwds, "O", __pyx_argnames, &__pyx_v_word)) return 0;
  Py_INCREF(__pyx_v_self);
  Py_INCREF(__pyx_v_word);
  __pyx_v_was_unicode = Py_None; Py_INCREF(Py_None);
  __pyx_v_cacheditem = Py_None; Py_INCREF(Py_None);
  __pyx_v_result = Py_None; Py_INCREF(Py_None);
  __pyx_v_length = Py_None; Py_INCREF(Py_None);

  /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":171 */
  __pyx_1 = PyInt_FromLong(0); if (!__pyx_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 171; goto __pyx_L1;}
  Py_DECREF(__pyx_v_was_unicode);
  __pyx_v_was_unicode = __pyx_1;
  __pyx_1 = 0;

  /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":172 */
  __pyx_1 = __Pyx_GetName(__pyx_b, __pyx_n_unicode); if (!__pyx_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 172; goto __pyx_L1;}
  __pyx_2 = PyObject_IsInstance(__pyx_v_word,__pyx_1); if (__pyx_2 == -1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 172; goto __pyx_L1;}
  Py_DECREF(__pyx_1); __pyx_1 = 0;
  if (__pyx_2) {

    /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":173 */
    __pyx_1 = PyInt_FromLong(1); if (!__pyx_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 173; goto __pyx_L1;}
    Py_DECREF(__pyx_v_was_unicode);
    __pyx_v_was_unicode = __pyx_1;
    __pyx_1 = 0;

    /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":174 */
    __pyx_1 = PyObject_GetAttr(__pyx_v_word, __pyx_n_encode); if (!__pyx_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 174; goto __pyx_L1;}
    __pyx_3 = PyTuple_New(1); if (!__pyx_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 174; goto __pyx_L1;}
    Py_INCREF(__pyx_k9p);
    PyTuple_SET_ITEM(__pyx_3, 0, __pyx_k9p);
    __pyx_4 = PyObject_CallObject(__pyx_1, __pyx_3); if (!__pyx_4) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 174; goto __pyx_L1;}
    Py_DECREF(__pyx_1); __pyx_1 = 0;
    Py_DECREF(__pyx_3); __pyx_3 = 0;
    Py_DECREF(__pyx_v_word);
    __pyx_v_word = __pyx_4;
    __pyx_4 = 0;
    goto __pyx_L2;
  }
  __pyx_L2:;

  /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":176 */
  __pyx_2 = (((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->max_cache_size > 0);
  if (__pyx_2) {
    /*try:*/ {

      /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":178 */
      __pyx_1 = PyObject_GetItem(((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->cache, __pyx_v_word); if (!__pyx_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 178; goto __pyx_L4;}
      Py_DECREF(__pyx_v_cacheditem);
      __pyx_v_cacheditem = __pyx_1;
      __pyx_1 = 0;

      /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":179 */
      __pyx_3 = __Pyx_GetItemInt(__pyx_v_cacheditem, 0); if (!__pyx_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 179; goto __pyx_L4;}
      Py_DECREF(__pyx_v_result);
      __pyx_v_result = __pyx_3;
      __pyx_3 = 0;

      /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":180 */
      if (__Pyx_SetItemInt(__pyx_v_cacheditem, 1, ((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->counter) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 180; goto __pyx_L4;}

      /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":181 */
      __pyx_4 = PyInt_FromLong(1); if (!__pyx_4) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 181; goto __pyx_L4;}
      __pyx_1 = PyNumber_Add(((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->counter, __pyx_4); if (!__pyx_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 181; goto __pyx_L4;}
      Py_DECREF(__pyx_4); __pyx_4 = 0;
      Py_DECREF(((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->counter);
      ((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->counter = __pyx_1;
      __pyx_1 = 0;
    }
    goto __pyx_L5;
    __pyx_L4:;
    Py_XDECREF(__pyx_3); __pyx_3 = 0;
    Py_XDECREF(__pyx_4); __pyx_4 = 0;
    Py_XDECREF(__pyx_1); __pyx_1 = 0;

    /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":182 */
    __pyx_2 = PyErr_ExceptionMatches(PyExc_KeyError);
    if (__pyx_2) {
      __Pyx_AddTraceback("Stemmer.stemWord");
      if (__Pyx_GetException(&__pyx_3, &__pyx_4, &__pyx_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 182; goto __pyx_L1;}

      /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":183 */
      __pyx_5 = PyString_AsString(__pyx_v_word); if (!__pyx_5) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 183; goto __pyx_L1;}
      __pyx_v_c_word = __pyx_5;

      /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":184 */
      __pyx_6 = PyObject_Length(__pyx_v_word); if (__pyx_6 == -1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 184; goto __pyx_L1;}
      __pyx_v_c_word = ((char *)sb_stemmer_stem(((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->cobj,((sb_symbol *)__pyx_v_c_word),__pyx_6));

      /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":185 */
      __pyx_7 = PyInt_FromLong(sb_stemmer_length(((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->cobj)); if (!__pyx_7) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 185; goto __pyx_L1;}
      Py_DECREF(__pyx_v_length);
      __pyx_v_length = __pyx_7;
      __pyx_7 = 0;

      /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":186 */
      __pyx_2 = PyInt_AsLong(__pyx_v_length); if (PyErr_Occurred()) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 186; goto __pyx_L1;}
      __pyx_7 = PyString_FromStringAndSize(__pyx_v_c_word,__pyx_2); if (!__pyx_7) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 186; goto __pyx_L1;}
      Py_DECREF(__pyx_v_result);
      __pyx_v_result = __pyx_7;
      __pyx_7 = 0;

      /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":187 */
      __pyx_7 = PyList_New(2); if (!__pyx_7) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 187; goto __pyx_L1;}
      Py_INCREF(__pyx_v_result);
      PyList_SET_ITEM(__pyx_7, 0, __pyx_v_result);
      Py_INCREF(((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->counter);
      PyList_SET_ITEM(__pyx_7, 1, ((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->counter);
      if (PyObject_SetItem(((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->cache, __pyx_v_word, __pyx_7) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 187; goto __pyx_L1;}
      Py_DECREF(__pyx_7); __pyx_7 = 0;

      /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":188 */
      __pyx_7 = PyInt_FromLong(1); if (!__pyx_7) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 188; goto __pyx_L1;}
      __pyx_8 = PyNumber_Add(((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->counter, __pyx_7); if (!__pyx_8) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 188; goto __pyx_L1;}
      Py_DECREF(__pyx_7); __pyx_7 = 0;
      Py_DECREF(((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->counter);
      ((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->counter = __pyx_8;
      __pyx_8 = 0;

      /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":189 */
      __pyx_7 = PyObject_GetAttr(__pyx_v_self, __pyx_n___purgeCache); if (!__pyx_7) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 189; goto __pyx_L1;}
      __pyx_8 = PyObject_CallObject(__pyx_7, 0); if (!__pyx_8) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 189; goto __pyx_L1;}
      Py_DECREF(__pyx_7); __pyx_7 = 0;
      Py_DECREF(__pyx_8); __pyx_8 = 0;
      Py_DECREF(__pyx_3); __pyx_3 = 0;
      Py_DECREF(__pyx_4); __pyx_4 = 0;
      Py_DECREF(__pyx_1); __pyx_1 = 0;
      goto __pyx_L5;
    }
    goto __pyx_L1;
    __pyx_L5:;
    goto __pyx_L3;
  }
  /*else*/ {

    /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":191 */
    __pyx_5 = PyString_AsString(__pyx_v_word); if (!__pyx_5) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 191; goto __pyx_L1;}
    __pyx_v_c_word = __pyx_5;

    /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":192 */
    __pyx_6 = PyObject_Length(__pyx_v_word); if (__pyx_6 == -1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 192; goto __pyx_L1;}
    __pyx_v_c_word = ((char *)sb_stemmer_stem(((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->cobj,((sb_symbol *)__pyx_v_c_word),__pyx_6));

    /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":193 */
    __pyx_7 = PyInt_FromLong(sb_stemmer_length(((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->cobj)); if (!__pyx_7) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 193; goto __pyx_L1;}
    Py_DECREF(__pyx_v_length);
    __pyx_v_length = __pyx_7;
    __pyx_7 = 0;

    /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":194 */
    __pyx_2 = PyInt_AsLong(__pyx_v_length); if (PyErr_Occurred()) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 194; goto __pyx_L1;}
    __pyx_8 = PyString_FromStringAndSize(__pyx_v_c_word,__pyx_2); if (!__pyx_8) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 194; goto __pyx_L1;}
    Py_DECREF(__pyx_v_result);
    __pyx_v_result = __pyx_8;
    __pyx_8 = 0;
  }
  __pyx_L3:;

  /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":196 */
  __pyx_2 = PyObject_IsTrue(__pyx_v_was_unicode); if (__pyx_2 < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 196; goto __pyx_L1;}
  if (__pyx_2) {
    __pyx_3 = PyObject_GetAttr(__pyx_v_result, __pyx_n_decode); if (!__pyx_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 197; goto __pyx_L1;}
    __pyx_4 = PyTuple_New(1); if (!__pyx_4) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 197; goto __pyx_L1;}
    Py_INCREF(__pyx_k9p);
    PyTuple_SET_ITEM(__pyx_4, 0, __pyx_k9p);
    __pyx_1 = PyObject_CallObject(__pyx_3, __pyx_4); if (!__pyx_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 197; goto __pyx_L1;}
    Py_DECREF(__pyx_3); __pyx_3 = 0;
    Py_DECREF(__pyx_4); __pyx_4 = 0;
    __pyx_r = __pyx_1;
    __pyx_1 = 0;
    goto __pyx_L0;
    goto __pyx_L6;
  }
  __pyx_L6:;

  /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":198 */
  Py_INCREF(__pyx_v_result);
  __pyx_r = __pyx_v_result;
  goto __pyx_L0;

  __pyx_r = Py_None; Py_INCREF(Py_None);
  goto __pyx_L0;
  __pyx_L1:;
  Py_XDECREF(__pyx_1);
  Py_XDECREF(__pyx_3);
  Py_XDECREF(__pyx_4);
  Py_XDECREF(__pyx_7);
  Py_XDECREF(__pyx_8);
  __Pyx_AddTraceback("Stemmer.Stemmer.stemWord");
  __pyx_r = 0;
  __pyx_L0:;
  Py_DECREF(__pyx_v_was_unicode);
  Py_DECREF(__pyx_v_cacheditem);
  Py_DECREF(__pyx_v_result);
  Py_DECREF(__pyx_v_length);
  Py_DECREF(__pyx_v_self);
  Py_DECREF(__pyx_v_word);
  return __pyx_r;
}