Exemple #1
0
		stem(const char *lang, const char *enc) {
			m_stem = sb_stemmer_new(lang, enc);
			if (!m_stem) {
				m_stem = sb_stemmer_new("eng", enc);
				if (!m_stem)
					throw std::bad_alloc();
			}
		}
Exemple #2
0
		/**
		 * Construct a wrapper whose stemmer uses the given language.
		 *
		 * @param szLanguage  algorithm name for sb_stemmer_new(); when NULL the
		 *                    "english" algorithm is used instead.
		 *
		 * The encoding argument is NULL, i.e. UTF-8. The resulting handle is
		 * stored in m_pStemmer without being checked here.
		 */
		CSnowballWrapper::CSnowballWrapper(const tchar* szLanguage)
			: m_pStemmerBuffer(NULL)
			, m_nBufferSize(0)
			, m_pIrregularWords(NULL)
		{
			const char* szAlgorithm = (szLanguage != NULL) ? szLanguage : "english";
			m_pStemmer = sb_stemmer_new(szAlgorithm, NULL); // NULL encoding: UTF-8
		}
Exemple #3
0
		/**
		 * Construct a wrapper whose stemmer uses the given language and encoding.
		 *
		 * @param szLanguage  algorithm name for sb_stemmer_new(); when NULL the
		 *                    "english" algorithm is used instead.
		 * @param szEnc       encoding name forwarded unchanged to sb_stemmer_new().
		 *
		 * The resulting handle is stored in m_pStemmer without being checked here.
		 */
		CSnowballWrapper::CSnowballWrapper(const tchar* szLanguage,const tchar* szEnc)
			: m_pStemmerBuffer(NULL)
			, m_nBufferSize(0)
			, m_pIrregularWords(NULL)
		{
			const char* szAlgorithm = (szLanguage != NULL) ? szLanguage : "english";
			m_pStemmer = sb_stemmer_new(szAlgorithm, szEnc);
		}
Exemple #4
0
		/**
		 * Construct a wrapper with an optional irregular-words source.
		 *
		 * @param szLanguage        algorithm name for sb_stemmer_new(); when NULL
		 *                          the "english" algorithm is used instead.
		 * @param szIrregularWords  when non-NULL, an empty irregular-words map is
		 *                          allocated and this string is remembered in
		 *                          m_sIrregularWords.
		 * @param szEnc             encoding name forwarded to sb_stemmer_new().
		 *
		 * The stemmer handle is stored in m_pStemmer without being checked here.
		 */
		CSnowballWrapper::CSnowballWrapper(const tchar* szLanguage,const tchar* szIrregularWords,const tchar* szEnc)
			: m_pStemmerBuffer(NULL)
			, m_nBufferSize(0)
			, m_pIrregularWords(NULL)
		{
			if (szIrregularWords != NULL)
			{
				m_pIrregularWords = new map<string,string>();
				m_sIrregularWords = szIrregularWords;
			}

			const char* szAlgorithm = (szLanguage != NULL) ? szLanguage : "english";
			m_pStemmer = sb_stemmer_new(szAlgorithm, szEnc);
		}
Datum stem_token_arr(PG_FUNCTION_ARGS)
{
    if (PG_ARGISNULL(0)) {
        PG_RETURN_NULL();
    }
    /* Prepare elements to receive input text[] */
    ArrayType *arr = PG_GETARG_ARRAYTYPE_P(0);
    Datum *dtum;
    bool *nulls;
    int ndim;
    /* Deconstruct input text[] */
    deconstruct_array(arr, TEXTOID, -1, false, 'i', &dtum, &nulls, &ndim);
    /* Prepare stemmer */
    struct sb_stemmer *stemmer = sb_stemmer_new(
                                     "english" /* language */, NULL /* language encoding NULL for UTF-8 */);
    Assert(stemmer);

    /* Call stemming code */
    text **result = (text **) palloc(ndim * sizeof(text * ));
    for(int i=0; i< ndim; i++) {
        text *token = dtum[i] == 0 ? NULL : DatumGetTextP(dtum[i]);
        char *empty;
        if(token == NULL) {
            empty =  (char *)palloc(sizeof(char));
            empty[0] = '\0';
        }
        result[i] = (token == NULL ?
                     cstring_to_text(empty) :
                     cstring_to_text(stem_token_text(stemmer, token)));
    }
    ArrayType *res = construct_array((Datum*)result, ndim, TEXTOID, -1, false, 'i');
    sb_stemmer_delete(stemmer);
    PG_RETURN_ARRAYTYPE_P(res);
}
Exemple #6
0
		/**
		 * Default constructor: creates an "english" stemmer with a NULL
		 * encoding argument (UTF-8) and leaves the buffer and irregular-words
		 * members empty. The handle is stored without being checked here.
		 */
		CSnowballWrapper::CSnowballWrapper()
			: m_pStemmerBuffer(NULL)
			, m_nBufferSize(0)
			, m_pIrregularWords(NULL)
		{
			const char* const szDefaultAlgorithm = "english";
			m_pStemmer = sb_stemmer_new(szDefaultAlgorithm, NULL); // NULL encoding: UTF-8
		}
/// Build a token filter that stems terms with the Snowball algorithm `name`.
///
/// @param input  upstream token stream, handed to the TokenFilter base.
/// @param name   Snowball algorithm name; converted to UTF-8 for libstemmer.
/// @throws IllegalArgumentException when libstemmer has no stemmer for `name`.
SnowballFilter::SnowballFilter(const TokenStreamPtr& input, const String& name)
    : TokenFilter(input) {
    stemmer = sb_stemmer_new(StringUtils::toUTF8(name).c_str(), "UTF_8");
    if (!stemmer) {
        boost::throw_exception(IllegalArgumentException(L"language not available for stemming:" + name));
    }

    termAtt = addAttribute<TermAttribute>();
    utf8Result = newLucene<UTF8Result>();
}
Exemple #8
0
/// Create a stemmer wrapper for the given country/language code.
///
/// The code "UNKNOWN" is mapped to "en"; every other code is passed to
/// libstemmer unchanged. (The previous `if (strcmp(...))` test was inverted:
/// strcmp returns non-zero when the strings DIFFER, so every known code was
/// replaced by "en" and "UNKNOWN" itself was handed to sb_stemmer_new.)
///
/// @param countryCode  language code, or "UNKNOWN"
/// @return wrapper holding the original code and the stemmer handle returned
///         by sb_stemmer_new (not checked here)
SbStemmerWrapper SbStemmerWrapper::create(string countryCode) {
  const char *cCode = countryCode.c_str();
  if (strcmp(cCode, "UNKNOWN") == 0) {
    cCode = "en";
  }
  auto stemmer = sb_stemmer_new(cCode, "UTF_8");
  return SbStemmerWrapper(countryCode, stemmer);
}
Datum stem_token(PG_FUNCTION_ARGS)
{
    if (PG_ARGISNULL(0)) {
        PG_RETURN_NULL();
    }
    text * org_token = PG_GETARG_TEXT_P(0);
    struct sb_stemmer *stemmer = sb_stemmer_new(
                                     "english" /* language */, NULL /* language encoding NULL for UTF-8 */);
    Assert(stemmer);
    text *stemmed = cstring_to_text(stem_token_text(stemmer, org_token));
    sb_stemmer_delete(stemmer);
    PG_RETURN_TEXT_P(stemmed);
}
Exemple #10
0
/*
 * Create a Stemmer backed by a Snowball stemmer for `language`.
 *
 * Returns NULL when libstemmer has no stemmer for the language, or when the
 * wrapper struct cannot be allocated (in which case the Snowball handle is
 * released again so it does not leak).
 */
Stemmer *__newSnowballStemmer(const char *language) {
    struct sb_stemmer *sb = sb_stemmer_new(language, NULL);
    // No stemmer available for this language
    if (!sb) {
        return NULL;
    }

    Stemmer *ret = malloc(sizeof(Stemmer));
    if (!ret) {
        // Allocation failed: do not leak the Snowball handle
        sb_stemmer_delete(sb);
        return NULL;
    }
    ret->ctx = sb;
    ret->Stem = __sbstemmer_Stem;
    ret->Free = __sbstemmer_Free;
    return ret;
}
Exemple #11
0
/*
 * Token-filter callback: stems the data of `current_token` and stores the
 * stemmed form in `next_token`.
 *
 * Does nothing unless the context encoding is UTF-8. Any stemmer already
 * held by the filter is deleted and a fresh "english"/"UTF_8" stemmer is
 * created on every call. Failures to create the stemmer or to stem the
 * token are reported through GRN_PLUGIN_ERROR and leave next_token untouched.
 */
static void
stem_filter(grn_ctx *ctx,
            grn_token *current_token,
            grn_token *next_token,
            void *user_data)
{
  /* TODO: Detect algorithm from the current token. */
  const char *algorithm = "english";
  const char *encoding = "UTF_8";
  grn_stem_token_filter *token_filter = user_data;
  grn_obj *data;
  const sb_symbol *stemmed;

  if (GRN_CTX_GET_ENCODING(ctx) != GRN_ENC_UTF8) {
    return;
  }

  data = grn_token_get_data(ctx, current_token);

  /* Replace whatever stemmer the previous call left behind. */
  if (token_filter->stemmer) {
    sb_stemmer_delete(token_filter->stemmer);
  }
  token_filter->stemmer = sb_stemmer_new(algorithm, encoding);
  if (!token_filter->stemmer) {
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                     "[token-filter][stem] "
                     "failed to create stemmer: "
                     "algorithm=<%s>, encoding=<%s>",
                     algorithm, encoding);
    return;
  }

  stemmed = sb_stemmer_stem(token_filter->stemmer,
                            GRN_TEXT_VALUE(data), GRN_TEXT_LEN(data));
  if (!stemmed) {
    GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE,
                     "[token-filter][stem] "
                     "failed to allocate memory for stemmed word: <%.*s>",
                     (int)GRN_TEXT_LEN(data), GRN_TEXT_VALUE(data));
    return;
  }

  grn_token_set_data(ctx, next_token,
                     stemmed,
                     sb_stemmer_length(token_filter->stemmer));
}
/*
 * Create the Snowball stemmer for sp->lang->name (UTF-8) and store it in
 * sp->stemmer.
 *
 * Returns 0 on success. On failure, sets *error_r to a message naming the
 * language, destroys the filter via fts_filter_stemmer_snowball_destroy()
 * and returns -1.
 */
static int
fts_filter_stemmer_snowball_create_stemmer(struct fts_filter_stemmer_snowball *sp,
        const char **error_r)
{
    sp->stemmer = sb_stemmer_new(sp->lang->name, "UTF_8");
    if (sp->stemmer != NULL)
        return 0;

    /* Build the message before the filter is destroyed. */
    *error_r = t_strdup_printf(
                   "Creating a Snowball stemmer for language '%s' failed.",
                   sp->lang->name);
    fts_filter_stemmer_snowball_destroy(&sp->filter);
    return -1;
}
Exemple #13
0
/*
 * Initialise the German language module.
 *
 * Fills in the language code ("deu"), the full name, the parse callback and
 * a freshly allocated iplus1_german_t holding a UTF-8 Snowball stemmer,
 * then loads the stop words.
 *
 * Returns IPLUS1_SUCCESS, or IPLUS1_FAIL if the parameter block cannot be
 * allocated or no stemmer is available.
 * NOTE(review): on stemmer failure lang->param and lang->full_lang stay
 * allocated; presumably the caller releases them - confirm.
 */
int init(iplus1_lang_t* lang)
{
    iplus1_german_t* deu;

    strcpy(lang->lang, "deu");
    lang->full_lang = strdup("german");

    deu = (iplus1_german_t*)malloc(sizeof(iplus1_german_t));
    lang->param = deu;
    if (deu == NULL)
        return IPLUS1_FAIL;
    lang->parse = parse;

    deu->stemmer = sb_stemmer_new("deu", "UTF_8");
    if (deu->stemmer == NULL) {
        fprintf(stderr, "could not find german stemmer\n");
        return IPLUS1_FAIL;
    }

    load_stopwords(deu);
    return IPLUS1_SUCCESS;
}
Exemple #14
0
/*
 * Initialise the Portuguese language module.
 *
 * Fills in the language code ("por"), the full name, the parse callback and
 * a freshly allocated iplus1_portugese_t holding a UTF-8 Snowball stemmer,
 * then loads the stop words.
 *
 * Returns IPLUS1_SUCCESS, or IPLUS1_FAIL if the parameter block cannot be
 * allocated or no stemmer is available.
 * NOTE(review): on stemmer failure lang->param and lang->full_lang stay
 * allocated; presumably the caller releases them - confirm.
 */
int init(iplus1_lang_t* lang)
{
    iplus1_portugese_t* por;

    strcpy(lang->lang, "por");
    lang->full_lang = strdup("portugese");

    por = (iplus1_portugese_t*)malloc(sizeof(iplus1_portugese_t));
    lang->param = por;
    if (por == NULL)
        return IPLUS1_FAIL;
    lang->parse = parse;

    por->stemmer = sb_stemmer_new("por", "UTF_8");
    if (por->stemmer == NULL) {
        fprintf(stderr, "could not find portugese stemmer\n");
        return IPLUS1_FAIL;
    }

    load_stopwords(por);
    return IPLUS1_SUCCESS;
}
/*
 * Initialise a SnowballStemmer for `language`.
 *
 * Stores a clone of `language` in the instance and creates a UTF-8 Snowball
 * stemmer from the lower-cased first two code points of `language`.
 * THROWs an ERR if libstemmer has no stemmer for that code.
 * Returns `self`.
 */
SnowballStemmer*
SnowStemmer_init(SnowballStemmer *self, String *language) {
    Analyzer_init((Analyzer*)self);
    SnowballStemmerIVARS *const ivars = SnowStemmer_IVARS(self);
    ivars->language = Str_Clone(language);

    // Derive the two-letter stemmer code case-insensitively.
    char iso_code[3];
    iso_code[0] = tolower(Str_Code_Point_At(language, 0));
    iso_code[1] = tolower(Str_Code_Point_At(language, 1));
    iso_code[2] = '\0';

    ivars->snowstemmer = sb_stemmer_new(iso_code, "UTF_8");
    if (ivars->snowstemmer == NULL) {
        THROW(ERR, "Can't find a Snowball stemmer for %o", language);
    }

    return self;
}
Exemple #16
0
/*
 * Initialise the English language module.
 *
 * Fills in the language code ("eng"), the full name, the parse callback and
 * a freshly allocated iplus1_english_t holding a UTF-8 Snowball stemmer,
 * then loads the stop words.
 *
 * Returns IPLUS1_SUCCESS, or IPLUS1_FAIL if the parameter block cannot be
 * allocated or no stemmer is available.
 * NOTE(review): on stemmer failure lang->param and lang->full_lang stay
 * allocated; presumably the caller releases them - confirm.
 */
int init(iplus1_lang_t* lang)
{
    iplus1_english_t* eng;

    strcpy(lang->lang, "eng");
    lang->full_lang = strdup("english");

    eng = (iplus1_english_t*)malloc(sizeof(iplus1_english_t));
    lang->param = eng;
    if (eng == NULL)
        return IPLUS1_FAIL;
    lang->parse = parse;

    eng->stemmer = sb_stemmer_new("eng", "UTF_8");
    if (eng->stemmer == NULL) {
        fprintf(stderr, "could not find english stemmer\n");
        return IPLUS1_FAIL;
    }

    load_stopwords(eng);
    return IPLUS1_SUCCESS;
}
/*
 * fuzzy_snowball(word, language = default) -> String or nil
 *
 * Stems `word` with the Snowball stemmer for `language` (UTF-8) and returns
 * the stem as a new string carrying the encoding of `word`. Returns nil when
 * libstemmer has no stemmer for the language.
 *
 * Raises ArgumentError if `word` is not a String, and NoMemoryError if
 * libstemmer cannot allocate the stemmed word (sb_stemmer_stem returns NULL
 * in that case; previously that NULL was passed on to rb_enc_str_new).
 */
VALUE fuzzy_snowball(int argc, VALUE * argv, VALUE self) {
    VALUE word, language, result = Qnil;

    rb_scan_args(argc, argv, "11", &word, &language);
    if (NIL_P(language))
        language = fuzzy_default_language;

    if (TYPE(word) != T_STRING)
        rb_raise(rb_eArgError, "invalid word, expect string");

    struct sb_stemmer *stemmer = sb_stemmer_new(CSTRING(language), "UTF_8");
    if (stemmer) {
        const sb_symbol *stem = sb_stemmer_stem(stemmer,
                                                (const sb_symbol *)RSTRING_PTR(word),
                                                (int)RSTRING_LEN(word));
        if (stem == NULL) {
            /* Release the stemmer before raising: rb_raise does not return. */
            sb_stemmer_delete(stemmer);
            rb_raise(rb_eNoMemError, "failed to allocate memory for stemmed word");
        }
        result = rb_enc_str_new((const char *)stem, sb_stemmer_length(stemmer), rb_enc_get(word));
        sb_stemmer_delete(stemmer);
    }

    return result;
}
Exemple #18
0
/*
 * Look up (or create and cache) the Snowball stemmer for the language named
 * by the atom in `t`.
 *
 * On success stores the stemmer in *stemmer and returns TRUE. Returns the
 * result of type_error() if `t` is not an atom, resource_error() if stemmer
 * creation failed with errno == ENOMEM, and domain_error() for any other
 * creation failure. When every cache slot is already occupied by another
 * language the assert fires (cache eviction is not implemented) and FALSE is
 * returned.
 */
static int
get_lang_stemmer(term_t t, struct sb_stemmer **stemmer)
{ stem_cache *cache = get_cache();
  atom_t lang;
  int slot;

  if ( !PL_get_atom(t, &lang) )
    return type_error("atom", t);

  /* Pass 1: is this language already cached? */
  for(slot=0; slot<CACHE_SIZE; slot++)
  { if ( cache->stemmers[slot].language != lang )
      continue;

    *stemmer = cache->stemmers[slot].stemmer;
    return TRUE;
  }

  /* Pass 2: create the stemmer in the first free slot. */
  for(slot=0; slot<CACHE_SIZE; slot++)
  { struct sb_stemmer *created;

    if ( cache->stemmers[slot].stemmer )
      continue;

    created = sb_stemmer_new(PL_atom_chars(lang), NULL);
    if ( !created )
    { if ( errno == ENOMEM )
        return resource_error("memory");
      return domain_error("snowball_algorithm", t);
    }

    cache->stemmers[slot].language = lang;
    cache->stemmers[slot].stemmer  = created;
    PL_register_atom(cache->stemmers[slot].language);

    *stemmer = created;
    return TRUE;
  }

  assert(0);				/* TBD: clean cache */
  return FALSE;
}
Exemple #19
0
/// Set up the helper tables and generators used by Utils:
///  - a 512-entry permutation table filled from `p` (indices wrap at 256),
///  - the CRC table (via crc32Initialize()),
///  - a CRandomMersenne generator seeded with 4,
///  - the Perlin noise source (frequency 800, 20 octaves),
///  - an "en"/UTF-8 Snowball stemmer, except when Q_OS_MAC is defined.
Utils::Utils()
{
	const int kPermutationEntries = 512;
	const size_t kPermutationBytes = kPermutationEntries * sizeof(int);

	permutationTable_ = static_cast<int *>(malloc(kPermutationBytes));
	memset(permutationTable_, 0, kPermutationBytes);
	for (int idx = 0; idx < kPermutationEntries; ++idx)
	{
		permutationTable_[idx] = p[idx & 255];
	}

	// NOTE(review): clears 256 bytes, not necessarily 256 entries - confirm
	// against the declared type of crcTab_.
	memset(crcTab_, 0, 256);
	crc32Initialize();

	randomGenerator_ = new CRandomMersenne(4);

	perlin_.SetFrequency(800);
	perlin_.SetOctaveCount(20);

	#ifndef Q_OS_MAC
	stemmer_ = sb_stemmer_new("en", "UTF_8");
	#endif
}
/*
 * Command-line entry point: stem every word read from the input and write
 * the result to the output.
 *
 * Options:
 *   -o FILE   write to FILE instead of stdout
 *   -i FILE   read from FILE instead of stdin
 *   -l LANG   stemming language (default "english")
 *   -c ENC    character encoding (default NULL)
 *   -p        enable pretty output
 *   -h        show usage
 *
 * Exits with status 1 on a missing option value, on a file that cannot be
 * opened, or when no stemmer exists for the language/encoding.
 */
int
main(int argc, char * argv[])
{
    char * in = 0;
    char * out = 0;
    char * language = "english";
    char * charenc = NULL;
    FILE * f_in;
    FILE * f_out;
    struct sb_stemmer * stemmer;
    char * s;
    int i = 1;

    pretty = 0;
    progname = argv[0];

    while (i < argc) {
        char ** value_slot = 0;   /* where the value of a valued option goes */

        s = argv[i++];
        if (s[0] != '-') {
            fprintf(stderr, "unexpected parameter %s\n", s);
            usage(1);
            continue;
        }

        if (strcmp(s, "-o") == 0) {
            value_slot = &out;
        } else if (strcmp(s, "-i") == 0) {
            value_slot = &in;
        } else if (strcmp(s, "-l") == 0) {
            value_slot = &language;
        } else if (strcmp(s, "-c") == 0) {
            value_slot = &charenc;
        }

        if (value_slot != 0) {
            if (i >= argc) {
                fprintf(stderr, "%s requires an argument\n", s);
                exit(1);
            }
            *value_slot = argv[i++];
        } else if (strcmp(s, "-p") == 0) {
            pretty = 1;
        } else if (strcmp(s, "-h") == 0) {
            usage(0);
        } else {
            fprintf(stderr, "option %s unknown\n", s);
            usage(1);
        }
    }

    /* prepare the files */
    if (in == 0) {
        f_in = stdin;
    } else {
        f_in = fopen(in, "r");
    }
    if (f_in == 0) {
        fprintf(stderr, "file %s not found\n", in);
        exit(1);
    }

    if (out == 0) {
        f_out = stdout;
    } else {
        f_out = fopen(out, "w");
    }
    if (f_out == 0) {
        fprintf(stderr, "file %s cannot be opened\n", out);
        exit(1);
    }

    /* do the stemming process: */
    stemmer = sb_stemmer_new(language, charenc);
    if (stemmer == 0) {
        if (charenc == NULL) {
            fprintf(stderr, "language `%s' not available for stemming\n", language);
        } else {
            fprintf(stderr, "language `%s' not available for stemming in encoding `%s'\n", language, charenc);
        }
        exit(1);
    }
    stem_file(stemmer, f_in, f_out);
    sb_stemmer_delete(stemmer);

    /* only close streams this function opened itself */
    if (in != 0) (void) fclose(f_in);
    if (out != 0) (void) fclose(f_out);

    return 0;
}
// Plugin command: split the text in parameter 1 into words, stem each word
// with the Snowball algorithm selected by parameter 4, and return the
// original words in array parameter 2 and their stems in array parameter 3.
//
// Unrecognised language values fall back to "english". When no stemmer can
// be created both output arrays are returned as constructed. Words for which
// stemming returns NULL are skipped.
void Snowball(sLONG_PTR *pResult, PackagePtr pParams)
{
	C_TEXT Param1;
	ARRAY_TEXT Param2;
	ARRAY_TEXT Param3;
	C_LONGINT Param4;

	Param1.fromParamAtIndex(pParams, 1);
	Param4.fromParamAtIndex(pParams, 4);

	CUTF8String t;
	Param1.copyUTF8String(&t);
	std::string str((const char *)t.c_str());
	std::vector<std::string> words = split(str);

	// Map the language selector onto a libstemmer algorithm name.
	const char *language = "english";
	switch ((Snowball_Language)Param4.getIntValue()) {
		case Snowball_Danish:     language = "danish";     break;
		case Snowball_Dutch:      language = "dutch";      break;
		case Snowball_English:    language = "english";    break;
		case Snowball_Finnish:    language = "finnish";    break;
		case Snowball_French:     language = "french";     break;
		case Snowball_German:     language = "german";     break;
		case Snowball_Hungarian:  language = "hungarian";  break;
		case Snowball_Italian:    language = "italian";    break;
		case Snowball_Norwegian:  language = "norwegian";  break;
		case Snowball_Portuguese: language = "portuguese"; break;
		case Snowball_Romanian:   language = "romanian";   break;
		case Snowball_Russian:    language = "russian";    break;
		case Snowball_Spanish:    language = "spanish";    break;
		case Snowball_Swedish:    language = "swedish";    break;
		case Snowball_Turkish:    language = "turkish";    break;
		default:
			break;
	}

	// A NULL encoding means UTF-8.
	struct sb_stemmer *stemmer = sb_stemmer_new(language, NULL);

	if (stemmer)
	{
		Param2.setSize(1);
		Param3.setSize(1);

		for (std::vector<std::string>::size_type k = 0; k < words.size(); ++k) {
			const std::string &word = words[k];
			const sb_symbol *symbol = (const sb_symbol *)word.c_str();
			int size = word.length();

			const sb_symbol *stemmed = sb_stemmer_stem(stemmer, symbol, size);
			if (!stemmed)
				continue;

			CUTF8String w((const uint8_t *)symbol);
			CUTF8String s((const uint8_t *)stemmed);
			Param2.appendUTF8String(&w);
			Param3.appendUTF8String(&s);
		}

		sb_stemmer_delete(stemmer);
	}

	Param2.toParamAtIndex(pParams, 2);
	Param3.toParamAtIndex(pParams, 3);
}
Exemple #22
0
/**
 * Stems the given word with the Snowball stemmer for the given locale.
 *
 * The stemmer for the most recently used locale is kept in function-local
 * statics and is only re-created when the locale changes.
 *
 * \param word   Word to stem.
 * \param locale Snowball algorithm name / language code.
 * \return The stemmed word, or the unchanged word if no stemmer exists for
 *         the locale or the stemmer fails to allocate its result.
 *
 * The word is handed to libstemmer directly as UTF-8 bytes and the result is
 * decoded from UTF-8. The previous implementation copied the bytes into a
 * malloc'd buffer that was never freed, dereferenced a failed realloc and a
 * NULL stem result, and rebuilt the result one byte per QChar, which
 * corrupted non-ASCII stems.
 */
QString RStemmer::stem(const QString& word, const QString& locale) {
    static QString prevLocale = "";
    static struct sb_stemmer* stemmer = NULL;

    // keep always the last used stemmer in memory:
    if (locale!=prevLocale) {
        if (stemmer!=NULL) {
            sb_stemmer_delete(stemmer);
            stemmer = NULL;
        }
        stemmer = sb_stemmer_new(locale.toUtf8().constData(), "UTF_8");
        prevLocale = locale;
    }

    if (stemmer == NULL) {
        qWarning() << "No stemmer found for locale: " << locale;
        return word;
    }

    const QByteArray utf8Word = word.toUtf8();
    const sb_symbol* sbStemmed = sb_stemmer_stem(
        stemmer,
        reinterpret_cast<const sb_symbol*>(utf8Word.constData()),
        utf8Word.size());
    if (sbStemmed == NULL) {
        // sb_stemmer_stem returns NULL when it runs out of memory.
        qWarning() << "RStemmer::stem: Memory allocation error.";
        return word;
    }

    // The result buffer belongs to the stemmer; copy it out as UTF-8.
    return QString::fromUtf8(reinterpret_cast<const char*>(sbStemmed),
                             sb_stemmer_length(stemmer));
}
Exemple #23
0
 /**
  * Create a stemmer for `language` (UTF-8).
  * The special name "none" disables stemming: no Snowball stemmer is
  * created and _stemmer stays NULL.
  */
 Stemmer::Stemmer( const string& language ) {
     if ( language == "none" ) {
         _stemmer = NULL;
         return;
     }
     _stemmer = sb_stemmer_new(language.c_str(), "UTF_8");
 }
Exemple #24
0
  /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":68 */
  Py_INCREF(__pyx_v_py_algs);
  __pyx_r = __pyx_v_py_algs;
  goto __pyx_L0;

  __pyx_r = Py_None; Py_INCREF(Py_None);
  goto __pyx_L0;
  __pyx_L1:;
  Py_XDECREF(__pyx_1);
  Py_XDECREF(__pyx_3);
  Py_XDECREF(__pyx_4);
  __Pyx_AddTraceback("Stemmer.algorithms");
  __pyx_r = 0;
  __pyx_L0:;
  Py_DECREF(__pyx_v_py_algs);
  Py_DECREF(__pyx_v_aliases);
  return __pyx_r;
}

/*
 * Generated wrapper for Stemmer.version(): takes no arguments and returns a
 * new reference to the module-level object __pyx_k2p.
 * Returns 0 (NULL) if argument parsing fails.
 */
static PyObject *__pyx_f_7Stemmer_version(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
static char __pyx_doc_7Stemmer_version[] = "Get the version string of the stemming module.\n\n    This version number is for the Stemmer module as a whole (not for an\n    individual stemming algorithm).\n\n    ";
static PyObject *__pyx_f_7Stemmer_version(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
  PyObject *__pyx_r;
  /* Empty keyword list and empty format string: no arguments are accepted. */
  static char *__pyx_argnames[] = {0};
  if (!PyArg_ParseTupleAndKeywords(__pyx_args, __pyx_kwds, "", __pyx_argnames)) return 0;
  /* Take a reference for the caller before returning the shared object. */
  Py_INCREF(__pyx_k2p);
  __pyx_r = __pyx_k2p;
  goto __pyx_L0;

  /* Not reached: the goto above always jumps past this default return. */
  __pyx_r = Py_None; Py_INCREF(Py_None);
  __pyx_L0:;
  return __pyx_r;
}

/*
 * Generated wrapper for Stemmer.__init__(algorithm, maxCacheSize=<default>).
 *
 * Creates the underlying Snowball stemmer for `algorithm`, stores
 * maxCacheSize, and resets the instance's counter (to 0) and cache (to a new
 * dict). Returns 0 on success and -1 with a Python exception set on failure;
 * a KeyError is raised when sb_stemmer_new returns NULL.
 */
static int __pyx_f_7Stemmer_7Stemmer___init__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
static int __pyx_f_7Stemmer_7Stemmer___init__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
  PyObject *__pyx_v_algorithm = 0;
  int __pyx_v_maxCacheSize;
  int __pyx_r;
  char *__pyx_1;
  int __pyx_2;
  PyObject *__pyx_3 = 0;
  PyObject *__pyx_4 = 0;
  static char *__pyx_argnames[] = {"algorithm","maxCacheSize",0};
  /* maxCacheSize is optional ("O|i"); start from the default in __pyx_d2. */
  __pyx_v_maxCacheSize = __pyx_d2;
  if (!PyArg_ParseTupleAndKeywords(__pyx_args, __pyx_kwds, "O|i", __pyx_argnames, &__pyx_v_algorithm, &__pyx_v_maxCacheSize)) return -1;
  /* Hold references for the duration of the call; released at __pyx_L0. */
  Py_INCREF(__pyx_v_self);
  Py_INCREF(__pyx_v_algorithm);

  /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":118 */
  /* Convert the algorithm object to a C string and create the stemmer. */
  /* NOTE(review): __pyx_k3 is presumably the encoding name - confirm. */
  __pyx_1 = PyString_AsString(__pyx_v_algorithm); if (!__pyx_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 118; goto __pyx_L1;}
  ((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->cobj = sb_stemmer_new(__pyx_1,__pyx_k3);

  /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":119 */
  /* No stemmer was created: raise KeyError(__pyx_k4p % algorithm). */
  __pyx_2 = (((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->cobj == NULL);
  if (__pyx_2) {
    __pyx_3 = PyNumber_Remainder(__pyx_k4p, __pyx_v_algorithm); if (!__pyx_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 120; goto __pyx_L1;}
    __pyx_4 = PyTuple_New(1); if (!__pyx_4) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 120; goto __pyx_L1;}
    /* PyTuple_SET_ITEM steals the reference, hence the reset of __pyx_3. */
    PyTuple_SET_ITEM(__pyx_4, 0, __pyx_3);
    __pyx_3 = 0;
    __pyx_3 = PyObject_CallObject(PyExc_KeyError, __pyx_4); if (!__pyx_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 120; goto __pyx_L1;}
    Py_DECREF(__pyx_4); __pyx_4 = 0;
    __Pyx_Raise(__pyx_3, 0, 0);
    Py_DECREF(__pyx_3); __pyx_3 = 0;
    {__pyx_filename = __pyx_f[0]; __pyx_lineno = 120; goto __pyx_L1;}
    goto __pyx_L2;
  }
  __pyx_L2:;

  /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":121 */
  ((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->max_cache_size = __pyx_v_maxCacheSize;

  /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":122 */
  /* Replace the counter attribute with a fresh integer 0. */
  __pyx_4 = PyInt_FromLong(0); if (!__pyx_4) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 122; goto __pyx_L1;}
  Py_DECREF(((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->counter);
  ((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->counter = __pyx_4;
  __pyx_4 = 0;

  /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":123 */
  /* Replace the cache attribute with a new, empty dict. */
  __pyx_3 = PyDict_New(); if (!__pyx_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 123; goto __pyx_L1;}
  Py_DECREF(((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->cache);
  ((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->cache = __pyx_3;
  __pyx_3 = 0;

  __pyx_r = 0;
  goto __pyx_L0;
  /* Error exit: drop temporaries, record the traceback entry, return -1. */
  __pyx_L1:;
  Py_XDECREF(__pyx_3);
  Py_XDECREF(__pyx_4);
  __Pyx_AddTraceback("Stemmer.Stemmer.__init__");
  __pyx_r = -1;
  /* Common exit: release the references taken at entry. */
  __pyx_L0:;
  Py_DECREF(__pyx_v_self);
  Py_DECREF(__pyx_v_algorithm);
  return __pyx_r;
}
Exemple #25
0
/// Create the Greek and English Snowball stemmers used by Words.
/// Both are requested with a NULL encoding argument; the returned handles
/// are stored without being checked here.
void Words::initiliazeStemmers()
{
    const char *const defaultEncoding = NULL;

    stemmerGreek = sb_stemmer_new("greek", defaultEncoding);
    stemmerEnglish = sb_stemmer_new("english", defaultEncoding);
}
Exemple #26
0
 /**
  * Create a stemmer for the given FTS language (UTF-8).
  * A language whose string form is "none" disables stemming: no Snowball
  * stemmer is created and _stemmer stays NULL.
  */
 Stemmer::Stemmer( const FTSLanguage language ) {
     if ( language.str() == "none" ) {
         _stemmer = NULL;
         return;
     }
     _stemmer = sb_stemmer_new(language.str().c_str(), "UTF_8");
 }