/*
 * Tear down the Portuguese language handle.
 *
 * Reads the language-specific state from lang->param, walks the stop-word
 * tree in post-order with iplus1_tree_free_key as the callback, destroys and
 * frees the tree, deletes the Snowball stemmer, and finally frees
 * lang->full_lang and the state object itself.
 *
 * Always returns IPLUS1_SUCCESS. The lang structure itself is not freed here.
 */
int destroy(iplus1_lang_t* lang)
{
    iplus1_portugese_t* state = (iplus1_portugese_t*)lang->param;

    /* Stop-word tree: run the per-node key callback, then drop the tree. */
    iplus1_tree_foreach_postorder(state->stopwords, &iplus1_tree_free_key, NULL);
    iplus1_tree_destroy(state->stopwords);
    free(state->stopwords);

    /* Snowball stemmer handle. */
    sb_stemmer_delete(state->stemmer);

    /* Buffers hanging off the generic language handle. */
    free(lang->full_lang);
    free(state); /* same pointer as lang->param */

    return IPLUS1_SUCCESS;
}
/*
 * Tear down the German language handle.
 *
 * Reads the language-specific state from lang->param, walks the stop-word
 * tree in post-order with iplus1_tree_free_key as the callback, destroys and
 * frees the tree, deletes the Snowball stemmer, and finally frees
 * lang->full_lang and the state object itself.
 *
 * Always returns IPLUS1_SUCCESS. The lang structure itself is not freed here.
 */
int destroy(iplus1_lang_t* lang)
{
    iplus1_german_t* state = (iplus1_german_t*)lang->param;

    /* Stop-word tree: run the per-node key callback, then drop the tree. */
    iplus1_tree_foreach_postorder(state->stopwords, &iplus1_tree_free_key, NULL);
    iplus1_tree_destroy(state->stopwords);
    free(state->stopwords);

    /* Snowball stemmer handle. */
    sb_stemmer_delete(state->stemmer);

    /* Buffers hanging off the generic language handle. */
    free(lang->full_lang);
    free(state); /* same pointer as lang->param */

    return IPLUS1_SUCCESS;
}
/*
 * Tear down the Swedish language handle.
 *
 * Reads the language-specific state from lang->param, walks the stop-word
 * tree in post-order with iplus1_tree_free_key as the callback, destroys and
 * frees the tree, deletes the Snowball stemmer, and finally frees
 * lang->full_lang and the state object itself.
 *
 * Always returns IPLUS1_SUCCESS. The lang structure itself is not freed here.
 */
int destroy(iplus1_lang_t* lang)
{
    iplus1_swedish_t* state = (iplus1_swedish_t*)lang->param;

    /* Stop-word tree: run the per-node key callback, then drop the tree. */
    iplus1_tree_foreach_postorder(state->stopwords, &iplus1_tree_free_key, NULL);
    iplus1_tree_destroy(state->stopwords);
    free(state->stopwords);

    /* Snowball stemmer handle. */
    sb_stemmer_delete(state->stemmer);

    /* Buffers hanging off the generic language handle. */
    free(lang->full_lang);
    free(state); /* same pointer as lang->param */

    return IPLUS1_SUCCESS;
}
/*
 * Finalizer for the stem token filter.
 *
 * user_data is the grn_stem_token_filter for this filter instance; a NULL
 * value is accepted and makes the call a no-op. Otherwise the embedded token
 * is finalized, the Snowball stemmer (if one is set) is deleted, and the
 * filter object is released with GRN_PLUGIN_FREE.
 */
static void
stem_fin(grn_ctx *ctx, void *user_data)
{
  grn_stem_token_filter *filter = user_data;

  if (filter) {
    grn_tokenizer_token_fin(ctx, &(filter->token));

    /* The stemmer handle may be unset. */
    if (filter->stemmer) {
      sb_stemmer_delete(filter->stemmer);
    }

    GRN_PLUGIN_FREE(ctx, filter);
  }
}
/// Releases the three resources the wrapper holds: the Snowball stemmer
/// handle, the scratch buffer, and the irregular-words container.
/// Each one is released only when its pointer is set.
CSnowballWrapper::~CSnowballWrapper()
{
    // Snowball stemmer handle.
    if (m_pStemmer != NULL)
    {
        sb_stemmer_delete(m_pStemmer);
    }

    // Scratch buffer and its recorded size.
    if (m_pStemmerBuffer != NULL)
    {
        delete[] m_pStemmerBuffer;
        m_pStemmerBuffer = NULL;
        m_nBufferSize = 0;
    }

    // Irregular-words container: emptied, then deleted.
    if (m_pIrregularWords != NULL)
    {
        m_pIrregularWords->clear();
        delete m_pIrregularWords;
        m_pIrregularWords = NULL;
    }
}
/*
 * Stem a word with the Snowball stemmer for the given language.
 *
 * Ruby arguments (scanned with "11"): a required word and an optional
 * language. When the language is nil, fuzzy_default_language is used.
 *
 * Raises ArgumentError when word is not a String.
 *
 * Returns a new String holding the stem, tagged with the encoding of word.
 * Returns nil when no stemmer can be created for the language, or when the
 * stemmer reports a failure (sb_stemmer_stem returns NULL on out-of-memory).
 */
VALUE fuzzy_snowball(int argc, VALUE * argv, VALUE self) {
    VALUE word, language, result = Qnil;
    struct sb_stemmer *stemmer;

    rb_scan_args(argc, argv, "11", &word, &language);
    if (NIL_P(language))
        language = fuzzy_default_language;

    /* Validate before allocating the stemmer so rb_raise cannot leak it. */
    if (TYPE(word) != T_STRING)
        rb_raise(rb_eArgError, "invalid word, expect string");

    stemmer = sb_stemmer_new(CSTRING(language), "UTF_8");
    if (stemmer) {
        /* libstemmer works on unsigned bytes and takes an int length. */
        const sb_symbol *stem = sb_stemmer_stem(stemmer,
                                                (const sb_symbol *)RSTRING_PTR(word),
                                                (int)RSTRING_LEN(word));

        /* A NULL stem signals a stemmer failure; do not build a string
         * from it, leave the result as nil instead. */
        if (stem) {
            result = rb_enc_str_new((const char *)stem,
                                    sb_stemmer_length(stemmer),
                                    rb_enc_get(word));
        }

        /* The stem buffer is owned by the stemmer, so it is deleted only
         * after the bytes have been copied into the Ruby string. */
        sb_stemmer_delete(stemmer);
    }

    return result;
}
/*
 * Destroy a Stemmer object.
 *
 * Deletes the Snowball stemmer stored in s->ctx, then frees the Stemmer
 * structure itself. s must not be NULL: it is dereferenced unconditionally.
 */
void __sbstemmer_Free(Stemmer *s)
{
    /* Release the wrapped Snowball handle before the wrapper that holds it. */
    sb_stemmer_delete(s->ctx);

    free(s);
}
/// Deletes the Snowball stemmer handle stored in m_stem.
~stem()
{
    sb_stemmer_delete(m_stem);
}
/// Deletes the Snowball stemmer handle, if one is set, and clears the member.
Stemmer::~Stemmer()
{
    // Nothing to release when no stemmer is set.
    if ( !_stemmer )
        return;

    sb_stemmer_delete(_stemmer);
    _stemmer = NULL;
}
/*
 * NOTE(review): this looks like machine-generated C (the `__pyx_` prefixes and
 * the Stemmer.pyx source-location comment suggest a Pyrex/Cython translation);
 * confirm before editing by hand rather than regenerating.
 *
 * The line below contains two things:
 *
 * 1. The tail of a function whose beginning is outside this view (its
 *    traceback label is "Stemmer.Stemmer.__init__"). It creates a new dict
 *    with PyDict_New, drops the reference to the object currently stored in
 *    self->cache, stores the new dict there and sets the return value to 0.
 *    If PyDict_New fails it records the file name and line number 123, jumps
 *    to __pyx_L1, releases the temporaries, adds a traceback entry and sets
 *    the return value to -1. Both paths then drop the references held on
 *    __pyx_v_self and __pyx_v_algorithm before returning.
 *
 * 2. __pyx_f_7Stemmer_7Stemmer___dealloc__ (prototype, then definition). It
 *    takes a reference to self, calls sb_stemmer_delete on self->cobj, and
 *    releases that reference again.
 */
/* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":123 */ __pyx_3 = PyDict_New(); if (!__pyx_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 123; goto __pyx_L1;} Py_DECREF(((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->cache); ((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->cache = __pyx_3; __pyx_3 = 0; __pyx_r = 0; goto __pyx_L0; __pyx_L1:; Py_XDECREF(__pyx_3); Py_XDECREF(__pyx_4); __Pyx_AddTraceback("Stemmer.Stemmer.__init__"); __pyx_r = -1; __pyx_L0:; Py_DECREF(__pyx_v_self); Py_DECREF(__pyx_v_algorithm); return __pyx_r; } static void __pyx_f_7Stemmer_7Stemmer___dealloc__(PyObject *__pyx_v_self); /*proto*/ static void __pyx_f_7Stemmer_7Stemmer___dealloc__(PyObject *__pyx_v_self) { Py_INCREF(__pyx_v_self); sb_stemmer_delete(((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->cobj); Py_DECREF(__pyx_v_self); }
/**
 * Stems \c word with the Snowball stemmer for \c locale.
 *
 * The most recently used stemmer is kept in a function-local static and is
 * only recreated when \c locale differs from the previous call.
 *
 * \param word   Word to stem. It is passed to the stemmer as UTF-8, without
 *               any case folding.
 * \param locale Snowball algorithm name handed to sb_stemmer_new.
 *
 * \return The stemmed word decoded from UTF-8. If no stemmer exists for
 *         \c locale, or the stemmer reports a failure, a warning is logged
 *         and \c word is returned unchanged.
 */
QString RStemmer::stem(const QString& word, const QString& locale) {
    static QString prevLocale = "";
    static struct sb_stemmer* stemmer = NULL;

    // Keep only the last used stemmer in memory:
    if (locale != prevLocale) {
        if (stemmer != NULL) {
            sb_stemmer_delete(stemmer);
            stemmer = NULL;
        }
        stemmer = sb_stemmer_new(locale.toUtf8().constData(), "UTF_8");
        prevLocale = locale;
    }

    if (stemmer == NULL) {
        qWarning() << "No stemmer found for locale: " << locale;
        return word;
    }

    // Hand the UTF-8 bytes to the stemmer directly. No intermediate
    // malloc/realloc buffer is needed, so nothing can leak or be written
    // through a failed reallocation.
    const QByteArray utf8 = word.toUtf8();
    const sb_symbol* stemmed = sb_stemmer_stem(
        stemmer,
        reinterpret_cast<const sb_symbol*>(utf8.constData()),
        static_cast<int>(utf8.size()));

    // sb_stemmer_stem returns NULL when it runs out of memory.
    if (stemmed == NULL) {
        qWarning() << "RStemmer::stem: Memory allocation error.";
        return word;
    }

    // The result is UTF-8 and owned by the stemmer. Decode it as a whole so
    // that multi-byte characters survive, instead of mapping each byte to a
    // separate QChar.
    return QString::fromUtf8(reinterpret_cast<const char*>(stemmed),
                             sb_stemmer_length(stemmer));
}
void Snowball(sLONG_PTR *pResult, PackagePtr pParams) { C_TEXT Param1; ARRAY_TEXT Param2; ARRAY_TEXT Param3; C_LONGINT Param4; Param1.fromParamAtIndex(pParams, 1); Param4.fromParamAtIndex(pParams, 4); CUTF8String t; Param1.copyUTF8String(&t); std::string str((const char *)t.c_str()); std::vector<std::string>words = split(str); struct sb_stemmer *stemmer; char *language = (char *)"english"; Snowball_Language lang = (Snowball_Language)Param4.getIntValue(); switch (lang) { case Snowball_Danish: language = (char *)"danish"; break; case Snowball_Dutch: language = (char *)"dutch"; break; case Snowball_English: language = (char *)"english"; break; case Snowball_Finnish: language = (char *)"finnish"; break; case Snowball_French: language = (char *)"french"; break; case Snowball_German: language = (char *)"german"; break; case Snowball_Hungarian: language = (char *)"hungarian"; break; case Snowball_Italian: language = (char *)"italian"; break; case Snowball_Norwegian: language = (char *)"norwegian"; break; case Snowball_Portuguese: language = (char *)"portuguese"; break; case Snowball_Romanian: language = (char *)"romanian"; break; case Snowball_Russian: language = (char *)"russian"; break; case Snowball_Spanish: language = (char *)"spanish"; break; case Snowball_Swedish: language = (char *)"swedish"; break; case Snowball_Turkish: language = (char *)"turkish"; break; default: break; } char *charenc = NULL;//UTF-8 stemmer = sb_stemmer_new(language, charenc); if (stemmer) { Param2.setSize(1); Param3.setSize(1); for(std::vector<std::string>::iterator it = words.begin(); it != words.end(); ++it) { std::string word = *it; sb_symbol * symbol = (sb_symbol *)word.c_str(); int size = word.length(); const sb_symbol *stemmed = sb_stemmer_stem(stemmer, symbol, size); if (stemmed) { CUTF8String w((const uint8_t *)symbol); CUTF8String s((const uint8_t *)stemmed); Param2.appendUTF8String(&w); Param3.appendUTF8String(&s); } } sb_stemmer_delete(stemmer); } Param2.toParamAtIndex(pParams, 2); 
Param3.toParamAtIndex(pParams, 3); }
/*
 * Command-line driver: stems the input with a Snowball stemmer.
 *
 * Options:
 *   -i FILE   input file (default: stdin)
 *   -o FILE   output file (default: stdout)
 *   -l LANG   stemming language (default: "english")
 *   -c ENC    character encoding passed to sb_stemmer_new (default: NULL)
 *   -p        set the global `pretty` flag
 *   -h        call usage(0)
 *
 * Unknown options and non-option parameters are reported on stderr and
 * followed by usage(1). A missing option argument, an unopenable file or an
 * unavailable language/encoding prints a message and exits with status 1.
 * Returns 0 on success.
 */
int main(int argc, char * argv[])
{
    char * in = 0;
    char * out = 0;
    char * language = "english";
    char * charenc = NULL;
    FILE * f_in;
    FILE * f_out;
    struct sb_stemmer * stemmer;
    int i;

    pretty = 0;
    progname = argv[0];

    for (i = 1; i < argc; ) {
        char * opt = argv[i++];
        char ** value_slot = NULL; /* destination for options that take a value */

        if (opt[0] != '-') {
            fprintf(stderr, "unexpected parameter %s\n", opt);
            usage(1);
            continue;
        }

        if (strcmp(opt, "-o") == 0) {
            value_slot = &out;
        } else if (strcmp(opt, "-i") == 0) {
            value_slot = &in;
        } else if (strcmp(opt, "-l") == 0) {
            value_slot = &language;
        } else if (strcmp(opt, "-c") == 0) {
            value_slot = &charenc;
        }

        if (value_slot != NULL) {
            /* All four value options share the same missing-argument check. */
            if (i >= argc) {
                fprintf(stderr, "%s requires an argument\n", opt);
                exit(1);
            }
            *value_slot = argv[i++];
        } else if (strcmp(opt, "-p") == 0) {
            pretty = 1;
        } else if (strcmp(opt, "-h") == 0) {
            usage(0);
        } else {
            fprintf(stderr, "option %s unknown\n", opt);
            usage(1);
        }
    }

    /* prepare the files */
    f_in = stdin;
    if (in != 0) {
        f_in = fopen(in, "r");
    }
    if (f_in == 0) {
        fprintf(stderr, "file %s not found\n", in);
        exit(1);
    }

    f_out = stdout;
    if (out != 0) {
        f_out = fopen(out, "w");
    }
    if (f_out == 0) {
        fprintf(stderr, "file %s cannot be opened\n", out);
        exit(1);
    }

    /* do the stemming process: */
    stemmer = sb_stemmer_new(language, charenc);
    if (stemmer == 0) {
        if (charenc == NULL) {
            fprintf(stderr, "language `%s' not available for stemming\n", language);
        } else {
            fprintf(stderr, "language `%s' not available for stemming in encoding `%s'\n", language, charenc);
        }
        exit(1);
    }

    stem_file(stemmer, f_in, f_out);
    sb_stemmer_delete(stemmer);

    /* Only close streams this function opened; stdin/stdout are left alone. */
    if (in != 0) {
        (void) fclose(f_in);
    }
    if (out != 0) {
        (void) fclose(f_out);
    }

    return 0;
}
/// Deletes the wrapped Snowball stemmer, if one is set, and resets the member.
SbStemmerWrapper::~SbStemmerWrapper()
{
    // Nothing to release when no stemmer is set.
    if (!stemmer_) {
        return;
    }

    sb_stemmer_delete(stemmer_);
    stemmer_ = nullptr;
}