示例#1
0
文件: por.c 项目: jakeprobst/iplus1
/*
 * Release everything owned by the Portuguese language module behind `lang`.
 *
 * lang->param is interpreted as an iplus1_portugese_t. Its stopword tree is
 * walked in post-order with iplus1_tree_free_key, destroyed and freed; the
 * Snowball stemmer is deleted; then lang->full_lang and lang->param are freed.
 *
 * Returns IPLUS1_SUCCESS unconditionally.
 */
int destroy(iplus1_lang_t* lang)
{
    iplus1_portugese_t* state = (iplus1_portugese_t*)lang->param;

    /* Stopword tree: per-node key callback first, then the tree, then its storage. */
    iplus1_tree_foreach_postorder(state->stopwords, &iplus1_tree_free_key, NULL);
    iplus1_tree_destroy(state->stopwords);
    free(state->stopwords);

    /* Snowball stemmer handle. */
    sb_stemmer_delete(state->stemmer);

    /* Fields hanging directly off the language descriptor. */
    free(lang->full_lang);
    free(lang->param);

    return IPLUS1_SUCCESS;
}
示例#2
0
文件: deu.c 项目: jakeprobst/iplus1
/*
 * Release everything owned by the German language module behind `lang`.
 *
 * lang->param is interpreted as an iplus1_german_t. Its stopword tree is
 * walked in post-order with iplus1_tree_free_key, destroyed and freed; the
 * Snowball stemmer is deleted; then lang->full_lang and lang->param are freed.
 *
 * Returns IPLUS1_SUCCESS unconditionally.
 */
int destroy(iplus1_lang_t* lang)
{
    iplus1_german_t* state = (iplus1_german_t*)lang->param;

    /* Stopword tree: per-node key callback first, then the tree, then its storage. */
    iplus1_tree_foreach_postorder(state->stopwords, &iplus1_tree_free_key, NULL);
    iplus1_tree_destroy(state->stopwords);
    free(state->stopwords);

    /* Snowball stemmer handle. */
    sb_stemmer_delete(state->stemmer);

    /* Fields hanging directly off the language descriptor. */
    free(lang->full_lang);
    free(lang->param);

    return IPLUS1_SUCCESS;
}
示例#3
0
文件: swe.c 项目: jakeprobst/iplus1
/*
 * Release everything owned by the Swedish language module behind `lang`.
 *
 * lang->param is interpreted as an iplus1_swedish_t. Its stopword tree is
 * walked in post-order with iplus1_tree_free_key, destroyed and freed; the
 * Snowball stemmer is deleted; then lang->full_lang and lang->param are freed.
 *
 * Returns IPLUS1_SUCCESS unconditionally.
 */
int destroy(iplus1_lang_t* lang)
{
    iplus1_swedish_t* state = (iplus1_swedish_t*)lang->param;

    /* Stopword tree: per-node key callback first, then the tree, then its storage. */
    iplus1_tree_foreach_postorder(state->stopwords, &iplus1_tree_free_key, NULL);
    iplus1_tree_destroy(state->stopwords);
    free(state->stopwords);

    /* Snowball stemmer handle. */
    sb_stemmer_delete(state->stemmer);

    /* Fields hanging directly off the language descriptor. */
    free(lang->full_lang);
    free(lang->param);

    return IPLUS1_SUCCESS;
}
示例#4
0
文件: stem.c 项目: tamano/groonga
/*
 * Finalizer for a stem token filter instance.
 *
 * `user_data` is the grn_stem_token_filter to tear down; a NULL value is
 * accepted and ignored. Otherwise the embedded token is finalized, the
 * Snowball stemmer (if one is set) is deleted, and the filter itself is
 * released with GRN_PLUGIN_FREE.
 */
static void
stem_fin(grn_ctx *ctx, void *user_data)
{
  grn_stem_token_filter *filter = user_data;

  if (filter == NULL) {
    return;
  }

  grn_tokenizer_token_fin(ctx, &(filter->token));

  if (filter->stemmer != NULL) {
    sb_stemmer_delete(filter->stemmer);
  }

  GRN_PLUGIN_FREE(ctx, filter);
}
示例#5
0
		/**
		 * Destructor: deletes the Snowball stemmer, the stemmer buffer and the
		 * irregular-words container, each only when it is currently set.
		 */
		CSnowballWrapper::~CSnowballWrapper()
		{
			if (m_pStemmer != NULL)
			{
				sb_stemmer_delete(m_pStemmer);
			}

			if (m_pStemmerBuffer != NULL)
			{
				delete[] m_pStemmerBuffer;
				m_pStemmerBuffer = NULL;
				m_nBufferSize = 0;
			}

			if (m_pIrregularWords != NULL)
			{
				// Empty the container before destroying it, then drop the pointer.
				m_pIrregularWords->clear();
				delete m_pIrregularWords;
				m_pIrregularWords = NULL;
			}
		}
示例#6
0
/*
 * Stem `word` with the Snowball stemmer for `language`.
 *
 * Arguments (via rb_scan_args "11"): a required word and an optional
 * language; when the language is nil, fuzzy_default_language is used.
 *
 * Raises ArgumentError when `word` is not a String.
 *
 * Returns a new String holding the stem, tagged with the encoding of `word`,
 * or nil when no stemmer could be created for the language or when the
 * stemmer failed to produce a result.
 */
VALUE fuzzy_snowball(int argc, VALUE * argv, VALUE self) {
    VALUE word, language, result = Qnil;

    rb_scan_args(argc, argv, "11", &word, &language);
    if (NIL_P(language))
        language = fuzzy_default_language;

    if (TYPE(word) != T_STRING)
        rb_raise(rb_eArgError, "invalid word, expect string");

    struct sb_stemmer *stemmer = sb_stemmer_new(CSTRING(language), "UTF_8");
    if (stemmer) {
        /* libstemmer works on unsigned bytes and an int length; make both
         * conversions explicit instead of relying on implicit ones. */
        const sb_symbol *stem = sb_stemmer_stem(stemmer,
                                                (const sb_symbol *)RSTRING_PTR(word),
                                                (int)RSTRING_LEN(word));

        /* sb_stemmer_stem() returns NULL on failure (out of memory); never
         * hand that pointer to rb_enc_str_new(). The result stays nil. */
        if (stem) {
            long stem_len = sb_stemmer_length(stemmer);
            result = rb_enc_str_new((const char *)stem, stem_len, rb_enc_get(word));
        }

        /* The stem buffer belongs to the stemmer, so delete it only after
         * the Ruby string has copied the bytes. */
        sb_stemmer_delete(stemmer);
    }

    return result;
}
示例#7
0
/*
 * Destroy a Stemmer wrapper: delete the Snowball context stored in s->ctx and
 * free the wrapper itself. Passing NULL is a no-op, mirroring free().
 */
void __sbstemmer_Free(Stemmer *s) {
    if (s == NULL) {
        return;
    }

    sb_stemmer_delete(s->ctx);
    free(s);
}
示例#8
0
文件: stem.hpp 项目: alisheikh/warp
		/// Destructor: hands the m_stem stemmer handle to sb_stemmer_delete().
		~stem() {
			sb_stemmer_delete(m_stem);
		}
示例#9
0
文件: stemmer.cpp 项目: ChrisBg/mongo
 /**
  * Destructor: deletes the Snowball stemmer when one is held and clears the
  * member afterwards.
  */
 Stemmer::~Stemmer() {
     if ( !_stemmer ) {
         return;
     }

     sb_stemmer_delete(_stemmer);
     _stemmer = NULL;
 }
示例#10
0
文件: Stemmer.c 项目: buriy/pystemmer
  /* "/home/richard/private/Working/snowball/pystemmer/src/Stemmer.pyx":123 */
  __pyx_3 = PyDict_New(); if (!__pyx_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 123; goto __pyx_L1;}
  Py_DECREF(((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->cache);
  ((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->cache = __pyx_3;
  __pyx_3 = 0;

  __pyx_r = 0;
  goto __pyx_L0;
  __pyx_L1:;
  Py_XDECREF(__pyx_3);
  Py_XDECREF(__pyx_4);
  __Pyx_AddTraceback("Stemmer.Stemmer.__init__");
  __pyx_r = -1;
  __pyx_L0:;
  Py_DECREF(__pyx_v_self);
  Py_DECREF(__pyx_v_algorithm);
  return __pyx_r;
}

/*
 * __dealloc__ body for the Stemmer extension type (generated code).
 *
 * Deletes the Snowball stemmer stored in the object's `cobj` field. The
 * object is cast to struct __pyx_obj_7Stemmer_Stemmer to reach that field.
 */
static void __pyx_f_7Stemmer_7Stemmer___dealloc__(PyObject *__pyx_v_self); /*proto*/
static void __pyx_f_7Stemmer_7Stemmer___dealloc__(PyObject *__pyx_v_self) {
  /* Take a reference to self for the duration of the body; it is dropped
   * again by the Py_DECREF at the end. */
  Py_INCREF(__pyx_v_self);
  /* Release the native stemmer handle held by this object. */
  sb_stemmer_delete(((struct __pyx_obj_7Stemmer_Stemmer *)__pyx_v_self)->cobj);

  Py_DECREF(__pyx_v_self);
}
示例#11
0
/**
 * Stems \c word with the Snowball stemmer for \c locale.
 *
 * The stemmer of the most recently used locale is cached in a function-level
 * static and only recreated when the locale changes.
 *
 * \param word   Word to stem. It is handed to the stemmer as UTF-8 and is not
 *               case-folded here.
 * \param locale Algorithm name passed to sb_stemmer_new().
 *
 * \return The stemmed word, or \c word unchanged if no stemmer exists for
 *         the locale or the stemmer fails.
 */
QString RStemmer::stem(const QString& word, const QString& locale) {
    static QString prevLocale = "";
    static struct sb_stemmer* stemmer = NULL;

    // keep always the last used stemmer in memory:
    if (locale!=prevLocale) {
        if (stemmer!=NULL) {
            sb_stemmer_delete(stemmer);
            stemmer = NULL;
        }
        stemmer = sb_stemmer_new(locale.toUtf8(), "UTF_8");
        prevLocale = locale;
    }

    if (stemmer == NULL) {
        qWarning() << "No stemmer found for locale: " << locale;
        return word;
    }

    // The UTF-8 bytes of the word can be handed to the stemmer directly:
    // sb_stemmer_stem() does not modify its input, so no hand-managed copy
    // (and no malloc/realloc that would have to be checked and freed) is
    // needed.
    const QByteArray ba = word.toUtf8();
    const sb_symbol* sbStemmed = sb_stemmer_stem(
        stemmer,
        reinterpret_cast<const sb_symbol*>(ba.constData()),
        ba.length());

    // sb_stemmer_stem() returns NULL when it runs out of memory.
    if (sbStemmed == NULL) {
        qWarning() << "RStemmer::stem: Memory allocation error.";
        return word;
    }

    // The result is a zero-terminated UTF-8 string owned by the stemmer.
    // Decode it as UTF-8 as a whole; converting it byte by byte would turn
    // every multi-byte character into several wrong characters.
    return QString::fromUtf8(reinterpret_cast<const char*>(sbStemmed));
}
// Plugin entry point: stems every word of a text.
//
// Parameter 1 (text) is split into words with split(); parameter 4 selects
// the Snowball language (English when the value matches no known language).
// For every word the stemmer returns a result for, the word is appended to
// the array written back as parameter 2 and its stem to the array written
// back as parameter 3. Both arrays are written back even when no stemmer
// could be created.
void Snowball(sLONG_PTR *pResult, PackagePtr pParams)
{
	C_TEXT Param1;
	ARRAY_TEXT Param2;
	ARRAY_TEXT Param3;
	C_LONGINT Param4;

	Param1.fromParamAtIndex(pParams, 1);
	Param4.fromParamAtIndex(pParams, 4);

	CUTF8String text;
	Param1.copyUTF8String(&text);
	std::string str((const char *)text.c_str());
	std::vector<std::string> words = split(str);

	// Language selector -> Snowball algorithm name.
	struct LanguageName {
		Snowball_Language id;
		const char *name;
	};
	static const LanguageName kLanguages[] = {
		{ Snowball_Danish,     "danish" },
		{ Snowball_Dutch,      "dutch" },
		{ Snowball_English,    "english" },
		{ Snowball_Finnish,    "finnish" },
		{ Snowball_French,     "french" },
		{ Snowball_German,     "german" },
		{ Snowball_Hungarian,  "hungarian" },
		{ Snowball_Italian,    "italian" },
		{ Snowball_Norwegian,  "norwegian" },
		{ Snowball_Portuguese, "portuguese" },
		{ Snowball_Romanian,   "romanian" },
		{ Snowball_Russian,    "russian" },
		{ Snowball_Spanish,    "spanish" },
		{ Snowball_Swedish,    "swedish" },
		{ Snowball_Turkish,    "turkish" }
	};

	// Unknown selector values keep the English default.
	const char *language = "english";
	const Snowball_Language lang = (Snowball_Language)Param4.getIntValue();
	for (size_t k = 0; k < sizeof(kLanguages) / sizeof(kLanguages[0]); ++k) {
		if (kLanguages[k].id == lang) {
			language = kLanguages[k].name;
			break;
		}
	}

	// A NULL character encoding selects UTF-8.
	struct sb_stemmer *stemmer = sb_stemmer_new(language, NULL);

	if (stemmer)
	{
		Param2.setSize(1);
		Param3.setSize(1);

		for (size_t idx = 0; idx < words.size(); ++idx) {
			const std::string &word = words[idx];
			const sb_symbol *symbol = (const sb_symbol *)word.c_str();
			int size = word.length();

			const sb_symbol *stemmed = sb_stemmer_stem(stemmer, symbol, size);
			if (!stemmed) {
				continue;
			}

			CUTF8String w((const uint8_t *)symbol);
			CUTF8String s((const uint8_t *)stemmed);
			Param2.appendUTF8String(&w);
			Param3.appendUTF8String(&s);
		}

		sb_stemmer_delete(stemmer);
	}

	Param2.toParamAtIndex(pParams, 2);
	Param3.toParamAtIndex(pParams, 3);
}
示例#13
0
/* Return the argument that follows option `opt` and advance *next past it.
 * Prints an error and exits with status 1 when the command line ends first. */
static char *
option_argument(const char * opt, int argc, char * argv[], int * next)
{
    if (*next >= argc) {
        fprintf(stderr, "%s requires an argument\n", opt);
        exit(1);
    }
    return argv[(*next)++];
}

/* Command-line driver: stems the input stream into the output stream.
 *
 * Options: -i <input file> (default stdin), -o <output file> (default
 * stdout), -l <language> (default "english"), -c <character encoding>
 * (default NULL), -p (sets `pretty`), -h (calls usage(0)).
 *
 * Exits with status 1 when an option is missing its argument, a file cannot
 * be opened, or no stemmer is available; returns 0 otherwise. */
int
main(int argc, char * argv[])
{
    char * in = 0;
    char * out = 0;
    char * language = "english";
    char * charenc = NULL;
    FILE * f_in;
    FILE * f_out;
    struct sb_stemmer * stemmer;
    int i = 1;

    pretty = 0;
    progname = argv[0];

    while (i < argc) {
        char * s = argv[i++];

        /* Everything on the command line has to be an option. */
        if (s[0] != '-') {
            fprintf(stderr, "unexpected parameter %s\n", s);
            usage(1);
            continue;
        }

        if (strcmp(s, "-o") == 0) {
            out = option_argument(s, argc, argv, &i);
        } else if (strcmp(s, "-i") == 0) {
            in = option_argument(s, argc, argv, &i);
        } else if (strcmp(s, "-l") == 0) {
            language = option_argument(s, argc, argv, &i);
        } else if (strcmp(s, "-c") == 0) {
            charenc = option_argument(s, argc, argv, &i);
        } else if (strcmp(s, "-p") == 0) {
            pretty = 1;
        } else if (strcmp(s, "-h") == 0) {
            usage(0);
        } else {
            fprintf(stderr, "option %s unknown\n", s);
            usage(1);
        }
    }

    /* prepare the files */
    if (in == 0) {
        f_in = stdin;
    } else {
        f_in = fopen(in, "r");
    }
    if (f_in == 0) {
        fprintf(stderr, "file %s not found\n", in);
        exit(1);
    }

    if (out == 0) {
        f_out = stdout;
    } else {
        f_out = fopen(out, "w");
    }
    if (f_out == 0) {
        fprintf(stderr, "file %s cannot be opened\n", out);
        exit(1);
    }

    /* do the stemming process: */
    stemmer = sb_stemmer_new(language, charenc);
    if (stemmer == 0) {
        if (charenc == NULL) {
            fprintf(stderr, "language `%s' not available for stemming\n", language);
        } else {
            fprintf(stderr, "language `%s' not available for stemming in encoding `%s'\n", language, charenc);
        }
        exit(1);
    }
    stem_file(stemmer, f_in, f_out);
    sb_stemmer_delete(stemmer);

    /* Only close streams this function opened itself. */
    if (in != 0) {
        (void) fclose(f_in);
    }
    if (out != 0) {
        (void) fclose(f_out);
    }

    return 0;
}
示例#14
0
/// Destructor: deletes the Snowball stemmer when one is held and clears the
/// member afterwards.
SbStemmerWrapper::~SbStemmerWrapper() {
  if (stemmer_ == nullptr) {
    return;
  }

  sb_stemmer_delete(stemmer_);
  stemmer_ = nullptr;
}