Example #1
0
/* Exercise Str_Code_Point_At (offset counted from the start) and
 * Str_Code_Point_From (offset counted back from the end) against a string
 * mixing ASCII letters with the multi-byte `smiley` sequence, then check
 * that both report STR_OOB for offsets outside the string.
 */
static void
test_Code_Point_At_and_From(TestBatchRunner *runner) {
    int32_t expected[] = {
        'a', smiley_cp, smiley_cp, 'b', smiley_cp, 'c'
    };
    uint32_t count = sizeof(expected) / sizeof(expected[0]);
    String *subject = Str_newf("a%s%sb%sc", smiley, smiley, smiley);
    uint32_t pos = 0;

    while (pos < count) {
        /* The code point at `pos` from the front sits `count - pos` from
         * the back. */
        uint32_t tail_offset = count - pos;

        TEST_INT_EQ(runner, Str_Code_Point_At(subject, pos), expected[pos],
                    "Code_Point_At %ld", (long)pos);
        TEST_INT_EQ(runner, Str_Code_Point_From(subject, tail_offset),
                    expected[pos], "Code_Point_From %ld", (long)tail_offset);
        pos++;
    }

    /* Offsets just outside the valid range on either side. */
    TEST_INT_EQ(runner, Str_Code_Point_At(subject, count), STR_OOB,
                "Code_Point_At %ld", (long)count);
    TEST_INT_EQ(runner, Str_Code_Point_From(subject, 0), STR_OOB,
                "Code_Point_From 0");
    TEST_INT_EQ(runner, Str_Code_Point_From(subject, count + 1),
                STR_OOB, "Code_Point_From %ld", (long)(count + 1));

    DECREF(subject);
}
Example #2
0
/* Build a Hash whose keys are the Snowball stopwords for `language`.
 *
 * Only the first two code points of `language` are examined, and they are
 * compared case-insensitively against the supported two-letter codes.
 *
 * @param language Language identifier; its first two code points select the
 * stoplist.
 * @return a new Hash mapping each stopword to CFISH_TRUE, or NULL if the
 * language is not recognised.
 */
Hash*
SnowStop_gen_stoplist(String *language) {
    char lang[2];
    for (size_t i = 0; i < 2; i++) {
        int32_t code_point = Str_Code_Point_At(language, i);
        // tolower() is only defined for values representable as unsigned
        // char (or EOF).  A code point can be any Unicode scalar or an
        // out-of-bounds sentinel, so fold case for ASCII only and store NUL
        // otherwise, which matches none of the codes below.
        lang[i] = (code_point >= 0 && code_point < 0x80)
                  ? (char)tolower((int)code_point)
                  : '\0';
    }

    const uint8_t **words = NULL;
    if (memcmp(lang, "da", 2) == 0)      { words = SnowStop_snow_da; }
    else if (memcmp(lang, "de", 2) == 0) { words = SnowStop_snow_de; }
    else if (memcmp(lang, "en", 2) == 0) { words = SnowStop_snow_en; }
    else if (memcmp(lang, "es", 2) == 0) { words = SnowStop_snow_es; }
    else if (memcmp(lang, "fi", 2) == 0) { words = SnowStop_snow_fi; }
    else if (memcmp(lang, "fr", 2) == 0) { words = SnowStop_snow_fr; }
    else if (memcmp(lang, "hu", 2) == 0) { words = SnowStop_snow_hu; }
    else if (memcmp(lang, "it", 2) == 0) { words = SnowStop_snow_it; }
    else if (memcmp(lang, "nl", 2) == 0) { words = SnowStop_snow_nl; }
    else if (memcmp(lang, "no", 2) == 0) { words = SnowStop_snow_no; }
    else if (memcmp(lang, "pt", 2) == 0) { words = SnowStop_snow_pt; }
    else if (memcmp(lang, "ru", 2) == 0) { words = SnowStop_snow_ru; }
    else if (memcmp(lang, "sv", 2) == 0) { words = SnowStop_snow_sv; }
    else {
        return NULL;
    }

    // The word tables are NULL-terminated; count entries to size the Hash.
    size_t num_stopwords = 0;
    while (words[num_stopwords] != NULL) { num_stopwords++; }

    Hash *stoplist = Hash_new(num_stopwords);
    for (size_t i = 0; i < num_stopwords; i++) {
        char *word = (char*)words[i];
        String *stop = Str_new_wrap_trusted_utf8(word, strlen(word));
        Hash_Store(stoplist, stop, (Obj*)CFISH_TRUE);
        // Drop the local reference to the temporary key.
        DECREF(stop);
    }
    return stoplist;
}
Example #3
0
/* Report whether `path` looks like an absolute path.
 *
 * A path counts as absolute when it begins with '/' or '\\', optionally
 * preceded by a drive prefix consisting of one ASCII letter and a colon
 * (for example "C:\\" or "c:/").
 *
 * @param path The path to inspect.
 * @return true if the path is absolute by the rule above, false otherwise.
 */
static bool
S_is_absolute(String *path) {
    int32_t code_point = Str_Code_Point_At(path, 0);

    /* Test for an ASCII letter explicitly rather than via isalpha(): the
     * ctype functions are undefined for values that do not fit in an
     * unsigned char (any code point above 0xFF) and are locale dependent,
     * whereas drive letters are always ASCII. */
    bool is_drive_letter = (code_point >= 'A' && code_point <= 'Z')
                           || (code_point >= 'a' && code_point <= 'z');

    if (is_drive_letter) {
        code_point = Str_Code_Point_At(path, 1);
        if (code_point != ':') { return false; }
        /* Skip past the "X:" prefix and test the separator that follows. */
        code_point = Str_Code_Point_At(path, 2);
    }

    return code_point == '\\' || code_point == '/';
}
/* Initialise a SnowballStemmer for the given language.
 *
 * Stores a clone of `language` on the object and obtains a Snowball stemmer
 * keyed by the lowercased first two code points of `language`, using the
 * "UTF_8" encoding.
 *
 * @param self The object to initialise.
 * @param language Language identifier; only its first two code points are
 * used to select the stemmer.
 * @return `self`.  Throws ERR if no Snowball stemmer is available for the
 * language.
 */
SnowballStemmer*
SnowStemmer_init(SnowballStemmer *self, String *language) {
    char lang_buf[3];
    Analyzer_init((Analyzer*)self);
    SnowballStemmerIVARS *const ivars = SnowStemmer_IVARS(self);
    ivars->language = Str_Clone(language);

    // Get a Snowball stemmer.  Be case-insensitive.
    for (size_t i = 0; i < 2; i++) {
        int32_t code_point = Str_Code_Point_At(language, i);
        // tolower() is only defined for values representable as unsigned
        // char (or EOF).  Fold case for ASCII code points only; anything
        // else (non-ASCII, or an out-of-bounds result for a short string)
        // becomes NUL, so the stemmer lookup fails and we throw below.
        lang_buf[i] = (code_point >= 0 && code_point < 0x80)
                      ? (char)tolower((int)code_point)
                      : '\0';
    }
    lang_buf[2] = '\0';
    ivars->snowstemmer = sb_stemmer_new(lang_buf, "UTF_8");
    if (!ivars->snowstemmer) {
        THROW(ERR, "Can't find a Snowball stemmer for %o", language);
    }

    return self;
}