/*
 * Check Str_Code_Point_At and Str_Code_Point_From against a string built
 * from ASCII letters interleaved with the smiley character.
 *
 * For every index i into the expected sequence, Code_Point_At(i) and
 * Code_Point_From(count - i) are both expected to yield entry i.  The
 * ticks just outside the valid range (At: count; From: 0 and count + 1)
 * are expected to yield STR_OOB.
 */
static void
test_Code_Point_At_and_From(TestBatchRunner *runner) {
    int32_t expected[] = {
        'a', smiley_cp, smiley_cp, 'b', smiley_cp, 'c'
    };
    uint32_t count = sizeof(expected) / sizeof(expected[0]);
    String *subject = Str_newf("a%s%sb%sc", smiley, smiley, smiley);

    uint32_t idx = 0;
    while (idx < count) {
        // Tick measured from the end that addresses the same code point.
        uint32_t from_end = count - idx;

        TEST_INT_EQ(runner, Str_Code_Point_At(subject, idx), expected[idx],
                    "Code_Point_At %ld", (long)idx);
        TEST_INT_EQ(runner, Str_Code_Point_From(subject, from_end),
                    expected[idx], "Code_Point_From %ld", (long)from_end);
        idx++;
    }

    // Out-of-bounds ticks on either side of the valid range.
    TEST_INT_EQ(runner, Str_Code_Point_At(subject, count), STR_OOB,
                "Code_Point_At %ld", (long)count);
    TEST_INT_EQ(runner, Str_Code_Point_From(subject, 0), STR_OOB,
                "Code_Point_From 0");
    TEST_INT_EQ(runner, Str_Code_Point_From(subject, count + 1), STR_OOB,
                "Code_Point_From %ld", (long)(count + 1));

    DECREF(subject);
}
/*
 * Build a stoplist Hash for the given language.
 *
 * Only the first two code points of `language` are examined; they are
 * lowercased and compared against the supported two-letter codes
 * (da, de, en, es, fi, fr, hu, it, nl, no, pt, ru, sv).
 *
 * Returns a new Hash in which every stopword of the matching list is stored
 * as a key with the value CFISH_TRUE, or NULL when the language is not
 * recognized.
 */
Hash*
SnowStop_gen_stoplist(String *language) {
    const int32_t code_points[2] = {
        Str_Code_Point_At(language, 0),
        Str_Code_Point_At(language, 1)
    };

    // tolower() is only defined for EOF and values representable as
    // unsigned char, and narrowing a large code point to char could alias
    // an ASCII letter.  Every supported code is plain ASCII, so anything
    // outside that range (presumably including the out-of-bounds sentinel
    // returned for strings shorter than two code points) is unsupported.
    char lang[2];
    for (int i = 0; i < 2; i++) {
        if (code_points[i] < 0 || code_points[i] > 0x7F) {
            return NULL;
        }
        lang[i] = (char)tolower((int)code_points[i]);
    }

    const struct {
        const char      *code;
        const uint8_t  **words;
    } stoplists[] = {
        { "da", SnowStop_snow_da },
        { "de", SnowStop_snow_de },
        { "en", SnowStop_snow_en },
        { "es", SnowStop_snow_es },
        { "fi", SnowStop_snow_fi },
        { "fr", SnowStop_snow_fr },
        { "hu", SnowStop_snow_hu },
        { "it", SnowStop_snow_it },
        { "nl", SnowStop_snow_nl },
        { "no", SnowStop_snow_no },
        { "pt", SnowStop_snow_pt },
        { "ru", SnowStop_snow_ru },
        { "sv", SnowStop_snow_sv }
    };

    const uint8_t **words = NULL;
    for (size_t i = 0; i < sizeof(stoplists) / sizeof(stoplists[0]); i++) {
        if (memcmp(lang, stoplists[i].code, 2) == 0) {
            words = stoplists[i].words;
            break;
        }
    }
    if (words == NULL) {
        return NULL;
    }

    // The word list is NULL-terminated; count entries to size the Hash.
    size_t num_stopwords = 0;
    while (words[num_stopwords] != NULL) {
        num_stopwords++;
    }

    Hash *stoplist = Hash_new(num_stopwords);
    for (size_t i = 0; i < num_stopwords; i++) {
        char *word = (char*)words[i];
        String *stop = Str_new_wrap_trusted_utf8(word, strlen(word));
        Hash_Store(stoplist, stop, (Obj*)CFISH_TRUE);
        DECREF(stop);
    }

    return stoplist;
}
/*
 * Report whether `path` looks like an absolute path.
 *
 * A path is considered absolute when it starts with '/' or '\\', optionally
 * preceded by a drive specifier consisting of one ASCII letter and a colon
 * (e.g. "C:\\" or "c:/").  A leading letter that is not followed by ':'
 * makes the path relative.
 */
static bool
S_is_absolute(String *path) {
    int32_t code_point = Str_Code_Point_At(path, 0);

    // Test for an ASCII letter explicitly rather than with isalpha():
    // isalpha() is undefined for values that don't fit in unsigned char
    // (any code point above 255) and is locale-dependent, while drive
    // letters are always ASCII.
    const bool has_drive_letter
        = (code_point >= 'A' && code_point <= 'Z')
          || (code_point >= 'a' && code_point <= 'z');

    if (has_drive_letter) {
        if (Str_Code_Point_At(path, 1) != ':') {
            return false;
        }
        // Skip the "X:" prefix and inspect what follows it.
        code_point = Str_Code_Point_At(path, 2);
    }

    return code_point == '\\' || code_point == '/';
}
/*
 * Initialize a SnowballStemmer for `language`.
 *
 * A clone of `language` is stored in the object's ivars.  The first two
 * code points of `language` are lowercased and passed to sb_stemmer_new()
 * (with the "UTF_8" encoding) to obtain the underlying Snowball stemmer.
 *
 * Returns `self`.  Throws ERR when no Snowball stemmer can be obtained for
 * the language.
 */
SnowballStemmer*
SnowStemmer_init(SnowballStemmer *self, String *language) {
    Analyzer_init((Analyzer*)self);
    SnowballStemmerIVARS *const ivars = SnowStemmer_IVARS(self);
    ivars->language = Str_Clone(language);

    // Get a Snowball stemmer.  Be case-insensitive.
    const int32_t first  = Str_Code_Point_At(language, 0);
    const int32_t second = Str_Code_Point_At(language, 1);

    // tolower() is only defined for EOF and values representable as
    // unsigned char, and narrowing a large code point to char could alias
    // an ASCII letter.  Only attempt the lookup when both code points are
    // ASCII; anything else (presumably including the out-of-bounds sentinel
    // returned for strings shorter than two code points) falls through to
    // the error below.
    ivars->snowstemmer = NULL;
    if (first >= 0 && first <= 0x7F && second >= 0 && second <= 0x7F) {
        char lang_buf[3];
        lang_buf[0] = (char)tolower((int)first);
        lang_buf[1] = (char)tolower((int)second);
        lang_buf[2] = '\0';
        ivars->snowstemmer = sb_stemmer_new(lang_buf, "UTF_8");
    }

    if (!ivars->snowstemmer) {
        THROW(ERR, "Can't find a Snowball stemmer for %o", language);
    }

    return self;
}