// Compare two NUL-terminated strings, treating characters that cmlower
// maps to the same value as equal.  Returns 1 when the strings match all
// the way to the terminating NUL, and 0 at the first difference.
int strieq(const char *p, const char *q)
{
  while (cmlower(*p) == cmlower(*q)) {
    // Both characters agree; if this is the terminator we are done.
    if (*p == '\0')
      return 1;
    ++p;
    ++q;
  }
  return 0;
}
static int store_key(char *s, int len) { if (len < shortest_len) return 0; int is_number = 1; for (int i = 0; i < len; i++) if (!csdigit(s[i])) { is_number = 0; s[i] = cmlower(s[i]); } if (is_number && !(len == 4 && s[0] == '1' && s[1] == '9')) return 0; int h = hash(s, len) % hash_table_size; if (common_words_table) { for (word_list *ptr = common_words_table[h]; ptr; ptr = ptr->next) if (len == ptr->len && memcmp(s, ptr->str, len) == 0) return 0; } table_entry *pp = hash_table + h; if (!pp->ptr) pp->ptr = new block; else if (pp->ptr->v[pp->ptr->used - 1] == ntags) return 1; else if (pp->ptr->used >= BLOCK_SIZE) pp->ptr = new block(pp->ptr); pp->ptr->v[(pp->ptr->used)++] = ntags; return 1; }
// Append to `key` the sort key for the title text s[0..len).
//
// Leading spaces and newlines are skipped.  Tokens are then read with
// get_token() until it fails or until it yields a one-character token
// that is a space or a newline.  If input remains after that point, the
// text before that separator (the first word) is compared, after passing
// each character through cmlower, against every NUL-terminated entry
// packed into `articles`.  On a match the first word and any spaces or
// newlines following it are skipped.  Whatever remains is handed to
// sortify_words(s, end, 0, key).
//
// NOTE(review): `first_word_len = ptr - s - 1` assumes the token loop
// ended on a one-character separator; confirm get_token() cannot fail
// while leaving ptr < end.
// NOTE(review): the comparison lower-cases only the title side, so the
// entries in `articles` are presumably already lower case -- verify
// against the code that fills `articles`.
void sortify_title(const char *s, int len, string &key) { const char *end = s + len; for (; s < end && (*s == ' ' || *s == '\n'); s++) ; const char *ptr = s; for (;;) { const char *token_start = ptr; if (!get_token(&ptr, end)) break; if (ptr - token_start == 1 && (*token_start == ' ' || *token_start == '\n')) break; } if (ptr < end) { unsigned int first_word_len = ptr - s - 1; const char *ae = articles.contents() + articles.length(); for (const char *a = articles.contents(); a < ae; a = strchr(a, '\0') + 1) if (first_word_len == strlen(a)) { unsigned int j; for (j = 0; j < first_word_len; j++) if (a[j] != cmlower(s[j])) break; if (j >= first_word_len) { s = ptr; for (; s < end && (*s == ' ' || *s == '\n'); s++) ; break; } } } sortify_words(s, end, 0, key); }
// Append this token's contribution to a sort key onto `result`.
//
// If the token has an explicit sort_key, that string is appended.
// Otherwise, for TOKEN_UPPER and TOKEN_LOWER tokens, every character of
// [start, end) accepted by csalpha is appended after mapping it through
// cmlower.  Tokens of any other type contribute nothing.
void token_info::sortify(const char *start, const char *end,
			 string &result) const
{
  if (sort_key) {
    result += sort_key;
    return;
  }
  if (type != TOKEN_UPPER && type != TOKEN_LOWER)
    return;
  for (const char *p = start; p < end; ++p) {
    if (csalpha(*p))
      result += cmlower(*p);
  }
}
static void articles_command(int argc, argument *argv) { articles.clear(); int i; for (i = 0; i < argc; i++) { articles += argv[i].s; articles += '\0'; } int len = articles.length(); for (i = 0; i < len; i++) articles[i] = cmlower(articles[i]); }
// Append the lower-case form of the token text [start, end) to `result`.
//
// Tokens whose type is not TOKEN_UPPER are copied unchanged.  For
// TOKEN_UPPER tokens, other_case is appended when it is set; otherwise
// each character is appended after mapping it through cmlower.
void token_info::lower_case(const char *start, const char *end,
			    string &result) const
{
  if (type == TOKEN_UPPER) {
    if (other_case) {
      result += other_case;
      return;
    }
    for (const char *p = start; p < end; ++p)
      result += cmlower(*p);
    return;
  }
  // Not an upper-case token: copy the text through verbatim.
  for (const char *p = start; p < end; ++p)
    result += *p;
}
// Load the common-words file into common_words_table.
//
// Does nothing when header.common <= 0.  The file name is obtained by
// passing the string that follows the first NUL-terminated string in
// `pool` to munge_filename().  If the file cannot be opened, the failure
// is reported through error() and the function returns without building
// a table.
//
// The table size is the first prime at or above 2*header.common + 1, and
// every slot starts out null.  Words are maximal runs of characters
// accepted by csalnum; each character is mapped through cmlower and at
// most header.truncate characters per word are kept in key_buffer.  A
// word of at least header.shortest kept characters is inserted: probing
// starts at hash(word) % size and steps downwards, wrapping from slot 0
// to the last slot, until an empty slot is found, which then receives a
// newly allocated NUL-terminated copy of the word.
//
// Reading stops at end of file or once header.common words have been
// read; words too short to be inserted still count towards that limit.
void index_search_item::read_common_words_file() { if (header.common <= 0) return; const char *common_words_file = munge_filename(strchr(pool, '\0') + 1); errno = 0; FILE *fp = fopen(common_words_file, "r"); if (!fp) { error("can't open `%1': %2", common_words_file, strerror(errno)); return; } common_words_table_size = 2*header.common + 1; while (!is_prime(common_words_table_size)) common_words_table_size++; common_words_table = new char *[common_words_table_size]; for (int i = 0; i < common_words_table_size; i++) common_words_table[i] = 0; int count = 0; int key_len = 0; for (;;) { int c = getc(fp); while (c != EOF && !csalnum(c)) c = getc(fp); if (c == EOF) break; do { if (key_len < header.truncate) key_buffer[key_len++] = cmlower(c); c = getc(fp); } while (c != EOF && csalnum(c)); if (key_len >= header.shortest) { int h = hash(key_buffer, key_len) % common_words_table_size; while (common_words_table[h]) { if (h == 0) h = common_words_table_size; --h; } common_words_table[h] = new char[key_len + 1]; memcpy(common_words_table[h], key_buffer, key_len); common_words_table[h][key_len] = '\0'; } if (++count >= header.common) break; key_len = 0; if (c == EOF) break; } fclose(fp); }
// Fill the forward and inverse character tables for all 256 byte values.
//
// map[c] is cmlower(c) when csalnum accepts c, and '\0' otherwise.
//
// inv_map[c] is a short NUL-terminated list:
//   - for a character accepted by cslower: the character itself followed
//     by cmupper of it;
//   - for a character accepted by csdigit: just the character itself;
//   - for anything else: the empty list.
map_init::map_init()
{
  for (int ch = 0; ch < 256; ch++) {
    map[ch] = csalnum(ch) ? cmlower(ch) : '\0';

    const int lower = cslower(ch);
    if (!lower && !csdigit(ch)) {
      // Neither a lower-case letter nor a digit: no inverse entries.
      inv_map[ch][0] = '\0';
      continue;
    }
    inv_map[ch][0] = ch;
    if (lower) {
      inv_map[ch][1] = cmupper(ch);
      inv_map[ch][2] = '\0';
    }
    else {
      inv_map[ch][1] = 0;
    }
  }
}
// Search [start, end) for a word that names a month.
//
// Words are maximal runs of characters accepted by csalpha.  A word of
// three or more characters matches a month when, after mapping through
// cmlower, it is a prefix of that month's English name (so "Sep" and
// "Sept" both match "september").  Months are tried in calendar order
// and the first match wins.
//
// Returns the zero-based month index (0 for January) of the first
// matching word, or -1 when no word matches.
static int find_month(const char *start, const char *end)
{
  static const char *months[] = {
    "january",
    "february",
    "march",
    "april",
    "may",
    "june",
    "july",
    "august",
    "september",
    "october",
    "november",
    "december",
  };
  const unsigned int n_months = sizeof(months)/sizeof(months[0]);

  for (;;) {
    // Advance to the beginning of the next word.
    while (start < end && !csalpha(*start))
      start++;
    const char *word_end = start;
    if (start == end)
      break;
    while (word_end < end && csalpha(*word_end))
      word_end++;

    if (word_end - start >= 3) {
      for (unsigned int m = 0; m < n_months; m++) {
        const char *name = months[m];
        const char *p = start;
        // A mismatch against the name's terminating NUL also stops here,
        // so words longer than the month name never run past it.
        while (p < word_end && cmlower(*p) == *name) {
          p++;
          name++;
        }
        if (p >= word_end)
          return m;
      }
    }
    start = word_end;
  }
  return -1;
}
const int *index_search_item::search1(const char **pp, const char *end) { while (*pp < end && !csalnum(**pp)) *pp += 1; if (*pp >= end) return 0; const char *start = *pp; while (*pp < end && csalnum(**pp)) *pp += 1; int len = *pp - start; if (len < header.shortest) return 0; if (len > header.truncate) len = header.truncate; int is_number = 1; for (int i = 0; i < len; i++) if (csdigit(start[i])) key_buffer[i] = start[i]; else { key_buffer[i] = cmlower(start[i]); is_number = 0; } if (is_number && !(len == 4 && start[0] == '1' && start[1] == '9')) return 0; unsigned hc = hash(key_buffer, len); if (common_words_table) { for (int h = hc % common_words_table_size; common_words_table[h]; --h) { if (strlen(common_words_table[h]) == (size_t)len && memcmp(common_words_table[h], key_buffer, len) == 0) return 0; if (h == 0) h = common_words_table_size; } } int li = table[int(hc % header.table_size)]; return li < 0 ? &minus_one : lists + li; }
static void read_common_words_file() { if (n_ignore_words <= 0) return; errno = 0; FILE *fp = fopen(common_words_file, "r"); if (!fp) fatal("can't open `%1': %2", common_words_file, strerror(errno)); common_words_table = new word_list * [hash_table_size]; for (int i = 0; i < hash_table_size; i++) common_words_table[i] = 0; int count = 0; int key_len = 0; for (;;) { int c = getc(fp); while (c != EOF && !csalnum(c)) c = getc(fp); if (c == EOF) break; do { if (key_len < truncate_len) key_buffer[key_len++] = cmlower(c); c = getc(fp); } while (c != EOF && csalnum(c)); if (key_len >= shortest_len) { int h = hash(key_buffer, key_len) % hash_table_size; common_words_table[h] = new word_list(key_buffer, key_len, common_words_table[h]); } if (++count >= n_ignore_words) break; key_len = 0; if (c == EOF) break; } n_ignore_words = count; fclose(fp); }