Beispiel #1
0
Datei: main.cpp Projekt: att/uwin
// Compare two NUL-terminated strings for equality, folding every
// character through cmlower() before comparing.
//
// Returns 1 once the terminator of `p` is reached with every character
// pair (including the terminator pair) having compared equal, and 0 at
// the first pair that differs.
int strieq(const char *p, const char *q)
{
  while (cmlower(*p) == cmlower(*q)) {
    // The current pair matched; if it was the terminator we are done.
    if (*p == '\0')
      return 1;
    ++p;
    ++q;
  }
  return 0;
}
Beispiel #2
0
static int store_key(char *s, int len)
{
  if (len < shortest_len)
    return 0;
  int is_number = 1;
  for (int i = 0; i < len; i++)
    if (!csdigit(s[i])) {
      is_number = 0;
      s[i] = cmlower(s[i]);
    }
  if (is_number && !(len == 4 && s[0] == '1' && s[1] == '9'))
    return 0;
  int h = hash(s, len) % hash_table_size;
  if (common_words_table) {
    for (word_list *ptr = common_words_table[h]; ptr; ptr = ptr->next)
      if (len == ptr->len && memcmp(s, ptr->str, len) == 0)
	return 0;
  }
  table_entry *pp =  hash_table + h;
  if (!pp->ptr)
    pp->ptr = new block;
  else if (pp->ptr->v[pp->ptr->used - 1] == ntags)
    return 1;
  else if (pp->ptr->used >= BLOCK_SIZE)
    pp->ptr = new block(pp->ptr);
  pp->ptr->v[(pp->ptr->used)++] = ntags;
  return 1;
}
Beispiel #3
0
Datei: ref.cpp Projekt: att/uwin
void sortify_title(const char *s, int len, string &key)
{
  const char *end = s + len;
  for (; s < end && (*s == ' ' || *s == '\n'); s++) 
    ;
  const char *ptr = s;
  for (;;) {
    const char *token_start = ptr;
    if (!get_token(&ptr, end))
      break;
    if (ptr - token_start == 1
	&& (*token_start == ' ' || *token_start == '\n'))
      break;
  }
  if (ptr < end) {
    unsigned int first_word_len = ptr - s - 1;
    const char *ae = articles.contents() + articles.length();
    for (const char *a = articles.contents();
	 a < ae;
	 a = strchr(a, '\0') + 1)
      if (first_word_len == strlen(a)) {
	unsigned int j;
	for (j = 0; j < first_word_len; j++)
	  if (a[j] != cmlower(s[j]))
	    break;
	if (j >= first_word_len) {
	  s = ptr;
	  for (; s < end && (*s == ' ' || *s == '\n'); s++)
	    ;
	  break;
	}
      }
  }
  sortify_words(s, end, 0, key);
}
Beispiel #4
0
// Append this token's contribution to a sort key onto `result`.
//
// If the token has an explicit sort_key, that is appended and the text
// range is ignored.  Otherwise, for TOKEN_UPPER and TOKEN_LOWER tokens,
// every character in [start, end) accepted by csalpha() is appended
// after folding with cmlower().  Other token types append nothing.
void token_info::sortify(const char *start, const char *end,
                         string &result) const
{
  if (sort_key) {
    result += sort_key;
    return;
  }
  if (type != TOKEN_UPPER && type != TOKEN_LOWER)
    return;
  for (const char *p = start; p < end; ++p) {
    if (!csalpha(*p))
      continue;
    result += cmlower(*p);
  }
}
Beispiel #5
0
static void articles_command(int argc, argument *argv)
{
  articles.clear();
  int i;
  for (i = 0; i < argc; i++) {
    articles += argv[i].s;
    articles += '\0';
  }
  int len = articles.length();
  for (i = 0; i < len; i++)
    articles[i] = cmlower(articles[i]);
}
Beispiel #6
0
// Append the lower-case form of the token text [start, end) to `result`.
//
// Tokens whose type is not TOKEN_UPPER are copied unchanged.  For
// TOKEN_UPPER tokens, other_case is appended when it is set; otherwise
// each character is appended after folding with cmlower().
void token_info::lower_case(const char *start, const char *end,
                            string &result) const
{
  if (type == TOKEN_UPPER) {
    if (other_case) {
      result += other_case;
      return;
    }
    for (; start < end; ++start)
      result += cmlower(*start);
    return;
  }

  // Not an upper-case token: copy the text verbatim.
  for (; start < end; ++start)
    result += *start;
}
Beispiel #7
0
// Load the common words file into common_words_table, an open-addressed
// table of heap-allocated NUL-terminated strings.
//
// Nothing is done when header.common <= 0.  The file name is the string
// stored immediately after the first NUL-terminated string in `pool`,
// passed through munge_filename().  If the file cannot be opened an
// error is reported and the table is left untouched.
//
// Words are maximal runs of csalnum() characters, folded with cmlower()
// and cut off at header.truncate characters.  Words shorter than
// header.shortest are counted but not stored.  Reading stops at end of
// file or after header.common words.
void index_search_item::read_common_words_file()
{
  if (header.common <= 0)
    return;

  const char *common_words_file = munge_filename(strchr(pool, '\0') + 1);
  errno = 0;
  FILE *fp = fopen(common_words_file, "r");
  if (!fp) {
    error("can't open `%1': %2", common_words_file, strerror(errno));
    return;
  }

  // Size the table to the first prime at or above 2*common + 1.
  for (common_words_table_size = 2*header.common + 1;
       !is_prime(common_words_table_size);
       common_words_table_size++)
    ;
  common_words_table = new char *[common_words_table_size];
  for (int slot = 0; slot < common_words_table_size; slot++)
    common_words_table[slot] = 0;

  int words_seen = 0;
  int c;
  do {
    // Skip everything up to the start of the next word.
    do {
      c = getc(fp);
    } while (c != EOF && !csalnum(c));
    if (c == EOF)
      break;

    // Collect the word; characters beyond the truncation limit are read
    // but discarded.
    int key_len = 0;
    do {
      if (key_len < header.truncate)
        key_buffer[key_len++] = cmlower(c);
      c = getc(fp);
    } while (c != EOF && csalnum(c));

    if (key_len >= header.shortest) {
      // Probe downwards, wrapping from slot 0 to the last slot, until an
      // empty slot is found.
      int h = hash(key_buffer, key_len) % common_words_table_size;
      while (common_words_table[h])
        h = (h == 0 ? common_words_table_size : h) - 1;
      char *copy = new char[key_len + 1];
      common_words_table[h] = copy;
      memcpy(common_words_table[h], key_buffer, key_len);
      common_words_table[h][key_len] = '\0';
    }
    ++words_seen;
  } while (words_seen < header.common && c != EOF);

  fclose(fp);
}
Beispiel #8
0
// Fill the two character tables for all 256 byte values.
//
// map[c]     - cmlower(c) for characters accepted by csalnum(), '\0' for
//              everything else.
// inv_map[c] - NUL-terminated list of characters:
//                c accepted by cslower(): c followed by cmupper(c)
//                c accepted by csdigit(): c alone
//                anything else:           empty
map_init::map_init()
{
  for (int ch = 0; ch < 256; ch++) {
    map[ch] = csalnum(ch) ? cmlower(ch) : '\0';

    if (cslower(ch)) {
      inv_map[ch][0] = ch;
      inv_map[ch][1] = cmupper(ch);
      inv_map[ch][2] = '\0';
      continue;
    }
    if (csdigit(ch)) {
      inv_map[ch][0] = ch;
      inv_map[ch][1] = 0;
      continue;
    }
    inv_map[ch][0] = '\0';
  }
}
Beispiel #9
0
Datei: ref.cpp Projekt: att/uwin
// Look for a month name in the text [start, end).
//
// The text is split into maximal runs of csalpha() characters.  A run of
// at least three characters matches a month when, after folding with
// cmlower(), it is a prefix of (or equal to) the month's English name.
//
// Returns the zero-based index of the first matching month (0 for
// january ... 11 for december), or -1 when no run matches.
static int find_month(const char *start, const char *end)
{
  static const char *months[] = {
    "january",
    "february",
    "march",
    "april",
    "may",
    "june",
    "july",
    "august",
    "september",
    "october",
    "november",
    "december",
  };
  const unsigned int n_months = sizeof(months)/sizeof(months[0]);

  const char *word = start;
  for (;;) {
    // Move to the beginning of the next alphabetic run.
    while (word < end && !csalpha(*word))
      word++;
    const char *word_end = word;
    if (word == end)
      break;
    while (word_end < end && csalpha(*word_end))
      word_end++;

    if (word_end - word >= 3) {
      for (unsigned int m = 0; m < n_months; m++) {
        const char *name = months[m];
        const char *p = word;
        // A run longer than the name stops at the name's terminator.
        while (p < word_end && cmlower(*p) == *name) {
          p++;
          name++;
        }
        if (p >= word_end)
          return m;
      }
    }
    word = word_end;
  }
  return -1;
}
Beispiel #10
0
// Extract the next key from the text [*pp, end) and look it up.
//
// *pp is advanced past any characters rejected by csalnum() and then
// past the following run of csalnum() characters, which forms the key.
//
// Returns 0 when:
//   - no key remains before `end`,
//   - the key is shorter than header.shortest,
//   - the (truncated) key is all digits and not a 4-character key
//     starting with "19",
//   - the key is found in common_words_table.
// Otherwise returns &minus_one when the table entry for the key's hash
// is negative, or a pointer into `lists` at that entry's offset.
const int *index_search_item::search1(const char **pp, const char *end)
{
  const char *cur = *pp;

  // Skip separators in front of the key.
  while (cur < end && !csalnum(*cur))
    cur += 1;
  *pp = cur;
  if (cur >= end)
    return 0;

  // Consume the key itself.
  const char *start = cur;
  while (cur < end && csalnum(*cur))
    cur += 1;
  *pp = cur;

  int len = cur - start;
  if (len < header.shortest)
    return 0;
  if (len > header.truncate)
    len = header.truncate;

  // Copy the key into key_buffer: digits as they are, everything else
  // folded with cmlower().
  int all_digits = 1;
  for (int k = 0; k < len; k++) {
    if (!csdigit(start[k])) {
      key_buffer[k] = cmlower(start[k]);
      all_digits = 0;
    }
    else
      key_buffer[k] = start[k];
  }
  if (all_digits) {
    int is_19xx = (len == 4 && start[0] == '1' && start[1] == '9');
    if (!is_19xx)
      return 0;
  }

  unsigned hc = hash(key_buffer, len);

  if (common_words_table) {
    // Probe downwards from the hash slot, wrapping from slot 0 to the
    // last slot, until an empty slot ends the chain.
    int h = hc % common_words_table_size;
    while (common_words_table[h]) {
      if (strlen(common_words_table[h]) == (size_t)len
          && memcmp(common_words_table[h], key_buffer, len) == 0)
        return 0;
      h = (h == 0 ? common_words_table_size : h) - 1;
    }
  }

  int li = table[int(hc % header.table_size)];
  if (li < 0)
    return &minus_one;
  return lists + li;
}
Beispiel #11
0
static void read_common_words_file()
{
  if (n_ignore_words <= 0)
    return;
  errno = 0;
  FILE *fp = fopen(common_words_file, "r");
  if (!fp)
    fatal("can't open `%1': %2", common_words_file, strerror(errno));
  common_words_table = new word_list * [hash_table_size];
  for (int i = 0; i < hash_table_size; i++)
    common_words_table[i] = 0;
  int count = 0;
  int key_len = 0;
  for (;;) {
    int c = getc(fp);
    while (c != EOF && !csalnum(c))
      c = getc(fp);
    if (c == EOF)
      break;
    do {
      if (key_len < truncate_len)
	key_buffer[key_len++] = cmlower(c);
      c = getc(fp);
    } while (c != EOF && csalnum(c));
    if (key_len >= shortest_len) {
      int h = hash(key_buffer, key_len) % hash_table_size;
      common_words_table[h] = new word_list(key_buffer, key_len,
					    common_words_table[h]);
    }
    if (++count >= n_ignore_words)
      break;
    key_len = 0;
    if (c == EOF)
      break;
  }
  n_ignore_words = count;
  fclose(fp);
}