예제 #1
0
파일: snip.c 프로젝트: ry05cga/groonga
/*
 * Searches the normalized form of `string` for the normalized keyword held in
 * `cond->keyword`, resuming from byte offset `cond->found`.
 *
 * ctx    - context handed to the grn_string_* accessors.
 * cond   - search state: supplies the keyword, the skip table (bmBc), the
 *          shift applied after a candidate is examined, and the resume
 *          position (found). stopflag is set to SNIPCOND_STOP when the scan
 *          reaches the end of the text.
 * string - the text to search; its original bytes, checks, encoding and
 *          normalized bytes are all fetched below.
 * flags  - not referenced by the visible statements; presumably consumed by
 *          GRN_BM_COMPARE (defined elsewhere) -- TODO confirm.
 *
 * NOTE(review): GRN_BM_COMPARE and GRN_BM_BM_COMPARE are macros defined
 * outside this function. Several locals here (ck, cp, i, found,
 * string_original, string_original_length_in_bytes, string_checks,
 * string_encoding) are only meaningful through those macros, so they must
 * not be renamed or removed without checking the macro bodies. The macros
 * presumably record the hit in `cond` and return from this function when a
 * match is confirmed -- verify against their definitions.
 */
void
grn_bm_tunedbm(grn_ctx *ctx, snip_cond *cond, grn_obj *string, int flags)
{
  register unsigned char *limit, ck;
  register const unsigned char *p, *cp;
  register size_t *bmBc, delta1, i;

  const unsigned char *x;
  unsigned char *y;
  size_t shift, found;

  const char *string_original;
  unsigned int string_original_length_in_bytes;
  const short *string_checks;
  grn_encoding string_encoding;
  const char *string_norm, *keyword_norm;
  unsigned int n, m;

  /* Gather every view of the text that this function or its macros use. */
  grn_string_get_original(ctx, string,
                          &string_original, &string_original_length_in_bytes);
  string_checks = grn_string_get_checks(ctx, string);
  string_encoding = grn_string_get_encoding(ctx, string);
  /* n = byte length of the normalized text, m = byte length of the
     normalized keyword. */
  grn_string_get_normalized(ctx, string, &string_norm, &n, NULL);
  grn_string_get_normalized(ctx, cond->keyword, &keyword_norm, &m, NULL);

  y = (unsigned char *)string_norm;
  /* Single-byte keyword: a plain memchr() over the unsearched tail is
     enough, no skip table is consulted. */
  if (m == 1) {
    if (n > cond->found) {
      shift = 1;
      p = memchr(y + cond->found, keyword_norm[0], n - cond->found);
      if (p != NULL) {
        found = p - y;
        /* Presumably returns from the function when the hit is accepted. */
        GRN_BM_COMPARE;
      }
    }
    /* Falling through to here means no (further) hit was reported. */
    cond->stopflag = SNIPCOND_STOP;
    return;
  }

  /* NOTE(review): from here on the code reads cp[-2], i.e. it assumes
     m >= 2. m == 0 is not handled in this function -- confirm that callers
     never pass an empty keyword. */
  x = (unsigned char *)keyword_norm;
  bmBc = cond->bmBc;
  shift = cond->shift;

  /* Restart */
  /* p points one past the end of the current candidate window, so p[-1] is
     the text byte aligned with the keyword's last byte. cp is the matching
     one-past-the-end pointer into the keyword and ck caches the keyword's
     second-to-last byte. */
  p = y + m + cond->found;
  cp = x + m;
  ck = cp[-2];

  /* Fast path: an unrolled loop that performs ten table-driven advances per
     iteration without re-testing the limit. A zero table entry is only
     tested at three of those steps; at the other steps a zero entry simply
     leaves p where it is, so the same byte is looked up again by the next
     step and is eventually caught by one of the explicit tests.
     Presumably every bmBc entry is at most m, which is what makes the
     11 * m safety margin sufficient -- TODO confirm where bmBc is built. */
  /* 12 means 1(initial offset) + 10 (in loop) + 1 (shift) */
  if (n - cond->found > 12 * m) {
    limit = y + n - 11 * m;
    while (p <= limit) {
      p += bmBc[p[-1]];
      if(!(delta1 = bmBc[p[-1]])) {
        goto check;
      }
      p += delta1;
      p += bmBc[p[-1]];
      p += bmBc[p[-1]];
      if(!(delta1 = bmBc[p[-1]])) {
        goto check;
      }
      p += delta1;
      p += bmBc[p[-1]];
      p += bmBc[p[-1]];
      if(!(delta1 = bmBc[p[-1]])) {
        goto check;
      }
      p += delta1;
      p += bmBc[p[-1]];
      p += bmBc[p[-1]];
      continue;
    check:
      /* The table entry for p[-1] is zero: examine the candidate window
         ending at p. If the macro does not return, move on by `shift`. */
      GRN_BM_BM_COMPARE;
      p += shift;
    }
  }
  /* limit check + search */
  /* Slow path for the remaining tail: one table lookup per iteration with
     the bound checked every time. */
  limit = y + n;
  while(p <= limit) {
    if (!(delta1 = bmBc[p[-1]])) {
      GRN_BM_BM_COMPARE;
      p += shift;
    }
    /* delta1 is zero when the branch above ran, so this adds nothing then. */
    p += delta1;
  }
  /* The whole text was scanned without the macros returning. */
  cond->stopflag = SNIPCOND_STOP;
}
예제 #2
0
파일: dat.cpp 프로젝트: bossato/groonga
    // Scans `str` from left to right and records, for each position where
    // the trie's lcp_search() reports a stored key, one entry in `scan_hits`.
    //
    // Parameters:
    //   ctx               - context passed to the grn_* helpers and to ERR.
    //   dat               - table whose trie is searched; it must have
    //                       GRN_OBJ_KEY_VAR_SIZE set in its header flags.
    //   str, str_size     - text to scan and its length in bytes.
    //   scan_hits         - output array; must hold at least
    //                       max_num_scan_hits entries.
    //   max_num_scan_hits - scanning stops once this many hits are stored.
    //   str_rest          - optional output; on success receives a pointer
    //                       into the caller's text where scanning stopped.
    //                       It is NOT written on the -1 error paths.
    //
    // Returns:
    //   -1 when the trie cannot be opened, `str` or `scan_hits` is null, the
    //      table is not variable-size-keyed, the string object cannot be
    //      opened, or a grn::dat::Exception is raised (reported via ERR);
    //    0 when max_num_scan_hits or str_size is zero (*str_rest = str);
    //   otherwise the number of hits stored.
    //
    // When GRN_OBJ_KEY_NORMALIZE is set the search runs over the normalized
    // text, while hit offsets/lengths are accumulated from the positive
    // entries of the `checks` array (presumably the original byte length of
    // the character starting at each normalized byte -- confirm against
    // grn_string_get_checks()).
    int
    grn_dat_scan(grn_ctx *ctx, grn_dat *dat, const char *str,
                 unsigned int str_size, grn_dat_scan_hit *scan_hits,
                 unsigned int max_num_scan_hits, const char **str_rest) {
        if (!grn_dat_open_trie_if_needed(ctx, dat) || !str ||
                !(dat->obj.header.flags & GRN_OBJ_KEY_VAR_SIZE) || !scan_hits) {
            return -1;
        }

        grn::dat::Trie * const trie = static_cast<grn::dat::Trie *>(dat->trie);
        if (!trie) {
            return -1;
        }

        // Nothing to scan or nowhere to store hits: report "no hits" and
        // leave the whole input as the unscanned rest.
        if (!max_num_scan_hits || !str_size) {
            if (str_rest) {
                *str_rest = str;
            }
            return 0;
        }

        int num_scan_hits = 0;
        try {
            if (dat->obj.header.flags & GRN_OBJ_KEY_NORMALIZE) {
                grn_obj *normalizer = GRN_NORMALIZER_AUTO;
                int flags = GRN_STRING_WITH_CHECKS;
                grn_obj * const normalized_string = grn_string_open(ctx, str, str_size,
                                                    normalizer,
                                                    flags);
                if (!normalized_string) {
                    // NOTE(review): this path reports through stderr while
                    // the catch block below uses ERR; kept as-is.
                    fprintf(stderr, "error: grn_string_open() failed!\n");
                    return -1;
                }

                // Scope guard: closes the string object however this branch
                // is left. Without it, an exception thrown by lcp_search()
                // or get_key() would jump straight to the catch block and
                // leak normalized_string.
                class NormalizedStringCloser {
                public:
                    NormalizedStringCloser(grn_ctx *guard_ctx, grn_obj *guard_obj)
                        : ctx_(guard_ctx), obj_(guard_obj) {}
                    ~NormalizedStringCloser() {
                        grn_obj_close(ctx_, obj_);
                    }
                private:
                    // Non-copyable: exactly one close per opened object.
                    NormalizedStringCloser(const NormalizedStringCloser &);
                    NormalizedStringCloser &operator=(const NormalizedStringCloser &);
                    grn_ctx *ctx_;
                    grn_obj *obj_;
                };
                const NormalizedStringCloser closer(ctx, normalized_string);

                // From here on str/str_size describe the normalized text.
                grn_string_get_normalized(ctx, normalized_string, &str, &str_size, NULL);
                const short *checks = grn_string_get_checks(ctx, normalized_string);
                // Running byte offset into the ORIGINAL text.
                unsigned int offset = 0;
                while (str_size) {
                    // Only attempt a match where the checks entry is non-zero.
                    if (*checks) {
                        grn::dat::UInt32 key_pos;
                        if (trie->lcp_search(str, str_size, &key_pos)) {
                            const grn::dat::Key &key = trie->get_key(key_pos);
                            const grn::dat::UInt32 key_length = key.length();
                            // Accept the key only if it ends at the end of
                            // the text or where the next checks entry is
                            // non-zero.
                            if ((key_length == str_size) || (checks[key_length])) {
                                unsigned int length = 0;
                                for (grn::dat::UInt32 i = 0; i < key_length; ++i) {
                                    if (checks[i] > 0) {
                                        length += checks[i];
                                    }
                                }
                                scan_hits[num_scan_hits].id = key.id();
                                scan_hits[num_scan_hits].offset = offset;
                                scan_hits[num_scan_hits].length = length;
                                offset += length;
                                str += key_length;
                                str_size -= key_length;
                                checks += key_length;
                                ++num_scan_hits;
                                if (static_cast<unsigned int>(num_scan_hits) >=
                                        max_num_scan_hits) {
                                    break;
                                }
                                continue;
                            }
                        }
                        // No usable key here: skip this position, keeping the
                        // original-text offset in step.
                        if (*checks > 0) {
                            offset += *checks;
                        }
                    }
                    ++str;
                    --str_size;
                    ++checks;
                }
                if (str_rest) {
                    grn_string_get_original(ctx, normalized_string, str_rest, NULL);
                    *str_rest += offset;
                }
                // normalized_string is closed by `closer` at the end of this
                // scope, after *str_rest has been computed.
            } else {
                const char * const begin = str;
                while (str_size) {
                    grn::dat::UInt32 key_pos;
                    if (trie->lcp_search(str, str_size, &key_pos)) {
                        const grn::dat::Key &key = trie->get_key(key_pos);
                        scan_hits[num_scan_hits].id = key.id();
                        scan_hits[num_scan_hits].offset = str - begin;
                        scan_hits[num_scan_hits].length = key.length();
                        str += key.length();
                        str_size -= key.length();
                        ++num_scan_hits;
                        if (static_cast<unsigned int>(num_scan_hits) >=
                                max_num_scan_hits) {
                            break;
                        }
                    } else {
                        // Step over one character; fall back to one byte when
                        // grn_charlen() reports 0.
                        const int char_length = grn_charlen(ctx, str, str + str_size);
                        if (char_length) {
                            str += char_length;
                            str_size -= char_length;
                        } else {
                            ++str;
                            --str_size;
                        }
                    }
                }
                if (str_rest) {
                    *str_rest = str;
                }
            }
        } catch (const grn::dat::Exception &ex) {
            ERR(grn_dat_translate_error_code(ex.code()),
                "grn::dat::lcp_search failed");
            return -1;
        }
        return num_scan_hits;
    }