Пример #1
0
grn_rc
grn_query_scan(grn_ctx *ctx, grn_query *q, const char **strs, unsigned int *str_lens, unsigned int nstrs,
               int flags, int *found, int *score)
{
  unsigned int i;
  grn_rc rc;
  if (!q || !strs || !nstrs) { return GRN_INVALID_ARGUMENT; }
  *found = *score = 0;
  if (!q->snip_conds) {
    if ((rc = alloc_snip_conds(ctx, q))) { return rc; }
    flags |= GRN_QUERY_SCAN_ALLOCCONDS;
  } else if (flags & GRN_QUERY_SCAN_ALLOCCONDS) {
    GRN_LOG(ctx, GRN_LOG_WARNING, "invalid flags specified on grn_query_scan");
    return GRN_INVALID_ARGUMENT;
  }
  for (i = 0; i < nstrs; i++) {
    grn_str *n;
    snip_cond *sc = q->snip_conds;
    int f = GRN_STR_WITH_CHECKS | GRN_STR_REMOVEBLANK;
    if (flags & GRN_QUERY_SCAN_NORMALIZE) { f |= GRN_STR_NORMALIZE; }
    n = grn_str_open(ctx, *(strs + i), *(str_lens + i), f);
    if (!n) { return GRN_NO_MEMORY_AVAILABLE; }
    if ((rc = scan_query(ctx, q, n, i + 1, q->expr, &sc, GRN_OP_OR, flags, found, score))) {
      grn_str_close(ctx, n);
      return rc;
    }
    flags &= ~GRN_QUERY_SCAN_ALLOCCONDS;
    grn_str_close(ctx, n);
  }
  return GRN_SUCCESS;
}
Пример #2
0
void
test_normalize(gconstpointer data)
{
  const gchar *utf8_expected, *encoded_expected;
  const gchar *utf8_input, *encoded_input;
  grn_str *string;
  const gchar *normalized_text;
  guint normalized_text_len;
  int flags;
  grn_encoding encoding;

  encoding = gcut_data_get_int(data, "encoding");
  GRN_CTX_SET_ENCODING(&context, encoding);
  flags = GRN_STR_NORMALIZE | GRN_STR_WITH_CHECKS | GRN_STR_WITH_CTYPES;
  utf8_input = gcut_data_get_string(data, "input");
  encoded_input = convert_encoding(utf8_input, encoding);
  string = grn_str_open(&context, encoded_input, strlen(encoded_input), flags);
  normalized_text = cut_take_strndup(string->norm, string->norm_blen);
  normalized_text_len = string->norm_blen;
  grn_test_assert(grn_str_close(&context, string));

  utf8_expected = gcut_data_get_string(data, "expected");
  encoded_expected = convert_encoding(utf8_expected, encoding);
  cut_assert_equal_string(encoded_expected, normalized_text);
  cut_assert_equal_int(strlen(encoded_expected), normalized_text_len);
}
Пример #3
0
void
test_normalize_broken(gconstpointer data)
{
  grn_str *string;
  const gchar *input, *encoded_input;
  grn_encoding input_encoding, context_encoding;
  gint input_length;
  int flags = GRN_STR_NORMALIZE | GRN_STR_WITH_CHECKS | GRN_STR_WITH_CTYPES;

  context_encoding = gcut_data_get_int(data, "context-encoding");
  GRN_CTX_SET_ENCODING(&context, context_encoding);

  input = gcut_data_get_string(data, "input");
  input_encoding = gcut_data_get_int(data, "input-encoding");
  input_length = gcut_data_get_int(data, "input-length");
  encoded_input = convert_encoding(input, input_encoding);
  if (input_length < 0) {
    input_length = strlen(encoded_input);
  }
  string = grn_str_open(&context, encoded_input, input_length, flags);
  cut_assert_equal_string("", string->norm);
  cut_assert_equal_int(0, string->norm_blen);
  grn_test_assert(grn_str_close(&context, string));
}
Пример #4
0
int grn_dat_scan(grn_ctx *ctx, grn_dat *dat, const char *str,
                         unsigned int str_size, grn_dat_scan_hit *scan_hits,
                         unsigned int max_num_scan_hits, const char **str_rest) {
  if (!grn_dat_open_trie_if_needed(ctx, dat) || !str ||
      !(dat->obj.header.flags & GRN_OBJ_KEY_VAR_SIZE) || !scan_hits) {
    return -1;
  }

  grn::dat::Trie * const trie = static_cast<grn::dat::Trie *>(dat->trie);
  if (!trie) {
    return -1;
  }

  if (!max_num_scan_hits || !str_size) {
    if (str_rest) {
      *str_rest = str;
    }
    return 0;
  }

  int num_scan_hits = 0;
  try {
    if (dat->obj.header.flags & GRN_OBJ_KEY_NORMALIZE) {
      grn_str * const normalized_str = grn_str_open(
          ctx, str, str_size, GRN_STR_NORMALIZE | GRN_STR_WITH_CHECKS);
      if (!normalized_str) {
        fprintf(stderr, "error: grn_str_open() failed!\n");
        return -1;
      }
      str = normalized_str->norm;
      str_size = normalized_str->norm_blen;
      const short *checks = normalized_str->checks;
      unsigned int offset = 0;
      while (str_size) {
        if (*checks) {
          grn::dat::UInt32 key_pos;
          if (trie->lcp_search(str, str_size, &key_pos)) {
            const grn::dat::Key &key = trie->get_key(key_pos);
            const grn::dat::UInt32 key_length = key.length();
            if ((key_length == str_size) || (checks[key_length])) {
              unsigned int length = 0;
              for (grn::dat::UInt32 i = 0; i < key_length; ++i) {
                if (checks[i] > 0) {
                  length += checks[i];
                }
              }
              scan_hits[num_scan_hits].id = key.id();
              scan_hits[num_scan_hits].offset = offset;
              scan_hits[num_scan_hits].length = length;
              offset += length;
              str += key_length;
              str_size -= key_length;
              checks += key_length;
              if (++num_scan_hits >= max_num_scan_hits) {
                break;
              }
              continue;
            }
          }
          if (*checks > 0) {
            offset += *checks;
          }
        }
        ++str;
        --str_size;
        ++checks;
      }
      if (str_rest) {
        *str_rest = normalized_str->orig + offset;
      }
      grn_str_close(ctx, normalized_str);
    } else {
      const char * const begin = str;
      while (str_size) {
        grn::dat::UInt32 key_pos;
        if (trie->lcp_search(str, str_size, &key_pos)) {
          const grn::dat::Key &key = trie->get_key(key_pos);
          scan_hits[num_scan_hits].id = key.id();
          scan_hits[num_scan_hits].offset = str - begin;
          scan_hits[num_scan_hits].length = key.length();
          str += key.length();
          str_size -= key.length();
          if (++num_scan_hits >= max_num_scan_hits) {
            break;
          }
        } else {
          const int char_length = grn_charlen(ctx, str, str + str_size);
          if (char_length) {
            str += char_length;
            str_size -= char_length;
          } else {
            ++str;
            --str_size;
          }
        }
      }
      if (str_rest) {
        *str_rest = str;
      }
    }
  } catch (const grn::dat::Exception &ex) {
    ERR(grn_dat_translate_error_code(ex.code()),
        "grn::dat::lcp_search failed");
    return -1;
  }
  return num_scan_hits;
}