Esempio n. 1
0
/*
 * Checks whether `target` (`target_len` bytes) matches the regular
 * expression given by `pattern` (`pattern_len` bytes).
 *
 * The pattern is compiled with grn_onigmo_new() using the default option
 * and syntax, tested via regexp_is_match(), and released with onig_free()
 * before returning.
 *
 * Returns GRN_FALSE when the pattern cannot be compiled, and always
 * returns GRN_FALSE when built without GRN_SUPPORT_REGEXP.
 */
static grn_bool
string_match_regexp(grn_ctx *ctx,
                    const char *target, unsigned int target_len,
                    const char *pattern, unsigned int pattern_len)
{
#ifdef GRN_SUPPORT_REGEXP
  grn_bool is_matched = GRN_FALSE;
  OnigRegex compiled = grn_onigmo_new(ctx,
                                      pattern, pattern_len,
                                      GRN_ONIGMO_OPTION_DEFAULT,
                                      GRN_ONIGMO_SYNTAX_DEFAULT,
                                      "[operator]");

  if (compiled) {
    is_matched = regexp_is_match(ctx, compiled, target, target_len);
    /* The compiled pattern is only needed for this single test. */
    onig_free(compiled);
  }

  return is_matched;
#else /* GRN_SUPPORT_REGEXP */
  return GRN_FALSE;
#endif /* GRN_SUPPORT_REGEXP */
}
Esempio n. 2
0
/*
 * Checks whether `target` (`target_len` bytes) matches the regular
 * expression given by `pattern` (`pattern_len` bytes).
 *
 * The pattern is compiled with regexp_compile() using ONIG_SYNTAX_RUBY,
 * tested via regexp_is_match(), and released with onig_free() before
 * returning.
 *
 * Returns GRN_FALSE when the pattern cannot be compiled, and always
 * returns GRN_FALSE when built without GRN_SUPPORT_REGEXP.
 */
static grn_bool
string_match_regexp(grn_ctx *ctx,
                    const char *target, unsigned int target_len,
                    const char *pattern, unsigned int pattern_len)
{
#ifdef GRN_SUPPORT_REGEXP
  grn_bool is_matched = GRN_FALSE;
  OnigRegex compiled = regexp_compile(ctx, pattern, pattern_len,
                                      ONIG_SYNTAX_RUBY);

  if (compiled) {
    is_matched = regexp_is_match(ctx, compiled, target, target_len);
    /* The compiled pattern is only needed for this single test. */
    onig_free(compiled);
  }

  return is_matched;
#else /* GRN_SUPPORT_REGEXP */
  return GRN_FALSE;
#endif /* GRN_SUPPORT_REGEXP */
}
Esempio n. 3
0
/*
 * Reports whether `sub_text` (`sub_text_len` bytes) occurs inside `text`
 * (`text_len` bytes).
 *
 * Returns GRN_FALSE immediately when `sub_text` is empty or longer than
 * `text`.
 *
 * When built with GRN_SUPPORT_REGEXP and grn_onigmo_is_valid_encoding()
 * accepts the context, `sub_text` is compiled as a literal pattern
 * (ONIG_SYNTAX_ASIS) and the search is delegated to regexp_is_match();
 * a compile failure yields GRN_FALSE.
 *
 * Otherwise a character-by-character scan is used, stepping through both
 * strings with grn_charlen(). The scan returns GRN_FALSE as soon as
 * grn_charlen() reports a zero-length character in either string.
 */
static grn_bool
string_have_sub_text(grn_ctx *ctx,
                     const char *text, unsigned int text_len,
                     const char *sub_text, unsigned int sub_text_len)
{
  if (sub_text_len == 0) {
    return GRN_FALSE;
  }

  if (sub_text_len > text_len) {
    return GRN_FALSE;
  }

#ifdef GRN_SUPPORT_REGEXP
  if (grn_onigmo_is_valid_encoding(ctx)) {
    OnigRegex regex;
    grn_bool matched;

    regex = grn_onigmo_new(ctx,
                           sub_text,
                           sub_text_len,
                           GRN_ONIGMO_OPTION_DEFAULT,
                           ONIG_SYNTAX_ASIS,
                           "[operator]");
    if (!regex) {
      return GRN_FALSE;
    }

    matched = regexp_is_match(ctx, regex, text, text_len);
    onig_free(regex);
    return matched;
  }
#endif /* GRN_SUPPORT_REGEXP */
  {
    const char *text_current = text;
    const char *text_end = text + text_len;
    /* Position in `text` where the match attempt in progress began. */
    const char *match_start = text;
    const char *sub_text_current = sub_text;
    const char *sub_text_end = sub_text + sub_text_len;
    int sub_text_start_char_len;
    int sub_text_char_len;

    sub_text_start_char_len = grn_charlen(ctx, sub_text, sub_text_end);
    if (sub_text_start_char_len == 0) {
      return GRN_FALSE;
    }
    sub_text_char_len = sub_text_start_char_len;

    while (text_current < text_end) {
      int text_char_len;

      text_char_len = grn_charlen(ctx, text_current, text_end);
      if (text_char_len == 0) {
        return GRN_FALSE;
      }

      if (text_char_len == sub_text_char_len &&
          memcmp(text_current, sub_text_current, text_char_len) == 0) {
        if (sub_text_current == sub_text) {
          /* First character of a new attempt: remember where it began. */
          match_start = text_current;
        }

        sub_text_current += sub_text_char_len;
        if (sub_text_current == sub_text_end) {
          return GRN_TRUE;
        }

        sub_text_char_len = grn_charlen(ctx, sub_text_current, sub_text_end);
        if (sub_text_char_len == 0) {
          return GRN_FALSE;
        }

        text_current += text_char_len;
      } else if (sub_text_current != sub_text) {
        /*
         * A partial match failed. Restart one character after the place
         * the attempt began rather than at the mismatching character;
         * otherwise overlapping candidates are skipped (e.g. "aab" would
         * not be found in "aaab"). The character at match_start matched
         * the first character of sub_text, so it has the same byte length.
         */
        text_current = match_start + sub_text_start_char_len;
        sub_text_current = sub_text;
        sub_text_char_len = sub_text_start_char_len;
      } else {
        text_current += text_char_len;
      }
    }

    return GRN_FALSE;
  }
}
Esempio n. 4
0
/*
 * Returns GRN_TRUE if at least one element of `vector` matches the regular
 * expression stored as text in `pattern`; GRN_FALSE otherwise.
 *
 * Every non-empty element is wrapped with grn_string_open() using the
 * object looked up by GRN_NORMALIZER_AUTO_NAME, and the normalized text is
 * what gets tested with regexp_is_match(). Scanning stops at the first
 * match.
 *
 * GRN_FALSE is also returned when the vector is empty, when the pattern
 * cannot be compiled, or when built without GRN_SUPPORT_REGEXP.
 */
static grn_bool
exec_regexp_vector_bulk(grn_ctx *ctx, grn_obj *vector, grn_obj *pattern)
{
#ifdef GRN_SUPPORT_REGEXP
  grn_obj *normalizer = NULL;
  grn_bool matched = GRN_FALSE;
  unsigned int i, size;
  OnigRegex regex;

  size = grn_vector_size(ctx, vector);
  if (size == 0) {
    return GRN_FALSE;
  }

  regex = grn_onigmo_new(ctx,
                         GRN_TEXT_VALUE(pattern),
                         GRN_TEXT_LEN(pattern),
                         GRN_ONIGMO_OPTION_DEFAULT,
                         GRN_ONIGMO_SYNTAX_DEFAULT,
                         "[operator]");
  if (!regex) {
    return GRN_FALSE;
  }

  normalizer = grn_ctx_get(ctx, GRN_NORMALIZER_AUTO_NAME, -1);
  for (i = 0; i < size; i++) {
    const char *content;
    unsigned int content_size;
    grn_id domain_id;
    grn_obj *norm_content;
    /* Initialized so a failed normalization never leaves garbage here. */
    const char *norm_content_raw = NULL;
    unsigned int norm_content_raw_length_in_bytes = 0;

    content_size = grn_vector_get_element(ctx, vector, i,
                                          &content, NULL, &domain_id);
    if (content_size == 0) {
      continue;
    }

    norm_content = grn_string_open(ctx, content, content_size, normalizer, 0);
    if (!norm_content) {
      /* No string object could be created for this element; skip it. */
      continue;
    }

    grn_string_get_normalized(ctx, norm_content,
                              &norm_content_raw,
                              &norm_content_raw_length_in_bytes,
                              NULL);

    if (norm_content_raw) {
      matched = regexp_is_match(ctx, regex,
                                norm_content_raw,
                                norm_content_raw_length_in_bytes);
    }

    grn_obj_unlink(ctx, norm_content);

    if (matched) {
      break;
    }
  }

  /* The lookup above may have returned NULL; only release a real object. */
  if (normalizer) {
    grn_obj_unlink(ctx, normalizer);
  }

  onig_free(regex);

  return matched;
#else /* GRN_SUPPORT_REGEXP */
  return GRN_FALSE;
#endif /* GRN_SUPPORT_REGEXP */
}
Esempio n. 5
0
/*
 * Returns GRN_TRUE if the key of at least one record referenced by
 * `uvector` matches the regular expression stored as text in `pattern`;
 * GRN_FALSE otherwise.
 *
 * The record keys are read from the object identified by
 * `uvector->header.domain`. When grn_table_get_info() reports a normalizer
 * for that object, keys are matched as stored. Otherwise each key is
 * first passed through grn_string_open() with the object looked up by
 * GRN_NORMALIZER_AUTO_NAME and the normalized text is matched instead.
 * Scanning stops at the first match.
 *
 * GRN_FALSE is also returned when the uvector is empty, when the pattern
 * cannot be compiled, when the domain cannot be resolved, or when built
 * without GRN_SUPPORT_REGEXP.
 */
static grn_bool
exec_regexp_uvector_bulk(grn_ctx *ctx, grn_obj *uvector, grn_obj *pattern)
{
#ifdef GRN_SUPPORT_REGEXP
  grn_bool matched = GRN_FALSE;
  unsigned int i, size;
  OnigRegex regex;
  grn_obj *domain;
  /*
   * Start from NULL so the checks below are well-defined even if
   * grn_table_get_info() returns without writing to it.
   */
  grn_obj *normalizer = NULL;
  grn_obj *normalizer_auto = NULL;

  size = grn_uvector_size(ctx, uvector);
  if (size == 0) {
    return GRN_FALSE;
  }

  regex = grn_onigmo_new(ctx,
                         GRN_TEXT_VALUE(pattern),
                         GRN_TEXT_LEN(pattern),
                         GRN_ONIGMO_OPTION_DEFAULT,
                         GRN_ONIGMO_SYNTAX_DEFAULT,
                         "[operator]");
  if (!regex) {
    return GRN_FALSE;
  }

  domain = grn_ctx_at(ctx, uvector->header.domain);
  if (!domain) {
    onig_free(regex);
    return GRN_FALSE;
  }

  grn_table_get_info(ctx, domain, NULL, NULL, NULL, &normalizer, NULL);
  if (!normalizer) {
    normalizer_auto = grn_ctx_get(ctx, GRN_NORMALIZER_AUTO_NAME, -1);
  }

  for (i = 0; i < size; i++) {
    grn_id record_id;
    char key[GRN_TABLE_MAX_KEY_SIZE];
    int key_size;

    record_id = grn_uvector_get_element(ctx, uvector, i, NULL);
    key_size = grn_table_get_key(ctx, domain, record_id,
                                 key, GRN_TABLE_MAX_KEY_SIZE);
    /*
     * key_size is signed but is used as an unsigned byte length below,
     * so skip anything that is not a positive size.
     */
    if (key_size <= 0) {
      continue;
    }

    if (normalizer) {
      matched = regexp_is_match(ctx, regex, key, key_size);
    } else {
      grn_obj *norm_key;
      /* Initialized so a failed normalization never leaves garbage here. */
      const char *norm_key_raw = NULL;
      unsigned int norm_key_raw_length_in_bytes = 0;

      norm_key = grn_string_open(ctx, key, key_size, normalizer_auto, 0);
      if (!norm_key) {
        /* No string object could be created for this key; skip it. */
        continue;
      }

      grn_string_get_normalized(ctx, norm_key,
                                &norm_key_raw,
                                &norm_key_raw_length_in_bytes,
                                NULL);
      if (norm_key_raw) {
        matched = regexp_is_match(ctx, regex,
                                  norm_key_raw,
                                  norm_key_raw_length_in_bytes);
      }
      grn_obj_unlink(ctx, norm_key);
    }

    if (matched) {
      break;
    }
  }

  if (normalizer_auto) {
    grn_obj_unlink(ctx, normalizer_auto);
  }

  grn_obj_unlink(ctx, domain);

  onig_free(regex);

  return matched;
#else /* GRN_SUPPORT_REGEXP */
  return GRN_FALSE;
#endif /* GRN_SUPPORT_REGEXP */
}