Esempio n. 1
0
static grn_obj *
func_highlight_html(grn_ctx *ctx, int nargs, grn_obj **args,
                    grn_user_data *user_data)
{
  grn_obj *highlighted = NULL;
  grn_obj *string;
  grn_obj *lexicon = NULL;
  grn_obj *expression = NULL;
  grn_highlighter *highlighter;
  grn_obj *highlighter_ptr;

  if (!(1 <= nargs && nargs <= 2)) {
    GRN_PLUGIN_ERROR(ctx,
                     GRN_INVALID_ARGUMENT,
                     "highlight_html(): wrong number of arguments (%d for 1..2)",
                     nargs);
    highlighted = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0);
    return highlighted;
  }

  string = args[0];
  if (nargs == 2) {
    lexicon = args[1];
  }

  grn_proc_get_info(ctx, user_data, NULL, NULL, &expression);

  highlighter_ptr = grn_expr_get_var(ctx, expression,
                                     GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME,
                                     strlen(GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME));
  if (highlighter_ptr) {
    highlighter = (grn_highlighter *)GRN_PTR_VALUE(highlighter_ptr);
  } else {
    highlighter_ptr =
      grn_expr_get_or_add_var(ctx, expression,
                              GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME,
                              strlen(GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME));
    GRN_OBJ_FIN(ctx, highlighter_ptr);
    GRN_PTR_INIT(highlighter_ptr, GRN_OBJ_OWN, GRN_DB_OBJECT);

    highlighter = func_highlight_html_create_highlighter(ctx, expression);
    grn_highlighter_set_lexicon(ctx, highlighter, lexicon);
    GRN_PTR_SET(ctx, highlighter_ptr, highlighter);
  }

  highlighted = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_TEXT, 0);
  grn_highlighter_highlight(ctx,
                            highlighter,
                            GRN_TEXT_VALUE(string),
                            GRN_TEXT_LEN(string),
                            highlighted);

  return highlighted;
}
Esempio n. 2
0
static grn_obj *
func_highlight_html(grn_ctx *ctx, int nargs, grn_obj **args,
                    grn_user_data *user_data)
{
  grn_obj *highlighted = NULL;

#define N_REQUIRED_ARGS 1
  if (nargs == N_REQUIRED_ARGS) {
    grn_obj *string = args[0];
    grn_obj *expression = NULL;
    grn_obj *keywords;
    grn_obj *keywords_ptr;
    grn_bool use_html_escape = GRN_TRUE;

    grn_proc_get_info(ctx, user_data, NULL, NULL, &expression);

    keywords_ptr = grn_expr_get_var(ctx, expression,
                                    GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME,
                                    strlen(GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME));
    if (keywords_ptr) {
      keywords = GRN_PTR_VALUE(keywords_ptr);
    } else {
      keywords_ptr =
        grn_expr_get_or_add_var(ctx, expression,
                                GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME,
                                strlen(GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME));
      GRN_OBJ_FIN(ctx, keywords_ptr);
      GRN_PTR_INIT(keywords_ptr, GRN_OBJ_OWN, GRN_DB_OBJECT);

      keywords = func_highlight_html_create_keywords_table(ctx, expression);
      GRN_PTR_SET(ctx, keywords_ptr, keywords);
    }

    highlighted = highlight_keywords(ctx, user_data,
                                     string, keywords, use_html_escape,
                                     "<span class=\"keyword\">",
                                     strlen("<span class=\"keyword\">"),
                                     "</span>",
                                     strlen("</span>"));
  }
#undef N_REQUIRED_ARGS

  if (!highlighted) {
    highlighted = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0);
  }

  return highlighted;
}
Esempio n. 3
0
static grn_obj *
func_highlight_html_create_keywords_table(grn_ctx *ctx, grn_obj *expression)
{
  grn_obj *keywords;
  grn_obj *condition_ptr = NULL;
  grn_obj *condition = NULL;

  keywords = grn_table_create(ctx, NULL, 0, NULL,
                              GRN_OBJ_TABLE_PAT_KEY,
                              grn_ctx_at(ctx, GRN_DB_SHORT_TEXT),
                              NULL);

  {
    grn_obj *normalizer;
    normalizer = grn_ctx_get(ctx, "NormalizerAuto", -1);
    grn_obj_set_info(ctx, keywords, GRN_INFO_NORMALIZER, normalizer);
    grn_obj_unlink(ctx, normalizer);
  }

  condition_ptr = grn_expr_get_var(ctx, expression,
                                   GRN_SELECT_INTERNAL_VAR_CONDITION,
                                   strlen(GRN_SELECT_INTERNAL_VAR_CONDITION));
  if (condition_ptr) {
    condition = GRN_PTR_VALUE(condition_ptr);
  }

  if (condition) {
    size_t i, n_keywords;
    grn_obj current_keywords;
    GRN_PTR_INIT(&current_keywords, GRN_OBJ_VECTOR, GRN_ID_NIL);
    grn_expr_get_keywords(ctx, condition, &current_keywords);

    n_keywords = GRN_BULK_VSIZE(&current_keywords) / sizeof(grn_obj *);
    for (i = 0; i < n_keywords; i++) {
      grn_obj *keyword;
      keyword = GRN_PTR_VALUE_AT(&current_keywords, i);
      grn_table_add(ctx, keywords,
                    GRN_TEXT_VALUE(keyword),
                    GRN_TEXT_LEN(keyword),
                    NULL);
    }
    grn_obj_unlink(ctx, &current_keywords);
  }

  return keywords;
}
Esempio n. 4
0
static grn_highlighter *
func_highlight_html_create_highlighter(grn_ctx *ctx, grn_obj *expression)
{
  grn_highlighter *highlighter;
  grn_obj *condition_ptr = NULL;
  grn_obj *condition = NULL;

  highlighter = grn_highlighter_open(ctx);

  condition_ptr = grn_expr_get_var(ctx, expression,
                                   GRN_SELECT_INTERNAL_VAR_CONDITION,
                                   strlen(GRN_SELECT_INTERNAL_VAR_CONDITION));
  if (condition_ptr) {
    condition = GRN_PTR_VALUE(condition_ptr);
  }

  if (condition) {
    size_t i, n_keywords;
    grn_obj current_keywords;
    GRN_TEXT_INIT(&current_keywords, GRN_OBJ_VECTOR);
    grn_expr_get_keywords(ctx, condition, &current_keywords);

    n_keywords = grn_vector_size(ctx, &current_keywords);
    for (i = 0; i < n_keywords; i++) {
      const char *keyword;
      unsigned int keyword_size;
      keyword_size = grn_vector_get_element(ctx,
                                            &current_keywords,
                                            i,
                                            &keyword,
                                            NULL,
                                            NULL);
      grn_highlighter_add_keyword(ctx,
                                  highlighter,
                                  keyword,
                                  keyword_size);
    }
    GRN_OBJ_FIN(ctx, &current_keywords);
  }

  return highlighter;
}
Esempio n. 5
0
static grn_obj *
func_snippet_html(grn_ctx *ctx, int nargs, grn_obj **args,
                  grn_user_data *user_data)
{
  grn_obj *snippets = NULL;

  /* TODO: support parameters */
  if (nargs == 1) {
    grn_obj *text = args[0];
    grn_obj *expression = NULL;
    grn_obj *condition_ptr = NULL;
    grn_obj *condition = NULL;
    grn_obj *snip = NULL;
    int flags = GRN_SNIP_SKIP_LEADING_SPACES;
    unsigned int width = 200;
    unsigned int max_n_results = 3;
    const char *open_tag = "<span class=\"keyword\">";
    const char *close_tag = "</span>";
    grn_snip_mapping *mapping = GRN_SNIP_MAPPING_HTML_ESCAPE;

    grn_proc_get_info(ctx, user_data, NULL, NULL, &expression);
    condition_ptr = grn_expr_get_var(ctx, expression,
                                     GRN_SELECT_INTERNAL_VAR_CONDITION,
                                     strlen(GRN_SELECT_INTERNAL_VAR_CONDITION));
    if (condition_ptr) {
      condition = GRN_PTR_VALUE(condition_ptr);
    }

    if (condition) {
      grn_obj *snip_ptr;
      snip_ptr = grn_expr_get_var(ctx, expression,
                                  GRN_FUNC_SNIPPET_HTML_CACHE_NAME,
                                  strlen(GRN_FUNC_SNIPPET_HTML_CACHE_NAME));
      if (snip_ptr) {
        snip = GRN_PTR_VALUE(snip_ptr);
      } else {
        snip_ptr =
          grn_expr_get_or_add_var(ctx, expression,
                                  GRN_FUNC_SNIPPET_HTML_CACHE_NAME,
                                  strlen(GRN_FUNC_SNIPPET_HTML_CACHE_NAME));
        GRN_OBJ_FIN(ctx, snip_ptr);
        GRN_PTR_INIT(snip_ptr, GRN_OBJ_OWN, GRN_DB_OBJECT);

        snip = grn_snip_open(ctx, flags, width, max_n_results,
                             open_tag, strlen(open_tag),
                             close_tag, strlen(close_tag),
                             mapping);
        if (snip) {
          grn_snip_set_normalizer(ctx, snip, GRN_NORMALIZER_AUTO);
          grn_expr_snip_add_conditions(ctx, condition, snip,
                                       0, NULL, NULL, NULL, NULL);
          GRN_PTR_SET(ctx, snip_ptr, snip);
        }
      }
    }

    if (snip) {
      snippets = snippet_exec(ctx, snip, text, user_data, NULL, 0, NULL, 0);
    }
  }

  if (!snippets) {
    snippets = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0);
  }

  return snippets;
}
Esempio n. 6
0
static grn_rc
selector_fuzzy_search(grn_ctx *ctx, grn_obj *table, grn_obj *index,
                      int nargs, grn_obj **args,
                      grn_obj *res, grn_operator op)
{
  grn_rc rc = GRN_SUCCESS;
  grn_obj *target = NULL;
  grn_obj *obj;
  grn_obj *query;
  grn_obj *hash_args_ptr;
  uint32_t max_distance = 1;
  uint32_t prefix_length = 0;
  uint32_t prefix_match_size = 0;
  uint32_t max_expansion = 0;
  int flags = 0;
  grn_bool use_sequential_search = GRN_FALSE;

  if ((nargs - 1) < 2) {
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                     "fuzzy_search(): wrong number of arguments (%d ...)",
                     nargs - 1);
    rc = ctx->rc;
    goto exit;
  }
  obj = args[1];
  query = args[2];

  if (nargs == 4) {
    grn_obj *hash;
    grn_hash_cursor *cursor;
    void *key;
    grn_obj *value;
    int key_size, value_size;
    hash_args_ptr = args[3];
    if (hash_args_ptr->header.type == GRN_PTR) {
      hash = GRN_PTR_VALUE(hash_args_ptr);
    }
    if (hash->header.type != GRN_TABLE_HASH_KEY) {
      GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                       "fuzzy_search(): 3rd argument must be object literal: <%.*s>",
                       (int)GRN_TEXT_LEN(args[3]), GRN_TEXT_VALUE(args[3]));
      goto exit;
    }
    hash = GRN_PTR_VALUE(hash_args_ptr);

    if (!(cursor = grn_hash_cursor_open(ctx, (grn_hash *)hash, NULL, 0, NULL, 0, 0, -1, 0))) {
      GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE,
                       "fuzzy_search(): couldn't open cursor");
      goto exit;
    }
    while (grn_hash_cursor_next(ctx, cursor) != GRN_ID_NIL) {
      value_size = grn_hash_cursor_get_key_value(ctx, cursor, &key, &key_size, (void **)&value);

      if (key_size == 12 && !memcmp(key, "max_distance", 12)) {
        max_distance = GRN_UINT32_VALUE(value);
      } else if (key_size == 13 && !memcmp(key, "prefix_length", 13)) {
        prefix_length = GRN_UINT32_VALUE(value);
      } else if (key_size == 13 && !memcmp(key, "max_expansion", 13)) {
        max_expansion = GRN_UINT32_VALUE(value);
      } else if (key_size == 18 && !memcmp(key, "with_transposition", 18)) {
        if (GRN_BOOL_VALUE(value)) {
          flags |= GRN_TABLE_FUZZY_SEARCH_WITH_TRANSPOSITION;
        }
      } else {
        GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "invalid option name: %.*s",
                         key_size, (char *)key);
        grn_hash_cursor_close(ctx, cursor);
        goto exit;
      }
    }
    grn_hash_cursor_close(ctx, cursor);
  }

  if (index) {
    target = index;
  } else {
    if (obj->header.type == GRN_COLUMN_INDEX) {
      target = obj;
    } else {
      grn_column_index(ctx, obj, GRN_OP_FUZZY, &target, 1, NULL);
    }
  }

  if (target) {
    grn_obj *lexicon;
    use_sequential_search = GRN_TRUE;
    lexicon = grn_ctx_at(ctx, target->header.domain);
    if (lexicon) {
      if (lexicon->header.type == GRN_TABLE_PAT_KEY) {
        use_sequential_search = GRN_FALSE;
      }
      grn_obj_unlink(ctx, lexicon);
    }
  } else {
    if (grn_obj_is_key_accessor(ctx, obj) &&
        table->header.type == GRN_TABLE_PAT_KEY) {
      target = table;
    } else {
      use_sequential_search = GRN_TRUE;
    }
  }

  if (prefix_length) {
    const char *s = GRN_TEXT_VALUE(query);
    const char *e = GRN_BULK_CURR(query);
    const char *p;
    unsigned int cl = 0;
    unsigned int length = 0;
    for (p = s; p < e && (cl = grn_charlen(ctx, p, e)); p += cl) {
      length++;
      if (length > prefix_length) {
        break;
      }
    }
    prefix_match_size = p - s;
  }

  if (use_sequential_search) {
    rc = sequential_fuzzy_search(ctx, table, obj, query,
                                 max_distance, prefix_match_size,
                                 max_expansion, flags, res, op);
    goto exit;
  }

  if (!target) {
    grn_obj inspected;
    GRN_TEXT_INIT(&inspected, 0);
    grn_inspect(ctx, &inspected, target);
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                     "fuzzy_search(): "
                     "column must be COLUMN_INDEX or TABLE_PAT_KEY: <%.*s>",
                     (int)GRN_TEXT_LEN(&inspected),
                     GRN_TEXT_VALUE(&inspected));
    rc = ctx->rc;
    GRN_OBJ_FIN(ctx, &inspected);
  } else {
    grn_search_optarg options = {0};
    options.mode = GRN_OP_FUZZY;
    options.fuzzy.prefix_match_size = prefix_match_size;
    options.fuzzy.max_distance = max_distance;
    options.fuzzy.max_expansion = max_expansion;
    options.fuzzy.flags = flags;
    grn_obj_search(ctx, target, query, res, op, &options);
  }

exit :
  return rc;
}