コード例 #1
0
ファイル: proc_snippet.c プロジェクト: cafedomancer/groonga
static grn_obj *
snippet_exec(grn_ctx *ctx, grn_obj *snip, grn_obj *text,
             grn_user_data *user_data,
             const char *prefix, int prefix_length,
             const char *suffix, int suffix_length)
{
  grn_rc rc;
  unsigned int i, n_results, max_tagged_length;
  grn_obj snippet_buffer;
  grn_obj *snippets;

  if (GRN_TEXT_LEN(text) == 0) {
    return NULL;
  }

  rc = grn_snip_exec(ctx, snip,
                     GRN_TEXT_VALUE(text), GRN_TEXT_LEN(text),
                     &n_results, &max_tagged_length);
  if (rc != GRN_SUCCESS) {
    return NULL;
  }

  if (n_results == 0) {
    return grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0);
  }

  snippets = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_SHORT_TEXT, GRN_OBJ_VECTOR);
  if (!snippets) {
    return NULL;
  }

  GRN_TEXT_INIT(&snippet_buffer, 0);
  grn_bulk_space(ctx, &snippet_buffer,
                 prefix_length + max_tagged_length + suffix_length);
  for (i = 0; i < n_results; i++) {
    unsigned int snippet_length;

    GRN_BULK_REWIND(&snippet_buffer);
    if (prefix_length) {
      GRN_TEXT_PUT(ctx, &snippet_buffer, prefix, prefix_length);
    }
    rc = grn_snip_get_result(ctx, snip, i,
                             GRN_TEXT_VALUE(&snippet_buffer) + prefix_length,
                             &snippet_length);
    if (rc == GRN_SUCCESS) {
      grn_strncat(GRN_TEXT_VALUE(&snippet_buffer),
                  GRN_BULK_WSIZE(&snippet_buffer),
                  suffix,
                  suffix_length);
      grn_vector_add_element(ctx, snippets,
                             GRN_TEXT_VALUE(&snippet_buffer),
                             prefix_length + snippet_length + suffix_length,
                             0, GRN_DB_SHORT_TEXT);
    }
  }
  GRN_OBJ_FIN(ctx, &snippet_buffer);

  return snippets;
}
コード例 #2
0
ファイル: proc_highlight.c プロジェクト: groonga/groonga
static grn_obj *
func_highlight_html(grn_ctx *ctx, int nargs, grn_obj **args,
                    grn_user_data *user_data)
{
  grn_obj *highlighted = NULL;
  grn_obj *string;
  grn_obj *lexicon = NULL;
  grn_obj *expression = NULL;
  grn_highlighter *highlighter;
  grn_obj *highlighter_ptr;

  if (!(1 <= nargs && nargs <= 2)) {
    GRN_PLUGIN_ERROR(ctx,
                     GRN_INVALID_ARGUMENT,
                     "highlight_html(): wrong number of arguments (%d for 1..2)",
                     nargs);
    highlighted = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0);
    return highlighted;
  }

  string = args[0];
  if (nargs == 2) {
    lexicon = args[1];
  }

  grn_proc_get_info(ctx, user_data, NULL, NULL, &expression);

  highlighter_ptr = grn_expr_get_var(ctx, expression,
                                     GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME,
                                     strlen(GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME));
  if (highlighter_ptr) {
    highlighter = (grn_highlighter *)GRN_PTR_VALUE(highlighter_ptr);
  } else {
    highlighter_ptr =
      grn_expr_get_or_add_var(ctx, expression,
                              GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME,
                              strlen(GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME));
    GRN_OBJ_FIN(ctx, highlighter_ptr);
    GRN_PTR_INIT(highlighter_ptr, GRN_OBJ_OWN, GRN_DB_OBJECT);

    highlighter = func_highlight_html_create_highlighter(ctx, expression);
    grn_highlighter_set_lexicon(ctx, highlighter, lexicon);
    GRN_PTR_SET(ctx, highlighter_ptr, highlighter);
  }

  highlighted = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_TEXT, 0);
  grn_highlighter_highlight(ctx,
                            highlighter,
                            GRN_TEXT_VALUE(string),
                            GRN_TEXT_LEN(string),
                            highlighted);

  return highlighted;
}
コード例 #3
0
ファイル: proc_highlight.c プロジェクト: ohkubo/groonga
static grn_obj *
highlight_keywords(grn_ctx *ctx, grn_user_data *user_data,
                   grn_obj *string, grn_obj *keywords, grn_bool use_html_escape,
                   const char *default_open_tag, unsigned int default_open_tag_length,
                   const char *default_close_tag, unsigned int default_close_tag_length)
{
  grn_obj *highlighted = NULL;
  const char *open_tags[1];
  unsigned int open_tag_lengths[1];
  const char *close_tags[1];
  unsigned int close_tag_lengths[1];
  unsigned int n_keyword_sets = 1;

  open_tags[0] = default_open_tag;
  open_tag_lengths[0] = default_open_tag_length;
  close_tags[0] = default_close_tag;
  close_tag_lengths[0] = default_close_tag_length;

  highlighted = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_TEXT, 0);
  grn_pat_tag_keys(ctx, keywords,
                   GRN_TEXT_VALUE(string), GRN_TEXT_LEN(string),
                   open_tags,
                   open_tag_lengths,
                   close_tags,
                   close_tag_lengths,
                   n_keyword_sets,
                   highlighted,
                   use_html_escape);

  return highlighted;
}
コード例 #4
0
ファイル: tsv.c プロジェクト: cosmo0920/groonga
static grn_obj *
func_query_expander_tsv(grn_ctx *ctx, int nargs, grn_obj **args,
                        grn_user_data *user_data)
{
  grn_rc rc = GRN_END_OF_DATA;
  grn_id id;
  grn_obj *term, *expanded_term;
  void *value;
  grn_obj *rc_object;

  term = args[0];
  expanded_term = args[1];
  id = grn_hash_get(ctx, synonyms,
                    GRN_TEXT_VALUE(term), GRN_TEXT_LEN(term),
                    &value);
  if (id != GRN_ID_NIL) {
    const char *query = value;
    GRN_TEXT_PUTS(ctx, expanded_term, query);
    rc = GRN_SUCCESS;
  }

  rc_object = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_INT32, 0);
  if (rc_object) {
    GRN_INT32_SET(ctx, rc_object, rc);
  }

  return rc_object;
}
コード例 #5
0
static grn_obj *
func_snippet_tritonn(grn_ctx *ctx, int nargs, grn_obj **args,
                  grn_user_data *user_data)
{
  grn_obj *snippets = NULL;

  if (nargs > 10) {
    grn_obj *text = args[0];
    grn_obj *snip = NULL;
    unsigned int width = GRN_UINT64_VALUE(args[1]);
    unsigned int max_n_results = GRN_UINT64_VALUE(args[2]);
    grn_snip_mapping *mapping = NULL;

    int flags = GRN_SNIP_COPY_TAG;

    if(GRN_UINT64_VALUE(args[4])){
      flags |= GRN_SNIP_SKIP_LEADING_SPACES;
    }
    if(GRN_UINT64_VALUE(args[5])){
      mapping = GRN_SNIP_MAPPING_HTML_ESCAPE;
    }
    snip = grn_snip_open(ctx, flags, width, max_n_results, "", 0, "", 0, mapping);

    if (snip) {
      grn_rc rc;
      unsigned int i;

      if(GRN_TEXT_LEN(args[3])){
        grn_obj * normalizer;
        normalizer = grn_ctx_get(ctx, GRN_TEXT_VALUE(args[3]), GRN_TEXT_LEN(args[3]));
        grn_snip_set_normalizer(ctx, snip, normalizer); 
      }

      for(i = 8; i < (unsigned int)nargs; i += 3){
        rc = grn_snip_add_cond(ctx, snip,
                               GRN_TEXT_VALUE(args[i]), GRN_TEXT_LEN(args[i]),
                               GRN_TEXT_VALUE(args[i + 1]), GRN_TEXT_LEN(args[i + 1]),
                               GRN_TEXT_VALUE(args[i + 2]), GRN_TEXT_LEN(args[i + 2]));
      }

      snippets = snippet_exec(ctx, snip, text, user_data, args);
      grn_obj_close(ctx, snip);
    }
  }
  if(!snippets){
    snippets = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0);
  }

  return snippets;
}
コード例 #6
0
ファイル: proc_highlight.c プロジェクト: ohkubo/groonga
static grn_obj *
func_highlight_html(grn_ctx *ctx, int nargs, grn_obj **args,
                    grn_user_data *user_data)
{
  grn_obj *highlighted = NULL;

#define N_REQUIRED_ARGS 1
  if (nargs == N_REQUIRED_ARGS) {
    grn_obj *string = args[0];
    grn_obj *expression = NULL;
    grn_obj *keywords;
    grn_obj *keywords_ptr;
    grn_bool use_html_escape = GRN_TRUE;

    grn_proc_get_info(ctx, user_data, NULL, NULL, &expression);

    keywords_ptr = grn_expr_get_var(ctx, expression,
                                    GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME,
                                    strlen(GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME));
    if (keywords_ptr) {
      keywords = GRN_PTR_VALUE(keywords_ptr);
    } else {
      keywords_ptr =
        grn_expr_get_or_add_var(ctx, expression,
                                GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME,
                                strlen(GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME));
      GRN_OBJ_FIN(ctx, keywords_ptr);
      GRN_PTR_INIT(keywords_ptr, GRN_OBJ_OWN, GRN_DB_OBJECT);

      keywords = func_highlight_html_create_keywords_table(ctx, expression);
      GRN_PTR_SET(ctx, keywords_ptr, keywords);
    }

    highlighted = highlight_keywords(ctx, user_data,
                                     string, keywords, use_html_escape,
                                     "<span class=\"keyword\">",
                                     strlen("<span class=\"keyword\">"),
                                     "</span>",
                                     strlen("</span>"));
  }
#undef N_REQUIRED_ARGS

  if (!highlighted) {
    highlighted = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0);
  }

  return highlighted;
}
コード例 #7
0
ファイル: vector.c プロジェクト: digideskio/groonga
static grn_obj *
func_vector_size(grn_ctx *ctx, int n_args, grn_obj **args,
                 grn_user_data *user_data)
{
  grn_obj *target;
  unsigned int size;
  grn_obj *grn_size;

  if (n_args != 1) {
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                     "vector_size(): wrong number of arguments (%d for 1)",
                     n_args);
    return NULL;
  }

  target = args[0];
  switch (target->header.type) {
  case GRN_VECTOR :
  case GRN_PVECTOR :
  case GRN_UVECTOR :
    size = grn_vector_size(ctx, target);
    break;
  default :
    {
      grn_obj inspected;

      GRN_TEXT_INIT(&inspected, 0);
      grn_inspect(ctx, target, &inspected);
      GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                       "vector_size(): target object must be vector: <%.*s>",
                       (int)GRN_TEXT_LEN(&inspected),
                       GRN_TEXT_VALUE(&inspected));
      GRN_OBJ_FIN(ctx, &inspected);
      return NULL;
    }
    break;
  }

  grn_size = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_UINT32, 0);
  if (!grn_size) {
    return NULL;
  }

  GRN_UINT32_SET(ctx, grn_size, size);

  return grn_size;
}
コード例 #8
0
ファイル: proc_fuzzy_search.c プロジェクト: XLPE/groonga
static grn_obj *
func_edit_distance(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
#define N_REQUIRED_ARGS 2
#define MAX_ARGS 3
  int d = 0;
  int flags = 0;
  grn_obj *obj;
  if (nargs >= N_REQUIRED_ARGS && nargs <= MAX_ARGS) {
    if (nargs == MAX_ARGS && GRN_BOOL_VALUE(args[2])) {
      flags |= GRN_TABLE_FUZZY_SEARCH_WITH_TRANSPOSITION;
    }
    d = calc_edit_distance(ctx, GRN_TEXT_VALUE(args[0]), GRN_BULK_CURR(args[0]),
                           GRN_TEXT_VALUE(args[1]), GRN_BULK_CURR(args[1]), flags);
  }
  if ((obj = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_UINT32, 0))) {
    GRN_UINT32_SET(ctx, obj, d);
  }
  return obj;
#undef N_REQUIRED_ARGS
#undef MAX_ARGS
}
コード例 #9
0
ファイル: proc_highlight.c プロジェクト: groonga/groonga
static grn_obj *
func_highlight_full(grn_ctx *ctx, int nargs, grn_obj **args,
                    grn_user_data *user_data)
{
  grn_obj *highlighted = NULL;

#define N_REQUIRED_ARGS 3
#define KEYWORD_SET_SIZE 3
  if ((nargs >= (N_REQUIRED_ARGS + KEYWORD_SET_SIZE) &&
      (nargs - N_REQUIRED_ARGS) % KEYWORD_SET_SIZE == 0)) {
    grn_obj *string = args[0];
    grn_obj *keywords;
    const char *normalizer_name = GRN_TEXT_VALUE(args[1]);
    unsigned int normalizer_name_length = GRN_TEXT_LEN(args[1]);
    grn_bool use_html_escape = GRN_BOOL_VALUE(args[2]);

    keywords =
      func_highlight_create_keywords_table(ctx, user_data,
                                           normalizer_name,
                                           normalizer_name_length);
    if (keywords) {
      highlighted = highlight_keyword_sets(ctx, user_data,
                                           args + N_REQUIRED_ARGS,
                                           nargs - N_REQUIRED_ARGS,
                                           string, keywords,
                                           use_html_escape);
      grn_obj_unlink(ctx, keywords);
    }
  }

  if (!highlighted) {
    highlighted = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0);
  }
#undef KEYWORD_SET_SIZE
#undef N_REQUIRED_ARGS

  return highlighted;
}
コード例 #10
0
ファイル: time.c プロジェクト: digideskio/groonga
static grn_obj *
func_time_classify_raw(grn_ctx *ctx,
                       int n_args,
                       grn_obj **args,
                       grn_user_data *user_data,
                       const char *function_name,
                       grn_time_classify_unit unit)
{
  grn_obj *time;
  uint32_t interval_raw = 1;
  grn_obj *classed_time;
  grn_bool accept_interval = GRN_TRUE;

  switch (unit) {
  case GRN_TIME_CLASSIFY_UNIT_SECOND :
  case GRN_TIME_CLASSIFY_UNIT_MINUTE :
  case GRN_TIME_CLASSIFY_UNIT_HOUR :
    accept_interval = GRN_TRUE;
    break;
  case GRN_TIME_CLASSIFY_UNIT_DAY :
  case GRN_TIME_CLASSIFY_UNIT_WEEK :
    accept_interval = GRN_FALSE;
    break;
  case GRN_TIME_CLASSIFY_UNIT_MONTH :
  case GRN_TIME_CLASSIFY_UNIT_YEAR :
    accept_interval = GRN_TRUE;
    break;
  }

  if (accept_interval) {
    if (!(n_args == 1 || n_args == 2)) {
      GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                       "%s(): "
                       "wrong number of arguments (%d for 1..2)",
                       function_name,
                       n_args);
      return NULL;
    }
  } else {
    if (n_args != 1) {
      GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                       "%s(): "
                       "wrong number of arguments (%d for 1)",
                       function_name,
                       n_args);
      return NULL;
    }
  }

  time = args[0];
  if (!(time->header.type == GRN_BULK &&
        time->header.domain == GRN_DB_TIME)) {
    grn_obj inspected;

    GRN_TEXT_INIT(&inspected, 0);
    grn_inspect(ctx, &inspected, time);
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                     "%s(): "
                     "the first argument must be a time: "
                     "<%.*s>",
                     function_name,
                     (int)GRN_TEXT_LEN(&inspected),
                     GRN_TEXT_VALUE(&inspected));
    GRN_OBJ_FIN(ctx, &inspected);
    return NULL;
  }

  if (n_args == 2) {
    grn_obj *interval;
    grn_obj casted_interval;

    interval = args[1];
    if (!(interval->header.type == GRN_BULK &&
          grn_type_id_is_number_family(ctx, interval->header.domain))) {
      grn_obj inspected;

      GRN_TEXT_INIT(&inspected, 0);
      grn_inspect(ctx, &inspected, interval);
      GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                       "%s(): "
                       "the second argument must be a number: "
                       "<%.*s>",
                       function_name,
                       (int)GRN_TEXT_LEN(&inspected),
                       GRN_TEXT_VALUE(&inspected));
      GRN_OBJ_FIN(ctx, &inspected);
      return NULL;
    }

    GRN_VALUE_FIX_SIZE_INIT(&casted_interval, 0, GRN_DB_UINT32);
    grn_obj_cast(ctx, interval, &casted_interval, GRN_FALSE);
    interval_raw = GRN_UINT32_VALUE(&casted_interval);
    GRN_OBJ_FIN(ctx, &casted_interval);
  }

  {
    int64_t time_raw;
    struct tm tm;
    int64_t classed_time_raw;

    time_raw = GRN_TIME_VALUE(time);
    if (!grn_time_to_tm(ctx, time_raw, &tm)) {
      return NULL;
    }

    switch (unit) {
    case GRN_TIME_CLASSIFY_UNIT_SECOND :
      tm.tm_sec = (tm.tm_sec / interval_raw) * interval_raw;
      break;
    case GRN_TIME_CLASSIFY_UNIT_MINUTE :
      tm.tm_min = (tm.tm_min / interval_raw) * interval_raw;
      tm.tm_sec = 0;
      break;
    case GRN_TIME_CLASSIFY_UNIT_HOUR :
      tm.tm_hour = (tm.tm_hour / interval_raw) * interval_raw;
      tm.tm_min = 0;
      tm.tm_sec = 0;
      break;
    case GRN_TIME_CLASSIFY_UNIT_DAY :
      tm.tm_hour = 0;
      tm.tm_min = 0;
      tm.tm_sec = 0;
      break;
    case GRN_TIME_CLASSIFY_UNIT_WEEK :
      if ((tm.tm_mday - tm.tm_wday) >= 0) {
        tm.tm_mday -= tm.tm_wday;
      } else {
        int n_underflowed_mday = -(tm.tm_mday - tm.tm_wday);
        int mday;
        int max_mday = 31;

        if (tm.tm_mon == 0) {
          tm.tm_year--;
          tm.tm_mon = 11;
        } else {
          tm.tm_mon--;
        }

        for (mday = max_mday; mday > n_underflowed_mday; mday--) {
          int64_t unused;
          tm.tm_mday = mday;
          if (grn_time_from_tm(ctx, &unused, &tm)) {
            break;
          }
        }
        tm.tm_mday -= n_underflowed_mday;
      }
      tm.tm_hour = 0;
      tm.tm_min = 0;
      tm.tm_sec = 0;
      break;
    case GRN_TIME_CLASSIFY_UNIT_MONTH :
      tm.tm_mon = (tm.tm_mon / interval_raw) * interval_raw;
      tm.tm_mday = 1;
      tm.tm_hour = 0;
      tm.tm_min = 0;
      tm.tm_sec = 0;
      break;
    case GRN_TIME_CLASSIFY_UNIT_YEAR :
      tm.tm_year = (((1900 + tm.tm_year) / interval_raw) * interval_raw) - 1900;
      tm.tm_mon = 0;
      tm.tm_mday = 1;
      tm.tm_hour = 0;
      tm.tm_min = 0;
      tm.tm_sec = 0;
      break;
    }

    if (!grn_time_from_tm(ctx, &classed_time_raw, &tm)) {
      return NULL;
    }

    classed_time = grn_plugin_proc_alloc(ctx,
                                         user_data,
                                         time->header.domain,
                                         0);
    if (!classed_time) {
      return NULL;
    }
    GRN_TIME_SET(ctx, classed_time, classed_time_raw);

    return classed_time;
  }
}
コード例 #11
0
ファイル: proc_snippet.c プロジェクト: cafedomancer/groonga
/* TODO: support caching for the same parameter. */
static grn_obj *
func_snippet(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  grn_obj *snippets = NULL;

#define N_REQUIRED_ARGS 1
#define KEYWORD_SET_SIZE 3
  if (nargs > N_REQUIRED_ARGS) {
    grn_obj *text = args[0];
    grn_obj *end_arg = args[nargs - 1];
    grn_obj *snip = NULL;
    unsigned int width = 200;
    unsigned int max_n_results = 3;
    grn_snip_mapping *mapping = NULL;
    int flags = GRN_SNIP_SKIP_LEADING_SPACES;
    const char *prefix = NULL;
    int prefix_length = 0;
    const char *suffix = NULL;
    int suffix_length = 0;
    const char *normalizer_name = NULL;
    int normalizer_name_length = 0;
    const char *default_open_tag = NULL;
    int default_open_tag_length = 0;
    const char *default_close_tag = NULL;
    int default_close_tag_length = 0;
    int n_args_without_option = nargs;

    if (end_arg->header.type == GRN_TABLE_HASH_KEY) {
      grn_obj *options = end_arg;
      grn_hash_cursor *cursor;
      void *key;
      int key_size;
      grn_obj *value;

      n_args_without_option--;
      cursor = grn_hash_cursor_open(ctx, (grn_hash *)options,
                                    NULL, 0, NULL, 0,
                                    0, -1, 0);
      if (!cursor) {
        GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE,
                         "snippet(): couldn't open cursor");
        goto exit;
      }
      while (grn_hash_cursor_next(ctx, cursor) != GRN_ID_NIL) {
        grn_hash_cursor_get_key_value(ctx, cursor,
                                      &key, &key_size,
                                      (void **)&value);
        if (key_size == 5 && !memcmp(key, "width", 5)) {
          width = GRN_UINT32_VALUE(value);
        } else if (key_size == 13 && !memcmp(key, "max_n_results", 13)) {
          max_n_results = GRN_UINT32_VALUE(value);
        } else if (key_size == 19 && !memcmp(key, "skip_leading_spaces", 19)) {
          if (GRN_BOOL_VALUE(value) == GRN_FALSE) {
            flags &= ~GRN_SNIP_SKIP_LEADING_SPACES;
          }
        } else if (key_size == 11 && !memcmp(key, "html_escape", 11)) {
          if (GRN_BOOL_VALUE(value)) {
            mapping = GRN_SNIP_MAPPING_HTML_ESCAPE;
          }
        } else if (key_size == 6 && !memcmp(key, "prefix", 6)) {
          prefix = GRN_TEXT_VALUE(value);
          prefix_length = GRN_TEXT_LEN(value);
        } else if (key_size == 6 && !memcmp(key, "suffix", 6)) {
          suffix = GRN_TEXT_VALUE(value);
          suffix_length = GRN_TEXT_LEN(value);
        } else if (key_size == 10 && !memcmp(key, "normalizer", 10)) {
          normalizer_name = GRN_TEXT_VALUE(value);
          normalizer_name_length = GRN_TEXT_LEN(value);
        } else if (key_size == 16 && !memcmp(key, "default_open_tag", 16)) {
          default_open_tag = GRN_TEXT_VALUE(value);
          default_open_tag_length = GRN_TEXT_LEN(value);
        } else if (key_size == 17 && !memcmp(key, "default_close_tag", 17)) {
          default_close_tag = GRN_TEXT_VALUE(value);
          default_close_tag_length = GRN_TEXT_LEN(value);
        } else {
          GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                           "invalid option name: <%.*s>",
                           key_size, (char *)key);
          grn_hash_cursor_close(ctx, cursor);
          goto exit;
        }
      }
      grn_hash_cursor_close(ctx, cursor);
    }

    snip = grn_snip_open(ctx, flags, width, max_n_results,
                         default_open_tag, default_open_tag_length,
                         default_close_tag, default_close_tag_length, mapping);
    if (snip) {
      grn_rc rc;
      unsigned int i;
      if (!normalizer_name) {
        grn_snip_set_normalizer(ctx, snip, GRN_NORMALIZER_AUTO);
      } else if (normalizer_name_length > 0) {
        grn_obj *normalizer;
        normalizer = grn_ctx_get(ctx, normalizer_name, normalizer_name_length);
        if (!grn_obj_is_normalizer_proc(ctx, normalizer)) {
          grn_obj inspected;
          GRN_TEXT_INIT(&inspected, 0);
          grn_inspect(ctx, &inspected, normalizer);
          GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                           "snippet(): not normalizer: <%.*s>",
                           (int)GRN_TEXT_LEN(&inspected),
                           GRN_TEXT_VALUE(&inspected));
          GRN_OBJ_FIN(ctx, &inspected);
          grn_obj_unlink(ctx, normalizer);
          goto exit;
        }
        grn_snip_set_normalizer(ctx, snip, normalizer);
        grn_obj_unlink(ctx, normalizer);
      }
      if (default_open_tag_length == 0 && default_close_tag_length == 0) {
        unsigned int n_keyword_sets =
          (n_args_without_option - N_REQUIRED_ARGS) / KEYWORD_SET_SIZE;
        grn_obj **keyword_set_args = args + N_REQUIRED_ARGS;
        for (i = 0; i < n_keyword_sets; i++) {
          rc = grn_snip_add_cond(ctx, snip,
                                 GRN_TEXT_VALUE(keyword_set_args[i * KEYWORD_SET_SIZE]),
                                 GRN_TEXT_LEN(keyword_set_args[i * KEYWORD_SET_SIZE]),
                                 GRN_TEXT_VALUE(keyword_set_args[i * KEYWORD_SET_SIZE + 1]),
                                 GRN_TEXT_LEN(keyword_set_args[i * KEYWORD_SET_SIZE + 1]),
                                 GRN_TEXT_VALUE(keyword_set_args[i * KEYWORD_SET_SIZE + 2]),
                                 GRN_TEXT_LEN(keyword_set_args[i * KEYWORD_SET_SIZE + 2]));
        }
      } else {
        unsigned int n_keywords = n_args_without_option - N_REQUIRED_ARGS;
        grn_obj **keyword_args = args + N_REQUIRED_ARGS;
        for (i = 0; i < n_keywords; i++) {
          rc = grn_snip_add_cond(ctx, snip,
                                 GRN_TEXT_VALUE(keyword_args[i]),
                                 GRN_TEXT_LEN(keyword_args[i]),
                                 NULL, 0,
                                 NULL, 0);
        }
      }
      snippets = snippet_exec(ctx, snip, text, user_data,
                              prefix, prefix_length,
                              suffix, suffix_length);
    }
  }
#undef KEYWORD_SET_SIZE
#undef N_REQUIRED_ARGS

exit :
  if (!snippets) {
    snippets = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0);
  }

  return snippets;
}
コード例 #12
0
ファイル: proc_snippet.c プロジェクト: cafedomancer/groonga
static grn_obj *
func_snippet_html(grn_ctx *ctx, int nargs, grn_obj **args,
                  grn_user_data *user_data)
{
  grn_obj *snippets = NULL;

  /* TODO: support parameters */
  if (nargs == 1) {
    grn_obj *text = args[0];
    grn_obj *expression = NULL;
    grn_obj *condition_ptr = NULL;
    grn_obj *condition = NULL;
    grn_obj *snip = NULL;
    int flags = GRN_SNIP_SKIP_LEADING_SPACES;
    unsigned int width = 200;
    unsigned int max_n_results = 3;
    const char *open_tag = "<span class=\"keyword\">";
    const char *close_tag = "</span>";
    grn_snip_mapping *mapping = GRN_SNIP_MAPPING_HTML_ESCAPE;

    grn_proc_get_info(ctx, user_data, NULL, NULL, &expression);
    condition_ptr = grn_expr_get_var(ctx, expression,
                                     GRN_SELECT_INTERNAL_VAR_CONDITION,
                                     strlen(GRN_SELECT_INTERNAL_VAR_CONDITION));
    if (condition_ptr) {
      condition = GRN_PTR_VALUE(condition_ptr);
    }

    if (condition) {
      grn_obj *snip_ptr;
      snip_ptr = grn_expr_get_var(ctx, expression,
                                  GRN_FUNC_SNIPPET_HTML_CACHE_NAME,
                                  strlen(GRN_FUNC_SNIPPET_HTML_CACHE_NAME));
      if (snip_ptr) {
        snip = GRN_PTR_VALUE(snip_ptr);
      } else {
        snip_ptr =
          grn_expr_get_or_add_var(ctx, expression,
                                  GRN_FUNC_SNIPPET_HTML_CACHE_NAME,
                                  strlen(GRN_FUNC_SNIPPET_HTML_CACHE_NAME));
        GRN_OBJ_FIN(ctx, snip_ptr);
        GRN_PTR_INIT(snip_ptr, GRN_OBJ_OWN, GRN_DB_OBJECT);

        snip = grn_snip_open(ctx, flags, width, max_n_results,
                             open_tag, strlen(open_tag),
                             close_tag, strlen(close_tag),
                             mapping);
        if (snip) {
          grn_snip_set_normalizer(ctx, snip, GRN_NORMALIZER_AUTO);
          grn_expr_snip_add_conditions(ctx, condition, snip,
                                       0, NULL, NULL, NULL, NULL);
          GRN_PTR_SET(ctx, snip_ptr, snip);
        }
      }
    }

    if (snip) {
      snippets = snippet_exec(ctx, snip, text, user_data, NULL, 0, NULL, 0);
    }
  }

  if (!snippets) {
    snippets = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0);
  }

  return snippets;
}
コード例 #13
0
ファイル: proc_highlight.c プロジェクト: ohkubo/groonga
static grn_obj *
func_highlight(grn_ctx *ctx, int nargs, grn_obj **args,
               grn_user_data *user_data)
{
  grn_obj *highlighted = NULL;

#define N_REQUIRED_ARGS 1
  if (nargs > N_REQUIRED_ARGS) {
    grn_obj *string = args[0];
    grn_bool use_html_escape = GRN_FALSE;
    grn_obj *keywords;
    const char *normalizer_name = "NormalizerAuto";
    unsigned int normalizer_name_length = 14;
    const char *default_open_tag = NULL;
    unsigned int default_open_tag_length = 0;
    const char *default_close_tag = NULL;
    unsigned int default_close_tag_length = 0;
    grn_obj *end_arg = args[nargs - 1];
    int n_args_without_option = nargs;

    if (end_arg->header.type == GRN_TABLE_HASH_KEY) {
      grn_obj *options = end_arg;
      grn_hash_cursor *cursor;
      void *key;
      grn_obj *value;
      int key_size;

      n_args_without_option--;
      cursor = grn_hash_cursor_open(ctx, (grn_hash *)options,
                                    NULL, 0, NULL, 0,
                                    0, -1, 0);
      if (!cursor) {
        GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE,
                         "highlight(): couldn't open cursor");
        goto exit;
      }
      while (grn_hash_cursor_next(ctx, cursor) != GRN_ID_NIL) {
        grn_hash_cursor_get_key_value(ctx, cursor, &key, &key_size,
                                      (void **)&value);
        if (key_size == 10 && !memcmp(key, "normalizer", 10)) {
          normalizer_name = GRN_TEXT_VALUE(value);
          normalizer_name_length = GRN_TEXT_LEN(value);
        } else if (key_size == 11 && !memcmp(key, "html_escape", 11)) {
          if (GRN_BOOL_VALUE(value)) {
            use_html_escape = GRN_TRUE;
          }
        } else if (key_size == 16 && !memcmp(key, "default_open_tag", 16)) {
          default_open_tag = GRN_TEXT_VALUE(value);
          default_open_tag_length = GRN_TEXT_LEN(value);
        } else if (key_size == 17 && !memcmp(key, "default_close_tag", 17)) {
          default_close_tag = GRN_TEXT_VALUE(value);
          default_close_tag_length = GRN_TEXT_LEN(value);
        } else {
          GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "invalid option name: <%.*s>",
                           key_size, (char *)key);
          grn_hash_cursor_close(ctx, cursor);
          goto exit;
        }
      }
      grn_hash_cursor_close(ctx, cursor);
    }

    keywords =
      func_highlight_create_keywords_table(ctx, user_data,
                                           normalizer_name,
                                           normalizer_name_length);

    if (keywords) {
      grn_obj **keyword_args = args + N_REQUIRED_ARGS;
      unsigned int n_keyword_args = n_args_without_option - N_REQUIRED_ARGS;
      if (default_open_tag_length == 0 && default_close_tag_length == 0) {
        highlighted = highlight_keyword_sets(ctx, user_data,
                                             keyword_args, n_keyword_args,
                                             string, keywords, use_html_escape);
      } else {
        unsigned int i;
        for (i = 0; i < n_keyword_args; i++) {
          grn_table_add(ctx, keywords,
                        GRN_TEXT_VALUE(keyword_args[i]),
                        GRN_TEXT_LEN(keyword_args[i]),
                        NULL);
        }
        highlighted = highlight_keywords(ctx, user_data,
                                         string, keywords, use_html_escape,
                                         default_open_tag, default_open_tag_length,
                                         default_close_tag, default_close_tag_length);
      }
    }
  }
#undef N_REQUIRED_ARGS

exit :
  if (!highlighted) {
    highlighted = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0);
  }

  return highlighted;
}
コード例 #14
0
ファイル: proc_highlight.c プロジェクト: ohkubo/groonga
static grn_obj *
highlight_keyword_sets(grn_ctx *ctx, grn_user_data *user_data,
                       grn_obj **keyword_set_args, unsigned int n_keyword_args,
                       grn_obj *string, grn_obj *keywords,
                       grn_bool use_html_escape)
{
  grn_obj *highlighted = NULL;
#define KEYWORD_SET_SIZE 3
  {
    unsigned int i;
    unsigned int n_keyword_sets;
    grn_obj open_tags;
    grn_obj open_tag_lengths;
    grn_obj close_tags;
    grn_obj close_tag_lengths;

    n_keyword_sets = n_keyword_args / KEYWORD_SET_SIZE;

    GRN_OBJ_INIT(&open_tags, GRN_BULK, 0, GRN_DB_VOID);
    GRN_OBJ_INIT(&open_tag_lengths, GRN_BULK, 0, GRN_DB_VOID);
    GRN_OBJ_INIT(&close_tags, GRN_BULK, 0, GRN_DB_VOID);
    GRN_OBJ_INIT(&close_tag_lengths, GRN_BULK, 0, GRN_DB_VOID);

    for (i = 0; i < n_keyword_sets; i++) {
      grn_obj *keyword   = keyword_set_args[i * KEYWORD_SET_SIZE + 0];
      grn_obj *open_tag  = keyword_set_args[i * KEYWORD_SET_SIZE + 1];
      grn_obj *close_tag = keyword_set_args[i * KEYWORD_SET_SIZE + 2];

      grn_table_add(ctx, keywords,
                    GRN_TEXT_VALUE(keyword),
                    GRN_TEXT_LEN(keyword),
                    NULL);
      {
        const char *open_tag_content = GRN_TEXT_VALUE(open_tag);
        grn_bulk_write(ctx, &open_tags,
                       (const char *)(&open_tag_content),
                       sizeof(char *));
      }
      {
        unsigned int open_tag_length = GRN_TEXT_LEN(open_tag);
        grn_bulk_write(ctx, &open_tag_lengths,
                       (const char *)(&open_tag_length),
                       sizeof(unsigned int));
      }
      {
        const char *close_tag_content = GRN_TEXT_VALUE(close_tag);
        grn_bulk_write(ctx, &close_tags,
                       (const char *)(&close_tag_content),
                       sizeof(char *));
      }
      {
        unsigned int close_tag_length = GRN_TEXT_LEN(close_tag);
        grn_bulk_write(ctx, &close_tag_lengths,
                       (const char *)(&close_tag_length),
                       sizeof(unsigned int));
      }
    }

    highlighted = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_TEXT, 0);
    grn_pat_tag_keys(ctx, keywords,
                     GRN_TEXT_VALUE(string), GRN_TEXT_LEN(string),
                     (const char **)GRN_BULK_HEAD(&open_tags),
                     (unsigned int *)GRN_BULK_HEAD(&open_tag_lengths),
                     (const char **)GRN_BULK_HEAD(&close_tags),
                     (unsigned int *)GRN_BULK_HEAD(&close_tag_lengths),
                     n_keyword_sets,
                     highlighted,
                     use_html_escape);
    grn_obj_unlink(ctx, &open_tags);
    grn_obj_unlink(ctx, &open_tag_lengths);
    grn_obj_unlink(ctx, &close_tags);
    grn_obj_unlink(ctx, &close_tag_lengths);
  }
#undef KEYWORD_SET_SIZE
  return highlighted;
}
コード例 #15
0
ファイル: vector.c プロジェクト: digideskio/groonga
static grn_obj *
func_vector_slice(grn_ctx *ctx, int n_args, grn_obj **args,
                  grn_user_data *user_data)
{
  grn_obj *target;
  grn_obj *from_raw = NULL;
  grn_obj *length_raw = NULL;
  int64_t from = 0;
  int64_t length = -1;
  uint32_t to = 0;
  uint32_t size = 0;
  grn_obj *slice;

  if (n_args < 2 || n_args > 3) {
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                     "vector_slice(): wrong number of arguments (%d for 2..3)",
                     n_args);
    return NULL;
  }

  target = args[0];
  from_raw = args[1];
  if (n_args == 3) {
    length_raw = args[2];
  }
  switch (target->header.type) {
  case GRN_VECTOR :
  case GRN_PVECTOR :
  case GRN_UVECTOR :
    size = grn_vector_size(ctx, target);
    break;
  default :
    {
      grn_obj inspected;

      GRN_TEXT_INIT(&inspected, 0);
      grn_inspect(ctx, target, &inspected);
      GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                       "vector_slice(): target object must be vector: <%.*s>",
                       (int)GRN_TEXT_LEN(&inspected),
                       GRN_TEXT_VALUE(&inspected));
      GRN_OBJ_FIN(ctx, &inspected);
      return NULL;
    }
    break;
  }

  if (!grn_type_id_is_number_family(ctx, from_raw->header.domain)) {
    grn_obj inspected;

    GRN_TEXT_INIT(&inspected, 0);
    grn_inspect(ctx, &inspected, from_raw);
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                     "vector_slice(): from must be a number: <%.*s>",
                     (int)GRN_TEXT_LEN(&inspected),
                     GRN_TEXT_VALUE(&inspected));
    GRN_OBJ_FIN(ctx, &inspected);
    return NULL;
  }
  if (from_raw->header.domain == GRN_DB_INT32) {
    from = GRN_INT32_VALUE(from_raw);
  } else if (from_raw->header.domain == GRN_DB_INT64) {
    from = GRN_INT64_VALUE(from_raw);
  } else {
    grn_obj buffer;
    grn_rc rc;

    GRN_INT64_INIT(&buffer, 0);
    rc = grn_obj_cast(ctx, from_raw, &buffer, GRN_FALSE);
    if (rc == GRN_SUCCESS) {
      from = GRN_INT64_VALUE(&buffer);
    }
    GRN_OBJ_FIN(ctx, &buffer);

    if (rc != GRN_SUCCESS) {
      grn_obj inspected;

      GRN_TEXT_INIT(&inspected, 0);
      grn_inspect(ctx, &inspected, from_raw);
      GRN_PLUGIN_ERROR(ctx, rc,
                       "vector_slice(): "
                       "failed to cast from value to number: <%.*s>",
                       (int)GRN_TEXT_LEN(&inspected),
                       GRN_TEXT_VALUE(&inspected));
      GRN_OBJ_FIN(ctx, &inspected);
      return NULL;
    }
  }

  if (length_raw) {
    if (!grn_type_id_is_number_family(ctx, length_raw->header.domain)) {
      grn_obj inspected;

      GRN_TEXT_INIT(&inspected, 0);
      grn_inspect(ctx, &inspected, length_raw);
      GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                       "vector_slice(): length must be a number: <%.*s>",
                       (int)GRN_TEXT_LEN(&inspected),
                       GRN_TEXT_VALUE(&inspected));
      GRN_OBJ_FIN(ctx, &inspected);
      return NULL;
    }
    if (length_raw->header.domain == GRN_DB_INT32) {
      length = GRN_INT32_VALUE(length_raw);
    } else if (length_raw->header.domain == GRN_DB_INT64) {
      length = GRN_INT64_VALUE(length_raw);
    } else {
      grn_obj buffer;
      grn_rc rc;

      GRN_INT64_INIT(&buffer, 0);
      rc = grn_obj_cast(ctx, length_raw, &buffer, GRN_FALSE);
      if (rc == GRN_SUCCESS) {
        length = GRN_INT64_VALUE(&buffer);
      }
      GRN_OBJ_FIN(ctx, &buffer);

      if (rc != GRN_SUCCESS) {
        grn_obj inspected;

        GRN_TEXT_INIT(&inspected, 0);
        grn_inspect(ctx, &inspected, length_raw);
        GRN_PLUGIN_ERROR(ctx, rc,
                         "vector_slice(): "
                         "failed to cast length value to number: <%.*s>",
                         (int)GRN_TEXT_LEN(&inspected),
                         GRN_TEXT_VALUE(&inspected));
        GRN_OBJ_FIN(ctx, &inspected);
        return NULL;
      }
    }
  }

  slice = grn_plugin_proc_alloc(ctx, user_data, target->header.domain, GRN_OBJ_VECTOR);
  if (!slice) {
    return NULL;
  }

  if (target->header.flags & GRN_OBJ_WITH_WEIGHT) {
    slice->header.flags |= GRN_OBJ_WITH_WEIGHT;
  }

  if (length < 0) {
    length = size + length + 1;
  }

  if (length > size) {
    length = size;
  }

  if (length <= 0) {
    return slice;
  }

  while (from < 0) {
    from += size;
  }

  to = from + length;
  if (to > size) {
    to = size;
  }

  switch (target->header.type) {
  case GRN_VECTOR :
    {
      unsigned int i;
      for (i = from; i < to; i++) {
        const char *content;
        unsigned int content_length;
        unsigned int weight;
        grn_id domain;
        content_length = grn_vector_get_element(ctx, target, i,
                                                &content, &weight, &domain);
        grn_vector_add_element(ctx, slice,
                               content, content_length, weight, domain);
      }
    }
    break;
  case GRN_PVECTOR :
    {
      unsigned int i;
      for (i = from; i < to; i++) {
        grn_obj *element = GRN_PTR_VALUE_AT(target, i);
        GRN_PTR_PUT(ctx, slice, element);
      }
    }
    break;
  case GRN_UVECTOR :
    {
      unsigned int i;
      for (i = from; i < to; i++) {
        grn_id id;
        unsigned int weight;
        id = grn_uvector_get_element(ctx, target, i, &weight);
        grn_uvector_add_element(ctx, slice, id, weight);
      }
    }
    break;
  }

  return slice;
}