Esempio n. 1
0
grn_rc
grn_com_event_poll(grn_ctx *ctx, grn_com_event *ev, int timeout)
{
  int nevents;
  grn_com *com;
#ifdef USE_SELECT
  uint32_t dummy;
  grn_sock *pfd;
  int nfds = 0;
  fd_set rfds;
  fd_set wfds;
  struct timeval tv;
  if (timeout >= 0) {
    tv.tv_sec = timeout / 1000;
    tv.tv_usec = (timeout % 1000) * 1000;
  }
  FD_ZERO(&rfds);
  FD_ZERO(&wfds);
  ctx->errlvl = GRN_OK;
  ctx->rc = GRN_SUCCESS;
  {
    grn_hash_cursor *cursor;
    cursor = grn_hash_cursor_open(ctx, ev->hash, NULL, 0, NULL, 0, 0, -1, 0);
    if (cursor) {
      grn_id id;
      while ((id = grn_hash_cursor_next(ctx, cursor))) {
        grn_hash_cursor_get_key_value(ctx,
                                      cursor,
                                      (void **)(&pfd),
                                      &dummy,
                                      (void **)(&com));
        if ((com->events & GRN_COM_POLLIN)) { FD_SET(*pfd, &rfds); }
        if ((com->events & GRN_COM_POLLOUT)) { FD_SET(*pfd, &wfds); }
# ifndef WIN32
        if (*pfd > nfds) { nfds = *pfd; }
# endif /* WIN32 */
      }
      grn_hash_cursor_close(ctx, cursor);
    }
  }
  nevents = select(nfds + 1, &rfds, &wfds, NULL, (timeout >= 0) ? &tv : NULL);
  if (nevents < 0) {
    SOERR("select");
    if (ctx->rc == GRN_INTERRUPTED_FUNCTION_CALL) { ERRCLR(ctx); }
    return ctx->rc;
  }
  if (timeout < 0 && !nevents) { GRN_LOG(ctx, GRN_LOG_NOTICE, "select returns 0 events"); }
  GRN_HASH_EACH(ctx, ev->hash, eh, &pfd, &dummy, &com, {
    if (FD_ISSET(*pfd, &rfds)) { grn_com_receiver(ctx, com); }
  });
Esempio n. 2
0
/* TODO: support caching for the same parameter. */
static grn_obj *
func_snippet(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  grn_obj *snippets = NULL;

#define N_REQUIRED_ARGS 1
#define KEYWORD_SET_SIZE 3
  if (nargs > N_REQUIRED_ARGS) {
    grn_obj *text = args[0];
    grn_obj *end_arg = args[nargs - 1];
    grn_obj *snip = NULL;
    unsigned int width = 200;
    unsigned int max_n_results = 3;
    grn_snip_mapping *mapping = NULL;
    int flags = GRN_SNIP_SKIP_LEADING_SPACES;
    const char *prefix = NULL;
    int prefix_length = 0;
    const char *suffix = NULL;
    int suffix_length = 0;
    const char *normalizer_name = NULL;
    int normalizer_name_length = 0;
    const char *default_open_tag = NULL;
    int default_open_tag_length = 0;
    const char *default_close_tag = NULL;
    int default_close_tag_length = 0;
    int n_args_without_option = nargs;

    if (end_arg->header.type == GRN_TABLE_HASH_KEY) {
      grn_obj *options = end_arg;
      grn_hash_cursor *cursor;
      void *key;
      int key_size;
      grn_obj *value;

      n_args_without_option--;
      cursor = grn_hash_cursor_open(ctx, (grn_hash *)options,
                                    NULL, 0, NULL, 0,
                                    0, -1, 0);
      if (!cursor) {
        GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE,
                         "snippet(): couldn't open cursor");
        goto exit;
      }
      while (grn_hash_cursor_next(ctx, cursor) != GRN_ID_NIL) {
        grn_hash_cursor_get_key_value(ctx, cursor,
                                      &key, &key_size,
                                      (void **)&value);
        if (key_size == 5 && !memcmp(key, "width", 5)) {
          width = GRN_UINT32_VALUE(value);
        } else if (key_size == 13 && !memcmp(key, "max_n_results", 13)) {
          max_n_results = GRN_UINT32_VALUE(value);
        } else if (key_size == 19 && !memcmp(key, "skip_leading_spaces", 19)) {
          if (GRN_BOOL_VALUE(value) == GRN_FALSE) {
            flags &= ~GRN_SNIP_SKIP_LEADING_SPACES;
          }
        } else if (key_size == 11 && !memcmp(key, "html_escape", 11)) {
          if (GRN_BOOL_VALUE(value)) {
            mapping = GRN_SNIP_MAPPING_HTML_ESCAPE;
          }
        } else if (key_size == 6 && !memcmp(key, "prefix", 6)) {
          prefix = GRN_TEXT_VALUE(value);
          prefix_length = GRN_TEXT_LEN(value);
        } else if (key_size == 6 && !memcmp(key, "suffix", 6)) {
          suffix = GRN_TEXT_VALUE(value);
          suffix_length = GRN_TEXT_LEN(value);
        } else if (key_size == 10 && !memcmp(key, "normalizer", 10)) {
          normalizer_name = GRN_TEXT_VALUE(value);
          normalizer_name_length = GRN_TEXT_LEN(value);
        } else if (key_size == 16 && !memcmp(key, "default_open_tag", 16)) {
          default_open_tag = GRN_TEXT_VALUE(value);
          default_open_tag_length = GRN_TEXT_LEN(value);
        } else if (key_size == 17 && !memcmp(key, "default_close_tag", 17)) {
          default_close_tag = GRN_TEXT_VALUE(value);
          default_close_tag_length = GRN_TEXT_LEN(value);
        } else {
          GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                           "invalid option name: <%.*s>",
                           key_size, (char *)key);
          grn_hash_cursor_close(ctx, cursor);
          goto exit;
        }
      }
      grn_hash_cursor_close(ctx, cursor);
    }

    snip = grn_snip_open(ctx, flags, width, max_n_results,
                         default_open_tag, default_open_tag_length,
                         default_close_tag, default_close_tag_length, mapping);
    if (snip) {
      grn_rc rc;
      unsigned int i;
      if (!normalizer_name) {
        grn_snip_set_normalizer(ctx, snip, GRN_NORMALIZER_AUTO);
      } else if (normalizer_name_length > 0) {
        grn_obj *normalizer;
        normalizer = grn_ctx_get(ctx, normalizer_name, normalizer_name_length);
        if (!grn_obj_is_normalizer_proc(ctx, normalizer)) {
          grn_obj inspected;
          GRN_TEXT_INIT(&inspected, 0);
          grn_inspect(ctx, &inspected, normalizer);
          GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                           "snippet(): not normalizer: <%.*s>",
                           (int)GRN_TEXT_LEN(&inspected),
                           GRN_TEXT_VALUE(&inspected));
          GRN_OBJ_FIN(ctx, &inspected);
          grn_obj_unlink(ctx, normalizer);
          goto exit;
        }
        grn_snip_set_normalizer(ctx, snip, normalizer);
        grn_obj_unlink(ctx, normalizer);
      }
      if (default_open_tag_length == 0 && default_close_tag_length == 0) {
        unsigned int n_keyword_sets =
          (n_args_without_option - N_REQUIRED_ARGS) / KEYWORD_SET_SIZE;
        grn_obj **keyword_set_args = args + N_REQUIRED_ARGS;
        for (i = 0; i < n_keyword_sets; i++) {
          rc = grn_snip_add_cond(ctx, snip,
                                 GRN_TEXT_VALUE(keyword_set_args[i * KEYWORD_SET_SIZE]),
                                 GRN_TEXT_LEN(keyword_set_args[i * KEYWORD_SET_SIZE]),
                                 GRN_TEXT_VALUE(keyword_set_args[i * KEYWORD_SET_SIZE + 1]),
                                 GRN_TEXT_LEN(keyword_set_args[i * KEYWORD_SET_SIZE + 1]),
                                 GRN_TEXT_VALUE(keyword_set_args[i * KEYWORD_SET_SIZE + 2]),
                                 GRN_TEXT_LEN(keyword_set_args[i * KEYWORD_SET_SIZE + 2]));
        }
      } else {
        unsigned int n_keywords = n_args_without_option - N_REQUIRED_ARGS;
        grn_obj **keyword_args = args + N_REQUIRED_ARGS;
        for (i = 0; i < n_keywords; i++) {
          rc = grn_snip_add_cond(ctx, snip,
                                 GRN_TEXT_VALUE(keyword_args[i]),
                                 GRN_TEXT_LEN(keyword_args[i]),
                                 NULL, 0,
                                 NULL, 0);
        }
      }
      snippets = snippet_exec(ctx, snip, text, user_data,
                              prefix, prefix_length,
                              suffix, suffix_length);
    }
  }
#undef KEYWORD_SET_SIZE
#undef N_REQUIRED_ARGS

exit :
  if (!snippets) {
    snippets = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0);
  }

  return snippets;
}
Esempio n. 3
0
static grn_rc
selector_fuzzy_search(grn_ctx *ctx, grn_obj *table, grn_obj *index,
                      int nargs, grn_obj **args,
                      grn_obj *res, grn_operator op)
{
  grn_rc rc = GRN_SUCCESS;
  grn_obj *target = NULL;
  grn_obj *obj;
  grn_obj *query;
  uint32_t max_distance = 1;
  uint32_t prefix_length = 0;
  uint32_t prefix_match_size = 0;
  uint32_t max_expansion = 0;
  int flags = 0;
  grn_bool use_sequential_search = GRN_FALSE;

  if ((nargs - 1) < 2) {
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                     "fuzzy_search(): wrong number of arguments (%d ...)",
                     nargs - 1);
    rc = ctx->rc;
    goto exit;
  }
  obj = args[1];
  query = args[2];

  if (nargs == 4) {
    grn_obj *options = args[3];
    grn_hash_cursor *cursor;
    void *key;
    grn_obj *value;
    int key_size;

    if (options->header.type != GRN_TABLE_HASH_KEY) {
      GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                       "fuzzy_search(): "
                       "3rd argument must be object literal: <%.*s>",
                       (int)GRN_TEXT_LEN(options),
                       GRN_TEXT_VALUE(options));
      goto exit;
    }

    cursor = grn_hash_cursor_open(ctx, (grn_hash *)options,
                                  NULL, 0, NULL, 0,
                                  0, -1, 0);
    if (!cursor) {
      GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE,
                       "fuzzy_search(): couldn't open cursor");
      goto exit;
    }
    while (grn_hash_cursor_next(ctx, cursor) != GRN_ID_NIL) {
      grn_hash_cursor_get_key_value(ctx, cursor, &key, &key_size,
                                    (void **)&value);

      if (key_size == 12 && !memcmp(key, "max_distance", 12)) {
        max_distance = GRN_UINT32_VALUE(value);
      } else if (key_size == 13 && !memcmp(key, "prefix_length", 13)) {
        prefix_length = GRN_UINT32_VALUE(value);
      } else if (key_size == 13 && !memcmp(key, "max_expansion", 13)) {
        max_expansion = GRN_UINT32_VALUE(value);
      } else if (key_size == 18 && !memcmp(key, "with_transposition", 18)) {
        if (GRN_BOOL_VALUE(value)) {
          flags |= GRN_TABLE_FUZZY_SEARCH_WITH_TRANSPOSITION;
        }
      } else {
        GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                         "invalid option name: <%.*s>",
                         key_size, (char *)key);
        grn_hash_cursor_close(ctx, cursor);
        goto exit;
      }
    }
    grn_hash_cursor_close(ctx, cursor);
  }

  if (index) {
    target = index;
  } else {
    if (obj->header.type == GRN_COLUMN_INDEX) {
      target = obj;
    } else {
      grn_column_index(ctx, obj, GRN_OP_FUZZY, &target, 1, NULL);
    }
  }

  if (target) {
    grn_obj *lexicon;
    use_sequential_search = GRN_TRUE;
    lexicon = grn_ctx_at(ctx, target->header.domain);
    if (lexicon) {
      if (lexicon->header.type == GRN_TABLE_PAT_KEY) {
        use_sequential_search = GRN_FALSE;
      }
      grn_obj_unlink(ctx, lexicon);
    }
  } else {
    if (grn_obj_is_key_accessor(ctx, obj) &&
        table->header.type == GRN_TABLE_PAT_KEY) {
      target = table;
    } else {
      use_sequential_search = GRN_TRUE;
    }
  }

  if (prefix_length) {
    const char *s = GRN_TEXT_VALUE(query);
    const char *e = GRN_BULK_CURR(query);
    const char *p;
    unsigned int cl = 0;
    unsigned int length = 0;
    for (p = s; p < e && (cl = grn_charlen(ctx, p, e)); p += cl) {
      length++;
      if (length > prefix_length) {
        break;
      }
    }
    prefix_match_size = p - s;
  }

  if (use_sequential_search) {
    rc = sequential_fuzzy_search(ctx, table, obj, query,
                                 max_distance, prefix_match_size,
                                 max_expansion, flags, res, op);
    goto exit;
  }

  if (!target) {
    grn_obj inspected;
    GRN_TEXT_INIT(&inspected, 0);
    grn_inspect(ctx, &inspected, target);
    GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
                     "fuzzy_search(): "
                     "column must be COLUMN_INDEX or TABLE_PAT_KEY: <%.*s>",
                     (int)GRN_TEXT_LEN(&inspected),
                     GRN_TEXT_VALUE(&inspected));
    rc = ctx->rc;
    GRN_OBJ_FIN(ctx, &inspected);
  } else {
    grn_search_optarg options = {0};
    options.mode = GRN_OP_FUZZY;
    options.fuzzy.prefix_match_size = prefix_match_size;
    options.fuzzy.max_distance = max_distance;
    options.fuzzy.max_expansion = max_expansion;
    options.fuzzy.flags = flags;
    grn_obj_search(ctx, target, query, res, op, &options);
  }

exit :
  return rc;
}
Esempio n. 4
0
static grn_obj *
func_highlight(grn_ctx *ctx, int nargs, grn_obj **args,
               grn_user_data *user_data)
{
  grn_obj *highlighted = NULL;

#define N_REQUIRED_ARGS 1
  if (nargs > N_REQUIRED_ARGS) {
    grn_obj *string = args[0];
    grn_bool use_html_escape = GRN_FALSE;
    grn_obj *keywords;
    const char *normalizer_name = "NormalizerAuto";
    unsigned int normalizer_name_length = 14;
    const char *default_open_tag = NULL;
    unsigned int default_open_tag_length = 0;
    const char *default_close_tag = NULL;
    unsigned int default_close_tag_length = 0;
    grn_obj *end_arg = args[nargs - 1];
    int n_args_without_option = nargs;

    if (end_arg->header.type == GRN_TABLE_HASH_KEY) {
      grn_obj *options = end_arg;
      grn_hash_cursor *cursor;
      void *key;
      grn_obj *value;
      int key_size;

      n_args_without_option--;
      cursor = grn_hash_cursor_open(ctx, (grn_hash *)options,
                                    NULL, 0, NULL, 0,
                                    0, -1, 0);
      if (!cursor) {
        GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE,
                         "highlight(): couldn't open cursor");
        goto exit;
      }
      while (grn_hash_cursor_next(ctx, cursor) != GRN_ID_NIL) {
        grn_hash_cursor_get_key_value(ctx, cursor, &key, &key_size,
                                      (void **)&value);
        if (key_size == 10 && !memcmp(key, "normalizer", 10)) {
          normalizer_name = GRN_TEXT_VALUE(value);
          normalizer_name_length = GRN_TEXT_LEN(value);
        } else if (key_size == 11 && !memcmp(key, "html_escape", 11)) {
          if (GRN_BOOL_VALUE(value)) {
            use_html_escape = GRN_TRUE;
          }
        } else if (key_size == 16 && !memcmp(key, "default_open_tag", 16)) {
          default_open_tag = GRN_TEXT_VALUE(value);
          default_open_tag_length = GRN_TEXT_LEN(value);
        } else if (key_size == 17 && !memcmp(key, "default_close_tag", 17)) {
          default_close_tag = GRN_TEXT_VALUE(value);
          default_close_tag_length = GRN_TEXT_LEN(value);
        } else {
          GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "invalid option name: <%.*s>",
                           key_size, (char *)key);
          grn_hash_cursor_close(ctx, cursor);
          goto exit;
        }
      }
      grn_hash_cursor_close(ctx, cursor);
    }

    keywords =
      func_highlight_create_keywords_table(ctx, user_data,
                                           normalizer_name,
                                           normalizer_name_length);

    if (keywords) {
      grn_obj **keyword_args = args + N_REQUIRED_ARGS;
      unsigned int n_keyword_args = n_args_without_option - N_REQUIRED_ARGS;
      if (default_open_tag_length == 0 && default_close_tag_length == 0) {
        highlighted = highlight_keyword_sets(ctx, user_data,
                                             keyword_args, n_keyword_args,
                                             string, keywords, use_html_escape);
      } else {
        unsigned int i;
        for (i = 0; i < n_keyword_args; i++) {
          grn_table_add(ctx, keywords,
                        GRN_TEXT_VALUE(keyword_args[i]),
                        GRN_TEXT_LEN(keyword_args[i]),
                        NULL);
        }
        highlighted = highlight_keywords(ctx, user_data,
                                         string, keywords, use_html_escape,
                                         default_open_tag, default_open_tag_length,
                                         default_close_tag, default_close_tag_length);
      }
    }
  }
#undef N_REQUIRED_ARGS

exit :
  if (!highlighted) {
    highlighted = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0);
  }

  return highlighted;
}