Beispiel #1
0
/*
 * Run query `q` against index `i`, merging hits into `r` with operator `op`.
 *
 * Up to three passes are made, in this order: exact, unsplit, partial.
 * q->escalation_threshold decides which passes run:
 *   - threshold >= 0: the exact pass always runs; each later pass runs only
 *     while the threshold is still >= the current number of entries in `r`.
 *   - threshold <  0: -threshold is read as a bit mask
 *     (1 = exact, 2 = unsplit, 4 = partial).
 * Every escalated pass first lowers q->weight_offset by
 * q->escalation_decaystep.
 *
 * Always returns GRN_SUCCESS.
 */
grn_rc
grn_query_search(grn_ctx *ctx, grn_ii *i, grn_query *q, grn_hash *r, grn_operator op)
{
  const int threshold = q->escalation_threshold;
  const int by_hit_count = (threshold >= 0);
  /* Only meaningful (and only consulted) when the threshold is negative. */
  const int mode_mask = by_hit_count ? 0 : -threshold;

  /* Pass 1: exact match. */
  if (by_hit_count || (mode_mask & 1)) {
    q->default_mode = GRN_OP_EXACT;
    exec_search(ctx, i, q, q->expr, r, op);
    GRN_LOG(ctx, GRN_LOG_INFO, "hits(exact)=%d", *r->n_entries);
  }

  /* Pass 2: unsplit match, with a decayed weight. */
  if (by_hit_count ? (threshold >= *r->n_entries) : (mode_mask & 2)) {
    q->weight_offset -= q->escalation_decaystep;
    q->default_mode = GRN_OP_UNSPLIT;
    exec_search(ctx, i, q, q->expr, r, op);
    GRN_LOG(ctx, GRN_LOG_INFO, "hits(unsplit)=%d", *r->n_entries);
  }

  /* Pass 3: partial match, decayed once more. */
  if (by_hit_count ? (threshold >= *r->n_entries) : (mode_mask & 4)) {
    q->weight_offset -= q->escalation_decaystep;
    q->default_mode = GRN_OP_PARTIAL;
    exec_search(ctx, i, q, q->expr, r, op);
    GRN_LOG(ctx, GRN_LOG_INFO, "hits(partial)=%d", *r->n_entries);
  }

  return GRN_SUCCESS;
}
/*
 * Logger "reopen" callback: asks nginx to reopen its files, logging a
 * notice before and after the call.  `user_data` is not used.
 */
static void
ngx_http_groonga_logger_reopen(grn_ctx *ctx, void *user_data)
{
  ngx_cycle_t *cycle;

  GRN_LOG(ctx, GRN_LOG_NOTICE, "log will be closed.");

  cycle = (ngx_cycle_t *)ngx_cycle;
  ngx_reopen_files(cycle, -1);

  GRN_LOG(ctx, GRN_LOG_NOTICE, "log opened.");
}
Beispiel #3
0
/*
 * Logger "reopen" callback for the default logger.
 *
 * Closes the current log file (if any) under default_logger_lock and clears
 * the handle.  The file is not opened again here.
 * NOTE(review): presumably the next log write re-opens it -- confirm.
 * `user_data` is not used.
 */
static void
default_logger_reopen(grn_ctx *ctx, void *user_data)
{
  FILE *old_file;

  GRN_LOG(ctx, GRN_LOG_NOTICE, "log will be closed.");

  CRITICAL_SECTION_ENTER(default_logger_lock);
  old_file = default_logger_file;
  if (old_file) {
    fclose(old_file);
    default_logger_file = NULL;
  }
  CRITICAL_SECTION_LEAVE(default_logger_lock);

  GRN_LOG(ctx, GRN_LOG_NOTICE, "log opened.");
}
Beispiel #4
0
/*
 * Create the grn_io backing a fixed-size-element array at `path` and fill
 * in `ra`.
 *
 * element_size is rounded up to the next power of two; that rounded value is
 * what gets stored in the header and used to size segments.
 *
 * Returns `ra` on success, or NULL when element_size is larger than
 * GRN_RA_SEGMENT_SIZE or when grn_io_create() fails.
 */
static grn_ra *
_grn_ra_create(grn_ctx *ctx, grn_ra *ra, const char *path, unsigned int element_size)
{
  grn_io *io;
  int max_segments, n_elm, w_elm;
  struct grn_ra_header *header;
  unsigned int actual_size;
  if (element_size > GRN_RA_SEGMENT_SIZE) {
    /* element_size is unsigned, so it must be printed with %u, not %d. */
    GRN_LOG(ctx, GRN_LOG_ERROR, "element_size too large (%u)", element_size);
    return NULL;
  }
  /* Smallest power of two that is >= element_size (1 for element_size 0). */
  for (actual_size = 1; actual_size < element_size; actual_size *= 2) ;
  max_segments = ((GRN_ID_MAX + 1) / GRN_RA_SEGMENT_SIZE) * actual_size;
  io = grn_io_create(ctx, path, sizeof(struct grn_ra_header),
                     GRN_RA_SEGMENT_SIZE, max_segments, grn_io_auto,
                     GRN_IO_EXPIRE_SEGMENT);
  if (!io) { return NULL; }
  header = grn_io_header(io);
  grn_io_set_type(io, GRN_COLUMN_FIX_SIZE);
  header->element_size = actual_size;
  /* Number of elements that fit in one segment. */
  n_elm = GRN_RA_SEGMENT_SIZE / header->element_size;
  /* Largest w_elm (starting from 22) with 2^w_elm <= n_elm. */
  for (w_elm = 22; (1 << w_elm) > n_elm; w_elm--);
  ra->io = io;
  ra->header = header;
  ra->element_mask =  n_elm - 1;
  ra->element_width = w_elm;
  return ra;
}
Beispiel #5
0
/*
 * Allocate a grn_query for the query text `str` (str_len bytes) and parse it.
 *
 * A single allocation holds the grn_query itself, a pool of max_exprs * 4
 * cells, and a NUL-terminated private copy of `str` placed right after the
 * cell pool.
 *
 * Returns the new query, or NULL when the allocation fails.
 */
grn_query *
grn_query_open(grn_ctx *ctx, const char *str, unsigned int str_len,
               grn_operator default_op, int max_exprs)
{
  int cell_count = max_exprs * 4;
  grn_query *query =
    GRN_MALLOC(sizeof(grn_query) + cell_count * sizeof(grn_cell) + str_len + 1);

  if (!query) {
    GRN_LOG(ctx, GRN_LOG_ALERT, "grn_query_open malloc fail");
    return NULL;
  }

  query->header.type = GRN_QUERY;

  /* Private copy of the query text, stored just past the cell pool. */
  query->str = (char *)&query->cell_pool[cell_count];
  memcpy(query->str, str, str_len);
  query->str[str_len] = '\0';
  query->cur = query->str;
  query->str_end = query->str + str_len;

  /* Parser state and defaults; these are set before any parsing starts. */
  query->default_op = default_op;
  query->encoding = ctx->encoding;
  query->max_exprs = max_exprs;
  query->max_cells = cell_count;
  query->cur_cell = 0;
  query->cur_expr = 0;
  query->escalation_threshold = GRN_DEFAULT_MATCH_ESCALATION_THRESHOLD;
  query->escalation_decaystep = DEFAULT_DECAYSTEP;
  query->weight_offset = 0;
  query->opt.weight_vector = NULL;
  query->weight_set = NULL;

  /* Parse: pragmas first, then the expression itself. */
  get_pragma(ctx, query);
  query->expr = get_expr(ctx, query);

  /* Options that depend on whether parsing produced a weight set. */
  query->opt.vector_size = DEFAULT_WEIGHT_VECTOR_SIZE;
  if (query->weight_set) {
    query->opt.func = section_weight_cb;
  } else {
    query->opt.func = NULL;
  }
  query->opt.func_arg = query->weight_set;
  query->snip_conds = NULL;

  return query;
}
Beispiel #6
0
/*
 * Scan `nstrs` strings against query `q`.
 *
 * strs / str_lens: parallel arrays of string pointers and byte lengths.
 * flags:           GRN_QUERY_SCAN_* bits.  GRN_QUERY_SCAN_ALLOCCONDS is set
 *                  internally when the snippet conditions are allocated by
 *                  this call; passing it while conditions already exist is
 *                  rejected.
 * found / score:   output accumulators, reset to 0 before scanning.
 *
 * Returns GRN_SUCCESS, GRN_INVALID_ARGUMENT for a NULL/empty argument or bad
 * flags, GRN_NO_MEMORY_AVAILABLE when a string cannot be opened, or the
 * error returned by alloc_snip_conds() / scan_query().
 */
grn_rc
grn_query_scan(grn_ctx *ctx, grn_query *q, const char **strs, unsigned int *str_lens, unsigned int nstrs,
               int flags, int *found, int *score)
{
  unsigned int i;
  grn_rc rc;
  /* found, score and str_lens are dereferenced below, so reject NULL here
     together with the other mandatory arguments. */
  if (!q || !strs || !str_lens || !nstrs || !found || !score) {
    return GRN_INVALID_ARGUMENT;
  }
  *found = *score = 0;
  if (!q->snip_conds) {
    if ((rc = alloc_snip_conds(ctx, q))) { return rc; }
    flags |= GRN_QUERY_SCAN_ALLOCCONDS;
  } else if (flags & GRN_QUERY_SCAN_ALLOCCONDS) {
    GRN_LOG(ctx, GRN_LOG_WARNING, "invalid flags specified on grn_query_scan");
    return GRN_INVALID_ARGUMENT;
  }
  for (i = 0; i < nstrs; i++) {
    grn_str *n;
    snip_cond *sc = q->snip_conds;
    int f = GRN_STR_WITH_CHECKS | GRN_STR_REMOVEBLANK;
    if (flags & GRN_QUERY_SCAN_NORMALIZE) { f |= GRN_STR_NORMALIZE; }
    n = grn_str_open(ctx, strs[i], str_lens[i], f);
    if (!n) { return GRN_NO_MEMORY_AVAILABLE; }
    /* Strings are numbered from 1 for scan_query(). */
    rc = scan_query(ctx, q, n, i + 1, q->expr, &sc, GRN_OP_OR, flags, found, score);
    grn_str_close(ctx, n);
    if (rc) { return rc; }
    /* The ALLOCCONDS bit only applies to the first string scanned. */
    flags &= ~GRN_QUERY_SCAN_ALLOCCONDS;
  }
  return GRN_SUCCESS;
}
Beispiel #7
0
/*
 * Create a grn_string object wrapping `str` (str_len bytes) and normalize it.
 *
 * normalizer: NULL for no normalization (a "fake" string is returned),
 *             GRN_NORMALIZER_AUTO to look up the normalizer named
 *             GRN_NORMALIZER_AUTO_NAME, or an explicit normalizer object.
 *
 * `str` is referenced, not copied (string->original points at it).
 *
 * Returns the new object, or NULL when `str` is NULL/empty, the auto
 * normalizer is unavailable, allocation fails, or ctx->rc is non-zero after
 * normalization.
 */
grn_obj *
grn_string_open_(grn_ctx *ctx, const char *str, unsigned int str_len,
                 grn_obj *normalizer, int flags, grn_encoding encoding)
{
  grn_string *string;
  grn_obj *obj;
  grn_bool is_normalizer_auto;

  if (!str || !str_len) { return NULL; }

  /* Resolve the "auto" placeholder to a real normalizer object. */
  is_normalizer_auto = (normalizer == GRN_NORMALIZER_AUTO);
  if (is_normalizer_auto &&
      !(normalizer = grn_ctx_get(ctx, GRN_NORMALIZER_AUTO_NAME, -1))) {
    ERR(GRN_INVALID_ARGUMENT,
        "[string][open] NormalizerAuto normalizer isn't available");
    return NULL;
  }

  string = GRN_MALLOCN(grn_string, 1);
  if (!string) {
    /* Release the normalizer we looked up ourselves. */
    if (is_normalizer_auto) { grn_obj_unlink(ctx, normalizer); }
    GRN_LOG(ctx, GRN_LOG_ALERT,
            "[string][open] failed to allocate memory");
    return NULL;
  }

  obj = (grn_obj *)string;
  GRN_OBJ_INIT(obj, GRN_STRING, GRN_OBJ_ALLOCATED, GRN_ID_NIL);
  string->original = str;
  string->original_length_in_bytes = str_len;
  string->normalized = NULL;
  string->normalized_length_in_bytes = 0;
  string->n_characters = 0;
  string->checks = NULL;
  string->ctypes = NULL;
  string->encoding = encoding;
  string->flags = flags;

  if (normalizer) {
    grn_normalizer_normalize(ctx, normalizer, (grn_obj *)string);
    if (ctx->rc) {
      grn_obj_close(ctx, obj);
      obj = NULL;
    }
    if (is_normalizer_auto) { grn_obj_unlink(ctx, normalizer); }
    return obj;
  }

  /* No normalizer requested. */
  return (grn_obj *)grn_fake_string_open(ctx, string);
}
Beispiel #8
0
/*
 * Log (at ERROR level) that setting a column value failed during load.
 *
 * The message contains ctx->errbuf plus size-limited inspections of `key`
 * and `column_value`, and the column name (column_name_size bytes; it does
 * not need to be NUL-terminated because it is printed with "%.*s").
 */
static void
report_set_column_value_failure(grn_ctx *ctx,
                                grn_obj *key,
                                const char *column_name,
                                unsigned int column_name_size,
                                grn_obj *column_value)
{
  grn_obj key_inspected, column_value_inspected;

  GRN_TEXT_INIT(&key_inspected, 0);
  GRN_TEXT_INIT(&column_value_inspected, 0);
  grn_inspect_limited(ctx, &key_inspected, key);
  grn_inspect_limited(ctx, &column_value_inspected, column_value);
  /* Each "%.*s" precision argument must have type int, hence the casts. */
  GRN_LOG(ctx, GRN_LOG_ERROR,
          "[table][load] failed to set column value: %s: "
          "key: <%.*s>, column: <%.*s>, value: <%.*s>",
          ctx->errbuf,
          (int)GRN_TEXT_LEN(&key_inspected),
          GRN_TEXT_VALUE(&key_inspected),
          (int)column_name_size,
          column_name,
          (int)GRN_TEXT_LEN(&column_value_inspected),
          GRN_TEXT_VALUE(&column_value_inspected));
  GRN_OBJ_FIN(ctx, &key_inspected);
  GRN_OBJ_FIN(ctx, &column_value_inspected);
}
Beispiel #9
0
/*
 * Initialise snippet condition `sc` for `keyword` (keyword_len bytes).
 *
 * `sc` is zeroed, the keyword is opened as a grn_string with blanks removed,
 * and for normalized keywords longer than one byte the bmBc table and the
 * shift value are prepared from the normalized text.
 * `enc` and `flags` are not used by this function.
 *
 * Returns GRN_SUCCESS, GRN_NO_MEMORY_AVAILABLE when the keyword string cannot
 * be opened, or GRN_INVALID_ARGUMENT when the normalized keyword is empty
 * (in which case `sc` is closed again).
 */
grn_rc
grn_snip_cond_init(grn_ctx *ctx, snip_cond *sc, const char *keyword, unsigned int keyword_len,
                   grn_encoding enc, grn_obj *normalizer, int flags)
{
  const char *norm;
  unsigned int norm_blen;
  unsigned char last_byte;

  memset(sc, 0, sizeof(snip_cond));

  sc->keyword = grn_string_open(ctx, keyword, keyword_len,
                                normalizer, GRN_STR_REMOVEBLANK);
  if (!sc->keyword) {
    GRN_LOG(ctx, GRN_LOG_ALERT,
            "grn_string_open on snip_cond_init failed!");
    return GRN_NO_MEMORY_AVAILABLE;
  }

  grn_string_get_normalized(ctx, sc->keyword, &norm, &norm_blen, NULL);
  if (norm_blen == 0) {
    grn_snip_cond_close(ctx, sc);
    return GRN_INVALID_ARGUMENT;
  }

  /* One-byte keywords need no table. */
  if (norm_blen == 1) {
    return GRN_SUCCESS;
  }

  grn_bm_preBmBc((unsigned char *)norm, norm_blen, sc->bmBc);
  /* Remember the last byte's table entry as the shift, then zero the entry. */
  last_byte = (unsigned char)norm[norm_blen - 1];
  sc->shift = sc->bmBc[last_byte];
  sc->bmBc[last_byte] = 0;
  return GRN_SUCCESS;
}
Beispiel #10
0
/*
 * Initialise an n-gram tokenizer for the text popped from the ctx stack.
 *
 * On success the new tokenizer state is stored in user_data->ptr and
 * GRN_SUCCESS is returned.  On failure nothing is stored in user_data->ptr
 * and any partially built state is released:
 *   - GRN_INVALID_ARGUMENT when there is nothing to pop,
 *   - ctx->rc when the tokenizer state cannot be allocated,
 *   - GRN_TOKENIZER_ERROR when the input string cannot be opened.
 */
static grn_rc
ngram_init(grn_ctx *ctx, grn_obj *table, grn_proc_data *user_data, uint8_t ngram_unit)
{
  grn_obj *str;
  int nflags = GRN_STR_REMOVEBLANK|GRN_STR_WITH_CTYPES;
  grn_ngram_tokenizer *token;
  grn_obj_flags table_flags;
  if (!(str = grn_ctx_pop(ctx))) { return GRN_INVALID_ARGUMENT; }
  if (!(token = GRN_MALLOC(sizeof(grn_ngram_tokenizer)))) { return ctx->rc; }
  token->uni_alpha = 1;
  token->uni_digit = 1;
  token->uni_symbol = 1;
  token->ngram_unit = ngram_unit;
  token->overlap = 0;
  token->pos = 0;
  token->skip = 0;
  grn_table_get_info(ctx, table, &table_flags, &token->encoding, NULL);
  /* Normalize the input only if the table's keys are normalized. */
  nflags |= (table_flags & GRN_OBJ_KEY_NORMALIZE);
  if (!(token->nstr = grn_str_open_(ctx, GRN_TEXT_VALUE(str), GRN_TEXT_LEN(str),
                                    nflags, token->encoding))) {
    GRN_LOG(ctx, GRN_LOG_ALERT, "grn_str_open failed at grn_token_open");
    /* Nothing references the state yet, so release it instead of leaking. */
    GRN_FREE(token);
    return GRN_TOKENIZER_ERROR;
  }
  token->next = (unsigned char *)token->nstr->norm;
  token->end = token->next + token->nstr->norm_blen;
  token->ctypes = token->nstr->ctypes;
  token->len = token->nstr->length;
  GRN_TEXT_INIT(&token->curr_, GRN_OBJ_DO_SHALLOW_COPY);
  GRN_UINT32_INIT(&token->stat_, 0);
  /* Publish the state only once it is fully initialised. */
  user_data->ptr = token;
  return GRN_SUCCESS;
}
Beispiel #11
0
/*
 * Emit the opening of an array in the given output format.
 *
 * A delimiter is written first, then the format-specific opener:
 *   JSON    -> "["
 *   XML     -> "<name>", and `name` is pushed onto ctx->impl->names
 *   TSV     -> "[\t", but only when DEPTH > 2
 *   MSGPACK -> an array header of `nelements` (only when built with
 *              HAVE_MESSAGE_PACK; a negative count is logged at DEBUG level)
 *   NONE    -> nothing
 * Finally the nesting depth is incremented.
 */
void
grn_output_array_open(grn_ctx *ctx, grn_obj *outbuf, grn_content_type output_type,
                      const char *name, int nelements)
{
  put_delimiter(ctx, outbuf, output_type);

  switch (output_type) {
  case GRN_CONTENT_NONE:
    break;

  case GRN_CONTENT_TSV:
    if (DEPTH > 2) {
      GRN_TEXT_PUTS(ctx, outbuf, "[\t");
    }
    break;

  case GRN_CONTENT_JSON:
    GRN_TEXT_PUTC(ctx, outbuf, '[');
    break;

  case GRN_CONTENT_XML:
    GRN_TEXT_PUTC(ctx, outbuf, '<');
    GRN_TEXT_PUTS(ctx, outbuf, name);
    GRN_TEXT_PUTC(ctx, outbuf, '>');
    /* Remember the element name on the names vector. */
    grn_vector_add_element(ctx, &ctx->impl->names, name, strlen(name), 0, GRN_DB_SHORT_TEXT);
    break;

  case GRN_CONTENT_MSGPACK :
#ifdef HAVE_MESSAGE_PACK
    if (nelements < 0) {
      GRN_LOG(ctx, GRN_LOG_DEBUG,
              "grn_output_array_open nelements (%d) for <%s>",
              nelements,
              name);
    }
    msgpack_pack_array(&ctx->impl->msgpacker, nelements);
#endif
    break;
  }

  INCR_DEPTH(0);
}
Beispiel #12
0
/*
 * Load the synonyms TSV file, passing each line to
 * parse_synonyms_file_line().
 *
 * The path comes from the GRN_QUERY_EXPANDER_TSV_SYNONYMS_FILE environment
 * variable when it is non-empty, otherwise from get_system_synonyms_file().
 * If the file cannot be opened a warning is logged and nothing is loaded.
 */
static void
load_synonyms(grn_ctx *ctx)
{
  static char path_env[GRN_ENV_BUFFER_SIZE];
  const char *path;
  grn_file_reader *file_reader;
  int number_of_lines = 0;
  grn_obj line, key, value;

  grn_getenv("GRN_QUERY_EXPANDER_TSV_SYNONYMS_FILE",
             path_env,
             GRN_ENV_BUFFER_SIZE);
  path = path_env[0] ? path_env : get_system_synonyms_file();

  file_reader = grn_file_reader_open(ctx, path);
  if (!file_reader) {
    GRN_LOG(ctx, GRN_LOG_WARNING,
            "[plugin][query-expander][tsv] "
            "synonyms file doesn't exist: <%s>",
            path);
    return;
  }

  GRN_TEXT_INIT(&line, 0);
  GRN_TEXT_INIT(&key, 0);
  GRN_TEXT_INIT(&value, 0);
  grn_bulk_reserve(ctx, &value, MAX_SYNONYM_BYTES);

  for (;;) {
    const char *line_value;
    size_t line_length;

    if (grn_file_reader_read_line(ctx, file_reader, &line) != GRN_SUCCESS) {
      break;
    }
    line_value = GRN_TEXT_VALUE(&line);
    line_length = GRN_TEXT_LEN(&line);

    /* Drop a trailing "\n" or "\r\n". */
    if (line_length > 0 && line_value[line_length - 1] == '\n') {
      line_length--;
      if (line_length > 0 && line_value[line_length - 1] == '\r') {
        line_length--;
      }
    }

    if (number_of_lines++ == 0) {
      /* The returned encoding is not used; the call is kept because it
         receives pointers to the line start and length.
         NOTE(review): presumably it skips a leading BOM -- confirm. */
      (void)guess_encoding(ctx, &line_value, &line_length);
    }

    GRN_BULK_REWIND(&key);
    GRN_BULK_REWIND(&value);
    parse_synonyms_file_line(ctx, line_value, line_length, &key, &value);
    GRN_BULK_REWIND(&line);
  }

  GRN_OBJ_FIN(ctx, &line);
  GRN_OBJ_FIN(ctx, &key);
  GRN_OBJ_FIN(ctx, &value);

  grn_file_reader_close(ctx, file_reader);
}
Beispiel #13
0
/*
 * Store the address `value` under the NUL-terminated `key` in `hash`.
 *
 * Returns true when a new entry was added.  Returns false when the key
 * already exists (the existing entry is left untouched) or when the hash
 * rejects the insertion.
 */
bool mrn_hash_put(grn_ctx *ctx, grn_hash *hash, const char *key, grn_obj *value)
{
  /* Initialised so a failed grn_hash_add() cannot leave them indeterminate. */
  int added = 0;
  void *buf = NULL;
  grn_id id;

  id = grn_hash_add(ctx, hash, (const char *)key, strlen(key), &buf, &added);
  if (id == GRN_ID_NIL || !buf) {
    GRN_LOG(ctx, GRN_LOG_ERROR, "hash put error (key=%s)", key);
    return false;
  }
  // duplicate check
  if (added == 0) {
    GRN_LOG(ctx, GRN_LOG_WARNING, "hash put duplicated (key=%s)", key);
    return false;
  }
  // store address of value
  memcpy(buf, &value, sizeof(grn_obj *));
  GRN_LOG(ctx, GRN_LOG_DEBUG, "hash put (key=%s)", key);
  return true;
}
Beispiel #14
0
/*
 * Allocate a zero-filled snip_cond array with one slot per expression
 * (q->cur_expr) and store it in q->snip_conds.
 *
 * Returns GRN_SUCCESS, or GRN_NO_MEMORY_AVAILABLE when GRN_CALLOC yields NULL.
 */
static grn_rc
alloc_snip_conds(grn_ctx *ctx, grn_query *q)
{
  q->snip_conds = GRN_CALLOC(sizeof(snip_cond) * q->cur_expr);
  if (q->snip_conds) {
    return GRN_SUCCESS;
  }
  GRN_LOG(ctx, GRN_LOG_ALERT, "snip_cond allocation failed");
  return GRN_NO_MEMORY_AVAILABLE;
}
Beispiel #15
0
/*
 * If any repeated error messages were counted, log how many were truncated
 * (NOTICE level) and reset the counter.  Does nothing when the counter is 0.
 */
static void
grn_ctx_impl_clear_n_same_error_messagges(grn_ctx *ctx)
{
  if (ctx->impl->n_same_error_messages != 0) {
    GRN_LOG(ctx, GRN_LOG_NOTICE, "(%u same messages are truncated)",
            ctx->impl->n_same_error_messages);
    ctx->impl->n_same_error_messages = 0;
  }
}
Beispiel #16
0
/*
 * Remove the entry stored under the NUL-terminated `key` from `hash`.
 *
 * Returns true when the entry existed and was deleted; false when the key
 * was not found or the deletion failed.  Each outcome is logged.
 */
bool mrn_hash_remove(grn_ctx *ctx, grn_hash *hash, const char *key)
{
  grn_id id = grn_hash_get(ctx, hash, (const char*) key, strlen(key), NULL);

  if (id == GRN_ID_NIL) {
    GRN_LOG(ctx, GRN_LOG_WARNING, "hash remove not found (key=%s)", key);
    return false;
  }

  if (grn_hash_delete_by_id(ctx, hash, id, NULL) != GRN_SUCCESS) {
    GRN_LOG(ctx, GRN_LOG_ERROR, "hash remove error (key=%s)", key);
    return false;
  }

  GRN_LOG(ctx, GRN_LOG_DEBUG, "hash remove (key=%s)", key);
  return true;
}
Beispiel #17
0
/*
 * Wait for I/O readiness on the sockets registered in ev->hash and, in the
 * select() build shown here, call grn_com_receiver() for every socket that
 * became readable.
 *
 * timeout is in milliseconds; when it is negative select() is given a NULL
 * timeout instead of a time limit.
 */
grn_rc
grn_com_event_poll(grn_ctx *ctx, grn_com_event *ev, int timeout)
{
  int nevents;
  grn_com *com;
#ifdef USE_SELECT
  uint32_t dummy;
  grn_sock *pfd;
  int nfds = 0;
  fd_set rfds;
  fd_set wfds;
  struct timeval tv;
  /* Split the millisecond timeout into seconds and microseconds. */
  if (timeout >= 0) {
    tv.tv_sec = timeout / 1000;
    tv.tv_usec = (timeout % 1000) * 1000;
  }
  FD_ZERO(&rfds);
  FD_ZERO(&wfds);
  /* Reset the context's error state before polling. */
  ctx->errlvl = GRN_OK;
  ctx->rc = GRN_SUCCESS;
  {
    /* Build the read/write fd sets from every registered connection. */
    grn_hash_cursor *cursor;
    cursor = grn_hash_cursor_open(ctx, ev->hash, NULL, 0, NULL, 0, 0, -1, 0);
    if (cursor) {
      grn_id id;
      while ((id = grn_hash_cursor_next(ctx, cursor))) {
        grn_hash_cursor_get_key_value(ctx,
                                      cursor,
                                      (void **)(&pfd),
                                      &dummy,
                                      (void **)(&com));
        if ((com->events & GRN_COM_POLLIN)) { FD_SET(*pfd, &rfds); }
        if ((com->events & GRN_COM_POLLOUT)) { FD_SET(*pfd, &wfds); }
# ifndef WIN32
        /* Track the highest descriptor for select()'s first argument. */
        if (*pfd > nfds) { nfds = *pfd; }
# endif /* WIN32 */
      }
      grn_hash_cursor_close(ctx, cursor);
    }
  }
  nevents = select(nfds + 1, &rfds, &wfds, NULL, (timeout >= 0) ? &tv : NULL);
  if (nevents < 0) {
    SOERR("select");
    /* An interrupted call is not reported as an error. */
    if (ctx->rc == GRN_INTERRUPTED_FUNCTION_CALL) { ERRCLR(ctx); }
    return ctx->rc;
  }
  if (timeout < 0 && !nevents) { GRN_LOG(ctx, GRN_LOG_NOTICE, "select returns 0 events"); }
  /* Dispatch every connection whose socket is readable. */
  GRN_HASH_EACH(ctx, ev->hash, eh, &pfd, &dummy, &com, {
    if (FD_ISSET(*pfd, &rfds)) { grn_com_receiver(ctx, com); }
  });
Beispiel #18
0
/*
 * Parse a comma-separated list of "key[:value]" weight entries starting at
 * `source` and ending no later than query->str_end.
 *
 * Weights are stored in query->opt.weight_vector (a zero-filled array of
 * DEFAULT_WEIGHT_VECTOR_SIZE ints, indexed by key - 1) as long as every key
 * is below DEFAULT_WEIGHT_VECTOR_SIZE.  On the first larger key the array is
 * freed, query->weight_set (a hash from key to weight) is created instead,
 * and parsing restarts from `source`.
 *
 * Returns the position where parsing stopped, or `source` when an allocation
 * fails.
 */
static const char *
get_weight_vector(grn_ctx *ctx, grn_query *query, const char *source)
{
  const char *p;

  /* Allocate the array only if neither storage exists yet. */
  if (!query->opt.weight_vector &&
      !query->weight_set &&
      !(query->opt.weight_vector = GRN_CALLOC(sizeof(int) * DEFAULT_WEIGHT_VECTOR_SIZE))) {
    GRN_LOG(ctx, GRN_LOG_ALERT, "get_weight_vector malloc fail");
    return source;
  }
  for (p = source; p < query->str_end; ) {
    unsigned int key;
    int value;

    /* key: must be non-zero and no larger than GRN_ID_MAX, else stop */
    key = grn_atoui(p, query->str_end, &p);
    if (!key || key > GRN_ID_MAX) { break; }

    /* value: optional ":<int>" suffix, defaulting to 1 */
    if (*p == ':') {
      p++;
      value = grn_atoi(p, query->str_end, &p);
    } else {
      value = 1;
    }

    if (query->weight_set) {
      /* Hash storage is active: insert or overwrite the weight for key. */
      int *pval;
      if (grn_hash_add(ctx, query->weight_set, &key, sizeof(unsigned int), (void **)&pval, NULL)) {
        *pval = value;
      }
    } else if (key < DEFAULT_WEIGHT_VECTOR_SIZE) {
      /* Keys are 1-based, the array is 0-based. */
      query->opt.weight_vector[key - 1] = value;
    } else {
      /* Key does not fit the array: switch to the hash and start over. */
      GRN_FREE(query->opt.weight_vector);
      query->opt.weight_vector = NULL;
      if (!(query->weight_set = grn_hash_create(ctx, NULL, sizeof(unsigned int), sizeof(int),
                                                0))) {
        return source;
      }
      p = source;           /* reparse */
      continue;
    }
    /* Entries are separated by ','; anything else ends the list. */
    if (*p != ',') { break; }
    p++;
  }
  return p;
}
Beispiel #19
0
/*
 * Look up the NUL-terminated `key` in `hash`.
 *
 * On a hit the stored object address is copied into *value and true is
 * returned.  On a miss *value is left untouched, a DEBUG message is logged
 * and false is returned.
 */
bool mrn_hash_get(grn_ctx *ctx, grn_hash *hash, const char *key, grn_obj **value)
{
  void *buf;
  grn_id id = grn_hash_get(ctx, hash, (const char *)key, strlen(key), &buf);

  if (id == GRN_ID_NIL) {
    // key not found
    GRN_LOG(ctx, GRN_LOG_DEBUG, "hash get not found (key=%s)", key);
    return false;
  }

  // restore address of value
  memcpy(value, buf, sizeof(grn_obj *));
  return true;
}
Beispiel #20
0
/*
 * Register socket `fd` with event set `ev` for the given `events`.
 *
 * The fd is added to the epoll / kqueue instance (depending on the build)
 * and then to ev->hash.  When the hash entry is created and `com` is
 * non-NULL, *com receives the new entry.
 *
 * Returns GRN_INVALID_ARGUMENT when `ev` is NULL or already holds
 * ev->max_nevents entries; otherwise ctx->rc.
 */
grn_rc
grn_com_event_add(grn_ctx *ctx, grn_com_event *ev, grn_sock fd, int events, grn_com **com)
{
    grn_com *c;

    if (!ev) {
        return GRN_INVALID_ARGUMENT;
    }
    /* todo : expand events */
    if (*ev->hash->n_entries == ev->max_nevents) {
        GRN_LOG(ctx, GRN_LOG_ERROR, "too many connections (%d)", ev->max_nevents);
        return GRN_INVALID_ARGUMENT;
    }

#ifdef USE_EPOLL
    {
        struct epoll_event e;
        memset(&e, 0, sizeof(struct epoll_event));
        e.data.fd = (fd);
        e.events = (uint32_t) events;
        if (epoll_ctl(ev->epfd, EPOLL_CTL_ADD, (fd), &e) == -1) {
            SERR("epoll_ctl");
            return ctx->rc;
        }
    }
#endif /* USE_EPOLL*/
#ifdef USE_KQUEUE
    {
        struct kevent e;
        /* todo: udata should have fd */
        EV_SET(&e, (fd), events, EV_ADD, 0, 0, NULL);
        if (kevent(ev->kqfd, &e, 1, NULL, 0, NULL) == -1) {
            SERR("kevent");
            return ctx->rc;
        }
    }
#endif /* USE_KQUEUE */

    /* Record the connection; on failure only ctx->rc reports it. */
    if (!grn_hash_add(ctx, ev->hash, &fd, sizeof(grn_sock), (void **)&c, NULL)) {
        return ctx->rc;
    }
    c->ev = ev;
    c->fd = fd;
    c->events = events;
    if (com) {
        *com = c;
    }
    return ctx->rc;
}
Beispiel #21
0
/*
 * Change the event mask of the already registered socket `fd` to `events`.
 *
 * When `com` is non-NULL it receives the entry found for `fd`.  The
 * epoll / kqueue registration is only touched when the mask differs.
 *
 * Returns GRN_SUCCESS; GRN_INVALID_ARGUMENT when `ev` is NULL or `fd` is not
 * registered; GRN_OBJECT_CORRUPT when the stored fd does not match; or
 * ctx->rc when the epoll / kqueue call fails.
 */
grn_rc
grn_com_event_mod(grn_ctx *ctx, grn_com_event *ev, grn_sock fd, int events, grn_com **com)
{
    grn_com *c;

    if (!ev) {
        return GRN_INVALID_ARGUMENT;
    }
    if (!grn_hash_get(ctx, ev->hash, &fd, sizeof(grn_sock), (void **)&c)) {
        return GRN_INVALID_ARGUMENT;
    }
    if (c->fd != fd) {
        GRN_LOG(ctx, GRN_LOG_ERROR,
                "grn_com_event_mod fd unmatch "
                "%" GRN_FMT_SOCKET " != %" GRN_FMT_SOCKET,
                c->fd, fd);
        return GRN_OBJECT_CORRUPT;
    }
    if (com) {
        *com = c;
    }
    if (c->events == events) {
        /* Nothing to change. */
        return GRN_SUCCESS;
    }

#ifdef USE_EPOLL
    {
        struct epoll_event e;
        memset(&e, 0, sizeof(struct epoll_event));
        e.data.fd = (fd);
        e.events = (uint32_t) events;
        if (epoll_ctl(ev->epfd, EPOLL_CTL_MOD, (fd), &e) == -1) {
            SERR("epoll_ctl");
            return ctx->rc;
        }
    }
#endif /* USE_EPOLL*/
#ifdef USE_KQUEUE
    {
        // experimental
        struct kevent e[2];
        EV_SET(&e[0], (fd), GRN_COM_POLLIN|GRN_COM_POLLOUT, EV_DELETE, 0, 0, NULL);
        EV_SET(&e[1], (fd), events, EV_ADD, 0, 0, NULL);
        if (kevent(ev->kqfd, e, 2, NULL, 0, NULL) == -1) {
            SERR("kevent");
            return ctx->rc;
        }
    }
#endif /* USE_KQUEUE */

    c->events = events;
    return GRN_SUCCESS;
}
Beispiel #22
0
/* TODO: delete overlapping logic with exec_query */
/*
 * Walk the cell list `c` of query `q` and add one snippet condition to
 * `snip` for every string cell that should be highlighted.
 *
 * c_but:      non-zero while inside a negated (GRN_OP_BUT) context; a string
 *             is added only when its own BUT-ness matches c_but.
 * n_tags:     number of entries in the tag arrays; tags are assigned round
 *             robin by snip->cond_len % n_tags.
 * opentags / opentag_lens / closetags / closetag_lens: parallel tag arrays.
 * `op` is not used by this function.
 *
 * Nested lists are handled recursively.  Returns GRN_SUCCESS, or the first
 * error reported by grn_snip_add_cond(), including errors from nested lists.
 */
static grn_rc
snip_query(grn_ctx *ctx, grn_query *q, grn_snip *snip, grn_cell *c, grn_operator op,
           unsigned int n_tags, int c_but,
           const char **opentags, unsigned int *opentag_lens,
           const char **closetags, unsigned int *closetag_lens)
{
  grn_cell *e, *ope = NIL;
  /* The first operand uses op0 (OR); later operands use op1. */
  grn_operator op0 = GRN_OP_OR, *opp = &op0, op1 = q->default_op;
  while (c != NIL) {
    POP(e, c);
    switch (e->header.type) {
    case GRN_CELL_OP :
      /* Remember the operator for the operand that follows. */
      ope = e;
      op1 = ope->u.op.op;
      continue;
    case GRN_CELL_STR :
      if (ope != NIL) {
        q->opt.mode = ope->u.op.mode == -1 ? q->default_mode : ope->u.op.mode;
      } else {
        q->opt.mode = q->default_mode;
      }
      if (!(c_but ^ (*opp == GRN_OP_BUT))) {
        grn_rc rc;
        unsigned int i = snip->cond_len % n_tags;
        if ((rc = grn_snip_add_cond(ctx, snip, e->u.b.value, e->u.b.size,
                                    opentags[i], opentag_lens[i],
                                    closetags[i], closetag_lens[i]))) {
          return rc;
        }
      }
      break;
    case GRN_CELL_LIST :
      {
        /* Propagate failures from nested lists instead of dropping them. */
        grn_rc rc = snip_query(ctx, q, snip, e, *opp, n_tags,
                               (*opp == GRN_OP_BUT) ? c_but ^ 1 : c_but,
                               opentags, opentag_lens, closetags, closetag_lens);
        if (rc) {
          return rc;
        }
      }
      break;
    default :
      GRN_LOG(ctx, GRN_LOG_NOTICE, "invalid object assigned in query!! (%d)", e->header.type);
      break;
    }
    /* Reset operator state for the next operand. */
    opp = &op1;
    ope = NIL;
    op1 = q->default_op;
  }
  return GRN_SUCCESS;
}
Beispiel #23
0
/*
 * Log "<action>[index]<tag> <index name>" at GRN_REPORT_INDEX_LOG_LEVEL.
 * Nothing is done (the name is not even looked up) when that level is
 * filtered out by the logger.
 */
void
grn_report_index(grn_ctx *ctx,
                 const char *action,
                 const char *tag,
                 grn_obj *index)
{
  if (grn_logger_pass(ctx, GRN_REPORT_INDEX_LOG_LEVEL)) {
    char name[GRN_TABLE_MAX_KEY_SIZE];
    int name_size = grn_obj_name(ctx, index, name, GRN_TABLE_MAX_KEY_SIZE);

    GRN_LOG(ctx, GRN_REPORT_INDEX_LOG_LEVEL,
            "%s[index]%s <%.*s>",
            action, tag, name_size, name);
  }
}
Beispiel #24
0
/*
 * Unregister socket `fd` from event set `ev`.
 *
 * The fd is removed from the epoll instance (unless the connection is marked
 * closed) or from the kqueue, depending on the build, and then its entry is
 * deleted from ev->hash.
 *
 * Returns the result of grn_hash_delete_by_id(); GRN_INVALID_ARGUMENT when
 * `ev` is NULL or `fd` is not registered; or ctx->rc when the epoll / kqueue
 * call fails.
 */
grn_rc
grn_com_event_del(grn_ctx *ctx, grn_com_event *ev, grn_sock fd)
{
    grn_com *c;
    grn_id id;

    if (!ev) {
        return GRN_INVALID_ARGUMENT;
    }

    id = grn_hash_get(ctx, ev->hash, &fd, sizeof(grn_sock), (void **)&c);
    if (!id) {
        GRN_LOG(ctx, GRN_LOG_ERROR,
                "%04x| fd(%" GRN_FMT_SOCKET ") not found in ev(%p)",
                getpid(), fd, ev);
        return GRN_INVALID_ARGUMENT;
    }

#ifdef USE_EPOLL
    if (!c->closed) {
        struct epoll_event e;
        memset(&e, 0, sizeof(struct epoll_event));
        e.data.fd = fd;
        e.events = c->events;
        if (epoll_ctl(ev->epfd, EPOLL_CTL_DEL, fd, &e) == -1) {
            SERR("epoll_ctl");
            return ctx->rc;
        }
    }
#endif /* USE_EPOLL*/
#ifdef USE_KQUEUE
    {
        struct kevent e;
        EV_SET(&e, (fd), c->events, EV_DELETE, 0, 0, NULL);
        if (kevent(ev->kqfd, &e, 1, NULL, 0, NULL) == -1) {
            SERR("kevent");
            return ctx->rc;
        }
    }
#endif /* USE_KQUEUE */

    return grn_hash_delete_by_id(ctx, ev->hash, id, NULL);
}
Beispiel #25
0
/*
 * Log "<action>[table]<tag> <description>" at GRN_REPORT_INDEX_LOG_LEVEL.
 *
 * The description is the chain obtained by starting at `table` and
 * repeatedly following header.domain through grn_ctx_at() until it yields
 * NULL.  Entries are joined with " -> "; a named object is shown as
 * "<name>" and an unnamed one as "(temporary)".
 *
 * Nothing is done when the log level is filtered out by the logger.
 */
void
grn_report_table(grn_ctx *ctx,
                 const char *action,
                 const char *tag,
                 grn_obj *table)
{
  grn_obj description;
  grn_obj *target = table;

  if (!grn_logger_pass(ctx, GRN_REPORT_INDEX_LOG_LEVEL)) {
    return;
  }

  GRN_TEXT_INIT(&description, 0);
  while (target) {
    char name[GRN_TABLE_MAX_KEY_SIZE];
    int name_size = grn_obj_name(ctx, target, name, GRN_TABLE_MAX_KEY_SIZE);

    /* Separator between chain entries (not before the first one). */
    if (GRN_TEXT_LEN(&description) > 0) {
      GRN_TEXT_PUTS(ctx, &description, " -> ");
    }
    if (name_size != 0) {
      GRN_TEXT_PUTS(ctx, &description, "<");
      GRN_TEXT_PUT(ctx, &description, name, name_size);
      GRN_TEXT_PUTS(ctx, &description, ">");
    } else {
      GRN_TEXT_PUTS(ctx, &description, "(temporary)");
    }

    target = grn_ctx_at(ctx, target->header.domain);
  }

  GRN_LOG(ctx, GRN_REPORT_INDEX_LOG_LEVEL,
          "%s[table]%s %.*s",
          action, tag,
          (int)GRN_TEXT_LEN(&description),
          GRN_TEXT_VALUE(&description));
  GRN_OBJ_FIN(ctx, &description);
}
Beispiel #26
0
/*
 * Initialise a MeCab tokenizer for the text popped from the ctx stack.
 *
 * The text is opened as a grn_str and run through mecab_sparse_tostr3()
 * into a heap buffer.  When MeCab reports "output buffer overflow" the
 * buffer is doubled and the call retried, up to 10 attempts.  Trailing
 * whitespace in MeCab's output is stripped.
 *
 * On success the tokenizer state is stored in user_data->ptr and GRN_SUCCESS
 * is returned.  On every failure path user_data->ptr is left untouched and
 * everything allocated so far (state, opened string, buffer) is released:
 *   - GRN_INVALID_ARGUMENT: nothing to pop,
 *   - GRN_TOKENIZER_ERROR:  no MeCab instance, string open failure, or
 *                           MeCab failure,
 *   - ctx->rc:              allocation failure.
 */
static grn_rc
mecab_init(grn_ctx *ctx, grn_obj *table, grn_proc_data *user_data)
{
  grn_obj *str;
  int nflags = 0;
  char *buf = NULL, *s = NULL;
  char mecab_err[256];
  grn_obj_flags table_flags;
  grn_mecab_tokenizer *token;
  unsigned int bufsize, maxtrial = 10, len;
  size_t out_len;
  if (!(str = grn_ctx_pop(ctx))) { return GRN_INVALID_ARGUMENT; }
  SOLE_MECAB_CONFIRM;
  if (!sole_mecab) {
    GRN_LOG(ctx, GRN_LOG_ALERT, "mecab_new failed on grn_mecab_init");
    return GRN_TOKENIZER_ERROR;
  }
  if (!(token = GRN_MALLOC(sizeof(grn_mecab_tokenizer)))) { return ctx->rc; }
  token->mecab = sole_mecab;
  grn_table_get_info(ctx, table, &table_flags, &token->encoding, NULL);
  /* Normalize the input only if the table's keys are normalized. */
  nflags |= (table_flags & GRN_OBJ_KEY_NORMALIZE);
  if (!(token->nstr = grn_str_open_(ctx, GRN_TEXT_VALUE(str), GRN_TEXT_LEN(str),
                                    nflags, token->encoding))) {
    GRN_LOG(ctx, GRN_LOG_ALERT, "grn_str_open failed at grn_token_open");
    GRN_FREE(token);
    return GRN_TOKENIZER_ERROR;
  }
  len = token->nstr->norm_blen;
  mecab_err[0] = '\0';
  /* strncpy below copies at most sizeof - 1 bytes, so this terminator stays. */
  mecab_err[sizeof(mecab_err) - 1] = '\0';
  for (bufsize = len * 2 + 1; maxtrial; bufsize *= 2, maxtrial--) {
    if(!(buf = GRN_MALLOC(bufsize + 1))) {
      GRN_LOG(ctx, GRN_LOG_ALERT, "buffer allocation on mecab_init failed !");
      grn_str_close(ctx, token->nstr);
      GRN_FREE(token);
      return ctx->rc;
    }
    /* The MeCab instance is shared, so parsing and error retrieval are
       done under the lock. */
    MUTEX_LOCK(sole_mecab_lock);
    s = mecab_sparse_tostr3(token->mecab, token->nstr->norm, len, buf, bufsize);
    if (!s) {
      strncpy(mecab_err, mecab_strerror(token->mecab), sizeof(mecab_err) - 1);
    }
    MUTEX_UNLOCK(sole_mecab_lock);
    if (s) { break; }
    GRN_FREE(buf);
    buf = NULL;
    /* Only an overflow is worth retrying with a larger buffer. */
    if (strstr(mecab_err, "output buffer overflow") == NULL) { break; }
  }
  if (!s) {
    GRN_LOG(ctx, GRN_LOG_ALERT, "mecab_sparse_tostr failed len=%u bufsize=%u err=%s",
            len, bufsize, mecab_err);
    grn_str_close(ctx, token->nstr);
    GRN_FREE(token);
    return GRN_TOKENIZER_ERROR;
  }
  // certain version of mecab returns trailing lf or spaces.
  out_len = strlen(buf);
  while (out_len > 0 && isspace((unsigned char)buf[out_len - 1])) {
    buf[--out_len] = '\0';
  }
  token->buf = (unsigned char *)buf;
  token->next = (unsigned char *)buf;
  token->end = (unsigned char *)buf + out_len;
  GRN_TEXT_INIT(&token->curr_, GRN_OBJ_DO_SHALLOW_COPY);
  GRN_UINT32_INIT(&token->stat_, 0);
  /* Publish the state only once it is fully initialised. */
  user_data->ptr = token;
  return GRN_SUCCESS;
}
Beispiel #27
0
/*
 * Search `string` (string_len bytes) for the conditions registered in `snip`
 * and compute the snippet and tag layout.
 *
 * Outputs:
 *   *nresults       number of snippets found (also stored in snip->snip_count)
 *   *max_tagged_len largest tagged snippet length computed here, including
 *                   one extra byte per snippet (also stored in
 *                   snip->max_tagged_len)
 *
 * snip->string keeps the caller's `string` pointer; no copy is made here.
 *
 * Returns GRN_INVALID_ARGUMENT when any pointer argument is NULL (checked
 * before GRN_API_ENTER), otherwise ctx->rc.
 */
grn_rc
grn_snip_exec(grn_ctx *ctx, grn_snip *snip, const char *string, unsigned int string_len,
              unsigned int *nresults, unsigned int *max_tagged_len)
{
  size_t i;
  int f = GRN_STR_WITH_CHECKS|GRN_STR_REMOVEBLANK;
  if (!snip || !string || !nresults || !max_tagged_len) {
    return GRN_INVALID_ARGUMENT;
  }
  GRN_API_ENTER;
  /* Drop the state left by a previous run. */
  exec_clean(ctx, snip);
  *nresults = 0;
  snip->nstr = grn_string_open(ctx, string, string_len, snip->normalizer, f);
  if (!snip->nstr) {
    exec_clean(ctx, snip);
    GRN_LOG(ctx, GRN_LOG_ALERT, "grn_string_open on grn_snip_exec failed !");
    GRN_API_RETURN(ctx->rc);
  }
  /* Run the first search step for every condition. */
  for (i = 0; i < snip->cond_len; i++) {
    grn_bm_tunedbm(ctx, snip->cond + i, snip->nstr, snip->flags);
  }

  {
    _snip_tag_result *tag_result = snip->tag_result;
    _snip_result *snip_result = snip->snip_result;
    size_t last_end_offset = 0, last_last_end_offset = 0;
    unsigned int unfound_cond_count = snip->cond_len;

    *max_tagged_len = 0;
    /* Outer loop: one iteration per snippet. */
    while (1) {
      size_t tagged_len = 0, last_tag_end = 0;
      int_least8_t all_stop = 1, found_cond = 0;
      snip_result->tag_count = 0;

      /* Inner loop: collect the tags that fall inside the current snippet. */
      while (1) {
        size_t min_start_offset = (size_t) -1;
        size_t max_end_offset = 0;
        snip_cond *cond = NULL;

        /* get condition which have minimum offset and is not stopped */
        for (i = 0; i < snip->cond_len; i++) {
          if (snip->cond[i].stopflag == SNIPCOND_NONSTOP &&
              (min_start_offset > snip->cond[i].start_offset ||
               (min_start_offset == snip->cond[i].start_offset &&
                max_end_offset < snip->cond[i].end_offset))) {
            min_start_offset = snip->cond[i].start_offset;
            max_end_offset = snip->cond[i].end_offset;
            cond = &snip->cond[i];
          }
        }
        if (!cond) {
          break;
        }
        /* check whether condition is the first condition in snippet */
        if (snip_result->tag_count == 0) {
          /* skip condition if the number of rest snippet field is smaller than */
          /* the number of unfound keywords. */
          if (snip->max_results - *nresults <= unfound_cond_count && cond->count > 0) {
            int_least8_t exclude_other_cond = 1;
            for (i = 0; i < snip->cond_len; i++) {
              if ((snip->cond + i) != cond
                  && snip->cond[i].end_offset <= cond->start_offset + snip->width
                  && snip->cond[i].count == 0) {
                exclude_other_cond = 0;
              }
            }
            if (exclude_other_cond) {
              grn_bm_tunedbm(ctx, cond, snip->nstr, snip->flags);
              continue;
            }
          }
          snip_result->start_offset = cond->start_offset;
          snip_result->first_tag_result_idx = snip->tag_count;
        } else {
          /* The match starts beyond this snippet's window: close the snippet. */
          if (cond->start_offset >= snip_result->start_offset + snip->width) {
            break;
          }
          /* check nesting to make valid HTML */
          /* ToDo: allow <test><te>te</te><st>st</st></test> */
          if (cond->start_offset < last_tag_end) {
            grn_bm_tunedbm(ctx, cond, snip->nstr, snip->flags);
            continue;
          }
        }
        if (cond->end_offset > snip_result->start_offset + snip->width) {
          /* If a keyword gets across a snippet, */
          /* it was skipped and never to be tagged. */
          cond->stopflag = SNIPCOND_ACROSS;
          grn_bm_tunedbm(ctx, cond, snip->nstr, snip->flags);
        } else {
          found_cond = 1;
          if (cond->count == 0) {
            unfound_cond_count--;
          }
          cond->count++;
          last_end_offset = cond->end_offset;

          /* Record the tag. */
          tag_result->cond = cond;
          tag_result->start_offset = cond->start_offset;
          tag_result->end_offset = last_tag_end = cond->end_offset;

          snip_result->tag_count++;
          tag_result++;
          tagged_len += cond->opentag_len + cond->closetag_len;
          if (++snip->tag_count >= MAX_SNIP_TAG_COUNT) {
            break;
          }
          grn_bm_tunedbm(ctx, cond, snip->nstr, snip->flags);
        }
      }
      if (!found_cond) {
        break;
      }
      /* Choose the snippet's start offset from the first match start, the */
      /* last match end and the window width, never before the end of the */
      /* previous snippet. */
      if (snip_result->start_offset + last_end_offset < snip->width) {
        snip_result->start_offset = 0;
      } else {
        snip_result->start_offset =
          MAX(MIN
              ((snip_result->start_offset + last_end_offset - snip->width) / 2,
               string_len - snip->width), last_last_end_offset);
      }
      snip_result->start_offset =
        grn_snip_find_firstbyte(string, snip->encoding, snip_result->start_offset, 1);

      snip_result->end_offset = snip_result->start_offset + snip->width;
      if (snip_result->end_offset < string_len) {
        snip_result->end_offset =
          grn_snip_find_firstbyte(string, snip->encoding, snip_result->end_offset, -1);
      } else {
        snip_result->end_offset = string_len;
      }
      last_last_end_offset = snip_result->end_offset;

      /* Add the length of the snippet text itself, plus one. */
      if (snip->mapping == (grn_snip_mapping *) -1) {
        tagged_len +=
          count_mapped_chars(&string[snip_result->start_offset],
                             &string[snip_result->end_offset]) + 1;
      } else {
        tagged_len += snip_result->end_offset - snip_result->start_offset + 1;
      }

      *max_tagged_len = MAX(*max_tagged_len, tagged_len);

      snip_result->last_tag_result_idx = snip->tag_count - 1;
      (*nresults)++;
      snip_result++;

      if (*nresults == snip->max_results || snip->tag_count == MAX_SNIP_TAG_COUNT) {
        break;
      }
      /* Re-arm every condition that is not SNIPCOND_STOP for the next snippet. */
      for (i = 0; i < snip->cond_len; i++) {
        if (snip->cond[i].stopflag != SNIPCOND_STOP) {
          all_stop = 0;
          snip->cond[i].stopflag = SNIPCOND_NONSTOP;
        }
      }
      if (all_stop) {
        break;
      }
    }
  }
  snip->snip_count = *nresults;
  snip->string = string;

  snip->max_tagged_len = *max_tagged_len;

  GRN_API_RETURN(ctx->rc);
}
Beispiel #28
0
/*
 * Allocate and initialise a grn_snip object.
 *
 * flags:        GRN_SNIP_* bits.  GRN_SNIP_COPY_TAG is forwarded to
 *               grn_snip_set_default_tag(); GRN_SNIP_NORMALIZE selects
 *               GRN_NORMALIZER_AUTO as the normalizer.
 * width:        stored as the snippet width.
 * max_results:  must be in 1..MAX_SNIP_RESULT_COUNT.
 * default*tag:  default open/close tags and their lengths.
 * mapping:      stored as-is.
 *
 * The object is registered with the ctx's database before it is returned.
 *
 * Returns the new object, or NULL on allocation failure, invalid
 * max_results, or when a default tag cannot be set.
 */
grn_snip *
grn_snip_open(grn_ctx *ctx, int flags, unsigned int width,
              unsigned int max_results,
              const char *defaultopentag, unsigned int defaultopentag_len,
              const char *defaultclosetag, unsigned int defaultclosetag_len,
              grn_snip_mapping *mapping)
{
  int copy_tag;
  grn_snip *ret = GRN_MALLOC(sizeof(grn_snip));

  if (!ret) {
    GRN_LOG(ctx, GRN_LOG_ALERT, "grn_snip allocation failed on grn_snip_open");
    return NULL;
  }
  if (max_results == 0 || max_results > MAX_SNIP_RESULT_COUNT) {
    GRN_LOG(ctx, GRN_LOG_WARNING, "max_results is invalid on grn_snip_open");
    GRN_FREE(ret);
    return NULL;
  }

  GRN_API_ENTER;

  ret->encoding = ctx->encoding;
  ret->flags = flags;
  ret->width = width;
  ret->max_results = max_results;
  ret->defaultopentag = NULL;
  ret->defaultclosetag = NULL;

  copy_tag = flags & GRN_SNIP_COPY_TAG;

  /* Default open tag. */
  if (grn_snip_set_default_tag(ctx,
                               &(ret->defaultopentag),
                               &(ret->defaultopentag_len),
                               defaultopentag, defaultopentag_len,
                               copy_tag)) {
    GRN_FREE(ret);
    GRN_API_RETURN(NULL);
  }

  /* Default close tag; on failure release a copied open tag as well. */
  if (grn_snip_set_default_tag(ctx,
                               &(ret->defaultclosetag),
                               &(ret->defaultclosetag_len),
                               defaultclosetag, defaultclosetag_len,
                               copy_tag)) {
    if (copy_tag && ret->defaultopentag) {
      GRN_FREE((void *)ret->defaultopentag);
    }
    GRN_FREE(ret);
    GRN_API_RETURN(NULL);
  }

  ret->cond_len = 0;
  ret->mapping = mapping;
  ret->nstr = NULL;
  ret->tag_count = 0;
  ret->snip_count = 0;
  ret->normalizer = (ret->flags & GRN_SNIP_NORMALIZE) ? GRN_NORMALIZER_AUTO : NULL;

  /* Register the snippet as an object of the current database. */
  GRN_DB_OBJ_SET_TYPE(ret, GRN_SNIP);
  {
    grn_obj *db = grn_ctx_db(ctx);
    grn_id id = grn_obj_register(ctx, db, NULL, 0);

    DB_OBJ(ret)->header.domain = GRN_ID_NIL;
    DB_OBJ(ret)->range = GRN_ID_NIL;
    grn_db_obj_init(ctx, db, id, DB_OBJ(ret));
  }

  GRN_API_RETURN(ret);
}
Beispiel #29
0
/*
 * Evaluate the query cell list `c` against the index `i` and merge the hits
 * into `r` according to `op`.
 *
 * If `r` is empty, nothing is done unless `op` is GRN_OP_OR; in that case the
 * hits are written straight into `r`.  If `r` already holds records, the
 * list is evaluated into a temporary hash whose object header fields are
 * copied from `r`, and the temporary is then combined with `r` through
 * grn_table_setoperation() and closed.
 *
 * The function returns nothing; failures are reported through GRN_LOG.  On
 * failure `r` is not merged with the temporary hash, and the temporary hash
 * (if any) is closed before returning.
 */
static void
exec_search(grn_ctx *ctx, grn_ii *i, grn_query *q, grn_cell *c,
            grn_hash *r, grn_operator op)
{
  grn_hash *s;
  grn_cell *e, *ope = NIL;
  int n = *r->n_entries;
  /* The first operand is always combined with OR; later operands use op1,
     which is either the explicit operator cell seen just before them or
     the query's default operator. */
  grn_operator op0 = GRN_OP_OR, *opp = &op0, op1 = q->default_op;
  if (!n && op != GRN_OP_OR) { return; }
  if (n) {
    s = grn_hash_create(ctx, NULL, r->key_size, r->value_size, r->obj.header.flags);
    if (!s) {
      /* The fields below would dereference a NULL pointer otherwise. */
      GRN_LOG(ctx, GRN_LOG_ERROR, "grn_hash_create on exec_search failed !");
      return;
    }
    s->obj.header.impl_flags = 0;
    s->obj.header.domain = r->obj.header.domain;
    s->obj.range = r->obj.range;
    s->obj.max_n_subrecs = r->obj.max_n_subrecs;
    s->obj.subrec_size = r->obj.subrec_size;
    s->obj.subrec_offset = r->obj.subrec_offset;
    s->obj.id = r->obj.id;
    s->obj.db = r->obj.db;
    s->obj.source = r->obj.source;
    s->obj.source_size = r->obj.source_size;
    /* NOTE: s->obj.hooks[] is not touched here. */
  } else {
    s = r;
  }
  while (c != NIL) {
    POP(e, c);
    switch (e->header.type) {
    case GRN_CELL_OP :
      if (opp == &op0 && e->u.op.op == GRN_OP_BUT) {
        /* A BUT operator before any operand has been processed: pop once
           more so that the cell following it is dropped as well. */
        POP(e, c);
      } else {
        ope = e;
        op1 = ope->u.op.op;
      }
      continue;
    case GRN_CELL_STR :
      if (ope != NIL) {
        /* Options come from the operator cell that preceded this keyword. */
        q->opt.mode = ope->u.op.mode == -1 ? q->default_mode : ope->u.op.mode;
        q->opt.max_interval = q->opt.similarity_threshold = ope->u.op.option;
        if (!q->opt.weight_vector) {
          q->opt.vector_size = ope->u.op.weight + q->weight_offset;
        }
        if (ope->u.op.mode == GRN_OP_SIMILAR) {
          /* NOTE(review): max_interval is overwritten with the default mode
             for similar search -- confirm against grn_ii_select. */
          q->opt.max_interval = q->default_mode;
        }
      } else {
        q->opt.mode = q->default_mode;
        q->opt.max_interval = DEFAULT_MAX_INTERVAL;
        q->opt.similarity_threshold = DEFAULT_SIMILARITY_THRESHOLD;
        if (!q->opt.weight_vector) {
          q->opt.vector_size = DEFAULT_WEIGHT + q->weight_offset;
        }
      }
      if (grn_ii_select(ctx, i, e->u.b.value, e->u.b.size, s, *opp, &q->opt)) {
        GRN_LOG(ctx, GRN_LOG_ERROR, "grn_inv_select on exec_search failed !");
        /* Do not leak the temporary hash created above. */
        if (s != r) { grn_hash_close(ctx, s); }
        return;
      }
      break;
    case GRN_CELL_LIST :
      /* Nested list: evaluate it recursively into s. */
      exec_search(ctx, i, q, e, s, *opp);
      break;
    default :
      GRN_LOG(ctx, GRN_LOG_NOTICE, "invalid object assigned in query (%d)", e->header.type);
      break;
    }
    /* After the first operand, switch to op1 and reset the per-operand
       operator state. */
    opp = &op1;
    ope = NIL;
    op1 = q->default_op;
  }
  if (n) {
    grn_table_setoperation(ctx, (grn_obj *)r, (grn_obj *)s, (grn_obj *)r, op);
    grn_hash_close(ctx, s);
  }
}
Beispiel #30
0
/* TODO: delete overlapping logic with exec_query */
/*
 * Walk the query cell list `c` and scan every keyword in it against `nstr`,
 * then fold the outcome into `*found` and `*score` according to `op`.
 *
 * `sc` points at a cursor into the snip_cond entries; it is advanced by one
 * for every keyword cell processed.  When `flags` has
 * GRN_QUERY_SCAN_ALLOCCONDS set, the current condition is initialised with
 * grn_snip_cond_init(); otherwise it is reset with grn_snip_cond_reinit().
 *
 * Returns GRN_SUCCESS, or the error code of the first grn_snip_cond_init()
 * failure, including one raised inside a nested list.  On error `*found`
 * and `*score` are left unmodified by this call.
 */
static grn_rc
scan_query(grn_ctx *ctx, grn_query *q, grn_str *nstr, grn_id section, grn_cell *c, snip_cond **sc,
           grn_operator op, int flags, int *found, int *score)
{
  int _found = 0, _score = 0;
  grn_cell *e, *ope = NIL;
  /* The first operand is always combined with OR; later operands use op1,
     which is either the explicit operator cell seen just before them or
     the query's default operator. */
  grn_operator op0 = GRN_OP_OR, *opp = &op0, op1 = q->default_op;
  while (c != NIL) {
    POP(e, c);
    switch (e->header.type) {
    case GRN_CELL_OP :
      if (opp == &op0 && e->u.op.op == GRN_OP_BUT) {
        /* A BUT operator before any operand has been processed: pop once
           more so that the cell following it is dropped as well. */
        POP(e, c);
      } else {
        ope = e;
        op1 = ope->u.op.op;
      }
      continue;
    case GRN_CELL_STR :
      if (ope != NIL) {
        /* Options come from the operator cell that preceded this keyword. */
        q->opt.mode = ope->u.op.mode == -1 ? q->default_mode : ope->u.op.mode;
        q->opt.max_interval = q->opt.similarity_threshold = ope->u.op.option;
        if (!q->opt.weight_vector) {
          q->opt.vector_size = ope->u.op.weight + q->weight_offset;
        }
      } else {
        q->opt.mode = q->default_mode;
        q->opt.max_interval = DEFAULT_MAX_INTERVAL;
        q->opt.similarity_threshold = DEFAULT_SIMILARITY_THRESHOLD;
        if (!q->opt.weight_vector) {
          q->opt.vector_size = DEFAULT_WEIGHT + q->weight_offset;
        }
      }
      if ((flags & GRN_QUERY_SCAN_ALLOCCONDS)) {
        grn_rc rc;
        /* NOTE: GRN_SNIP_NORMALIZE = GRN_QUERY_SCAN_NORMALIZE */
        if ((rc = grn_snip_cond_init(ctx, *sc, e->u.b.value, e->u.b.size,
                                     q->encoding, flags & GRN_SNIP_NORMALIZE))) {
          return rc;
        }
      } else {
        grn_snip_cond_reinit(*sc);
      }
      scan_keyword(*sc, nstr, section, *opp, &q->opt, &_found, &_score);
      (*sc)++;
      break;
    case GRN_CELL_LIST :
      {
        /* Propagate failures from the nested list instead of dropping them;
           continuing would scan with a condition cursor that was not
           advanced for the failed keyword. */
        grn_rc rc = scan_query(ctx, q, nstr, section, e, sc, *opp, flags,
                               &_found, &_score);
        if (rc) {
          return rc;
        }
      }
      break;
    default :
      GRN_LOG(ctx, GRN_LOG_NOTICE, "invalid object assigned in query! (%d)", e->header.type);
      break;
    }
    /* After the first operand, switch to op1 and reset the per-operand
       operator state. */
    opp = &op1;
    ope = NIL;
    op1 = q->default_op;
  }
  /* Combine this list's result with the caller's accumulators. */
  switch (op) {
  case GRN_OP_OR :
    *found |= _found;
    *score += _score;
    break;
  case GRN_OP_AND :
    *found &= _found;
    *score += _score;
    break;
  case GRN_OP_BUT :
    *found &= !_found;
    break;
  case GRN_OP_ADJUST :
    *score += _score;
    break;
  default :
    break;
  }
  return GRN_SUCCESS;
}