Beispiel #1
0
inline static void
get_pragma(grn_ctx *ctx, grn_query *q)
{
  char *start, *end = q->cur;
  while (end < q->str_end && *end == GRN_QUERY_PREFIX) {
    if (++end >= q->str_end) { break; }
    switch (*end) {
    case 'E' :
      start = ++end;
      q->escalation_threshold = grn_atoi(start, q->str_end, (const char **)&end);
      while (end < q->str_end && (isdigit(*end) || *end == '-')) { end++; }
      if (*end == ',') {
        start = ++end;
        q->escalation_decaystep = grn_atoi(start, q->str_end, (const char **)&end);
      }
      q->cur = end;
      break;
    case 'D' :
      start = ++end;
      while (end < q->str_end && *end != GRN_QUERY_PREFIX && !grn_isspace(end, q->encoding)) {
        end++;
      }
      if (end > start) {
        switch (*start) {
        case 'O' :
          q->default_op = GRN_OP_OR;
          break;
        case GRN_QUERY_AND :
          q->default_op = GRN_OP_AND;
          break;
        case GRN_QUERY_BUT :
          q->default_op = GRN_OP_BUT;
          break;
        case GRN_QUERY_ADJ_INC :
          q->default_op = GRN_OP_ADJUST;
          break;
        }
      }
      q->cur = end;
      break;
    case 'W' :
      start = ++end;
      end = (char *)get_weight_vector(ctx, q, start);
      q->cur = end;
      break;
    }
  }
}
Beispiel #2
0
static grn_obj *
proc_table_create(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
    uint32_t nvars;
    grn_obj *buf = args[0];
    grn_expr_var *vars;
    grn_proc_get_info(ctx, user_data, &vars, &nvars, NULL);
    if (nvars == 6) {
        grn_obj *table;
        grn_obj_flags flags = grn_atoi(GRN_TEXT_VALUE(&vars[1].value),
                                       GRN_BULK_CURR(&vars[1].value), NULL);
        if (GRN_TEXT_LEN(&vars[0].value)) {
            flags |= GRN_OBJ_PERSISTENT;
        }
        table = grn_table_create(ctx,
                                 GRN_TEXT_VALUE(&vars[0].value),
                                 GRN_TEXT_LEN(&vars[0].value),
                                 NULL, flags,
                                 grn_ctx_get(ctx, GRN_TEXT_VALUE(&vars[2].value),
                                             GRN_TEXT_LEN(&vars[2].value)),
                                 grn_ctx_get(ctx, GRN_TEXT_VALUE(&vars[3].value),
                                             GRN_TEXT_LEN(&vars[3].value)));
        if (table) {
            grn_obj_set_info(ctx, table,
                             GRN_INFO_DEFAULT_TOKENIZER,
                             grn_ctx_get(ctx, GRN_TEXT_VALUE(&vars[4].value),
                                         GRN_TEXT_LEN(&vars[4].value)));
            grn_obj_unlink(ctx, table);
        }
        GRN_TEXT_PUTS(ctx, buf, ctx->rc ? "false" : "true");
    }
    return buf;
}
Beispiel #3
0
inline static grn_cell *
get_op(grn_query *q, grn_operator op, int weight)
{
  char *start, *end = q->cur;
  int mode, option;
  switch (*end) {
  case 'S' :
    mode = GRN_OP_SIMILAR;
    start = ++end;
    option = grn_atoi(start, q->str_end, (const char **)&end);
    if (start == end) { option = DEFAULT_SIMILARITY_THRESHOLD; }
    q->cur = end;
    break;
  case 'N' :
    mode = GRN_OP_NEAR;
    start = ++end;
    option = grn_atoi(start, q->str_end, (const char **)&end);
    if (start == end) { option = DEFAULT_MAX_INTERVAL; }
    q->cur = end;
    break;
  case 'n' :
    mode = GRN_OP_NEAR2;
    start = ++end;
    option = grn_atoi(start, q->str_end, (const char **)&end);
    if (start == end) { option = DEFAULT_MAX_INTERVAL; }
    q->cur = end;
    break;
  case 'T' :
    mode = GRN_OP_TERM_EXTRACT;
    start = ++end;
    option = grn_atoi(start, q->str_end, (const char **)&end);
    if (start == end) { option = DEFAULT_TERM_EXTRACT_POLICY; }
    q->cur = end;
    break;
  case 'X' : /* force exact mode */
    op = GRN_OP_AND;
    mode = GRN_OP_EXACT;
    option = 0;
    start = ++end;
    q->cur = end;
    break;
  default :
    return NIL;
  }
  return op_new(q, op, weight, mode, option);
}
Beispiel #4
0
grn_rc
grn_str2timeval(const char *str, uint32_t str_len, grn_timeval *tv)
{
  struct tm tm;
  const char *r1, *r2, *rend = str + str_len;
  uint32_t uv;
  memset(&tm, 0, sizeof(struct tm));

  tm.tm_year = (int)grn_atoui(str, rend, &r1) - 1900;
  if ((r1 + 1) >= rend || (*r1 != '/' && *r1 != '-')) {
    return GRN_INVALID_ARGUMENT;
  }
  r1++;
  tm.tm_mon = (int)grn_atoui(r1, rend, &r1) - 1;
  if ((r1 + 1) >= rend || (*r1 != '/' && *r1 != '-') ||
      tm.tm_mon < 0 || tm.tm_mon >= 12) { return GRN_INVALID_ARGUMENT; }
  r1++;
  tm.tm_mday = (int)grn_atoui(r1, rend, &r1);
  if ((r1 + 1) >= rend || *r1 != ' ' ||
      tm.tm_mday < 1 || tm.tm_mday > 31) { return GRN_INVALID_ARGUMENT; }

  tm.tm_hour = (int)grn_atoui(++r1, rend, &r2);
  if ((r2 + 1) >= rend || r1 == r2 || *r2 != ':' ||
      tm.tm_hour < 0 || tm.tm_hour >= 24) {
    return GRN_INVALID_ARGUMENT;
  }
  r1 = r2 + 1;
  tm.tm_min = (int)grn_atoui(r1, rend, &r2);
  if ((r2 + 1) >= rend || r1 == r2 || *r2 != ':' ||
      tm.tm_min < 0 || tm.tm_min >= 60) {
    return GRN_INVALID_ARGUMENT;
  }
  r1 = r2 + 1;
  tm.tm_sec = (int)grn_atoui(r1, rend, &r2);
  if (r1 == r2 ||
      tm.tm_sec < 0 || tm.tm_sec > 61 /* leap 2sec */) {
    return GRN_INVALID_ARGUMENT;
  }
  r1 = r2;
  tm.tm_yday = -1;
  tm.tm_isdst = -1;

  /* tm_yday is set appropriately (0-365) on successful completion. */
  tv->tv_sec = mktime(&tm);
  if (tm.tm_yday == -1) { return GRN_INVALID_ARGUMENT; }
  if ((r1 + 1) < rend && *r1 == '.') { r1++; }
  uv = grn_atoi(r1, rend, &r2);
  while (r2 < r1 + 6) {
    uv *= 10;
    r2++;
  }
  if (uv >= GRN_TIME_USEC_PER_SEC) { return GRN_INVALID_ARGUMENT; }
  tv->tv_nsec = GRN_TIME_USEC_TO_NSEC(uv);
  return GRN_SUCCESS;
}
Beispiel #5
0
static grn_obj *
proc_column_create(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
    uint32_t nvars;
    grn_obj *buf = args[0];
    grn_expr_var *vars;
    grn_proc_get_info(ctx, user_data, &vars, &nvars, NULL);
    if (nvars == 6) {
        grn_obj_flags flags = grn_atoi(GRN_TEXT_VALUE(&vars[2].value),
                                       GRN_BULK_CURR(&vars[2].value), NULL);
        grn_obj *column, *table = grn_ctx_get(ctx, GRN_TEXT_VALUE(&vars[0].value),
                                              GRN_TEXT_LEN(&vars[0].value));
        grn_obj *type = grn_ctx_get(ctx, GRN_TEXT_VALUE(&vars[3].value),
                                    GRN_TEXT_LEN(&vars[3].value));
        if (GRN_TEXT_LEN(&vars[1].value)) {
            flags |= GRN_OBJ_PERSISTENT;
        }
        column = grn_column_create(ctx, table,
                                   GRN_TEXT_VALUE(&vars[1].value),
                                   GRN_TEXT_LEN(&vars[1].value),
                                   NULL, flags, type);
        if (column) {
            if (GRN_TEXT_LEN(&vars[4].value)) {
                grn_obj sources, source_ids, **p, **pe;
                GRN_PTR_INIT(&sources, GRN_OBJ_VECTOR, GRN_ID_NIL);
                GRN_UINT32_INIT(&source_ids, GRN_OBJ_VECTOR);
                grn_obj_columns(ctx, type,
                                GRN_TEXT_VALUE(&vars[4].value),
                                GRN_TEXT_LEN(&vars[4].value),
                                &sources);
                p = (grn_obj **)GRN_BULK_HEAD(&sources);
                pe = (grn_obj **)GRN_BULK_CURR(&sources);
                while (p < pe) {
                    grn_id source_id = grn_obj_id(ctx, *p++);
                    if (source_id) {
                        GRN_UINT32_PUT(ctx, &source_ids, source_id);
                    }
                }
                if (GRN_BULK_VSIZE(&source_ids)) {
                    grn_obj_set_info(ctx, column, GRN_INFO_SOURCE, &source_ids);
                }
                GRN_OBJ_FIN(ctx, &source_ids);
                GRN_OBJ_FIN(ctx, &sources);
            }
            grn_obj_unlink(ctx, column);
        }
        GRN_TEXT_PUTS(ctx, buf, ctx->rc ? "false" : "true");
    }
    return buf;
}
Beispiel #6
0
grn_rc
grn_str2timeval(const char *str, uint32_t str_len, grn_timeval *tv)
{
  struct tm tm;
  const char *r1, *r2, *rend = str + str_len;
  uint32_t uv;
  memset(&tm, 0, sizeof(struct tm));

  tm.tm_year = (int)grn_atoui(str, rend, &r1) - 1900;
  if ((r1 + 1) >= rend || (*r1 != '/' && *r1 != '-') ||
      tm.tm_year < 0) { return GRN_INVALID_ARGUMENT; }
  r1++;
  tm.tm_mon = (int)grn_atoui(r1, rend, &r1) - 1;
  if ((r1 + 1) >= rend || (*r1 != '/' && *r1 != '-') ||
      tm.tm_mon < 0 || tm.tm_mon >= 12) { return GRN_INVALID_ARGUMENT; }
  r1++;
  tm.tm_mday = (int)grn_atoui(r1, rend, &r1);
  if ((r1 + 1) >= rend || *r1 != ' ' ||
      tm.tm_mday < 1 || tm.tm_mday > 31) { return GRN_INVALID_ARGUMENT; }

  tm.tm_hour = (int)grn_atoui(++r1, rend, &r2);
  if ((r2 + 1) >= rend || r1 == r2 || *r2 != ':' ||
      tm.tm_hour < 0 || tm.tm_hour >= 24) {
    return GRN_INVALID_ARGUMENT;
  }
  r1 = r2 + 1;
  tm.tm_min = (int)grn_atoui(r1, rend, &r2);
  if ((r2 + 1) >= rend || r1 == r2 || *r2 != ':' ||
      tm.tm_min < 0 || tm.tm_min >= 60) {
    return GRN_INVALID_ARGUMENT;
  }
  r1 = r2 + 1;
  tm.tm_sec = (int)grn_atoui(r1, rend, &r2);
  if (r1 == r2 ||
      tm.tm_sec < 0 || tm.tm_sec > 61 /* leap 2sec */) {
    return GRN_INVALID_ARGUMENT;
  }
  r1 = r2;

  if ((tv->tv_sec = (int32_t) mktime(&tm)) == -1) { return GRN_INVALID_ARGUMENT; }
  if ((r1 + 1) < rend && *r1 == '.') { r1++; }
  uv = grn_atoi(r1, rend, &r2);
  while (r2 < r1 + 6) {
    uv *= 10;
    r2++;
  }
  if (uv >= 1000000) { return GRN_INVALID_ARGUMENT; }
  tv->tv_usec = uv;
  return GRN_SUCCESS;
}
Beispiel #7
0
static grn_obj *
proc_select(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
    uint32_t nvars;
    grn_expr_var *vars;
    grn_obj *outbuf = args[0];
    grn_proc_get_info(ctx, user_data, &vars, &nvars, NULL);
    if (nvars == 15) {
        int offset = GRN_TEXT_LEN(&vars[7].value)
                     ? grn_atoi(GRN_TEXT_VALUE(&vars[7].value), GRN_BULK_CURR(&vars[7].value), NULL)
                     : 0;
        int limit = GRN_TEXT_LEN(&vars[8].value)
                    ? grn_atoi(GRN_TEXT_VALUE(&vars[8].value), GRN_BULK_CURR(&vars[8].value), NULL)
                    : DEFAULT_LIMIT;
        char *output_columns = GRN_TEXT_VALUE(&vars[6].value);
        uint32_t output_columns_len = GRN_TEXT_LEN(&vars[6].value);
        if (!output_columns_len) {
            output_columns = DEFAULT_OUTPUT_COLUMNS;
            output_columns_len = strlen(DEFAULT_OUTPUT_COLUMNS);
        }
        grn_search(ctx, outbuf, GET_OTYPE(&vars[14].value),
                   GRN_TEXT_VALUE(&vars[0].value), GRN_TEXT_LEN(&vars[0].value),
                   GRN_TEXT_VALUE(&vars[1].value), GRN_TEXT_LEN(&vars[1].value),
                   GRN_TEXT_VALUE(&vars[2].value), GRN_TEXT_LEN(&vars[2].value),
                   GRN_TEXT_VALUE(&vars[3].value), GRN_TEXT_LEN(&vars[3].value),
                   GRN_TEXT_VALUE(&vars[4].value), GRN_TEXT_LEN(&vars[4].value),
                   GRN_TEXT_VALUE(&vars[5].value), GRN_TEXT_LEN(&vars[5].value),
                   output_columns, output_columns_len,
                   offset, limit,
                   GRN_TEXT_VALUE(&vars[9].value), GRN_TEXT_LEN(&vars[9].value),
                   GRN_TEXT_VALUE(&vars[10].value), GRN_TEXT_LEN(&vars[10].value),
                   GRN_TEXT_VALUE(&vars[11].value), GRN_TEXT_LEN(&vars[11].value),
                   grn_atoi(GRN_TEXT_VALUE(&vars[12].value), GRN_BULK_CURR(&vars[12].value), NULL),
                   grn_atoi(GRN_TEXT_VALUE(&vars[13].value), GRN_BULK_CURR(&vars[13].value), NULL));
    }
    return outbuf;
}
Beispiel #8
0
static const char *
get_weight_vector(grn_ctx *ctx, grn_query *query, const char *source)
{
  const char *p;

  if (!query->opt.weight_vector &&
      !query->weight_set &&
      !(query->opt.weight_vector = GRN_CALLOC(sizeof(int) * DEFAULT_WEIGHT_VECTOR_SIZE))) {
    GRN_LOG(ctx, GRN_LOG_ALERT, "get_weight_vector malloc fail");
    return source;
  }
  for (p = source; p < query->str_end; ) {
    unsigned int key;
    int value;

    /* key, key is not zero */
    key = grn_atoui(p, query->str_end, &p);
    if (!key || key > GRN_ID_MAX) { break; }

    /* value */
    if (*p == ':') {
      p++;
      value = grn_atoi(p, query->str_end, &p);
    } else {
      value = 1;
    }

    if (query->weight_set) {
      int *pval;
      if (grn_hash_add(ctx, query->weight_set, &key, sizeof(unsigned int), (void **)&pval, NULL)) {
        *pval = value;
      }
    } else if (key < DEFAULT_WEIGHT_VECTOR_SIZE) {
      query->opt.weight_vector[key - 1] = value;
    } else {
      GRN_FREE(query->opt.weight_vector);
      query->opt.weight_vector = NULL;
      if (!(query->weight_set = grn_hash_create(ctx, NULL, sizeof(unsigned int), sizeof(int),
                                                0))) {
        return source;
      }
      p = source;           /* reparse */
      continue;
    }
    if (*p != ',') { break; }
    p++;
  }
  return p;
}
Beispiel #9
0
/*
  This function initializes a plugin. This function fails if there is no
  dictionary that uses the context encoding of groonga.
 */
grn_rc
GRN_PLUGIN_INIT(grn_ctx *ctx)
{
  {
    char env[GRN_ENV_BUFFER_SIZE];

    grn_getenv("GRN_MECAB_CHUNKED_TOKENIZE_ENABLED",
               env,
               GRN_ENV_BUFFER_SIZE);
    grn_mecab_chunked_tokenize_enabled = (env[0] && strcmp(env, "yes") == 0);
  }

  {
    char env[GRN_ENV_BUFFER_SIZE];

    grn_getenv("GRN_MECAB_CHUNK_SIZE_THRESHOLD",
               env,
               GRN_ENV_BUFFER_SIZE);
    if (env[0]) {
      int threshold = -1;
      const char *end;
      const char *rest;

      end = env + strlen(env);
      threshold = grn_atoi(env, end, &rest);
      if (end > env && end == rest) {
        grn_mecab_chunk_size_threshold = threshold;
      }
    }
  }

  sole_mecab = NULL;
  sole_mecab_mutex = grn_plugin_mutex_open(ctx);
  if (!sole_mecab_mutex) {
    GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE,
                     "[tokenizer][mecab] grn_plugin_mutex_open() failed");
    return ctx->rc;
  }

  check_mecab_dictionary_encoding(ctx);
  return ctx->rc;
}