/*
 * Consume the pragma directives found at q->cur.
 *
 * A pragma is GRN_QUERY_PREFIX followed by a one-letter directive:
 *   E<threshold>[,<decaystep>]  stores the parsed numbers in
 *                               q->escalation_threshold and, when the comma
 *                               part is present, q->escalation_decaystep.
 *   D<op>                       sets q->default_op from the first character
 *                               of the token ('O', GRN_QUERY_AND,
 *                               GRN_QUERY_BUT or GRN_QUERY_ADJ_INC); other
 *                               characters leave default_op untouched.
 *   W<weights>                  is handed to get_weight_vector().
 *
 * q->cur is moved past each recognized directive. An unrecognized directive
 * letter leaves q->cur where the last recognized directive ended.
 * Only bytes in [q->cur, q->str_end) are examined.
 */
inline static void
get_pragma(grn_ctx *ctx, grn_query *q)
{
  char *start, *end = q->cur;
  while (end < q->str_end && *end == GRN_QUERY_PREFIX) {
    /* A prefix character at the very end of the input has no directive. */
    if (++end >= q->str_end) { break; }
    switch (*end) {
    case 'E' :
      start = ++end;
      q->escalation_threshold = grn_atoi(start, q->str_end, (const char **)&end);
      /* Skip whatever digits/minus signs grn_atoi() left unconsumed.
         The cast keeps isdigit() defined for bytes >= 0x80. */
      while (end < q->str_end &&
             (isdigit((unsigned char)*end) || *end == '-')) {
        end++;
      }
      /* Do not look at *end when the threshold ran up to the end of input. */
      if (end < q->str_end && *end == ',') {
        start = ++end;
        q->escalation_decaystep = grn_atoi(start, q->str_end, (const char **)&end);
      }
      q->cur = end;
      break;
    case 'D' :
      start = ++end;
      /* The token runs until the next prefix, whitespace or end of input. */
      while (end < q->str_end && *end != GRN_QUERY_PREFIX &&
             !grn_isspace(end, q->encoding)) {
        end++;
      }
      if (end > start) {
        /* Only the first character of the token selects the operator. */
        switch (*start) {
        case 'O' :
          q->default_op = GRN_OP_OR;
          break;
        case GRN_QUERY_AND :
          q->default_op = GRN_OP_AND;
          break;
        case GRN_QUERY_BUT :
          q->default_op = GRN_OP_BUT;
          break;
        case GRN_QUERY_ADJ_INC :
          q->default_op = GRN_OP_ADJUST;
          break;
        default :
          break;
        }
      }
      q->cur = end;
      break;
    case 'W' :
      start = ++end;
      end = (char *)get_weight_vector(ctx, q, start);
      q->cur = end;
      break;
    default :
      /* Unknown directive: q->cur is intentionally left unchanged. */
      break;
    }
  }
}
/*
 * Command procedure: create a table from the six procedure variables.
 *
 *   vars[0]  table name (a non-empty name adds GRN_OBJ_PERSISTENT)
 *   vars[1]  numeric flags, parsed with grn_atoi()
 *   vars[2]  name of the object passed as the 6th grn_table_create() argument
 *   vars[3]  name of the object passed as the 7th grn_table_create() argument
 *   vars[4]  name of the object installed as GRN_INFO_DEFAULT_TOKENIZER
 *
 * Appends "true" or "false" (depending on ctx->rc) to args[0] and returns
 * args[0]. When the variable count is not 6 nothing is created or written.
 */
static grn_obj *
proc_table_create(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  grn_obj *buf = args[0];
  grn_expr_var *vars;
  uint32_t nvars;
  grn_obj_flags flags;
  grn_obj *sixth_arg;
  grn_obj *seventh_arg;
  grn_obj *table;

  grn_proc_get_info(ctx, user_data, &vars, &nvars, NULL);
  if (nvars != 6) {
    return buf;
  }

  flags = grn_atoi(GRN_TEXT_VALUE(&vars[1].value),
                   GRN_BULK_CURR(&vars[1].value), NULL);
  if (GRN_TEXT_LEN(&vars[0].value) != 0) {
    flags |= GRN_OBJ_PERSISTENT;
  }

  sixth_arg = grn_ctx_get(ctx,
                          GRN_TEXT_VALUE(&vars[2].value),
                          GRN_TEXT_LEN(&vars[2].value));
  seventh_arg = grn_ctx_get(ctx,
                            GRN_TEXT_VALUE(&vars[3].value),
                            GRN_TEXT_LEN(&vars[3].value));
  table = grn_table_create(ctx,
                           GRN_TEXT_VALUE(&vars[0].value),
                           GRN_TEXT_LEN(&vars[0].value),
                           NULL, flags, sixth_arg, seventh_arg);
  if (table != NULL) {
    grn_obj *tokenizer = grn_ctx_get(ctx,
                                     GRN_TEXT_VALUE(&vars[4].value),
                                     GRN_TEXT_LEN(&vars[4].value));
    grn_obj_set_info(ctx, table, GRN_INFO_DEFAULT_TOKENIZER, tokenizer);
    grn_obj_unlink(ctx, table);
  }

  /* The reply reflects the context status, not just the table pointer. */
  if (ctx->rc) {
    GRN_TEXT_PUTS(ctx, buf, "false");
  } else {
    GRN_TEXT_PUTS(ctx, buf, "true");
  }
  return buf;
}
/*
 * Parse a one-letter match-mode specifier at q->cur and build the
 * corresponding operator cell with op_new().
 *
 *   S[n]  GRN_OP_SIMILAR       (n defaults to DEFAULT_SIMILARITY_THRESHOLD)
 *   N[n]  GRN_OP_NEAR          (n defaults to DEFAULT_MAX_INTERVAL)
 *   n[n]  GRN_OP_NEAR2         (n defaults to DEFAULT_MAX_INTERVAL)
 *   T[n]  GRN_OP_TERM_EXTRACT  (n defaults to DEFAULT_TERM_EXTRACT_POLICY)
 *   X     GRN_OP_EXACT; the operator is forced to GRN_OP_AND, option is 0
 *
 * q->cur is advanced past the specifier and its number. Any other character
 * returns NIL without touching q->cur.
 */
inline static grn_cell *
get_op(grn_query *q, grn_operator op, int weight)
{
  char *arg;
  char *rest;
  int mode;
  int option = 0;
  int fallback = 0;
  int takes_number = 1;

  switch (*q->cur) {
  case 'S' :
    mode = GRN_OP_SIMILAR;
    fallback = DEFAULT_SIMILARITY_THRESHOLD;
    break;
  case 'N' :
    mode = GRN_OP_NEAR;
    fallback = DEFAULT_MAX_INTERVAL;
    break;
  case 'n' :
    mode = GRN_OP_NEAR2;
    fallback = DEFAULT_MAX_INTERVAL;
    break;
  case 'T' :
    mode = GRN_OP_TERM_EXTRACT;
    fallback = DEFAULT_TERM_EXTRACT_POLICY;
    break;
  case 'X' :
    /* force exact mode */
    op = GRN_OP_AND;
    mode = GRN_OP_EXACT;
    takes_number = 0;
    break;
  default :
    return NIL;
  }

  /* Step over the mode letter itself. */
  arg = q->cur + 1;
  rest = arg;
  if (takes_number) {
    option = grn_atoi(arg, q->str_end, (const char **)&rest);
    if (rest == arg) {
      /* No number followed the letter: use the per-mode default. */
      option = fallback;
    }
  }
  q->cur = rest;

  return op_new(q, op, weight, mode, option);
}
/*
 * Parse "YYYY/MM/DD hh:mm:ss[.fraction]" ('-' is accepted in place of '/')
 * from the str_len bytes at str into *tv.
 *
 * The broken-down time is converted with mktime(); tv->tv_sec receives its
 * result and tv->tv_nsec the fraction, which is read as microseconds
 * (right-padded to six digits) and converted with GRN_TIME_USEC_TO_NSEC().
 *
 * Returns GRN_SUCCESS, or GRN_INVALID_ARGUMENT when a separator is missing,
 * a field is out of range, mktime() cannot represent the time, or the
 * fraction is not below GRN_TIME_USEC_PER_SEC. tv->tv_sec may already have
 * been written when the failure is detected after the mktime() call.
 */
grn_rc
grn_str2timeval(const char *str, uint32_t str_len, grn_timeval *tv)
{
  struct tm parts;
  const char *const limit = str + str_len;
  const char *pos;
  const char *next;
  uint32_t usec;

  memset(&parts, 0, sizeof parts);

  /* year, followed by '/' or '-' and at least one more byte */
  parts.tm_year = (int)grn_atoui(str, limit, &pos) - 1900;
  if ((pos + 1) >= limit) { return GRN_INVALID_ARGUMENT; }
  if (*pos != '/' && *pos != '-') { return GRN_INVALID_ARGUMENT; }
  pos++;

  /* month (1-12 in the text, 0-11 in struct tm) */
  parts.tm_mon = (int)grn_atoui(pos, limit, &pos) - 1;
  if ((pos + 1) >= limit) { return GRN_INVALID_ARGUMENT; }
  if (*pos != '/' && *pos != '-') { return GRN_INVALID_ARGUMENT; }
  if (!(0 <= parts.tm_mon && parts.tm_mon < 12)) { return GRN_INVALID_ARGUMENT; }
  pos++;

  /* day of month, followed by a single space */
  parts.tm_mday = (int)grn_atoui(pos, limit, &pos);
  if ((pos + 1) >= limit) { return GRN_INVALID_ARGUMENT; }
  if (*pos != ' ') { return GRN_INVALID_ARGUMENT; }
  if (!(1 <= parts.tm_mday && parts.tm_mday <= 31)) { return GRN_INVALID_ARGUMENT; }
  pos++;

  /* hours; from here on an empty number is rejected explicitly */
  parts.tm_hour = (int)grn_atoui(pos, limit, &next);
  if ((next + 1) >= limit) { return GRN_INVALID_ARGUMENT; }
  if (next == pos || *next != ':') { return GRN_INVALID_ARGUMENT; }
  if (!(0 <= parts.tm_hour && parts.tm_hour < 24)) { return GRN_INVALID_ARGUMENT; }
  pos = next + 1;

  /* minutes */
  parts.tm_min = (int)grn_atoui(pos, limit, &next);
  if ((next + 1) >= limit) { return GRN_INVALID_ARGUMENT; }
  if (next == pos || *next != ':') { return GRN_INVALID_ARGUMENT; }
  if (!(0 <= parts.tm_min && parts.tm_min < 60)) { return GRN_INVALID_ARGUMENT; }
  pos = next + 1;

  /* seconds; up to 61 is accepted (leap 2sec) */
  parts.tm_sec = (int)grn_atoui(pos, limit, &next);
  if (next == pos) { return GRN_INVALID_ARGUMENT; }
  if (!(0 <= parts.tm_sec && parts.tm_sec <= 61)) { return GRN_INVALID_ARGUMENT; }
  pos = next;

  /* tm_yday is set appropriately (0-365) on successful completion,
     so a value that is still -1 afterwards means mktime() failed. */
  parts.tm_yday = -1;
  parts.tm_isdst = -1;
  tv->tv_sec = mktime(&parts);
  if (parts.tm_yday == -1) { return GRN_INVALID_ARGUMENT; }

  /* optional fraction: skip the '.' only when something follows it */
  if ((pos + 1) < limit && *pos == '.') { pos++; }
  usec = grn_atoi(pos, limit, &next);
  /* scale a short fraction up to microseconds (".5" -> 500000) */
  for (; next < pos + 6; next++) {
    usec *= 10;
  }
  if (usec >= GRN_TIME_USEC_PER_SEC) { return GRN_INVALID_ARGUMENT; }
  tv->tv_nsec = GRN_TIME_USEC_TO_NSEC(usec);

  return GRN_SUCCESS;
}
/*
 * Command procedure: create a column from the six procedure variables.
 *
 *   vars[0]  name of the table that receives the column
 *   vars[1]  column name (a non-empty name adds GRN_OBJ_PERSISTENT)
 *   vars[2]  numeric flags, parsed with grn_atoi()
 *   vars[3]  name of the column's type object
 *   vars[4]  optional source column names, resolved against the type object
 *            with grn_obj_columns(); the resulting ids are installed as
 *            GRN_INFO_SOURCE when at least one id was found
 *
 * Appends "true" or "false" (depending on ctx->rc) to args[0] and returns
 * args[0]. When the variable count is not 6 nothing is created or written.
 */
static grn_obj *
proc_column_create(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  grn_obj *buf = args[0];
  grn_expr_var *vars;
  uint32_t nvars;
  grn_obj_flags flags;
  grn_obj *table;
  grn_obj *type;
  grn_obj *column;

  grn_proc_get_info(ctx, user_data, &vars, &nvars, NULL);
  if (nvars != 6) {
    return buf;
  }

  flags = grn_atoi(GRN_TEXT_VALUE(&vars[2].value),
                   GRN_BULK_CURR(&vars[2].value), NULL);
  table = grn_ctx_get(ctx,
                      GRN_TEXT_VALUE(&vars[0].value),
                      GRN_TEXT_LEN(&vars[0].value));
  type = grn_ctx_get(ctx,
                     GRN_TEXT_VALUE(&vars[3].value),
                     GRN_TEXT_LEN(&vars[3].value));
  if (GRN_TEXT_LEN(&vars[1].value) != 0) {
    flags |= GRN_OBJ_PERSISTENT;
  }

  column = grn_column_create(ctx, table,
                             GRN_TEXT_VALUE(&vars[1].value),
                             GRN_TEXT_LEN(&vars[1].value),
                             NULL, flags, type);
  if (column != NULL) {
    if (GRN_TEXT_LEN(&vars[4].value) != 0) {
      grn_obj sources;
      grn_obj source_ids;
      grn_obj **cursor;
      grn_obj **last;

      GRN_PTR_INIT(&sources, GRN_OBJ_VECTOR, GRN_ID_NIL);
      GRN_UINT32_INIT(&source_ids, GRN_OBJ_VECTOR);
      grn_obj_columns(ctx, type,
                      GRN_TEXT_VALUE(&vars[4].value),
                      GRN_TEXT_LEN(&vars[4].value),
                      &sources);

      /* Collect the ids of the resolved sources, skipping zero ids. */
      last = (grn_obj **)GRN_BULK_CURR(&sources);
      for (cursor = (grn_obj **)GRN_BULK_HEAD(&sources); cursor < last; cursor++) {
        grn_id source_id = grn_obj_id(ctx, *cursor);
        if (!source_id) {
          continue;
        }
        GRN_UINT32_PUT(ctx, &source_ids, source_id);
      }
      if (GRN_BULK_VSIZE(&source_ids)) {
        grn_obj_set_info(ctx, column, GRN_INFO_SOURCE, &source_ids);
      }

      GRN_OBJ_FIN(ctx, &source_ids);
      GRN_OBJ_FIN(ctx, &sources);
    }
    grn_obj_unlink(ctx, column);
  }

  /* The reply reflects the context status, not just the column pointer. */
  if (ctx->rc) {
    GRN_TEXT_PUTS(ctx, buf, "false");
  } else {
    GRN_TEXT_PUTS(ctx, buf, "true");
  }
  return buf;
}
/*
 * Parse "YYYY/MM/DD hh:mm:ss[.fraction]" ('-' is accepted in place of '/')
 * from the str_len bytes at str into *tv.
 *
 * The broken-down time is converted with mktime(); tv->tv_sec receives its
 * result (narrowed to int32_t) and tv->tv_usec the fraction, right-padded to
 * six digits (".5" becomes 500000).
 *
 * Returns GRN_SUCCESS, or GRN_INVALID_ARGUMENT when a separator is missing,
 * a field is out of range (including a year before 1900), mktime() cannot
 * represent the time, or the fraction is not below 1000000. tv->tv_sec may
 * already have been written when the failure is detected after mktime().
 */
grn_rc
grn_str2timeval(const char *str, uint32_t str_len, grn_timeval *tv)
{
  struct tm tm;
  const char *r1, *r2, *rend = str + str_len;
  uint32_t uv;
  memset(&tm, 0, sizeof(struct tm));

  /* Each "(r + 1) >= rend" test guarantees that the separator at *r is
     inside the buffer and that at least one more byte follows it. */
  tm.tm_year = (int)grn_atoui(str, rend, &r1) - 1900;
  if ((r1 + 1) >= rend || (*r1 != '/' && *r1 != '-') ||
      tm.tm_year < 0) { return GRN_INVALID_ARGUMENT; }
  r1++;
  tm.tm_mon = (int)grn_atoui(r1, rend, &r1) - 1;
  if ((r1 + 1) >= rend || (*r1 != '/' && *r1 != '-') ||
      tm.tm_mon < 0 || tm.tm_mon >= 12) { return GRN_INVALID_ARGUMENT; }
  r1++;
  tm.tm_mday = (int)grn_atoui(r1, rend, &r1);
  if ((r1 + 1) >= rend || *r1 != ' ' ||
      tm.tm_mday < 1 || tm.tm_mday > 31) { return GRN_INVALID_ARGUMENT; }

  /* For the time fields "r1 == r2" rejects an empty number. */
  tm.tm_hour = (int)grn_atoui(++r1, rend, &r2);
  if ((r2 + 1) >= rend || r1 == r2 || *r2 != ':' ||
      tm.tm_hour < 0 || tm.tm_hour >= 24) {
    return GRN_INVALID_ARGUMENT;
  }
  r1 = r2 + 1;
  tm.tm_min = (int)grn_atoui(r1, rend, &r2);
  if ((r2 + 1) >= rend || r1 == r2 || *r2 != ':' ||
      tm.tm_min < 0 || tm.tm_min >= 60) {
    return GRN_INVALID_ARGUMENT;
  }
  r1 = r2 + 1;
  tm.tm_sec = (int)grn_atoui(r1, rend, &r2);
  if (r1 == r2 ||
      tm.tm_sec < 0 || tm.tm_sec > 61 /* leap 2sec */) {
    return GRN_INVALID_ARGUMENT;
  }
  r1 = r2;

  /* A return value of -1 from mktime() is ambiguous: it is also the valid
     timestamp one second before the epoch, and narrowing to int32_t can
     produce it by truncation. mktime() fills in tm_yday (0-365) on success,
     so a tm_yday that is still -1 afterwards is the reliable failure sign. */
  tm.tm_yday = -1;
  /* Negative tm_isdst asks mktime() to decide whether daylight saving time
     applies; the zero left by memset() would force standard time. */
  tm.tm_isdst = -1;
  tv->tv_sec = (int32_t) mktime(&tm);
  if (tm.tm_yday == -1) { return GRN_INVALID_ARGUMENT; }

  /* Optional fraction: skip the '.' only when something follows it. */
  if ((r1 + 1) < rend && *r1 == '.') { r1++; }
  uv = grn_atoi(r1, rend, &r2);
  /* Scale a short fraction up to microseconds. */
  while (r2 < r1 + 6) {
    uv *= 10;
    r2++;
  }
  if (uv >= 1000000) { return GRN_INVALID_ARGUMENT; }
  tv->tv_usec = uv;
  return GRN_SUCCESS;
}
/*
 * Command procedure: run grn_search() with the fifteen procedure variables.
 *
 *   vars[0..5], vars[9..11]  forwarded as (text, length) pairs
 *   vars[6]                  output columns; DEFAULT_OUTPUT_COLUMNS when empty
 *   vars[7]                  offset, 0 when empty
 *   vars[8]                  limit, DEFAULT_LIMIT when empty
 *   vars[12], vars[13]       numbers parsed with grn_atoi() and forwarded as
 *                            the last two grn_search() arguments
 *   vars[14]                 output type, decoded with GET_OTYPE()
 *
 * Results are written to args[0], which is also the return value. When the
 * variable count is not 15 the search is not run.
 */
static grn_obj *
proc_select(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  grn_obj *outbuf = args[0];
  grn_expr_var *vars;
  uint32_t nvars;
  int offset;
  int limit;
  int number12;
  int number13;
  char *output_columns;
  uint32_t output_columns_len;

  grn_proc_get_info(ctx, user_data, &vars, &nvars, NULL);
  if (nvars != 15) {
    return outbuf;
  }

  if (GRN_TEXT_LEN(&vars[7].value)) {
    offset = grn_atoi(GRN_TEXT_VALUE(&vars[7].value),
                      GRN_BULK_CURR(&vars[7].value), NULL);
  } else {
    offset = 0;
  }

  if (GRN_TEXT_LEN(&vars[8].value)) {
    limit = grn_atoi(GRN_TEXT_VALUE(&vars[8].value),
                     GRN_BULK_CURR(&vars[8].value), NULL);
  } else {
    limit = DEFAULT_LIMIT;
  }

  output_columns_len = GRN_TEXT_LEN(&vars[6].value);
  if (output_columns_len) {
    output_columns = GRN_TEXT_VALUE(&vars[6].value);
  } else {
    output_columns = DEFAULT_OUTPUT_COLUMNS;
    output_columns_len = strlen(DEFAULT_OUTPUT_COLUMNS);
  }

  number12 = grn_atoi(GRN_TEXT_VALUE(&vars[12].value),
                      GRN_BULK_CURR(&vars[12].value), NULL);
  number13 = grn_atoi(GRN_TEXT_VALUE(&vars[13].value),
                      GRN_BULK_CURR(&vars[13].value), NULL);

  grn_search(ctx, outbuf, GET_OTYPE(&vars[14].value),
             GRN_TEXT_VALUE(&vars[0].value), GRN_TEXT_LEN(&vars[0].value),
             GRN_TEXT_VALUE(&vars[1].value), GRN_TEXT_LEN(&vars[1].value),
             GRN_TEXT_VALUE(&vars[2].value), GRN_TEXT_LEN(&vars[2].value),
             GRN_TEXT_VALUE(&vars[3].value), GRN_TEXT_LEN(&vars[3].value),
             GRN_TEXT_VALUE(&vars[4].value), GRN_TEXT_LEN(&vars[4].value),
             GRN_TEXT_VALUE(&vars[5].value), GRN_TEXT_LEN(&vars[5].value),
             output_columns, output_columns_len,
             offset, limit,
             GRN_TEXT_VALUE(&vars[9].value), GRN_TEXT_LEN(&vars[9].value),
             GRN_TEXT_VALUE(&vars[10].value), GRN_TEXT_LEN(&vars[10].value),
             GRN_TEXT_VALUE(&vars[11].value), GRN_TEXT_LEN(&vars[11].value),
             number12, number13);

  return outbuf;
}
/*
 * Parse a comma separated list of "key[:value]" weights starting at source
 * and ending no later than query->str_end.
 *
 * Keys are non-zero unsigned integers; a missing ":value" means weight 1.
 * Weights are stored in query->opt.weight_vector[key - 1] while every key is
 * below DEFAULT_WEIGHT_VECTOR_SIZE. The first larger key frees the vector,
 * creates the query->weight_set hash instead and restarts parsing from
 * source so that the weights seen so far are stored in the hash as well.
 *
 * Returns the position where parsing stopped. source is returned unchanged
 * when the vector or the hash cannot be allocated.
 */
static const char *
get_weight_vector(grn_ctx *ctx, grn_query *query, const char *source)
{
  const char *p;

  /* Allocate the vector only when neither storage exists yet. */
  if (!query->opt.weight_vector &&
      !query->weight_set &&
      !(query->opt.weight_vector = GRN_CALLOC(sizeof(int) * DEFAULT_WEIGHT_VECTOR_SIZE))) {
    GRN_LOG(ctx, GRN_LOG_ALERT, "get_weight_vector malloc fail");
    return source;
  }
  for (p = source; p < query->str_end; ) {
    unsigned int key;
    int value;

    /* key, key is not zero */
    key = grn_atoui(p, query->str_end, &p);
    if (!key || key > GRN_ID_MAX) { break; }

    /* value; the key may have ended exactly at str_end, so check the
       bound before looking at *p */
    if (p < query->str_end && *p == ':') {
      p++;
      value = grn_atoi(p, query->str_end, &p);
    } else {
      value = 1;
    }

    if (query->weight_set) {
      int *pval;
      if (grn_hash_add(ctx, query->weight_set, &key, sizeof(unsigned int), (void **)&pval, NULL)) {
        *pval = value;
      }
    } else if (key < DEFAULT_WEIGHT_VECTOR_SIZE) {
      query->opt.weight_vector[key - 1] = value;
    } else {
      /* The key does not fit the vector: switch to the hash. */
      GRN_FREE(query->opt.weight_vector);
      query->opt.weight_vector = NULL;
      if (!(query->weight_set = grn_hash_create(ctx, NULL, sizeof(unsigned int), sizeof(int), 0))) {
        return source;
      }
      p = source;           /* reparse */
      continue;
    }

    /* Continue only when a ',' inside the buffer follows the entry. */
    if (p >= query->str_end || *p != ',') { break; }
    p++;
  }
  return p;
}
/*
  Plugin entry point.

  Reads two environment variables, opens the mutex guarding the shared MeCab
  instance and checks the MeCab dictionary encoding:

    GRN_MECAB_CHUNKED_TOKENIZE_ENABLED  chunked tokenization is enabled only
                                        when the value is exactly "yes".
    GRN_MECAB_CHUNK_SIZE_THRESHOLD      replaces the chunk size threshold
                                        when the whole value parses as a
                                        number.

  Returns ctx->rc. According to the original note on this function, it fails
  if there is no dictionary that uses the context encoding of groonga; that
  check is delegated to check_mecab_dictionary_encoding().
 */
grn_rc
GRN_PLUGIN_INIT(grn_ctx *ctx)
{
  {
    char enabled_text[GRN_ENV_BUFFER_SIZE];

    grn_getenv("GRN_MECAB_CHUNKED_TOKENIZE_ENABLED",
               enabled_text,
               GRN_ENV_BUFFER_SIZE);
    /* An empty value never equals "yes", so one comparison is enough. */
    grn_mecab_chunked_tokenize_enabled = (strcmp(enabled_text, "yes") == 0);
  }

  {
    char threshold_text[GRN_ENV_BUFFER_SIZE];

    grn_getenv("GRN_MECAB_CHUNK_SIZE_THRESHOLD",
               threshold_text,
               GRN_ENV_BUFFER_SIZE);
    if (threshold_text[0] != '\0') {
      const char *text_end = threshold_text + strlen(threshold_text);
      const char *parsed_end;
      int parsed = grn_atoi(threshold_text, text_end, &parsed_end);

      /* Accept the value only when the parser consumed all of it. */
      if (text_end > threshold_text && parsed_end == text_end) {
        grn_mecab_chunk_size_threshold = parsed;
      }
    }
  }

  sole_mecab = NULL;
  sole_mecab_mutex = grn_plugin_mutex_open(ctx);
  if (sole_mecab_mutex == NULL) {
    GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE,
                     "[tokenizer][mecab] grn_plugin_mutex_open() failed");
    return ctx->rc;
  }

  check_mecab_dictionary_encoding(ctx);
  return ctx->rc;
}