/*
 * "echo" command: logs the "input" argument and writes it back to the
 * client as a two-element "RESULT" array holding the text and its
 * length in bytes.
 *
 * An empty "input" is reported as an empty string with length 0.
 * Always returns NULL.
 */
static grn_obj *
command_echo(grn_ctx *ctx, GNUC_UNUSED int nargs, GNUC_UNUSED grn_obj **args,
             grn_user_data *user_data)
{
  grn_obj *var;
  const char *input = "";
  unsigned int input_len = 0;

  var = grn_plugin_proc_get_var(ctx, user_data, "input", -1);
  GRN_PLUGIN_LOG(ctx, GRN_LOG_NOTICE, "[echo] input = %.*s",
                 (int)GRN_TEXT_LEN(var), GRN_TEXT_VALUE(var));

  if (GRN_TEXT_LEN(var) != 0) {
    input = GRN_TEXT_VALUE(var);
    input_len = GRN_TEXT_LEN(var);
  }

  grn_ctx_output_array_open(ctx, "RESULT", 2);
  /*
   * Use the length-aware output call: the bulk behind `var` carries an
   * explicit length and is not guaranteed to be NUL-terminated, so
   * treating it as a C string could read past the end of the value
   * (and a NULL pointer was passed when the input was empty).
   */
  grn_ctx_output_str(ctx, input, input_len);
  grn_ctx_output_int64(ctx, input_len);
  grn_ctx_output_array_close(ctx);

  return NULL;
}
/*
 * Parses one line of a TAB-separated synonyms file and registers it in
 * the file-level `synonyms` hash.
 *
 * The first field is the key. The remaining fields are joined into an
 * expression of the form "((a) OR (b) OR ...)", which is stored
 * NUL-terminated as the value of the key, truncated so that it fits in
 * MAX_SYNONYM_BYTES bytes including the terminator.
 *
 * Lines that are empty, start with a comment mark, or have nothing
 * after the key are ignored.
 *
 * `key` and `value` are scratch buffers that this function appends to;
 * NOTE(review): the caller presumably rewinds them between lines --
 * confirm at the call site.
 */
static void
parse_synonyms_file_line(grn_ctx *ctx, const char *line, size_t line_length,
                         grn_obj *key, grn_obj *value)
{
  size_t i = 0;
  grn_id id;
  void *value_location = NULL;

  /* Never look at line[0] for an empty line. */
  if (line_length == 0 || is_comment_mark(line[0])) {
    return;
  }

  /* Key: everything up to (not including) the first TAB. */
  while (i < line_length) {
    char character = line[i];
    i++;
    if (character == '\t') {
      break;
    }
    GRN_TEXT_PUTC(ctx, key, character);
  }

  /* Nothing follows the key: no synonyms to register. */
  if (i == line_length) {
    return;
  }

  /* Value: remaining fields, each TAB turned into an OR separator. */
  GRN_TEXT_PUTS(ctx, value, "((");
  while (i < line_length) {
    char character = line[i];
    i++;
    if (character == '\t') {
      GRN_TEXT_PUTS(ctx, value, ") OR (");
    } else {
      GRN_TEXT_PUTC(ctx, value, character);
    }
  }
  GRN_TEXT_PUTS(ctx, value, "))");

  id = grn_hash_add(ctx, synonyms, GRN_TEXT_VALUE(key), GRN_TEXT_LEN(key),
                    &value_location, NULL);
  if (id == GRN_ID_NIL) {
    GRN_PLUGIN_LOG(ctx, GRN_LOG_WARNING,
                   "[plugin][query-expander][tsv] "
                   "failed to register key: <%.*s>",
                   (int)GRN_TEXT_LEN(key), GRN_TEXT_VALUE(key));
    return;
  }

  /* Leave room for the terminating NUL within MAX_SYNONYM_BYTES. */
  if (GRN_TEXT_LEN(value) > MAX_SYNONYM_BYTES - 1) {
    grn_bulk_truncate(ctx, value, MAX_SYNONYM_BYTES - 1);
  }
  GRN_TEXT_PUTC(ctx, value, '\0');
  grn_memcpy(value_location, GRN_TEXT_VALUE(value), GRN_TEXT_LEN(value));
}
/*
 * Set-value hook body: rewrites the record id(s) about to be stored so
 * that every id which has a non-zero value in the SYNONYM_COLUMN_NAME
 * column of the referenced table is replaced by that value.
 *
 * Four objects are popped from the ctx stack, in this order: flags,
 * the new value, the old value, and the id (unused here). The new
 * value is modified in place.
 *
 * Three shapes of new value are handled:
 *  - text: tokenized against the referenced table into a vector of
 *    record ids, which are then mapped;
 *  - uvector: each element is mapped;
 *  - anything else: treated as a single record id and mapped.
 *
 * Always returns NULL; problems are reported through the plugin log.
 */
static grn_obj *
command_tag_synonym(grn_ctx *ctx, GNUC_UNUSED int nargs,
                    GNUC_UNUSED grn_obj **args,
                    GNUC_UNUSED grn_user_data *user_data)
{
  /* The pop order is significant: it mirrors the order on the stack. */
  grn_obj *flags = grn_ctx_pop(ctx);
  grn_obj *newvalue = grn_ctx_pop(ctx);
  grn_obj *oldvalue = grn_ctx_pop(ctx);
  GNUC_UNUSED grn_obj *id = grn_ctx_pop(ctx);
  grn_obj *domain;
  grn_obj *table;
  grn_obj *column;
  int from_text;
  int from_uvector;

  if (GRN_BULK_VSIZE(newvalue) == 0 || GRN_INT32_VALUE(flags) == 0) {
    return NULL;
  }

  table = grn_ctx_at(ctx, oldvalue->header.domain);
  if (!table || !is_table(table)) {
    GRN_PLUGIN_LOG(ctx, GRN_LOG_WARNING,
                   "[tag-synonym] "
                   "hooked column must be reference type");
    return NULL;
  }

  column = grn_obj_column(ctx, table,
                          SYNONYM_COLUMN_NAME, SYNONYM_COLUMN_NAME_LEN);
  if (!column) {
    GRN_PLUGIN_LOG(ctx, GRN_LOG_WARNING,
                   "[tag-synonym] "
                   "couldn't open synonym column");
    return NULL;
  }

  /*
   * Classify the new value once, so that the NULL check on `domain`
   * protects every use of is_string().
   */
  domain = grn_ctx_at(ctx, newvalue->header.domain);
  from_text = (domain && is_string(domain));
  from_uvector = (!from_text && newvalue->header.type == GRN_UVECTOR);

  if (from_text || from_uvector) {
    grn_obj record;
    grn_obj value;
    unsigned int i;
    unsigned int n;

    if (from_text) {
      /* Turn the text into record ids of the referenced table. */
      GRN_RECORD_INIT(&record, GRN_OBJ_VECTOR, oldvalue->header.domain);
      grn_table_tokenize(ctx, table,
                         GRN_TEXT_VALUE(newvalue), GRN_TEXT_LEN(newvalue),
                         &record, GRN_TRUE);
    } else {
      /*
       * Shallow copy: `record` now refers to the storage that
       * `newvalue` held before it is re-initialized below.
       * NOTE(review): that storage is not released here because its
       * ownership cannot be determined from this function -- confirm
       * against the hook caller.
       */
      record = *newvalue;
    }

    /* Rebuild the new value as an (initially empty) vector of ids. */
    GRN_RECORD_INIT(newvalue, GRN_OBJ_VECTOR, oldvalue->header.domain);
    GRN_UINT32_INIT(&value, 0);
    n = grn_vector_size(ctx, &record);
    for (i = 0; i < n; i++) {
      grn_id tid = grn_uvector_get_element(ctx, &record, i, NULL);

      GRN_BULK_REWIND(&value);
      grn_obj_get_value(ctx, column, tid, &value);
      if (GRN_UINT32_VALUE(&value)) {
        GRN_PLUGIN_LOG(ctx, GRN_LOG_INFO,
                       "[tag-synonym] "
                       "changed: tid %u -> %u",
                       tid, GRN_UINT32_VALUE(&value));
        tid = GRN_UINT32_VALUE(&value);
      }
      grn_uvector_add_element(ctx, newvalue, tid, 0);
    }
    grn_obj_unlink(ctx, &value);
    if (from_text) {
      /* The tokenized vector was created here, so release it here. */
      grn_obj_unlink(ctx, &record);
    }
  } else {
    grn_id tid = GRN_RECORD_VALUE(newvalue);
    grn_obj value;

    GRN_UINT32_INIT(&value, 0);
    grn_obj_get_value(ctx, column, tid, &value);
    if (GRN_UINT32_VALUE(&value)) {
      GRN_PLUGIN_LOG(ctx, GRN_LOG_INFO,
                     "[tag-synonym] "
                     "changed: tid %u -> %u",
                     tid, GRN_UINT32_VALUE(&value));
      tid = GRN_UINT32_VALUE(&value);
      GRN_BULK_REWIND(newvalue);
      GRN_RECORD_SET(ctx, newvalue, tid);
    }
    grn_obj_unlink(ctx, &value);
  }

  grn_obj_unlink(ctx, column);

  return NULL;
}
/*
 * "tag_synonym_add" command: registers the "tag_synonym" proc as a
 * set-value hook on the column named by the "table" and "column"
 * arguments, then outputs the number of set-value hooks now present on
 * that column.
 *
 * Before registering, the command verifies that the target column
 * exists, that its range resolves to an object, and that this range
 * has a SYNONYM_COLUMN_NAME column. On any failure an error is logged,
 * nothing is output and no hook is added.
 *
 * Always returns NULL.
 */
static grn_obj *
command_tag_synonym_add(grn_ctx *ctx, GNUC_UNUSED int nargs,
                        GNUC_UNUSED grn_obj **args, grn_user_data *user_data)
{
  grn_obj *var;
  grn_obj *proc;
  grn_obj *table;
  grn_obj *column;
  grn_obj *range;
  grn_obj *synonym_column;
  unsigned int nhooks = 0;
  const char *table_name = "";
  unsigned int table_len = 0;
  const char *column_name = "";
  unsigned int column_len = 0;

  var = grn_plugin_proc_get_var(ctx, user_data, "table", -1);
  if (GRN_TEXT_LEN(var) != 0) {
    table_name = GRN_TEXT_VALUE(var);
    table_len = GRN_TEXT_LEN(var);
  }
  var = grn_plugin_proc_get_var(ctx, user_data, "column", -1);
  if (GRN_TEXT_LEN(var) != 0) {
    column_name = GRN_TEXT_VALUE(var);
    column_len = GRN_TEXT_LEN(var);
  }

  table = grn_ctx_get(ctx, table_name, table_len);
  if (!table) {
    GRN_PLUGIN_LOG(ctx, GRN_LOG_ERROR,
                   "[tag-synonym] "
                   "table doesn't exist: <%.*s>",
                   (int)table_len, table_name);
    return NULL;
  }
  column = grn_obj_column(ctx, table, column_name, column_len);
  if (!column) {
    GRN_PLUGIN_LOG(ctx, GRN_LOG_ERROR,
                   "[tag-synonym] "
                   "column doesn't exist: <%.*s>",
                   (int)column_len, column_name);
    return NULL;
  }

  range = grn_ctx_at(ctx, grn_obj_get_range(ctx, column));
  if (!range) {
    GRN_PLUGIN_LOG(ctx, GRN_LOG_ERROR,
                   "[tag-synonym] "
                   "hooked column must be reference type");
    return NULL;
  }

  /* Only probe for the synonym column; it is not needed afterwards. */
  synonym_column = grn_obj_column(ctx, range,
                                  SYNONYM_COLUMN_NAME,
                                  SYNONYM_COLUMN_NAME_LEN);
  if (!synonym_column) {
    GRN_PLUGIN_LOG(ctx, GRN_LOG_ERROR,
                   "[tag-synonym] "
                   "couldn't open synonym column");
    return NULL;
  }
  grn_obj_unlink(ctx, synonym_column);

  proc = grn_ctx_get(ctx, "tag_synonym", -1);
  if (!proc) {
    /* Without the proc the hook would be registered with a nil target. */
    GRN_PLUGIN_LOG(ctx, GRN_LOG_ERROR,
                   "[tag-synonym] "
                   "couldn't find tag_synonym proc");
    return NULL;
  }

  {
    grn_obj data;
    default_set_value_hook_data hook_data = { grn_obj_id(ctx, proc), 0 };

    /* `data` only references hook_data; it does not own a copy. */
    GRN_TEXT_INIT(&data, GRN_OBJ_DO_SHALLOW_COPY);
    GRN_TEXT_SET_REF(&data, &hook_data, sizeof(hook_data));
    grn_obj_add_hook(ctx, column, GRN_HOOK_SET, 0, proc, &data);
    grn_obj_unlink(ctx, &data);
  }

  nhooks = grn_obj_get_nhooks(ctx, column, GRN_HOOK_SET);
  grn_ctx_output_int32(ctx, nhooks);

  return NULL;
}
/*
  This function is called for a full text search query or a document to be
  indexed. This means that both short and long strings are given.

  It opens the tokenizer query, lazily creates the process-wide MeCab
  instance (`sole_mecab`) under `sole_mecab_mutex`, and checks that the
  query encoding matches the MeCab dictionary encoding. Then it prepares
  the token source:
   - if the normalized text already contains tokenized delimiters, the
     normalized text itself is scanned and no buffer is allocated;
   - otherwise the text is passed through mecab_sparse_tostr2() and the
     result is copied into a private buffer, with trailing whitespace
     removed.

  On success the new tokenizer is stored in `user_data->ptr`.

  The return value of this function is ignored (it is always NULL).
  When an error occurs in this function, `ctx->rc' is overwritten with an
  error code (not GRN_SUCCESS).
 */
static grn_obj *
mecab_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
{
  char *buf = NULL;
  char *end;
  const char *s;
  grn_mecab_tokenizer *tokenizer;
  unsigned int bufsize = 0;
  grn_tokenizer_query *query;
  grn_obj *normalized_query;
  const char *normalized_string;
  unsigned int normalized_string_length;

  query = grn_tokenizer_query_open(ctx, nargs, args);
  if (!query) {
    return NULL;
  }

  /* Create the shared MeCab instance on first use; re-check under lock. */
  if (!sole_mecab) {
    grn_plugin_mutex_lock(ctx, sole_mecab_mutex);
    if (!sole_mecab) {
      sole_mecab = mecab_new2("-Owakati");
      if (!sole_mecab) {
        GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
                         "[tokenizer][mecab] "
                         "mecab_new2() failed on mecab_init(): %s",
                         mecab_strerror(NULL));
      } else {
        sole_mecab_encoding = get_mecab_encoding(sole_mecab);
      }
    }
    grn_plugin_mutex_unlock(ctx, sole_mecab_mutex);
  }
  if (!sole_mecab) {
    grn_tokenizer_query_close(ctx, query);
    return NULL;
  }

  if (query->encoding != sole_mecab_encoding) {
    /*
     * Remember the encoding before closing the query: the error message
     * must not read from `query` once it has been closed.
     */
    grn_encoding query_encoding = query->encoding;

    grn_tokenizer_query_close(ctx, query);
    GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
                     "[tokenizer][mecab] "
                     "MeCab dictionary charset (%s) does not match "
                     "the table encoding: <%s>",
                     grn_enctostr(sole_mecab_encoding),
                     grn_enctostr(query_encoding));
    return NULL;
  }

  if (!(tokenizer = GRN_PLUGIN_MALLOC(ctx, sizeof(grn_mecab_tokenizer)))) {
    grn_tokenizer_query_close(ctx, query);
    GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE,
                     "[tokenizer][mecab] "
                     "memory allocation to grn_mecab_tokenizer failed");
    return NULL;
  }
  tokenizer->mecab = sole_mecab;
  tokenizer->query = query;

  normalized_query = query->normalized_query;
  grn_string_get_normalized(ctx,
                            normalized_query,
                            &normalized_string,
                            &normalized_string_length,
                            NULL);
  tokenizer->have_tokenized_delimiter =
    grn_tokenizer_have_tokenized_delimiter(ctx,
                                           normalized_string,
                                           normalized_string_length,
                                           query->encoding);

  if (tokenizer->have_tokenized_delimiter) {
    /* The text is already split: scan the normalized string directly. */
    tokenizer->buf = NULL;
    tokenizer->next = normalized_string;
    tokenizer->end = tokenizer->next + normalized_string_length;
  } else {
    /*
     * The MeCab call and the copy of its result are both done while the
     * mutex is held (presumably because the result lives inside the
     * shared instance).
     */
    grn_plugin_mutex_lock(ctx, sole_mecab_mutex);
    s = mecab_sparse_tostr2(tokenizer->mecab,
                            normalized_string,
                            normalized_string_length);
    if (!s) {
      GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
                       "[tokenizer][mecab] "
                       "mecab_sparse_tostr() failed len=%d err=%s",
                       normalized_string_length,
                       mecab_strerror(tokenizer->mecab));
    } else {
      bufsize = strlen(s) + 1;
      if (!(buf = GRN_PLUGIN_MALLOC(ctx, bufsize))) {
        GRN_PLUGIN_LOG(ctx, GRN_LOG_ALERT,
                       "[tokenizer][mecab] "
                       "buffer allocation on mecab_init failed !");
      } else {
        memcpy(buf, s, bufsize);
      }
    }
    grn_plugin_mutex_unlock(ctx, sole_mecab_mutex);

    if (!s || !buf) {
      grn_tokenizer_query_close(ctx, tokenizer->query);
      GRN_PLUGIN_FREE(ctx, tokenizer);
      return NULL;
    }

    /*
     * A certain version of mecab returns trailing lf or spaces.
     * Walk `end` back from the terminating NUL; it never moves before
     * `buf`, even when the MeCab result is an empty string.
     */
    end = buf + bufsize - 1;
    while (end > buf && isspace((unsigned char)end[-1])) {
      end--;
      *end = '\0';
    }
    tokenizer->buf = buf;
    tokenizer->next = buf;
    tokenizer->end = end;
  }

  user_data->ptr = tokenizer;
  grn_tokenizer_token_init(ctx, &(tokenizer->token));

  return NULL;
}