/*
 * Runs query `q` against index `i`, accumulating hits into `r`, and escalates
 * the match mode (exact -> unsplit -> partial) according to
 * q->escalation_threshold:
 *   - threshold >= 0: the exact search always runs; each later stage runs
 *     only while the threshold is >= the number of entries in `r`.
 *   - threshold <  0: its negation is used as a bit mask; bit 1 enables the
 *     exact stage, bit 2 the unsplit stage, bit 4 the partial stage.
 * Each escalated stage first lowers q->weight_offset by
 * q->escalation_decaystep.  Always returns GRN_SUCCESS.
 */
grn_rc
grn_query_search(grn_ctx *ctx, grn_ii *i, grn_query *q, grn_hash *r, grn_operator op)
{
  const int threshold = q->escalation_threshold;
  const int use_mask = (threshold < 0);
  const int mask = use_mask ? -threshold : 0;

  if (!use_mask || (mask & 1)) {
    q->default_mode = GRN_OP_EXACT;
    exec_search(ctx, i, q, q->expr, r, op);
    GRN_LOG(ctx, GRN_LOG_INFO, "hits(exact)=%d", *r->n_entries);
  }

  if (use_mask ? (mask & 2) : (threshold >= *r->n_entries)) {
    q->weight_offset -= q->escalation_decaystep;
    q->default_mode = GRN_OP_UNSPLIT;
    exec_search(ctx, i, q, q->expr, r, op);
    GRN_LOG(ctx, GRN_LOG_INFO, "hits(unsplit)=%d", *r->n_entries);
  }

  if (use_mask ? (mask & 4) : (threshold >= *r->n_entries)) {
    q->weight_offset -= q->escalation_decaystep;
    q->default_mode = GRN_OP_PARTIAL;
    exec_search(ctx, i, q, q->expr, r, op);
    GRN_LOG(ctx, GRN_LOG_INFO, "hits(partial)=%d", *r->n_entries);
  }

  return GRN_SUCCESS;
}
/*
 * Logger "reopen" callback: asks nginx to reopen its files for the current
 * cycle, logging a notice before and after.  `user_data` is not used.
 */
static void
ngx_http_groonga_logger_reopen(grn_ctx *ctx, void *user_data)
{
  ngx_cycle_t *current_cycle;

  GRN_LOG(ctx, GRN_LOG_NOTICE, "log will be closed.");

  current_cycle = (ngx_cycle_t *)ngx_cycle;
  ngx_reopen_files(current_cycle, -1);

  GRN_LOG(ctx, GRN_LOG_NOTICE, "log opened.");
}
/*
 * Logger "reopen" callback for the default logger.  Closes the current log
 * file (if any) while holding default_logger_lock and clears the handle;
 * nothing is reopened here.  `user_data` is not used.
 */
static void
default_logger_reopen(grn_ctx *ctx, void *user_data)
{
  FILE *stale;

  GRN_LOG(ctx, GRN_LOG_NOTICE, "log will be closed.");

  CRITICAL_SECTION_ENTER(default_logger_lock);
  stale = default_logger_file;
  default_logger_file = NULL;
  if (stale != NULL) {
    fclose(stale);
  }
  CRITICAL_SECTION_LEAVE(default_logger_lock);

  GRN_LOG(ctx, GRN_LOG_NOTICE, "log opened.");
}
/*
 * Creates the backing grn_io for a fixed-size-element column and fills `ra`.
 *
 * element_size is rounded up to the next power of two (minimum 1) and stored
 * in the header.  ra->element_mask is (elements per segment - 1) and
 * ra->element_width is the largest w <= 22 with (1 << w) <= elements per
 * segment.
 *
 * Returns `ra` on success, NULL when element_size exceeds
 * GRN_RA_SEGMENT_SIZE or when grn_io_create() fails.
 */
static grn_ra *
_grn_ra_create(grn_ctx *ctx, grn_ra *ra, const char *path, unsigned int element_size)
{
  grn_io *io;
  struct grn_ra_header *header;
  unsigned int slot_size = 1;
  int segment_limit;
  int per_segment;
  int width = 22;

  if (element_size > GRN_RA_SEGMENT_SIZE) {
    GRN_LOG(ctx, GRN_LOG_ERROR, "element_size too large (%d)", element_size);
    return NULL;
  }

  /* round the element size up to a power of two */
  while (slot_size < element_size) {
    slot_size *= 2;
  }

  segment_limit = ((GRN_ID_MAX + 1) / GRN_RA_SEGMENT_SIZE) * slot_size;
  io = grn_io_create(ctx, path, sizeof(struct grn_ra_header),
                     GRN_RA_SEGMENT_SIZE, segment_limit,
                     grn_io_auto, GRN_IO_EXPIRE_SEGMENT);
  if (!io) {
    return NULL;
  }

  header = grn_io_header(io);
  grn_io_set_type(io, GRN_COLUMN_FIX_SIZE);
  header->element_size = slot_size;

  per_segment = GRN_RA_SEGMENT_SIZE / header->element_size;
  while ((1 << width) > per_segment) {
    width--;
  }

  ra->io = io;
  ra->header = header;
  ra->element_mask = per_segment - 1;
  ra->element_width = width;
  return ra;
}
/*
 * Allocates a grn_query and parses `str` (str_len bytes) into it.
 *
 * A single allocation holds the grn_query, a pool of max_exprs * 4 cells and
 * a NUL-terminated private copy of the query string placed right after the
 * cell pool.  Pragmas are parsed first (get_pragma), then the expression
 * (get_expr).
 *
 * Returns the new query, or NULL when the allocation fails.
 */
grn_query *
grn_query_open(grn_ctx *ctx, const char *str, unsigned int str_len,
               grn_operator default_op, int max_exprs)
{
  grn_query *query;
  int max_cells = max_exprs * 4;

  query = GRN_MALLOC(sizeof(grn_query) + max_cells * sizeof(grn_cell) + str_len + 1);
  if (!query) {
    GRN_LOG(ctx, GRN_LOG_ALERT, "grn_query_open malloc fail");
    return NULL;
  }

  query->header.type = GRN_QUERY;

  /* the string copy lives immediately behind the cell pool */
  query->str = (char *)&query->cell_pool[max_cells];
  memcpy(query->str, str, str_len);
  query->str[str_len] = '\0';
  query->cur = query->str;
  query->str_end = query->str + str_len;

  query->default_op = default_op;
  query->encoding = ctx->encoding;
  query->max_exprs = max_exprs;
  query->max_cells = max_cells;
  query->cur_cell = 0;
  query->cur_expr = 0;
  query->escalation_threshold = GRN_DEFAULT_MATCH_ESCALATION_THRESHOLD;
  query->escalation_decaystep = DEFAULT_DECAYSTEP;
  query->weight_offset = 0;
  query->opt.weight_vector = NULL;
  query->weight_set = NULL;

  get_pragma(ctx, query);
  query->expr = get_expr(ctx, query);

  query->opt.vector_size = DEFAULT_WEIGHT_VECTOR_SIZE;
  if (query->weight_set) {
    query->opt.func = section_weight_cb;
  } else {
    query->opt.func = NULL;
  }
  query->opt.func_arg = query->weight_set;
  query->snip_conds = NULL;
  return query;
}
/*
 * Scans `nstrs` strings against query `q` and reports whether the query
 * matched (*found) and with what score (*score).
 *
 * strs / str_lens: parallel arrays of string pointers and byte lengths.
 * flags:           GRN_QUERY_SCAN_NORMALIZE requests normalization of each
 *                  string.  GRN_QUERY_SCAN_ALLOCCONDS is managed internally:
 *                  it is set here when q->snip_conds has to be allocated and
 *                  is rejected if the caller passes it while conditions
 *                  already exist.
 *
 * String i is scanned as section i + 1.  The ALLOCCONDS flag is cleared
 * after the first string so later strings reuse the conditions.
 *
 * Returns GRN_SUCCESS, GRN_INVALID_ARGUMENT for NULL/empty arguments or an
 * invalid flag combination, GRN_NO_MEMORY_AVAILABLE when a string cannot be
 * opened, or the error code from alloc_snip_conds() / scan_query().
 */
grn_rc
grn_query_scan(grn_ctx *ctx, grn_query *q, const char **strs, unsigned int *str_lens,
               unsigned int nstrs, int flags, int *found, int *score)
{
  unsigned int i;
  grn_rc rc;
  /* str_lens, found and score are dereferenced below, so reject NULL here
     together with the other mandatory arguments */
  if (!q || !strs || !str_lens || !nstrs || !found || !score) {
    return GRN_INVALID_ARGUMENT;
  }
  *found = *score = 0;
  if (!q->snip_conds) {
    if ((rc = alloc_snip_conds(ctx, q))) { return rc; }
    flags |= GRN_QUERY_SCAN_ALLOCCONDS;
  } else if (flags & GRN_QUERY_SCAN_ALLOCCONDS) {
    GRN_LOG(ctx, GRN_LOG_WARNING, "invalid flags specified on grn_query_scan");
    return GRN_INVALID_ARGUMENT;
  }
  for (i = 0; i < nstrs; i++) {
    grn_str *n;
    snip_cond *sc = q->snip_conds;
    int f = GRN_STR_WITH_CHECKS | GRN_STR_REMOVEBLANK;
    if (flags & GRN_QUERY_SCAN_NORMALIZE) { f |= GRN_STR_NORMALIZE; }
    n = grn_str_open(ctx, *(strs + i), *(str_lens + i), f);
    if (!n) { return GRN_NO_MEMORY_AVAILABLE; }
    if ((rc = scan_query(ctx, q, n, i + 1, q->expr, &sc, GRN_OP_OR, flags, found, score))) {
      grn_str_close(ctx, n);
      return rc;
    }
    /* conditions are initialized during the first pass only */
    flags &= ~GRN_QUERY_SCAN_ALLOCCONDS;
    grn_str_close(ctx, n);
  }
  return GRN_SUCCESS;
}
/*
 * Creates a grn_string object wrapping `str` (not copied) and normalizes it.
 *
 * normalizer: NULL for no normalization (a "fake" string is produced via
 *             grn_fake_string_open), GRN_NORMALIZER_AUTO to look up the
 *             normalizer named GRN_NORMALIZER_AUTO_NAME, or an explicit
 *             normalizer object.
 *
 * Returns the new object, or NULL when `str` is NULL/empty, the auto
 * normalizer cannot be found, allocation fails, or normalization leaves an
 * error in ctx->rc.  A normalizer obtained through the AUTO lookup is
 * unlinked before returning.
 */
grn_obj *
grn_string_open_(grn_ctx *ctx, const char *str, unsigned int str_len,
                 grn_obj *normalizer, int flags, grn_encoding encoding)
{
  grn_string *string;
  grn_obj *result;
  grn_bool auto_requested;

  if (str == NULL || str_len == 0) {
    return NULL;
  }

  auto_requested = (normalizer == GRN_NORMALIZER_AUTO);
  if (auto_requested) {
    normalizer = grn_ctx_get(ctx, GRN_NORMALIZER_AUTO_NAME, -1);
    if (!normalizer) {
      ERR(GRN_INVALID_ARGUMENT,
          "[string][open] NormalizerAuto normalizer isn't available");
      return NULL;
    }
  }

  string = GRN_MALLOCN(grn_string, 1);
  if (!string) {
    if (auto_requested) {
      grn_obj_unlink(ctx, normalizer);
    }
    GRN_LOG(ctx, GRN_LOG_ALERT,
            "[string][open] failed to allocate memory");
    return NULL;
  }

  result = (grn_obj *)string;
  GRN_OBJ_INIT(result, GRN_STRING, GRN_OBJ_ALLOCATED, GRN_ID_NIL);

  string->original = str;
  string->original_length_in_bytes = str_len;
  string->normalized = NULL;
  string->normalized_length_in_bytes = 0;
  string->n_characters = 0;
  string->checks = NULL;
  string->ctypes = NULL;
  string->encoding = encoding;
  string->flags = flags;

  if (!normalizer) {
    return (grn_obj *)grn_fake_string_open(ctx, string);
  }

  grn_normalizer_normalize(ctx, normalizer, (grn_obj *)string);
  if (ctx->rc) {
    grn_obj_close(ctx, result);
    result = NULL;
  }

  if (auto_requested) {
    grn_obj_unlink(ctx, normalizer);
  }
  return result;
}
/*
 * Logs (at ERROR level) that setting a column value failed during load,
 * including ctx->errbuf, the inspected key, the column name and the
 * inspected value.
 *
 * column_name is not required to be NUL-terminated: only column_name_size
 * bytes are printed.
 */
static void
report_set_column_value_failure(grn_ctx *ctx,
                                grn_obj *key,
                                const char *column_name,
                                unsigned int column_name_size,
                                grn_obj *column_value)
{
  grn_obj key_inspected, column_value_inspected;

  GRN_TEXT_INIT(&key_inspected, 0);
  GRN_TEXT_INIT(&column_value_inspected, 0);
  grn_inspect_limited(ctx, &key_inspected, key);
  grn_inspect_limited(ctx, &column_value_inspected, column_value);
  /* every "%.*s" precision argument must have type int; column_name_size is
     unsigned, so it is cast like the other two lengths */
  GRN_LOG(ctx, GRN_LOG_ERROR,
          "[table][load] failed to set column value: %s: "
          "key: <%.*s>, column: <%.*s>, value: <%.*s>",
          ctx->errbuf,
          (int)GRN_TEXT_LEN(&key_inspected),
          GRN_TEXT_VALUE(&key_inspected),
          (int)column_name_size,
          column_name,
          (int)GRN_TEXT_LEN(&column_value_inspected),
          GRN_TEXT_VALUE(&column_value_inspected));
  GRN_OBJ_FIN(ctx, &key_inspected);
  GRN_OBJ_FIN(ctx, &column_value_inspected);
}
/*
 * Initializes snippet condition `sc` for `keyword`.
 *
 * `sc` is zeroed, the keyword is opened as a grn_string with
 * GRN_STR_REMOVEBLANK, and, for normalized keywords longer than one byte,
 * the bad-character table sc->bmBc is built; the entry for the keyword's
 * last byte is saved in sc->shift and then zeroed.
 *
 * `enc` and `flags` are accepted but not used here.
 *
 * Returns GRN_SUCCESS, GRN_NO_MEMORY_AVAILABLE when the keyword cannot be
 * opened, or GRN_INVALID_ARGUMENT when the normalized keyword is empty (the
 * condition is closed in that case).
 */
grn_rc
grn_snip_cond_init(grn_ctx *ctx, snip_cond *sc, const char *keyword,
                   unsigned int keyword_len, grn_encoding enc,
                   grn_obj *normalizer, int flags)
{
  const char *normalized;
  unsigned int normalized_bytes;
  int open_flags = GRN_STR_REMOVEBLANK;

  memset(sc, 0, sizeof(snip_cond));

  sc->keyword = grn_string_open(ctx, keyword, keyword_len, normalizer, open_flags);
  if (!sc->keyword) {
    GRN_LOG(ctx, GRN_LOG_ALERT,
            "grn_string_open on snip_cond_init failed!");
    return GRN_NO_MEMORY_AVAILABLE;
  }

  grn_string_get_normalized(ctx, sc->keyword, &normalized, &normalized_bytes, NULL);
  if (normalized_bytes == 0) {
    grn_snip_cond_close(ctx, sc);
    return GRN_INVALID_ARGUMENT;
  }
  if (normalized_bytes == 1) {
    /* single-byte keyword: no table is built */
    return GRN_SUCCESS;
  }

  grn_bm_preBmBc((unsigned char *)normalized, normalized_bytes, sc->bmBc);
  {
    unsigned char last_byte = (unsigned char)normalized[normalized_bytes - 1];
    sc->shift = sc->bmBc[last_byte];
    sc->bmBc[last_byte] = 0;
  }
  return GRN_SUCCESS;
}
/*
 * Initializes an n-gram tokenizer instance.
 *
 * The text to tokenize is popped from the ctx stack.  A grn_ngram_tokenizer
 * is allocated and published through user_data->ptr, then the text is opened
 * with GRN_STR_REMOVEBLANK | GRN_STR_WITH_CTYPES (plus GRN_OBJ_KEY_NORMALIZE
 * when the table has that flag) using the table's encoding.
 *
 * Returns GRN_SUCCESS, GRN_INVALID_ARGUMENT when nothing can be popped,
 * ctx->rc when the tokenizer allocation fails, or GRN_TOKENIZER_ERROR when
 * the string cannot be opened.  On the latter failure the tokenizer is
 * released and user_data->ptr is reset to NULL so that no half-initialized
 * (or freed) object stays reachable.
 */
static grn_rc
ngram_init(grn_ctx *ctx, grn_obj *table, grn_proc_data *user_data, uint8_t ngram_unit)
{
  grn_obj *str;
  int nflags = GRN_STR_REMOVEBLANK|GRN_STR_WITH_CTYPES;
  grn_ngram_tokenizer *token;
  grn_obj_flags table_flags;
  if (!(str = grn_ctx_pop(ctx))) { return GRN_INVALID_ARGUMENT; }
  if (!(token = GRN_MALLOC(sizeof(grn_ngram_tokenizer)))) { return ctx->rc; }
  user_data->ptr = token;
  token->uni_alpha = 1;
  token->uni_digit = 1;
  token->uni_symbol = 1;
  token->ngram_unit = ngram_unit;
  token->overlap = 0;
  token->pos = 0;
  token->skip = 0;
  grn_table_get_info(ctx, table, &table_flags, &token->encoding, NULL);
  nflags |= (table_flags & GRN_OBJ_KEY_NORMALIZE);
  if (!(token->nstr = grn_str_open_(ctx, GRN_TEXT_VALUE(str), GRN_TEXT_LEN(str),
                                    nflags, token->encoding))) {
    GRN_LOG(ctx, GRN_LOG_ALERT, "grn_str_open failed at grn_token_open");
    /* do not leak the tokenizer and do not leave a stale pointer behind */
    user_data->ptr = NULL;
    GRN_FREE(token);
    return GRN_TOKENIZER_ERROR;
  }
  token->next = (unsigned char *)token->nstr->norm;
  token->end = token->next + token->nstr->norm_blen;
  token->ctypes = token->nstr->ctypes;
  token->len = token->nstr->length;
  GRN_TEXT_INIT(&token->curr_, GRN_OBJ_DO_SHALLOW_COPY);
  GRN_UINT32_INIT(&token->stat_, 0);
  return GRN_SUCCESS;
}
/*
 * Emits the opening of an array named `name` into `outbuf` for the given
 * output type, after writing a delimiter via put_delimiter():
 *   JSON    - "["
 *   XML     - "<name>", and `name` is appended to ctx->impl->names
 *   TSV     - "[\t" when DEPTH > 2
 *   MSGPACK - an array header of `nelements` (only when built with
 *             HAVE_MESSAGE_PACK; a negative count is logged at DEBUG level)
 *   NONE    - nothing
 * Finally the nesting depth is updated with INCR_DEPTH(0).
 */
void
grn_output_array_open(grn_ctx *ctx, grn_obj *outbuf, grn_content_type output_type,
                      const char *name, int nelements)
{
  put_delimiter(ctx, outbuf, output_type);

  if (output_type == GRN_CONTENT_JSON) {
    GRN_TEXT_PUTC(ctx, outbuf, '[');
  } else if (output_type == GRN_CONTENT_XML) {
    GRN_TEXT_PUTC(ctx, outbuf, '<');
    GRN_TEXT_PUTS(ctx, outbuf, name);
    GRN_TEXT_PUTC(ctx, outbuf, '>');
    grn_vector_add_element(ctx, &ctx->impl->names,
                           name, strlen(name), 0, GRN_DB_SHORT_TEXT);
  } else if (output_type == GRN_CONTENT_TSV) {
    if (DEPTH > 2) {
      GRN_TEXT_PUTS(ctx, outbuf, "[\t");
    }
  } else if (output_type == GRN_CONTENT_MSGPACK) {
#ifdef HAVE_MESSAGE_PACK
    if (nelements < 0) {
      GRN_LOG(ctx, GRN_LOG_DEBUG,
              "grn_output_array_open nelements (%d) for <%s>",
              nelements, name);
    }
    msgpack_pack_array(&ctx->impl->msgpacker, nelements);
#endif
  }
  /* GRN_CONTENT_NONE and any other type: nothing is written */

  INCR_DEPTH(0);
}
/*
 * Loads the synonyms TSV file line by line.
 *
 * The path comes from the GRN_QUERY_EXPANDER_TSV_SYNONYMS_FILE environment
 * variable when it is non-empty, otherwise from get_system_synonyms_file().
 * A missing/unopenable file is logged as a warning and ignored.
 *
 * For each line a trailing "\n" or "\r\n" is excluded from the length, the
 * first line is additionally passed through guess_encoding() (which receives
 * the line pointer and length by address), and the line is handed to
 * parse_synonyms_file_line() with rewound key/value buffers.
 */
static void
load_synonyms(grn_ctx *ctx)
{
  static char path_env[GRN_ENV_BUFFER_SIZE];
  const char *path;
  grn_file_reader *reader;
  int line_number = 0;
  grn_encoding encoding;
  grn_obj line, key, value;

  grn_getenv("GRN_QUERY_EXPANDER_TSV_SYNONYMS_FILE",
             path_env,
             GRN_ENV_BUFFER_SIZE);
  path = path_env[0] ? path_env : get_system_synonyms_file();

  reader = grn_file_reader_open(ctx, path);
  if (!reader) {
    GRN_LOG(ctx, GRN_LOG_WARNING,
            "[plugin][query-expander][tsv] "
            "synonyms file doesn't exist: <%s>",
            path);
    return;
  }

  GRN_TEXT_INIT(&line, 0);
  GRN_TEXT_INIT(&key, 0);
  GRN_TEXT_INIT(&value, 0);
  grn_bulk_reserve(ctx, &value, MAX_SYNONYM_BYTES);

  while (grn_file_reader_read_line(ctx, reader, &line) == GRN_SUCCESS) {
    const char *text = GRN_TEXT_VALUE(&line);
    size_t text_length = GRN_TEXT_LEN(&line);

    /* drop one trailing LF, and a CR directly in front of it */
    if (text_length > 0 && text[text_length - 1] == '\n') {
      text_length--;
      if (text_length > 0 && text[text_length - 1] == '\r') {
        text_length--;
      }
    }

    line_number++;
    if (line_number == 1) {
      encoding = guess_encoding(ctx, &text, &text_length);
    }

    GRN_BULK_REWIND(&key);
    GRN_BULK_REWIND(&value);
    parse_synonyms_file_line(ctx, text, text_length, &key, &value);
    GRN_BULK_REWIND(&line);
  }

  GRN_OBJ_FIN(ctx, &line);
  GRN_OBJ_FIN(ctx, &key);
  GRN_OBJ_FIN(ctx, &value);
  grn_file_reader_close(ctx, reader);
}
/*
 * Stores the address `value` in `hash` under the NUL-terminated `key`.
 *
 * Returns true when a new entry was created and the pointer stored.
 * Returns false when the key already exists (logged as a warning, the
 * existing value is left untouched) or when grn_hash_add() fails (logged as
 * an error).
 */
bool mrn_hash_put(grn_ctx *ctx, grn_hash *hash, const char *key, grn_obj *value)
{
  int added = 0;
  void *buf = NULL;
  grn_id id;

  id = grn_hash_add(ctx, hash, (const char *)key, strlen(key), &buf, &added);
  /* on failure neither `buf` nor `added` is guaranteed to be written, so
     the result must be checked before either is used */
  if (id == GRN_ID_NIL || !buf) {
    GRN_LOG(ctx, GRN_LOG_ERROR, "hash put failed (key=%s)", key);
    return false;
  }
  /* duplicate check */
  if (added == 0) {
    GRN_LOG(ctx, GRN_LOG_WARNING, "hash put duplicated (key=%s)", key);
    return false;
  }
  /* store address of value */
  memcpy(buf, &value, sizeof(grn_obj *));
  GRN_LOG(ctx, GRN_LOG_DEBUG, "hash put (key=%s)", key);
  return true;
}
/*
 * Allocates a zero-filled array of q->cur_expr snip_cond entries and stores
 * it in q->snip_conds.
 *
 * Returns GRN_SUCCESS, or GRN_NO_MEMORY_AVAILABLE when the allocation
 * returns NULL (q->snip_conds is NULL in that case).
 */
static grn_rc
alloc_snip_conds(grn_ctx *ctx, grn_query *q)
{
  q->snip_conds = GRN_CALLOC(sizeof(snip_cond) * q->cur_expr);
  if (q->snip_conds) {
    return GRN_SUCCESS;
  }
  GRN_LOG(ctx, GRN_LOG_ALERT, "snip_cond allocation failed");
  return GRN_NO_MEMORY_AVAILABLE;
}
/*
 * If any repeated error messages were counted, logs how many were truncated
 * (NOTICE level) and resets the counter to zero; otherwise does nothing.
 */
static void
grn_ctx_impl_clear_n_same_error_messagges(grn_ctx *ctx)
{
  if (ctx->impl->n_same_error_messages != 0) {
    GRN_LOG(ctx, GRN_LOG_NOTICE,
            "(%u same messages are truncated)",
            ctx->impl->n_same_error_messages);
    ctx->impl->n_same_error_messages = 0;
  }
}
/*
 * Removes the entry stored under the NUL-terminated `key` from `hash`.
 *
 * Returns true when the entry existed and was deleted; false when the key is
 * not present (warning logged) or the deletion fails (error logged).
 */
bool mrn_hash_remove(grn_ctx *ctx, grn_hash *hash, const char *key)
{
  grn_id id = grn_hash_get(ctx, hash, (const char*) key, strlen(key), NULL);

  if (id == GRN_ID_NIL) {
    GRN_LOG(ctx, GRN_LOG_WARNING, "hash remove not found (key=%s)", key);
    return false;
  }

  if (grn_hash_delete_by_id(ctx, hash, id, NULL) != GRN_SUCCESS) {
    GRN_LOG(ctx, GRN_LOG_ERROR, "hash remove error (key=%s)", key);
    return false;
  }

  GRN_LOG(ctx, GRN_LOG_DEBUG, "hash remove (key=%s)", key);
  return true;
}
grn_rc grn_com_event_poll(grn_ctx *ctx, grn_com_event *ev, int timeout) { int nevents; grn_com *com; #ifdef USE_SELECT uint32_t dummy; grn_sock *pfd; int nfds = 0; fd_set rfds; fd_set wfds; struct timeval tv; if (timeout >= 0) { tv.tv_sec = timeout / 1000; tv.tv_usec = (timeout % 1000) * 1000; } FD_ZERO(&rfds); FD_ZERO(&wfds); ctx->errlvl = GRN_OK; ctx->rc = GRN_SUCCESS; { grn_hash_cursor *cursor; cursor = grn_hash_cursor_open(ctx, ev->hash, NULL, 0, NULL, 0, 0, -1, 0); if (cursor) { grn_id id; while ((id = grn_hash_cursor_next(ctx, cursor))) { grn_hash_cursor_get_key_value(ctx, cursor, (void **)(&pfd), &dummy, (void **)(&com)); if ((com->events & GRN_COM_POLLIN)) { FD_SET(*pfd, &rfds); } if ((com->events & GRN_COM_POLLOUT)) { FD_SET(*pfd, &wfds); } # ifndef WIN32 if (*pfd > nfds) { nfds = *pfd; } # endif /* WIN32 */ } grn_hash_cursor_close(ctx, cursor); } } nevents = select(nfds + 1, &rfds, &wfds, NULL, (timeout >= 0) ? &tv : NULL); if (nevents < 0) { SOERR("select"); if (ctx->rc == GRN_INTERRUPTED_FUNCTION_CALL) { ERRCLR(ctx); } return ctx->rc; } if (timeout < 0 && !nevents) { GRN_LOG(ctx, GRN_LOG_NOTICE, "select returns 0 events"); } GRN_HASH_EACH(ctx, ev->hash, eh, &pfd, &dummy, &com, { if (FD_ISSET(*pfd, &rfds)) { grn_com_receiver(ctx, com); } });
/*
 * Parses a comma separated list of "key[:value]" weight entries starting at
 * `source` (bounded by query->str_end) and returns the position where
 * parsing stopped.
 *
 * Keys must be non-zero and <= GRN_ID_MAX; a missing ":value" means 1.
 * Weights go into the fixed array query->opt.weight_vector (index key - 1)
 * while every key is below DEFAULT_WEIGHT_VECTOR_SIZE.  The first larger key
 * frees the array, creates the hash query->weight_set and restarts parsing
 * from `source` so all entries end up in the hash.
 *
 * Returns `source` unchanged when the array or the hash cannot be created.
 */
static const char *
get_weight_vector(grn_ctx *ctx, grn_query *query, const char *source)
{
  const char *cursor = source;

  if (!query->opt.weight_vector && !query->weight_set) {
    query->opt.weight_vector = GRN_CALLOC(sizeof(int) * DEFAULT_WEIGHT_VECTOR_SIZE);
    if (!query->opt.weight_vector) {
      GRN_LOG(ctx, GRN_LOG_ALERT, "get_weight_vector malloc fail");
      return source;
    }
  }

  while (cursor < query->str_end) {
    unsigned int key;
    int value = 1;

    /* key, key is not zero */
    key = grn_atoui(cursor, query->str_end, &cursor);
    if (!key || key > GRN_ID_MAX) {
      break;
    }

    /* optional ":value" */
    if (*cursor == ':') {
      cursor++;
      value = grn_atoi(cursor, query->str_end, &cursor);
    }

    if (!query->weight_set && key >= DEFAULT_WEIGHT_VECTOR_SIZE) {
      /* key does not fit the array: switch to the hash and start over */
      GRN_FREE(query->opt.weight_vector);
      query->opt.weight_vector = NULL;
      query->weight_set = grn_hash_create(ctx, NULL, sizeof(unsigned int), sizeof(int), 0);
      if (!query->weight_set) {
        return source;
      }
      cursor = source; /* reparse */
      continue;
    }

    if (query->weight_set) {
      int *slot;
      if (grn_hash_add(ctx, query->weight_set, &key, sizeof(unsigned int),
                       (void **)&slot, NULL)) {
        *slot = value;
      }
    } else {
      query->opt.weight_vector[key - 1] = value;
    }

    if (*cursor != ',') {
      break;
    }
    cursor++;
  }
  return cursor;
}
/*
 * Looks up the NUL-terminated `key` in `hash`.
 *
 * On a hit the stored pointer is copied into *value and true is returned.
 * On a miss a DEBUG message is logged, *value is left untouched and false
 * is returned.
 */
bool mrn_hash_get(grn_ctx *ctx, grn_hash *hash, const char *key, grn_obj **value)
{
  void *slot;
  grn_id id = grn_hash_get(ctx, hash, (const char *)key, strlen(key), &slot);

  if (id == GRN_ID_NIL) {
    /* key not found */
    GRN_LOG(ctx, GRN_LOG_DEBUG, "hash get not found (key=%s)", key);
    return false;
  }

  /* restore address of value */
  memcpy(value, slot, sizeof(grn_obj *));
  return true;
}
/*
 * Registers socket `fd` with event set `ev` for `events`.
 *
 * With USE_EPOLL / USE_KQUEUE the descriptor is first added to the kernel
 * queue; then an entry keyed by `fd` is added to ev->hash and initialized.
 * When `com` is non-NULL and the hash entry was obtained, *com receives it.
 *
 * Returns GRN_INVALID_ARGUMENT when `ev` is NULL or already holds
 * ev->max_nevents entries (the latter is logged); otherwise ctx->rc.
 */
grn_rc
grn_com_event_add(grn_ctx *ctx, grn_com_event *ev, grn_sock fd, int events, grn_com **com)
{
  grn_com *entry;
  /* todo : expand events */
  if (!ev) {
    return GRN_INVALID_ARGUMENT;
  }
  if (*ev->hash->n_entries == ev->max_nevents) {
    GRN_LOG(ctx, GRN_LOG_ERROR, "too many connections (%d)", ev->max_nevents);
    return GRN_INVALID_ARGUMENT;
  }
#ifdef USE_EPOLL
  {
    struct epoll_event request;
    memset(&request, 0, sizeof(struct epoll_event));
    request.data.fd = (fd);
    request.events = (uint32_t) events;
    if (epoll_ctl(ev->epfd, EPOLL_CTL_ADD, (fd), &request) == -1) {
      SERR("epoll_ctl");
      return ctx->rc;
    }
  }
#endif /* USE_EPOLL*/
#ifdef USE_KQUEUE
  {
    struct kevent change;
    /* todo: udata should have fd */
    EV_SET(&change, (fd), events, EV_ADD, 0, 0, NULL);
    if (kevent(ev->kqfd, &change, 1, NULL, 0, NULL) == -1) {
      SERR("kevent");
      return ctx->rc;
    }
  }
#endif /* USE_KQUEUE */
  if (!grn_hash_add(ctx, ev->hash, &fd, sizeof(grn_sock), (void **)&entry, NULL)) {
    return ctx->rc;
  }
  entry->ev = ev;
  entry->fd = fd;
  entry->events = events;
  if (com) {
    *com = entry;
  }
  return ctx->rc;
}
/*
 * Changes the event mask registered for socket `fd` in `ev`.
 *
 * The entry is looked up in ev->hash.  When `com` is non-NULL it receives
 * the entry.  If the requested mask differs from the stored one, the kernel
 * queue is updated (USE_EPOLL: EPOLL_CTL_MOD; USE_KQUEUE: delete + add) and
 * the stored mask is replaced.
 *
 * Returns GRN_SUCCESS; GRN_INVALID_ARGUMENT when `ev` is NULL or `fd` is not
 * registered; GRN_OBJECT_CORRUPT when the stored fd does not match (logged);
 * or ctx->rc when the kernel call fails.
 */
grn_rc
grn_com_event_mod(grn_ctx *ctx, grn_com_event *ev, grn_sock fd, int events, grn_com **com)
{
  grn_com *entry;

  if (!ev) {
    return GRN_INVALID_ARGUMENT;
  }
  if (!grn_hash_get(ctx, ev->hash, &fd, sizeof(grn_sock), (void **)&entry)) {
    return GRN_INVALID_ARGUMENT;
  }
  if (entry->fd != fd) {
    GRN_LOG(ctx, GRN_LOG_ERROR,
            "grn_com_event_mod fd unmatch "
            "%" GRN_FMT_SOCKET " != %" GRN_FMT_SOCKET,
            entry->fd, fd);
    return GRN_OBJECT_CORRUPT;
  }
  if (com) {
    *com = entry;
  }
  if (entry->events == events) {
    /* nothing to change */
    return GRN_SUCCESS;
  }
#ifdef USE_EPOLL
  {
    struct epoll_event request;
    memset(&request, 0, sizeof(struct epoll_event));
    request.data.fd = (fd);
    request.events = (uint32_t) events;
    if (epoll_ctl(ev->epfd, EPOLL_CTL_MOD, (fd), &request) == -1) {
      SERR("epoll_ctl");
      return ctx->rc;
    }
  }
#endif /* USE_EPOLL*/
#ifdef USE_KQUEUE
  {
    /* experimental */
    struct kevent changes[2];
    EV_SET(&changes[0], (fd), GRN_COM_POLLIN|GRN_COM_POLLOUT, EV_DELETE, 0, 0, NULL);
    EV_SET(&changes[1], (fd), events, EV_ADD, 0, 0, NULL);
    if (kevent(ev->kqfd, changes, 2, NULL, 0, NULL) == -1) {
      SERR("kevent");
      return ctx->rc;
    }
  }
#endif /* USE_KQUEUE */
  entry->events = events;
  return GRN_SUCCESS;
}
/* TODO: delete overlapping logic with exec_query */
/*
 * Walks the query cell list `c` and registers every string cell as a
 * snippet condition on `snip`.
 *
 * op:     operator the list is combined with (passed down on recursion).
 * n_tags: number of entries in the opentags/closetags arrays; the tag pair
 *         for a new condition is chosen as snip->cond_len % n_tags.
 * c_but:  non-zero while inside a negated (GRN_OP_BUT) sub-expression; it is
 *         toggled when recursing into a list combined with GRN_OP_BUT.  A
 *         string cell is added only when c_but equals whether the cell's
 *         own operator is GRN_OP_BUT.
 *
 * Returns GRN_SUCCESS, GRN_INVALID_ARGUMENT when a condition has to be added
 * but n_tags is 0, or the first error returned by grn_snip_add_cond(),
 * including errors raised inside nested lists.
 */
static grn_rc
snip_query(grn_ctx *ctx, grn_query *q, grn_snip *snip, grn_cell *c, grn_operator op,
           unsigned int n_tags, int c_but,
           const char **opentags, unsigned int *opentag_lens,
           const char **closetags, unsigned int *closetag_lens)
{
  grn_cell *e, *ope = NIL;
  grn_operator op0 = GRN_OP_OR, *opp = &op0, op1 = q->default_op;
  while (c != NIL) {
    POP(e, c);
    switch (e->header.type) {
    case GRN_CELL_OP :
      ope = e;
      op1 = ope->u.op.op;
      continue;
    case GRN_CELL_STR :
      if (ope != NIL) {
        q->opt.mode = ope->u.op.mode == -1 ? q->default_mode : ope->u.op.mode;
      } else {
        q->opt.mode = q->default_mode;
      }
      if (!(c_but ^ (*opp == GRN_OP_BUT))) {
        grn_rc rc;
        unsigned int i;
        /* without any tag pair the modulo below would divide by zero */
        if (!n_tags) { return GRN_INVALID_ARGUMENT; }
        i = snip->cond_len % n_tags;
        if ((rc = grn_snip_add_cond(ctx, snip, e->u.b.value, e->u.b.size,
                                    opentags[i], opentag_lens[i],
                                    closetags[i], closetag_lens[i]))) {
          return rc;
        }
      }
      break;
    case GRN_CELL_LIST :
      {
        /* propagate failures from nested lists instead of dropping them */
        grn_rc rc = snip_query(ctx, q, snip, e, *opp, n_tags,
                               (*opp == GRN_OP_BUT) ? c_but ^ 1 : c_but,
                               opentags, opentag_lens, closetags, closetag_lens);
        if (rc) { return rc; }
      }
      break;
    default :
      GRN_LOG(ctx, GRN_LOG_NOTICE, "invalid object assigned in query!! (%d)", e->header.type);
      break;
    }
    /* after the first operand, operators come from the preceding OP cell
       (or the query default) */
    opp = &op1;
    ope = NIL;
    op1 = q->default_op;
  }
  return GRN_SUCCESS;
}
/*
 * Logs "<action>[index]<tag> <index name>" at GRN_REPORT_INDEX_LOG_LEVEL.
 * Nothing is done (the name is not even resolved) when the logger would
 * discard that level.
 */
void
grn_report_index(grn_ctx *ctx, const char *action, const char *tag, grn_obj *index)
{
  if (grn_logger_pass(ctx, GRN_REPORT_INDEX_LOG_LEVEL)) {
    char name_buffer[GRN_TABLE_MAX_KEY_SIZE];
    int name_length;

    name_length = grn_obj_name(ctx, index, name_buffer, GRN_TABLE_MAX_KEY_SIZE);
    GRN_LOG(ctx, GRN_REPORT_INDEX_LOG_LEVEL,
            "%s[index]%s <%.*s>",
            action, tag, name_length, name_buffer);
  }
}
/*
 * Unregisters socket `fd` from event set `ev`.
 *
 * The entry is looked up in ev->hash; with USE_EPOLL the descriptor is
 * removed from the epoll set unless the entry is marked closed, with
 * USE_KQUEUE the kevent is deleted.  Finally the hash entry is deleted.
 *
 * Returns GRN_INVALID_ARGUMENT when `ev` is NULL or `fd` is not registered
 * (the latter is logged), ctx->rc when the kernel call fails, otherwise the
 * result of grn_hash_delete_by_id().
 */
grn_rc
grn_com_event_del(grn_ctx *ctx, grn_com_event *ev, grn_sock fd)
{
  grn_com *entry;
  grn_id id;

  if (!ev) {
    return GRN_INVALID_ARGUMENT;
  }

  id = grn_hash_get(ctx, ev->hash, &fd, sizeof(grn_sock), (void **)&entry);
  if (!id) {
    GRN_LOG(ctx, GRN_LOG_ERROR,
            "%04x| fd(%" GRN_FMT_SOCKET ") not found in ev(%p)",
            getpid(), fd, ev);
    return GRN_INVALID_ARGUMENT;
  }
#ifdef USE_EPOLL
  if (!entry->closed) {
    struct epoll_event request;
    memset(&request, 0, sizeof(struct epoll_event));
    request.data.fd = fd;
    request.events = entry->events;
    if (epoll_ctl(ev->epfd, EPOLL_CTL_DEL, fd, &request) == -1) {
      SERR("epoll_ctl");
      return ctx->rc;
    }
  }
#endif /* USE_EPOLL*/
#ifdef USE_KQUEUE
  {
    struct kevent change;
    EV_SET(&change, (fd), entry->events, EV_DELETE, 0, 0, NULL);
    if (kevent(ev->kqfd, &change, 1, NULL, 0, NULL) == -1) {
      SERR("kevent");
      return ctx->rc;
    }
  }
#endif /* USE_KQUEUE */
  return grn_hash_delete_by_id(ctx, ev->hash, id, NULL);
}
/*
 * Logs "<action>[table]<tag> <description>" at GRN_REPORT_INDEX_LOG_LEVEL.
 *
 * The description lists `table` followed by the chain of objects reached by
 * repeatedly resolving header.domain with grn_ctx_at(), joined by " -> ".
 * Named objects are printed as "<name>", unnamed ones as "(temporary)".
 * Nothing is done when the logger would discard that level.
 */
void
grn_report_table(grn_ctx *ctx, const char *action, const char *tag, grn_obj *table)
{
  grn_obj description;
  grn_obj *current = table;

  if (!grn_logger_pass(ctx, GRN_REPORT_INDEX_LOG_LEVEL)) {
    return;
  }

  GRN_TEXT_INIT(&description, 0);
  while (current) {
    char name[GRN_TABLE_MAX_KEY_SIZE];
    int name_size = grn_obj_name(ctx, current, name, GRN_TABLE_MAX_KEY_SIZE);

    if (GRN_TEXT_LEN(&description) > 0) {
      GRN_TEXT_PUTS(ctx, &description, " -> ");
    }
    if (name_size != 0) {
      GRN_TEXT_PUTS(ctx, &description, "<");
      GRN_TEXT_PUT(ctx, &description, name, name_size);
      GRN_TEXT_PUTS(ctx, &description, ">");
    } else {
      GRN_TEXT_PUTS(ctx, &description, "(temporary)");
    }

    current = grn_ctx_at(ctx, current->header.domain);
  }

  GRN_LOG(ctx, GRN_REPORT_INDEX_LOG_LEVEL,
          "%s[table]%s %.*s",
          action, tag,
          (int)GRN_TEXT_LEN(&description),
          GRN_TEXT_VALUE(&description));
  GRN_OBJ_FIN(ctx, &description);
}
static grn_rc mecab_init(grn_ctx *ctx, grn_obj *table, grn_proc_data *user_data) { grn_obj *str; int nflags = 0; char *buf, *s, *p; char mecab_err[256]; grn_obj_flags table_flags; grn_mecab_tokenizer *token; unsigned int bufsize, maxtrial = 10, len; if (!(str = grn_ctx_pop(ctx))) { return GRN_INVALID_ARGUMENT; } SOLE_MECAB_CONFIRM; if (!sole_mecab) { GRN_LOG(ctx, GRN_LOG_ALERT, "mecab_new failed on grn_mecab_init"); return GRN_TOKENIZER_ERROR; } if (!(token = GRN_MALLOC(sizeof(grn_mecab_tokenizer)))) { return ctx->rc; } user_data->ptr = token; token->mecab = sole_mecab; // if (!(token->mecab = mecab_new3())) { grn_table_get_info(ctx, table, &table_flags, &token->encoding, NULL); nflags |= (table_flags & GRN_OBJ_KEY_NORMALIZE); if (!(token->nstr = grn_str_open_(ctx, GRN_TEXT_VALUE(str), GRN_TEXT_LEN(str), nflags, token->encoding))) { GRN_LOG(ctx, GRN_LOG_ALERT, "grn_str_open failed at grn_token_open"); return GRN_TOKENIZER_ERROR; } len = token->nstr->norm_blen; mecab_err[sizeof(mecab_err) - 1] = '\0'; for (bufsize = len * 2 + 1; maxtrial; bufsize *= 2, maxtrial--) { if(!(buf = GRN_MALLOC(bufsize + 1))) { GRN_LOG(ctx, GRN_LOG_ALERT, "buffer allocation on mecab_init failed !"); GRN_FREE(token); return ctx->rc; } MUTEX_LOCK(sole_mecab_lock); s = mecab_sparse_tostr3(token->mecab, token->nstr->norm, len, buf, bufsize); if (!s) { strncpy(mecab_err, mecab_strerror(token->mecab), sizeof(mecab_err) - 1); } MUTEX_UNLOCK(sole_mecab_lock); if (s) { break; } GRN_FREE(buf); if (strstr(mecab_err, "output buffer overflow") == NULL) { break; } } if (!s) { GRN_LOG(ctx, GRN_LOG_ALERT, "mecab_sparse_tostr failed len=%d bufsize=%d err=%s", len, bufsize, mecab_err); GRN_FREE(token); return GRN_TOKENIZER_ERROR; } // certain version of mecab returns trailing lf or spaces. 
for (p = buf + strlen(buf) - 1; buf <= p && (*p == '\n' || isspace(*(unsigned char *)p)); p--) { *p = '\0'; } //grn_log("sparsed='%s'", s); token->buf = (unsigned char *)buf; token->next = (unsigned char *)buf; token->end = (unsigned char *)buf + strlen(buf); GRN_TEXT_INIT(&token->curr_, GRN_OBJ_DO_SHALLOW_COPY); GRN_UINT32_INIT(&token->stat_, 0); return GRN_SUCCESS; }
/*
 * Runs every registered condition of `snip` over `string` and selects up to
 * snip->max_results snippets of at most snip->width bytes each.
 *
 * string / string_len: text to search; the pointer is stored in
 *                      snip->string, the text is not copied here.
 * nresults:            receives the number of snippets selected.
 * max_tagged_len:      receives the largest per-snippet value of
 *                      (snippet length or mapped length) + 1 + the sum of
 *                      open/close tag lengths of the tags in that snippet.
 *
 * Returns GRN_INVALID_ARGUMENT when a pointer argument is NULL; otherwise
 * ctx->rc (also when the string cannot be opened, which is logged).
 */
grn_rc
grn_snip_exec(grn_ctx *ctx, grn_snip *snip, const char *string, unsigned int string_len,
              unsigned int *nresults, unsigned int *max_tagged_len)
{
  size_t i;
  int f = GRN_STR_WITH_CHECKS|GRN_STR_REMOVEBLANK;
  if (!snip || !string || !nresults || !max_tagged_len) {
    return GRN_INVALID_ARGUMENT;
  }
  GRN_API_ENTER;
  /* drop state left over from a previous execution */
  exec_clean(ctx, snip);
  *nresults = 0;
  snip->nstr = grn_string_open(ctx, string, string_len, snip->normalizer, f);
  if (!snip->nstr) {
    exec_clean(ctx, snip);
    GRN_LOG(ctx, GRN_LOG_ALERT, "grn_string_open on grn_snip_exec failed !");
    GRN_API_RETURN(ctx->rc);
  }
  /* NOTE(review): grn_bm_tunedbm() presumably advances a condition to its
     next match and updates start_offset/end_offset/stopflag - confirm */
  for (i = 0; i < snip->cond_len; i++) {
    grn_bm_tunedbm(ctx, snip->cond + i, snip->nstr, snip->flags);
  }

  {
    _snip_tag_result *tag_result = snip->tag_result;
    _snip_result *snip_result = snip->snip_result;
    size_t last_end_offset = 0, last_last_end_offset = 0;
    unsigned int unfound_cond_count = snip->cond_len;

    *max_tagged_len = 0;
    /* outer loop: one iteration per snippet */
    while (1) {
      size_t tagged_len = 0, last_tag_end = 0;
      int_least8_t all_stop = 1, found_cond = 0;

      snip_result->tag_count = 0;

      /* inner loop: collect the tags that belong to the current snippet */
      while (1) {
        size_t min_start_offset = (size_t) -1;
        size_t max_end_offset = 0;
        snip_cond *cond = NULL;

        /* get condition which have minimum offset and is not stopped */
        for (i = 0; i < snip->cond_len; i++) {
          if (snip->cond[i].stopflag == SNIPCOND_NONSTOP &&
              (min_start_offset > snip->cond[i].start_offset ||
               (min_start_offset == snip->cond[i].start_offset &&
                max_end_offset < snip->cond[i].end_offset))) {
            min_start_offset = snip->cond[i].start_offset;
            max_end_offset = snip->cond[i].end_offset;
            cond = &snip->cond[i];
          }
        }
        if (!cond) {
          break;
        }
        /* check whether condition is the first condition in snippet */
        if (snip_result->tag_count == 0) {
          /* skip condition if the number of rest snippet field is smaller than */
          /* the number of unfound keywords. */
          if (snip->max_results - *nresults <= unfound_cond_count && cond->count > 0) {
            int_least8_t exclude_other_cond = 1;
            for (i = 0; i < snip->cond_len; i++) {
              if ((snip->cond + i) != cond
                  && snip->cond[i].end_offset <= cond->start_offset + snip->width
                  && snip->cond[i].count == 0) {
                exclude_other_cond = 0;
              }
            }
            if (exclude_other_cond) {
              grn_bm_tunedbm(ctx, cond, snip->nstr, snip->flags);
              continue;
            }
          }
          snip_result->start_offset = cond->start_offset;
          snip_result->first_tag_result_idx = snip->tag_count;
        } else {
          /* the match starts beyond the current snippet window */
          if (cond->start_offset >= snip_result->start_offset + snip->width) {
            break;
          }
          /* check nesting to make valid HTML */
          /* ToDo: allow <test><te>te</te><st>st</st></test> */
          if (cond->start_offset < last_tag_end) {
            grn_bm_tunedbm(ctx, cond, snip->nstr, snip->flags);
            continue;
          }
        }
        if (cond->end_offset > snip_result->start_offset + snip->width) {
          /* If a keyword gets across a snippet, */
          /* it was skipped and never to be tagged. */
          cond->stopflag = SNIPCOND_ACROSS;
          grn_bm_tunedbm(ctx, cond, snip->nstr, snip->flags);
        } else {
          /* record the match as a tag of the current snippet */
          found_cond = 1;
          if (cond->count == 0) {
            unfound_cond_count--;
          }
          cond->count++;
          last_end_offset = cond->end_offset;

          tag_result->cond = cond;
          tag_result->start_offset = cond->start_offset;
          tag_result->end_offset = last_tag_end = cond->end_offset;

          snip_result->tag_count++;
          tag_result++;
          tagged_len += cond->opentag_len + cond->closetag_len;
          if (++snip->tag_count >= MAX_SNIP_TAG_COUNT) {
            break;
          }
          grn_bm_tunedbm(ctx, cond, snip->nstr, snip->flags);
        }
      }
      if (!found_cond) {
        break;
      }
      /* choose the snippet start: 0 when first-tag start + last-tag end is
         below the width; otherwise half of the excess, capped at
         string_len - width and not before the previous snippet's end */
      if (snip_result->start_offset + last_end_offset < snip->width) {
        snip_result->start_offset = 0;
      } else {
        snip_result->start_offset =
          MAX(MIN
              ((snip_result->start_offset + last_end_offset - snip->width) / 2,
               string_len - snip->width), last_last_end_offset);
      }
      /* NOTE(review): grn_snip_find_firstbyte() presumably moves the offset
         to a character boundary in the given direction - confirm */
      snip_result->start_offset =
        grn_snip_find_firstbyte(string, snip->encoding, snip_result->start_offset, 1);

      snip_result->end_offset = snip_result->start_offset + snip->width;
      if (snip_result->end_offset < string_len) {
        snip_result->end_offset =
          grn_snip_find_firstbyte(string, snip->encoding, snip_result->end_offset, -1);
      } else {
        snip_result->end_offset = string_len;
      }
      last_last_end_offset = snip_result->end_offset;

      /* a mapping of (grn_snip_mapping *) -1 selects count_mapped_chars()
         for the length instead of the raw byte count */
      if (snip->mapping == (grn_snip_mapping *) -1) {
        tagged_len +=
          count_mapped_chars(&string[snip_result->start_offset],
                             &string[snip_result->end_offset]) + 1;
      } else {
        tagged_len += snip_result->end_offset - snip_result->start_offset + 1;
      }

      *max_tagged_len = MAX(*max_tagged_len, tagged_len);

      snip_result->last_tag_result_idx = snip->tag_count - 1;
      (*nresults)++;
      snip_result++;

      if (*nresults == snip->max_results || snip->tag_count == MAX_SNIP_TAG_COUNT) {
        break;
      }
      /* re-enable every condition that is not permanently stopped; finish
         when none is left */
      for (i = 0; i < snip->cond_len; i++) {
        if (snip->cond[i].stopflag != SNIPCOND_STOP) {
          all_stop = 0;
          snip->cond[i].stopflag = SNIPCOND_NONSTOP;
        }
      }
      if (all_stop) {
        break;
      }
    }
  }
  snip->snip_count = *nresults;
  snip->string = string;

  snip->max_tagged_len = *max_tagged_len;

  GRN_API_RETURN(ctx->rc);
}
/*
 * Allocates and initializes a snippet object.
 *
 * flags:       GRN_SNIP_COPY_TAG is forwarded to grn_snip_set_default_tag()
 *              for both default tags; GRN_SNIP_NORMALIZE selects
 *              GRN_NORMALIZER_AUTO as normalizer (otherwise none).
 * width:       stored as the snippet width.
 * max_results: must be in 1..MAX_SNIP_RESULT_COUNT.
 *
 * The object is registered in the ctx database under an anonymous id.
 *
 * Returns the snippet, or NULL when allocation fails, max_results is out of
 * range, or a default tag cannot be set.
 */
grn_snip *
grn_snip_open(grn_ctx *ctx, int flags, unsigned int width,
              unsigned int max_results,
              const char *defaultopentag, unsigned int defaultopentag_len,
              const char *defaultclosetag, unsigned int defaultclosetag_len,
              grn_snip_mapping *mapping)
{
  int copy_tag;
  grn_snip *snip = GRN_MALLOC(sizeof(grn_snip));

  if (!snip) {
    GRN_LOG(ctx, GRN_LOG_ALERT, "grn_snip allocation failed on grn_snip_open");
    return NULL;
  }
  if (max_results == 0 || max_results > MAX_SNIP_RESULT_COUNT) {
    GRN_LOG(ctx, GRN_LOG_WARNING, "max_results is invalid on grn_snip_open");
    GRN_FREE(snip);
    return NULL;
  }

  GRN_API_ENTER;

  snip->encoding = ctx->encoding;
  snip->flags = flags;
  snip->width = width;
  snip->max_results = max_results;
  snip->defaultopentag = NULL;
  snip->defaultclosetag = NULL;

  copy_tag = flags & GRN_SNIP_COPY_TAG;
  if (grn_snip_set_default_tag(ctx,
                               &(snip->defaultopentag),
                               &(snip->defaultopentag_len),
                               defaultopentag, defaultopentag_len,
                               copy_tag)) {
    GRN_FREE(snip);
    GRN_API_RETURN(NULL);
  }
  if (grn_snip_set_default_tag(ctx,
                               &(snip->defaultclosetag),
                               &(snip->defaultclosetag_len),
                               defaultclosetag, defaultclosetag_len,
                               copy_tag)) {
    /* the open tag is released here only when it was copied */
    if (copy_tag && snip->defaultopentag) {
      GRN_FREE((void *)snip->defaultopentag);
    }
    GRN_FREE(snip);
    GRN_API_RETURN(NULL);
  }

  snip->cond_len = 0;
  snip->mapping = mapping;
  snip->nstr = NULL;
  snip->tag_count = 0;
  snip->snip_count = 0;
  snip->normalizer = (snip->flags & GRN_SNIP_NORMALIZE) ? GRN_NORMALIZER_AUTO : NULL;

  GRN_DB_OBJ_SET_TYPE(snip, GRN_SNIP);
  {
    grn_obj *db = grn_ctx_db(ctx);
    grn_id id = grn_obj_register(ctx, db, NULL, 0);
    DB_OBJ(snip)->header.domain = GRN_ID_NIL;
    DB_OBJ(snip)->range = GRN_ID_NIL;
    grn_db_obj_init(ctx, db, id, DB_OBJ(snip));
  }

  GRN_API_RETURN(snip);
}
/*
 * Evaluates the query cell list `c` against index `i` and merges the hits
 * into result set `r` using operator `op`.
 *
 * When `r` is empty and `op` is not GRN_OP_OR there is nothing to do.  When
 * `r` is empty otherwise, hits are collected directly into `r`.  When `r`
 * already has entries, hits are collected into a temporary hash created
 * with r's key/value sizes, flags and object attributes, which is then
 * combined with `r` via grn_table_setoperation() and closed.
 *
 * Failures are logged and end the evaluation early: `r` is not merged, and
 * the temporary hash (if one was created) is closed so it does not leak.
 */
static void
exec_search(grn_ctx *ctx, grn_ii *i, grn_query *q, grn_cell *c,
            grn_hash *r, grn_operator op)
{
  grn_hash *s;
  grn_cell *e, *ope = NIL;
  int n = *r->n_entries;
  grn_operator op0 = GRN_OP_OR, *opp = &op0, op1 = q->default_op;
  if (!n && op != GRN_OP_OR) { return; }
  if (n) {
    s = grn_hash_create(ctx, NULL, r->key_size, r->value_size, r->obj.header.flags);
    if (!s) {
      /* creation can fail; do not dereference a NULL hash below */
      GRN_LOG(ctx, GRN_LOG_ERROR, "grn_hash_create on exec_search failed !");
      return;
    }
    s->obj.header.impl_flags = 0;
    s->obj.header.domain = r->obj.header.domain;
    s->obj.range = r->obj.range;
    s->obj.max_n_subrecs = r->obj.max_n_subrecs;
    s->obj.subrec_size = r->obj.subrec_size;
    s->obj.subrec_offset = r->obj.subrec_offset;
    s->obj.id = r->obj.id;
    s->obj.db = r->obj.db;
    s->obj.source = r->obj.source;
    s->obj.source_size = r->obj.source_size;
    /*
    grn_hook_entry entry;
    for (entry = 0; entry < N_HOOK_ENTRIES; entry++) {
      s->obj.hooks[entry] = NULL;
    }
    */
  } else {
    s = r;
  }
  while (c != NIL) {
    POP(e, c);
    switch (e->header.type) {
    case GRN_CELL_OP :
      /* a GRN_OP_BUT in front of the first operand also consumes the next
         cell; otherwise remember the operator cell for the next operand */
      if (opp == &op0 && e->u.op.op == GRN_OP_BUT) {
        POP(e, c);
      } else {
        ope = e;
        op1 = ope->u.op.op;
      }
      continue;
    case GRN_CELL_STR :
      if (ope != NIL) {
        q->opt.mode = ope->u.op.mode == -1 ? q->default_mode : ope->u.op.mode;
        q->opt.max_interval = q->opt.similarity_threshold = ope->u.op.option;
        if (!q->opt.weight_vector) {
          q->opt.vector_size = ope->u.op.weight + q->weight_offset;
        }
        if (ope->u.op.mode == GRN_OP_SIMILAR) {
          q->opt.max_interval = q->default_mode;
        }
      } else {
        q->opt.mode = q->default_mode;
        q->opt.max_interval = DEFAULT_MAX_INTERVAL;
        q->opt.similarity_threshold = DEFAULT_SIMILARITY_THRESHOLD;
        if (!q->opt.weight_vector) {
          q->opt.vector_size = DEFAULT_WEIGHT + q->weight_offset;
        }
      }
      if (grn_ii_select(ctx, i, e->u.b.value, e->u.b.size, s, *opp, &q->opt)) {
        GRN_LOG(ctx, GRN_LOG_ERROR, "grn_inv_select on exec_search failed !");
        /* `s` is a private temporary only when `r` was non-empty */
        if (n) { grn_hash_close(ctx, s); }
        return;
      }
      break;
    case GRN_CELL_LIST :
      exec_search(ctx, i, q, e, s, *opp);
      break;
    default :
      GRN_LOG(ctx, GRN_LOG_NOTICE, "invalid object assigned in query (%d)", e->header.type);
      break;
    }
    /* after the first operand, operators come from the preceding OP cell
       (or the query default) */
    opp = &op1;
    ope = NIL;
    op1 = q->default_op;
  }
  if (n) {
    grn_table_setoperation(ctx, (grn_obj *)r, (grn_obj *)s, (grn_obj *)r, op);
    grn_hash_close(ctx, s);
  }
}
/* TODO: delete overlapping logic with exec_query */ static grn_rc scan_query(grn_ctx *ctx, grn_query *q, grn_str *nstr, grn_id section, grn_cell *c, snip_cond **sc, grn_operator op, int flags, int *found, int *score) { int _found = 0, _score = 0; grn_cell *e, *ope = NIL; grn_operator op0 = GRN_OP_OR, *opp = &op0, op1 = q->default_op; while (c != NIL) { POP(e, c); switch (e->header.type) { case GRN_CELL_OP : if (opp == &op0 && e->u.op.op == GRN_OP_BUT) { POP(e, c); } else { ope = e; op1 = ope->u.op.op; } continue; case GRN_CELL_STR : if (ope != NIL) { q->opt.mode = ope->u.op.mode == -1 ? q->default_mode : ope->u.op.mode; q->opt.max_interval = q->opt.similarity_threshold = ope->u.op.option; if (!q->opt.weight_vector) { q->opt.vector_size = ope->u.op.weight + q->weight_offset; } } else { q->opt.mode = q->default_mode; q->opt.max_interval = DEFAULT_MAX_INTERVAL; q->opt.similarity_threshold = DEFAULT_SIMILARITY_THRESHOLD; if (!q->opt.weight_vector) { q->opt.vector_size = DEFAULT_WEIGHT + q->weight_offset; } } if ((flags & GRN_QUERY_SCAN_ALLOCCONDS)) { grn_rc rc; /* NOTE: GRN_SNIP_NORMALIZE = GRN_QUERY_SCAN_NORMALIZE */ if ((rc = grn_snip_cond_init(ctx, *sc, e->u.b.value, e->u.b.size, q->encoding, flags & GRN_SNIP_NORMALIZE))) { return rc; } } else { grn_snip_cond_reinit(*sc); } scan_keyword(*sc, nstr, section, *opp, &q->opt, &_found, &_score); (*sc)++; break; case GRN_CELL_LIST : scan_query(ctx, q, nstr, section, e, sc, *opp, flags, &_found, &_score); break; default : GRN_LOG(ctx, GRN_LOG_NOTICE, "invalid object assigned in query! (%d)", e->header.type); break; } opp = &op1; ope = NIL; op1 = q->default_op; } switch (op) { case GRN_OP_OR : *found |= _found; *score += _score; break; case GRN_OP_AND : *found &= _found; *score += _score; break; case GRN_OP_BUT : *found &= !_found; break; case GRN_OP_ADJUST : *score += _score; break; default : break; } return GRN_SUCCESS; }