/*
 * Produce the next usable token from `token` and resolve it to a record ID
 * in token->table.
 *
 * ctx:   execution context; the tokenizer's PROC_NEXT function leaves two
 *        objects on it which are popped here (status first, token text
 *        second).
 * token: tokenization state. The function updates token->curr,
 *        token->curr_size, token->status, token->force_prefix and
 *        token->pos.
 *
 * Returns the ID obtained from the table (added in GRN_TOKEN_ADD mode,
 * looked up otherwise), or GRN_ID_NIL when
 *   - token->status is already GRN_TOKEN_DONE on entry,
 *   - the tokenizer finished while only ignorable tokens were seen,
 *   - the table lock could not be taken in GRN_TOKEN_ADD mode, or
 *   - the table has no matching record.
 *
 * When GRN_ID_NIL results from a lookup and the tokenizer has not finished,
 * token->status is set to GRN_TOKEN_NOT_FOUND.
 */
grn_id
grn_token_next(grn_ctx *ctx, grn_token *token)
{
  int status;
  grn_id tid = GRN_ID_NIL;
  grn_obj *table = token->table;
  grn_obj *tokenizer = token->tokenizer;

  while (token->status != GRN_TOKEN_DONE) {
    if (tokenizer) {
      grn_obj *curr_, *stat_;

      ((grn_proc *)tokenizer)->funcs[PROC_NEXT](ctx, 1, &table,
                                                &token->pctx.user_data);
      /* The status object is popped before the token text object. */
      stat_ = grn_ctx_pop(ctx);
      curr_ = grn_ctx_pop(ctx);
      token->curr = (const unsigned char *)GRN_TEXT_VALUE(curr_);
      token->curr_size = GRN_TEXT_LEN(curr_);
      status = GRN_UINT32_VALUE(stat_);
      /* In GET mode reaching the end of the input also finishes the scan. */
      token->status = ((status & GRN_TOKENIZER_TOKEN_LAST) ||
                       (token->mode == GRN_TOKEN_GET &&
                        (status & GRN_TOKENIZER_TOKEN_REACH_END)))
        ? GRN_TOKEN_DONE : GRN_TOKEN_DOING;
      token->force_prefix = 0;

      if (token->curr_size == 0) {
        char tokenizer_name[GRN_TABLE_MAX_KEY_SIZE];
        int tokenizer_name_length;
        tokenizer_name_length =
          grn_obj_name(ctx, token->tokenizer,
                       tokenizer_name, GRN_TABLE_MAX_KEY_SIZE);
        GRN_LOG(ctx, GRN_WARN,
                "[token_next] ignore an empty token: <%.*s>: <%.*s>",
                tokenizer_name_length, tokenizer_name,
                token->orig_blen, token->orig);
        continue;
      }
      if (token->curr_size > GRN_TABLE_MAX_KEY_SIZE) {
        GRN_LOG(ctx, GRN_WARN,
                "[token_next] ignore too long token. "
                "Token must be less than or equal to %d: <%d>(<%.*s>)",
                GRN_TABLE_MAX_KEY_SIZE,
                token->curr_size,
                token->curr_size, token->curr);
        continue;
      }
      if (status & GRN_TOKENIZER_TOKEN_UNMATURED) {
        if (status & GRN_TOKENIZER_TOKEN_OVERLAP) {
          /* Unmatured overlapping tokens are not looked up in GET mode;
             only the position advances. */
          if (token->mode == GRN_TOKEN_GET) {
            token->pos++;
            continue;
          }
        } else {
          if (status & GRN_TOKENIZER_TOKEN_LAST) {
            token->force_prefix = 1;
          }
        }
      }
    } else {
      /* Without a tokenizer the current token->curr/curr_size is resolved
         exactly once. */
      token->status = GRN_TOKEN_DONE;
    }

    if (table->header.type == GRN_TABLE_NO_KEY) {
      /* For key-less tables the token bytes themselves are the ID; this is
         the same in ADD and GET mode. */
      if (token->curr_size == sizeof(grn_id)) {
        /* Copy byte by byte: token->curr is a byte buffer with no alignment
           guarantee, so loading it through a grn_id pointer would be
           undefined behaviour. */
        const unsigned char *src = (const unsigned char *)token->curr;
        unsigned char *dst = (unsigned char *)&tid;
        unsigned int i;
        for (i = 0; i < sizeof(grn_id); i++) {
          dst[i] = src[i];
        }
      } else {
        tid = GRN_ID_NIL;
      }
    } else if (token->mode == GRN_TOKEN_ADD) {
      /* A non-zero return from grn_io_lock() is treated as failure to take
         the lock; the token is then not added. */
      switch (table->header.type) {
      case GRN_TABLE_PAT_KEY :
        if (grn_io_lock(ctx, ((grn_pat *)table)->io, grn_lock_timeout)) {
          tid = GRN_ID_NIL;
        } else {
          tid = grn_pat_add(ctx, (grn_pat *)table,
                            token->curr, token->curr_size, NULL, NULL);
          grn_io_unlock(((grn_pat *)table)->io);
        }
        break;
      case GRN_TABLE_DAT_KEY :
        if (grn_io_lock(ctx, ((grn_dat *)table)->io, grn_lock_timeout)) {
          tid = GRN_ID_NIL;
        } else {
          tid = grn_dat_add(ctx, (grn_dat *)table,
                            token->curr, token->curr_size, NULL, NULL);
          grn_io_unlock(((grn_dat *)table)->io);
        }
        break;
      case GRN_TABLE_HASH_KEY :
        if (grn_io_lock(ctx, ((grn_hash *)table)->io, grn_lock_timeout)) {
          tid = GRN_ID_NIL;
        } else {
          tid = grn_hash_add(ctx, (grn_hash *)table,
                             token->curr, token->curr_size, NULL, NULL);
          grn_io_unlock(((grn_hash *)table)->io);
        }
        break;
      default :
        /* Other table types: tid keeps its GRN_ID_NIL value. */
        break;
      }
    } else {
      switch (table->header.type) {
      case GRN_TABLE_PAT_KEY :
        tid = grn_pat_get(ctx, (grn_pat *)table,
                          token->curr, token->curr_size, NULL);
        break;
      case GRN_TABLE_DAT_KEY :
        tid = grn_dat_get(ctx, (grn_dat *)table,
                          token->curr, token->curr_size, NULL);
        break;
      case GRN_TABLE_HASH_KEY :
        tid = grn_hash_get(ctx, (grn_hash *)table,
                           token->curr, token->curr_size, NULL);
        break;
      default :
        /* Other table types: tid keeps its GRN_ID_NIL value. */
        break;
      }
    }

    if (tid == GRN_ID_NIL && token->status != GRN_TOKEN_DONE) {
      token->status = GRN_TOKEN_NOT_FOUND;
    }
    token->pos++;
    /* One resolved token per call. */
    break;
  }
  return tid;
}
/*
 * Produce the next usable token from `token_cursor` and, unless the cursor
 * is in GRN_TOKENIZE_ONLY mode, resolve it to a record ID in
 * token_cursor->table.
 *
 * ctx:          execution context; the tokenizer's PROC_NEXT function leaves
 *               two objects on it which are popped here (status first, token
 *               text second) and handed to
 *               grn_token_cursor_next_apply_token_filters().
 * token_cursor: tokenization state. The function updates
 *               token_cursor->status, token_cursor->force_prefix and
 *               token_cursor->pos. token_cursor->curr and
 *               token_cursor->curr_size are read after the filter call;
 *               presumably that helper fills them in -- confirm against its
 *               definition.
 *
 * Returns the ID obtained from the table (added in GRN_TOKENIZE_ADD mode,
 * looked up in the other non-ONLY modes), or GRN_ID_NIL when
 *   - token_cursor->status is already GRN_TOKEN_CURSOR_DONE on entry,
 *   - the tokenizer finished on a skipped token (status becomes
 *     GRN_TOKEN_CURSOR_DONE_SKIP) or on an ignored empty/too long token,
 *   - the cursor is in GRN_TOKENIZE_ONLY mode,
 *   - the table lock could not be taken in GRN_TOKENIZE_ADD mode, or
 *   - the table has no matching record.
 *
 * Outside GRN_TOKENIZE_ONLY mode, when GRN_ID_NIL results from a lookup and
 * the tokenizer has not finished, token_cursor->status is set to
 * GRN_TOKEN_CURSOR_NOT_FOUND.
 */
grn_id
grn_token_cursor_next(grn_ctx *ctx, grn_token_cursor *token_cursor)
{
  int status;
  grn_id tid = GRN_ID_NIL;
  grn_obj *table = token_cursor->table;
  grn_obj *tokenizer = token_cursor->tokenizer;

  while (token_cursor->status != GRN_TOKEN_CURSOR_DONE) {
    if (tokenizer) {
      grn_obj *curr_, *stat_;

      ((grn_proc *)tokenizer)->funcs[PROC_NEXT](ctx, 1, &table,
                                                &token_cursor->pctx.user_data);
      /* The status object is popped before the token text object. */
      stat_ = grn_ctx_pop(ctx);
      curr_ = grn_ctx_pop(ctx);
      status = grn_token_cursor_next_apply_token_filters(ctx, token_cursor,
                                                         curr_, stat_);
      /* In GET mode reaching the end of the input also finishes the scan. */
      token_cursor->status =
        ((status & GRN_TOKEN_LAST) ||
         (token_cursor->mode == GRN_TOKENIZE_GET &&
          (status & GRN_TOKEN_REACH_END)))
        ? GRN_TOKEN_CURSOR_DONE : GRN_TOKEN_CURSOR_DOING;
      token_cursor->force_prefix = GRN_FALSE;

      if (status & (GRN_TOKEN_SKIP | GRN_TOKEN_SKIP_WITH_POSITION)) {
        /* Only GRN_TOKEN_SKIP advances the position here. */
        if (status & GRN_TOKEN_SKIP) {
          token_cursor->pos++;
        }
        /* tid is still GRN_ID_NIL at this point: it is only assigned by the
           table access below, which is always followed by `break`. */
        if (token_cursor->status == GRN_TOKEN_CURSOR_DONE &&
            tid == GRN_ID_NIL) {
          token_cursor->status = GRN_TOKEN_CURSOR_DONE_SKIP;
          break;
        } else {
          continue;
        }
      }
      if (status & GRN_TOKEN_FORCE_PREFIX) {
        token_cursor->force_prefix = GRN_TRUE;
      }
      if (token_cursor->curr_size == 0) {
        /* An empty final token is dropped silently; others are logged. */
        if (token_cursor->status != GRN_TOKEN_CURSOR_DONE) {
          char tokenizer_name[GRN_TABLE_MAX_KEY_SIZE];
          int tokenizer_name_length;
          tokenizer_name_length =
            grn_obj_name(ctx, token_cursor->tokenizer,
                         tokenizer_name, GRN_TABLE_MAX_KEY_SIZE);
          GRN_LOG(ctx, GRN_WARN,
                  "[token_next] ignore an empty token: <%.*s>: <%.*s>",
                  tokenizer_name_length, tokenizer_name,
                  token_cursor->orig_blen, token_cursor->orig);
        }
        continue;
      }
      if (token_cursor->curr_size > GRN_TABLE_MAX_KEY_SIZE) {
        GRN_LOG(ctx, GRN_WARN,
                "[token_next] ignore too long token. "
                "Token must be less than or equal to %d: <%d>(<%.*s>)",
                GRN_TABLE_MAX_KEY_SIZE,
                token_cursor->curr_size,
                token_cursor->curr_size, token_cursor->curr);
        continue;
      }
      if (status & GRN_TOKEN_UNMATURED) {
        if (status & GRN_TOKEN_OVERLAP) {
          /* Unmatured overlapping tokens are not looked up in GET mode;
             only the position advances. */
          if (token_cursor->mode == GRN_TOKENIZE_GET) {
            token_cursor->pos++;
            continue;
          }
        } else {
          if (status & GRN_TOKEN_REACH_END) {
            token_cursor->force_prefix = GRN_TRUE;
          }
        }
      }
    } else {
      /* Without a tokenizer the current curr/curr_size is resolved exactly
         once. */
      token_cursor->status = GRN_TOKEN_CURSOR_DONE;
    }

    /* GRN_TOKENIZE_ONLY never touches the table. */
    if (token_cursor->mode != GRN_TOKENIZE_ONLY) {
      if (table->header.type == GRN_TABLE_NO_KEY) {
        /* For key-less tables the token bytes themselves are the ID; this
           is the same for adding and for looking up. */
        if (token_cursor->curr_size == sizeof(grn_id)) {
          /* Copy byte by byte: the token buffer has no alignment guarantee,
             so loading it through a grn_id pointer would be undefined
             behaviour. */
          const unsigned char *src =
            (const unsigned char *)token_cursor->curr;
          unsigned char *dst = (unsigned char *)&tid;
          unsigned int i;
          for (i = 0; i < sizeof(grn_id); i++) {
            dst[i] = src[i];
          }
        } else {
          tid = GRN_ID_NIL;
        }
      } else if (token_cursor->mode == GRN_TOKENIZE_ADD) {
        /* A non-zero return from grn_io_lock() is treated as failure to
           take the lock; the token is then not added. */
        switch (table->header.type) {
        case GRN_TABLE_PAT_KEY :
          if (grn_io_lock(ctx, ((grn_pat *)table)->io, grn_lock_timeout)) {
            tid = GRN_ID_NIL;
          } else {
            tid = grn_pat_add(ctx, (grn_pat *)table,
                              token_cursor->curr, token_cursor->curr_size,
                              NULL, NULL);
            grn_io_unlock(((grn_pat *)table)->io);
          }
          break;
        case GRN_TABLE_DAT_KEY :
          if (grn_io_lock(ctx, ((grn_dat *)table)->io, grn_lock_timeout)) {
            tid = GRN_ID_NIL;
          } else {
            tid = grn_dat_add(ctx, (grn_dat *)table,
                              token_cursor->curr, token_cursor->curr_size,
                              NULL, NULL);
            grn_io_unlock(((grn_dat *)table)->io);
          }
          break;
        case GRN_TABLE_HASH_KEY :
          if (grn_io_lock(ctx, ((grn_hash *)table)->io, grn_lock_timeout)) {
            tid = GRN_ID_NIL;
          } else {
            tid = grn_hash_add(ctx, (grn_hash *)table,
                               token_cursor->curr, token_cursor->curr_size,
                               NULL, NULL);
            grn_io_unlock(((grn_hash *)table)->io);
          }
          break;
        default :
          /* Other table types: tid keeps its GRN_ID_NIL value. */
          break;
        }
      } else {
        switch (table->header.type) {
        case GRN_TABLE_PAT_KEY :
          tid = grn_pat_get(ctx, (grn_pat *)table,
                            token_cursor->curr, token_cursor->curr_size,
                            NULL);
          break;
        case GRN_TABLE_DAT_KEY :
          tid = grn_dat_get(ctx, (grn_dat *)table,
                            token_cursor->curr, token_cursor->curr_size,
                            NULL);
          break;
        case GRN_TABLE_HASH_KEY :
          tid = grn_hash_get(ctx, (grn_hash *)table,
                             token_cursor->curr, token_cursor->curr_size,
                             NULL);
          break;
        default :
          /* Other table types: tid keeps its GRN_ID_NIL value. */
          break;
        }
      }
    }

    if (token_cursor->mode != GRN_TOKENIZE_ONLY &&
        tid == GRN_ID_NIL &&
        token_cursor->status != GRN_TOKEN_CURSOR_DONE) {
      token_cursor->status = GRN_TOKEN_CURSOR_NOT_FOUND;
    }
    token_cursor->pos++;
    /* One resolved token per call. */
    break;
  }
  return tid;
}