/*
 * Counts how often the keyword condition `sc` is hit in `str` and folds that
 * count into the caller's running `*found` / `*score` according to `op`.
 *
 *   sc      - keyword condition; searched repeatedly until its stopflag
 *             becomes SNIPCOND_STOP.
 *   str     - string handed to grn_bm_tunedbm() on every call.
 *   section - 1-based index into optarg->weight_vector; 0 leaves the weight
 *             at its default of 1 when a weight vector is present.
 *   op      - how the hit count is combined (see below).
 *   optarg  - supplies vector_size / weight_vector; dereferenced
 *             unconditionally, so it must not be NULL.
 *   found   - in/out flag, updated for OR / AND / BUT.
 *   score   - in/out accumulator, increased by weight * hits.
 *
 * Operator handling:
 *   GRN_OP_OR     - at least one hit: *found = 1 and the score is increased.
 *   GRN_OP_AND    - at least one hit: the score is increased; no hit: *found = 0.
 *   GRN_OP_BUT    - at least one hit: *found = 0.
 *   GRN_OP_ADJUST - the score is increased unconditionally (by 0 if no hit).
 *   anything else - nothing is touched.
 */
static void
scan_keyword(snip_cond *sc, grn_str *str, grn_id section,
             grn_operator op, grn_select_optarg *optarg,
             int *found, int *score)
{
  int hits = 0;
  int weight = 1;

  /*
   * Each call that does not leave `sc` in SNIPCOND_STOP is counted as one
   * hit (presumably one occurrence of the keyword -- confirm against
   * grn_bm_tunedbm()).
   *
   * NOTE(review): grn_bm_tunedbm() is called with three arguments here but
   * with four (a leading ctx) in grn_snip_exec() in this file; confirm
   * whether this function is still compiled.
   */
  for (;;) {
    grn_bm_tunedbm(sc, str, 0);
    if (sc->stopflag == SNIPCOND_STOP) {
      break;
    }
    hits++;
  }

  /* Work out the per-section weight; it stays 1 when no vector is given. */
  if (optarg->vector_size) {
    if (optarg->weight_vector) {
      if (section) {
        /* Sections past the end of the vector weigh nothing. */
        if (section <= optarg->vector_size) {
          weight = optarg->weight_vector[section - 1];
        } else {
          weight = 0;
        }
      }
    } else {
      /* Without a vector, vector_size itself is used as the weight. */
      weight = optarg->vector_size;
    }
  }

  if (op == GRN_OP_OR) {
    if (hits) {
      *found = 1;
      *score += weight * hits;
    }
  } else if (op == GRN_OP_AND) {
    if (hits) {
      *score += weight * hits;
    } else {
      *found = 0;
    }
  } else if (op == GRN_OP_BUT) {
    if (hits) {
      *found = 0;
    }
  } else if (op == GRN_OP_ADJUST) {
    *score += weight * hits;
  }
}
grn_rc grn_snip_exec(grn_ctx *ctx, grn_snip *snip, const char *string, unsigned int string_len, unsigned int *nresults, unsigned int *max_tagged_len) { size_t i; int f = GRN_STR_WITH_CHECKS|GRN_STR_REMOVEBLANK; if (!snip || !string || !nresults || !max_tagged_len) { return GRN_INVALID_ARGUMENT; } GRN_API_ENTER; exec_clean(ctx, snip); *nresults = 0; snip->nstr = grn_string_open(ctx, string, string_len, snip->normalizer, f); if (!snip->nstr) { exec_clean(ctx, snip); GRN_LOG(ctx, GRN_LOG_ALERT, "grn_string_open on grn_snip_exec failed !"); GRN_API_RETURN(ctx->rc); } for (i = 0; i < snip->cond_len; i++) { grn_bm_tunedbm(ctx, snip->cond + i, snip->nstr, snip->flags); } { _snip_tag_result *tag_result = snip->tag_result; _snip_result *snip_result = snip->snip_result; size_t last_end_offset = 0, last_last_end_offset = 0; unsigned int unfound_cond_count = snip->cond_len; *max_tagged_len = 0; while (1) { size_t tagged_len = 0, last_tag_end = 0; int_least8_t all_stop = 1, found_cond = 0; snip_result->tag_count = 0; while (1) { size_t min_start_offset = (size_t) -1; size_t max_end_offset = 0; snip_cond *cond = NULL; /* get condition which have minimum offset and is not stopped */ for (i = 0; i < snip->cond_len; i++) { if (snip->cond[i].stopflag == SNIPCOND_NONSTOP && (min_start_offset > snip->cond[i].start_offset || (min_start_offset == snip->cond[i].start_offset && max_end_offset < snip->cond[i].end_offset))) { min_start_offset = snip->cond[i].start_offset; max_end_offset = snip->cond[i].end_offset; cond = &snip->cond[i]; } } if (!cond) { break; } /* check whether condtion is the first condition in snippet */ if (snip_result->tag_count == 0) { /* skip condition if the number of rest snippet field is smaller than */ /* the number of unfound keywords. 
*/ if (snip->max_results - *nresults <= unfound_cond_count && cond->count > 0) { int_least8_t exclude_other_cond = 1; for (i = 0; i < snip->cond_len; i++) { if ((snip->cond + i) != cond && snip->cond[i].end_offset <= cond->start_offset + snip->width && snip->cond[i].count == 0) { exclude_other_cond = 0; } } if (exclude_other_cond) { grn_bm_tunedbm(ctx, cond, snip->nstr, snip->flags); continue; } } snip_result->start_offset = cond->start_offset; snip_result->first_tag_result_idx = snip->tag_count; } else { if (cond->start_offset >= snip_result->start_offset + snip->width) { break; } /* check nesting to make valid HTML */ /* ToDo: allow <test><te>te</te><st>st</st></test> */ if (cond->start_offset < last_tag_end) { grn_bm_tunedbm(ctx, cond, snip->nstr, snip->flags); continue; } } if (cond->end_offset > snip_result->start_offset + snip->width) { /* If a keyword gets across a snippet, */ /* it was skipped and never to be tagged. */ cond->stopflag = SNIPCOND_ACROSS; grn_bm_tunedbm(ctx, cond, snip->nstr, snip->flags); } else { found_cond = 1; if (cond->count == 0) { unfound_cond_count--; } cond->count++; last_end_offset = cond->end_offset; tag_result->cond = cond; tag_result->start_offset = cond->start_offset; tag_result->end_offset = last_tag_end = cond->end_offset; snip_result->tag_count++; tag_result++; tagged_len += cond->opentag_len + cond->closetag_len; if (++snip->tag_count >= MAX_SNIP_TAG_COUNT) { break; } grn_bm_tunedbm(ctx, cond, snip->nstr, snip->flags); } } if (!found_cond) { break; } if (snip_result->start_offset + last_end_offset < snip->width) { snip_result->start_offset = 0; } else { snip_result->start_offset = MAX(MIN ((snip_result->start_offset + last_end_offset - snip->width) / 2, string_len - snip->width), last_last_end_offset); } snip_result->start_offset = grn_snip_find_firstbyte(string, snip->encoding, snip_result->start_offset, 1); snip_result->end_offset = snip_result->start_offset + snip->width; if (snip_result->end_offset < string_len) { 
snip_result->end_offset = grn_snip_find_firstbyte(string, snip->encoding, snip_result->end_offset, -1); } else { snip_result->end_offset = string_len; } last_last_end_offset = snip_result->end_offset; if (snip->mapping == (grn_snip_mapping *) -1) { tagged_len += count_mapped_chars(&string[snip_result->start_offset], &string[snip_result->end_offset]) + 1; } else { tagged_len += snip_result->end_offset - snip_result->start_offset + 1; } *max_tagged_len = MAX(*max_tagged_len, tagged_len); snip_result->last_tag_result_idx = snip->tag_count - 1; (*nresults)++; snip_result++; if (*nresults == snip->max_results || snip->tag_count == MAX_SNIP_TAG_COUNT) { break; } for (i = 0; i < snip->cond_len; i++) { if (snip->cond[i].stopflag != SNIPCOND_STOP) { all_stop = 0; snip->cond[i].stopflag = SNIPCOND_NONSTOP; } } if (all_stop) { break; } } } snip->snip_count = *nresults; snip->string = string; snip->max_tagged_len = *max_tagged_len; GRN_API_RETURN(ctx->rc); }