// Enumerates every candidate node (dictionary words and unknown words)
// starting at |begin|, honoring the partial-parsing constraints stored in
// |lattice| when isPartial is set.  Returns a singly linked list of nodes
// chained through |bnext| (0 if none).
//
// NOTE(review): ADDUNKNWON and CHECK_DIE are macros defined elsewhere in
// this file; ADDUNKNWON presumably appends unknown-word nodes built from
// the surrounding locals (begin/begin2/begin3, clen, cinfo, result_node,
// allocator) — confirm against the macro definition before touching any
// local names in this function.
N *Tokenizer<N, P>::lookup(const char *begin, const char *end,
                           Allocator<N, P> *allocator, Lattice *lattice) const {
  CharInfo cinfo;
  N *result_node = 0;
  size_t mblen = 0;
  size_t clen = 0;

  // Clamp the lookahead window to 65535 bytes from |begin|.
  end = static_cast<size_t>(end - begin) >= 65535 ? begin + 65535 : end;

  if (isPartial) {
    // In partial mode, shrink |end| to the next hard token boundary after
    // the current position so no candidate can straddle it.
    const size_t begin_pos = begin - lattice->sentence();
    for (size_t n = begin_pos + 1; n < lattice->size(); ++n) {
      if (lattice->boundary_constraint(n) == MECAB_TOKEN_BOUNDARY) {
        end = lattice->sentence() + n;
        break;
      }
    }
  }

  // Skip characters of the |space_| category; |begin2| is the first
  // non-space position, |cinfo|/|mblen|/|clen| describe the char there.
  const char *begin2 = property_.seekToOtherType(begin, end, space_,
                                                 &cinfo, &mblen, &clen);

  // Scratch buffer for common-prefix-search hits, owned by the allocator.
  Dictionary::result_type *daresults = allocator->mutable_results();
  const size_t results_size = allocator->results_size();

  // Collect dictionary matches from every loaded dictionary.
  for (std::vector<Dictionary *>::const_iterator it = dic_.begin();
       it != dic_.end(); ++it) {
    const size_t n = (*it)->commonPrefixSearch(
        begin2, static_cast<size_t>(end - begin2), daresults, results_size);
    for (size_t i = 0; i < n; ++i) {
      // Each prefix hit can expand to several morpheme tokens.
      size_t size = (*it)->token_size(daresults[i]);
      const Token *token = (*it)->token(daresults[i]);
      for (size_t j = 0; j < size; ++j) {
        N *new_node = allocator->newNode();
        read_node_info(**it, *(token + j), &new_node);
        new_node->length = daresults[i].length;
        // rlength also covers the skipped leading spaces.
        new_node->rlength = begin2 - begin + new_node->length;
        new_node->surface = begin2;
        new_node->stat = MECAB_NOR_NODE;
        new_node->char_type = cinfo.default_type;
        // Drop nodes that violate the user-supplied constraints.
        if (isPartial && !is_valid_node(lattice, new_node)) {
          continue;
        }
        // Prepend to the result list.
        new_node->bnext = result_node;
        result_node = new_node;
      }
    }
  }

  // If the char class does not ask to invoke unknown-word processing and
  // we already have dictionary nodes, we are done.
  if (result_node && !cinfo.invoke) {
    return result_node;
  }

  // |begin3| starts one character past |begin2| for unknown-word growth;
  // |group_begin3| marks the end of the greedy same-type group, if any.
  const char *begin3 = begin2 + mblen;
  const char *group_begin3 = 0;

  if (begin3 > end) {
    ADDUNKNWON;
    if (result_node) {
      return result_node;
    }
  }

  if (cinfo.group) {
    // Greedy grouping: extend over a run of same-type characters and emit
    // one unknown node for the whole run (capped by max_grouping_size_).
    const char *tmp = begin3;
    CharInfo fail;
    begin3 = property_.seekToOtherType(begin3, end, cinfo, &fail,
                                       &mblen, &clen);
    if (clen <= max_grouping_size_) {
      ADDUNKNWON;
    }
    group_begin3 = begin3;
    begin3 = tmp;  // restart fixed-length expansion from the original spot
  }

  // Fixed-length expansion: emit unknown nodes of 1..cinfo.length chars,
  // skipping the length already covered by the group node above.
  for (size_t i = 1; i <= cinfo.length; ++i) {
    if (begin3 > end) {
      break;
    }
    if (begin3 == group_begin3) {
      continue;
    }
    clen = i;
    ADDUNKNWON;
    // Stop as soon as the next character is of an incompatible type.
    if (!cinfo.isKindOf(property_.getCharInfo(begin3, end, &mblen))) {
      break;
    }
    begin3 += mblen;
  }

  // Guarantee at least one candidate exists at this position.
  if (!result_node) {
    ADDUNKNWON;
  }

  if (isPartial && !result_node) {
    // Still nothing: extend over everything marked MECAB_INSIDE_TOKEN
    // and force an unknown node so partial parsing cannot dead-end.
    begin3 = begin2;
    while (true) {
      cinfo = property_.getCharInfo(begin3, end, &mblen);
      begin3 += mblen;
      if (begin3 > end ||
          lattice->boundary_constraint(begin3 - lattice->sentence())
              != MECAB_INSIDE_TOKEN) {
        break;
      }
    }
    ADDUNKNWON;
    if (!result_node) {
      // Last resort: synthesize a single unknown node whose feature comes
      // from the user-supplied constraint at this position.
      N *new_node = allocator->newNode();
      new_node->char_type = cinfo.default_type;
      new_node->surface = begin2;
      new_node->length = begin3 - begin2;
      new_node->rlength = begin3 - begin;
      new_node->stat = MECAB_UNK_NODE;
      new_node->bnext = result_node;
      new_node->feature =
          lattice->feature_constraint(begin - lattice->sentence());
      CHECK_DIE(new_node->feature);
      result_node = new_node;
    }
  }

  return result_node;
}
// Constraint-free variant of lookup(): enumerates dictionary and unknown
// word candidates starting at |begin| with no lattice/partial handling.
// Returns a singly linked list of nodes chained through |bnext| (0 if none).
//
// NOTE(review): ADDUNKNWON is a macro defined elsewhere in this file and
// presumably appends unknown-word nodes built from the surrounding locals
// (begin/begin2/begin3, clen, cinfo, result_node, allocator) — confirm
// against the macro definition before renaming any local here.
N *Tokenizer<N, P>::lookup(const char *begin, const char *end,
                           Allocator<N, P> *allocator) const {
  CharInfo cinfo;
  N *result_node = 0;
  size_t mblen = 0;
  size_t clen = 0;

  // Clamp the lookahead window to 65535 bytes from |begin|.
  end = static_cast<size_t>(end - begin) >= 65535 ? begin + 65535 : end;

  // Skip characters of the |space_| category; |begin2| is the first
  // non-space position, |cinfo|/|mblen|/|clen| describe the char there.
  const char *begin2 = property_.seekToOtherType(begin, end, space_,
                                                 &cinfo, &mblen, &clen);

  // Scratch buffer for common-prefix-search hits, owned by the allocator.
  Dictionary::result_type *daresults = allocator->mutable_results();
  const size_t results_size = allocator->results_size();

  // Collect dictionary matches from every loaded dictionary.
  for (std::vector<Dictionary *>::const_iterator it = dic_.begin();
       it != dic_.end(); ++it) {
    const size_t n = (*it)->commonPrefixSearch(
        begin2, static_cast<size_t>(end - begin2), daresults, results_size);
    for (size_t i = 0; i < n; ++i) {
      // Each prefix hit can expand to several morpheme tokens.
      size_t size = (*it)->token_size(daresults[i]);
      const Token *token = (*it)->token(daresults[i]);
      for (size_t j = 0; j < size; ++j) {
        N *new_node = allocator->newNode();
        read_node_info(**it, *(token + j), &new_node);
        new_node->length = daresults[i].length;
        // rlength also covers the skipped leading spaces.
        new_node->rlength = begin2 - begin + new_node->length;
        new_node->surface = begin2;
        new_node->stat = MECAB_NOR_NODE;
        new_node->char_type = cinfo.default_type;
        // Prepend to the result list.
        new_node->bnext = result_node;
        result_node = new_node;
      }
    }
  }

  // If the char class does not ask to invoke unknown-word processing and
  // we already have dictionary nodes, we are done.
  if (result_node && !cinfo.invoke) {
    return result_node;
  }

  // |begin3| starts one character past |begin2| for unknown-word growth;
  // |group_begin3| marks the end of the greedy same-type group, if any.
  const char *begin3 = begin2 + mblen;
  const char *group_begin3 = 0;

  if (begin3 > end) {
    ADDUNKNWON;
    return result_node;
  }

  if (cinfo.group) {
    // Greedy grouping: extend over a run of same-type characters and emit
    // one unknown node for the whole run (capped by max_grouping_size_).
    const char *tmp = begin3;
    CharInfo fail;
    begin3 = property_.seekToOtherType(begin3, end, cinfo, &fail,
                                       &mblen, &clen);
    if (clen <= max_grouping_size_) {
      ADDUNKNWON;
    }
    group_begin3 = begin3;
    begin3 = tmp;  // restart fixed-length expansion from the original spot
  }

  // Fixed-length expansion: emit unknown nodes of 1..cinfo.length chars,
  // skipping the length already covered by the group node above.
  for (size_t i = 1; i <= cinfo.length; ++i) {
    if (begin3 > end) {
      break;
    }
    if (begin3 == group_begin3) {
      continue;
    }
    clen = i;
    ADDUNKNWON;
    // Stop as soon as the next character is of an incompatible type.
    if (!cinfo.isKindOf(property_.getCharInfo(begin3, end, &mblen))) {
      break;
    }
    begin3 += mblen;
  }

  // Guarantee at least one candidate exists at this position.
  if (!result_node) {
    ADDUNKNWON;
  }

  return result_node;
}