/*
 * Segment `text` against a fixed index, allowing unmatched atoms to be
 * skipped between two matched atoms.
 *
 * For every atom (one multibyte character, sized by get_mblen() on its lead
 * byte) the automaton tries to advance from the current state.  When the atom
 * has no transition, up to `max_seek` following atoms are probed from the same
 * state; the first one that has a transition is consumed instead and the
 * atoms in between are dropped.  If the look-ahead finds nothing, the usual
 * failure-link walk is performed with the original atom.
 *
 * Parameters:
 *   acseg_index - index to match against; must be in state AC_INDEX_FIXED.
 *   text        - input bytes and their length.
 *   max_seek    - maximum number of look-ahead probes per position; a value
 *                 <= 0 disables the look-ahead entirely.
 *
 * Returns the result object obtained from acseg_result_init().  It is
 * returned without any additions when the index is not in state
 * AC_INDEX_FIXED.
 */
acseg_result_t * acseg_full_seg(acseg_index_t *acseg_index, acseg_str_t *text,int max_seek)
{
    int j, current_pos, next_pos, seeks;
    acseg_str_t atom, atom2, tmp_atom;
    acseg_result_t *seg_result;
    acseg_index_item_t *index_item, *s_index_item, *tmp_s_index_item;

    seg_result = acseg_result_init();

    if (acseg_index->state != AC_INDEX_FIXED) {
        return seg_result;
    }

    j = 0;
    index_item = acseg_index->root;

    while (j < text->len) {
        atom.data = &(text->data[j]);
        atom.len = get_mblen( ((u_char) atom.data[0]) );
        /* A truncated trailing sequence must not reach past the text. */
        if (atom.len > text->len - j) {
            atom.len = text->len - j;
        }

        tmp_atom = atom;
        tmp_s_index_item = s_index_item = find_child_index_item(index_item, &atom);

        /* Look-ahead: probe the atoms that follow while nothing matches. */
        current_pos = j;
        seeks = 0;
        while (tmp_s_index_item == NULL && seeks < max_seek) {
            next_pos = current_pos + tmp_atom.len;
            if (next_pos >= text->len) {
                /* No further atom exists; never read at or past the end. */
                break;
            }

            atom2.data = &(text->data[next_pos]);
            atom2.len = get_mblen( ((u_char) atom2.data[0]) );
            if (atom2.len > text->len - next_pos) {
                atom2.len = text->len - next_pos;
            }

            /* NOTE(review): looks like leftover debug output - confirm
             * whether callers rely on it before removing. */
            print_atom(&atom2);

            tmp_s_index_item = find_child_index_item(index_item, &atom2);
            seeks++;

            if (tmp_s_index_item != NULL) {
                /* Resume at the probed atom; the skipped atoms are dropped. */
                j = next_pos;
                atom = atom2;
                s_index_item = tmp_s_index_item;
                break;
            }

            current_pos = next_pos;
            tmp_atom = atom2;
        }

        /* Failure-link walk; gives up at the root, which becomes the state. */
        while (s_index_item == NULL) {
            if (index_item == acseg_index->root) {
                s_index_item = index_item;
                break;
            }
            index_item = index_item->failure;
            s_index_item = find_child_index_item(index_item, &atom);
        }

        index_item = s_index_item;

        add_to_result(seg_result, index_item->output);
        add_to_result(seg_result, index_item->extra_outputs);

        j = j + atom.len;
    }

    return seg_result;
}
/*
 * Segment `text` against a fixed index without any look-ahead.
 *
 * The text is consumed one atom at a time (one multibyte character, sized by
 * get_mblen() on its lead byte).  For each atom the automaton follows the
 * matching child of the current state; when there is none it walks the
 * failure links until a state with such a child is found, or stays at the
 * root.  After every atom the `output` and `extra_outputs` of the reached
 * state are passed to add_to_result().
 *
 * Parameters:
 *   acseg_index - index to match against; must be in state AC_INDEX_FIXED.
 *   text        - input bytes and their length.
 *
 * Returns the result object obtained from acseg_result_init().  It is
 * returned without any additions when the index is not in state
 * AC_INDEX_FIXED.
 */
acseg_result_t * acseg_full_seg3(acseg_index_t *acseg_index, acseg_str_t *text)
{
    int j;
    acseg_str_t atom;
    acseg_result_t *seg_result;
    acseg_index_item_t *index_item, *s_index_item;

    seg_result = acseg_result_init();

    if (acseg_index->state != AC_INDEX_FIXED) {
        return seg_result;
    }

    j = 0;
    index_item = acseg_index->root;

    while (j < text->len) {
        atom.data = &(text->data[j]);
        atom.len = get_mblen( ((u_char) atom.data[0]) );
        /*
         * The length comes from the lead byte alone, so a truncated trailing
         * sequence would describe bytes beyond the text; clamp it to what is
         * actually left (presumably the lookup reads atom.len bytes).
         */
        if (atom.len > text->len - j) {
            atom.len = text->len - j;
        }

        s_index_item = find_child_index_item(index_item, &atom);

        /* Failure-link walk; gives up at the root, which becomes the state. */
        while (s_index_item == NULL) {
            if (index_item == acseg_index->root) {
                s_index_item = index_item;
                break;
            }
            index_item = index_item->failure;
            s_index_item = find_child_index_item(index_item, &atom);
        }

        index_item = s_index_item;

        add_to_result(seg_result, index_item->output);
        add_to_result(seg_result, index_item->extra_outputs);

        j = j + atom.len;
    }

    return seg_result;
}
/*
 * Finalise the index: assign a failure link to every item below the root,
 * merge the outputs reachable through that link into the item's
 * extra_outputs, and mark the index AC_INDEX_FIXED.
 *
 * Two work lists are used, both allocated from a temporary collector that is
 * destroyed before returning.  `pending` holds items whose children still
 * have to be processed; `kids` holds the children of the item currently being
 * expanded.  The root's direct children are queued first and handed to
 * set_index_item_failure() together with the root (presumably pointing their
 * failure links at the root - confirm against that helper).
 */
void acseg_index_fix(acseg_index_t *acseg_index)
{
    mc_collector_t *scratch_mc = NULL;
    acseg_list_t *pending, *kids;
    acseg_rbtree_t *tree;
    acseg_index_item_t *root, *node, *kid, *fallback, *target;

    pending = acseg_list_init(&scratch_mc);
    kids = acseg_list_init(&scratch_mc);

    root = acseg_index->root;

    /* Seed the work list with the root's direct children. */
    tree = root->childs_rbtree;
    add_all_item_to_queue(tree->root, tree->sentinel, pending, &scratch_mc);
    set_index_item_failure(pending, root);

    for (node = acseg_queue_pop(pending);
         node != NULL;
         node = acseg_queue_pop(pending))
    {
        tree = node->childs_rbtree;
        add_all_item_to_queue(tree->root, tree->sentinel, kids, &scratch_mc);

        for (kid = acseg_queue_pop(kids);
             kid != NULL;
             kid = acseg_queue_pop(kids))
        {
            /* The child's own children are handled in a later round. */
            acseg_queue_push(pending, kid, &scratch_mc);

            /*
             * Follow the parent's failure chain until some state has a child
             * for the same atom; the chain ends at the root.
             */
            fallback = node->failure;
            target = find_child_index_item(fallback, &(kid->atom));
            while (target == NULL && fallback != root) {
                fallback = fallback->failure;
                target = find_child_index_item(fallback, &(kid->atom));
            }
            if (target == NULL) {
                /* Nothing matched anywhere on the chain: fall back to root. */
                target = root;
            }

            kid->failure = target;

            /* Whatever the failure target emits, this item emits as well. */
            acseg_list_extend(kid->extra_outputs, target->output,
                              &(acseg_index->mc));
            acseg_list_extend(kid->extra_outputs, target->extra_outputs,
                              &(acseg_index->mc));
        }
    }

    mc_destory(scratch_mc);

    acseg_index->state = AC_INDEX_FIXED;
}