int main() { HEGGHANDLE hHandle = eggPath_open("file:///tmp/"); HEGGINDEXREADER hIndexReader = eggIndexReader_open(hHandle); HEGGINDEXSEARCHER hIndexSearcher = eggIndexSearcher_new(hIndexReader); HEGGQUERY hq = eggQuery_new_string("content", "is good", strlen("is good"), ANALYZER_CWSLEX); if(hq != EGG_NULL) { printf("query init OK! \n"); } //填0取所有结果,非0按填的值取个数 HEGGTOPCOLLECTOR hTopCollector = eggTopCollector_new(0); eggTopCollector_set_sorttype(hTopCollector, EGG_TOPSORT_WEIGHT); printf("eggTopCollector sortType is EGG_TOPSORT_WEIGHT \n"); EBOOL ret = eggIndexSearcher_search_with_query(hIndexSearcher, hTopCollector, hq); if (ret == EGG_TRUE) { //对最后结果进行排序 //EGG_TOPSORT_WEIGHT: 按document的weight排序 //EGG_TOPSORT_SCORE: 按查询关键字的相关度排序(打分排序) //EGG_TOPSORT_NOT: 不排序 HEGGSCOREDOC lp_score_doc = eggTopCollector_top_docs(hTopCollector); count_t cnt = eggTopCollector_total_hits(hTopCollector); index_t i = 0; printf("have hit %u documents\n", cnt); while (i != cnt) { HEGGDOCUMENT lp_eggDocument = EGG_NULL; eggIndexReader_get_document(hIndexReader, lp_score_doc[i].idDoc, &lp_eggDocument); HEGGFIELD lp_field = eggDocument_get_field(lp_eggDocument, "content"); unsigned len = 0; char *val = eggField_get_value(lp_field, &len); printf("id : [%llu], content : [%s], weight : [%d]\n", EGGDID_DOCID(&lp_score_doc[i].idDoc), val, eggDocument_get_weight(lp_eggDocument)); lp_field = 0; eggDocument_delete(lp_eggDocument); i++; } } eggTopCollector_delete(hTopCollector); eggQuery_delete(hq); eggIndexSearcher_delete(hIndexSearcher); eggIndexReader_close(hIndexReader); eggPath_close(hHandle); return 0; }
/*
 * Build the dictionary word list for the analyzer named analyzerName.
 *
 * Looks up the "dict" records for the analyzer through
 * eggSySRecorder_get_dict(), loads every matching document and pushes its
 * (EGG_SYS_DICTNAME, EGG_SYS_DICTKEY) pair onto a block-item list with
 * BlockItemPushWord(), using the "NR" tag and the value 1000000.
 *
 * Hits are visited from the last to the first, as in the original loop.
 *
 * Returns the resulting list, or EGG_NULL/NULL when analyzerName is null,
 * the lookup or reader allocation yields nothing, or no document carried
 * both fields.
 */
PUBLIC P_NEW_BLOCK_ITEM eggAnalyzer_get_dictlist(char *analyzerName)
{
    P_NEW_BLOCK_ITEM pBlockItem = NULL;
    HEGGTOPCOLLECTOR hTopCollector;
    HEGGSCOREDOC lp_score_doc;
    HEGGINDEXREADER hIndexReader;
    count_t cnt;

    if (!analyzerName) {
        return EGG_NULL;
    }

    hTopCollector = eggSySRecorder_get_dict("dict", analyzerName);
    if (!hTopCollector) {
        return EGG_NULL;
    }

    lp_score_doc = eggTopCollector_top_docs(hTopCollector);
    /* Without a result array there is nothing to index into. */
    cnt = lp_score_doc ? eggTopCollector_total_hits(hTopCollector) : 0;

    hIndexReader = eggSySRecorder_alloc_reader();
    if (!hIndexReader) {
        eggTopCollector_delete(hTopCollector);
        return EGG_NULL;
    }

    while (cnt--) {
        HEGGDOCUMENT lp_eggDocument = EGG_NULL;
        HEGGFIELD hField1;
        HEGGFIELD hField2;
        size32_t n_len1 = 0;
        size32_t n_len2 = 0;
        char *pDict = EGG_NULL;
        char *pKey = EGG_NULL;

        eggIndexReader_get_document(hIndexReader, lp_score_doc[cnt].idDoc,
                                    &lp_eggDocument);
        if (!lp_eggDocument) {
            /* Document could not be loaded; skip this hit. */
            continue;
        }

        hField1 = eggDocument_get_field(lp_eggDocument, EGG_SYS_DICTNAME);
        hField2 = eggDocument_get_field(lp_eggDocument, EGG_SYS_DICTKEY);
        if (hField1) {
            pDict = eggField_get_value(hField1, &n_len1);
        }
        if (hField2) {
            pKey = eggField_get_value(hField2, &n_len2);
        }

        if (pDict && pKey) {
            /* Copy by explicit length so the strings handed on are
             * NUL-terminated. */
            char *lp_dict_buf = strndup(pDict, n_len1);
            char *lp_key_buf = strndup(pKey, n_len2);

            /* strndup may fail; never log or push a null string. */
            if (lp_dict_buf && lp_key_buf) {
                eggPrtLog_info("eggAnalyzer", "[%s], [%s]\n",
                               lp_dict_buf, lp_key_buf);
                pBlockItem = BlockItemPushWord(pBlockItem, lp_dict_buf,
                                               lp_key_buf, "NR", 1000000);
            }

            free(lp_dict_buf);
            free(lp_key_buf);
        }

        /* pDict/pKey are not freed here (the original had those frees
         * commented out); only the document is released. */
        eggDocument_delete(lp_eggDocument);
    }

    eggTopCollector_delete(hTopCollector);
    eggSySRecorder_free_reader((void**)&hIndexReader);

    return pBlockItem;
}
int main(int argc, char* argv[]) { HEGGHANDLE hHandle = eggPath_open("file:///egg/"); HEGGINDEXREADER hIndexReader = eggIndexReader_open(hHandle); HEGGINDEXSEARCHER hIndexSearcher = eggIndexSearcher_new(hIndexReader); HEGGQUERY hq = eggQuery_new_string("content", argv[1], strlen(argv[1]), ""); if(hq != EGG_NULL) { printf("query init OK! \n"); } HEGGTOPCOLLECTOR hTopCollector = eggTopCollector_new(0); EBOOL ret = eggIndexSearcher_search_with_query(hIndexSearcher, hTopCollector, hq); if (ret == EGG_TRUE) { HEGGSCOREDOC lp_score_doc = eggTopCollector_top_docs(hTopCollector); count_t cnt = eggTopCollector_total_hits(hTopCollector); index_t i = 0; printf("have hit %u documents\n", cnt); while (i != cnt) { HEGGDOCUMENT lp_eggDocument = EGG_NULL; eggIndexReader_get_document(hIndexReader, lp_score_doc[i].idDoc, &lp_eggDocument); HEGGFIELD lp_field = eggDocument_get_field(lp_eggDocument, "content"); unsigned len = 0; char *val = eggField_get_value(lp_field, &len); printf("id : [%llu], content : [%s], \n", EGGDID_DOCID(&lp_score_doc[i].idDoc), val); lp_field = 0; eggDocument_delete(lp_eggDocument); i++; } } eggTopCollector_delete(hTopCollector); eggQuery_delete(hq); eggIndexSearcher_delete(hIndexSearcher); eggIndexReader_close(hIndexReader); eggPath_close(hHandle); return 0; }
int main() { //ImLexAnalyzer* p_la = (ImLexAnalyzer*)ImCnLexAnalyzer_new(); // ImLexAnalyzer* p_la = (ImLexAnalyzer*)ImCnLexAnalyzer_new(); HEGGDIRECTORY hDirectory = eggDirectory_open("/ape/ImRoBot5/index/bbstest"); HEGGINDEXREADER hIndexReader = eggIndexReader_open(hDirectory); HEGGINDEXSEARCHER hIndexSearcher = eggIndexSearcher_new(hIndexReader); HEGGQUERY h1, h2, h3; h1 = eggQuery_new_string("title", "人", 3, ANALYZER_CWSLEX); //h2 = eggQuery_new_string("content", "new", 3, p_la); // h2 = eggQuery_new_string("body", "some description", 16, p_la); // h3 = eggQuery_new_int32("price", 199); // h2 = eggQuery_and(h3, h2); //h1 = eggQuery_or(h2, h1); // h3 = h2 = 0; HEGGTOPCOLLECTOR hTopCollector = eggTopCollector_new(0); int ret = eggIndexSearcher_search_with_query(hIndexSearcher, hTopCollector, h1); if (ret == EGG_TRUE) { eggTopCollector_normalized(hTopCollector, EGG_TOPSORT_SCORE); // eggTopCollector_normalized(hTopCollector, EGG_TOPSORT_NOT); HEGGSCOREDOC lp_score_doc = eggTopCollector_top_docs(hTopCollector); count_t cnt = eggTopCollector_total_hits(hTopCollector); printf("have hit %u documents\n", cnt); if (cnt > 0) { printf("last document: id[%llu]\n", lp_score_doc[cnt-1].idDoc); HEGGDOCUMENT lp_eggDocument = EGG_NULL; eggIndexReader_get_document(hIndexReader, lp_score_doc[cnt-1].idDoc, &lp_eggDocument); HEGGFIELD lp_field = eggDocument_get_field(lp_eggDocument,"title"); unsigned len = 0; char *val = eggField_get_value(lp_field, &len); printf("last document: body[%.*s]\n", len, val); lp_field = 0; eggDocument_delete(lp_eggDocument); } } eggTopCollector_delete(hTopCollector); eggQuery_delete(h1); eggIndexSearcher_delete(hIndexSearcher); eggIndexReader_close(hIndexReader); eggDirectory_close(hDirectory); ImLexAnalyzer_delete(p_la); }
void CeggItfTest::testIndexSearch(char* dir_path) { char key[1000] = {0}; type_t op = EGG_TOPSORT_SCORE; HEGGHANDLE hEggHandle = eggPath_open(dir_path); HEGGINDEXREADER hIndexReader = eggIndexReader_open(hEggHandle); HEGGINDEXSEARCHER hIndexSearcher = eggIndexSearcher_new(hIndexReader); char fieldName[200] = ""; HEGGQUERY h1; h1 = getQuery(); char c; printf("key range search?(y/n)"); scanf("%c", &c); if(c == 'y') { printf("FieldName: "); scanf("%s", fieldName); int startPrice = 0; int endPrice = 0; printf("start Price: "); scanf("%d", &startPrice); printf("end Price: "); scanf("%d", &endPrice); HEGGQUERY h2 = 0; op = EGG_TOPSORT_ORDERBY; h2 = eggQuery_new_int32range(fieldName, startPrice, endPrice); h1 = eggQuery_and(h1, h2); } HEGGTOPCOLLECTOR hTopCollector = eggTopCollector_new(0); switch (1) { case 1: eggTopCollector_set_orderby(hTopCollector, 2, "num1", 1, "num2", 1); break; case 2: eggTopCollector_set_sorttype(hTopCollector, EGG_TOPSORT_SCORE); break; default: eggTopCollector_set_sorttype(hTopCollector, EGG_TOPSORT_NOT); break; } struct timeval vstart, vend; gettimeofday(&vstart, 0); EBOOL ret = eggIndexSearcher_search_with_query(hIndexSearcher, hTopCollector, h1); gettimeofday(&vend, 0); printf("search_with_query time : %f\n", (double)(vend.tv_sec - vstart.tv_sec) + (double)(vend.tv_usec - vstart.tv_usec)/1000000); if(ret ==EGG_FALSE) { printf("no key !\n"); exit(1); } // eggTopCollector_delete(hTopCollector); // eggQuery_delete(h1); // eggIndexSearcher_delete(hIndexSearcher); // eggIndexReader_close(hIndexReader); // eggPath_close(hEggHandle); // return ; if (0) { // deprecated HEGGQUERY hQuery_tmp = 0; //取时间范围 hQuery_tmp = eggQuery_new_stringrange("time", "1", "2"); //按时间排序 eggIndexSearcher_filter(hIndexSearcher, hTopCollector, hQuery_tmp, 1); //按相关度排序 //eggIndexSearcher_filter(hIndexSearcher, hTopCollector, hQuery_tmp, 0); eggQuery_delete(hQuery_tmp); } HEGGSCOREDOC lp_score_doc = eggTopCollector_top_docs(hTopCollector); count_t cnt = 
eggTopCollector_total_hits(hTopCollector); index_t idx = 0; printf("count : %d\n", cnt); // return ; #if(0) HEGGDOCUMENT* ppeggDocument = EGG_NULL; eggIndexReader_get_documentSet(hIndexReader, lp_score_doc, cnt, &ppeggDocument); while(idx != cnt) { printf("--------------------------\n"); HEGGFIELD lp_field = eggDocument_get_field(ppeggDocument[idx], "f_id"); unsigned int len = 0; if(lp_field) printf("count %d id : %lld \nf_id : %s ", idx, EGGDID_DOCID(&(lp_score_doc[idx].idDoc)), eggField_get_value(lp_field, &len)); eggDocument_delete(ppeggDocument[idx]); idx++; } #endif #if(0) while(idx != cnt && idx < 10000) { HEGGDOCUMENT lp_eggDocument = EGG_NULL; printf("%lld ----\n", lp_score_doc[idx].idDoc); eggIndexReader_get_document(hIndexReader, lp_score_doc[idx].idDoc, &lp_eggDocument); HEGGFIELD lp_field = eggDocument_get_field(lp_eggDocument, "f_id"); // HEGGFIELD lp_field2 = eggDocument_get_field(lp_eggDocument, "random"); //HEGGFIELD lp_field3 = eggDocument_get_field(lp_eggDocument, "num1"); //HEGGFIELD lp_field4 = eggDocument_get_field(lp_eggDocument, "num2"); // HEGGFIELD lp_field3 = eggDocument_get_field(lp_eggDocument, "spanfield2"); unsigned int len = 0; unsigned int len2 = 0; unsigned int len3 = 0; if(lp_field) printf("count %d id : %lld f_id: %s \n", idx, EGGDID_DOCID(&(lp_score_doc[idx].idDoc)), eggField_get_value(lp_field, &len) ); // if(lp_field3) // printf("count %d id : %lld content : %s weightfield: %d\n", idx, EGGDID_DOCID(&(lp_score_doc[idx].idDoc)), eggField_get_value(lp_field3, &len), eggField_get_value(lp_field3, &len3)); /* { char **pkeywords; size16_t *pkeySz; // int **ppos = NULL; count_t nums; eggTopCollector_get_keyPosition(hTopCollector, EGGDID_DOCID(&lp_score_doc[idx].idDoc), "content", &pkeywords, &pkeySz, NULL, &nums); int i; for (i = 0; i < nums; i++) { printf("Key[%.*s]\n", pkeySz[i], pkeywords[i]); } free(pkeySz); for (i = 0; i < nums; i++) { free(pkeywords[i]); } free(pkeywords); } */ // lp_field = eggDocument_get_field(lp_eggDocument, 
"price"); // printf("date : [%s] \n", eggField_get_value(lp_field, &len)); eggDocument_delete(lp_eggDocument); idx++; // usleep(5000); } #endif eggTopCollector_delete(hTopCollector); eggQuery_delete(h1); eggIndexSearcher_delete(hIndexSearcher); eggIndexReader_close(hIndexReader); eggPath_close(hEggHandle); }
void CeggItfTest::testIndexSearchIter(char* dir_path) { char key[1000] = {0}; type_t op = EGG_TOPSORT_SCORE; HEGGHANDLE hEggHandle = eggPath_open(dir_path); HEGGINDEXREADER hIndexReader = eggIndexReader_open(hEggHandle); HEGGINDEXSEARCHER hIndexSearcher = eggIndexSearcher_new(hIndexReader); char fieldName[200] = ""; HEGGQUERY h1, h2; h1 = getQuery(); HEGGSEARCHITER lp_iter = eggIndexSearcher_get_queryiter(hIndexSearcher); count_t pagenum = 0; type_t op1; printf("result sort : 1. not sort, 2. score sort, 3. weight sort"); scanf("%d", &pagenum); if(pagenum == 2) { op1 = EGG_TOPSORT_SCORE; } else if(pagenum == 3) { op1 = EGG_TOPSORT_WEIGHT; } else { op1 = EGG_TOPSORT_NOT; } printf("set pagenum : "); scanf("%d", &pagenum); eggSearchIter_reset(lp_iter, pagenum); EBOOL ret = 0; struct timeval vstart, vend; while(!EGGITER_OVERFIRST(lp_iter) && !EGGITER_OVERLAST(lp_iter)) { HEGGTOPCOLLECTOR hTopCollector = eggTopCollector_new(0); eggTopCollector_set_sorttype(hTopCollector, op1); gettimeofday(&vstart, 0); ret = eggIndexSearcher_search_with_queryiter(hIndexSearcher, hTopCollector, h1, lp_iter); gettimeofday(&vend, 0); printf("iterSearch time : %f\n", (double)(vend.tv_sec - vstart.tv_sec) + (double)((vend.tv_usec - vstart.tv_usec))/1000000); if(ret ==EGG_FALSE) { printf("no key !\n"); exit(1); } HEGGSCOREDOC lp_score_doc = eggTopCollector_top_docs(hTopCollector); count_t cnt = eggTopCollector_total_hits(hTopCollector); index_t idx = 0; printf("count : %d\n", cnt); while(idx != cnt) { printf("count %d id : %lld \n", idx, EGGDID_DOCID(&(lp_score_doc[idx].idDoc)) ); /* HEGGDOCUMENT lp_eggDocument = EGG_NULL; eggIndexReader_get_document(hIndexReader, lp_score_doc[idx].idDoc, &lp_eggDocument); HEGGFIELD lp_field = eggDocument_get_field(lp_eggDocument, "content"); unsigned int len = 0; if(lp_field) printf("count %d id : %lld content : %s\n", idx, EGGDID_DOCID(&(lp_score_doc[idx].idDoc)), eggField_get_value(lp_field, &len)); eggDocument_delete(lp_eggDocument); */ idx++; } 
eggTopCollector_delete(hTopCollector); char c; printf("is jump result ? (y/n) "); getchar(); scanf("%c", &c); if(c == 'y') { int jumpcnt = 0; printf("jump cnt : "); scanf("%d", &jumpcnt); eggSearchIter_iter(lp_iter, jumpcnt); } } eggSearchIter_delete(lp_iter); eggQuery_delete(h1); eggIndexSearcher_delete(hIndexSearcher); eggIndexReader_close(hIndexReader); eggPath_close(hEggHandle); }