HEGGRWSBAKERINFO eggRWSBakerInfo_new(char* path, long timestamp) { if(POINTER_IS_INVALID(path)) { return EGG_NULL; } HEGGRWSBAKERINFO lp_info = (HEGGRWSBAKERINFO)malloc(sizeof(EGGRWSBAKERINFO)); lp_info->status = 0; lp_info->thrId = 0; lp_info->timestamp = timestamp; EGG_BAKER_SET_IDLE(lp_info->status); EGG_BAKER_SET_READING(lp_info->status); lp_info->path = strdup(path); pthread_mutex_init( &lp_info->mutex, NULL); lp_info->reqCnt = 0; lp_info->baker = (HEGGRWSBAKER)malloc(sizeof(EGGRWSBAKER)); lp_info->timestamp = timestamp; lp_info->baker->next = EGG_NULL; HEGGHANDLE hEggHandle = eggPath_open(path); lp_info->baker->hWriter = eggIndexWriter_open(hEggHandle, "weightfield"); lp_info->baker->hReader = eggIndexReader_open(hEggHandle); lp_info->baker->hSearcher = eggIndexSearcher_new(lp_info->baker->hReader); eggPath_close(hEggHandle); return lp_info; }
/**
 * Initialise the global system recorder, g_sysRecorder_handle.
 *
 * Allocates and zeroes the recorder, initialises its mutex, builds its path
 * as the literal prefix "/%%%" followed by EGG_SYSRECORD_PATH, and opens an
 * index reader, searcher and writer on that path.
 *
 * Every failure is fatal: the process exits with -1 when the recorder is
 * already initialised, when EGG_SYSRECORD_PATH does not exist, or when an
 * allocation fails.
 *
 * @return EGG_TRUE on success (the function does not return on failure).
 */
PUBLIC EBOOL EGGAPI eggSySRecorder_init()
{
    if (!POINTER_IS_INVALID(g_sysRecorder_handle))
    {
        exit(-1);
    }

    g_sysRecorder_handle = (HEGGSYSRECORDER)malloc(sizeof(EGGSYSRECORDER));
    if (!g_sysRecorder_handle)
    {
        exit(-1);
    }
    memset(g_sysRecorder_handle, 0, sizeof(EGGSYSRECORDER));
    pthread_mutex_init(&g_sysRecorder_handle->mutex, NULL);

    if (access(EGG_SYSRECORD_PATH, F_OK) != 0)
    {
        exit(-1);
    }

    /* Keep the historical 1024-byte minimum, but grow the buffer when the
     * prefixed path would not fit, so the strcat calls cannot overflow. */
    size_t n_capacity = 1024;
    size_t n_needed = strlen("/%%%") + strlen(EGG_SYSRECORD_PATH) + 1;
    if (n_needed > n_capacity)
    {
        n_capacity = n_needed;
    }

    g_sysRecorder_handle->path = (char*)malloc(n_capacity);
    if (!g_sysRecorder_handle->path)
    {
        exit(-1);
    }
    g_sysRecorder_handle->path[0] = 0;
    strcat(g_sysRecorder_handle->path, "/%%%");
    strcat(g_sysRecorder_handle->path, EGG_SYSRECORD_PATH);

    HEGGHANDLE hHandle = eggDirectory_open(g_sysRecorder_handle->path);
    g_sysRecorder_handle->hIndexReader = eggIndexReader_open(hHandle);
    g_sysRecorder_handle->hIndexSearcher = eggIndexSearcher_new(g_sysRecorder_handle->hIndexReader);
    g_sysRecorder_handle->hIndexWriter = eggIndexWriter_open(hHandle, "");
    /* NOTE(review): opened with eggDirectory_open but released with
     * eggPath_close - kept as in the original; confirm the pairing. */
    eggPath_close(hHandle);

    return EGG_TRUE;
}
int main() { HEGGHANDLE hHandle = eggPath_open("file:///tmp/"); HEGGINDEXREADER hIndexReader = eggIndexReader_open(hHandle); HEGGINDEXSEARCHER hIndexSearcher = eggIndexSearcher_new(hIndexReader); HEGGQUERY hq = eggQuery_new_string("content", "is good", strlen("is good"), ANALYZER_CWSLEX); if(hq != EGG_NULL) { printf("query init OK! \n"); } //填0取所有结果,非0按填的值取个数 HEGGTOPCOLLECTOR hTopCollector = eggTopCollector_new(0); eggTopCollector_set_sorttype(hTopCollector, EGG_TOPSORT_WEIGHT); printf("eggTopCollector sortType is EGG_TOPSORT_WEIGHT \n"); EBOOL ret = eggIndexSearcher_search_with_query(hIndexSearcher, hTopCollector, hq); if (ret == EGG_TRUE) { //对最后结果进行排序 //EGG_TOPSORT_WEIGHT: 按document的weight排序 //EGG_TOPSORT_SCORE: 按查询关键字的相关度排序(打分排序) //EGG_TOPSORT_NOT: 不排序 HEGGSCOREDOC lp_score_doc = eggTopCollector_top_docs(hTopCollector); count_t cnt = eggTopCollector_total_hits(hTopCollector); index_t i = 0; printf("have hit %u documents\n", cnt); while (i != cnt) { HEGGDOCUMENT lp_eggDocument = EGG_NULL; eggIndexReader_get_document(hIndexReader, lp_score_doc[i].idDoc, &lp_eggDocument); HEGGFIELD lp_field = eggDocument_get_field(lp_eggDocument, "content"); unsigned len = 0; char *val = eggField_get_value(lp_field, &len); printf("id : [%llu], content : [%s], weight : [%d]\n", EGGDID_DOCID(&lp_score_doc[i].idDoc), val, eggDocument_get_weight(lp_eggDocument)); lp_field = 0; eggDocument_delete(lp_eggDocument); i++; } } eggTopCollector_delete(hTopCollector); eggQuery_delete(hq); eggIndexSearcher_delete(hIndexSearcher); eggIndexReader_close(hIndexReader); eggPath_close(hHandle); return 0; }
int main(int argc, char* argv[]) { HEGGHANDLE hHandle = eggPath_open("file:///egg/"); HEGGINDEXREADER hIndexReader = eggIndexReader_open(hHandle); HEGGINDEXSEARCHER hIndexSearcher = eggIndexSearcher_new(hIndexReader); HEGGQUERY hq = eggQuery_new_string("content", argv[1], strlen(argv[1]), ""); if(hq != EGG_NULL) { printf("query init OK! \n"); } HEGGTOPCOLLECTOR hTopCollector = eggTopCollector_new(0); EBOOL ret = eggIndexSearcher_search_with_query(hIndexSearcher, hTopCollector, hq); if (ret == EGG_TRUE) { HEGGSCOREDOC lp_score_doc = eggTopCollector_top_docs(hTopCollector); count_t cnt = eggTopCollector_total_hits(hTopCollector); index_t i = 0; printf("have hit %u documents\n", cnt); while (i != cnt) { HEGGDOCUMENT lp_eggDocument = EGG_NULL; eggIndexReader_get_document(hIndexReader, lp_score_doc[i].idDoc, &lp_eggDocument); HEGGFIELD lp_field = eggDocument_get_field(lp_eggDocument, "content"); unsigned len = 0; char *val = eggField_get_value(lp_field, &len); printf("id : [%llu], content : [%s], \n", EGGDID_DOCID(&lp_score_doc[i].idDoc), val); lp_field = 0; eggDocument_delete(lp_eggDocument); i++; } } eggTopCollector_delete(hTopCollector); eggQuery_delete(hq); eggIndexSearcher_delete(hIndexSearcher); eggIndexReader_close(hIndexReader); eggPath_close(hHandle); return 0; }
int main() { //ImLexAnalyzer* p_la = (ImLexAnalyzer*)ImCnLexAnalyzer_new(); // ImLexAnalyzer* p_la = (ImLexAnalyzer*)ImCnLexAnalyzer_new(); HEGGDIRECTORY hDirectory = eggDirectory_open("/ape/ImRoBot5/index/bbstest"); HEGGINDEXREADER hIndexReader = eggIndexReader_open(hDirectory); HEGGINDEXSEARCHER hIndexSearcher = eggIndexSearcher_new(hIndexReader); HEGGQUERY h1, h2, h3; h1 = eggQuery_new_string("title", "人", 3, ANALYZER_CWSLEX); //h2 = eggQuery_new_string("content", "new", 3, p_la); // h2 = eggQuery_new_string("body", "some description", 16, p_la); // h3 = eggQuery_new_int32("price", 199); // h2 = eggQuery_and(h3, h2); //h1 = eggQuery_or(h2, h1); // h3 = h2 = 0; HEGGTOPCOLLECTOR hTopCollector = eggTopCollector_new(0); int ret = eggIndexSearcher_search_with_query(hIndexSearcher, hTopCollector, h1); if (ret == EGG_TRUE) { eggTopCollector_normalized(hTopCollector, EGG_TOPSORT_SCORE); // eggTopCollector_normalized(hTopCollector, EGG_TOPSORT_NOT); HEGGSCOREDOC lp_score_doc = eggTopCollector_top_docs(hTopCollector); count_t cnt = eggTopCollector_total_hits(hTopCollector); printf("have hit %u documents\n", cnt); if (cnt > 0) { printf("last document: id[%llu]\n", lp_score_doc[cnt-1].idDoc); HEGGDOCUMENT lp_eggDocument = EGG_NULL; eggIndexReader_get_document(hIndexReader, lp_score_doc[cnt-1].idDoc, &lp_eggDocument); HEGGFIELD lp_field = eggDocument_get_field(lp_eggDocument,"title"); unsigned len = 0; char *val = eggField_get_value(lp_field, &len); printf("last document: body[%.*s]\n", len, val); lp_field = 0; eggDocument_delete(lp_eggDocument); } } eggTopCollector_delete(hTopCollector); eggQuery_delete(h1); eggIndexSearcher_delete(hIndexSearcher); eggIndexReader_close(hIndexReader); eggDirectory_close(hDirectory); ImLexAnalyzer_delete(p_la); }
void CeggItfTest::testExportDoc(char* dir_path) { void *hEggHandle = eggPath_open(dir_path); HEGGINDEXREADER hIndexReader = eggIndexReader_open(hEggHandle); offset64_t n_cursor = 0; HEGGDOCUMENT lp_eggDocument = EGG_NULL; while(lp_eggDocument = eggIndexReader_export_document(hIndexReader, &n_cursor)) { HEGGFIELD lp_field = eggDocument_get_field(lp_eggDocument, "content"); unsigned int len = 0; if(lp_field) printf("%s", eggField_get_value(lp_field, &len)); eggDocument_delete(lp_eggDocument); } return ; }
void CeggItfTest::testIndexSearch(char* dir_path) { char key[1000] = {0}; type_t op = EGG_TOPSORT_SCORE; HEGGHANDLE hEggHandle = eggPath_open(dir_path); HEGGINDEXREADER hIndexReader = eggIndexReader_open(hEggHandle); HEGGINDEXSEARCHER hIndexSearcher = eggIndexSearcher_new(hIndexReader); char fieldName[200] = ""; HEGGQUERY h1; h1 = getQuery(); char c; printf("key range search?(y/n)"); scanf("%c", &c); if(c == 'y') { printf("FieldName: "); scanf("%s", fieldName); int startPrice = 0; int endPrice = 0; printf("start Price: "); scanf("%d", &startPrice); printf("end Price: "); scanf("%d", &endPrice); HEGGQUERY h2 = 0; op = EGG_TOPSORT_ORDERBY; h2 = eggQuery_new_int32range(fieldName, startPrice, endPrice); h1 = eggQuery_and(h1, h2); } HEGGTOPCOLLECTOR hTopCollector = eggTopCollector_new(0); switch (1) { case 1: eggTopCollector_set_orderby(hTopCollector, 2, "num1", 1, "num2", 1); break; case 2: eggTopCollector_set_sorttype(hTopCollector, EGG_TOPSORT_SCORE); break; default: eggTopCollector_set_sorttype(hTopCollector, EGG_TOPSORT_NOT); break; } struct timeval vstart, vend; gettimeofday(&vstart, 0); EBOOL ret = eggIndexSearcher_search_with_query(hIndexSearcher, hTopCollector, h1); gettimeofday(&vend, 0); printf("search_with_query time : %f\n", (double)(vend.tv_sec - vstart.tv_sec) + (double)(vend.tv_usec - vstart.tv_usec)/1000000); if(ret ==EGG_FALSE) { printf("no key !\n"); exit(1); } // eggTopCollector_delete(hTopCollector); // eggQuery_delete(h1); // eggIndexSearcher_delete(hIndexSearcher); // eggIndexReader_close(hIndexReader); // eggPath_close(hEggHandle); // return ; if (0) { // deprecated HEGGQUERY hQuery_tmp = 0; //取时间范围 hQuery_tmp = eggQuery_new_stringrange("time", "1", "2"); //按时间排序 eggIndexSearcher_filter(hIndexSearcher, hTopCollector, hQuery_tmp, 1); //按相关度排序 //eggIndexSearcher_filter(hIndexSearcher, hTopCollector, hQuery_tmp, 0); eggQuery_delete(hQuery_tmp); } HEGGSCOREDOC lp_score_doc = eggTopCollector_top_docs(hTopCollector); count_t cnt = 
eggTopCollector_total_hits(hTopCollector); index_t idx = 0; printf("count : %d\n", cnt); // return ; #if(0) HEGGDOCUMENT* ppeggDocument = EGG_NULL; eggIndexReader_get_documentSet(hIndexReader, lp_score_doc, cnt, &ppeggDocument); while(idx != cnt) { printf("--------------------------\n"); HEGGFIELD lp_field = eggDocument_get_field(ppeggDocument[idx], "f_id"); unsigned int len = 0; if(lp_field) printf("count %d id : %lld \nf_id : %s ", idx, EGGDID_DOCID(&(lp_score_doc[idx].idDoc)), eggField_get_value(lp_field, &len)); eggDocument_delete(ppeggDocument[idx]); idx++; } #endif #if(0) while(idx != cnt && idx < 10000) { HEGGDOCUMENT lp_eggDocument = EGG_NULL; printf("%lld ----\n", lp_score_doc[idx].idDoc); eggIndexReader_get_document(hIndexReader, lp_score_doc[idx].idDoc, &lp_eggDocument); HEGGFIELD lp_field = eggDocument_get_field(lp_eggDocument, "f_id"); // HEGGFIELD lp_field2 = eggDocument_get_field(lp_eggDocument, "random"); //HEGGFIELD lp_field3 = eggDocument_get_field(lp_eggDocument, "num1"); //HEGGFIELD lp_field4 = eggDocument_get_field(lp_eggDocument, "num2"); // HEGGFIELD lp_field3 = eggDocument_get_field(lp_eggDocument, "spanfield2"); unsigned int len = 0; unsigned int len2 = 0; unsigned int len3 = 0; if(lp_field) printf("count %d id : %lld f_id: %s \n", idx, EGGDID_DOCID(&(lp_score_doc[idx].idDoc)), eggField_get_value(lp_field, &len) ); // if(lp_field3) // printf("count %d id : %lld content : %s weightfield: %d\n", idx, EGGDID_DOCID(&(lp_score_doc[idx].idDoc)), eggField_get_value(lp_field3, &len), eggField_get_value(lp_field3, &len3)); /* { char **pkeywords; size16_t *pkeySz; // int **ppos = NULL; count_t nums; eggTopCollector_get_keyPosition(hTopCollector, EGGDID_DOCID(&lp_score_doc[idx].idDoc), "content", &pkeywords, &pkeySz, NULL, &nums); int i; for (i = 0; i < nums; i++) { printf("Key[%.*s]\n", pkeySz[i], pkeywords[i]); } free(pkeySz); for (i = 0; i < nums; i++) { free(pkeywords[i]); } free(pkeywords); } */ // lp_field = eggDocument_get_field(lp_eggDocument, 
"price"); // printf("date : [%s] \n", eggField_get_value(lp_field, &len)); eggDocument_delete(lp_eggDocument); idx++; // usleep(5000); } #endif eggTopCollector_delete(hTopCollector); eggQuery_delete(h1); eggIndexSearcher_delete(hIndexSearcher); eggIndexReader_close(hIndexReader); eggPath_close(hEggHandle); }
void CeggItfTest::testIndexSearchIter(char* dir_path) { char key[1000] = {0}; type_t op = EGG_TOPSORT_SCORE; HEGGHANDLE hEggHandle = eggPath_open(dir_path); HEGGINDEXREADER hIndexReader = eggIndexReader_open(hEggHandle); HEGGINDEXSEARCHER hIndexSearcher = eggIndexSearcher_new(hIndexReader); char fieldName[200] = ""; HEGGQUERY h1, h2; h1 = getQuery(); HEGGSEARCHITER lp_iter = eggIndexSearcher_get_queryiter(hIndexSearcher); count_t pagenum = 0; type_t op1; printf("result sort : 1. not sort, 2. score sort, 3. weight sort"); scanf("%d", &pagenum); if(pagenum == 2) { op1 = EGG_TOPSORT_SCORE; } else if(pagenum == 3) { op1 = EGG_TOPSORT_WEIGHT; } else { op1 = EGG_TOPSORT_NOT; } printf("set pagenum : "); scanf("%d", &pagenum); eggSearchIter_reset(lp_iter, pagenum); EBOOL ret = 0; struct timeval vstart, vend; while(!EGGITER_OVERFIRST(lp_iter) && !EGGITER_OVERLAST(lp_iter)) { HEGGTOPCOLLECTOR hTopCollector = eggTopCollector_new(0); eggTopCollector_set_sorttype(hTopCollector, op1); gettimeofday(&vstart, 0); ret = eggIndexSearcher_search_with_queryiter(hIndexSearcher, hTopCollector, h1, lp_iter); gettimeofday(&vend, 0); printf("iterSearch time : %f\n", (double)(vend.tv_sec - vstart.tv_sec) + (double)((vend.tv_usec - vstart.tv_usec))/1000000); if(ret ==EGG_FALSE) { printf("no key !\n"); exit(1); } HEGGSCOREDOC lp_score_doc = eggTopCollector_top_docs(hTopCollector); count_t cnt = eggTopCollector_total_hits(hTopCollector); index_t idx = 0; printf("count : %d\n", cnt); while(idx != cnt) { printf("count %d id : %lld \n", idx, EGGDID_DOCID(&(lp_score_doc[idx].idDoc)) ); /* HEGGDOCUMENT lp_eggDocument = EGG_NULL; eggIndexReader_get_document(hIndexReader, lp_score_doc[idx].idDoc, &lp_eggDocument); HEGGFIELD lp_field = eggDocument_get_field(lp_eggDocument, "content"); unsigned int len = 0; if(lp_field) printf("count %d id : %lld content : %s\n", idx, EGGDID_DOCID(&(lp_score_doc[idx].idDoc)), eggField_get_value(lp_field, &len)); eggDocument_delete(lp_eggDocument); */ idx++; } 
eggTopCollector_delete(hTopCollector); char c; printf("is jump result ? (y/n) "); getchar(); scanf("%c", &c); if(c == 'y') { int jumpcnt = 0; printf("jump cnt : "); scanf("%d", &jumpcnt); eggSearchIter_iter(lp_iter, jumpcnt); } } eggSearchIter_delete(lp_iter); eggQuery_delete(h1); eggIndexSearcher_delete(hIndexSearcher); eggIndexReader_close(hIndexReader); eggPath_close(hEggHandle); }
int main(int argc, char* argv[]) { if(argc != 2) { printf("argc error!\n"); exit(-1); } if(access(argv[1], F_OK)) { printf("path is error!\n"); exit(-1); } char fddPath[1024]; char iddPath[1024]; char idxBakPath[1024]; char idxPath[1024]; HEGGDIRECTORY lp_dir = eggDirectory_open(argv[1]); HEGGINDEXREADER hEggIndexReader = eggIndexReader_open(lp_dir); sprintf(fddPath, "%s/egg.fdd", argv[1]); sprintf(iddPath, "%s/egg.idd", argv[1]); sprintf(idxBakPath, "%s/eggbak.idx", argv[1]); sprintf(idxPath, "%s/egg.idx", argv[1]); HEGGFILE lp_egg_field = EggFile_open(fddPath); HEGGFILE lp_egg_id = EggFile_open(iddPath); HEGGFILE lp_egg_idx_bak = EggFile_open(idxBakPath); HEGGFILE lp_egg_idx = EggFile_open(idxPath); HEGGFIELDVIEW lp_field_view = eggFieldView_new(lp_egg_field); HEGGINDEXVIEW lp_index_view = eggIndexView_new(lp_egg_idx, NULL); HEGGIDVIEW lp_id_view = eggIdView_new(lp_egg_id); HEGGINDEXVIEW lp_index_bak_view = eggIndexView_new(lp_egg_idx_bak, NULL); fdid_t fdid = 0; struct eggIndexInfo *lp_index_info; int n_field = 0; int n_key_cnt = 0; while ((lp_index_info = eggFieldView_iter(lp_field_view, &fdid))) { eggIndexView_load_info(lp_index_view, lp_index_info, lp_field_view); struct eggIndexInfo *lp_index_info_bak = (struct eggIndexInfo*)malloc(sizeof(struct eggIndexInfo)); memcpy(lp_index_info_bak, lp_index_info, sizeof(struct eggIndexInfo)); lp_index_info_bak->rootOff = 0; lp_index_info_bak->leafOff = 0; eggIndexView_load_info(lp_index_bak_view, lp_index_info_bak, EGG_NULL); int cnt = 0; offset64_t n_iter_off = EGGINDEXVIEW_LEAFOFF(lp_index_view); while (n_iter_off) { HEGGINDEXNODEVIEW lp_node_view = eggIndexView_load_node(lp_index_view, n_iter_off); index_t n_index_iter = 0; while(n_index_iter != EGGINDEXNODEVIEW_RDCNT(lp_node_view)) { HEGGINDEXRECORD pRecord = EGGINDEXNODEVIEW_RECORD_INDEX(lp_node_view, n_index_iter); printf("KEY[%.*s] KEY CNT : %d\n", EGGINDEXRECORD_KSIZE(pRecord), EGGINDEXRECORD_KEY(pRecord), n_key_cnt++); eggIndexView_insert(lp_index_bak_view, 
EGGINDEXRECORD_KEY(pRecord), EGGINDEXRECORD_KSIZE(pRecord), EGGINDEXRECORD_VAL(pRecord), EGGINDEXRECORD_VSIZE(pRecord) ); cnt++; n_index_iter++; } size16_t n_node_size = sizeof(EGGINDEXNODE) + lp_index_view->hInfo->rdSize * (lp_index_view->hInfo->rdCnt + 1); n_iter_off = eggIndexNodeView_get_nextoff(lp_node_view); eggIndexNodeView_delete(lp_node_view); } eggFieldView_release_indexinfo(lp_field_view, lp_index_info_bak->fid, lp_index_info_bak); // free(lp_index_info); printf("field %d[%d]\n", n_field, cnt); printf("------------------------------------\n"); n_field++; } return 0; }