예제 #1
0
파일: eggsearch1.c 프로젝트: baifanmvp/egg3
/*
 * Example: search the index opened from "file:///tmp/" for "is good" in the
 * "content" field and print every hit together with its document weight.
 *
 * Always returns 0, as the original example did.
 */
int main()
{
    HEGGHANDLE hHandle = eggPath_open("file:///tmp/");
    HEGGINDEXREADER hIndexReader = eggIndexReader_open(hHandle);

    HEGGINDEXSEARCHER hIndexSearcher = eggIndexSearcher_new(hIndexReader);

    HEGGQUERY hq = eggQuery_new_string("content", "is good", strlen("is good"), ANALYZER_CWSLEX);
    if (hq != EGG_NULL)
    {
        printf("query init OK! \n");
    }
    else
    {
        fprintf(stderr, "query init failed\n");
    }

    /* Pass 0 to collect all results; a non-zero value collects that many. */
    HEGGTOPCOLLECTOR hTopCollector = eggTopCollector_new(0);

    /*
     * Sort type applied to the final results:
     *   EGG_TOPSORT_WEIGHT: sort by document weight
     *   EGG_TOPSORT_SCORE:  sort by relevance to the query keywords (scoring)
     *   EGG_TOPSORT_NOT:    no sorting
     */
    eggTopCollector_set_sorttype(hTopCollector, EGG_TOPSORT_WEIGHT);
    printf("eggTopCollector sortType is EGG_TOPSORT_WEIGHT \n");

    /* Never hand a query that failed to build to the searcher. */
    if (hq != EGG_NULL
        && eggIndexSearcher_search_with_query(hIndexSearcher, hTopCollector, hq) == EGG_TRUE)
    {
        HEGGSCOREDOC lp_score_doc = eggTopCollector_top_docs(hTopCollector);
        count_t cnt = eggTopCollector_total_hits(hTopCollector);
        printf("have hit %u documents\n", cnt);

        /* Without a score array there is nothing to index into. */
        if (lp_score_doc == EGG_NULL)
        {
            cnt = 0;
        }

        for (index_t i = 0; i != cnt; i++)
        {
            HEGGDOCUMENT lp_eggDocument = EGG_NULL;

            eggIndexReader_get_document(hIndexReader,
                                        lp_score_doc[i].idDoc, &lp_eggDocument);
            if (lp_eggDocument == EGG_NULL)
            {
                /* The document could not be loaded; skip this hit. */
                continue;
            }

            HEGGFIELD lp_field = eggDocument_get_field(lp_eggDocument, "content");
            unsigned len = 0;
            char *val = EGG_NULL;
            if (lp_field != EGG_NULL)
            {
                val = eggField_get_value(lp_field, &len);
            }
            if (val == EGG_NULL)
            {
                val = "";
                len = 0;
            }

            /*
             * The value comes back with an explicit length, so bound the
             * print with it instead of relying on a NUL terminator.
             */
            printf("id : [%llu], content : [%.*s], weight : [%d]\n",
                   EGGDID_DOCID(&lp_score_doc[i].idDoc), (int)len, val,
                   eggDocument_get_weight(lp_eggDocument));

            eggDocument_delete(lp_eggDocument);
        }
    }

    eggTopCollector_delete(hTopCollector);
    eggQuery_delete(hq);
    eggIndexSearcher_delete(hIndexSearcher);
    eggIndexReader_close(hIndexReader);
    eggPath_close(hHandle);

    return 0;
}
예제 #2
0
/*
 * Build a block-item word list from the dictionary records stored for
 * analyzerName.
 *
 * The records are obtained through eggSySRecorder_get_dict("dict", ...). For
 * every hit, the EGG_SYS_DICTNAME and EGG_SYS_DICTKEY field values are copied
 * into NUL-terminated strings and appended with BlockItemPushWord().
 *
 * analyzerName: name passed to eggSySRecorder_get_dict(); NULL is rejected.
 *
 * Returns the list as returned by the last BlockItemPushWord() call, or
 * EGG_NULL/NULL when analyzerName is NULL, no collector could be obtained,
 * or no word was pushed.
 */
PUBLIC  P_NEW_BLOCK_ITEM eggAnalyzer_get_dictlist(char *analyzerName)
{
    if(!analyzerName)
    {
        return EGG_NULL;
    }

    P_NEW_BLOCK_ITEM pBlockItem = NULL;

    HEGGTOPCOLLECTOR hTopCollector = eggSySRecorder_get_dict("dict", analyzerName);
    if (!hTopCollector)
    {
        /* Nothing to iterate over. */
        return pBlockItem;
    }

    HEGGSCOREDOC lp_score_doc = eggTopCollector_top_docs(hTopCollector);
    count_t cnt = eggTopCollector_total_hits(hTopCollector);
    HEGGINDEXREADER hIndexReader = eggSySRecorder_alloc_reader();

    /* Without a score array or a reader no document can be fetched. */
    if (!lp_score_doc || !hIndexReader)
    {
        cnt = 0;
    }

    /* Hits are visited from the last one down to the first. */
    while (cnt--)
    {
        HEGGDOCUMENT lp_eggDocument = EGG_NULL;
        eggIndexReader_get_document(hIndexReader, lp_score_doc[cnt].idDoc, &lp_eggDocument);
        if (!lp_eggDocument)
        {
            continue;
        }

        HEGGFIELD hField1 = eggDocument_get_field(lp_eggDocument, EGG_SYS_DICTNAME);
        HEGGFIELD hField2 = eggDocument_get_field(lp_eggDocument, EGG_SYS_DICTKEY);
        size32_t n_len1 = 0;
        size32_t n_len2 = 0;
        char *pDict = EGG_NULL;
        char *pKey = EGG_NULL;

        if (hField1)
        {
            pDict = eggField_get_value(hField1, &n_len1);
        }
        if (hField2)
        {
            pKey = eggField_get_value(hField2, &n_len2);
        }

        if (pDict && pKey)
        {
            /* The values carry explicit lengths; make NUL-terminated copies. */
            char *lp_dict_buf = strndup(pDict, n_len1);
            char *lp_key_buf = strndup(pKey, n_len2);

            /* Skip the record if either copy could not be allocated. */
            if (lp_dict_buf && lp_key_buf)
            {
                eggPrtLog_info("eggAnalyzer", "[%s], [%s]\n", lp_dict_buf, lp_key_buf);
                /* NOTE(review): meaning of "NR" and 1000000 is not visible here - confirm. */
                pBlockItem = BlockItemPushWord(pBlockItem, lp_dict_buf, lp_key_buf, "NR", 1000000);
            }
            free(lp_dict_buf);
            free(lp_key_buf);
        }

        /*
         * pDict/pKey are intentionally not freed here (the original had the
         * free() calls disabled); presumably they belong to the document -
         * confirm against eggField_get_value().
         */
        eggDocument_delete(lp_eggDocument);
    }

    eggTopCollector_delete(hTopCollector);
    eggSySRecorder_free_reader((void**)&hIndexReader);

    return pBlockItem;
}
예제 #3
0
/*
 * Example: search the index opened from "file:///egg/" for the keyword given
 * as the first command-line argument (field "content") and print every hit.
 *
 * Returns 1 when no keyword argument was supplied, 0 otherwise.
 */
int main(int argc, char* argv[])
{
    /* argv[1] is dereferenced below, so it must be present. */
    if (argc < 2)
    {
        fprintf(stderr, "usage: %s <keyword>\n",
                (argc > 0 && argv[0]) ? argv[0] : "eggsearch");
        return 1;
    }

    HEGGHANDLE hHandle = eggPath_open("file:///egg/");
    HEGGINDEXREADER hIndexReader = eggIndexReader_open(hHandle);

    HEGGINDEXSEARCHER hIndexSearcher = eggIndexSearcher_new(hIndexReader);

    HEGGQUERY hq = eggQuery_new_string("content", argv[1], strlen(argv[1]), "");
    if (hq != EGG_NULL)
    {
        printf("query init OK! \n");
    }
    else
    {
        fprintf(stderr, "query init failed\n");
    }

    HEGGTOPCOLLECTOR hTopCollector = eggTopCollector_new(0);

    /* Never hand a query that failed to build to the searcher. */
    if (hq != EGG_NULL
        && eggIndexSearcher_search_with_query(hIndexSearcher, hTopCollector, hq) == EGG_TRUE)
    {
        HEGGSCOREDOC lp_score_doc = eggTopCollector_top_docs(hTopCollector);
        count_t cnt = eggTopCollector_total_hits(hTopCollector);
        printf("have hit %u documents\n", cnt);

        /* Without a score array there is nothing to index into. */
        if (lp_score_doc == EGG_NULL)
        {
            cnt = 0;
        }

        for (index_t i = 0; i != cnt; i++)
        {
            HEGGDOCUMENT lp_eggDocument = EGG_NULL;

            eggIndexReader_get_document(hIndexReader,
                                        lp_score_doc[i].idDoc, &lp_eggDocument);
            if (lp_eggDocument == EGG_NULL)
            {
                /* The document could not be loaded; skip this hit. */
                continue;
            }

            HEGGFIELD lp_field = eggDocument_get_field(lp_eggDocument, "content");
            unsigned len = 0;
            char *val = EGG_NULL;
            if (lp_field != EGG_NULL)
            {
                val = eggField_get_value(lp_field, &len);
            }
            if (val == EGG_NULL)
            {
                val = "";
                len = 0;
            }

            /*
             * The value comes back with an explicit length, so bound the
             * print with it instead of relying on a NUL terminator.
             */
            printf("id : [%llu], content : [%.*s], \n",
                   EGGDID_DOCID(&lp_score_doc[i].idDoc), (int)len, val);

            eggDocument_delete(lp_eggDocument);
        }
    }

    eggTopCollector_delete(hTopCollector);
    eggQuery_delete(hq);
    eggIndexSearcher_delete(hIndexSearcher);
    eggIndexReader_close(hIndexReader);
    eggPath_close(hHandle);

    return 0;
}
예제 #4
0
   int main()
{
   /*
    * Example: open the index directory, search the "title" field for a
    * single keyword, score-normalize the hits and print the last one.
    *
    * The original example also carried disabled lines that built further
    * queries and combined them with eggQuery_and()/eggQuery_or(); they
    * depended on an analyzer object (p_la) that is no longer created, so
    * neither those lines nor the matching ImLexAnalyzer_delete(p_la) call
    * are kept: p_la is not declared anywhere in this function.
    */
   HEGGDIRECTORY hDirectory = eggDirectory_open("/ape/ImRoBot5/index/bbstest");
   HEGGINDEXREADER hIndexReader = eggIndexReader_open(hDirectory);
   HEGGINDEXSEARCHER hIndexSearcher = eggIndexSearcher_new(hIndexReader);

   /* The keyword is 3 bytes long in UTF-8. */
   HEGGQUERY h1 = eggQuery_new_string("title", "人", 3, ANALYZER_CWSLEX);

   HEGGTOPCOLLECTOR hTopCollector = eggTopCollector_new(0);
   int ret = eggIndexSearcher_search_with_query(hIndexSearcher, hTopCollector, h1);
   if (ret == EGG_TRUE)
   {
        /* EGG_TOPSORT_NOT could be passed instead to leave the hits unsorted. */
        eggTopCollector_normalized(hTopCollector, EGG_TOPSORT_SCORE);
        HEGGSCOREDOC lp_score_doc = eggTopCollector_top_docs(hTopCollector);
        count_t cnt =  eggTopCollector_total_hits(hTopCollector);
        printf("have hit %u documents\n", cnt);

        if (cnt > 0 && lp_score_doc != EGG_NULL)
        {
             /* NOTE(review): assumes idDoc is an integer matching %llu - confirm. */
             printf("last document: id[%llu]\n", lp_score_doc[cnt-1].idDoc);

             HEGGDOCUMENT lp_eggDocument = EGG_NULL;
             eggIndexReader_get_document(hIndexReader,
                                lp_score_doc[cnt-1].idDoc, &lp_eggDocument);
             if (lp_eggDocument != EGG_NULL)
             {
                  HEGGFIELD lp_field = eggDocument_get_field(lp_eggDocument,"title");
                  unsigned len = 0;
                  char *val = EGG_NULL;
                  if (lp_field != EGG_NULL)
                  {
                       val = eggField_get_value(lp_field, &len);
                  }
                  if (val == EGG_NULL)
                  {
                       val = "";
                       len = 0;
                  }
                  /* The precision argument of %.*s must be an int. */
                  printf("last document: body[%.*s]\n", (int)len, val);
                  eggDocument_delete(lp_eggDocument);
             }
        }
   }

   eggTopCollector_delete(hTopCollector);
   eggQuery_delete(h1);
   eggIndexSearcher_delete(hIndexSearcher);
   eggIndexReader_close(hIndexReader);
   eggDirectory_close(hDirectory);

   return 0;
}
예제 #5
0
/*
 * Interactive search test.
 *
 * Opens the index at dir_path, obtains a base query from getQuery(),
 * optionally ANDs it with an int32 range query read from stdin, runs the
 * search with an order-by on "num1" and "num2", and prints the elapsed time
 * and the hit count. The per-document dump loops are compiled out (#if(0)).
 *
 * dir_path: path handed to eggPath_open().
 *
 * Calls exit(1) when the search returns EGG_FALSE (no cleanup is performed
 * on that path, as in the original).
 */
void CeggItfTest::testIndexSearch(char* dir_path)
{
    HEGGHANDLE hEggHandle = eggPath_open(dir_path);

    HEGGINDEXREADER hIndexReader = eggIndexReader_open(hEggHandle);

    HEGGINDEXSEARCHER hIndexSearcher = eggIndexSearcher_new(hIndexReader);

    /*
     * Kept at function scope: whether the range query copies the field name
     * is not visible here, so the buffer must outlive the search.
     */
    char fieldName[200] = "";
    HEGGQUERY h1;

    h1 = getQuery();

    /* Default to 'n' so a failed read never leaves `c` indeterminate. */
    char c = 'n';
    printf("key range search?(y/n)");
    if (scanf("%c", &c) != 1)
    {
        c = 'n';
    }
    if(c == 'y')
    {
        int startPrice = 0;
        int endPrice = 0;
        int inputOk = 1;

        printf("FieldName: ");
        /* Width 199 == sizeof(fieldName) - 1: leaves room for the NUL. */
        if (scanf("%199s", fieldName) != 1)
        {
            inputOk = 0;
        }
        printf("start Price: ");
        if (scanf("%d", &startPrice) != 1)
        {
            inputOk = 0;
        }
        printf("end Price: ");
        if (scanf("%d", &endPrice) != 1)
        {
            inputOk = 0;
        }

        if (inputOk)
        {
            HEGGQUERY h2 = eggQuery_new_int32range(fieldName, startPrice, endPrice);
            h1 = eggQuery_and(h1, h2);
        }
        else
        {
            printf("invalid range input, range search skipped\n");
        }
    }

    HEGGTOPCOLLECTOR hTopCollector = eggTopCollector_new(0);
    /* The constant selects the sort mode; edit it to try the other modes. */
    switch (1) {
    case 1:
        eggTopCollector_set_orderby(hTopCollector, 2, "num1", 1,
                                    "num2", 1);
        break;
    case 2:
        eggTopCollector_set_sorttype(hTopCollector, EGG_TOPSORT_SCORE);
        break;
    default:
        eggTopCollector_set_sorttype(hTopCollector, EGG_TOPSORT_NOT);
        break;
    }

    /* Time only the search call itself. */
    struct timeval vstart, vend;
    gettimeofday(&vstart, 0);
    EBOOL ret = eggIndexSearcher_search_with_query(hIndexSearcher, hTopCollector, h1);
    gettimeofday(&vend, 0);
    printf("search_with_query time : %f\n", (double)(vend.tv_sec - vstart.tv_sec) + (double)(vend.tv_usec - vstart.tv_usec)/1000000);
    if(ret ==EGG_FALSE)
    {
        printf("no key !\n");
        exit(1);
    }

    if (0)
    {                           // deprecated
        HEGGQUERY hQuery_tmp = 0;
        // take a time range
        hQuery_tmp = eggQuery_new_stringrange("time", "1", "2");
        // sort by time
        eggIndexSearcher_filter(hIndexSearcher, hTopCollector, hQuery_tmp, 1);
        // sort by relevance
        //eggIndexSearcher_filter(hIndexSearcher, hTopCollector, hQuery_tmp, 0);
        eggQuery_delete(hQuery_tmp);
    }

    HEGGSCOREDOC lp_score_doc = eggTopCollector_top_docs(hTopCollector);
    count_t cnt =  eggTopCollector_total_hits(hTopCollector);
    index_t idx = 0;
    printf("count : %d\n", cnt);

    /* Disabled: fetch all hit documents in one call and dump "f_id". */
    #if(0)
    HEGGDOCUMENT* ppeggDocument = EGG_NULL;
    eggIndexReader_get_documentSet(hIndexReader, lp_score_doc, cnt, &ppeggDocument);
    while(idx != cnt)

    {
        printf("--------------------------\n");
        HEGGFIELD lp_field = eggDocument_get_field(ppeggDocument[idx], "f_id");
        unsigned int len = 0;
        if(lp_field)
            printf("count %d id : %lld \nf_id : %s ", idx, EGGDID_DOCID(&(lp_score_doc[idx].idDoc)), eggField_get_value(lp_field, &len));

        eggDocument_delete(ppeggDocument[idx]);
        idx++;

    }
    #endif

    /* Disabled: fetch the hit documents one by one (at most 10000). */
    #if(0)
    while(idx != cnt && idx < 10000)
    {
        HEGGDOCUMENT lp_eggDocument = EGG_NULL;
        printf("%lld ----\n", lp_score_doc[idx].idDoc);
        eggIndexReader_get_document(hIndexReader, lp_score_doc[idx].idDoc, &lp_eggDocument);

        HEGGFIELD lp_field = eggDocument_get_field(lp_eggDocument, "f_id");
        //HEGGFIELD lp_field2 = eggDocument_get_field(lp_eggDocument, "random");
        //HEGGFIELD lp_field3 = eggDocument_get_field(lp_eggDocument, "num1");
        //HEGGFIELD lp_field4 = eggDocument_get_field(lp_eggDocument, "num2");
        //HEGGFIELD lp_field3 = eggDocument_get_field(lp_eggDocument, "spanfield2");
        unsigned int len = 0;
        unsigned int len2 = 0;
        unsigned int len3 = 0;
        if(lp_field)
            printf("count %d id : %lld  f_id: %s \n", idx, EGGDID_DOCID(&(lp_score_doc[idx].idDoc)),  eggField_get_value(lp_field, &len) );

//        if(lp_field3)
//	  printf("count %d id : %lld content : %s weightfield: %d\n", idx, EGGDID_DOCID(&(lp_score_doc[idx].idDoc)), eggField_get_value(lp_field3, &len), eggField_get_value(lp_field3, &len3));
/*
        {
            char **pkeywords;
            size16_t *pkeySz;
//          int **ppos = NULL;
            count_t nums;

            eggTopCollector_get_keyPosition(hTopCollector,
                                            EGGDID_DOCID(&lp_score_doc[idx].idDoc),
                                            "content", &pkeywords, &pkeySz,
                                            NULL, &nums);
            int i;
            for (i = 0; i < nums; i++)
            {

                printf("Key[%.*s]\n", pkeySz[i], pkeywords[i]);
            }
            free(pkeySz);
            for (i = 0; i < nums; i++)
            {
                free(pkeywords[i]);
            }
            free(pkeywords);
        }
*/
//        lp_field = eggDocument_get_field(lp_eggDocument, "price");

//        printf("date : [%s] \n", eggField_get_value(lp_field, &len));

        eggDocument_delete(lp_eggDocument);
        idx++;
        // usleep(5000);
    }
    #endif

    eggTopCollector_delete(hTopCollector);
    eggQuery_delete(h1);
    eggIndexSearcher_delete(hIndexSearcher);
    eggIndexReader_close(hIndexReader);

    eggPath_close(hEggHandle);
}