Ejemplo n.º 1
0
/**
 * Build the word list of an analyzer from its stored "dict" records.
 *
 * Queries eggSySRecorder_get_dict("dict", analyzerName), loads every hit
 * document and pushes its (EGG_SYS_DICTNAME, EGG_SYS_DICTKEY) field values
 * onto a block-item list through BlockItemPushWord. Hits are visited from
 * the last score-doc entry to the first.
 *
 * @param analyzerName  Analyzer whose dictionary entries are requested.
 * @return The value returned by the last BlockItemPushWord call, or NULL when
 *         analyzerName is NULL, the lookup returns a null collector, or no
 *         entry could be pushed.
 */
PUBLIC  P_NEW_BLOCK_ITEM eggAnalyzer_get_dictlist(char *analyzerName)
{
    if (!analyzerName)
    {
        return EGG_NULL;
    }

    P_NEW_BLOCK_ITEM pBlockItem = NULL;

    HEGGTOPCOLLECTOR hTopCollector = eggSySRecorder_get_dict("dict", analyzerName);
    if (!hTopCollector)
    {
        /* Nothing to walk and nothing to release. */
        return pBlockItem;
    }

    HEGGSCOREDOC lp_score_doc = eggTopCollector_top_docs(hTopCollector);
    count_t cnt = eggTopCollector_total_hits(hTopCollector);
    HEGGINDEXREADER hIndexReader = eggSySRecorder_alloc_reader();

    if (!lp_score_doc || !hIndexReader)
    {
        /* Without a hit array or a reader no document can be fetched. */
        cnt = 0;
    }

    while (cnt--)
    {
        HEGGDOCUMENT lp_eggDocument = EGG_NULL;
        eggIndexReader_get_document(hIndexReader, lp_score_doc[cnt].idDoc, &lp_eggDocument);
        if (!lp_eggDocument)
        {
            /* The document could not be loaded; skip this hit. */
            continue;
        }

        HEGGFIELD hNameField = eggDocument_get_field(lp_eggDocument, EGG_SYS_DICTNAME);
        HEGGFIELD hKeyField = eggDocument_get_field(lp_eggDocument, EGG_SYS_DICTKEY);
        size32_t n_name_len = 0;
        size32_t n_key_len = 0;
        char *pDict = EGG_NULL;
        char *pKey = EGG_NULL;

        if (hNameField)
        {
            pDict = eggField_get_value(hNameField, &n_name_len);
        }
        if (hKeyField)
        {
            pKey = eggField_get_value(hKeyField, &n_key_len);
        }

        if (pDict && pKey)
        {
            /* strndup yields NUL-terminated copies bounded by the reported
             * lengths, so the "%s" log below never reads past the values. */
            char *lp_dict_buf = strndup(pDict, n_name_len);
            char *lp_key_buf = strndup(pKey, n_key_len);

            if (lp_dict_buf && lp_key_buf)
            {
                eggPrtLog_info("eggAnalyzer", "[%s], [%s]\n", lp_dict_buf, lp_key_buf);
                pBlockItem = BlockItemPushWord(pBlockItem, lp_dict_buf, lp_key_buf, "NR", 1000000);
            }

            free(lp_dict_buf);
            free(lp_key_buf);
        }

        /* pDict/pKey are not freed here (the original frees were disabled);
         * presumably they are owned by the document - TODO confirm. */
        eggDocument_delete(lp_eggDocument);
    }

    eggTopCollector_delete(hTopCollector);
    eggSySRecorder_free_reader((void**)&hIndexReader);

    return pBlockItem;
}
Ejemplo n.º 2
0
/**
 * Re-allocate the weight block of a field so that it has room for node `id`.
 *
 * The current node array (hWeightBlock->aCnt nodes stored right after the
 * header at *pBaseOff) is read into a larger zero-filled buffer, the old
 * stream area is released, and the enlarged block is written back. The new
 * capacity is `id` rounded up to the next multiple of WEIGHT_NODECOUNT_LIMIT
 * and never less than the current capacity.
 *
 * @param hFieldWeight  Owner of the view stream and the field view.
 * @param hField        Field whose name keys the stored block offset.
 * @param id            Document id that must fit into the block.
 * @param pBaseOff      In: offset of the current block. Out: offset of the
 *                      rewritten block.
 * @param hWeightBlock  In-memory copy of the block header; its aCnt is updated
 *                      to the new capacity.
 * @return EGG_TRUE on success; EGG_FALSE when the temporary buffer cannot be
 *         allocated, in which case nothing has been modified. Callers must
 *         check the result before addressing node `id`.
 */
EBOOL eggFieldWeight_rewrite_block(HEGGFIELDWEIGHT hFieldWeight, HEGGFIELD hField, did_t id, fweight_t* pBaseOff, HEGGWEIGHTBLOCK hWeightBlock)
{
    char* lp_field_name = eggField_get_name(hField);

    count_t n_max_cnt = ((id / WEIGHT_NODECOUNT_LIMIT) + 1) * WEIGHT_NODECOUNT_LIMIT;
    if (n_max_cnt < hWeightBlock->aCnt)
    {
        /* Never shrink: the old nodes are copied into the new buffer below. */
        n_max_cnt = hWeightBlock->aCnt;
    }

    /* Size of the block as currently stored, and as it will be rewritten. */
    size_t n_old_len = sizeof(EGGWEIGHTBLOCK) + sizeof(EGGWNODE) * hWeightBlock->aCnt;
    size_t n_buf_len = sizeof(EGGWEIGHTBLOCK) + sizeof(EGGWNODE) * n_max_cnt;

    HEGGWEIGHTBLOCK lp_block_buf = (HEGGWEIGHTBLOCK)calloc(1, n_buf_len);
    if (!lp_block_buf)
    {
        return EGG_FALSE;
    }

    memcpy(lp_block_buf, hWeightBlock, sizeof(EGGWEIGHTBLOCK));

    ViewStream_read_nolock(hFieldWeight->hViewStream, (lp_block_buf) + 1,
                           sizeof(EGGWNODE) * hWeightBlock->aCnt, *pBaseOff + sizeof(EGGWEIGHTBLOCK));

    /* Release exactly what the block occupies: header plus aCnt nodes of
     * sizeof(EGGWNODE) each, matching the read above and the sizes used when
     * a block is written. */
    ViewStream_free_area(hFieldWeight->hViewStream, *pBaseOff, n_old_len);

    lp_block_buf->aCnt = n_max_cnt;
    hWeightBlock->aCnt = n_max_cnt;
    *pBaseOff = ViewStream_write(hFieldWeight->hViewStream, lp_block_buf, n_buf_len);

    eggFieldView_set_fieldweight(hFieldWeight->hFieldView, lp_field_name, *pBaseOff);

    free(lp_block_buf);

    return EGG_TRUE;
}
Ejemplo n.º 3
0
/**
 * Example: search the index at file:///tmp/ for "is good" in the "content"
 * field and print every hit with its document weight.
 *
 * @return 0 after a completed run, 1 when the query could not be created.
 */
int main()
{
    HEGGHANDLE hHandle = eggPath_open("file:///tmp/");
    HEGGINDEXREADER hIndexReader = eggIndexReader_open(hHandle);

    HEGGINDEXSEARCHER hIndexSearcher = eggIndexSearcher_new(hIndexReader);

    HEGGQUERY hq = eggQuery_new_string("content", "is good", strlen("is good"), ANALYZER_CWSLEX);
    if (hq == EGG_NULL)
    {
        /* Do not search with a query that failed to initialise. */
        fprintf(stderr, "query init failed\n");
        eggIndexSearcher_delete(hIndexSearcher);
        eggIndexReader_close(hIndexReader);
        eggPath_close(hHandle);
        return 1;
    }
    printf("query init OK! \n");

    /* 0 collects every result; a non-zero value limits the number of results. */
    HEGGTOPCOLLECTOR hTopCollector = eggTopCollector_new(0);

    /*
     * Sort order of the final result:
     *   EGG_TOPSORT_WEIGHT: order by document weight
     *   EGG_TOPSORT_SCORE:  order by relevance of the query keywords (score)
     *   EGG_TOPSORT_NOT:    no sorting
     */
    eggTopCollector_set_sorttype(hTopCollector, EGG_TOPSORT_WEIGHT);
    printf("eggTopCollector sortType is EGG_TOPSORT_WEIGHT \n");

    EBOOL ret = eggIndexSearcher_search_with_query(hIndexSearcher, hTopCollector, hq);
    if (ret == EGG_TRUE)
    {
        HEGGSCOREDOC lp_score_doc = eggTopCollector_top_docs(hTopCollector);
        count_t cnt = eggTopCollector_total_hits(hTopCollector);
        index_t i;
        printf("have hit %u documents\n", cnt);

        for (i = 0; i != cnt; i++)
        {
            HEGGDOCUMENT lp_eggDocument = EGG_NULL;

            eggIndexReader_get_document(hIndexReader,
                                        lp_score_doc[i].idDoc, &lp_eggDocument);
            if (!lp_eggDocument)
            {
                /* The document could not be loaded; nothing to print. */
                continue;
            }

            HEGGFIELD lp_field = eggDocument_get_field(lp_eggDocument, "content");
            unsigned len = 0;
            char *val = EGG_NULL;
            if (lp_field)
            {
                val = eggField_get_value(lp_field, &len);
            }

            /* The value comes with an explicit length, so bound the output
             * with it instead of relying on a terminating NUL. */
            printf("id : [%llu], content : [%.*s], weight : [%d]\n",
                   EGGDID_DOCID(&lp_score_doc[i].idDoc),
                   (int)len, val ? val : "",
                   eggDocument_get_weight(lp_eggDocument));

            eggDocument_delete(lp_eggDocument);
        }
    }

    eggTopCollector_delete(hTopCollector);
    eggQuery_delete(hq);
    eggIndexSearcher_delete(hIndexSearcher);
    eggIndexReader_close(hIndexReader);
    eggPath_close(hHandle);

    return 0;
}
Ejemplo n.º 4
0
/**
 * Example: search the index at file:///egg/ for the keyword given as the
 * first command-line argument (field "content") and print every hit.
 *
 * @param argc  Argument count; at least 2 is required.
 * @param argv  argv[1] is the keyword to search for.
 * @return 0 after a completed run, 1 on a missing argument or when the query
 *         could not be created.
 */
int main(int argc, char* argv[])
{
    if (argc < 2 || argv[1] == NULL)
    {
        /* argv[1] is handed to strlen() below, so it must exist. */
        fprintf(stderr, "usage: %s <keyword>\n",
                (argc > 0 && argv[0]) ? argv[0] : "search");
        return 1;
    }

    HEGGHANDLE hHandle = eggPath_open("file:///egg/");
    HEGGINDEXREADER hIndexReader = eggIndexReader_open(hHandle);

    HEGGINDEXSEARCHER hIndexSearcher = eggIndexSearcher_new(hIndexReader);

    HEGGQUERY hq = eggQuery_new_string("content", argv[1], strlen(argv[1]), "");
    if (hq == EGG_NULL)
    {
        /* Do not search with a query that failed to initialise. */
        fprintf(stderr, "query init failed\n");
        eggIndexSearcher_delete(hIndexSearcher);
        eggIndexReader_close(hIndexReader);
        eggPath_close(hHandle);
        return 1;
    }
    printf("query init OK! \n");

    HEGGTOPCOLLECTOR hTopCollector = eggTopCollector_new(0);

    EBOOL ret = eggIndexSearcher_search_with_query(hIndexSearcher, hTopCollector, hq);
    if (ret == EGG_TRUE)
    {
        HEGGSCOREDOC lp_score_doc = eggTopCollector_top_docs(hTopCollector);
        count_t cnt = eggTopCollector_total_hits(hTopCollector);
        index_t i;
        printf("have hit %u documents\n", cnt);

        for (i = 0; i != cnt; i++)
        {
            HEGGDOCUMENT lp_eggDocument = EGG_NULL;

            eggIndexReader_get_document(hIndexReader,
                                        lp_score_doc[i].idDoc, &lp_eggDocument);
            if (!lp_eggDocument)
            {
                /* The document could not be loaded; nothing to print. */
                continue;
            }

            HEGGFIELD lp_field = eggDocument_get_field(lp_eggDocument, "content");
            unsigned len = 0;
            char *val = EGG_NULL;
            if (lp_field)
            {
                val = eggField_get_value(lp_field, &len);
            }

            /* Bound the output by the reported length instead of relying on
             * a terminating NUL in the field value. */
            printf("id : [%llu], content : [%.*s], \n",
                   EGGDID_DOCID(&lp_score_doc[i].idDoc),
                   (int)len, val ? val : "");

            eggDocument_delete(lp_eggDocument);
        }
    }

    eggTopCollector_delete(hTopCollector);
    eggQuery_delete(hq);
    eggIndexSearcher_delete(hIndexSearcher);
    eggIndexReader_close(hIndexReader);
    eggPath_close(hHandle);

    return 0;
}
Ejemplo n.º 5
0
   /**
    * Example: search the "title" field of a fixed index directory, order the
    * hits by score and print the last hit.
    *
    * @return 0 always.
    */
   int main()
{
   HEGGDIRECTORY hDirectory = eggDirectory_open("/ape/ImRoBot5/index/bbstest");
   HEGGINDEXREADER hIndexReader = eggIndexReader_open(hDirectory);
   HEGGINDEXSEARCHER hIndexSearcher = eggIndexSearcher_new(hIndexReader);

   /* The third argument is the keyword length in bytes. */
   HEGGQUERY h1 = eggQuery_new_string("title", "人", 3, ANALYZER_CWSLEX);

   HEGGTOPCOLLECTOR hTopCollector = eggTopCollector_new(0);
   int ret = eggIndexSearcher_search_with_query(hIndexSearcher, hTopCollector, h1);
   if (ret == EGG_TRUE)
   {
        eggTopCollector_normalized(hTopCollector, EGG_TOPSORT_SCORE);
        HEGGSCOREDOC lp_score_doc = eggTopCollector_top_docs(hTopCollector);
        count_t cnt = eggTopCollector_total_hits(hTopCollector);
        printf("have hit %u documents\n", cnt);

        if (cnt > 0)
        {
             printf("last document: id[%llu]\n", lp_score_doc[cnt-1].idDoc);
             HEGGDOCUMENT lp_eggDocument = EGG_NULL;
             eggIndexReader_get_document(hIndexReader,
                                lp_score_doc[cnt-1].idDoc, &lp_eggDocument);
             if (lp_eggDocument)
             {
                  HEGGFIELD lp_field = eggDocument_get_field(lp_eggDocument, "title");
                  unsigned len = 0;
                  char *val = EGG_NULL;
                  if (lp_field)
                  {
                       val = eggField_get_value(lp_field, &len);
                  }
                  /* NOTE(review): the label says "body" although the "title"
                   * field is printed; output text kept as it was.
                   * The precision argument of %.*s must be an int. */
                  printf("last document: body[%.*s]\n", (int)len, val ? val : "");
                  eggDocument_delete(lp_eggDocument);
             }
        }
   }

   eggTopCollector_delete(hTopCollector);
   eggQuery_delete(h1);
   eggIndexSearcher_delete(hIndexSearcher);
   eggIndexReader_close(hIndexReader);
   eggDirectory_close(hDirectory);

   /* No analyzer object is created in this example, so none is deleted. */
   return 0;
}
Ejemplo n.º 6
0
/**
 * Create the first weight block of a field and store the value of document
 * `id` in it.
 *
 * The block is a header (EGGWEIGHTBLOCK) followed by an array of EGGWNODE
 * slots; document `id` occupies slot id - 1. The capacity is `id` rounded up
 * to the next multiple of WEIGHT_NODECOUNT_LIMIT. The block is written to the
 * view stream and its offset is registered under the field name.
 *
 * @param hFieldWeight  Owner of the view stream and the field view.
 * @param hField        Field providing name, type and the value to store.
 * @param id            Document id; must be non-zero.
 * @return EGG_TRUE on success. EGG_FALSE when id is 0, the value does not fit
 *         into a node, the buffer cannot be allocated, or the field name is
 *         not found by eggFieldView_find.
 */
EBOOL eggFieldWeight_block_init(HEGGFIELDWEIGHT hFieldWeight, HEGGFIELD hField, did_t id)
{
    eggPrtLog_info("eggFieldWeight", "eggFieldWeight_block_init\n");

    if (id == 0)
    {
        /* Slots are addressed as id - 1; id 0 would land inside the header. */
        return EGG_FALSE;
    }

    char* lp_field_name = eggField_get_name(hField);
    type_t type = eggField_get_type(hField);
    size32_t n_val_len = 0;
    char* lp_field_val = eggField_get_value(hField, &n_val_len);

    EGGWNODE st_wnode = {0};
    if (n_val_len > sizeof(st_wnode.val) || (n_val_len > 0 && !lp_field_val))
    {
        /* The value must fit into the node and must actually exist. */
        return EGG_FALSE;
    }

    count_t n_max_cnt = ((id / WEIGHT_NODECOUNT_LIMIT) + 1) * WEIGHT_NODECOUNT_LIMIT;
    size_t n_buf_len = sizeof(EGGWEIGHTBLOCK) + sizeof(EGGWNODE) * n_max_cnt;

    /* calloc: every slot not written below stays zero-filled. */
    HEGGWEIGHTBLOCK lp_block_buf = (HEGGWEIGHTBLOCK)calloc(1, n_buf_len);
    if (!lp_block_buf)
    {
        return EGG_FALSE;
    }

    lp_block_buf->eCnt = 1;
    lp_block_buf->maxId = id;
    lp_block_buf->aCnt = n_max_cnt;
    lp_block_buf->type = type;

    if (eggFieldView_find(hFieldWeight->hFieldView, lp_field_name, &lp_block_buf->fid) != EGG_TRUE)
    {
        free(lp_block_buf);
        return EGG_FALSE;
    }

    st_wnode.flag = WEIGHT_NODE_VALID;
    if (n_val_len > 0)
    {
        memcpy(st_wnode.val, lp_field_val, n_val_len);
    }

    /* Compute the slot offset in size_t so large ids are not truncated. */
    memcpy((char*)(lp_block_buf + 1) + (size_t)(id - 1) * sizeof(EGGWNODE), &st_wnode, sizeof(EGGWNODE));

    offset64_t n_base_off = ViewStream_write(hFieldWeight->hViewStream, lp_block_buf, n_buf_len);

    eggFieldView_set_fieldweight(hFieldWeight->hFieldView, lp_field_name, n_base_off);

    free(lp_block_buf);
    return EGG_TRUE;
}
Ejemplo n.º 7
0
void CeggItfTest::testExportDoc(char* dir_path)
{
    void *hEggHandle = eggPath_open(dir_path);
        
    HEGGINDEXREADER hIndexReader = eggIndexReader_open(hEggHandle);
    offset64_t n_cursor = 0;
    HEGGDOCUMENT lp_eggDocument = EGG_NULL;

    while(lp_eggDocument = eggIndexReader_export_document(hIndexReader, &n_cursor))
    {
        HEGGFIELD lp_field = eggDocument_get_field(lp_eggDocument, "content");
        unsigned int len = 0;
        
        if(lp_field)
            printf("%s", eggField_get_value(lp_field, &len));

        eggDocument_delete(lp_eggDocument);
    }
    return ;
}
Ejemplo n.º 8
0
/**
 * Interactive search test: builds a query with getQuery(), optionally ANDs it
 * with an int32 range query entered on stdin, runs the search, prints the
 * elapsed time and the hit count, and releases all handles.
 *
 * The process exits with status 1 when the search returns EGG_FALSE. The two
 * result-dumping loops are compiled out with #if(0) and kept for manual use.
 *
 * @param dir_path  Path passed to eggPath_open.
 */
void CeggItfTest::testIndexSearch(char* dir_path)
{
    HEGGHANDLE hEggHandle = eggPath_open(dir_path);


    HEGGINDEXREADER hIndexReader = eggIndexReader_open(hEggHandle);

    HEGGINDEXSEARCHER hIndexSearcher = eggIndexSearcher_new(hIndexReader);
    char fieldName[200] = "";
    HEGGQUERY h1;

    h1 = getQuery();

    /* Default to "no" so a failed read does not leave c uninitialised. */
    char c = 'n';
    printf("key range search?(y/n)");
    if (scanf("%c", &c) == 1 && c == 'y')
    {
        printf("FieldName: ");
        /* Width limit keeps the input inside fieldName[200]. */
        scanf("%199s", fieldName);


        int startPrice = 0;
        int endPrice = 0;
        printf("start Price: ");
        scanf("%d", &startPrice);
        printf("end Price: ");
        scanf("%d", &endPrice);
        HEGGQUERY h2 = 0;
        h2 = eggQuery_new_int32range(fieldName, startPrice, endPrice);
        h1 = eggQuery_and(h1, h2);

    }
    HEGGTOPCOLLECTOR hTopCollector = eggTopCollector_new(0);
    /* Hard-wired selector: change the constant to try another sort mode. */
    switch (1) {
    case 1:

        eggTopCollector_set_orderby(hTopCollector, 2, "num1", 1,
                                    "num2", 1);
        break;
    case 2:
        eggTopCollector_set_sorttype(hTopCollector, EGG_TOPSORT_SCORE);
        break;
    default:
        eggTopCollector_set_sorttype(hTopCollector, EGG_TOPSORT_NOT);
        break;
    }


    struct timeval vstart, vend;
    gettimeofday(&vstart, 0);
    EBOOL ret = eggIndexSearcher_search_with_query(hIndexSearcher, hTopCollector, h1);
    gettimeofday(&vend, 0);
    printf("search_with_query time : %f\n", (double)(vend.tv_sec - vstart.tv_sec) + (double)(vend.tv_usec - vstart.tv_usec)/1000000);
    if(ret ==EGG_FALSE)
    {
        printf("no key !\n");
        exit(1);
    }

    if (0)
    {                           // deprecated
        HEGGQUERY hQuery_tmp = 0;
        // select a time range
        hQuery_tmp = eggQuery_new_stringrange("time", "1", "2");
        // sort by time
        eggIndexSearcher_filter(hIndexSearcher, hTopCollector, hQuery_tmp, 1);
        // sort by relevance
        //eggIndexSearcher_filter(hIndexSearcher, hTopCollector, hQuery_tmp, 0);
        eggQuery_delete(hQuery_tmp);
    }

    HEGGSCOREDOC lp_score_doc = eggTopCollector_top_docs(hTopCollector);
    count_t cnt =  eggTopCollector_total_hits(hTopCollector);
    index_t idx = 0;
    /* Cast so the argument matches %u whatever count_t is defined as. */
    printf("count : %u\n", (unsigned)cnt);
//        return ;
    #if(0)
    HEGGDOCUMENT* ppeggDocument = EGG_NULL;
    eggIndexReader_get_documentSet(hIndexReader, lp_score_doc, cnt, &ppeggDocument);
    while(idx != cnt)

    {
        printf("--------------------------\n");
        HEGGFIELD lp_field = eggDocument_get_field(ppeggDocument[idx], "f_id");
        unsigned int len = 0;
        if(lp_field)
            printf("count %d id : %lld \nf_id : %s ", idx, EGGDID_DOCID(&(lp_score_doc[idx].idDoc)), eggField_get_value(lp_field, &len));

        eggDocument_delete(ppeggDocument[idx]);
        idx++;

    }
    #endif

    #if(0)
    while(idx != cnt && idx < 10000)
    {
        HEGGDOCUMENT lp_eggDocument = EGG_NULL;
        printf("%lld ----\n", lp_score_doc[idx].idDoc);
        eggIndexReader_get_document(hIndexReader, lp_score_doc[idx].idDoc, &lp_eggDocument);

        HEGGFIELD lp_field = eggDocument_get_field(lp_eggDocument, "f_id");
        //      HEGGFIELD lp_field2 = eggDocument_get_field(lp_eggDocument, "random");
        //HEGGFIELD lp_field3 = eggDocument_get_field(lp_eggDocument, "num1");
        //HEGGFIELD lp_field4 = eggDocument_get_field(lp_eggDocument, "num2");
        //      HEGGFIELD lp_field3 = eggDocument_get_field(lp_eggDocument, "spanfield2");
        unsigned int len = 0;
        unsigned int len2 = 0;
        unsigned int len3 = 0;
        if(lp_field)
            printf("count %d id : %lld  f_id: %s \n", idx, EGGDID_DOCID(&(lp_score_doc[idx].idDoc)),  eggField_get_value(lp_field, &len) );

//        if(lp_field3)
//        printf("count %d id : %lld content : %s weightfield: %d\n", idx, EGGDID_DOCID(&(lp_score_doc[idx].idDoc)), eggField_get_value(lp_field3, &len), eggField_get_value(lp_field3, &len3));
/*
        {
            char **pkeywords;
            size16_t *pkeySz;
//          int **ppos = NULL;
            count_t nums;
            
            eggTopCollector_get_keyPosition(hTopCollector,
                                            EGGDID_DOCID(&lp_score_doc[idx].idDoc),
                                            "content", &pkeywords, &pkeySz,
                                            NULL, &nums);
            int i;
            for (i = 0; i < nums; i++)
            {

                printf("Key[%.*s]\n", pkeySz[i], pkeywords[i]);
            }
            free(pkeySz);
            for (i = 0; i < nums; i++)
            {
                free(pkeywords[i]);
            }
            free(pkeywords);
        }
*/
//        lp_field = eggDocument_get_field(lp_eggDocument, "price");

//        printf("date : [%s] \n", eggField_get_value(lp_field, &len));

        eggDocument_delete(lp_eggDocument);
        idx++;
        // usleep(5000);
    }
    #endif
    eggTopCollector_delete(hTopCollector);
    eggQuery_delete(h1);
    eggIndexSearcher_delete(hIndexSearcher);
    eggIndexReader_close(hIndexReader);

    eggPath_close(hEggHandle);
}
Ejemplo n.º 9
0
/**
 * Store the value of hField as the weight node of document `id`.
 *
 * If the field has no weight block yet, one is created through
 * eggFieldWeight_block_init. Otherwise the block header is read, the block is
 * enlarged through eggFieldWeight_rewrite_block when WEIGHTNODE_FULL reports
 * that `id` does not fit, the node at slot id - 1 is marked valid and
 * overwritten with the value, and the header (eCnt, maxId) is updated.
 * All stream access happens while hFieldWeight->mutex is held.
 *
 * @param hFieldWeight  Field-weight store to update.
 * @param hField        Field providing the name and the value (at most 8
 *                      bytes).
 * @param id            Document id; 0 is accepted and ignored.
 * @return EGG_TRUE on success or when id is 0. EGG_FALSE when a handle is
 *         invalid, the value is longer than 8 bytes or missing, block
 *         creation fails, or the block cannot be enlarged.
 */
EBOOL eggFieldWeight_add(HEGGFIELDWEIGHT hFieldWeight, HEGGFIELD hField, did_t id)
{
    if (POINTER_IS_INVALID(hFieldWeight))
    {
        return EGG_FALSE;
    }
    if (id == 0)
    {
        return EGG_TRUE;
    }
    if (POINTER_IS_INVALID(hField))
    {
        return EGG_FALSE;
    }

    EGGWEIGHTBLOCK st_weight_block = {0};

    char* lp_field_name = eggField_get_name(hField);
    size32_t n_val_len = 0;
    char* lp_field_val = eggField_get_value(hField, &n_val_len);
    if (n_val_len > 8)
    {
        eggPrtLog_error("eggFieldWeight", "eggField value len over 8 byte!\n");
        return EGG_FALSE;
    }
    if (n_val_len > 0 && !lp_field_val)
    {
        /* A length without data cannot be copied into the node. */
        return EGG_FALSE;
    }

    pthread_mutex_lock(&hFieldWeight->mutex);

    fweight_t n_base_off = eggFieldView_get_fieldweight(hFieldWeight->hFieldView, lp_field_name);

    if (!n_base_off)
    {
        /* No block registered for this field yet: create it with this node. */
        EBOOL ret = eggFieldWeight_block_init(hFieldWeight, hField, id);
        pthread_mutex_unlock(&hFieldWeight->mutex);
        return ret;
    }

    ViewStream_read_nolock(hFieldWeight->hViewStream, &st_weight_block, sizeof(st_weight_block), n_base_off);

    if (WEIGHTNODE_FULL(&st_weight_block, id))
    {
        /* The rewrite may move the block; n_base_off and the header copy are
         * updated through the pointers. Stop if it did not succeed, so the
         * node offset below is never computed against a too-small block. */
        if (eggFieldWeight_rewrite_block(hFieldWeight, hField, id, &n_base_off, &st_weight_block) != EGG_TRUE)
        {
            eggPrtLog_error("eggFieldWeight", "eggFieldWeight_rewrite_block failed\n");
            pthread_mutex_unlock(&hFieldWeight->mutex);
            return EGG_FALSE;
        }
    }

    offset64_t n_node_off = WEIGHTNODE_OFFSET(n_base_off, id - 1);

    EGGWNODE st_wnode = {0};
    ViewStream_read_nolock(hFieldWeight->hViewStream, &st_wnode, sizeof(st_wnode), n_node_off);
    if (st_wnode.flag == WEIGHT_NODE_INVALID)
    {
        /* The slot was not in use before: one more node is now stored. */
        st_weight_block.eCnt++;
    }

    st_wnode.flag = WEIGHT_NODE_VALID;
    if (n_val_len > 0)
    {
        memcpy(st_wnode.val, lp_field_val, n_val_len);
    }
    ViewStream_update_nolock(hFieldWeight->hViewStream, &st_wnode, sizeof(EGGWNODE), n_node_off);

    if (st_weight_block.maxId < id)
    {
        st_weight_block.maxId = id;
    }
    ViewStream_update_nolock(hFieldWeight->hViewStream, &st_weight_block, sizeof(EGGWEIGHTBLOCK), n_base_off);

    pthread_mutex_unlock(&hFieldWeight->mutex);

    return EGG_TRUE;
}