Пример #1
0
void test_tow_hashvalue(uint32_t limit)
{
   MEM_POOL* mem_pool = mem_pool_init(MB_SIZE);
   struct hash_index_config config;
   config.row_limit = limit;
   strcpy(config.work_space,"/tmp/hash_compress_test");
   system("rm -rf /tmp/hash_compress_test");
   mkdirs(config.work_space);
   init_profile(1000,mem_pool);
   
   struct hash_index_manager* hash_index = hash_index_init(&config,mem_pool);


   uint32_t i;
   struct low_data_struct* data = (struct low_data_struct*)mem_pool_malloc(mem_pool,sizeof(struct low_data_struct));
   get_low_data(data,mem_pool);
   
   for(i=0; i<config.row_limit/2;i++)
   {   
   	   strcpy((char*)data->data,"1");
	   hash_index_insert(hash_index,data,i);
   }

   for(i=config.row_limit/2; i<config.row_limit;i++)
   {   
   	   strcpy((char*)data->data,"2");
	   hash_index_insert(hash_index,data,i);
   }

   	struct hash_compress_manager* hash_compress = hash_compress_load(hash_index,10,mem_pool);
	
   	struct rowid_list* rlist_a;
	struct rowid_list* rlist_b;

    get_low_data(data,mem_pool);

	strcpy((char*)data->data,"1");
	rlist_a = hash_index_query(hash_index,data,mem_pool);
	rlist_b = hash_compress_query(hash_compress,data,mem_pool);

	ASSERT_EQ(rowid_list_equals(rlist_a,rlist_b),1);

	strcpy((char*)data->data,"2");
	rlist_a = hash_index_query(hash_index,data,mem_pool);
	rlist_b = hash_compress_query(hash_compress,data,mem_pool);

	ASSERT_EQ(rowid_list_equals(rlist_a,rlist_b),1);

    hash_index_release(hash_index);
    hash_compress_release(hash_compress);
}
Пример #2
0
void test_multi_hashvalue(uint32_t limit)
{
   MEM_POOL* mem_pool = mem_pool_init(MB_SIZE);
   int32_t ret;
   struct hash_index_config config;
   config.row_limit = limit;
   strcpy(config.work_space,"/tmp/hash_compress_test");
   system("rm -rf /tmp/hash_compress_test");
   mkdirs(config.work_space);
   init_profile(1000,mem_pool);
   
   struct hash_index_manager* hash_index = hash_index_init(&config,mem_pool);


   uint32_t i;
   struct low_data_struct* data = (struct low_data_struct*)mem_pool_malloc(mem_pool,sizeof(struct low_data_struct));
   get_low_data(data,mem_pool);
   
   for(i=0; i<config.row_limit;i++)
   {   sprintf((char*)data->data,"%d",i);
	   ret = hash_index_insert(hash_index,data,i);
	   ASSERT_EQ(0,ret);
   }

   verify_muti_hashvalue(hash_index,mem_pool);
   hash_index_release(hash_index);
}
Пример #3
0
// Exercises insert and query on a hash index whose row limit is BUCKET_NUM:
//   1. insert one value under rowid 0 and read it back;
//   2. insert the same value under rowid 1 and check both rowids come back;
//   3. insert further distinct values ("ali2" .. "ali<BUCKET_NUM>");
//   4. expect one more distinct value to be rejected with ERROR_HASH_CONFLICT;
//   5. expect a zero-length value to still be accepted and be queryable.
TEST(HASHINDEX_TEST, HandleNoneZeroInput)  {
    MEM_POOL* mem_pool = mem_pool_init(MB_SIZE);
    struct hash_index_config config;

    system("rm -rf /tmp/hashindex_test");
    char dir_path[] = "/tmp/hashindex_test";
    mkdirs(dir_path);
    config.row_limit = BUCKET_NUM;
    strcpy(config.work_space,dir_path);

    init_profile(1000,mem_pool);
    int32_t ret;

    // Insert one value.
    struct low_data_struct insert_data;
    get_low_data(&insert_data,mem_pool);

    struct hash_index_manager* hash_index = hash_index_init(&config,mem_pool);
    ret = hash_index_insert(hash_index,&insert_data,0);
    ASSERT_EQ(ret,0);

    // Query it back.
    struct rowid_list* rowids;

    uint32_t i;
    struct rowid_list_node* p;

    rowids = hash_index_query(hash_index,&insert_data,mem_pool);
    ASSERT_TRUE(rowids != NULL);

    // Only the first returned rowid is inspected (the loop leaves after one pass).
    for(i = 0, p = rowids->head; i < rowids->rowid_num; i++)
    {
        if(i != 0 && i%ROWID_ARRAY_SIZE == 0)
        {
            p = p->next;
        }
        ASSERT_EQ(p->rowid_array[i%ROWID_ARRAY_SIZE],0);
        break;
    }

    // Insert the same value again under rowid 1.
    ret = hash_index_insert(hash_index,&insert_data,1);
    ASSERT_EQ(ret,0);

    rowids = hash_index_query(hash_index,&insert_data,mem_pool);
    ASSERT_TRUE(rowids != NULL);

    // The checks used to sit inside the node-advance branch, which is only
    // entered when i != 0, so they never ran for ROWID_ARRAY_SIZE > 1. They
    // now run for every position: rowid 1 is expected first, then rowid 0.
    for(i = 0, p = rowids->head; i < rowids->rowid_num; i++)
    {
        if(i != 0 && i%ROWID_ARRAY_SIZE == 0)
        {
            p = p->next;
        }

        if(i==0)
        {
            ASSERT_EQ(p->rowid_array[i%ROWID_ARRAY_SIZE],1);
        }

        if(i==1)
        {
            ASSERT_EQ(p->rowid_array[i%ROWID_ARRAY_SIZE],0);
            break;
        }
    }

    // Insert further distinct values, one per rowid in [2, BUCKET_NUM].
    for(i=2; i<BUCKET_NUM+1; i++)
    {
        sprintf((char*)insert_data.data,"ali%u",i);
        ret = hash_index_insert(hash_index,&insert_data,i);
        ASSERT_EQ(ret,0);
    }

    // One more distinct value must be reported as a conflict.
    sprintf((char*)insert_data.data,"ali%u",i);
    ret = hash_index_insert(hash_index,&insert_data,i);
    ASSERT_EQ(ret,ERROR_HASH_CONFLICT);

    // A zero-length value is still accepted.
    insert_data.len = 0;
    ret = hash_index_insert(hash_index,&insert_data,i);
    ASSERT_EQ(ret,0);

    rowids = hash_index_query(hash_index,&insert_data,mem_pool);
    ASSERT_TRUE(rowids != NULL);

    ASSERT_EQ(rowids->rowid_num,1);
    // The zero-length value was stored under rowid i (BUCKET_NUM + 1); compare
    // against i rather than a literal that is only right for one BUCKET_NUM.
    ASSERT_EQ(rowids->head->rowid_array[0],i);

    hash_index_release(hash_index);
    mem_pool_destroy(mem_pool);
}
Пример #4
0
// Inserts 18 rows (rowids 0..17) that all carry the same 5-byte zeroed string
// value, calls hash_index_recover(index, 16), and then checks that:
//   - doc slots 0..15 still carry their own rowid as doc_id;
//   - doc slots 16..ROW_LIMIT-1 have doc_id and next equal to 0;
//   - every bucket with a non-zero hash_value has a chain whose non-terminal
//     entries have doc_id < 16 and whose terminal entry (top bit of `next`
//     set) stores the bucket's own number in the low 31 bits.
TEST(RECOVER_TEST_2, HandleNoneZeroInput)  {
    char work_dir[] = "/tmp/hashindex_test";
    MEM_POOL* pool = mem_pool_init(MB_SIZE);

    system("rm -rf /tmp/hashindex_test");
    mkdirs(work_dir);

    struct hash_index_config cfg;
    cfg.row_limit = ROW_LIMIT;
    strcpy(cfg.work_space, work_dir);

    init_profile(1000, pool);

    struct hash_index_manager* index = hash_index_init(&cfg, pool);

    // Insert 18 rows; only the first 16 are kept by the recover call below.
    struct low_data_struct* value =
        (struct low_data_struct*)mem_pool_malloc(pool, sizeof(struct low_data_struct));
    memset(value, 0, sizeof(*value));

    uint32_t row;
    int32_t rc;

    // Each pass inserts the pair (row, row + 1), so only even rows are visited.
    for (row = 0; row < 18; row += 2)
    {
        value->len = 5;
        value->data = mem_pool_malloc(pool, 5);
        value->type = HI_TYPE_STRING;
        value->field_name = (char*)mem_pool_malloc(pool, 20);
        memset(value->field_name, 0, 20);
        strcpy(value->field_name, "HI_TYPE_STRING");
        memset(value->data, 0, 5);

        rc = hash_index_insert(index, value, row);
        ASSERT_EQ(0, rc);
        rc = hash_index_insert(index, value, row + 1);
        ASSERT_EQ(0, rc);
    }

    ASSERT_EQ(0, hash_index_recover(index, 16));

    struct doc_row_unit* unit = NULL;

    // NOTE(review): this pass only evaluates the accessor macro for every slot
    // and asserts nothing.
    for (row = 0; row < ROW_LIMIT; row++)
    {
        unit = GET_DOC_ROW_STRUCT(index->doclist, row);
    }

    // Verify the rows that were kept.
    for (row = 0; row < 16; row++)
    {
        unit = GET_DOC_ROW_STRUCT(index->doclist, row);
        ASSERT_EQ(row, unit->doc_id);
    }

    // Verify the remaining slots are zeroed.
    for (row = 16; row < ROW_LIMIT; row++)
    {
        unit = GET_DOC_ROW_STRUCT(index->doclist, row);
        ASSERT_EQ(0, unit->doc_id);
        ASSERT_EQ(0, unit->next);
    }

    struct hash_bucket* bucket = NULL;
    uint32_t owner_bucket = 0;
    uint32_t slot;

    for (slot = 0; slot < ROW_LIMIT+1; slot++)
    {
        bucket = index->mem_mmaped + slot;
        unit = NEXT_DOC_ROW_STRUCT(index->doclist, bucket->offset);

        // Buckets whose hash_value is 0 are skipped.
        if (bucket->hash_value == 0)
        {
            continue;
        }

        // Walk the chain until the entry whose `next` has the top bit set.
        for (; (unit->next & 0x80000000) == 0;
               unit = NEXT_DOC_ROW_STRUCT(index->doclist, unit->next))
        {
            ASSERT_GT(16, unit->doc_id);
        }

        owner_bucket = unit->next & 0x7fffffff;
        ASSERT_EQ(slot, owner_bucket);
    }

    hash_index_release(index);
    mem_pool_destroy(pool);
}
Пример #5
0
int32_t index_field_insert(struct index_field_manager* index_field,struct low_data_struct* data,uint32_t docid)
{
	int32_t ret;

	//拒绝插入的条件
	if(index_field == NULL) 
	{
		log_warn("此列未初始化%s",index_field->field_name);
		return ERROR_FIELD_NOT_WORK;
	}
	
	if(index_field->flag != NULL && (Mile_AtomicGetPtr(index_field->flag) & INDEX_FIELD_COMPRESS))
    	return ERROR_INDEX_FIELD_COMPRESSED;

	switch(index_field->index_type)
	{
		case HI_KEY_ALG_FULLTEXT:
			{
				/*全文列插入hash*/
				PROFILER_BEGIN("dyhash index insert");
				if((ret = dyhash_index_insert(index_field->dyhash_index,data,docid)) < 0)
				{
					PROFILER_END();
					return ret;
				}
				PROFILER_END();

				return MILE_RETURN_SUCCESS;

			}
		case HI_KEY_ALG_HASH:
			{
				/*哈希列插入hash*/
				PROFILER_BEGIN("hash index insert");
				if((ret = hash_index_insert(index_field->hash_index,data,docid)) < 0)
				{
					PROFILER_END();
					return ret;
				}
				PROFILER_END();

				return MILE_RETURN_SUCCESS;
			}
		case HI_KEY_ALG_BTREE:
			{
				return MILE_RETURN_SUCCESS;
			}
		case HI_KEY_ALG_FILTER:
			{
				//如果是字符串,需要对数据进行预处理
				if(data->type == HI_TYPE_STRING)
				{
					struct low_data_struct hash_data;
					
					PROFILER_BEGIN("get hash value");
					uint64_t hash_value = get_hash_value(data);
					PROFILER_END();
					
					hash_data.data = &hash_value;
					hash_data.len = get_unit_size(HI_TYPE_LONGLONG);
					hash_data.type = HI_TYPE_LONGLONG;
					hash_data.field_name = data->field_name;

					if(*index_field->max_len < get_unit_size(HI_TYPE_LONGLONG))
					{
						*index_field->max_len = get_unit_size(HI_TYPE_LONGLONG);
						msync(index_field->max_len,sizeof(uint32_t),MS_SYNC);
					}

					PROFILER_BEGIN("filter index insert");
					if((ret = filter_index_insert(index_field->filter_index,&hash_data,docid) < 0) )
					{
						PROFILER_END();
						return ret;
					}
					PROFILER_END();
					
				}
				else
				{
					if(data->len > get_unit_size(HI_TYPE_LONGLONG))
					{
						log_error("数据长度超过8个字节,len:%u",data->len);
						return ERROR_INSERT_FAILDED;
			
					}
					if(*index_field->max_len < data->len)
					{
						*index_field->max_len = data->len;
						msync(index_field->max_len,sizeof(uint32_t),MS_SYNC);
					}
					
					PROFILER_BEGIN("filter index insert");
					if((ret = filter_index_insert(index_field->filter_index,data,docid) < 0) )
					{
						PROFILER_END();
						return ret;
					}
					PROFILER_END();
				}

				return MILE_RETURN_SUCCESS;
			}
		default:
			log_error("该列的索引类型不正确,%d",index_field->index_type);
			return ERROR_NOT_SUPPORT_INDEX;
	}
}
Пример #6
0
/* Write an extent that is guaranteed to lie within a single virtual (and
 * physical) block.
 *
 * The block map entry for the virtual block is replaced by the SHA1
 * fingerprint of the block's new full contents. If that fingerprint is not
 * yet in the hash index, a new physical block and hash log entry are created
 * and the full block is stored; otherwise the existing entry's reference
 * count is incremented. The reference count of the previously mapped
 * fingerprint (if any) is decremented.
 *
 * buf:    data to write.
 * len:    number of bytes in buf; (offset % BLOCK_SIZE) + len must not exceed
 *         BLOCK_SIZE.
 * offset: byte offset of the write in the virtual device.
 *
 * Returns 0 on success, or -1 if the scratch buffer needed for a partial
 * block write cannot be allocated (nothing has been modified in that case).
 * I/O failures are caught by assert, as elsewhere in this function. */
static int write_one_block(const void *buf, uint32_t len, uint64_t offset)
{
    int err;
    char fingerprint[FINGERPRINT_SIZE];
    uint64_t vbn = offset / BLOCK_SIZE;
    uint64_t block_offset = offset % BLOCK_SIZE;
    uint64_t hash_log_address;
    struct hash_log_entry new_entry;
    void *merged = NULL;        /* scratch copy of the block for partial writes */
    const void *block = buf;    /* full BLOCK_SIZE contents after this write */

    assert(block_offset + len <= BLOCK_SIZE);

    if (len != BLOCK_SIZE) {
        /* We need to read in the existing block and apply our changes to it so
         * that we can determine the fingerprint. This is done before the old
         * mapping's refcount is dropped, so the read does not depend on how
         * decrement_refcount treats an entry that reaches zero. */
        merged = malloc(BLOCK_SIZE);
        if (merged == NULL)
            return -1;
        /* Read the block that contains `offset`, not the block at offset 0. */
        read_one_block(merged, BLOCK_SIZE, vbn * BLOCK_SIZE);
        memcpy((char *)merged + block_offset, buf, len);
        block = merged;
    }

    SEEK_TO_BLOCK_MAP(fd, vbn);
    err = read(fd, fingerprint, FINGERPRINT_SIZE);
    assert(err == FINGERPRINT_SIZE);

    if (!fingerprint_is_zero(fingerprint)) {
        /* We need to decrement the refcount for the old fingerprint. */
        decrement_refcount(fingerprint);
    }

    /* Compute the fingerprint of the new block and update the block map. */
    SHA1(block, BLOCK_SIZE, (unsigned char *)fingerprint);
    SEEK_TO_BLOCK_MAP(fd, vbn);
    err = write(fd, fingerprint, FINGERPRINT_SIZE);
    assert(err == FINGERPRINT_SIZE);

    /* See if this fingerprint is already stored. */
    hash_log_address = hash_index_lookup(fingerprint);
    if (hash_log_address == (uint64_t) -1) {
        /* This block is new. Zero the entry first so no uninitialised bytes
         * reach the file. */
        memset(&new_entry, 0, sizeof(new_entry));
        new_entry.pbn = physical_block_new();
        new_entry.ref_count = 1;
        hash_log_address = hash_log_new();
        hash_index_insert(fingerprint, hash_log_address);
        SEEK_TO_HASH_LOG(fd, hash_log_address);
        err = write(fd, &new_entry, sizeof(struct hash_log_entry));
        assert(err == sizeof(struct hash_log_entry));
        /* Store the whole block: the fingerprint covers all BLOCK_SIZE bytes,
         * so the fresh physical block must hold the merged contents, not just
         * the `len` bytes supplied by the caller. */
        SEEK_TO_DATA_LOG(fd, new_entry.pbn, 0);
        err = write(fd, block, BLOCK_SIZE);
        assert(err == (int)BLOCK_SIZE);
    } else {
        /* This block has already been stored. We just need to increment the
         * refcount. */
        SEEK_TO_HASH_LOG(fd, hash_log_address);
        err = read(fd, &new_entry, sizeof(struct hash_log_entry));
        assert(err == sizeof(struct hash_log_entry));
        new_entry.ref_count += 1;
        SEEK_TO_HASH_LOG(fd, hash_log_address);
        err = write(fd, &new_entry, sizeof(struct hash_log_entry));
        assert(err == sizeof(struct hash_log_entry));
    }

    free(merged);
    return 0;
}