void test_tow_hashvalue(uint32_t limit) { MEM_POOL* mem_pool = mem_pool_init(MB_SIZE); struct hash_index_config config; config.row_limit = limit; strcpy(config.work_space,"/tmp/hash_compress_test"); system("rm -rf /tmp/hash_compress_test"); mkdirs(config.work_space); init_profile(1000,mem_pool); struct hash_index_manager* hash_index = hash_index_init(&config,mem_pool); uint32_t i; struct low_data_struct* data = (struct low_data_struct*)mem_pool_malloc(mem_pool,sizeof(struct low_data_struct)); get_low_data(data,mem_pool); for(i=0; i<config.row_limit/2;i++) { strcpy((char*)data->data,"1"); hash_index_insert(hash_index,data,i); } for(i=config.row_limit/2; i<config.row_limit;i++) { strcpy((char*)data->data,"2"); hash_index_insert(hash_index,data,i); } struct hash_compress_manager* hash_compress = hash_compress_load(hash_index,10,mem_pool); struct rowid_list* rlist_a; struct rowid_list* rlist_b; get_low_data(data,mem_pool); strcpy((char*)data->data,"1"); rlist_a = hash_index_query(hash_index,data,mem_pool); rlist_b = hash_compress_query(hash_compress,data,mem_pool); ASSERT_EQ(rowid_list_equals(rlist_a,rlist_b),1); strcpy((char*)data->data,"2"); rlist_a = hash_index_query(hash_index,data,mem_pool); rlist_b = hash_compress_query(hash_compress,data,mem_pool); ASSERT_EQ(rowid_list_equals(rlist_a,rlist_b),1); hash_index_release(hash_index); hash_compress_release(hash_compress); }
void test_multi_hashvalue(uint32_t limit) { MEM_POOL* mem_pool = mem_pool_init(MB_SIZE); int32_t ret; struct hash_index_config config; config.row_limit = limit; strcpy(config.work_space,"/tmp/hash_compress_test"); system("rm -rf /tmp/hash_compress_test"); mkdirs(config.work_space); init_profile(1000,mem_pool); struct hash_index_manager* hash_index = hash_index_init(&config,mem_pool); uint32_t i; struct low_data_struct* data = (struct low_data_struct*)mem_pool_malloc(mem_pool,sizeof(struct low_data_struct)); get_low_data(data,mem_pool); for(i=0; i<config.row_limit;i++) { sprintf((char*)data->data,"%d",i); ret = hash_index_insert(hash_index,data,i); ASSERT_EQ(0,ret); } verify_muti_hashvalue(hash_index,mem_pool); hash_index_release(hash_index); }
/*
 * Basic hash index behaviour: single insert/query, duplicate-value insert
 * (rowid list ordering), bucket exhaustion (ERROR_HASH_CONFLICT), and the
 * empty-value special case.
 */
TEST(HASHINDEX_TEST, HandleNoneZeroInput)
{
    MEM_POOL* mem_pool = mem_pool_init(MB_SIZE);
    struct hash_index_config config;

    system("rm -rf /tmp/hashindex_test");
    char dir_path[] = "/tmp/hashindex_test";
    mkdirs(dir_path);

    config.row_limit = BUCKET_NUM;
    strcpy(config.work_space, dir_path);
    init_profile(1000, mem_pool);

    int32_t ret;

    /* insert one value */
    struct low_data_struct insert_data;
    get_low_data(&insert_data, mem_pool);

    struct hash_index_manager* hash_index = hash_index_init(&config, mem_pool);
    ret = hash_index_insert(hash_index, &insert_data, 0);
    ASSERT_EQ(ret, 0);

    /* query it back */
    struct rowid_list* rowids;
    uint32_t i;
    struct rowid_list_node* p;

    rowids = hash_index_query(hash_index, &insert_data, mem_pool);
    for (i = 0, p = rowids->head; i < rowids->rowid_num; i++) {
        if (i != 0 && i % ROWID_ARRAY_SIZE == 0) {
            p = p->next;
        }
        ASSERT_EQ(p->rowid_array[i % ROWID_ARRAY_SIZE], 0);
        break;
    }

    /* insert the same value again: the newer rowid is expected first */
    hash_index_insert(hash_index, &insert_data, 1);
    rowids = hash_index_query(hash_index, &insert_data, mem_pool);
    for (i = 0, p = rowids->head; i < rowids->rowid_num; i++) {
        /* BUG FIX: the asserts below were nested inside this branch, where
         * `i == 0` can never hold — the verification never executed. */
        if (i != 0 && i % ROWID_ARRAY_SIZE == 0) {
            p = p->next;
        }
        if (i == 0)
            ASSERT_EQ(p->rowid_array[i % ROWID_ARRAY_SIZE], 1);
        if (i == 1) {
            ASSERT_EQ(p->rowid_array[i % ROWID_ARRAY_SIZE], 0);
            break;
        }
    }

    /* fill the remaining buckets, BUCKET_NUM inserts in total */
    for (i = 2; i < BUCKET_NUM + 1; i++) {
        sprintf((char*)insert_data.data, "ali%u", i);
        ret = hash_index_insert(hash_index, &insert_data, i);
        ASSERT_EQ(ret, 0);
    }

    /* one more distinct value must report a hash conflict */
    sprintf((char*)insert_data.data, "ali%u", i);
    ret = hash_index_insert(hash_index, &insert_data, i);
    ASSERT_EQ(ret, ERROR_HASH_CONFLICT);

    /* inserting an empty value is still OK */
    insert_data.len = 0;
    ret = hash_index_insert(hash_index, &insert_data, i);
    ASSERT_EQ(ret, 0);
    rowids = hash_index_query(hash_index, &insert_data, mem_pool);
    ASSERT_EQ(rowids->rowid_num, 1);
    ASSERT_EQ(rowids->head->rowid_array[0], 5);

    hash_index_release(hash_index);
    mem_pool_destroy(mem_pool);
}
/*
 * Recovery test: insert 18 rows (docids 0..17), recover the index back to
 * only the first 16, then verify that docids >= 16 are cleared from the doc
 * list and that every live bucket chain contains only docids < 16.
 */
TEST(RECOVER_TEST_2, HandleNoneZeroInput) { MEM_POOL* mem_pool = mem_pool_init(MB_SIZE); struct hash_index_config config; system("rm -rf /tmp/hashindex_test"); char dir_path[] = "/tmp/hashindex_test"; mkdirs(dir_path); config.row_limit = ROW_LIMIT; strcpy(config.work_space,dir_path); init_profile(1000,mem_pool); int32_t ret; struct hash_index_manager* hash_index = hash_index_init(&config,mem_pool); /* insert 18 rows, recover only 16 */ uint32_t i; struct low_data_struct* data = (struct low_data_struct*)mem_pool_malloc(mem_pool, sizeof(struct low_data_struct)); memset(data, 0, sizeof(struct low_data_struct)); /* odd i is skipped: each even i inserts docids i and i+1, covering 0..17 */ for(i=0; i<18; i++) { if(i != 0 && i%2 == 1) continue; data->len = 5; data->data = mem_pool_malloc(mem_pool,5); data->type = HI_TYPE_STRING; data->field_name = (char*)mem_pool_malloc(mem_pool,20); memset(data->field_name,0,20); strcpy(data->field_name,"HI_TYPE_STRING"); memset(data->data,0,5); ret = hash_index_insert(hash_index,data,i); ASSERT_EQ(0, ret); ret = hash_index_insert(hash_index,data,i+1); ASSERT_EQ(0, ret); } struct doc_row_unit* doc = NULL; ASSERT_EQ(0, hash_index_recover(hash_index, 16)); /* NOTE(review): this loop only assigns `doc` and asserts nothing —
   looks like a leftover debugging walk; confirm before removing */ for(i=0; i<ROW_LIMIT; i++) { doc = GET_DOC_ROW_STRUCT(hash_index->doclist, i); } /* verify: the first 16 doc slots survive recovery ... */ for(i=0; i<16; i++) { doc = GET_DOC_ROW_STRUCT(hash_index->doclist, i); ASSERT_EQ(i, doc->doc_id); } /* ... and everything at or after slot 16 has been zeroed */ for(i=16; i<ROW_LIMIT; i++) { doc = GET_DOC_ROW_STRUCT(hash_index->doclist, i); ASSERT_EQ(0, doc->doc_id); ASSERT_EQ(0,doc->next); } /* walk every occupied bucket chain: each node must be a recovered docid
   (< 16); the chain terminator has bit 31 set in `next`, with the low 31
   bits holding the owning bucket number (checked against i below) */ struct hash_bucket* hbucket = NULL; uint32_t bucket_no = 0; for(i=0; i<ROW_LIMIT+1; i++) { hbucket = hash_index->mem_mmaped + i; doc = NEXT_DOC_ROW_STRUCT(hash_index->doclist, hbucket->offset); if(hbucket->hash_value == 0) { continue; } while(!(doc->next & 0x80000000)) { ASSERT_GT(16, doc->doc_id); doc = NEXT_DOC_ROW_STRUCT(hash_index->doclist, doc->next); } bucket_no = doc->next & 0x7fffffff; ASSERT_EQ(i,bucket_no); } hash_index_release(hash_index); mem_pool_destroy(mem_pool); }
int32_t index_field_insert(struct index_field_manager* index_field,struct low_data_struct* data,uint32_t docid) { int32_t ret; //拒绝插入的条件 if(index_field == NULL) { log_warn("此列未初始化%s",index_field->field_name); return ERROR_FIELD_NOT_WORK; } if(index_field->flag != NULL && (Mile_AtomicGetPtr(index_field->flag) & INDEX_FIELD_COMPRESS)) return ERROR_INDEX_FIELD_COMPRESSED; switch(index_field->index_type) { case HI_KEY_ALG_FULLTEXT: { /*全文列插入hash*/ PROFILER_BEGIN("dyhash index insert"); if((ret = dyhash_index_insert(index_field->dyhash_index,data,docid)) < 0) { PROFILER_END(); return ret; } PROFILER_END(); return MILE_RETURN_SUCCESS; } case HI_KEY_ALG_HASH: { /*哈希列插入hash*/ PROFILER_BEGIN("hash index insert"); if((ret = hash_index_insert(index_field->hash_index,data,docid)) < 0) { PROFILER_END(); return ret; } PROFILER_END(); return MILE_RETURN_SUCCESS; } case HI_KEY_ALG_BTREE: { return MILE_RETURN_SUCCESS; } case HI_KEY_ALG_FILTER: { //如果是字符串,需要对数据进行预处理 if(data->type == HI_TYPE_STRING) { struct low_data_struct hash_data; PROFILER_BEGIN("get hash value"); uint64_t hash_value = get_hash_value(data); PROFILER_END(); hash_data.data = &hash_value; hash_data.len = get_unit_size(HI_TYPE_LONGLONG); hash_data.type = HI_TYPE_LONGLONG; hash_data.field_name = data->field_name; if(*index_field->max_len < get_unit_size(HI_TYPE_LONGLONG)) { *index_field->max_len = get_unit_size(HI_TYPE_LONGLONG); msync(index_field->max_len,sizeof(uint32_t),MS_SYNC); } PROFILER_BEGIN("filter index insert"); if((ret = filter_index_insert(index_field->filter_index,&hash_data,docid) < 0) ) { PROFILER_END(); return ret; } PROFILER_END(); } else { if(data->len > get_unit_size(HI_TYPE_LONGLONG)) { log_error("数据长度超过8个字节,len:%u",data->len); return ERROR_INSERT_FAILDED; } if(*index_field->max_len < data->len) { *index_field->max_len = data->len; msync(index_field->max_len,sizeof(uint32_t),MS_SYNC); } PROFILER_BEGIN("filter index insert"); if((ret = filter_index_insert(index_field->filter_index,data,docid) < 0) 
) { PROFILER_END(); return ret; } PROFILER_END(); } return MILE_RETURN_SUCCESS; } default: log_error("该列的索引类型不正确,%d",index_field->index_type); return ERROR_NOT_SUPPORT_INDEX; } }
/* Write an extent that is guaranteed to lie within a single virtual (and * physical) block. */ static int write_one_block(const void *buf, uint32_t len, uint64_t offset) { int err; char fingerprint[FINGERPRINT_SIZE]; uint64_t vbn = offset / BLOCK_SIZE; uint64_t hash_log_address; struct hash_log_entry new_entry; assert((offset % BLOCK_SIZE)+ len <= BLOCK_SIZE); SEEK_TO_BLOCK_MAP(fd, vbn); err = read(fd, fingerprint, FINGERPRINT_SIZE); assert(err == FINGERPRINT_SIZE); if (!fingerprint_is_zero(fingerprint)) { /* We need to decrement the refcount for the old fingerprint. */ decrement_refcount(fingerprint); } if (len != BLOCK_SIZE) { /* We need to read in the existing block and apply our changes to it so * that we can determine the fingerprint. */ void *newbuf = malloc(BLOCK_SIZE); read_one_block(newbuf, BLOCK_SIZE, 0); memcpy((char *)newbuf + (offset % BLOCK_SIZE), buf, len); SHA1(newbuf, BLOCK_SIZE, (unsigned char *)fingerprint); free(newbuf); } else SHA1(buf, BLOCK_SIZE, (unsigned char *)fingerprint); /* Compute the fingerprint of the new block and update the block map. */ SEEK_TO_BLOCK_MAP(fd, vbn); err = write(fd, fingerprint, FINGERPRINT_SIZE); assert(err == FINGERPRINT_SIZE); /* See if this fingerprint is already stored. */ hash_log_address = hash_index_lookup(fingerprint); if (hash_log_address == (uint64_t) -1) { /* This block is new. */ new_entry.pbn = physical_block_new(); new_entry.ref_count = 1; hash_log_address = hash_log_new(); hash_index_insert(fingerprint, hash_log_address); SEEK_TO_HASH_LOG(fd, hash_log_address); err = write(fd, &new_entry, sizeof(struct hash_log_entry)); assert(err == sizeof(struct hash_log_entry)); SEEK_TO_DATA_LOG(fd, new_entry.pbn, offset % BLOCK_SIZE); err = write(fd, buf, len); assert(err == (int)len); } else { /* This block has already been stored. We just need to increment the * refcount. 
*/ SEEK_TO_HASH_LOG(fd, hash_log_address); err = read(fd, &new_entry, sizeof(struct hash_log_entry)); assert(err == sizeof(struct hash_log_entry)); new_entry.ref_count += 1; SEEK_TO_HASH_LOG(fd, hash_log_address); err = write(fd, &new_entry, sizeof(struct hash_log_entry)); assert(err == sizeof(struct hash_log_entry)); } return 0; }