/**
 * Computes three per-element count arrays for the elements of `lst`.
 *
 * For each element i, with d_i = GET_DIGIT(key_func(lst[i]), k, offset):
 *   - G_EI[i] = t_primes_summed[0] + ... + t_primes_summed[d_i - 1]
 *               (per the original comments: the total number of elements
 *               whose digit is smaller than d_i)
 *   - P_EI[i] = t_primes_exscanned[d_i]
 *               (per the original comments: elements with the same digit
 *               that live on processors with a smaller rank)
 *   - L_EI[i] = prefix_summed_bucket_table[d_i][i] - 1
 *               (per the original comments: elements with the same digit on
 *               this processor that are LEFT of element i, hence the -1)
 *
 * Preconditions (unchanged from the original, not checked here):
 *   d_i <= t_primes_summed.size(), d_i < t_primes_exscanned.size(),
 *   d_i < prefix_summed_bucket_table.size() and
 *   i   < prefix_summed_bucket_table[d_i].size().
 *
 * @param lst                         elements to classify
 * @param prefix_summed_bucket_table  indexed as [digit][element index]
 * @param t_primes_summed             per-digit totals
 * @param t_primes_exscanned          per-digit value copied into P_EI
 * @param key_func                    extracts the unsigned key of an element
 * @param k, offset                   forwarded unchanged to GET_DIGIT
 * @return tuple (G_EI, P_EI, L_EI), each of size lst.size()
 */
std::tuple<vector<unsigned int>, vector<unsigned int>, vector<unsigned int>>
compute_G_P_L_EI_arrays(const vector<T> &lst,
                        const vector<vector<unsigned int>> &prefix_summed_bucket_table,
                        const vector<unsigned int> &t_primes_summed,
                        const vector<unsigned int> &t_primes_exscanned,
                        unsigned int (*key_func)(const T&),
                        const unsigned int k,
                        const unsigned int offset) {
    const std::size_t n = lst.size();
    vector<unsigned int> G_EI(n, 0), P_EI(n, 0), L_EI(n, 0);

    // smaller_total[d] == t_primes_summed[0] + ... + t_primes_summed[d - 1].
    // Building this once replaces the per-element re-summation, turning
    // O(n * buckets) work into O(n + buckets). Unsigned wrap-around behaves
    // exactly as in the running sum it replaces.
    const std::size_t buckets = t_primes_summed.size();
    vector<unsigned int> smaller_total(buckets + 1, 0);
    for (std::size_t d = 0; d < buckets; ++d) {
        smaller_total[d + 1] = smaller_total[d] + t_primes_summed[d];
    }

    for (std::size_t i = 0; i < n; ++i) {
        // Get the current digit. The key is stored in a local first so that
        // GET_DIGIT never sees a function call as its argument.
        const unsigned int key = key_func(lst[i]);
        const unsigned int d_i = GET_DIGIT(key, k, offset);

        G_EI[i] = smaller_total[d_i];
        P_EI[i] = t_primes_exscanned[d_i];
        L_EI[i] = prefix_summed_bucket_table[d_i][i] - 1;
    }

    // Move the vectors into the tuple instead of deep-copying all three.
    return std::make_tuple(std::move(G_EI), std::move(P_EI), std::move(L_EI));
}
int SHash::sh_fd_find(const void *key, void *value) { char buf[1024]; unsigned int next, crc32; unsigned int hash_value = 0; struct shm_hash_head *head = (struct shm_hash_head *)m_head; unsigned int offset; void *new_key; int ret_len; key_func(func_type, key, head->key_len, (void **)&new_key, &ret_len); crc32 = crc32sum((char *)new_key, ret_len) % head->hash; offset = m_pos + sizeof(struct shm_hash_head) + crc32 * sizeof(int); lseek(fd, offset, SEEK_SET); read(fd, &hash_value, sizeof(hash_value)); if (hash_value == 0) return _SHASH_NOT_FOUND; unsigned int rec_pos = sh_fd_get_rec(head); unsigned int rec_seek = sh_fd_get_pos(hash_value); for (;;) { memset(buf, 0, key_len + value_len + sizeof(int)); offset = m_pos + rec_pos + rec_seek; lseek(fd, offset, SEEK_SET); read(fd, buf, key_len + value_len + sizeof(int)); if (sh_record_is_used(buf)) { if (cmp_func(func_type, sh_get_key(buf), key, key_len) == 0) { memcpy(value, sh_get_value(buf), value_len); return _SHASH_FOUND; } } if ((next = sh_get_next(buf)) == 0) break; rec_seek = sh_fd_get_pos(next); } return _SHASH_NOT_FOUND; }
/* * _SHASH_FOUND * _SHASH_NOT_FOUND * _SHASH_NOT_FOUND_NEXT * _SHASH_SYS_ERROR */ int SHash::sh_sys_find(const void *key, void **value, unsigned int *crc32, void **used, void **find) { void *p; unsigned int next; int *hash; void *new_key; int ret_len; struct shm_hash_head *head = sh_get_head(); void *rec = sh_get_rec(head); key_func(func_type, key, key_len, (void **)&new_key, &ret_len); *crc32 = crc32sum((char *)new_key, ret_len) % head->hash; hash = sh_get_hash(head) + *crc32; if (*hash == 0) { return _SHASH_NOT_FOUND; } *find = p = sh_get_pos(rec, *hash); *used = NULL; for (;;) { *find = p; if (sh_record_is_used(*find)) { if (cmp_func(func_type, sh_get_key(p), key, key_len) == 0) { if (value) *value = sh_get_value(p); return _SHASH_FOUND; } } else { if (*used == NULL) *used = *find; } if ((next = sh_get_next(p)) == 0) break; p = sh_get_pos(rec, next); } return _SHASH_NOT_FOUND_NEXT; }
int main(int argc, char* argv[]) { OPHeap* heap; void *rhh; struct timeval i_start, i_end, q_start, q_end; int num_power, opt; int repeat = 1; RunKey key_func = run_short_keys; int k_len = 6; uint64_t num; double load = 0.8; bool print_stat = false; HTFunnel* funnel; HTDestroy_t rhh_destroy = (HTDestroy_t)HTDestroy; HashFunc rhh_put = HTFunnelInsertWrap; HashFunc rhh_get = HTFunnelGetWrap; HTPrintStat_t rhh_printstat = (HTPrintStat_t)HTPrintStat; OPHash hasher = city; size_t funnel_slotsize = 1ULL << 12; size_t funnel_partition_size = 1ULL << 12; num_power = 20; while ((opt = getopt(argc, argv, "a:b:n:r:k:l:f:ph")) > -1) { switch (opt) { case 'a': funnel_slotsize = 1ULL << atoi(optarg); break; case 'b': funnel_partition_size = 1ULL << atoi(optarg); break; case 'n': num_power = atoi(optarg); break; case 'r': repeat = atoi(optarg); break; case 'k': if (!strcmp("s_string", optarg)) { key_func = run_short_keys; k_len = 6; } else if (!strcmp("m_string", optarg)) { key_func = run_mid_keys; k_len = 32; } else if (!strcmp("l_string", optarg)) { key_func = run_long_keys; k_len = 256; } else if (!strcmp("l_int", optarg)) { key_func = run_long_int; k_len = 8; } else help(argv[0]); break; case 'l': load = atof(optarg); break; case 'f': if (!strcmp("murmur3", optarg)) { printf("using murmur3 hasher\n"); hasher = murmur3; } else if (!strcmp("spooky", optarg)) { printf("using spookyhash\n"); hasher = spooky; } else if (!strcmp("city", optarg)) { printf("using cityhash\n"); hasher = city; } else if (!strcmp("farm", optarg)) { printf("using farmhash\n"); hasher = farm; } else help(argv[0]); break; case 'p': print_stat = true; break; case 'h': case '?': default: help(argv[0]); } } num = 1UL << num_power; printf("running elements %" PRIu64 "\n", num); heap = OPHeapOpenTmp(); for (int i = 0; i < repeat; i++) { printf("attempt %d\n", i + 1); rhh = HTNew(heap, num, load, k_len, 8); funnel = HTFunnelNewCustom(rhh, hasher, NULL, funnel_slotsize, funnel_partition_size); 
gettimeofday(&i_start, NULL); key_func(num_power, rhh_put, funnel, hasher); HTFunnelInsertFlush(funnel); gettimeofday(&i_end, NULL); HTFunnelDestroy(funnel); printf("insert finished\n"); funnel = HTFunnelNewCustom(rhh, hasher, funnel_sum_val, funnel_slotsize, funnel_partition_size); gettimeofday(&q_start, NULL); key_func(num_power, rhh_get, funnel, hasher); HTFunnelGetFlush(funnel); gettimeofday(&q_end, NULL); HTFunnelDestroy(funnel); print_timediff("Funnel Insert time: ", i_start, i_end); print_timediff("Funnel Query time: ", q_start, q_end); if (print_stat) { rhh_printstat(rhh); } rhh_destroy(rhh); } printf("objcnt: %d val_sum: %" PRIu64 "\n", objcnt, val_sum); OPHeapClose(heap); return 0; }
/*
 * Propagate tags across linked records until no tag changes any more.
 *
 * Starting from the record `orig` is positioned on, the function visits
 * that record and its duplicates (DB_NEXT_DUP). For every visited record i:
 *   - if it has no tag, apply_tag(record, NULL) assigns one; the process
 *     exits if that fails;
 *   - pass 0 (write_cycle == 0): for every comparison entry found through
 *     `first` and `second` whose stored double is at least PR_T, the partner
 *     record j is fetched from `prim`; when tagcmp(i, j) > 0, record i takes
 *     over j's tag;
 *   - pass 1 (write_cycle == 1): record i is written back to `prim`, and
 *     every partner whose tag differs from i's gets i's tag and is written
 *     back as well.
 * The whole sweep is repeated for as long as any tag was changed.
 *
 * orig    cursor positioned on the first record to process; it is
 *         duplicated with DB_POSITION, not moved
 * ldb     unused
 * first   secondary database searched with the record's primary key
 * second  secondary database searched with the record's primary key
 * match   unused
 * prim    primary database holding the DbRecord data
 *
 * Returns 0 on success. If a cursor cannot be created, duplicated or
 * positioned, the Berkeley DB error code is returned instead.
 *
 * NOTE(review): first_index/second_index are assumed to allocate
 * pkey_j.data, since the original code frees it after each use - confirm.
 */
int clump(DBC* orig, DB* ldb, DB* first, DB* second, DB* match, DB* prim){
    int i, write_cycle, changed, ret = 0, status = 0;
    int (*key_func)(DB*, const DBT*, const DBT*, DBT*);
    DBC *prim_cur_i, *first_cur, *second_cur;
    DBC *fs[2];
    DBT dummy_dat;
    DBT key_i, pkey_i, data_i;
    DBT pkey_j, data_j;
    void *old;
    char *tagp;

    (void)ldb;
    (void)match;

    DBT_CLEAR(key_i);
    DBT_CLEAR(pkey_i);
    DBT_CLEAR(data_i);
    DBT_CLEAR(pkey_j);
    DBT_CLEAR(data_j);
    DBT_CLEAR(dummy_dat);

    /* Check each cursor on its own: the first result used to be overwritten
     * by the second, and execution continued with an invalid cursor. */
    ret = first->cursor(first, NULL, &first_cur, 0);
    if(ret){
        printf("Cursor creation problem! %d\n", ret);
        return(ret);
    }
    ret = second->cursor(second, NULL, &second_cur, 0);
    if(ret){
        printf("Cursor creation problem! %d\n", ret);
        first_cur->close(first_cur);
        return(ret);
    }
    fs[0] = first_cur;
    fs[1] = second_cur;

    changed = 1;
    while(changed){ //Repeat until none of the tags change.
        changed = 0;

        /* Work on a private copy of the caller's cursor. */
        ret = orig->dup(orig, &prim_cur_i, DB_POSITION);
        if(ret){
            status = ret;
            break;
        }
        ret = prim_cur_i->pget(prim_cur_i, &key_i, &pkey_i, &data_i, DB_CURRENT); //primary get.
        if(ret){
            status = ret;
            prim_cur_i->close(prim_cur_i);
            break;
        }

        do {
            //Check for a tag
            tagp = has_tag((DbRecord*)data_i.data);
            if(tagp == NULL){
                apply_tag((DbRecord*)data_i.data, NULL);
                tagp = has_tag((DbRecord*)data_i.data);
                if(tagp == NULL){
                    printf("SERIOUS PROBLEM in tag application. Aborting.\n ");
                    exit(1);
                }
            }

            for(write_cycle=0; write_cycle<2; ++write_cycle){
                //In the first pass, find the minimum tag that this record is associated with
                //In the second pass, write that tag to all records.
                if(write_cycle)
                    prim->put(prim, NULL, &pkey_i, &data_i, 0);

                for(i=0; i<2; ++i){
                    //For each pass here, look for the record being the first in the comparison
                    //then the second in the comparison
                    /* Any failure, not only DB_NOTFOUND, means there is
                     * nothing valid in dummy_dat to look at. */
                    ret = fs[i]->pget(fs[i], &pkey_i, &key_i, &dummy_dat, DB_SET);
                    if(ret != 0)
                        continue;

                    do{
                        /* Skip comparison entries below the PR_T threshold. */
                        if(*(double*)dummy_dat.data < PR_T)
                            continue;

                        key_func = i ? first_index : second_index;
                        key_func(first /*dummy*/, &key_i, &dummy_dat /*dummy*/, &pkey_j);
                        old = pkey_j.data;

                        /* Without the partner record there is nothing to
                         * compare against; previously data_j was used
                         * regardless of the result. */
                        if(prim->get(prim, NULL, &pkey_j, &data_j, 0) != 0){
                            free(old);
                            continue;
                        }

                        if(!write_cycle){
                            if(tagcmp((DbRecord*)data_i.data, (DbRecord*)data_j.data) > 0){
                                apply_tag((DbRecord*)data_i.data, has_tag((DbRecord*)data_j.data));
                                changed = 1;
                            }
                            free(old);
                            continue;
                        }

                        if(tagcmp((DbRecord*)data_i.data, (DbRecord*)data_j.data) != 0){
                            apply_tag(((DbRecord*)data_j.data), has_tag((DbRecord*)data_i.data));
                            prim->put(prim, NULL, &pkey_j, &data_j, 0);
                            prim->get(prim, NULL, &pkey_j, &data_j, 0);
                            changed = 1;
                        }
                        free(old);
                    } while(0 == fs[i]->pget(fs[i], &pkey_i, &key_i, &dummy_dat, DB_NEXT_DUP));
                }//First, second idx
            }//Write cycle
        } while(0 == prim_cur_i->pget(prim_cur_i, &key_i, &pkey_i, &data_i, DB_NEXT_DUP));

        /* The duplicated cursor used to be leaked on every pass. */
        prim_cur_i->close(prim_cur_i);
    }//changed

    first_cur->close(first_cur);
    second_cur->close(second_cur);
    return(status);
}