/*
 * compact_hash_init
 *
 * Resets the hash bookkeeping globals, selects a hash method and allocates
 * the CPU hash table with its slots marked empty (-1).
 *
 *   ncells        cell count; the compact table gets ncells*hash_mult entries
 *   isize, jsize  index-space extents; the perfect table gets isize*jsize
 *                 entries and isize is stored in hash_stride
 *   report_level  stored in hash_report_level; values >= 2 enable the
 *                 diagnostic printf output in this routine
 *
 * Returns the table obtained from genvector():
 *   - compact methods: 2*hashtablesize ints, even-indexed slots set to -1
 *     (presumably the key half of each key/value pair -- confirm against the
 *     read/write routines)
 *   - perfect hash:    hashtablesize ints, all set to -1
 *
 * Globals written: hash_ncells, write_hash_collisions, read_hash_collisions,
 * hash_queries, hash_report_level, hash_stride, hash_method, hashtablesize
 * and, for compact methods, AA and BB.
 *
 * NOTE(review): unlike compact_hash_init_openmp, this routine does not assign
 * the read_hash/write_hash function pointers -- confirm they are selected
 * elsewhere before the table is used.
 */
int *compact_hash_init(int ncells, uint isize, uint jsize, uint report_level)
{
   hash_ncells           = 0;
   write_hash_collisions = 0;
   read_hash_collisions  = 0;
   hash_queries          = 0;
   hash_report_level     = report_level;
   hash_stride           = isize;

   int *hash = NULL;

   /* An explicit user choice overrides whatever method is currently set. */
   if (choose_hash_method != METHOD_UNSET) {
      hash_method = choose_hash_method;
   }

   uint compact_hash_size = (uint)((double)ncells*hash_mult);
   uint perfect_hash_size = (uint)(isize*jsize);

   if (hash_method == METHOD_UNSET) {
      /* Use the perfect hash unless it would cost more than
       * hash_mem_factor times the memory of the compact table. */
      float hash_mem_factor = 20.0;
      float hash_mem_ratio  = (double)perfect_hash_size/(double)compact_hash_size;
      if (mem_opt_factor != 1.0) {
         hash_mem_factor /= (mem_opt_factor*0.2);
      }
      hash_method = (hash_mem_ratio < hash_mem_factor) ? PERFECT_HASH : QUADRATIC;

      if (hash_report_level >= 2) {
         printf("DEBUG hash_method %d hash_mem_ratio %f hash_mem_factor %f mem_opt_factor %f perfect_hash_size %u compact_hash_size %u\n",
                hash_method,hash_mem_ratio,hash_mem_factor,mem_opt_factor,perfect_hash_size,compact_hash_size);
      }
   }

   int do_compact_hash = (hash_method == PERFECT_HASH) ? 0 : 1;

   if (hash_report_level >= 2) {
      printf("DEBUG do_compact_hash %d hash_method %d perfect_hash_size %u compact_hash_size %u\n",
             do_compact_hash,hash_method,perfect_hash_size,compact_hash_size);
   }

   if (do_compact_hash) {
      hashtablesize = compact_hash_size;

      /* Random multiplier/offset for the hash function. drand48() yields
       * values in [0,1), so both factors should land at or below prime-1;
       * the check below is a sanity guard against a bad result. */
      AA = (ulong)(1.0+(double)(prime-1)*drand48());
      BB = (ulong)(0.0+(double)(prime-1)*drand48());
      if (AA > prime-1 || BB > prime-1) {
         /* Report the failure and exit non-zero so it is not mistaken
          * for a successful run. */
         fprintf(stderr, "Error: hash factors out of range: AA %lu BB %lu\n", AA, BB);
         exit(1);
      }
      if (hash_report_level > 1) {
         printf("Factors AA %lu BB %lu\n",AA,BB);
      }

      /* Two ints per entry; only the even slots are initialised. */
      hash = (int *)genvector(2*hashtablesize,sizeof(int));
      for (uint ii = 0; ii<2*hashtablesize; ii+=2){
         hash[ii] = -1;
      }
   } else {
      hashtablesize = perfect_hash_size;

      hash = (int *)genvector(hashtablesize,sizeof(int));
      for (uint ii = 0; ii<hashtablesize; ii++){
         hash[ii] = -1;
      }
   }

   if (hash_report_level >= 2) {
      /* NOTE(review): the "memory savings" term appears to be unsigned
       * arithmetic and would wrap if the table is larger than isize*jsize;
       * the last value is a ratio, not a percentage. */
      printf("Hash table size %u perfect hash table size %u memory savings %u by percentage %lf\n",
             hashtablesize,isize*jsize,isize*jsize-hashtablesize,
             (double)hashtablesize/(double)(isize*jsize));
   }

   return(hash);
}
/*
 * gpu_compact_hash_init
 *
 * Sizes and allocates the device hash table, uploads the hash header and
 * enqueues kernel_hash_init on the table.
 *
 *   ncells                cell count; compact table gets ncells*hash_mult entries
 *   imaxsize, jmaxsize    index-space extents; perfect table gets
 *                         imaxsize*jmaxsize entries
 *   gpu_hash_method       requested method; METHOD_UNSET selects PERFECT_HASH
 *                         or QUADRATIC from the memory ratio
 *   hash_report_level_in  stored in hash_report_level
 *   gpu_hash_table_size   [out] number of hash entries
 *   hashsize              [out] number of cl_int elements in the device
 *                         buffer (2 per entry for compact methods, 1 for
 *                         the perfect hash)
 *   dev_hash_header_in    [out] device buffer holding the header
 *
 * Header layout written to the device: [0] method, [1] table size,
 * [2] AA factor, [3] BB factor (AA=1, BB=0 for the perfect hash).
 *
 * Returns the device buffer for the hash table. Also writes the globals
 * hash_report_level, hashtablesize and dev_hash_header.
 */
cl_mem gpu_compact_hash_init(ulong ncells, int imaxsize, int jmaxsize, int gpu_hash_method, uint hash_report_level_in,
   ulong *gpu_hash_table_size, ulong *hashsize, cl_mem *dev_hash_header_in)
{
   hash_report_level = hash_report_level_in;

   uint gpu_compact_hash_size = (uint)((double)ncells*hash_mult);
   /* Multiply as unsigned: a signed int product that overflows is undefined.
    * The result is unchanged whenever the signed product was representable. */
   uint gpu_perfect_hash_size = (uint)imaxsize*(uint)jmaxsize;

   if (gpu_hash_method == METHOD_UNSET) {
      /* Use the perfect hash unless it would cost more than
       * gpu_hash_mem_factor times the memory of the compact table. */
      float gpu_hash_mem_factor = 20.0;
      float gpu_hash_mem_ratio  = (double)gpu_perfect_hash_size/(double)gpu_compact_hash_size;
      if (mem_opt_factor != 1.0) {
         gpu_hash_mem_factor /= (mem_opt_factor*0.2);
      }
      gpu_hash_method = (gpu_hash_mem_ratio < gpu_hash_mem_factor) ? PERFECT_HASH : QUADRATIC;
   }

   int gpu_do_compact_hash = (gpu_hash_method == PERFECT_HASH) ? 0 : 1;

   ulong gpu_AA = 1;
   ulong gpu_BB = 0;
   if (gpu_do_compact_hash) {
      (*gpu_hash_table_size) = gpu_compact_hash_size;
      /* Random multiplier/offset for the hash function. */
      gpu_AA = (ulong)(1.0+(double)(prime-1)*drand48());
      gpu_BB = (ulong)(0.0+(double)(prime-1)*drand48());
      /* Widen before doubling so the element count cannot wrap at 32 bits. */
      (*hashsize) = 2*(ulong)gpu_compact_hash_size;
   } else {
      (*gpu_hash_table_size) = gpu_perfect_hash_size;
      (*hashsize) = gpu_perfect_hash_size;
   }

   hashtablesize = (*hashsize);

   const uint TILE_SIZE = 128;

   cl_command_queue command_queue = ezcl_get_command_queue();

   cl_mem dev_hash = ezcl_malloc(NULL, "dev_hash", hashsize, sizeof(cl_int), CL_MEM_READ_WRITE, 0);

   /* Build the header on the host, copy it to the device, then release
    * the host copy. */
   ulong *gpu_hash_header = (ulong *)genvector(hash_header_size, sizeof(ulong));
   gpu_hash_header[0] = (ulong)gpu_hash_method;
   gpu_hash_header[1] = (*gpu_hash_table_size);
   gpu_hash_header[2] = gpu_AA;
   gpu_hash_header[3] = gpu_BB;
   dev_hash_header = ezcl_malloc(NULL, "dev_hash_header", &hash_header_size, sizeof(cl_ulong), CL_MEM_READ_WRITE, 0);
   ezcl_enqueue_write_buffer(command_queue, dev_hash_header, CL_TRUE, 0, hash_header_size*sizeof(cl_ulong), &gpu_hash_header[0], NULL);
   genvectorfree(gpu_hash_header);

   (*dev_hash_header_in) = dev_hash_header;

   /* An empty table has nothing to initialise; skipping the launch also
    * avoids dividing by a zero local work size below. */
   if ((*hashsize) > 0) {
      size_t hash_local_work_size  = MIN((*hashsize), TILE_SIZE);
      /* Round the global size up to a whole number of work groups. */
      size_t hash_global_work_size = (((*hashsize)+hash_local_work_size - 1) /hash_local_work_size) * hash_local_work_size;

      /* The kernel argument is declared with sizeof(cl_int), so pass a real
       * cl_int instead of the first four bytes of the 8-byte *hashsize
       * (which only held the right value on little-endian hosts). */
      cl_int hash_size_arg = (cl_int)(*hashsize);

      ezcl_set_kernel_arg(kernel_hash_init, 0, sizeof(cl_int), (void *)&hash_size_arg);
      ezcl_set_kernel_arg(kernel_hash_init, 1, sizeof(cl_mem), (void *)&dev_hash);
      ezcl_enqueue_ndrange_kernel(command_queue, kernel_hash_init, 1, NULL, &hash_global_work_size, &hash_local_work_size, NULL);
   }

   return(dev_hash);
}
/*
 * compact_hash_init_openmp
 *
 * OpenMP variant of the hash table setup. The work is done inside an
 * "omp master" region bracketed by barriers, and the table pointer is kept
 * in a function-static variable so that every calling thread returns the
 * same table after the closing barrier.
 *
 *   ncells        cell count; the compact table gets ncells*hash_mult entries
 *   isize, jsize  index-space extents; the perfect table gets isize*jsize
 *                 entries and isize is stored in hash_stride
 *   report_level  stored in hash_report_level; selects which
 *                 read_hash/write_hash variants are installed and, when
 *                 >= 2, enables the diagnostic printf output
 *
 * Returns the table obtained from genvector():
 *   - compact methods: 2*hashtablesize ints, even-indexed slots set to -1
 *   - perfect hash:    hashtablesize ints, all set to -1
 *
 * Globals written: hash_ncells, write_hash_collisions, read_hash_collisions,
 * hash_queries, hash_report_level, hash_stride, hash_method, hashtablesize,
 * read_hash, write_hash and, for compact methods, AA and BB.
 *
 * NOTE(review): for compact methods, a report level above 3 or a method
 * other than LINEAR/QUADRATIC/PRIME_JUMP leaves read_hash/write_hash
 * unchanged from their previous values.
 */
int *compact_hash_init_openmp(int ncells, uint isize, uint jsize, uint report_level)
{
   static int *hash = NULL;

#pragma omp barrier
#pragma omp master
   {
      hash_ncells           = 0;
      write_hash_collisions = 0;
      read_hash_collisions  = 0;
      hash_queries          = 0;
      hash_report_level     = report_level;
      hash_stride           = isize;

      /* An explicit user choice overrides whatever method is currently set. */
      if (choose_hash_method != METHOD_UNSET) {
         hash_method = choose_hash_method;
      }

      uint compact_hash_size = (uint)((double)ncells*hash_mult);
      uint perfect_hash_size = (uint)(isize*jsize);

      if (hash_method == METHOD_UNSET) {
         /* Use the perfect hash unless it would cost more than
          * hash_mem_factor times the memory of the compact table. */
         float hash_mem_factor = 20.0;
         float hash_mem_ratio  = (double)perfect_hash_size/(double)compact_hash_size;
         if (mem_opt_factor != 1.0) {
            hash_mem_factor /= (mem_opt_factor*0.2);
         }
         hash_method = (hash_mem_ratio < hash_mem_factor) ? PERFECT_HASH : QUADRATIC;

         if (hash_report_level >= 2) {
            printf("DEBUG hash_method %d hash_mem_ratio %f hash_mem_factor %f mem_opt_factor %f perfect_hash_size %u compact_hash_size %u\n",
                   hash_method,hash_mem_ratio,hash_mem_factor,mem_opt_factor,perfect_hash_size,compact_hash_size);
         }
      }

      int do_compact_hash = (hash_method == PERFECT_HASH) ? 0 : 1;

      if (hash_report_level >= 2) {
         printf("DEBUG do_compact_hash %d hash_method %d perfect_hash_size %u compact_hash_size %u\n",
                do_compact_hash,hash_method,perfect_hash_size,compact_hash_size);
      }

      if (do_compact_hash) {
         hashtablesize = compact_hash_size;

         /* Random multiplier/offset for the hash function. drand48() yields
          * values in [0,1), so both factors should land at or below
          * prime-1; the check below is a sanity guard. */
         AA = (ulong)(1.0+(double)(prime-1)*drand48());
         BB = (ulong)(0.0+(double)(prime-1)*drand48());
         if (AA > prime-1 || BB > prime-1) {
            /* Report the failure and exit non-zero so it is not mistaken
             * for a successful run. */
            fprintf(stderr, "Error: hash factors out of range: AA %lu BB %lu\n", AA, BB);
            exit(1);
         }
         if (hash_report_level > 1) {
            printf("Factors AA %lu BB %lu\n",AA,BB);
         }

         /* Two ints per entry; only the even slots are initialised. This
          * loop is run serially by the master thread. */
         hash = (int *)genvector(2*hashtablesize,sizeof(int));
         for (uint ii = 0; ii<hashtablesize; ii++){
            hash[2*ii] = -1;
         }

         /* Install the read/write routines matching the probing method and
          * the report level. */
         if (hash_method == LINEAR) {
            switch (hash_report_level) {
            case 0:
               read_hash  = read_hash_linear;
               write_hash = write_hash_linear_openmp;
               break;
            case 1:
               read_hash  = read_hash_linear_report_level_1;
               write_hash = write_hash_linear_openmp_report_level_1;
               break;
            case 2:
               read_hash  = read_hash_linear_report_level_2;
               write_hash = write_hash_linear_openmp_report_level_2;
               break;
            case 3:
               read_hash  = read_hash_linear_report_level_3;
               write_hash = write_hash_linear_openmp_report_level_3;
               break;
            default:
               break;
            }
         } else if (hash_method == QUADRATIC) {
            switch (hash_report_level) {
            case 0:
               read_hash  = read_hash_quadratic;
               write_hash = write_hash_quadratic_openmp;
               break;
            case 1:
               read_hash  = read_hash_quadratic_report_level_1;
               write_hash = write_hash_quadratic_openmp_report_level_1;
               break;
            case 2:
               read_hash  = read_hash_quadratic_report_level_2;
               write_hash = write_hash_quadratic_openmp_report_level_2;
               break;
            case 3:
               read_hash  = read_hash_quadratic_report_level_3;
               write_hash = write_hash_quadratic_openmp_report_level_3;
               break;
            default:
               break;
            }
         } else if (hash_method == PRIME_JUMP) {
            switch (hash_report_level) {
            case 0:
               read_hash  = read_hash_primejump;
               write_hash = write_hash_primejump_openmp;
               break;
            case 1:
               read_hash  = read_hash_primejump_report_level_1;
               write_hash = write_hash_primejump_openmp_report_level_1;
               break;
            case 2:
               read_hash  = read_hash_primejump_report_level_2;
               write_hash = write_hash_primejump_openmp_report_level_2;
               break;
            case 3:
               read_hash  = read_hash_primejump_report_level_3;
               write_hash = write_hash_primejump_openmp_report_level_3;
               break;
            default:
               break;
            }
         }
      } else {
         hashtablesize = perfect_hash_size;

         hash = (int *)genvector(hashtablesize,sizeof(int));
#ifdef _OPENMP
#pragma omp parallel for
#endif
         for (uint ii = 0; ii<hashtablesize; ii++){
            hash[ii] = -1;
         }

         read_hash  = read_hash_perfect;
         write_hash = write_hash_perfect;
      }

      if (hash_report_level >= 2) {
         /* NOTE(review): the "memory savings" term appears to be unsigned
          * arithmetic and would wrap if the table is larger than
          * isize*jsize; the last value is a ratio, not a percentage. */
         printf("Hash table size %u perfect hash table size %u memory savings %u by percentage %lf\n",
                hashtablesize,isize*jsize,isize*jsize-hashtablesize,
                (double)hashtablesize/(double)(isize*jsize));
      }
   }
#pragma omp barrier

   return(hash);
}