oskar_Imager* oskar_imager_create(int imager_precision, int* status) { oskar_Imager* h = 0; h = (oskar_Imager*) calloc(1, sizeof(oskar_Imager)); /* Create timers. */ h->tmr_grid_finalise = oskar_timer_create(OSKAR_TIMER_NATIVE); h->tmr_grid_update = oskar_timer_create(OSKAR_TIMER_NATIVE); h->tmr_init = oskar_timer_create(OSKAR_TIMER_NATIVE); h->tmr_read = oskar_timer_create(OSKAR_TIMER_NATIVE); h->tmr_write = oskar_timer_create(OSKAR_TIMER_NATIVE); h->mutex = oskar_mutex_create(); /* Create scratch arrays. */ h->imager_prec = imager_precision; h->uu_im = oskar_mem_create(imager_precision, OSKAR_CPU, 0, status); h->vv_im = oskar_mem_create(imager_precision, OSKAR_CPU, 0, status); h->ww_im = oskar_mem_create(imager_precision, OSKAR_CPU, 0, status); h->uu_tmp = oskar_mem_create(imager_precision, OSKAR_CPU, 0, status); h->vv_tmp = oskar_mem_create(imager_precision, OSKAR_CPU, 0, status); h->ww_tmp = oskar_mem_create(imager_precision, OSKAR_CPU, 0, status); h->vis_im = oskar_mem_create(imager_precision | OSKAR_COMPLEX, OSKAR_CPU, 0, status); h->weight_im = oskar_mem_create(imager_precision, OSKAR_CPU, 0, status); h->weight_tmp = oskar_mem_create(imager_precision, OSKAR_CPU, 0, status); h->time_im = oskar_mem_create(OSKAR_DOUBLE, OSKAR_CPU, 0, status); /* Check data type. */ if (imager_precision != OSKAR_SINGLE && imager_precision != OSKAR_DOUBLE) { *status = OSKAR_ERR_BAD_DATA_TYPE; return h; } /* Get number of devices available, and device location. */ oskar_device_set_require_double_precision(imager_precision == OSKAR_DOUBLE); h->num_gpus_avail = oskar_device_count(0, &h->dev_loc); /* Set sensible defaults. 
*/ oskar_imager_set_gpus(h, -1, 0, status); oskar_imager_set_num_devices(h, -1); oskar_imager_set_algorithm(h, "FFT", status); oskar_imager_set_image_type(h, "I", status); oskar_imager_set_weighting(h, "Natural", status); oskar_imager_set_ms_column(h, "DATA", status); oskar_imager_set_default_direction(h); oskar_imager_set_generate_w_kernels_on_gpu(h, 1); oskar_imager_set_fov(h, 1.0); oskar_imager_set_size(h, 256, status); oskar_imager_set_uv_filter_max(h, DBL_MAX); return h; }
int benchmark(int num_elements, int num_directions, OpType op_type, int loc, int precision, bool evaluate_2d, int niter, double& time_taken) { int status = 0; int type = precision | OSKAR_COMPLEX; oskar_Mem *beam = 0, *signal = 0, *z = 0, *z_i = 0; oskar_Mem *x = oskar_mem_create(precision, loc, num_directions, &status); oskar_Mem *y = oskar_mem_create(precision, loc, num_directions, &status); oskar_Mem *x_i = oskar_mem_create(precision, loc, num_elements, &status); oskar_Mem *y_i = oskar_mem_create(precision, loc, num_elements, &status); oskar_Mem *weights = oskar_mem_create(type, loc, num_elements, &status); if (!evaluate_2d) { z = oskar_mem_create(precision, loc, num_directions, &status); z_i = oskar_mem_create(precision, loc, num_elements, &status); } if (op_type == O2C) beam = oskar_mem_create(type, loc, num_directions, &status); else if (op_type == C2C || op_type == M2M) { int num_signals = num_directions * num_elements; if (op_type == C2C) { beam = oskar_mem_create(type, loc, num_directions, &status); signal = oskar_mem_create(type, loc, num_signals, &status); } else { type |= OSKAR_MATRIX; beam = oskar_mem_create(type, loc, num_directions, &status); signal = oskar_mem_create(type, loc, num_signals, &status); } } oskar_Timer *tmr = oskar_timer_create(OSKAR_TIMER_NATIVE); if (!status) { oskar_timer_start(tmr); for (int i = 0; i < niter; ++i) { oskar_dftw(num_elements, 2.0 * M_PI, x_i, y_i, z_i, weights, num_directions, x, y, z, signal, beam, &status); } time_taken = oskar_timer_elapsed(tmr); } // Free memory. oskar_timer_free(tmr); oskar_mem_free(x, &status); oskar_mem_free(y, &status); oskar_mem_free(z, &status); oskar_mem_free(x_i, &status); oskar_mem_free(y_i, &status); oskar_mem_free(z_i, &status); oskar_mem_free(weights, &status); oskar_mem_free(beam, &status); oskar_mem_free(signal, &status); return status; }
oskar_Simulator* oskar_simulator_create(int precision, int* status) { oskar_Simulator* h = 0; h = (oskar_Simulator*) calloc(1, sizeof(oskar_Simulator)); h->prec = precision; h->tmr_sim = oskar_timer_create(OSKAR_TIMER_NATIVE); h->tmr_write = oskar_timer_create(OSKAR_TIMER_NATIVE); h->temp = oskar_mem_create(precision, OSKAR_CPU, 0, status); h->mutex = oskar_mutex_create(); /* Set sensible defaults. */ h->max_sources_per_chunk = 16384; oskar_simulator_set_gpus(h, -1, 0, status); oskar_simulator_set_num_devices(h, -1); oskar_simulator_set_correlation_type(h, "Cross-correlations", status); oskar_simulator_set_horizon_clip(h, 1); oskar_simulator_set_source_flux_range(h, 0.0, DBL_MAX); oskar_simulator_set_max_times_per_block(h, 10); return h; }
int benchmark(int num_stations, int num_sources, int type, int jones_type, int loc, int use_extended, int use_time_ave, int niter, std::vector<double>& times) { int status = 0; oskar_Timer* timer; timer = oskar_timer_create(loc == OSKAR_GPU ? OSKAR_TIMER_CUDA : OSKAR_TIMER_OMP); // Set up a test sky model, telescope model and Jones matrices. oskar_Telescope* tel = oskar_telescope_create(type, loc, num_stations, &status); oskar_Sky* sky = oskar_sky_create(type, loc, num_sources, &status); oskar_Jones* J = oskar_jones_create(jones_type, loc, num_stations, num_sources, &status); oskar_telescope_set_channel_bandwidth(tel, 1e6); oskar_telescope_set_time_average(tel, (double) use_time_ave); oskar_sky_set_use_extended(sky, use_extended); // Memory for visibility coordinates and output visibility slice. oskar_Mem *vis, *u, *v, *w; vis = oskar_mem_create(jones_type, loc, oskar_telescope_num_baselines(tel), &status); u = oskar_mem_create(type, loc, num_stations, &status); v = oskar_mem_create(type, loc, num_stations, &status); w = oskar_mem_create(type, loc, num_stations, &status); // Run benchmark. times.resize(niter); for (int i = 0; i < niter; ++i) { oskar_timer_start(timer); oskar_cross_correlate(vis, oskar_sky_num_sources(sky), J, sky, tel, u, v, w, 0.0, 100e6, &status); times[i] = oskar_timer_elapsed(timer); } // Free memory. oskar_mem_free(u, &status); oskar_mem_free(v, &status); oskar_mem_free(w, &status); oskar_mem_free(vis, &status); oskar_jones_free(J, &status); oskar_telescope_free(tel, &status); oskar_sky_free(sky, &status); oskar_timer_free(timer); return status; }
/*
 * Creates any per-device buffers that do not yet exist for the beam pattern
 * handle, and clears the auto- and cross-power accumulation arrays.
 *
 * Devices with index below h->num_gpus use GPU memory; the rest use CPU
 * memory. Existing buffers are kept, so the function can be called again
 * for the same handle. Does nothing if status is already set on entry.
 */
static void set_up_device_data(oskar_BeamPattern* h, int* status)
{
    int i, beam_type, max_src, max_size, auto_power, cross_power, raw_data;
    if (*status) return;

    /* Sizes and data type of the beam arrays. */
    max_src = h->max_chunk_size;
    max_size = h->num_active_stations * max_src;
    beam_type = h->prec | OSKAR_COMPLEX;
    if (h->pol_mode == OSKAR_POL_MODE_FULL) beam_type |= OSKAR_MATRIX;

    /* Which families of output products have been requested. */
    raw_data = h->ixr_txt || h->ixr_fits ||
            h->voltage_raw_txt || h->voltage_amp_txt ||
            h->voltage_phase_txt || h->voltage_amp_fits ||
            h->voltage_phase_fits;
    auto_power = h->auto_power_fits || h->auto_power_txt;
    cross_power = h->cross_power_raw_txt ||
            h->cross_power_amp_fits || h->cross_power_phase_fits ||
            h->cross_power_amp_txt || h->cross_power_phase_txt;

    /* Make sure there is at least one device entry per selected GPU. */
    if (h->num_devices < h->num_gpus)
        oskar_beam_pattern_set_num_devices(h, h->num_gpus);

    for (i = 0; i < h->num_devices; ++i)
    {
        int dev_loc, s;
        DeviceData* dev = &h->d[i];
        if (*status) break;

        /* Select the device: GPUs come first, then CPU workers. */
        dev_loc = OSKAR_CPU;
        if (i < h->num_gpus)
        {
            oskar_device_set(h->gpu_ids[i], status);
            dev_loc = OSKAR_GPU;
        }

        /* Device memory. */
        dev->previous_chunk_index = -1;
        if (!dev->tel)
        {
            dev->jones_data = oskar_mem_create(beam_type, dev_loc,
                    max_size, status);
            dev->x = oskar_mem_create(h->prec, dev_loc, 1 + max_src, status);
            dev->y = oskar_mem_create(h->prec, dev_loc, 1 + max_src, status);
            dev->z = oskar_mem_create(h->prec, dev_loc, 1 + max_src, status);
            dev->tel = oskar_telescope_create_copy(h->tel, dev_loc, status);
            dev->work = oskar_station_work_create(h->prec, dev_loc, status);
        }

        /* Host memory: a pair of buffers for raw (voltage/IXR) data. */
        if (raw_data && !dev->jones_data_cpu[0])
        {
            dev->jones_data_cpu[0] = oskar_mem_create(beam_type,
                    OSKAR_CPU, max_size, status);
            dev->jones_data_cpu[1] = oskar_mem_create(beam_type,
                    OSKAR_CPU, max_size, status);
        }

        /* Per-Stokes output arrays, only for the selected parameters. */
        for (s = 0; s < 4; ++s)
        {
            if (!h->stokes[s]) continue;

            /* Auto-correlation beam output arrays. */
            if (auto_power && !dev->auto_power[s])
            {
                /* Device memory. */
                dev->auto_power[s] = oskar_mem_create(beam_type,
                        dev_loc, max_size, status);

                /* Host memory. */
                dev->auto_power_cpu[s][0] = oskar_mem_create(beam_type,
                        OSKAR_CPU, max_size, status);
                dev->auto_power_cpu[s][1] = oskar_mem_create(beam_type,
                        OSKAR_CPU, max_size, status);
                if (h->average_single_axis == 'T')
                {
                    dev->auto_power_time_avg[s] = oskar_mem_create(
                            beam_type, OSKAR_CPU, max_size, status);
                }
                if (h->average_single_axis == 'C')
                {
                    dev->auto_power_channel_avg[s] = oskar_mem_create(
                            beam_type, OSKAR_CPU, max_size, status);
                }
                if (h->average_time_and_channel)
                {
                    dev->auto_power_channel_and_time_avg[s] =
                            oskar_mem_create(beam_type, OSKAR_CPU,
                                    max_size, status);
                }
            }

            /* Cross-correlation beam output arrays. */
            if (cross_power && !dev->cross_power[s])
            {
                if (h->num_active_stations < 2)
                {
                    oskar_log_error(h->log, "Cannot create cross-power beam "
                            "using less than two active stations.");
                    *status = OSKAR_ERR_INVALID_ARGUMENT;
                    break; /* Leaves the Stokes loop only. */
                }

                /* Device memory. */
                dev->cross_power[s] = oskar_mem_create(beam_type,
                        dev_loc, max_src, status);

                /* Host memory. */
                dev->cross_power_cpu[s][0] = oskar_mem_create(beam_type,
                        OSKAR_CPU, max_src, status);
                dev->cross_power_cpu[s][1] = oskar_mem_create(beam_type,
                        OSKAR_CPU, max_src, status);
                if (h->average_single_axis == 'T')
                {
                    dev->cross_power_time_avg[s] = oskar_mem_create(
                            beam_type, OSKAR_CPU, max_src, status);
                }
                if (h->average_single_axis == 'C')
                {
                    dev->cross_power_channel_avg[s] = oskar_mem_create(
                            beam_type, OSKAR_CPU, max_src, status);
                }
                if (h->average_time_and_channel)
                {
                    dev->cross_power_channel_and_time_avg[s] =
                            oskar_mem_create(beam_type, OSKAR_CPU,
                                    max_src, status);
                }
            }

            /* Start each run from zeroed device accumulators. */
            if (dev->auto_power[s])
                oskar_mem_clear_contents(dev->auto_power[s], status);
            if (dev->cross_power[s])
                oskar_mem_clear_contents(dev->cross_power[s], status);
        }

        /* Timers. */
        if (!dev->tmr_compute)
            dev->tmr_compute = oskar_timer_create(OSKAR_TIMER_NATIVE);
    }
}
/*
 * Creates any per-device timers, visibility blocks and scratch memory that
 * do not yet exist for the simulator handle, and clears the visibility
 * blocks.
 *
 * Devices with index below h->num_gpus use GPU memory; the rest use CPU
 * memory. Existing objects are kept, so the function can be called again
 * for the same handle. Does nothing if status is already set on entry.
 */
static void set_up_device_data(oskar_Simulator* h, int* status)
{
    int i, dev_loc, complx, vistype, num_stations, num_src;
    if (*status) return;

    /* Sizes and data types of the work arrays. */
    num_stations = oskar_telescope_num_stations(h->tel);
    num_src = h->max_sources_per_chunk;
    complx = (h->prec) | OSKAR_COMPLEX;
    vistype = complx;
    if (oskar_telescope_pol_mode(h->tel) == OSKAR_POL_MODE_FULL)
        vistype |= OSKAR_MATRIX;

    /* Make sure there is at least one device entry per selected GPU. */
    if (h->num_devices < h->num_gpus)
        oskar_simulator_set_num_devices(h, h->num_gpus);

    for (i = 0; i < h->num_devices; ++i)
    {
        DeviceData* dev = &h->d[i];
        dev->previous_chunk_index = -1;

        /* Select the device: GPUs come first, then CPU workers. */
        dev_loc = OSKAR_CPU;
        if (i < h->num_gpus)
        {
            oskar_device_set(h->gpu_ids[i], status);
            dev_loc = OSKAR_GPU;
        }

        /* Timers: all are created together, so testing one is enough. */
        if (!dev->tmr_compute)
        {
            dev->tmr_compute = oskar_timer_create(OSKAR_TIMER_NATIVE);
            dev->tmr_copy = oskar_timer_create(OSKAR_TIMER_NATIVE);
            dev->tmr_clip = oskar_timer_create(OSKAR_TIMER_NATIVE);
            dev->tmr_E = oskar_timer_create(OSKAR_TIMER_NATIVE);
            dev->tmr_K = oskar_timer_create(OSKAR_TIMER_NATIVE);
            dev->tmr_join = oskar_timer_create(OSKAR_TIMER_NATIVE);
            dev->tmr_correlate = oskar_timer_create(OSKAR_TIMER_NATIVE);
        }

        /* Visibility blocks: one on the device, plus a pair on the host. */
        if (!dev->vis_block)
        {
            dev->vis_block = oskar_vis_block_create_from_header(
                    dev_loc, h->header, status);
            dev->vis_block_cpu[0] = oskar_vis_block_create_from_header(
                    OSKAR_CPU, h->header, status);
            dev->vis_block_cpu[1] = oskar_vis_block_create_from_header(
                    OSKAR_CPU, h->header, status);
        }
        oskar_vis_block_clear(dev->vis_block, status);
        oskar_vis_block_clear(dev->vis_block_cpu[0], status);
        oskar_vis_block_clear(dev->vis_block_cpu[1], status);

        /* Device scratch memory. */
        if (!dev->tel)
        {
            dev->u = oskar_mem_create(h->prec, dev_loc, num_stations, status);
            dev->v = oskar_mem_create(h->prec, dev_loc, num_stations, status);
            dev->w = oskar_mem_create(h->prec, dev_loc, num_stations, status);
            dev->chunk = oskar_sky_create(h->prec, dev_loc, num_src, status);
            dev->chunk_clip = oskar_sky_create(h->prec, dev_loc,
                    num_src, status);
            dev->tel = oskar_telescope_create_copy(h->tel, dev_loc, status);
            dev->J = oskar_jones_create(vistype, dev_loc,
                    num_stations, num_src, status);

            /* R is only created when the visibility type is a matrix. */
            if (oskar_type_is_matrix(vistype))
            {
                dev->R = oskar_jones_create(vistype, dev_loc,
                        num_stations, num_src, status);
            }
            else
            {
                dev->R = 0;
            }
            dev->E = oskar_jones_create(vistype, dev_loc,
                    num_stations, num_src, status);
            dev->K = oskar_jones_create(complx, dev_loc,
                    num_stations, num_src, status);
            dev->Z = 0;
            dev->station_work = oskar_station_work_create(h->prec,
                    dev_loc, status);
        }
    }
}
// Fills CPU and GPU arrays with uniform random numbers in single and double
// precision using the same seed and counters, and checks that the results
// agree between CPU and GPU and between the two precisions.
// If the file-level 'save' flag is set, the four arrays are also written
// to "random_uniform.txt".
TEST(Mem, random_uniform)
{
    int seed = 1;
    int c1 = 437;
    int c2 = 0;
    int c3 = 0xDECAFBAD;
    int n = 544357;
    int status = 0;
    double max_err = 0.0, avg_err = 0.0;
    oskar_Mem* v_cpu_f = oskar_mem_create(OSKAR_SINGLE, OSKAR_CPU, n, &status);
    oskar_Mem* v_gpu_f = oskar_mem_create(OSKAR_SINGLE, OSKAR_GPU, n, &status);
    oskar_Mem* v_cpu_d = oskar_mem_create(OSKAR_DOUBLE, OSKAR_CPU, n, &status);
    oskar_Mem* v_gpu_d = oskar_mem_create(OSKAR_DOUBLE, OSKAR_GPU, n, &status);
    oskar_Timer* tmr = oskar_timer_create(OSKAR_TIMER_CUDA);

    // Run in single precision.
    oskar_timer_start(tmr);
    oskar_mem_random_uniform(v_cpu_f, seed, c1, c2, c3, &status);
    report_time(n, "uniform", "single", "CPU", oskar_timer_elapsed(tmr));
    ASSERT_EQ(0, status) << oskar_get_error_string(status);
    oskar_timer_start(tmr);
    oskar_mem_random_uniform(v_gpu_f, seed, c1, c2, c3, &status);
    report_time(n, "uniform", "single", "GPU", oskar_timer_elapsed(tmr));
    ASSERT_EQ(0, status) << oskar_get_error_string(status);

    // Check consistency between CPU and GPU results.
    oskar_mem_evaluate_relative_error(v_gpu_f, v_cpu_f, 0,
            &max_err, &avg_err, 0, &status);
    EXPECT_LT(max_err, 1e-5);
    EXPECT_LT(avg_err, 1e-5);

    // Run in double precision.
    oskar_timer_start(tmr);
    oskar_mem_random_uniform(v_cpu_d, seed, c1, c2, c3, &status);
    report_time(n, "uniform", "double", "CPU", oskar_timer_elapsed(tmr));
    ASSERT_EQ(0, status) << oskar_get_error_string(status);
    oskar_timer_start(tmr);
    oskar_mem_random_uniform(v_gpu_d, seed, c1, c2, c3, &status);
    report_time(n, "uniform", "double", "GPU", oskar_timer_elapsed(tmr));
    ASSERT_EQ(0, status) << oskar_get_error_string(status);

    // Check consistency between CPU and GPU results.
    oskar_mem_evaluate_relative_error(v_gpu_d, v_cpu_d, 0,
            &max_err, &avg_err, 0, &status);
    EXPECT_LT(max_err, 1e-10);
    EXPECT_LT(avg_err, 1e-10);

    // Check consistency between single and double precision.
    oskar_mem_evaluate_relative_error(v_cpu_f, v_cpu_d, 0,
            &max_err, &avg_err, 0, &status);
    EXPECT_LT(max_err, 1e-5);
    EXPECT_LT(avg_err, 1e-5);

    if (save)
    {
        FILE* fhan = fopen("random_uniform.txt", "w");

        // The dump is optional debugging output: skip it if the file
        // cannot be opened rather than passing a null stream on.
        if (fhan)
        {
            oskar_mem_save_ascii(fhan, 4, n, &status,
                    v_cpu_f, v_gpu_f, v_cpu_d, v_gpu_d);
            fclose(fhan);
        }
    }

    // Free memory.
    oskar_mem_free(v_cpu_f, &status);
    oskar_mem_free(v_gpu_f, &status);
    oskar_mem_free(v_cpu_d, &status);
    oskar_mem_free(v_gpu_d, &status);
    oskar_timer_free(tmr);
}
void runTest(int prec1, int prec2, int loc1, int loc2, int matrix, int extended, double time_average) { int num_baselines, status = 0, type; oskar_Mem *vis1, *vis2; oskar_Timer *timer1, *timer2; double time1, time2, frequency = 100e6; // Create the timers. timer1 = oskar_timer_create(loc1 == OSKAR_GPU ? OSKAR_TIMER_CUDA : OSKAR_TIMER_NATIVE); timer2 = oskar_timer_create(loc2 == OSKAR_GPU ? OSKAR_TIMER_CUDA : OSKAR_TIMER_NATIVE); // Run first part. createTestData(prec1, loc1, matrix); num_baselines = oskar_telescope_num_baselines(tel); type = prec1 | OSKAR_COMPLEX; if (matrix) type |= OSKAR_MATRIX; vis1 = oskar_mem_create(type, loc1, num_baselines, &status); oskar_mem_clear_contents(vis1, &status); ASSERT_EQ(0, status) << oskar_get_error_string(status); oskar_sky_set_use_extended(sky, extended); oskar_telescope_set_channel_bandwidth(tel, bandwidth); oskar_telescope_set_time_average(tel, time_average); oskar_timer_start(timer1); oskar_cross_correlate(vis1, oskar_sky_num_sources(sky), jones, sky, tel, u_, v_, w_, 1.0, frequency, &status); time1 = oskar_timer_elapsed(timer1); destroyTestData(); ASSERT_EQ(0, status) << oskar_get_error_string(status); // Run second part. createTestData(prec2, loc2, matrix); num_baselines = oskar_telescope_num_baselines(tel); type = prec2 | OSKAR_COMPLEX; if (matrix) type |= OSKAR_MATRIX; vis2 = oskar_mem_create(type, loc2, num_baselines, &status); oskar_mem_clear_contents(vis2, &status); ASSERT_EQ(0, status) << oskar_get_error_string(status); oskar_sky_set_use_extended(sky, extended); oskar_telescope_set_channel_bandwidth(tel, bandwidth); oskar_telescope_set_time_average(tel, time_average); oskar_timer_start(timer2); oskar_cross_correlate(vis2, oskar_sky_num_sources(sky), jones, sky, tel, u_, v_, w_, 1.0, frequency, &status); time2 = oskar_timer_elapsed(timer2); destroyTestData(); ASSERT_EQ(0, status) << oskar_get_error_string(status); // Destroy the timers. oskar_timer_free(timer1); oskar_timer_free(timer2); // Compare results. 
check_values(vis1, vis2); // Free memory. oskar_mem_free(vis1, &status); oskar_mem_free(vis2, &status); ASSERT_EQ(0, status) << oskar_get_error_string(status); // Record properties for test. RecordProperty("SourceType", extended ? "Gaussian" : "Point"); RecordProperty("JonesType", matrix ? "Matrix" : "Scalar"); RecordProperty("TimeSmearing", time_average == 0.0 ? "off" : "on"); RecordProperty("Prec1", prec1 == OSKAR_SINGLE ? "Single" : "Double"); RecordProperty("Loc1", loc1 == OSKAR_CPU ? "CPU" : "GPU"); RecordProperty("Time1_ms", int(time1 * 1000)); RecordProperty("Prec2", prec2 == OSKAR_SINGLE ? "Single" : "Double"); RecordProperty("Loc2", loc2 == OSKAR_CPU ? "CPU" : "GPU"); RecordProperty("Time2_ms", int(time2 * 1000)); #ifdef ALLOW_PRINTING // Print times. printf(" > %s. %s sources. Time smearing %s.\n", matrix ? "Matrix" : "Scalar", extended ? "Gaussian" : "Point", time_average == 0.0 ? "off" : "on"); printf(" %s precision %s: %.2f ms, %s precision %s: %.2f ms\n", prec1 == OSKAR_SINGLE ? "Single" : "Double", loc1 == OSKAR_CPU ? "CPU" : "GPU", time1 * 1000.0, prec2 == OSKAR_SINGLE ? "Single" : "Double", loc2 == OSKAR_CPU ? "CPU" : "GPU", time2 * 1000.0); #endif }
TEST(prefix_sum, test) { int n = 100000, status = 0, exclusive = 1; oskar_Mem* in_cpu = oskar_mem_create(OSKAR_INT, OSKAR_CPU, n, &status); oskar_Mem* out_cpu = oskar_mem_create(OSKAR_INT, OSKAR_CPU, n, &status); oskar_Timer* tmr = oskar_timer_create(OSKAR_TIMER_NATIVE); // Fill input with random integers from 0 to 9. int* t = oskar_mem_int(in_cpu, &status); srand(1556); for (int i = 0; i < n; ++i) t[i] = (int) (10.0 * rand() / ((double) RAND_MAX)); t[0] = 3; // Run on CPU. oskar_timer_start(tmr); oskar_prefix_sum(n, in_cpu, out_cpu, 0, exclusive, &status); EXPECT_EQ(0, status); printf("Prefix sum on CPU took %.3f sec\n", oskar_timer_elapsed(tmr)); #ifdef OSKAR_HAVE_CUDA // Run on GPU with CUDA. oskar_Mem* in_gpu = oskar_mem_create_copy(in_cpu, OSKAR_GPU, &status); oskar_Mem* out_gpu = oskar_mem_create(OSKAR_INT, OSKAR_GPU, n, &status); oskar_timer_start(tmr); oskar_prefix_sum(n, in_gpu, out_gpu, 0, exclusive, &status); EXPECT_EQ(0, status); printf("Prefix sum on GPU took %.3f sec\n", oskar_timer_elapsed(tmr)); // Check consistency between CPU and GPU results. oskar_Mem* out_cmp_gpu = oskar_mem_create_copy(out_gpu, OSKAR_CPU, &status); EXPECT_EQ(0, oskar_mem_different(out_cpu, out_cmp_gpu, n, &status)); #endif #ifdef OSKAR_HAVE_OPENCL // Run on OpenCL. oskar_Mem* in_cl = oskar_mem_create_copy(in_cpu, OSKAR_CL, &status); oskar_Mem* out_cl = oskar_mem_create(OSKAR_INT, OSKAR_CL, n, &status); oskar_timer_start(tmr); printf("Using %s\n", oskar_cl_device_name()); oskar_prefix_sum(n, in_cl, out_cl, 0, exclusive, &status); EXPECT_EQ(0, status); printf("Prefix sum on OpenCL took %.3f sec\n", oskar_timer_elapsed(tmr)); // Check consistency between CPU and OpenCL results. 
oskar_Mem* out_cmp_cl = oskar_mem_create_copy(out_cl, OSKAR_CPU, &status); EXPECT_EQ(0, oskar_mem_different(out_cpu, out_cmp_cl, n, &status)); #endif if (save) { size_t num_mem = 1; FILE* fhan = fopen("prefix_sum_test.txt", "w"); #ifdef OSKAR_HAVE_CUDA num_mem += 1; #endif #ifdef OSKAR_HAVE_OPENCL num_mem += 1; #endif oskar_mem_save_ascii(fhan, num_mem, n, &status, out_cpu #ifdef OSKAR_HAVE_CUDA , out_cmp_gpu #endif #ifdef OSKAR_HAVE_OPENCL , out_cmp_cl #endif ); fclose(fhan); } // Clean up. oskar_timer_free(tmr); oskar_mem_free(in_cpu, &status); oskar_mem_free(out_cpu, &status); #ifdef OSKAR_HAVE_CUDA oskar_mem_free(in_gpu, &status); oskar_mem_free(out_gpu, &status); oskar_mem_free(out_cmp_gpu, &status); #endif #ifdef OSKAR_HAVE_OPENCL oskar_mem_free(in_cl, &status); oskar_mem_free(out_cl, &status); oskar_mem_free(out_cmp_cl, &status); #endif }
void runTest(int prec1, int prec2, int loc1, int loc2, int matrix) { int status = 0, type; oskar_Mem *beam1, *beam2; oskar_Timer *timer1, *timer2; double time1, time2; // Create the timers. timer1 = oskar_timer_create(loc1 == OSKAR_GPU ? OSKAR_TIMER_CUDA : OSKAR_TIMER_NATIVE); timer2 = oskar_timer_create(loc2 == OSKAR_GPU ? OSKAR_TIMER_CUDA : OSKAR_TIMER_NATIVE); // Run first part. type = prec1 | OSKAR_COMPLEX; if (matrix) type |= OSKAR_MATRIX; beam1 = oskar_mem_create(type, loc1, num_sources, &status); oskar_mem_clear_contents(beam1, &status); ASSERT_EQ(0, status) << oskar_get_error_string(status); createTestData(prec1, loc1, matrix); oskar_timer_start(timer1); oskar_evaluate_cross_power(num_sources, num_stations, jones, 0, beam1, &status); time1 = oskar_timer_elapsed(timer1); destroyTestData(); ASSERT_EQ(0, status) << oskar_get_error_string(status); // Run second part. type = prec2 | OSKAR_COMPLEX; if (matrix) type |= OSKAR_MATRIX; beam2 = oskar_mem_create(type, loc2, num_sources, &status); oskar_mem_clear_contents(beam2, &status); ASSERT_EQ(0, status) << oskar_get_error_string(status); createTestData(prec2, loc2, matrix); oskar_timer_start(timer2); oskar_evaluate_cross_power(num_sources, num_stations, jones, 0, beam2, &status); time2 = oskar_timer_elapsed(timer2); destroyTestData(); ASSERT_EQ(0, status) << oskar_get_error_string(status); // Destroy the timers. oskar_timer_free(timer1); oskar_timer_free(timer2); // Compare results. check_values(beam1, beam2); // Free memory. oskar_mem_free(beam1, &status); oskar_mem_free(beam2, &status); ASSERT_EQ(0, status) << oskar_get_error_string(status); // Record properties for test. RecordProperty("JonesType", matrix ? "Matrix" : "Scalar"); RecordProperty("Prec1", prec1 == OSKAR_SINGLE ? "Single" : "Double"); RecordProperty("Loc1", loc1 == OSKAR_CPU ? "CPU" : "GPU"); RecordProperty("Time1_ms", int(time1 * 1000)); RecordProperty("Prec2", prec2 == OSKAR_SINGLE ? 
"Single" : "Double"); RecordProperty("Loc2", loc2 == OSKAR_CPU ? "CPU" : "GPU"); RecordProperty("Time2_ms", int(time2 * 1000)); #ifdef ALLOW_PRINTING // Print times. printf(" > %s.\n", matrix ? "Matrix" : "Scalar"); printf(" %s precision %s: %.2f ms, %s precision %s: %.2f ms\n", prec1 == OSKAR_SINGLE ? "Single" : "Double", loc1 == OSKAR_CPU ? "CPU" : "GPU", time1 * 1000.0, prec2 == OSKAR_SINGLE ? "Single" : "Double", loc2 == OSKAR_CPU ? "CPU" : "GPU", time2 * 1000.0); #endif }
int benchmark(int num_elements, int num_directions, OpType op_type, int loc, int precision, bool evaluate_2d, int niter, double& time_taken) { int status = 0; // Create the timer. oskar_Timer *tmr = oskar_timer_create(OSKAR_TIMER_CUDA); oskar_Station* station = oskar_station_create(precision, loc, num_elements, &status); if (status) return status; station->array_is_3d = (evaluate_2d) ? OSKAR_FALSE : OSKAR_TRUE; oskar_Mem *x, *y, *z, *weights = 0, *beam = 0, *signal = 0; x = oskar_mem_create(precision, loc, num_directions, &status); y = oskar_mem_create(precision, loc, num_directions, &status); z = oskar_mem_create(precision, loc, num_directions, &status); if (status) return status; if (op_type == O2C) { int type = precision | OSKAR_COMPLEX; beam = oskar_mem_create(type, loc, num_directions, &status); weights = oskar_mem_create(type, loc, num_elements, &status); if (status) return status; oskar_timer_start(tmr); for (int i = 0; i < niter; ++i) { oskar_evaluate_array_pattern(beam, 2.0 * M_PI, station, num_directions, x, y, z, weights, &status); } time_taken = oskar_timer_elapsed(tmr); } else if (op_type == C2C || op_type == M2M) { int type = precision | OSKAR_COMPLEX; int num_signals = num_directions * num_elements; weights = oskar_mem_create(type, loc, num_elements, &status); if (op_type == C2C) { beam = oskar_mem_create(type, loc, num_directions, &status); signal = oskar_mem_create(type, loc, num_signals, &status); } else { type |= OSKAR_MATRIX; beam = oskar_mem_create(type, loc, num_directions, &status); signal = oskar_mem_create(type, loc, num_signals, &status); } if (status) return status; oskar_timer_start(tmr); for (int i = 0; i < niter; ++i) { oskar_evaluate_array_pattern_hierarchical(beam, 2.0 * M_PI, station, num_directions, x, y, z, signal, weights, &status); } time_taken = oskar_timer_elapsed(tmr); } // Destroy the timer. oskar_timer_free(tmr); // Free memory. 
oskar_station_free(station, &status); oskar_mem_free(x, &status); oskar_mem_free(y, &status); oskar_mem_free(z, &status); oskar_mem_free(weights, &status); oskar_mem_free(beam, &status); oskar_mem_free(signal, &status); return status; }