/**
 * Resets the data arrays held by a visibility block.
 *
 * The contents of the auto-correlation, cross-correlation and baseline
 * (uu, vv, ww) arrays are cleared using oskar_mem_clear_contents().
 *
 * @param vis     Visibility block whose arrays are cleared.
 * @param status  Status code (in/out). If non-zero on entry, nothing is done.
 */
void oskar_vis_block_clear(oskar_VisBlock* vis, int* status)
{
    /* Only touch the block if no earlier error has been flagged. */
    if (!*status)
    {
        /* Visibility amplitudes. */
        oskar_mem_clear_contents(vis->auto_correlations, status);
        oskar_mem_clear_contents(vis->cross_correlations, status);

        /* Baseline coordinates. */
        oskar_mem_clear_contents(vis->baseline_uu_metres, status);
        oskar_mem_clear_contents(vis->baseline_vv_metres, status);
        oskar_mem_clear_contents(vis->baseline_ww_metres, status);
    }
}
/*
 * Prepares the per-device work areas of a beam pattern simulator.
 *
 * For every compute device this allocates, if not already present, the
 * device-side buffers (Jones data, x/y/z coordinates, telescope copy and
 * station work area), the host-side buffers needed for the selected output
 * products, and a compute timer. Existing auto- and cross-power device
 * arrays are cleared on every call.
 *
 * h       Beam pattern handle; its device array may be expanded so that
 *         there is one entry per selected GPU.
 * status  Status code (in/out). Nothing is done if non-zero on entry.
 *         Set to OSKAR_ERR_INVALID_ARGUMENT if cross-power output is
 *         requested with fewer than two active stations.
 */
static void set_up_device_data(oskar_BeamPattern* h, int* status)
{
    int dev, amp_type, chunk_len, full_len;
    int want_raw, want_auto, want_cross;

    if (*status) return;

    /* Array lengths: one chunk of sources, and one chunk per station. */
    chunk_len = h->max_chunk_size;
    full_len = h->num_active_stations * chunk_len;

    /* Element type of all beam arrays. */
    amp_type = h->prec | OSKAR_COMPLEX;
    if (h->pol_mode == OSKAR_POL_MODE_FULL)
    {
        amp_type |= OSKAR_MATRIX;
    }

    /* Work out which families of output product have been requested. */
    want_raw = h->ixr_txt || h->ixr_fits ||
            h->voltage_raw_txt || h->voltage_amp_txt ||
            h->voltage_phase_txt || h->voltage_amp_fits ||
            h->voltage_phase_fits;
    want_auto = h->auto_power_fits || h->auto_power_txt;
    want_cross = h->cross_power_raw_txt ||
            h->cross_power_amp_fits || h->cross_power_phase_fits ||
            h->cross_power_amp_txt || h->cross_power_phase_txt;

    /* Make sure there is at least one device entry per selected GPU. */
    if (h->num_devices < h->num_gpus)
    {
        oskar_beam_pattern_set_num_devices(h, h->num_gpus);
    }

    /* Stop visiting devices as soon as an error has been flagged. */
    for (dev = 0; dev < h->num_devices && !*status; ++dev)
    {
        int s, buf, location = OSKAR_CPU;
        DeviceData* d = &h->d[dev];

        /* The first num_gpus entries are GPUs; any others use the CPU. */
        if (dev < h->num_gpus)
        {
            oskar_device_set(h->gpu_ids[dev], status);
            location = OSKAR_GPU;
        }

        /* Device-side buffers, created once per device. */
        d->previous_chunk_index = -1;
        if (!d->tel)
        {
            d->jones_data = oskar_mem_create(amp_type, location,
                    full_len, status);
            d->x = oskar_mem_create(h->prec, location, 1 + chunk_len, status);
            d->y = oskar_mem_create(h->prec, location, 1 + chunk_len, status);
            d->z = oskar_mem_create(h->prec, location, 1 + chunk_len, status);
            d->tel = oskar_telescope_create_copy(h->tel, location, status);
            d->work = oskar_station_work_create(h->prec, location, status);
        }

        /* Pair of host-side Jones buffers, only for raw-data products. */
        if (want_raw && !d->jones_data_cpu[0])
        {
            for (buf = 0; buf < 2; ++buf)
            {
                d->jones_data_cpu[buf] = oskar_mem_create(amp_type,
                        OSKAR_CPU, full_len, status);
            }
        }

        /* Output arrays for each selected Stokes parameter. */
        for (s = 0; s < 4; ++s)
        {
            if (h->stokes[s])
            {
                /* Auto-correlation beam arrays (full_len elements). */
                if (want_auto && !d->auto_power[s])
                {
                    /* Device memory. */
                    d->auto_power[s] = oskar_mem_create(amp_type, location,
                            full_len, status);

                    /* Host memory. */
                    for (buf = 0; buf < 2; ++buf)
                    {
                        d->auto_power_cpu[s][buf] = oskar_mem_create(
                                amp_type, OSKAR_CPU, full_len, status);
                    }
                    if (h->average_single_axis == 'T')
                    {
                        d->auto_power_time_avg[s] = oskar_mem_create(
                                amp_type, OSKAR_CPU, full_len, status);
                    }
                    if (h->average_single_axis == 'C')
                    {
                        d->auto_power_channel_avg[s] = oskar_mem_create(
                                amp_type, OSKAR_CPU, full_len, status);
                    }
                    if (h->average_time_and_channel)
                    {
                        d->auto_power_channel_and_time_avg[s] =
                                oskar_mem_create(amp_type, OSKAR_CPU,
                                        full_len, status);
                    }
                }

                /* Cross-correlation beam arrays (chunk_len elements). */
                if (want_cross && !d->cross_power[s])
                {
                    if (h->num_active_stations < 2)
                    {
                        oskar_log_error(h->log, "Cannot create cross-power beam "
                                "using less than two active stations.");
                        *status = OSKAR_ERR_INVALID_ARGUMENT;
                        break; /* Abandon the Stokes loop. */
                    }

                    /* Device memory. */
                    d->cross_power[s] = oskar_mem_create(amp_type, location,
                            chunk_len, status);

                    /* Host memory. */
                    for (buf = 0; buf < 2; ++buf)
                    {
                        d->cross_power_cpu[s][buf] = oskar_mem_create(
                                amp_type, OSKAR_CPU, chunk_len, status);
                    }
                    if (h->average_single_axis == 'T')
                    {
                        d->cross_power_time_avg[s] = oskar_mem_create(
                                amp_type, OSKAR_CPU, chunk_len, status);
                    }
                    if (h->average_single_axis == 'C')
                    {
                        d->cross_power_channel_avg[s] = oskar_mem_create(
                                amp_type, OSKAR_CPU, chunk_len, status);
                    }
                    if (h->average_time_and_channel)
                    {
                        d->cross_power_channel_and_time_avg[s] =
                                oskar_mem_create(amp_type, OSKAR_CPU,
                                        chunk_len, status);
                    }
                }

                /* Zero the device arrays, whether new or reused. */
                if (d->auto_power[s])
                {
                    oskar_mem_clear_contents(d->auto_power[s], status);
                }
                if (d->cross_power[s])
                {
                    oskar_mem_clear_contents(d->cross_power[s], status);
                }
            }
        }

        /* Compute timer, created once per device. */
        if (!d->tmr_compute)
        {
            d->tmr_compute = oskar_timer_create(OSKAR_TIMER_NATIVE);
        }
    }
}
void runTest(int prec1, int prec2, int loc1, int loc2, int matrix, int extended, double time_average) { int num_baselines, status = 0, type; oskar_Mem *vis1, *vis2; oskar_Timer *timer1, *timer2; double time1, time2, frequency = 100e6; // Create the timers. timer1 = oskar_timer_create(loc1 == OSKAR_GPU ? OSKAR_TIMER_CUDA : OSKAR_TIMER_NATIVE); timer2 = oskar_timer_create(loc2 == OSKAR_GPU ? OSKAR_TIMER_CUDA : OSKAR_TIMER_NATIVE); // Run first part. createTestData(prec1, loc1, matrix); num_baselines = oskar_telescope_num_baselines(tel); type = prec1 | OSKAR_COMPLEX; if (matrix) type |= OSKAR_MATRIX; vis1 = oskar_mem_create(type, loc1, num_baselines, &status); oskar_mem_clear_contents(vis1, &status); ASSERT_EQ(0, status) << oskar_get_error_string(status); oskar_sky_set_use_extended(sky, extended); oskar_telescope_set_channel_bandwidth(tel, bandwidth); oskar_telescope_set_time_average(tel, time_average); oskar_timer_start(timer1); oskar_cross_correlate(vis1, oskar_sky_num_sources(sky), jones, sky, tel, u_, v_, w_, 1.0, frequency, &status); time1 = oskar_timer_elapsed(timer1); destroyTestData(); ASSERT_EQ(0, status) << oskar_get_error_string(status); // Run second part. createTestData(prec2, loc2, matrix); num_baselines = oskar_telescope_num_baselines(tel); type = prec2 | OSKAR_COMPLEX; if (matrix) type |= OSKAR_MATRIX; vis2 = oskar_mem_create(type, loc2, num_baselines, &status); oskar_mem_clear_contents(vis2, &status); ASSERT_EQ(0, status) << oskar_get_error_string(status); oskar_sky_set_use_extended(sky, extended); oskar_telescope_set_channel_bandwidth(tel, bandwidth); oskar_telescope_set_time_average(tel, time_average); oskar_timer_start(timer2); oskar_cross_correlate(vis2, oskar_sky_num_sources(sky), jones, sky, tel, u_, v_, w_, 1.0, frequency, &status); time2 = oskar_timer_elapsed(timer2); destroyTestData(); ASSERT_EQ(0, status) << oskar_get_error_string(status); // Destroy the timers. oskar_timer_free(timer1); oskar_timer_free(timer2); // Compare results. 
check_values(vis1, vis2); // Free memory. oskar_mem_free(vis1, &status); oskar_mem_free(vis2, &status); ASSERT_EQ(0, status) << oskar_get_error_string(status); // Record properties for test. RecordProperty("SourceType", extended ? "Gaussian" : "Point"); RecordProperty("JonesType", matrix ? "Matrix" : "Scalar"); RecordProperty("TimeSmearing", time_average == 0.0 ? "off" : "on"); RecordProperty("Prec1", prec1 == OSKAR_SINGLE ? "Single" : "Double"); RecordProperty("Loc1", loc1 == OSKAR_CPU ? "CPU" : "GPU"); RecordProperty("Time1_ms", int(time1 * 1000)); RecordProperty("Prec2", prec2 == OSKAR_SINGLE ? "Single" : "Double"); RecordProperty("Loc2", loc2 == OSKAR_CPU ? "CPU" : "GPU"); RecordProperty("Time2_ms", int(time2 * 1000)); #ifdef ALLOW_PRINTING // Print times. printf(" > %s. %s sources. Time smearing %s.\n", matrix ? "Matrix" : "Scalar", extended ? "Gaussian" : "Point", time_average == 0.0 ? "off" : "on"); printf(" %s precision %s: %.2f ms, %s precision %s: %.2f ms\n", prec1 == OSKAR_SINGLE ? "Single" : "Double", loc1 == OSKAR_CPU ? "CPU" : "GPU", time1 * 1000.0, prec2 == OSKAR_SINGLE ? "Single" : "Double", loc2 == OSKAR_CPU ? "CPU" : "GPU", time2 * 1000.0); #endif }
void runTest(int prec1, int prec2, int loc1, int loc2, int matrix) { int status = 0, type; oskar_Mem *beam1, *beam2; oskar_Timer *timer1, *timer2; double time1, time2; // Create the timers. timer1 = oskar_timer_create(loc1 == OSKAR_GPU ? OSKAR_TIMER_CUDA : OSKAR_TIMER_NATIVE); timer2 = oskar_timer_create(loc2 == OSKAR_GPU ? OSKAR_TIMER_CUDA : OSKAR_TIMER_NATIVE); // Run first part. type = prec1 | OSKAR_COMPLEX; if (matrix) type |= OSKAR_MATRIX; beam1 = oskar_mem_create(type, loc1, num_sources, &status); oskar_mem_clear_contents(beam1, &status); ASSERT_EQ(0, status) << oskar_get_error_string(status); createTestData(prec1, loc1, matrix); oskar_timer_start(timer1); oskar_evaluate_cross_power(num_sources, num_stations, jones, 0, beam1, &status); time1 = oskar_timer_elapsed(timer1); destroyTestData(); ASSERT_EQ(0, status) << oskar_get_error_string(status); // Run second part. type = prec2 | OSKAR_COMPLEX; if (matrix) type |= OSKAR_MATRIX; beam2 = oskar_mem_create(type, loc2, num_sources, &status); oskar_mem_clear_contents(beam2, &status); ASSERT_EQ(0, status) << oskar_get_error_string(status); createTestData(prec2, loc2, matrix); oskar_timer_start(timer2); oskar_evaluate_cross_power(num_sources, num_stations, jones, 0, beam2, &status); time2 = oskar_timer_elapsed(timer2); destroyTestData(); ASSERT_EQ(0, status) << oskar_get_error_string(status); // Destroy the timers. oskar_timer_free(timer1); oskar_timer_free(timer2); // Compare results. check_values(beam1, beam2); // Free memory. oskar_mem_free(beam1, &status); oskar_mem_free(beam2, &status); ASSERT_EQ(0, status) << oskar_get_error_string(status); // Record properties for test. RecordProperty("JonesType", matrix ? "Matrix" : "Scalar"); RecordProperty("Prec1", prec1 == OSKAR_SINGLE ? "Single" : "Double"); RecordProperty("Loc1", loc1 == OSKAR_CPU ? "CPU" : "GPU"); RecordProperty("Time1_ms", int(time1 * 1000)); RecordProperty("Prec2", prec2 == OSKAR_SINGLE ? 
"Single" : "Double"); RecordProperty("Loc2", loc2 == OSKAR_CPU ? "CPU" : "GPU"); RecordProperty("Time2_ms", int(time2 * 1000)); #ifdef ALLOW_PRINTING // Print times. printf(" > %s.\n", matrix ? "Matrix" : "Scalar"); printf(" %s precision %s: %.2f ms, %s precision %s: %.2f ms\n", prec1 == OSKAR_SINGLE ? "Single" : "Double", loc1 == OSKAR_CPU ? "CPU" : "GPU", time1 * 1000.0, prec2 == OSKAR_SINGLE ? "Single" : "Double", loc2 == OSKAR_CPU ? "CPU" : "GPU", time2 * 1000.0); #endif }