Пример #1
0
/*
 * Creates an imager handle, its timers, mutex and empty scratch arrays,
 * and applies the default settings listed at the end of the function.
 *
 * imager_precision  Data type of the imager scratch arrays; must be
 *                   OSKAR_SINGLE or OSKAR_DOUBLE.
 * status            In/out status code. Set to OSKAR_ERR_BAD_DATA_TYPE if
 *                   imager_precision is neither of the accepted values.
 *
 * Returns the new handle. On a bad precision the handle is still returned
 * (timers, mutex and scratch arrays have been created) but device discovery
 * and defaults are skipped. Returns NULL, leaving *status untouched, if the
 * handle structure itself could not be allocated.
 */
oskar_Imager* oskar_imager_create(int imager_precision, int* status)
{
    oskar_Imager* h = (oskar_Imager*) calloc(1, sizeof(oskar_Imager));

    /* Every statement below writes through the handle, so stop here
     * rather than dereference a null pointer if the allocation failed. */
    if (!h) return 0;

    /* Create timers. */
    h->tmr_grid_finalise = oskar_timer_create(OSKAR_TIMER_NATIVE);
    h->tmr_grid_update = oskar_timer_create(OSKAR_TIMER_NATIVE);
    h->tmr_init = oskar_timer_create(OSKAR_TIMER_NATIVE);
    h->tmr_read = oskar_timer_create(OSKAR_TIMER_NATIVE);
    h->tmr_write = oskar_timer_create(OSKAR_TIMER_NATIVE);
    h->mutex = oskar_mutex_create();

    /* Create scratch arrays (all zero-length, in CPU memory). */
    h->imager_prec = imager_precision;
    h->uu_im       = oskar_mem_create(imager_precision, OSKAR_CPU, 0, status);
    h->vv_im       = oskar_mem_create(imager_precision, OSKAR_CPU, 0, status);
    h->ww_im       = oskar_mem_create(imager_precision, OSKAR_CPU, 0, status);
    h->uu_tmp      = oskar_mem_create(imager_precision, OSKAR_CPU, 0, status);
    h->vv_tmp      = oskar_mem_create(imager_precision, OSKAR_CPU, 0, status);
    h->ww_tmp      = oskar_mem_create(imager_precision, OSKAR_CPU, 0, status);
    h->vis_im      = oskar_mem_create(imager_precision | OSKAR_COMPLEX,
            OSKAR_CPU, 0, status);
    h->weight_im   = oskar_mem_create(imager_precision, OSKAR_CPU, 0, status);
    h->weight_tmp  = oskar_mem_create(imager_precision, OSKAR_CPU, 0, status);
    h->time_im     = oskar_mem_create(OSKAR_DOUBLE, OSKAR_CPU, 0, status);

    /* Check data type. The handle is returned even on failure so that the
     * caller still holds everything created above. */
    if (imager_precision != OSKAR_SINGLE && imager_precision != OSKAR_DOUBLE)
    {
        *status = OSKAR_ERR_BAD_DATA_TYPE;
        return h;
    }

    /* Get number of devices available, and device location. */
    oskar_device_set_require_double_precision(imager_precision == OSKAR_DOUBLE);
    h->num_gpus_avail = oskar_device_count(0, &h->dev_loc);

    /* Set sensible defaults. */
    oskar_imager_set_gpus(h, -1, 0, status);
    oskar_imager_set_num_devices(h, -1);
    oskar_imager_set_algorithm(h, "FFT", status);
    oskar_imager_set_image_type(h, "I", status);
    oskar_imager_set_weighting(h, "Natural", status);
    oskar_imager_set_ms_column(h, "DATA", status);
    oskar_imager_set_default_direction(h);
    oskar_imager_set_generate_w_kernels_on_gpu(h, 1);
    oskar_imager_set_fov(h, 1.0);
    oskar_imager_set_size(h, 256, status);
    oskar_imager_set_uv_filter_max(h, DBL_MAX);
    return h;
}
int benchmark(int num_elements, int num_directions, OpType op_type,
        int loc, int precision, bool evaluate_2d, int niter, double& time_taken)
{
    int status = 0;
    int type = precision | OSKAR_COMPLEX;
    oskar_Mem *beam = 0, *signal = 0, *z = 0, *z_i = 0;
    oskar_Mem *x = oskar_mem_create(precision, loc, num_directions, &status);
    oskar_Mem *y = oskar_mem_create(precision, loc, num_directions, &status);
    oskar_Mem *x_i = oskar_mem_create(precision, loc, num_elements, &status);
    oskar_Mem *y_i = oskar_mem_create(precision, loc, num_elements, &status);
    oskar_Mem *weights = oskar_mem_create(type, loc, num_elements, &status);
    if (!evaluate_2d)
    {
        z = oskar_mem_create(precision, loc, num_directions, &status);
        z_i = oskar_mem_create(precision, loc, num_elements, &status);
    }
    if (op_type == O2C)
        beam = oskar_mem_create(type, loc, num_directions, &status);
    else if (op_type == C2C || op_type == M2M)
    {
        int num_signals = num_directions * num_elements;
        if (op_type == C2C)
        {
            beam = oskar_mem_create(type, loc, num_directions, &status);
            signal = oskar_mem_create(type, loc, num_signals, &status);
        }
        else
        {
            type |= OSKAR_MATRIX;
            beam = oskar_mem_create(type, loc, num_directions, &status);
            signal = oskar_mem_create(type, loc, num_signals, &status);
        }
    }

    oskar_Timer *tmr = oskar_timer_create(OSKAR_TIMER_NATIVE);
    if (!status)
    {
        oskar_timer_start(tmr);
        for (int i = 0; i < niter; ++i)
        {
            oskar_dftw(num_elements, 2.0 * M_PI, x_i, y_i, z_i, weights,
                    num_directions, x, y, z, signal, beam, &status);
        }
        time_taken = oskar_timer_elapsed(tmr);
    }

    // Free memory.
    oskar_timer_free(tmr);
    oskar_mem_free(x, &status);
    oskar_mem_free(y, &status);
    oskar_mem_free(z, &status);
    oskar_mem_free(x_i, &status);
    oskar_mem_free(y_i, &status);
    oskar_mem_free(z_i, &status);
    oskar_mem_free(weights, &status);
    oskar_mem_free(beam, &status);
    oskar_mem_free(signal, &status);

    return status;
}
Пример #3
0
/*
 * Creates a simulator handle with its timers, scratch array and mutex,
 * and applies the default settings listed below.
 *
 * precision  Data type stored in the handle and used for the zero-length
 *            scratch array.
 * status     In/out status code, passed on to the calls that take one.
 *
 * Returns the new handle, or NULL (leaving *status untouched) if the handle
 * structure itself could not be allocated.
 */
oskar_Simulator* oskar_simulator_create(int precision, int* status)
{
    oskar_Simulator* h = (oskar_Simulator*) calloc(1, sizeof(oskar_Simulator));

    /* Every statement below writes through the handle, so stop here
     * rather than dereference a null pointer if the allocation failed. */
    if (!h) return 0;

    h->prec      = precision;
    h->tmr_sim   = oskar_timer_create(OSKAR_TIMER_NATIVE);
    h->tmr_write = oskar_timer_create(OSKAR_TIMER_NATIVE);
    h->temp      = oskar_mem_create(precision, OSKAR_CPU, 0, status);
    h->mutex     = oskar_mutex_create();

    /* Set sensible defaults. */
    h->max_sources_per_chunk = 16384;
    oskar_simulator_set_gpus(h, -1, 0, status);
    oskar_simulator_set_num_devices(h, -1);
    oskar_simulator_set_correlation_type(h, "Cross-correlations", status);
    oskar_simulator_set_horizon_clip(h, 1);
    oskar_simulator_set_source_flux_range(h, 0.0, DBL_MAX);
    oskar_simulator_set_max_times_per_block(h, 10);
    return h;
}
Пример #4
0
// Times niter individual calls to oskar_cross_correlate() on freshly created
// sky, telescope and Jones data.
//
// num_stations  Number of stations in the telescope and Jones data.
// num_sources   Number of sources in the sky and Jones data.
// type          Data type of the sky, telescope and station coordinates.
// jones_type    Data type of the Jones data and of the visibility output.
// loc           Memory location for all data; also selects the timer type.
// use_extended  Passed to oskar_sky_set_use_extended().
// use_time_ave  Passed (as a double) to oskar_telescope_set_time_average().
// niter         Number of timed calls.
// times         Resized to niter; element i receives the elapsed time
//               reported by the timer for call i.
//
// Returns the final status code.
int benchmark(int num_stations, int num_sources, int type,
        int jones_type, int loc, int use_extended, int use_time_ave, int niter,
        std::vector<double>& times)
{
    int status = 0;

    // CUDA timer for GPU runs, OpenMP timer for everything else.
    oskar_Timer* timer = oskar_timer_create(
            loc != OSKAR_GPU ? OSKAR_TIMER_OMP : OSKAR_TIMER_CUDA);

    // Test sky model, telescope model and Jones matrices.
    oskar_Telescope* tel = oskar_telescope_create(type, loc,
            num_stations, &status);
    oskar_Sky* sky = oskar_sky_create(type, loc, num_sources, &status);
    oskar_Jones* J = oskar_jones_create(jones_type, loc, num_stations,
            num_sources, &status);

    oskar_telescope_set_channel_bandwidth(tel, 1e6);
    oskar_telescope_set_time_average(tel, (double) use_time_ave);
    oskar_sky_set_use_extended(sky, use_extended);

    // Output visibility slice (one entry per baseline) and the station
    // coordinates.
    oskar_Mem* vis = oskar_mem_create(jones_type, loc,
            oskar_telescope_num_baselines(tel), &status);
    oskar_Mem* u = oskar_mem_create(type, loc, num_stations, &status);
    oskar_Mem* v = oskar_mem_create(type, loc, num_stations, &status);
    oskar_Mem* w = oskar_mem_create(type, loc, num_stations, &status);

    // Run benchmark: one timer reading per call.
    times.resize(niter);
    for (std::vector<double>::iterator slot = times.begin();
            slot != times.end(); ++slot)
    {
        oskar_timer_start(timer);
        oskar_cross_correlate(vis, oskar_sky_num_sources(sky), J, sky, tel,
                u, v, w, 0.0, 100e6, &status);
        *slot = oskar_timer_elapsed(timer);
    }

    // Free memory.
    oskar_mem_free(u, &status);
    oskar_mem_free(v, &status);
    oskar_mem_free(w, &status);
    oskar_mem_free(vis, &status);
    oskar_jones_free(J, &status);
    oskar_telescope_free(tel, &status);
    oskar_sky_free(sky, &status);
    oskar_timer_free(timer);
    return status;
}
/*
 * Allocates (on first use) and resets the per-device working data of a
 * beam pattern handle.
 *
 * Does nothing if *status is already non-zero on entry. Of the
 * h->num_devices entries in h->d, the first h->num_gpus are bound to the
 * GPUs listed in h->gpu_ids and the rest use OSKAR_CPU. Each buffer is only
 * created while its pointer is still null, so existing allocations are
 * reused on a repeated call. For every Stokes index enabled in h->stokes,
 * the device-side auto- and cross-power buffers are cleared on each call.
 *
 * h       Beam pattern handle whose device data is set up.
 * status  In/out status code. Set to OSKAR_ERR_INVALID_ARGUMENT if
 *         cross-power output is requested with fewer than two active
 *         stations.
 */
static void set_up_device_data(oskar_BeamPattern* h, int* status)
{
    int i, beam_type, max_src, max_size, auto_power, cross_power, raw_data;
    if (*status) return;

    /* Get local variables. */
    max_src = h->max_chunk_size;
    /* One entry per active station per source in a chunk. */
    max_size = h->num_active_stations * max_src;
    beam_type = h->prec | OSKAR_COMPLEX;
    if (h->pol_mode == OSKAR_POL_MODE_FULL)
        beam_type |= OSKAR_MATRIX;
    /* Each flag is non-zero if any output option in its group is set. */
    raw_data = h->ixr_txt || h->ixr_fits ||
            h->voltage_raw_txt || h->voltage_amp_txt || h->voltage_phase_txt ||
            h->voltage_amp_fits || h->voltage_phase_fits;
    auto_power = h->auto_power_fits || h->auto_power_txt;
    cross_power = h->cross_power_raw_txt ||
            h->cross_power_amp_fits || h->cross_power_phase_fits ||
            h->cross_power_amp_txt || h->cross_power_phase_txt;

    /* Expand the number of devices to the number of selected GPUs,
     * if required. */
    if (h->num_devices < h->num_gpus)
        oskar_beam_pattern_set_num_devices(h, h->num_gpus);

    for (i = 0; i < h->num_devices; ++i)
    {
        int dev_loc, i_stokes;
        DeviceData* d = &h->d[i];
        /* Stop at the first error, including one raised while setting up
         * the previous device. */
        if (*status) break;

        /* Select the device. */
        if (i < h->num_gpus)
        {
            oskar_device_set(h->gpu_ids[i], status);
            dev_loc = OSKAR_GPU;
        }
        else
        {
            dev_loc = OSKAR_CPU;
        }

        /* Device memory. A null telescope copy marks a device that has not
         * been set up yet. */
        d->previous_chunk_index = -1;
        if (!d->tel)
        {
            d->jones_data = oskar_mem_create(beam_type, dev_loc, max_size,
                    status);
            /* NOTE(review): the coordinate arrays hold one element more than
             * the chunk size; the reason is not visible in this function. */
            d->x    = oskar_mem_create(h->prec, dev_loc, 1 + max_src, status);
            d->y    = oskar_mem_create(h->prec, dev_loc, 1 + max_src, status);
            d->z    = oskar_mem_create(h->prec, dev_loc, 1 + max_src, status);
            d->tel  = oskar_telescope_create_copy(h->tel, dev_loc, status);
            d->work = oskar_station_work_create(h->prec, dev_loc, status);
        }

        /* Host memory: a pair of buffers, created only when one of the
         * raw-data outputs is requested. */
        if (!d->jones_data_cpu[0] && raw_data)
        {
            d->jones_data_cpu[0] = oskar_mem_create(beam_type, OSKAR_CPU,
                    max_size, status);
            d->jones_data_cpu[1] = oskar_mem_create(beam_type, OSKAR_CPU,
                    max_size, status);
        }

        /* Auto-correlation beam output arrays. */
        for (i_stokes = 0; i_stokes < 4; ++i_stokes)
        {
            /* Skip Stokes indices that are not enabled. */
            if (!h->stokes[i_stokes]) continue;

            if (!d->auto_power[i_stokes] && auto_power)
            {
                /* Device memory. */
                d->auto_power[i_stokes] = oskar_mem_create(beam_type, dev_loc,
                        max_size, status);

                /* Host memory. */
                d->auto_power_cpu[i_stokes][0] = oskar_mem_create(
                        beam_type, OSKAR_CPU, max_size, status);
                d->auto_power_cpu[i_stokes][1] = oskar_mem_create(
                        beam_type, OSKAR_CPU, max_size, status);
                /* Averaging buffers are only created for the averaging
                 * modes that are switched on. */
                if (h->average_single_axis == 'T')
                    d->auto_power_time_avg[i_stokes] = oskar_mem_create(
                            beam_type, OSKAR_CPU, max_size, status);
                if (h->average_single_axis == 'C')
                    d->auto_power_channel_avg[i_stokes] = oskar_mem_create(
                            beam_type, OSKAR_CPU, max_size, status);
                if (h->average_time_and_channel)
                    d->auto_power_channel_and_time_avg[i_stokes] =
                            oskar_mem_create(beam_type, OSKAR_CPU,
                                    max_size, status);
            }

            /* Cross-correlation beam output arrays. */
            if (!d->cross_power[i_stokes] && cross_power)
            {
                if (h->num_active_stations < 2)
                {
                    oskar_log_error(h->log, "Cannot create cross-power beam "
                            "using less than two active stations.");
                    *status = OSKAR_ERR_INVALID_ARGUMENT;
                    /* This leaves only the Stokes loop; the status check at
                     * the top of the device loop then ends the set-up. */
                    break;
                }

                /* Device memory. Cross-power buffers hold one entry per
                 * source (max_src), not one per station and source. */
                d->cross_power[i_stokes] = oskar_mem_create(
                        beam_type, dev_loc, max_src, status);

                /* Host memory. */
                d->cross_power_cpu[i_stokes][0] = oskar_mem_create(

                        beam_type, OSKAR_CPU, max_src, status);
                d->cross_power_cpu[i_stokes][1] = oskar_mem_create(
                        beam_type, OSKAR_CPU, max_src, status);
                if (h->average_single_axis == 'T')
                    d->cross_power_time_avg[i_stokes] = oskar_mem_create(
                            beam_type, OSKAR_CPU, max_src, status);
                if (h->average_single_axis == 'C')
                    d->cross_power_channel_avg[i_stokes] = oskar_mem_create(
                            beam_type, OSKAR_CPU, max_src, status);
                if (h->average_time_and_channel)
                    d->cross_power_channel_and_time_avg[i_stokes] =
                            oskar_mem_create(beam_type, OSKAR_CPU,
                                    max_src, status);
            }
            /* Clear the device-side buffers whether they are new or reused. */
            if (d->auto_power[i_stokes])
                oskar_mem_clear_contents(d->auto_power[i_stokes], status);
            if (d->cross_power[i_stokes])
                oskar_mem_clear_contents(d->cross_power[i_stokes], status);
        }

        /* Timers. */
        if (!d->tmr_compute)
            d->tmr_compute = oskar_timer_create(OSKAR_TIMER_NATIVE);
    }
}
Пример #6
0
/*
 * Allocates (on first use) and resets the per-device working data of a
 * simulator handle.
 *
 * Does nothing if *status is already non-zero on entry. Of the
 * h->num_devices entries in h->d, the first h->num_gpus are bound to the
 * GPUs listed in h->gpu_ids and the rest use OSKAR_CPU. Timers, visibility
 * blocks and scratch memory are each created only while their guarding
 * pointer is still null; the three visibility blocks are cleared on every
 * call.
 *
 * h       Simulator handle whose device data is set up.
 * status  In/out status code passed on to the allocation calls.
 */
static void set_up_device_data(oskar_Simulator* h, int* status)
{
    int dev = 0, n_stations = 0, chunk_len = 0, scalar_type = 0, vis_type = 0;
    if (*status) return;

    /* Sizes and data types shared by every device. */
    n_stations  = oskar_telescope_num_stations(h->tel);
    chunk_len   = h->max_sources_per_chunk;
    scalar_type = h->prec | OSKAR_COMPLEX;
    vis_type    = scalar_type;
    if (oskar_telescope_pol_mode(h->tel) == OSKAR_POL_MODE_FULL)
    {
        vis_type |= OSKAR_MATRIX;
    }

    /* There must be at least as many devices as selected GPUs. */
    if (h->num_gpus > h->num_devices)
    {
        oskar_simulator_set_num_devices(h, h->num_gpus);
    }

    for (dev = 0; dev < h->num_devices; ++dev)
    {
        int location = OSKAR_CPU;
        DeviceData* d = &h->d[dev];
        d->previous_chunk_index = -1;

        /* The leading entries are GPUs; select the matching one. */
        if (dev < h->num_gpus)
        {
            oskar_device_set(h->gpu_ids[dev], status);
            location = OSKAR_GPU;
        }

        /* Timers: the compute timer stands in for the whole set. */
        if (!d->tmr_compute)
        {
            d->tmr_compute   = oskar_timer_create(OSKAR_TIMER_NATIVE);
            d->tmr_copy      = oskar_timer_create(OSKAR_TIMER_NATIVE);
            d->tmr_clip      = oskar_timer_create(OSKAR_TIMER_NATIVE);
            d->tmr_E         = oskar_timer_create(OSKAR_TIMER_NATIVE);
            d->tmr_K         = oskar_timer_create(OSKAR_TIMER_NATIVE);
            d->tmr_join      = oskar_timer_create(OSKAR_TIMER_NATIVE);
            d->tmr_correlate = oskar_timer_create(OSKAR_TIMER_NATIVE);
        }

        /* Visibility blocks: one on the device plus a pair on the host. */
        if (!d->vis_block)
        {
            d->vis_block = oskar_vis_block_create_from_header(
                    location, h->header, status);
            d->vis_block_cpu[0] = oskar_vis_block_create_from_header(
                    OSKAR_CPU, h->header, status);
            d->vis_block_cpu[1] = oskar_vis_block_create_from_header(
                    OSKAR_CPU, h->header, status);
        }
        oskar_vis_block_clear(d->vis_block, status);
        oskar_vis_block_clear(d->vis_block_cpu[0], status);
        oskar_vis_block_clear(d->vis_block_cpu[1], status);

        /* Device scratch memory: a null telescope copy marks a device that
         * has not been set up yet. */
        if (!d->tel)
        {
            d->u = oskar_mem_create(h->prec, location, n_stations, status);
            d->v = oskar_mem_create(h->prec, location, n_stations, status);
            d->w = oskar_mem_create(h->prec, location, n_stations, status);
            d->chunk = oskar_sky_create(h->prec, location, chunk_len, status);
            d->chunk_clip = oskar_sky_create(h->prec, location, chunk_len,
                    status);
            d->tel = oskar_telescope_create_copy(h->tel, location, status);
            d->J = oskar_jones_create(vis_type, location, n_stations,
                    chunk_len, status);
            /* R is only created for matrix-valued visibilities. */
            if (oskar_type_is_matrix(vis_type))
            {
                d->R = oskar_jones_create(vis_type, location, n_stations,
                        chunk_len, status);
            }
            else
            {
                d->R = 0;
            }
            d->E = oskar_jones_create(vis_type, location, n_stations,
                    chunk_len, status);
            d->K = oskar_jones_create(scalar_type, location, n_stations,
                    chunk_len, status);
            d->Z = 0;
            d->station_work = oskar_station_work_create(h->prec, location,
                    status);
        }
    }
}
Пример #7
0
// Fills CPU and GPU arrays with uniform random numbers in single and double
// precision using the same seed and counters, reports the time of each call,
// and checks that CPU and GPU results (and single and double precision
// results) agree to within the tolerances below. When the file-level `save`
// flag is set, all four arrays are also written to "random_uniform.txt".
TEST(Mem, random_uniform)
{
    int seed = 1;
    int c1 = 437;
    int c2 = 0;
    int c3 = 0xDECAFBAD;
    int n = 544357;
    int status = 0;
    double max_err = 0.0, avg_err = 0.0;
    oskar_Mem* v_cpu_f = oskar_mem_create(OSKAR_SINGLE, OSKAR_CPU, n, &status);
    oskar_Mem* v_gpu_f = oskar_mem_create(OSKAR_SINGLE, OSKAR_GPU, n, &status);
    oskar_Mem* v_cpu_d = oskar_mem_create(OSKAR_DOUBLE, OSKAR_CPU, n, &status);
    oskar_Mem* v_gpu_d = oskar_mem_create(OSKAR_DOUBLE, OSKAR_GPU, n, &status);
    oskar_Timer* tmr = oskar_timer_create(OSKAR_TIMER_CUDA);

    // Run in single precision.
    oskar_timer_start(tmr);
    oskar_mem_random_uniform(v_cpu_f, seed, c1, c2, c3, &status);
    report_time(n, "uniform", "single", "CPU", oskar_timer_elapsed(tmr));
    ASSERT_EQ(0, status) << oskar_get_error_string(status);
    oskar_timer_start(tmr);
    oskar_mem_random_uniform(v_gpu_f, seed, c1, c2, c3, &status);
    report_time(n, "uniform", "single", "GPU", oskar_timer_elapsed(tmr));
    ASSERT_EQ(0, status) << oskar_get_error_string(status);

    // Check consistency between CPU and GPU results.
    oskar_mem_evaluate_relative_error(v_gpu_f, v_cpu_f, 0,
            &max_err, &avg_err, 0, &status);
    EXPECT_LT(max_err, 1e-5);
    EXPECT_LT(avg_err, 1e-5);

    // Run in double precision.
    oskar_timer_start(tmr);
    oskar_mem_random_uniform(v_cpu_d, seed, c1, c2, c3, &status);
    report_time(n, "uniform", "double", "CPU", oskar_timer_elapsed(tmr));
    ASSERT_EQ(0, status) << oskar_get_error_string(status);
    oskar_timer_start(tmr);
    oskar_mem_random_uniform(v_gpu_d, seed, c1, c2, c3, &status);
    report_time(n, "uniform", "double", "GPU", oskar_timer_elapsed(tmr));
    ASSERT_EQ(0, status) << oskar_get_error_string(status);

    // Check consistency between CPU and GPU results.
    oskar_mem_evaluate_relative_error(v_gpu_d, v_cpu_d, 0,
            &max_err, &avg_err, 0, &status);
    EXPECT_LT(max_err, 1e-10);
    EXPECT_LT(avg_err, 1e-10);

    // Check consistency between single and double precision.
    oskar_mem_evaluate_relative_error(v_cpu_f, v_cpu_d, 0,
            &max_err, &avg_err, 0, &status);
    EXPECT_LT(max_err, 1e-5);
    EXPECT_LT(avg_err, 1e-5);

    if (save)
    {
        // The dump is an optional debugging aid: if the file cannot be
        // opened, warn and carry on instead of handing a null stream to
        // the writer and to fclose().
        FILE* fhan = fopen("random_uniform.txt", "w");
        if (fhan)
        {
            oskar_mem_save_ascii(fhan, 4, n, &status,
                    v_cpu_f, v_gpu_f, v_cpu_d, v_gpu_d);
            fclose(fhan);
        }
        else
        {
            fprintf(stderr,
                    "Could not open random_uniform.txt for writing.\n");
        }
    }

    // Free memory.
    oskar_mem_free(v_cpu_f, &status);
    oskar_mem_free(v_gpu_f, &status);
    oskar_mem_free(v_cpu_d, &status);
    oskar_mem_free(v_gpu_d, &status);
    oskar_timer_free(tmr);
}
Пример #8
0
    // Runs oskar_cross_correlate() twice, once per (precision, location)
    // pair, on test data built by createTestData(), then compares the two
    // visibility arrays with check_values() and records both timings as
    // test properties.
    //
    // prec1, prec2   Base precision of the first and second run.
    // loc1, loc2     Memory location of the first and second run; a GPU
    //                location selects the CUDA timer, anything else the
    //                native timer.
    // matrix         Non-zero to use matrix-valued visibilities.
    // extended       Passed to oskar_sky_set_use_extended().
    // time_average   Passed to oskar_telescope_set_time_average().
    void runTest(int prec1, int prec2, int loc1, int loc2, int matrix,
            int extended, double time_average)
    {
        const double frequency = 100e6;
        int status = 0;

        // Create the timers.
        oskar_Timer* timer1 = oskar_timer_create(
                loc1 == OSKAR_GPU ? OSKAR_TIMER_CUDA : OSKAR_TIMER_NATIVE);
        oskar_Timer* timer2 = oskar_timer_create(
                loc2 == OSKAR_GPU ? OSKAR_TIMER_CUDA : OSKAR_TIMER_NATIVE);

        // Run first part.
        createTestData(prec1, loc1, matrix);
        const int num_baselines1 = oskar_telescope_num_baselines(tel);
        const int type1 = prec1 | OSKAR_COMPLEX | (matrix ? OSKAR_MATRIX : 0);
        oskar_Mem* vis1 = oskar_mem_create(type1, loc1, num_baselines1,
                &status);
        oskar_mem_clear_contents(vis1, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        oskar_sky_set_use_extended(sky, extended);
        oskar_telescope_set_channel_bandwidth(tel, bandwidth);
        oskar_telescope_set_time_average(tel, time_average);
        oskar_timer_start(timer1);
        oskar_cross_correlate(vis1, oskar_sky_num_sources(sky), jones, sky,
                tel, u_, v_, w_, 1.0, frequency, &status);
        const double time1 = oskar_timer_elapsed(timer1);
        destroyTestData();
        ASSERT_EQ(0, status) << oskar_get_error_string(status);

        // Run second part.
        createTestData(prec2, loc2, matrix);
        const int num_baselines2 = oskar_telescope_num_baselines(tel);
        const int type2 = prec2 | OSKAR_COMPLEX | (matrix ? OSKAR_MATRIX : 0);
        oskar_Mem* vis2 = oskar_mem_create(type2, loc2, num_baselines2,
                &status);
        oskar_mem_clear_contents(vis2, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        oskar_sky_set_use_extended(sky, extended);
        oskar_telescope_set_channel_bandwidth(tel, bandwidth);
        oskar_telescope_set_time_average(tel, time_average);
        oskar_timer_start(timer2);
        oskar_cross_correlate(vis2, oskar_sky_num_sources(sky), jones, sky,
                tel, u_, v_, w_, 1.0, frequency, &status);
        const double time2 = oskar_timer_elapsed(timer2);
        destroyTestData();
        ASSERT_EQ(0, status) << oskar_get_error_string(status);

        // Destroy the timers.
        oskar_timer_free(timer1);
        oskar_timer_free(timer2);

        // Compare results.
        check_values(vis1, vis2);

        // Free memory.
        oskar_mem_free(vis1, &status);
        oskar_mem_free(vis2, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);

        // Record properties for test.
        RecordProperty("SourceType", extended ? "Gaussian" : "Point");
        RecordProperty("JonesType", matrix ? "Matrix" : "Scalar");
        RecordProperty("TimeSmearing", time_average == 0.0 ? "off" : "on");
        RecordProperty("Prec1", prec1 == OSKAR_SINGLE ? "Single" : "Double");
        RecordProperty("Loc1", loc1 == OSKAR_CPU ? "CPU" : "GPU");
        RecordProperty("Time1_ms", int(time1 * 1000));
        RecordProperty("Prec2", prec2 == OSKAR_SINGLE ? "Single" : "Double");
        RecordProperty("Loc2", loc2 == OSKAR_CPU ? "CPU" : "GPU");
        RecordProperty("Time2_ms", int(time2 * 1000));

#ifdef ALLOW_PRINTING
        // Print times.
        printf("  > %s. %s sources. Time smearing %s.\n",
                matrix ? "Matrix" : "Scalar",
                extended ? "Gaussian" : "Point",
                time_average == 0.0 ? "off" : "on");
        printf("    %s precision %s: %.2f ms, %s precision %s: %.2f ms\n",
                prec1 == OSKAR_SINGLE ? "Single" : "Double",
                loc1 == OSKAR_CPU ? "CPU" : "GPU",
                time1 * 1000.0,
                prec2 == OSKAR_SINGLE ? "Single" : "Double",
                loc2 == OSKAR_CPU ? "CPU" : "GPU",
                time2 * 1000.0);
#endif
    }
Пример #9
0
// Runs an exclusive prefix sum over n pseudo-random integers on the CPU and,
// where compiled in, on CUDA and OpenCL devices, and checks that each device
// result matches the CPU result. When the file-level `save` flag is set, the
// available results are also written to "prefix_sum_test.txt".
TEST(prefix_sum, test)
{
    int n = 100000, status = 0, exclusive = 1;
    oskar_Mem* in_cpu = oskar_mem_create(OSKAR_INT, OSKAR_CPU, n, &status);
    oskar_Mem* out_cpu = oskar_mem_create(OSKAR_INT, OSKAR_CPU, n, &status);
    oskar_Timer* tmr = oskar_timer_create(OSKAR_TIMER_NATIVE);

    // Fill input with pseudo-random integers from 0 to 9 (the expression
    // yields 10 only when rand() returns exactly RAND_MAX). The fixed seed
    // keeps the input reproducible.
    int* t = oskar_mem_int(in_cpu, &status);
    srand(1556);
    for (int i = 0; i < n; ++i)
        t[i] = (int) (10.0 * rand() / ((double) RAND_MAX));
    t[0] = 3;

    // Run on CPU.
    oskar_timer_start(tmr);
    oskar_prefix_sum(n, in_cpu, out_cpu, 0, exclusive, &status);
    EXPECT_EQ(0, status);
    printf("Prefix sum on CPU took %.3f sec\n", oskar_timer_elapsed(tmr));

#ifdef OSKAR_HAVE_CUDA
    // Run on GPU with CUDA.
    oskar_Mem* in_gpu = oskar_mem_create_copy(in_cpu, OSKAR_GPU, &status);
    oskar_Mem* out_gpu = oskar_mem_create(OSKAR_INT, OSKAR_GPU, n, &status);
    oskar_timer_start(tmr);
    oskar_prefix_sum(n, in_gpu, out_gpu, 0, exclusive, &status);
    EXPECT_EQ(0, status);
    printf("Prefix sum on GPU took %.3f sec\n", oskar_timer_elapsed(tmr));

    // Check consistency between CPU and GPU results.
    oskar_Mem* out_cmp_gpu = oskar_mem_create_copy(out_gpu, OSKAR_CPU, &status);
    EXPECT_EQ(0, oskar_mem_different(out_cpu, out_cmp_gpu, n, &status));
#endif

#ifdef OSKAR_HAVE_OPENCL
    // Run on OpenCL.
    oskar_Mem* in_cl = oskar_mem_create_copy(in_cpu, OSKAR_CL, &status);
    oskar_Mem* out_cl = oskar_mem_create(OSKAR_INT, OSKAR_CL, n, &status);
    oskar_timer_start(tmr);
    printf("Using %s\n", oskar_cl_device_name());
    oskar_prefix_sum(n, in_cl, out_cl, 0, exclusive, &status);
    EXPECT_EQ(0, status);
    printf("Prefix sum on OpenCL took %.3f sec\n", oskar_timer_elapsed(tmr));

    // Check consistency between CPU and OpenCL results.
    oskar_Mem* out_cmp_cl = oskar_mem_create_copy(out_cl, OSKAR_CPU, &status);
    EXPECT_EQ(0, oskar_mem_different(out_cpu, out_cmp_cl, n, &status));
#endif

    if (save)
    {
        // One column for the CPU result plus one per compiled-in back end.
        size_t num_mem = 1;
        FILE* fhan = fopen("prefix_sum_test.txt", "w");
#ifdef OSKAR_HAVE_CUDA
        num_mem += 1;
#endif
#ifdef OSKAR_HAVE_OPENCL
        num_mem += 1;
#endif
        // The dump is an optional debugging aid: if the file cannot be
        // opened, warn and carry on instead of handing a null stream to
        // the writer and to fclose().
        if (fhan)
        {
            oskar_mem_save_ascii(fhan, num_mem, n, &status, out_cpu
#ifdef OSKAR_HAVE_CUDA
                    , out_cmp_gpu
#endif
#ifdef OSKAR_HAVE_OPENCL
                    , out_cmp_cl
#endif
                    );
            fclose(fhan);
        }
        else
        {
            fprintf(stderr,
                    "Could not open prefix_sum_test.txt for writing.\n");
        }
    }

    // Clean up.
    oskar_timer_free(tmr);
    oskar_mem_free(in_cpu, &status);
    oskar_mem_free(out_cpu, &status);
#ifdef OSKAR_HAVE_CUDA
    oskar_mem_free(in_gpu, &status);
    oskar_mem_free(out_gpu, &status);
    oskar_mem_free(out_cmp_gpu, &status);
#endif
#ifdef OSKAR_HAVE_OPENCL
    oskar_mem_free(in_cl, &status);
    oskar_mem_free(out_cl, &status);
    oskar_mem_free(out_cmp_cl, &status);
#endif
}
Пример #10
0
    // Runs oskar_evaluate_cross_power() twice, once per (precision,
    // location) pair, on test data built by createTestData(), then compares
    // the two beam arrays with check_values() and records both timings as
    // test properties.
    //
    // prec1, prec2   Base precision of the first and second run.
    // loc1, loc2     Memory location of the first and second run; a GPU
    //                location selects the CUDA timer, anything else the
    //                native timer.
    // matrix         Non-zero to use matrix-valued beam data.
    void runTest(int prec1, int prec2, int loc1, int loc2, int matrix)
    {
        int status = 0;

        // Create the timers.
        oskar_Timer* timer1 = oskar_timer_create(
                loc1 == OSKAR_GPU ? OSKAR_TIMER_CUDA : OSKAR_TIMER_NATIVE);
        oskar_Timer* timer2 = oskar_timer_create(
                loc2 == OSKAR_GPU ? OSKAR_TIMER_CUDA : OSKAR_TIMER_NATIVE);

        // Run first part: the output beam is created and cleared before
        // the test data is built.
        const int type1 = prec1 | OSKAR_COMPLEX | (matrix ? OSKAR_MATRIX : 0);
        oskar_Mem* beam1 = oskar_mem_create(type1, loc1, num_sources, &status);
        oskar_mem_clear_contents(beam1, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        createTestData(prec1, loc1, matrix);
        oskar_timer_start(timer1);
        oskar_evaluate_cross_power(num_sources, num_stations,
                jones, 0, beam1, &status);
        const double time1 = oskar_timer_elapsed(timer1);
        destroyTestData();
        ASSERT_EQ(0, status) << oskar_get_error_string(status);

        // Run second part.
        const int type2 = prec2 | OSKAR_COMPLEX | (matrix ? OSKAR_MATRIX : 0);
        oskar_Mem* beam2 = oskar_mem_create(type2, loc2, num_sources, &status);
        oskar_mem_clear_contents(beam2, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);
        createTestData(prec2, loc2, matrix);
        oskar_timer_start(timer2);
        oskar_evaluate_cross_power(num_sources, num_stations,
                jones, 0, beam2, &status);
        const double time2 = oskar_timer_elapsed(timer2);
        destroyTestData();
        ASSERT_EQ(0, status) << oskar_get_error_string(status);

        // Destroy the timers.
        oskar_timer_free(timer1);
        oskar_timer_free(timer2);

        // Compare results.
        check_values(beam1, beam2);

        // Free memory.
        oskar_mem_free(beam1, &status);
        oskar_mem_free(beam2, &status);
        ASSERT_EQ(0, status) << oskar_get_error_string(status);

        // Record properties for test.
        RecordProperty("JonesType", matrix ? "Matrix" : "Scalar");
        RecordProperty("Prec1", prec1 == OSKAR_SINGLE ? "Single" : "Double");
        RecordProperty("Loc1", loc1 == OSKAR_CPU ? "CPU" : "GPU");
        RecordProperty("Time1_ms", int(time1 * 1000));
        RecordProperty("Prec2", prec2 == OSKAR_SINGLE ? "Single" : "Double");
        RecordProperty("Loc2", loc2 == OSKAR_CPU ? "CPU" : "GPU");
        RecordProperty("Time2_ms", int(time2 * 1000));

#ifdef ALLOW_PRINTING
        // Print times.
        printf("  > %s.\n", matrix ? "Matrix" : "Scalar");
        printf("    %s precision %s: %.2f ms, %s precision %s: %.2f ms\n",
                prec1 == OSKAR_SINGLE ? "Single" : "Double",
                loc1 == OSKAR_CPU ? "CPU" : "GPU",
                time1 * 1000.0,
                prec2 == OSKAR_SINGLE ? "Single" : "Double",
                loc2 == OSKAR_CPU ? "CPU" : "GPU",
                time2 * 1000.0);
#endif
    }
Пример #11
0
// Times repeated array pattern evaluations for one operation type.
//
// num_elements    Number of elements in the station and weights array.
// num_directions  Length of the direction coordinate and beam arrays.
// op_type         O2C times oskar_evaluate_array_pattern(); C2C and M2M
//                 time oskar_evaluate_array_pattern_hierarchical() with a
//                 signal array of num_directions * num_elements entries
//                 (M2M uses the matrix variant of the complex type). Any
//                 other value times nothing.
// loc             Memory location passed to every create call.
// precision       Base data type of the station and coordinate arrays.
// evaluate_2d     When true the station is flagged as not 3D.
// niter           Number of evaluation calls to time.
// time_taken      Receives oskar_timer_elapsed() after the timed loop; left
//                 untouched if set-up failed or op_type was not recognised.
//
// Returns the first error encountered, or 0 on success. Everything created
// here is released on every path, including after a failed allocation.
int benchmark(int num_elements, int num_directions, OpType op_type,
        int loc, int precision, bool evaluate_2d, int niter, double& time_taken)
{
    int status = 0;
    int type = precision | OSKAR_COMPLEX;

    // Start from null so the single clean-up section below is safe no
    // matter how far set-up got.
    oskar_Mem *x = 0, *y = 0, *z = 0, *weights = 0, *beam = 0, *signal = 0;

    // Create the timer.
    oskar_Timer *tmr = oskar_timer_create(OSKAR_TIMER_CUDA);

    oskar_Station* station = oskar_station_create(precision, loc,
            num_elements, &status);
    if (!status)
    {
        station->array_is_3d = (evaluate_2d) ? OSKAR_FALSE : OSKAR_TRUE;
        x = oskar_mem_create(precision, loc, num_directions, &status);
        y = oskar_mem_create(precision, loc, num_directions, &status);
        z = oskar_mem_create(precision, loc, num_directions, &status);
    }

    if (!status && op_type == O2C)
    {
        beam = oskar_mem_create(type, loc, num_directions, &status);
        weights = oskar_mem_create(type, loc, num_elements, &status);

        // Only time the kernel when every buffer was created.
        if (!status)
        {
            oskar_timer_start(tmr);
            for (int i = 0; i < niter; ++i)
            {
                oskar_evaluate_array_pattern(beam, 2.0 * M_PI, station,
                        num_directions, x, y, z, weights, &status);
            }
            time_taken = oskar_timer_elapsed(tmr);
        }
    }
    else if (!status && (op_type == C2C || op_type == M2M))
    {
        const int num_signals = num_directions * num_elements;

        // The weights stay scalar complex; only beam and signal become
        // matrix-valued for M2M.
        weights = oskar_mem_create(type, loc, num_elements, &status);
        if (op_type != C2C)
            type |= OSKAR_MATRIX;
        beam = oskar_mem_create(type, loc, num_directions, &status);
        signal = oskar_mem_create(type, loc, num_signals, &status);

        if (!status)
        {
            oskar_timer_start(tmr);
            for (int i = 0; i < niter; ++i)
            {
                oskar_evaluate_array_pattern_hierarchical(beam, 2.0 * M_PI,
                        station, num_directions, x, y, z, signal, weights,
                        &status);
            }
            time_taken = oskar_timer_elapsed(tmr);
        }
    }

    // Destroy the timer.
    oskar_timer_free(tmr);

    // Free memory. A separate status is used so that an earlier error is
    // neither overwritten by, nor allowed to influence, the clean-up calls.
    int free_status = 0;
    if (station)
        oskar_station_free(station, &free_status);
    oskar_mem_free(x, &free_status);
    oskar_mem_free(y, &free_status);
    oskar_mem_free(z, &free_status);
    oskar_mem_free(weights, &free_status);
    oskar_mem_free(beam, &free_status);
    oskar_mem_free(signal, &free_status);

    return status ? status : free_status;
}