Beispiel #1
0
void oskar_simulator_run_block(oskar_Simulator* h, int block_index,
        int device_id, int* status)
{
    double obs_start_mjd, dt_dump_days;
    int i_active, time_index_start, time_index_end;
    int num_channels, num_times_block, total_chunks, total_times;
    DeviceData* d;
    if (*status) return;

    /* Check that initialisation has happened. We can't initialise here,
     * as we're already multi-threaded at this point. */
    if (!h->header)
    {
        *status = OSKAR_ERR_MEMORY_NOT_ALLOCATED;
        oskar_log_error(h->log, "Simulator not initalised. "
                "Call oskar_simulator_check_init() first.");
        return;
    }

#ifdef _OPENMP
    if (!h->coords_only)
    {
        /* Disable any nested parallelism. */
        omp_set_num_threads(1);
        omp_set_nested(0);
    }
#endif

    /* Set the GPU to use. (Supposed to be a very low-overhead call.) */
    if (device_id >= 0 && device_id < h->num_gpus)
        oskar_device_set(h->gpu_ids[device_id], status);

    /* Clear the visibility block. */
    i_active = block_index % 2; /* Index of the active buffer. */
    d = &(h->d[device_id]);
    oskar_timer_resume(d->tmr_compute);
    oskar_vis_block_clear(d->vis_block, status);

    /* Set the visibility block meta-data. */
    total_chunks = h->num_sky_chunks;
    num_channels = h->num_channels;
    total_times = h->num_time_steps;
    obs_start_mjd = h->time_start_mjd_utc;
    dt_dump_days = h->time_inc_sec / 86400.0;
    time_index_start = block_index * h->max_times_per_block;
    time_index_end = time_index_start + h->max_times_per_block - 1;
    if (time_index_end >= total_times)
        time_index_end = total_times - 1;
    num_times_block = 1 + time_index_end - time_index_start;

    /* Set the number of active times in the block. */
    oskar_vis_block_set_num_times(d->vis_block, num_times_block, status);
    oskar_vis_block_set_start_time_index(d->vis_block, time_index_start);

    /* Go though all possible work units in the block. A work unit is defined
     * as the simulation for one time and one sky chunk. */
    while (!h->coords_only)
    {
        oskar_Sky* sky;
        int i_work_unit, i_chunk, i_time, i_channel, sim_time_idx;

        oskar_mutex_lock(h->mutex);
        i_work_unit = (h->work_unit_index)++;
        oskar_mutex_unlock(h->mutex);
        if ((i_work_unit >= num_times_block * total_chunks) || *status) break;

        /* Convert slice index to chunk/time index. */
        i_chunk      = i_work_unit / num_times_block;
        i_time       = i_work_unit - i_chunk * num_times_block;
        sim_time_idx = time_index_start + i_time;

        /* Copy sky chunk to device only if different from the previous one. */
        if (i_chunk != d->previous_chunk_index)
        {
            oskar_timer_resume(d->tmr_copy);
            oskar_sky_copy(d->chunk, h->sky_chunks[i_chunk], status);
            oskar_timer_pause(d->tmr_copy);
        }
        sky = h->apply_horizon_clip ? d->chunk_clip : d->chunk;

        /* Apply horizon clip if required. */
        if (h->apply_horizon_clip)
        {
            double gast, mjd;
            mjd = obs_start_mjd + dt_dump_days * (sim_time_idx + 0.5);
            gast = oskar_convert_mjd_to_gast_fast(mjd);
            oskar_timer_resume(d->tmr_clip);
            oskar_sky_horizon_clip(d->chunk_clip, d->chunk, d->tel, gast,
                    d->station_work, status);
            oskar_timer_pause(d->tmr_clip);
        }

        /* Simulate all baselines for all channels for this time and chunk. */
        for (i_channel = 0; i_channel < num_channels; ++i_channel)
        {
            if (*status) break;
            if (h->log)
            {
                oskar_mutex_lock(h->mutex);
                oskar_log_message(h->log, 'S', 1, "Time %*i/%i, "
                        "Chunk %*i/%i, Channel %*i/%i [Device %i, %i sources]",
                        disp_width(total_times), sim_time_idx + 1, total_times,
                        disp_width(total_chunks), i_chunk + 1, total_chunks,
                        disp_width(num_channels), i_channel + 1, num_channels,
                        device_id, oskar_sky_num_sources(sky));
                oskar_mutex_unlock(h->mutex);
            }
            sim_baselines(h, d, sky, i_channel, i_time, sim_time_idx, status);
        }
        d->previous_chunk_index = i_chunk;
    }

    /* Copy the visibility block to host memory. */
    oskar_timer_resume(d->tmr_copy);
    oskar_vis_block_copy(d->vis_block_cpu[i_active], d->vis_block, status);
    oskar_timer_pause(d->tmr_copy);
    oskar_timer_pause(d->tmr_compute);
}
Beispiel #2
0
void oskar_simulator_run(oskar_Simulator* h, int* status)
{
    int i, num_threads = 1, num_vis_blocks;
    if (*status) return;

    /* Check the visibilities are going somewhere. */
    if (!h->vis_name
#ifndef OSKAR_NO_MS
            && !h->ms_name
#endif
    )
    {
        oskar_log_error(h->log, "No output file specified.");
#ifdef OSKAR_NO_MS
        if (h->ms_name)
            oskar_log_error(h->log,
                    "OSKAR was compiled without Measurement Set support.");
#endif
        *status = OSKAR_ERR_FILE_IO;
        return;
    }

    /* Initialise if required. */
    oskar_simulator_check_init(h, status);

    /* Get the number of visibility blocks to be processed. */
    num_vis_blocks = oskar_simulator_num_vis_blocks(h);

    /* Record memory usage. */
    if (h->log && !*status)
    {
        oskar_log_section(h->log, 'M', "Initial memory usage");
#ifdef OSKAR_HAVE_CUDA
        for (i = 0; i < h->num_gpus; ++i)
            oskar_cuda_mem_log(h->log, 0, h->gpu_ids[i]);
#endif
        system_mem_log(h->log);
        oskar_log_section(h->log, 'M', "Starting simulation...");
    }

    /* Start simulation timer. */
    oskar_timer_start(h->tmr_sim);

    /*-----------------------------------------------------------------------
     *-- START OF MULTITHREADED SIMULATION CODE -----------------------------
     *-----------------------------------------------------------------------*/
    /* Loop over blocks of observation time, running simulation and file
     * writing one block at a time. Simulation and file output are overlapped
     * by using double buffering, and a dedicated thread is used for file
     * output.
     *
     * Thread 0 is used for file writes.
     * Threads 1 to n (mapped to compute devices) do the simulation.
     *
     * Note that no write is launched on the first loop counter (as no
     * data are ready yet) and no simulation is performed for the last loop
     * counter (which corresponds to the last block + 1) as this iteration
     * simply writes the last block.
     */
#ifdef _OPENMP
    num_threads = h->num_devices + 1;
    omp_set_num_threads(num_threads);
    omp_set_nested(0);
#else
    oskar_log_warning(h->log, "OpenMP not found: Using one compute device.");
#endif

    oskar_simulator_reset_work_unit_index(h);
#pragma omp parallel
    {
        int b, thread_id = 0, device_id = 0;

        /* Get host thread ID and device ID. */
#ifdef _OPENMP
        thread_id = omp_get_thread_num();
        device_id = thread_id - 1;
#endif

        /* Loop over simulation time blocks (+1, for the last write). */
        for (b = 0; b < num_vis_blocks + 1; ++b)
        {
            if ((thread_id > 0 || num_threads == 1) && b < num_vis_blocks)
                oskar_simulator_run_block(h, b, device_id, status);
            if (thread_id == 0 && b > 0)
            {
                oskar_VisBlock* block;
                block = oskar_simulator_finalise_block(h, b - 1, status);
                oskar_simulator_write_block(h, block, b - 1, status);
            }

            /* Barrier 1: Reset work unit index. */
#pragma omp barrier
            if (thread_id == 0)
                oskar_simulator_reset_work_unit_index(h);

            /* Barrier 2: Synchronise before moving to the next block. */
#pragma omp barrier
            if (thread_id == 0 && b < num_vis_blocks && h->log && !*status)
                oskar_log_message(h->log, 'S', 0, "Block %*i/%i (%3.0f%%) "
                        "complete. Simulation time elapsed: %.3f s",
                        disp_width(num_vis_blocks), b+1, num_vis_blocks,
                        100.0 * (b+1) / (double)num_vis_blocks,
                        oskar_timer_elapsed(h->tmr_sim));
        }
    }
    /*-----------------------------------------------------------------------
     *-- END OF MULTITHREADED SIMULATION CODE -------------------------------
     *-----------------------------------------------------------------------*/

    /* Record memory usage. */
    if (h->log && !*status)
    {
        oskar_log_section(h->log, 'M', "Final memory usage");
#ifdef OSKAR_HAVE_CUDA
        for (i = 0; i < h->num_gpus; ++i)
            oskar_cuda_mem_log(h->log, 0, h->gpu_ids[i]);
#endif
        system_mem_log(h->log);
    }

    /* If there are sources in the simulation and the station beam is not
     * normalised to 1.0 at the phase centre, the values of noise RMS
     * may give a very unexpected S/N ratio!
     * The alternative would be to scale the noise to match the station
     * beam gain but that would require knowledge of the station beam
     * amplitude at the phase centre for each time and channel. */
    if (h->log && oskar_telescope_noise_enabled(h->tel) && !*status)
    {
        int have_sources, amp_calibrated;
        have_sources = (h->num_sky_chunks > 0 &&
                oskar_sky_num_sources(h->sky_chunks[0]) > 0);
        amp_calibrated = oskar_station_normalise_final_beam(
                oskar_telescope_station_const(h->tel, 0));
        if (have_sources && !amp_calibrated)
        {
            const char* a = "WARNING: System noise added to visibilities";
            const char* b = "without station beam normalisation enabled.";
            const char* c = "This will give an invalid signal to noise ratio.";
            oskar_log_line(h->log, 'W', ' '); oskar_log_line(h->log, 'W', '*');
            oskar_log_message(h->log, 'W', -1, a);
            oskar_log_message(h->log, 'W', -1, b);
            oskar_log_message(h->log, 'W', -1, c);
            oskar_log_line(h->log, 'W', '*'); oskar_log_line(h->log, 'W', ' ');
        }
    }

    /* Record times and summarise output files. */
    if (h->log && !*status)
    {
        size_t log_size = 0;
        char* log_data;
        oskar_log_set_value_width(h->log, 25);
        record_timing(h);
        oskar_log_section(h->log, 'M', "Simulation complete");
        oskar_log_message(h->log, 'M', 0, "Output(s):");
        if (h->vis_name)
            oskar_log_value(h->log, 'M', 1,
                    "OSKAR binary file", "%s", h->vis_name);
        if (h->ms_name)
            oskar_log_value(h->log, 'M', 1,
                    "Measurement Set", "%s", h->ms_name);

        /* Write simulation log to the output files. */
        log_data = oskar_log_file_data(h->log, &log_size);
#ifndef OSKAR_NO_MS
        if (h->ms)
            oskar_ms_add_history(h->ms, "OSKAR_LOG", log_data, log_size);
#endif
        if (h->vis)
            oskar_binary_write(h->vis, OSKAR_CHAR, OSKAR_TAG_GROUP_RUN,
                    OSKAR_TAG_RUN_LOG, 0, log_size, log_data, status);
        free(log_data);
    }

    /* Finalise. */
    oskar_simulator_finalise(h, status);
}
static void* run_blocks(void* arg)
{
    oskar_Interferometer* h;
    int b, thread_id, device_id, num_blocks, num_threads, *status;

    /* Get thread function arguments. */
    h = ((ThreadArgs*)arg)->h;
    num_threads = ((ThreadArgs*)arg)->num_threads;
    thread_id = ((ThreadArgs*)arg)->thread_id;
    device_id = thread_id - 1;
    status = &(h->status);

#ifdef _OPENMP
    /* Disable any nested parallelism. */
    omp_set_nested(0);
    omp_set_num_threads(1);
#endif

    /* Loop over blocks of observation time, running simulation and file
     * writing one block at a time. Simulation and file output are overlapped
     * by using double buffering, and a dedicated thread is used for file
     * output.
     *
     * Thread 0 is used for file writes.
     * Threads 1 to n (mapped to compute devices) do the simulation.
     *
     * Note that no write is launched on the first loop counter (as no
     * data are ready yet) and no simulation is performed for the last loop
     * counter (which corresponds to the last block + 1) as this iteration
     * simply writes the last block.
     */
    num_blocks = oskar_interferometer_num_vis_blocks(h);
    for (b = 0; b < num_blocks + 1; ++b)
    {
        if ((thread_id > 0 || num_threads == 1) && b < num_blocks)
            oskar_interferometer_run_block(h, b, device_id, status);
        if (thread_id == 0 && b > 0)
        {
            oskar_VisBlock* block;
            block = oskar_interferometer_finalise_block(h, b - 1, status);
            oskar_interferometer_write_block(h, block, b - 1, status);
        }

        /* Barrier 1: Reset work unit index and print status. */
        oskar_barrier_wait(h->barrier);
        if (thread_id == 0)
        {
            oskar_interferometer_reset_work_unit_index(h);
            if (b < num_blocks && h->log && !*status)
                oskar_log_message(h->log, 'S', 0, "Block %*i/%i (%3.0f%%) "
                        "complete. Simulation time elapsed: %.3f s",
                        disp_width(num_blocks), b+1, num_blocks,
                        100.0 * (b+1) / (double)num_blocks,
                        oskar_timer_elapsed(h->tmr_sim));
        }

        /* Barrier 2: Synchronise before moving to the next block. */
        oskar_barrier_wait(h->barrier);
    }
    return 0;
}