Exemple #1
0
static void oskar_condition_unlock(oskar_ConditionVar* var)
{
    oskar_mutex_unlock(&var->lock);
}
Exemple #2
0
void oskar_simulator_run_block(oskar_Simulator* h, int block_index,
        int device_id, int* status)
{
    double obs_start_mjd, dt_dump_days;
    int i_active, time_index_start, time_index_end;
    int num_channels, num_times_block, total_chunks, total_times;
    DeviceData* d;
    if (*status) return;

    /* Check that initialisation has happened. We can't initialise here,
     * as we're already multi-threaded at this point. */
    if (!h->header)
    {
        *status = OSKAR_ERR_MEMORY_NOT_ALLOCATED;
        oskar_log_error(h->log, "Simulator not initalised. "
                "Call oskar_simulator_check_init() first.");
        return;
    }

#ifdef _OPENMP
    if (!h->coords_only)
    {
        /* Disable any nested parallelism. */
        omp_set_num_threads(1);
        omp_set_nested(0);
    }
#endif

    /* Set the GPU to use. (Supposed to be a very low-overhead call.) */
    if (device_id >= 0 && device_id < h->num_gpus)
        oskar_device_set(h->gpu_ids[device_id], status);

    /* Clear the visibility block. */
    i_active = block_index % 2; /* Index of the active buffer. */
    d = &(h->d[device_id]);
    oskar_timer_resume(d->tmr_compute);
    oskar_vis_block_clear(d->vis_block, status);

    /* Set the visibility block meta-data. */
    total_chunks = h->num_sky_chunks;
    num_channels = h->num_channels;
    total_times = h->num_time_steps;
    obs_start_mjd = h->time_start_mjd_utc;
    dt_dump_days = h->time_inc_sec / 86400.0;
    time_index_start = block_index * h->max_times_per_block;
    time_index_end = time_index_start + h->max_times_per_block - 1;
    if (time_index_end >= total_times)
        time_index_end = total_times - 1;
    num_times_block = 1 + time_index_end - time_index_start;

    /* Set the number of active times in the block. */
    oskar_vis_block_set_num_times(d->vis_block, num_times_block, status);
    oskar_vis_block_set_start_time_index(d->vis_block, time_index_start);

    /* Go though all possible work units in the block. A work unit is defined
     * as the simulation for one time and one sky chunk. */
    while (!h->coords_only)
    {
        oskar_Sky* sky;
        int i_work_unit, i_chunk, i_time, i_channel, sim_time_idx;

        oskar_mutex_lock(h->mutex);
        i_work_unit = (h->work_unit_index)++;
        oskar_mutex_unlock(h->mutex);
        if ((i_work_unit >= num_times_block * total_chunks) || *status) break;

        /* Convert slice index to chunk/time index. */
        i_chunk      = i_work_unit / num_times_block;
        i_time       = i_work_unit - i_chunk * num_times_block;
        sim_time_idx = time_index_start + i_time;

        /* Copy sky chunk to device only if different from the previous one. */
        if (i_chunk != d->previous_chunk_index)
        {
            oskar_timer_resume(d->tmr_copy);
            oskar_sky_copy(d->chunk, h->sky_chunks[i_chunk], status);
            oskar_timer_pause(d->tmr_copy);
        }
        sky = h->apply_horizon_clip ? d->chunk_clip : d->chunk;

        /* Apply horizon clip if required. */
        if (h->apply_horizon_clip)
        {
            double gast, mjd;
            mjd = obs_start_mjd + dt_dump_days * (sim_time_idx + 0.5);
            gast = oskar_convert_mjd_to_gast_fast(mjd);
            oskar_timer_resume(d->tmr_clip);
            oskar_sky_horizon_clip(d->chunk_clip, d->chunk, d->tel, gast,
                    d->station_work, status);
            oskar_timer_pause(d->tmr_clip);
        }

        /* Simulate all baselines for all channels for this time and chunk. */
        for (i_channel = 0; i_channel < num_channels; ++i_channel)
        {
            if (*status) break;
            if (h->log)
            {
                oskar_mutex_lock(h->mutex);
                oskar_log_message(h->log, 'S', 1, "Time %*i/%i, "
                        "Chunk %*i/%i, Channel %*i/%i [Device %i, %i sources]",
                        disp_width(total_times), sim_time_idx + 1, total_times,
                        disp_width(total_chunks), i_chunk + 1, total_chunks,
                        disp_width(num_channels), i_channel + 1, num_channels,
                        device_id, oskar_sky_num_sources(sky));
                oskar_mutex_unlock(h->mutex);
            }
            sim_baselines(h, d, sky, i_channel, i_time, sim_time_idx, status);
        }
        d->previous_chunk_index = i_chunk;
    }

    /* Copy the visibility block to host memory. */
    oskar_timer_resume(d->tmr_copy);
    oskar_vis_block_copy(d->vis_block_cpu[i_active], d->vis_block, status);
    oskar_timer_pause(d->tmr_copy);
    oskar_timer_pause(d->tmr_compute);
}
static void* run_blocks(void* arg)
{
    oskar_Imager* h;
    oskar_Mem *plane, *uu, *vv, *ww = 0, *amp, *weight, *block, *l, *m, *n;
    size_t max_size;
    const size_t smallest = 1024, largest = 65536;
    int dev_loc = OSKAR_CPU, *status;

    /* Get thread function arguments. */
    h = ((ThreadArgs*)arg)->h;
    const int thread_id = ((ThreadArgs*)arg)->thread_id;
    const int num_vis = ((ThreadArgs*)arg)->num_vis;
    plane = ((ThreadArgs*)arg)->plane;
    status = &(h->status);

    /* Set the device used by the thread. */
    if (thread_id < h->num_gpus)
    {
        dev_loc = h->dev_loc;
        oskar_device_set(h->dev_loc, h->gpu_ids[thread_id], status);
    }

    /* Copy visibility data to device. */
    uu = oskar_mem_create_copy(((ThreadArgs*)arg)->uu, dev_loc, status);
    vv = oskar_mem_create_copy(((ThreadArgs*)arg)->vv, dev_loc, status);
    amp = oskar_mem_create_copy(((ThreadArgs*)arg)->amp, dev_loc, status);
    weight = oskar_mem_create_copy(((ThreadArgs*)arg)->weight, dev_loc, status);
    if (h->algorithm == OSKAR_ALGORITHM_DFT_3D)
        ww = oskar_mem_create_copy(((ThreadArgs*)arg)->ww, dev_loc, status);

#ifdef _OPENMP
    /* Disable nested parallelism. */
    omp_set_nested(0);
    omp_set_num_threads(1);
#endif

    /* Calculate the maximum pixel block size, and number of blocks. */
    const size_t num_pixels = (size_t)h->image_size * (size_t)h->image_size;
    max_size = num_pixels / h->num_devices;
    max_size = ((max_size + smallest - 1) / smallest) * smallest;
    if (max_size > largest) max_size = largest;
    if (max_size < smallest) max_size = smallest;
    const int num_blocks = (int) ((num_pixels + max_size - 1) / max_size);

    /* Allocate device memory for pixel block data. */
    block = oskar_mem_create(h->imager_prec, dev_loc, 0, status);
    l = oskar_mem_create(h->imager_prec, dev_loc, max_size, status);
    m = oskar_mem_create(h->imager_prec, dev_loc, max_size, status);
    n = oskar_mem_create(h->imager_prec, dev_loc, max_size, status);

    /* Loop until all blocks are done. */
    for (;;)
    {
        size_t block_size;

        /* Get a unique block index. */
        oskar_mutex_lock(h->mutex);
        const int i_block = (h->i_block)++;
        oskar_mutex_unlock(h->mutex);
        if ((i_block >= num_blocks) || *status) break;

        /* Calculate the block size. */
        const size_t block_start = i_block * max_size;
        block_size = num_pixels - block_start;
        if (block_size > max_size) block_size = max_size;

        /* Copy the (l,m,n) positions for the block. */
        oskar_mem_copy_contents(l, h->l, 0, block_start, block_size, status);
        oskar_mem_copy_contents(m, h->m, 0, block_start, block_size, status);
        if (h->algorithm == OSKAR_ALGORITHM_DFT_3D)
            oskar_mem_copy_contents(n, h->n, 0, block_start,
                    block_size, status);

        /* Run DFT for the block. */
        oskar_dft_c2r(num_vis, 2.0 * M_PI, uu, vv, ww, amp, weight,
                (int) block_size, l, m, n, block, status);

        /* Add data to existing pixels. */
        oskar_mem_add(plane, plane, block,
                block_start, block_start, 0, block_size, status);
    }

    /* Free memory. */
    oskar_mem_free(uu, status);
    oskar_mem_free(vv, status);
    oskar_mem_free(ww, status);
    oskar_mem_free(amp, status);
    oskar_mem_free(weight, status);
    oskar_mem_free(block, status);
    oskar_mem_free(l, status);
    oskar_mem_free(m, status);
    oskar_mem_free(n, status);
    return 0;
}