/**
 * Simulate one block of visibility data on a single compute device.
 *
 * The block's time range is derived from block_index and
 * h->max_times_per_block, clipped to the total number of time steps.
 * Work units (one time sample of one sky chunk) are claimed from the
 * shared counter h->work_unit_index under h->mutex, so several threads may
 * call this function concurrently for the same block, each with its own
 * device_id. When h->coords_only is set, no work units are simulated: the
 * block is only cleared, given its meta-data and copied to the host.
 *
 * On completion the device visibility block is copied into the host
 * buffer d->vis_block_cpu[block_index % 2].
 *
 * @param h            Simulator handle; must already be initialised
 *                     (h->header non-NULL).
 * @param block_index  Zero-based index of the visibility block to simulate.
 * @param device_id    Index into the per-device data array h->d; must be in
 *                     the range [0, h->num_devices).
 * @param status       Status return code. The function returns immediately
 *                     if this is non-zero on entry. Set to
 *                     OSKAR_ERR_MEMORY_NOT_ALLOCATED if the simulator is not
 *                     initialised or if no device data exists for device_id.
 */
void oskar_simulator_run_block(oskar_Simulator* h, int block_index,
        int device_id, int* status)
{
    double obs_start_mjd, dt_dump_days;
    int i_active, time_index_start, time_index_end;
    int num_channels, num_times_block, total_chunks, total_times;
    DeviceData* d;
    if (*status) return;

    /* Check that initialisation has happened. We can't initialise here,
     * as we're already multi-threaded at this point. */
    if (!h->header)
    {
        *status = OSKAR_ERR_MEMORY_NOT_ALLOCATED;
        oskar_log_error(h->log, "Simulator not initalised. "
                "Call oskar_simulator_check_init() first.");
        return;
    }

    /* Refuse device indices that have no entry in h->d. Without this check
     * a negative or too-large index would read and write outside the
     * per-device data array below. */
    if (device_id < 0 || device_id >= h->num_devices)
    {
        *status = OSKAR_ERR_MEMORY_NOT_ALLOCATED;
        oskar_log_error(h->log, "No device data allocated for the "
                "requested device index.");
        return;
    }

#ifdef _OPENMP
    if (!h->coords_only)
    {
        /* Disable any nested parallelism. */
        omp_set_num_threads(1);
        omp_set_nested(0);
    }
#endif

    /* Set the GPU to use. (Supposed to be a very low-overhead call.)
     * device_id is known to be non-negative here; indices at or beyond
     * num_gpus have no GPU to select. */
    if (device_id < h->num_gpus)
        oskar_device_set(h->gpu_ids[device_id], status);

    /* Clear the visibility block. */
    i_active = block_index % 2; /* Index of the active buffer. */
    d = &(h->d[device_id]);
    oskar_timer_resume(d->tmr_compute);
    oskar_vis_block_clear(d->vis_block, status);

    /* Set the visibility block meta-data. */
    total_chunks = h->num_sky_chunks;
    num_channels = h->num_channels;
    total_times = h->num_time_steps;
    obs_start_mjd = h->time_start_mjd_utc;
    dt_dump_days = h->time_inc_sec / 86400.0;
    time_index_start = block_index * h->max_times_per_block;
    time_index_end = time_index_start + h->max_times_per_block - 1;
    /* The final block may be shorter than max_times_per_block. */
    if (time_index_end >= total_times)
        time_index_end = total_times - 1;
    num_times_block = 1 + time_index_end - time_index_start;

    /* Set the number of active times in the block. */
    oskar_vis_block_set_num_times(d->vis_block, num_times_block, status);
    oskar_vis_block_set_start_time_index(d->vis_block, time_index_start);

    /* Go through all possible work units in the block. A work unit is
     * defined as the simulation for one time and one sky chunk. */
    while (!h->coords_only)
    {
        oskar_Sky* sky;
        int i_work_unit, i_chunk, i_time, i_channel, sim_time_idx;

        /* Claim the next work unit; the counter is shared between all
         * threads working on this block. */
        oskar_mutex_lock(h->mutex);
        i_work_unit = (h->work_unit_index)++;
        oskar_mutex_unlock(h->mutex);
        if ((i_work_unit >= num_times_block * total_chunks) || *status)
            break;

        /* Convert the work unit index to chunk/time index.
         * Work units are ordered with time varying fastest. */
        i_chunk      = i_work_unit / num_times_block;
        i_time       = i_work_unit - i_chunk * num_times_block;
        sim_time_idx = time_index_start + i_time;

        /* Copy sky chunk to device only if different from the previous one. */
        if (i_chunk != d->previous_chunk_index)
        {
            oskar_timer_resume(d->tmr_copy);
            oskar_sky_copy(d->chunk, h->sky_chunks[i_chunk], status);
            oskar_timer_pause(d->tmr_copy);
        }
        sky = h->apply_horizon_clip ? d->chunk_clip : d->chunk;

        /* Apply horizon clip if required. */
        if (h->apply_horizon_clip)
        {
            double gast, mjd;
            /* Evaluate at the mid-point of the time sample. */
            mjd = obs_start_mjd + dt_dump_days * (sim_time_idx + 0.5);
            gast = oskar_convert_mjd_to_gast_fast(mjd);
            oskar_timer_resume(d->tmr_clip);
            oskar_sky_horizon_clip(d->chunk_clip, d->chunk, d->tel, gast,
                    d->station_work, status);
            oskar_timer_pause(d->tmr_clip);
        }

        /* Simulate all baselines for all channels for this time and chunk. */
        for (i_channel = 0; i_channel < num_channels; ++i_channel)
        {
            if (*status) break;
            if (h->log)
            {
                /* Serialise log output from concurrent threads. */
                oskar_mutex_lock(h->mutex);
                oskar_log_message(h->log, 'S', 1, "Time %*i/%i, "
                        "Chunk %*i/%i, Channel %*i/%i [Device %i, %i sources]",
                        disp_width(total_times), sim_time_idx + 1, total_times,
                        disp_width(total_chunks), i_chunk + 1, total_chunks,
                        disp_width(num_channels), i_channel + 1, num_channels,
                        device_id, oskar_sky_num_sources(sky));
                oskar_mutex_unlock(h->mutex);
            }
            sim_baselines(h, d, sky, i_channel, i_time, sim_time_idx, status);
        }
        d->previous_chunk_index = i_chunk;
    }

    /* Copy the visibility block to host memory. */
    oskar_timer_resume(d->tmr_copy);
    oskar_vis_block_copy(d->vis_block_cpu[i_active], d->vis_block, status);
    oskar_timer_pause(d->tmr_copy);
    oskar_timer_pause(d->tmr_compute);
}
/**
 * Run the complete simulation and write the results to the output file(s).
 *
 * The function checks that an output has been specified, initialises the
 * simulator if required, processes every visibility block inside an OpenMP
 * parallel region, logs memory usage, timing and output file names, stores
 * the log in the output files and finally calls
 * oskar_simulator_finalise().
 *
 * @param h       Simulator handle.
 * @param status  Status return code. The function returns immediately if
 *                this is non-zero on entry. Set to OSKAR_ERR_FILE_IO if no
 *                output file has been specified.
 */
void oskar_simulator_run(oskar_Simulator* h, int* status)
{
    int num_threads = 1, num_vis_blocks, no_output;
    if (*status) return;

    /* There must be somewhere for the visibilities to go. A Measurement Set
     * name only counts if OSKAR was built with Measurement Set support. */
    no_output = !h->vis_name;
#ifndef OSKAR_NO_MS
    if (h->ms_name) no_output = 0;
#endif
    if (no_output)
    {
        oskar_log_error(h->log, "No output file specified.");
#ifdef OSKAR_NO_MS
        if (h->ms_name)
            oskar_log_error(h->log,
                    "OSKAR was compiled without Measurement Set support.");
#endif
        *status = OSKAR_ERR_FILE_IO;
        return;
    }

    /* Make sure the simulator is initialised, then find out how many
     * visibility blocks there are to process. */
    oskar_simulator_check_init(h, status);
    num_vis_blocks = oskar_simulator_num_vis_blocks(h);

    /* Log memory usage before starting. */
    if (h->log && !*status)
    {
        oskar_log_section(h->log, 'M', "Initial memory usage");
#ifdef OSKAR_HAVE_CUDA
        {
            int gpu;
            for (gpu = 0; gpu < h->num_gpus; ++gpu)
                oskar_cuda_mem_log(h->log, 0, h->gpu_ids[gpu]);
        }
#endif
        system_mem_log(h->log);
        oskar_log_section(h->log, 'M', "Starting simulation...");
    }

    /* Start simulation timer. */
    oskar_timer_start(h->tmr_sim);

    /*-----------------------------------------------------------------------
     *-- START OF MULTITHREADED SIMULATION CODE -----------------------------
     *-----------------------------------------------------------------------*/
    /* Observation time is processed one block at a time. Simulation and
     * file output overlap by means of double buffering:
     *
     *   - Thread 0 writes finished blocks to file.
     *   - Threads 1 to n (one per compute device) run the simulation.
     *
     * The loop therefore runs for one iteration more than there are blocks:
     * nothing is written on the first iteration (no data are ready yet),
     * and nothing is simulated on the last one (it only writes the final
     * block). Without OpenMP, a single thread performs both roles.
     */
#ifdef _OPENMP
    num_threads = h->num_devices + 1;
    omp_set_num_threads(num_threads);
    omp_set_nested(0);
#else
    oskar_log_warning(h->log, "OpenMP not found: Using one compute device.");
#endif

    oskar_simulator_reset_work_unit_index(h);
#pragma omp parallel
    {
        int blk, tid = 0, dev = 0, is_writer, is_worker;

        /* Work out this thread's role and its compute device index. */
#ifdef _OPENMP
        tid = omp_get_thread_num();
        dev = tid - 1;
#endif
        is_writer = (tid == 0);
        is_worker = (tid > 0 || num_threads == 1);

        /* One iteration per block, plus one for the final write. */
        for (blk = 0; blk <= num_vis_blocks; ++blk)
        {
            /* Simulate the current block. */
            if (is_worker && blk < num_vis_blocks)
                oskar_simulator_run_block(h, blk, dev, status);

            /* Finalise and write the block from the previous iteration. */
            if (is_writer && blk > 0)
            {
                const int prev = blk - 1;
                oskar_VisBlock* finished =
                        oskar_simulator_finalise_block(h, prev, status);
                oskar_simulator_write_block(h, finished, prev, status);
            }

            /* Barrier 1: All threads are done with the work unit counter,
             * so it can be reset for the next block. */
#pragma omp barrier
            if (is_writer) oskar_simulator_reset_work_unit_index(h);

            /* Barrier 2: Synchronise before moving to the next block. */
#pragma omp barrier
            if (is_writer && blk < num_vis_blocks && h->log && !*status)
            {
                const int done = blk + 1;
                oskar_log_message(h->log, 'S', 0, "Block %*i/%i (%3.0f%%) "
                        "complete. Simulation time elapsed: %.3f s",
                        disp_width(num_vis_blocks), done, num_vis_blocks,
                        100.0 * done / (double)num_vis_blocks,
                        oskar_timer_elapsed(h->tmr_sim));
            }
        }
    }
    /*-----------------------------------------------------------------------
     *-- END OF MULTITHREADED SIMULATION CODE -------------------------------
     *-----------------------------------------------------------------------*/

    /* Log memory usage after the simulation. */
    if (h->log && !*status)
    {
        oskar_log_section(h->log, 'M', "Final memory usage");
#ifdef OSKAR_HAVE_CUDA
        {
            int gpu;
            for (gpu = 0; gpu < h->num_gpus; ++gpu)
                oskar_cuda_mem_log(h->log, 0, h->gpu_ids[gpu]);
        }
#endif
        system_mem_log(h->log);
    }

    /* Warn if noise was added but the station beam is not normalised.
     * If there are sources in the simulation and the station beam is not
     * normalised to 1.0 at the phase centre, the noise RMS values may give
     * a very unexpected S/N ratio! The alternative would be to scale the
     * noise to match the station beam gain, but that would require knowing
     * the station beam amplitude at the phase centre for each time and
     * channel. */
    if (h->log && oskar_telescope_noise_enabled(h->tel) && !*status)
    {
        const int sky_not_empty = (h->num_sky_chunks > 0 &&
                oskar_sky_num_sources(h->sky_chunks[0]) > 0);
        const int beam_normalised = oskar_station_normalise_final_beam(
                oskar_telescope_station_const(h->tel, 0));
        if (sky_not_empty && !beam_normalised)
        {
            oskar_log_line(h->log, 'W', ' ');
            oskar_log_line(h->log, 'W', '*');
            oskar_log_message(h->log, 'W', -1,
                    "WARNING: System noise added to visibilities");
            oskar_log_message(h->log, 'W', -1,
                    "without station beam normalisation enabled.");
            oskar_log_message(h->log, 'W', -1,
                    "This will give an invalid signal to noise ratio.");
            oskar_log_line(h->log, 'W', '*');
            oskar_log_line(h->log, 'W', ' ');
        }
    }

    /* Report timing, list the outputs and store the log in them. */
    if (h->log && !*status)
    {
        size_t num_log_bytes = 0;
        char* log_text;
        oskar_log_set_value_width(h->log, 25);
        record_timing(h);
        oskar_log_section(h->log, 'M', "Simulation complete");
        oskar_log_message(h->log, 'M', 0, "Output(s):");
        if (h->vis_name)
            oskar_log_value(h->log, 'M', 1,
                    "OSKAR binary file", "%s", h->vis_name);
        if (h->ms_name)
            oskar_log_value(h->log, 'M', 1,
                    "Measurement Set", "%s", h->ms_name);

        /* Fetch the log contents and write them to each open output. */
        log_text = oskar_log_file_data(h->log, &num_log_bytes);
#ifndef OSKAR_NO_MS
        if (h->ms)
            oskar_ms_add_history(h->ms, "OSKAR_LOG", log_text, num_log_bytes);
#endif
        if (h->vis)
            oskar_binary_write(h->vis, OSKAR_CHAR, OSKAR_TAG_GROUP_RUN,
                    OSKAR_TAG_RUN_LOG, 0, num_log_bytes, log_text, status);
        free(log_text);
    }

    /* Finalise. */
    oskar_simulator_finalise(h, status);
}
static void* run_blocks(void* arg) { oskar_Interferometer* h; int b, thread_id, device_id, num_blocks, num_threads, *status; /* Get thread function arguments. */ h = ((ThreadArgs*)arg)->h; num_threads = ((ThreadArgs*)arg)->num_threads; thread_id = ((ThreadArgs*)arg)->thread_id; device_id = thread_id - 1; status = &(h->status); #ifdef _OPENMP /* Disable any nested parallelism. */ omp_set_nested(0); omp_set_num_threads(1); #endif /* Loop over blocks of observation time, running simulation and file * writing one block at a time. Simulation and file output are overlapped * by using double buffering, and a dedicated thread is used for file * output. * * Thread 0 is used for file writes. * Threads 1 to n (mapped to compute devices) do the simulation. * * Note that no write is launched on the first loop counter (as no * data are ready yet) and no simulation is performed for the last loop * counter (which corresponds to the last block + 1) as this iteration * simply writes the last block. */ num_blocks = oskar_interferometer_num_vis_blocks(h); for (b = 0; b < num_blocks + 1; ++b) { if ((thread_id > 0 || num_threads == 1) && b < num_blocks) oskar_interferometer_run_block(h, b, device_id, status); if (thread_id == 0 && b > 0) { oskar_VisBlock* block; block = oskar_interferometer_finalise_block(h, b - 1, status); oskar_interferometer_write_block(h, block, b - 1, status); } /* Barrier 1: Reset work unit index and print status. */ oskar_barrier_wait(h->barrier); if (thread_id == 0) { oskar_interferometer_reset_work_unit_index(h); if (b < num_blocks && h->log && !*status) oskar_log_message(h->log, 'S', 0, "Block %*i/%i (%3.0f%%) " "complete. Simulation time elapsed: %.3f s", disp_width(num_blocks), b+1, num_blocks, 100.0 * (b+1) / (double)num_blocks, oskar_timer_elapsed(h->tmr_sim)); } /* Barrier 2: Synchronise before moving to the next block. */ oskar_barrier_wait(h->barrier); } return 0; }