int main(int argc, char **argv) {
  int i;
  int a[16];
  int it;
  int rv = 0;
  for (i = 0; i < 16; i++) {
    a[i] = 0;
  }
  for (it = 0; it < 4; it++) {
    {
      nanos_err_t nanos_err;
      nanos_wd_dyn_props_t dyn_props;
      unsigned int nth_i;
      struct nanos_args_1_t imm_args;
      nanos_data_access_t dependences[1];
      // Device descriptor: the outlined function for the SMP device
      static nanos_smp_args_t smp_ol_main_1_args = {
          .outline = (void (*)(void *))(void (*)(struct nanos_args_1_t *))&smp_ol_main_1};
      // Constant (per-construct) part of the work-descriptor definition
      static struct nanos_const_wd_definition_1 nanos_wd_const_data = {
          .base = {.props = {.mandatory_creation = 1,
                             .tied = 1,
                             .clear_chunk = 0,
                             .reserved0 = 0,
                             .reserved1 = 0,
                             .reserved2 = 0,
                             .reserved3 = 0,
                             .reserved4 = 0},
                   .data_alignment = __alignof__(struct nanos_args_1_t),
                   .num_copies = 0,
                   .num_devices = 1,
                   .num_dimensions = 0,
                   .description = 0},
          .devices = {[0] = {.factory = &nanos_smp_factory,
                             .arg = &smp_ol_main_1_args}}};
      // Create the team, sized by the runtime default for the next parallel region
      unsigned int nanos_num_threads = nanos_omp_get_num_threads_next_parallel(0);
      nanos_team_t nanos_team = (void *)0;
      nanos_thread_t nanos_team_threads[nanos_num_threads];
      nanos_err = nanos_create_team(&nanos_team, (void *)0, &nanos_num_threads,
                                    (nanos_constraint_t *)0, 1, nanos_team_threads,
                                    &nanos_wd_const_data.base);
      if (nanos_err != NANOS_OK) {
        nanos_handle_error(nanos_err);
      }
      dyn_props.tie_to = (void *)0;
      dyn_props.priority = 0;
      dyn_props.flags.is_final = 0;
      // Create and submit one work descriptor tied to each non-master thread
      for (nth_i = 1; nth_i < nanos_num_threads; nth_i = nth_i + 1) {
        dyn_props.tie_to = nanos_team_threads[nth_i];
        struct nanos_args_1_t *ol_args = 0;
        nanos_wd_t nanos_wd_ = (void *)0;
        nanos_err = nanos_create_wd_compact(&nanos_wd_, &nanos_wd_const_data.base,
                                            &dyn_props, sizeof(struct nanos_args_1_t),
                                            (void **)&ol_args, nanos_current_wd(),
                                            (nanos_copy_data_t **)0,
                                            (nanos_region_dimension_internal_t **)0);
        if (nanos_err != NANOS_OK) {
          nanos_handle_error(nanos_err);
        }
        // Pack the shared variables into the outline arguments
        (*ol_args).i = &i;
        (*ol_args).a = &a;
        nanos_err = nanos_submit(nanos_wd_, 0, (nanos_data_access_t *)0, (void *)0);
        if (nanos_err != NANOS_OK) {
          nanos_handle_error(nanos_err);
        }
      }
      // The master thread runs its share of the region immediately
      dyn_props.tie_to = nanos_team_threads[0];
      imm_args.i = &i;
      imm_args.a = &a;
      nanos_err = nanos_create_wd_and_run_compact(
          &nanos_wd_const_data.base, &dyn_props, sizeof(struct nanos_args_1_t),
          &imm_args, 0, dependences, (nanos_copy_data_t *)0,
          (nanos_region_dimension_internal_t *)0, (void (*)(void *, nanos_wd_t))0);
      if (nanos_err != NANOS_OK) {
        nanos_handle_error(nanos_err);
      }
      nanos_err = nanos_end_team(nanos_team);
      if (nanos_err != NANOS_OK) {
        nanos_handle_error(nanos_err);
      }
    }
  }
  return rv;
}
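For reference, the expansion above is consistent with an input program along the following lines. This is a reconstruction: the outlined body smp_ol_main_1 is not shown in the listing, so the pragma and the region body are assumptions; what is grounded in the generated code is that the region is entered four times and shares i and a (both are passed by address through struct nanos_args_1_t).

/* Hypothetical input source (reconstruction). */
int main(int argc, char **argv) {
  int i;
  int a[16];
  int it;
  int rv = 0;
  for (i = 0; i < 16; i++)
    a[i] = 0;
  for (it = 0; it < 4; it++) {
#pragma omp parallel shared(i, a)
    {
      /* Body outlined by the compiler into smp_ol_main_1,
         which receives &i and &a via struct nanos_args_1_t. */
    }
  }
  return rv;
}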
void NANOS_parallel(void (*func)(void *), void *data, unsigned numThreads,
                    long data_size, long (*get_data_align)(void),
                    void *(*get_empty_data)(void),
                    void (*init_func)(void *, void *)) {
  nanos_err_t err;
  // Compute copy data (for SMP devices there are no copies; only the CUDA device requires copy data)
  int num_copies = 0;
  // TODO Compute dimensions
  int num_dimensions = 0;
  // Compute the device descriptor (at the moment, only SMP is supported)
  int num_devices = 1;
  // TODO No dependencies for the parallel construct on SMP devices
  int num_data_accesses = 0;
  nanos_data_access_t dependences[1];
  // Create the device descriptor (at the moment, only SMP is supported)
  nanos_smp_args_t _smp_args = { func };
  char *parallel_name;
  asprintf(&parallel_name, "parallel_%d", parallel_id++);
  struct nanos_const_wd_definition nanos_wd_const_data = {
    { { 1,                    // mandatory creation
        1,                    // tied
        0, 0, 0, 0, 0, 0 },   // properties
      (*get_data_align)(),    // data alignment
      num_copies,
      num_devices,
      num_dimensions,
      parallel_name           // description
    },
    { { &nanos_smp_factory,   // device description
        &_smp_args }          // outlined function
    }
  };
  // Compute the dynamic properties of the WD: tiedness, priority and finality
  nanos_wd_dyn_props_t dyn_props;
  dyn_props.tie_to = (void *)0;
  dyn_props.priority = 0;
  dyn_props.flags.is_final = 0;
  // Create the working team
  if (numThreads == 0)
    numThreads = nanos_omp_get_num_threads_next_parallel(0);
  void *nanos_team = (void *)0;
  const unsigned int nthreads_vla = numThreads;
  void *team_threads[nthreads_vla];
  err = nanos_create_team(&nanos_team, (void *)0, &numThreads,
                          (nanos_constraint_t *)0, /*reuse current*/ 1, team_threads);
  if (err != NANOS_OK)
    nanos_handle_error(err);
  // Create a WD tied to each thread
  unsigned nth_i;
  for (nth_i = 1; nth_i < numThreads; nth_i++) {
    // Set the properties of the current WD of the team
    dyn_props.tie_to = team_threads[nth_i];
    // Create the current WD of the team
    void *empty_data = (*get_empty_data)();
    void *wd = (void *)0;
    err = nanos_create_wd_compact(&wd, &nanos_wd_const_data.base, &dyn_props,
                                  data_size, (void **)&empty_data, nanos_current_wd(),
                                  (nanos_copy_data_t **)0,
                                  (nanos_region_dimension_internal_t **)0);
    if (err != NANOS_OK)
      nanos_handle_error(err);
    // Initialize the outlined data
    (*init_func)(empty_data, data);
    // Submit the work to the WD
    err = nanos_submit(wd, num_data_accesses, (nanos_data_access_t *)0, (void *)0);
    if (err != NANOS_OK)
      nanos_handle_error(err);
  }
  // Create the WD for the master thread, which will run the team
  dyn_props.tie_to = team_threads[0];
  err = nanos_create_wd_and_run_compact(&nanos_wd_const_data.base, &dyn_props,
                                        data_size, data, num_data_accesses, dependences,
                                        (nanos_copy_data_t *)0,
                                        (nanos_region_dimension_internal_t *)0,
                                        (void (*)(void *, void *))0);
  if (err != NANOS_OK)
    nanos_handle_error(err);
  // End the team
  err = nanos_end_team(nanos_team);
  if (err != NANOS_OK)
    nanos_handle_error(err);
}
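To make the calling convention concrete, the following is a minimal sketch of the code a compiler could emit to drive a region through NANOS_parallel. The names parallel_data_t, outlined_region, get_align, get_empty, init_data and run_region are illustrative, not part of the runtime API; what is grounded in the wrapper above is that the runtime overwrites the pointer returned by get_empty_data with its own allocation in nanos_create_wd_compact, that init_func copies the caller's data block into that allocation, and that numThreads == 0 defers the team size to the runtime.

/* Hypothetical caller of NANOS_parallel (illustrative names). */
typedef struct {
  int *i;
  int (*a)[16];
} parallel_data_t;

/* Outlined region body: unpacks the shared data. */
static void outlined_region(void *arg) {
  parallel_data_t *d = (parallel_data_t *)arg;
  /* ... region body using *d->i and *d->a ... */
}

/* Helpers matching the function-pointer parameters of NANOS_parallel. */
static long get_align(void) {
  return __alignof__(parallel_data_t);
}

static void *get_empty(void) {
  /* The runtime replaces this pointer with its own allocation
     inside nanos_create_wd_compact, so a null placeholder suffices. */
  return (void *)0;
}

static void init_data(void *dst, void *src) {
  /* Copy the caller's argument block into the runtime-allocated one. */
  *(parallel_data_t *)dst = *(parallel_data_t *)src;
}

void run_region(int *i, int (*a)[16]) {
  parallel_data_t data = { i, a };
  /* numThreads == 0 lets the wrapper query the runtime's default. */
  NANOS_parallel(outlined_region, &data, 0, sizeof(parallel_data_t),
                 get_align, get_empty, init_data);
}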