int main ( int argc, char **argv ) { char text[10] = "123456789"; char text2[10] = "987654321"; char* dummy1 = text; my_args* args = 0; nanos_wd_props_t props = { .mandatory_creation = true, .tied = false, .tie_to = false, }; nanos_copy_data_t *cd = 0; nanos_wd_t wd1=0; nanos_device_t test_devices_1[1] = { NANOS_SMP_DESC( test_device_arg_1) }; NANOS_SAFE( nanos_create_wd ( &wd1, 1,test_devices_1, sizeof(my_args), __alignof__(my_args), (void**)&args, nanos_current_wd(), &props, 2, &cd) ); args->a = 1; args->b = dummy1; cd[0] = (nanos_copy_data_t) {(uint64_t)&(args->a), NANOS_PRIVATE, {true, false}, sizeof(args->a)}; cd[1] = (nanos_copy_data_t) {(uint64_t)args->b, NANOS_SHARED, {true, true}, sizeof(char)*10}; NANOS_SAFE( nanos_submit( wd1,0,0,0 ) ); NANOS_SAFE( nanos_wg_wait_completion( nanos_current_wd() ) ); if ( strcmp( text2, dummy1 ) == 0 ) { printf( "Checking for copy-back correctness... PASS\n" ); } else { printf( "Checking for copy-back correctness... FAIL\n" ); printf( "expecting '%s', copied back: '%s'\n", text2, dummy1 ); return 1; } return 0; }
void fib(int n, int *res) { if (n < 2) { *res = n; } else { int res1 = 0; int res2 = 0; { int mcc_arg_0 = n - 1; int *mcc_arg_1 = &res1; { _Bool mcc_is_in_final; nanos_err_t mcc_err_in_final = nanos_in_final(&mcc_is_in_final); if (mcc_err_in_final != NANOS_OK) { nanos_handle_error(mcc_err_in_final); } if (mcc_is_in_final) { fib_mcc_serial(n - 1, &res1); } else { { nanos_wd_dyn_props_t nanos_wd_dyn_props; int memo_dimensions[1]; struct nanos_args_0_t *ol_args; nanos_err_t err; struct nanos_args_0_t imm_args; static nanos_smp_args_t smp_ol_fib_1_args = {.outline = (void (*)(void *))(void (*)(struct nanos_args_0_t *))&smp_ol_fib_1}; static struct nanos_const_wd_definition_1 nanos_wd_const_data = {.base = {.props = {.mandatory_creation = 0, .tied = 0, .clear_chunk = 0, .reserved0 = 0, .reserved1 = 0, .reserved2 = 0, .reserved3 = 0, .reserved4 = 0}, .data_alignment = __alignof__(struct nanos_args_0_t), .num_copies = 1, .num_devices = 1, .num_dimensions = 1, .description = 0}, .devices = {[0] = {.factory = &nanos_smp_factory, .arg = &smp_ol_fib_1_args}}}; nanos_wd_dyn_props.tie_to = 0; nanos_wd_dyn_props.priority = 0; nanos_wd_dyn_props.flags.is_final = 1; memo_dimensions[0] = mcc_arg_0 + 1; nanos_wd_dyn_props.memo.num_dimensions = 1; nanos_wd_dyn_props.memo.dimensions = memo_dimensions; ol_args = (struct nanos_args_0_t *)0; nanos_wd_t nanos_wd_ = (void *)0; nanos_copy_data_t *ol_copy_data = (nanos_copy_data_t *)0; nanos_region_dimension_internal_t *ol_copy_dimensions = (nanos_region_dimension_internal_t *)0; err = nanos_create_wd_compact(&nanos_wd_, &nanos_wd_const_data.base, &nanos_wd_dyn_props, sizeof(struct nanos_args_0_t), (void **)&ol_args, nanos_current_wd(), &ol_copy_data, &ol_copy_dimensions); if (err != NANOS_OK) { nanos_handle_error(err); } nanos_region_dimension_t dimensions_0[1] = {[0] = {.size = sizeof(int), .lower_bound = 0, .accessed_length = sizeof(int)}}; nanos_data_access_t dependences[1] = {[0] = {.address = (void *)mcc_arg_1, .flags = {.input = 0, 
.output = 1, .can_rename = 0, .concurrent = 0, .commutative = 0}, .dimension_count = (short int)1, .dimensions = dimensions_0, .offset = 0}}; ; if (nanos_wd_ != (void *)0) { (*ol_args).n = mcc_arg_0; (*ol_args).res = mcc_arg_1; ol_copy_dimensions[0].size = 1 * sizeof(int); ol_copy_dimensions[0].lower_bound = 0 * sizeof(int); ol_copy_dimensions[0].accessed_length = (0 - 0 + 1) * sizeof(int); ol_copy_data[0].sharing = NANOS_SHARED; ol_copy_data[0].address = (void *)mcc_arg_1; ol_copy_data[0].flags.input = 0; ol_copy_data[0].flags.output = 1; ol_copy_data[0].dimension_count = (short int)1; ol_copy_data[0].dimensions = &ol_copy_dimensions[0]; ol_copy_data[0].offset = 0; err = nanos_set_translate_function(nanos_wd_, (void (*)(void *, nanos_wd_t))nanos_xlate_fun_fibompmemoc_0); if (err != NANOS_OK) { nanos_handle_error(err); } err = nanos_submit(nanos_wd_, 1, dependences, (void *)0); if (err != NANOS_OK) { nanos_handle_error(err); } } else { nanos_region_dimension_internal_t imm_copy_dimensions[1]; nanos_copy_data_t imm_copy_data[1]; imm_args.n = mcc_arg_0; imm_args.res = mcc_arg_1; imm_copy_dimensions[0].size = 1 * sizeof(int); imm_copy_dimensions[0].lower_bound = 0 * sizeof(int); imm_copy_dimensions[0].accessed_length = (0 - 0 + 1) * sizeof(int); imm_copy_data[0].sharing = NANOS_SHARED; imm_copy_data[0].address = (void *)mcc_arg_1; imm_copy_data[0].flags.input = 0; imm_copy_data[0].flags.output = 1; imm_copy_data[0].dimension_count = (short int)1; imm_copy_data[0].dimensions = &imm_copy_dimensions[0]; imm_copy_data[0].offset = 0; err = nanos_create_wd_and_run_compact(&nanos_wd_const_data.base, &nanos_wd_dyn_props, sizeof(struct nanos_args_0_t), &imm_args, 1, dependences, imm_copy_data, imm_copy_dimensions, (void (*)(void *, nanos_wd_t))nanos_xlate_fun_fibompmemoc_0); if (err != NANOS_OK) { nanos_handle_error(err); } } } } } }
int main(int argc, char * argv[]) { int * * l_array_of_arrays; int * l_partial_sums; int l_num_procs; int l_total; int l_i, l_j; if (argc != 2) { printf("Usage: %s number_of_processors\n", argv[0]); return 0; } l_num_procs = atoi(argv[1]); if (l_num_procs < 1 && l_num_procs > 16) { printf("The number of processors must be between 1 and 16\n"); return 0; } l_partial_sums = (int *) malloc(l_num_procs * sizeof(int)); l_array_of_arrays = (int **) malloc(l_num_procs * sizeof(int *)); for (l_i = 0; l_i < l_num_procs; l_i++) { l_array_of_arrays[l_i] = (int *) malloc(16834 * sizeof(int)); for (l_j = 0; l_j < 16834; l_j++) { if ((l_j % 2) == 0) l_array_of_arrays[l_i][l_j] = 1; else l_array_of_arrays[l_i][l_j] = 0; } } for (l_i = 0; l_i < l_num_procs; l_i++) { { nanos_smp_args_t _ol_main_0_smp_args = { (void (*)(void *)) _smp__ol_main_0 }; _nx_data_env_0_t * ol_args = (_nx_data_env_0_t *) 0; nanos_wd_t wd = (nanos_wd_t) 0; const_data1.data_alignment = __alignof__(_nx_data_env_0_t); const_data1.devices[0].arg = &_ol_main_0_smp_args; nanos_wd_dyn_props_t dyn_data1 = { 0 }; nanos_err_t err; err = nanos_create_wd_compact(&wd, (nanos_const_wd_definition_t *) &const_data1, &dyn_data1, sizeof(_nx_data_env_0_t), (void **) &ol_args, nanos_current_wd(), (nanos_copy_data_t **) 0, NULL); if (err != NANOS_OK) nanos_handle_error(err); if (wd != (nanos_wd_t) 0) { ol_args->l_array_of_arrays_0 = l_array_of_arrays; ol_args->l_partial_sums_0 = l_partial_sums; ol_args->l_i_0 = l_i; err = nanos_submit(wd, 0, (nanos_data_access_t *) 0, (nanos_team_t) 0); if (err != NANOS_OK) nanos_handle_error(err); } else { _nx_data_env_0_t imm_args; imm_args.l_array_of_arrays_0 = l_array_of_arrays; imm_args.l_partial_sums_0 = l_partial_sums; imm_args.l_i_0 = l_i; err = nanos_create_wd_and_run_compact((nanos_const_wd_definition_t *) &const_data1, &dyn_data1, sizeof(_nx_data_env_0_t), &imm_args, 0, (nanos_data_access_t *) 0, (nanos_copy_data_t *) 0, 0, NULL); if (err != NANOS_OK) nanos_handle_error(err); } } } 
nanos_wg_wait_completion( nanos_current_wd(), 0 ); l_total = 0; for (l_i = 0; l_i < l_num_procs; l_i++) { printf("%d -> %d\n", l_i, l_partial_sums[l_i]); l_total += l_partial_sums[l_i]; } printf("Result = %d\n", l_total); return 0; }
bool single_dependency() { int my_value; int * dep_addr = &my_value; my_args *args1=0; nanos_dependence_t deps1 = {(void **)&dep_addr,0, {0,1,0,0}, 0}; nanos_wd_props_t props = { .mandatory_creation = true, .tied = false, .tie_to = false, }; nanos_wd_t wd1=0; nanos_device_t test_devices_1[1] = { NANOS_SMP_DESC( test_device_arg_1) }; NANOS_SAFE( nanos_create_wd ( &wd1, 1,test_devices_1, sizeof(my_args), __alignof__(my_args), (void**)&args1, nanos_current_wd(), &props, 0, NULL) ); args1->p_i = dep_addr; NANOS_SAFE( nanos_submit( wd1,1,&deps1,0 ) ); my_args *args2=0; nanos_dependence_t deps2 = {(void **)&dep_addr,0, {1,1,0,0}, 0}; nanos_wd_t wd2 = 0; nanos_device_t test_devices_2[1] = { NANOS_SMP_DESC( test_device_arg_2 ) }; NANOS_SAFE( nanos_create_wd ( &wd2, 1,test_devices_2, sizeof(my_args), __alignof__(my_args), (void**)&args2, nanos_current_wd(), &props, 0, NULL) ); args2->p_i = dep_addr; NANOS_SAFE( nanos_submit( wd2,1,&deps2,0 ) ); NANOS_SAFE( nanos_wg_wait_completion( nanos_current_wd() ) ); return (my_value == 1); } bool single_inout_chain() { int i; int my_value; int * dep_addr = &my_value; my_args *args1=0; nanos_dependence_t deps1 = {(void **)&dep_addr,0, {0,1,0,0}, 0}; nanos_wd_props_t props = { .mandatory_creation = true, .tied = false, .tie_to = false, }; nanos_wd_t wd1=0; nanos_device_t test_devices_1[1] = { NANOS_SMP_DESC( test_device_arg_1) }; NANOS_SAFE( nanos_create_wd ( &wd1, 1,test_devices_1, sizeof(my_args), __alignof__(my_args), (void**)&args1, nanos_current_wd(), &props, 0, NULL) ); args1->p_i = dep_addr; NANOS_SAFE( nanos_submit( wd1,1,&deps1,0 ) ); for ( i = 0; i < 100; i++ ) { my_args *args2=0; nanos_dependence_t deps2 = {(void **)&dep_addr,0, {1,1,0,0}, 0}; nanos_wd_t wd2 = 0; nanos_device_t test_devices_2[1] = { NANOS_SMP_DESC( test_device_arg_2 ) }; NANOS_SAFE( nanos_create_wd ( &wd2, 1,test_devices_2, sizeof(my_args), __alignof__(my_args), (void**)&args2, nanos_current_wd(), &props, 0, NULL) ); args2->p_i = dep_addr; NANOS_SAFE( 
nanos_submit( wd2,1,&deps2,0 ) ); } NANOS_SAFE( nanos_wg_wait_completion( nanos_current_wd() ) ); return (my_value == 100); } bool multiple_inout_chains() { int i, j; int size = 10; int my_value[size]; for ( i = 0; i < size; i++ ) { int * dep_addr = &my_value[i]; my_args *args1=0; nanos_dependence_t deps1 = {(void **)&dep_addr,0, {0,1,0,0}, 0}; nanos_wd_props_t props = { .mandatory_creation = true, .tied = false, .tie_to = false, }; nanos_wd_t wd1=0; nanos_device_t test_devices_1[1] = { NANOS_SMP_DESC( test_device_arg_1) }; NANOS_SAFE( nanos_create_wd ( &wd1, 1,test_devices_1, sizeof(my_args), __alignof__(my_args), (void**)&args1, nanos_current_wd(), &props, 0, NULL) ); args1->p_i = dep_addr; NANOS_SAFE( nanos_submit( wd1,1,&deps1,0 ) ); for ( j = 0; j < size; j++ ) { my_args *args2=0; nanos_dependence_t deps2 = {(void **)&dep_addr,0, {1,1,0,0}, 0}; nanos_wd_t wd2 = 0; nanos_device_t test_devices_2[1] = { NANOS_SMP_DESC( test_device_arg_2 ) }; NANOS_SAFE( nanos_create_wd ( &wd2, 1,test_devices_2, sizeof(my_args), __alignof__(my_args), (void**)&args2, nanos_current_wd(), &props, 0, NULL) ); args2->p_i = dep_addr; NANOS_SAFE( nanos_submit( wd2,1,&deps2,0 ) ); } } NANOS_SAFE( nanos_wg_wait_completion( nanos_current_wd() ) ); for ( i = 0; i < size; i++ ) { if ( my_value[i] != size ) return false; } return true; } bool multiple_predecessors() { int j; int size=100; int my_value[size]; nanos_wd_props_t props = { .mandatory_creation = true, .tied = false, .tie_to = false, }; for ( j = 0; j < size; j++ ) { int * dep_addr1 = &my_value[j]; my_args *args1=0; nanos_dependence_t deps1 = {(void **)&dep_addr1,0, {0,1,0,0}, 0}; nanos_wd_t wd1 = 0; nanos_device_t test_devices_1[1] = { NANOS_SMP_DESC( test_device_arg_1 ) }; NANOS_SAFE( nanos_create_wd ( &wd1, 1,test_devices_1, sizeof(my_args), __alignof(my_args), (void**)&args1, nanos_current_wd(), &props, 0, NULL) ); args1->p_i = dep_addr1; NANOS_SAFE( nanos_submit( wd1,1,&deps1,0 ) ); } nanos_dependence_t deps2[size]; int 
*dep_addr2[size]; my_args *args2=0; for ( j = 0; j < size; j++ ) { dep_addr2[j] = &my_value[j]; deps2[j] = (nanos_dependence_t){(void **) &dep_addr2[j],0, {1,1,0,0},0}; } nanos_wd_t wd2=0; nanos_device_t test_devices_3[1] = { NANOS_SMP_DESC( test_device_arg_3) }; NANOS_SAFE( nanos_create_wd ( &wd2, 1,test_devices_3, sizeof(my_args)*size, __alignof__(my_args), (void**)&args2, nanos_current_wd(), &props, 0, NULL) ); for ( j = 0; j < size; j++) args2[j].p_i = dep_addr2[j]; NANOS_SAFE( nanos_submit( wd2,size,&deps2[0],0 ) ); NANOS_SAFE( nanos_wg_wait_completion( nanos_current_wd() ) ); for ( j = 0; j < size; j++ ) { if ( my_value[j] != 1 ) return false; } return true; } bool multiple_antidependencies() { int j; int my_value=1500; int my_reslt[100]; nanos_wd_props_t props = { .mandatory_creation = true, .tied = false, .tie_to = false, }; for ( j = 0; j < 100; j++ ) { int * dep_addr1 = &my_value; int * reslt_addr =&my_reslt[j]; my_args *args1=0; nanos_dependence_t deps1 = {(void **)&dep_addr1,0, {1,0,0,0}, 0}; nanos_wd_t wd1 = 0; nanos_device_t test_devices_4[1] = { NANOS_SMP_DESC( test_device_arg_4 ) }; NANOS_SAFE( nanos_create_wd ( &wd1, 1,test_devices_4, sizeof(my_args)*2, __alignof__(my_args), (void**)&args1, nanos_current_wd(), &props, 0, NULL) ); args1[0].p_i = dep_addr1; args1[1].p_i = reslt_addr; NANOS_SAFE( nanos_submit( wd1,1,&deps1,0 ) ); } int *dep_addr2 = &my_value; nanos_dependence_t deps2 = (nanos_dependence_t){(void **) &dep_addr2,0, {1,1,0,0},0}; my_args *args2=0; nanos_wd_t wd2=0; nanos_device_t test_devices_2[1] = { NANOS_SMP_DESC( test_device_arg_2) }; NANOS_SAFE( nanos_create_wd ( &wd2, 1,test_devices_2, sizeof(my_args), __alignof__(my_args), (void**)&args2, nanos_current_wd(), &props, 0, NULL) ); args2->p_i = dep_addr2; NANOS_SAFE( nanos_submit( wd2,1,&deps2,0 ) ); NANOS_SAFE( nanos_wg_wait_completion( nanos_current_wd() ) ); for ( j = 0; j < 100; j++ ) { if ( my_reslt[j] != 1500 ) return false; } if (my_value != 1501) return false; return true; } 
bool out_dep_chain() { int i; int my_value; int * dep_addr = &my_value; nanos_wd_props_t props = { .mandatory_creation = true, .tied = false, .tie_to = false, }; for ( i = 0; i < 100; i++ ) { my_args *args2=0; nanos_dependence_t deps2 = {(void **)&dep_addr,0, {0,1,0,0}, 0}; nanos_wd_t wd2 = 0; nanos_device_t test_devices_1[1] = { NANOS_SMP_DESC( test_device_arg_1 ) }; NANOS_SAFE( nanos_create_wd ( &wd2, 1,test_devices_1, sizeof(my_args), __alignof__(my_args),(void**)&args2, nanos_current_wd(), &props, 0, NULL) ); args2->p_i = dep_addr; NANOS_SAFE( nanos_submit( wd2,1,&deps2,0 ) ); } int input=500; int * input_addr = &input; nanos_dependence_t deps1 = {(void **)&dep_addr,0, {0,1,0,0}, 0}; my_args *args1=0; nanos_wd_t wd1=0; nanos_device_t test_devices_1[1] = { NANOS_SMP_DESC( test_device_arg_4) }; NANOS_SAFE( nanos_create_wd ( &wd1, 1,test_devices_1, sizeof(my_args)*2, __alignof__(my_args), (void**)&args1, nanos_current_wd(), &props, 0, NULL) ); args1[0].p_i = input_addr; args1[1].p_i = dep_addr; NANOS_SAFE( nanos_submit( wd1,1,&deps1,0 ) ); NANOS_SAFE( nanos_wg_wait_completion( nanos_current_wd() ) ); return (my_value == 500); } bool wait_on_test() { int j; int size=10; int my_value[size]; nanos_wd_props_t props = { .mandatory_creation = true, .tied = false, .tie_to = false, }; for ( j = 0; j < size; j++ ) { my_value[j] = 500; int * dep_addr1 = &my_value[j]; my_args *args1=0; nanos_dependence_t deps1 = {(void **)&dep_addr1,0, {0,1,0,0}, 0}; nanos_wd_t wd1 = 0; nanos_device_t test_devices_1[1] = { NANOS_SMP_DESC( test_device_arg_1 ) }; NANOS_SAFE( nanos_create_wd ( &wd1, 1,test_devices_1, sizeof(my_args), __alignof__(my_args), (void**)&args1, nanos_current_wd(), &props, 0, NULL) ); args1->p_i = dep_addr1; NANOS_SAFE( nanos_submit( wd1,1,&deps1,0 ) ); } nanos_dependence_t deps2[size]; int *dep_addr2[size]; for ( j = 0; j < size; j++ ) { dep_addr2[j] = &my_value[j]; deps2[j] = (nanos_dependence_t){(void **) &dep_addr2[j],0, {1,0,0,0},0}; } NANOS_SAFE( nanos_wait_on( 
size, &deps2[0] )); for ( j = 0; j < size; j++ ) { if ( my_value[j] != 0 ) return false; } return true; } bool create_and_run_test() { int j; int my_value[100]; int other_value=0; nanos_wd_props_t props = { .mandatory_creation = true, .tied = false, .tie_to = false, }; for ( j = 0; j < 100; j++ ) { my_value[j] = 500; int * dep_addr1 = &my_value[j]; my_args *args1=0; nanos_dependence_t deps1 = {(void **)&dep_addr1,0, {0,1,0,0}, 0}; nanos_wd_t wd1 = 0; nanos_device_t test_devices_1[1] = { NANOS_SMP_DESC( test_device_arg_1 ) }; NANOS_SAFE( nanos_create_wd ( &wd1, 1,test_devices_1, sizeof(my_args), __alignof__(my_args), (void**)&args1, nanos_current_wd(), &props, 0, NULL) ); args1->p_i = dep_addr1; NANOS_SAFE( nanos_submit( wd1,1,&deps1,0 ) ); } nanos_dependence_t deps2[100]; int *dep_addr2[100]; for ( j = 0; j < 100; j++ ) { dep_addr2[j] = &my_value[j]; deps2[j] = (nanos_dependence_t){(void **) &dep_addr2[j],0, {1,0,0,0},0}; } my_args arg; arg.p_i = &other_value; nanos_device_t test_devices_2[1] = { NANOS_SMP_DESC( test_device_arg_1 ) }; NANOS_SAFE( nanos_create_wd_and_run( 1, test_devices_2, sizeof(my_args), __alignof__(my_args), (void *)&arg, 100, &deps2[0], &props , 0, NULL, NULL ) ); for ( j = 0; j < 100; j++ ) { if ( my_value[j] != 0 ) return false; } return true; } // Test commutative tasks, this test creates a task with an inout dependency on an array an then // a bunch of commutative (reduction) tasks that update it. 
Finally it waits for them all to finish and // checks the result bool commutative_task_1() { int i, j; int size = 100; int my_value[size]; int *value_ref = (int *)&my_value; for ( i = 0; i < size; i++ ) { my_value[i] = 0; } nanos_wd_props_t props = { .mandatory_creation = true, .tied = false, .tie_to = false, }; my_args2 *args1=0; nanos_dependence_t deps1 = {(void **)&value_ref,0, {0,1,0,0}, 0}; nanos_wd_t wd1 = 0; nanos_device_t test_devices_1[1] = { NANOS_SMP_DESC( test_device_arg_6 ) }; NANOS_SAFE( nanos_create_wd ( &wd1, 1,test_devices_1, sizeof(my_args2), __alignof__(my_args2), (void**)&args1, nanos_current_wd(), &props, 0, NULL) ); args1->p_i = my_value; args1->index = size; NANOS_SAFE( nanos_submit( wd1,1,&deps1,0 ) ); for ( j = 0; j < size; j++ ) { my_args2 *args1=0; nanos_dependence_t deps1 = {(void **)&value_ref,0, {1,1,0,1}, 0}; nanos_wd_t wd1 = 0; nanos_device_t test_devices_1[1] = { NANOS_SMP_DESC( test_device_arg_5 ) }; NANOS_SAFE( nanos_create_wd ( &wd1, 1,test_devices_1, sizeof(my_args2), __alignof__(my_args2), (void**)&args1, nanos_current_wd(), &props, 0, NULL) ); args1->p_i = my_value; args1->index = j; NANOS_SAFE( nanos_submit( wd1,1,&deps1,0 ) ); } NANOS_SAFE( nanos_wg_wait_completion( nanos_current_wd() ) ); for ( j = 0; j < 100; j++ ) { if ( my_value[j] != 2 ) return false; } return true; } // Test commutative tasks, this test creates a task with an inout dependency on an array an then // a bunch of commutative (reduction) tasks that update it. Then, another set of tasks are successors // of the commutative ones. 
This checks that the commutation task behaves correctly bool commutative_task_2() { int i, j; int size = 100; int my_value[size]; int *value_ref = (int *)&my_value; int my_results[size]; for ( i = 0; i < size; i++ ) { my_value[i] = 0; my_results[i] = 0; } nanos_wd_props_t props = { .mandatory_creation = true, .tied = false, .tie_to = false, }; my_args2 *args1=0; nanos_dependence_t deps1 = {(void **)&value_ref,0, {0,1,0,0}, 0}; nanos_wd_t wd1 = 0; nanos_device_t test_devices_1[1] = { NANOS_SMP_DESC( test_device_arg_6 ) }; NANOS_SAFE( nanos_create_wd ( &wd1, 1,test_devices_1, sizeof(my_args2), __alignof__(my_args2), (void**)&args1, nanos_current_wd(), &props, 0, NULL) ); args1->p_i = my_value; args1->index = size; NANOS_SAFE( nanos_submit( wd1,1,&deps1,0 ) ); for ( j = 0; j < size; j++ ) { my_args2 *args1=0; nanos_dependence_t deps1 = {(void **)&value_ref,0, {1,1,0,1}, 0}; nanos_wd_t wd1 = 0; nanos_device_t test_devices_1[1] = { NANOS_SMP_DESC( test_device_arg_5 ) }; NANOS_SAFE( nanos_create_wd ( &wd1, 1,test_devices_1, sizeof(my_args2), __alignof__(my_args2), (void**)&args1, nanos_current_wd(), &props, 0, NULL) ); args1->p_i = my_value; args1->index = j; NANOS_SAFE( nanos_submit( wd1,1,&deps1,0 ) ); } for ( j = 0; j < size; j++ ) { my_args3 *args1=0; nanos_dependence_t deps1 = {(void **)&value_ref,0, {1,0,0,0}, 0}; nanos_wd_t wd1 = 0; nanos_device_t test_devices_1[1] = { NANOS_SMP_DESC( test_device_arg_7 ) }; NANOS_SAFE( nanos_create_wd ( &wd1, 1,test_devices_1, sizeof(my_args3), __alignof__(my_args3), (void**)&args1, nanos_current_wd(), &props, 0, NULL) ); args1->p_i = my_value; args1->p_result = &my_results[j]; args1->index = j; NANOS_SAFE( nanos_submit( wd1,1,&deps1,0 ) ); } NANOS_SAFE( nanos_wg_wait_completion( nanos_current_wd() ) ); for ( j = 0; j < size; j++ ) { if ( my_results[j] < 0 ) return false; } return true; } // Test commutative tasks, this test creates a task with an inout dependency on an array an then // a bunch of tasks that read the dependency, 
then, again, a bunch of commutative (reduction) tasks // that update it. Then, another set of tasks are successors // of the commutative ones. This checks that the commutation task behaves correctly bool commutative_task_3() { int i, j; int size = 100; int my_value[size]; int *value_ref = (int *)&my_value; int my_results[size]; for ( i = 0; i < size; i++ ) { my_value[i] = 0; my_results[i] = 0; } nanos_wd_props_t props = { .mandatory_creation = true, .tied = false, .tie_to = false, }; my_args2 *args1=0; nanos_dependence_t deps1 = {(void **)&value_ref,0, {0,1,0,0}, 0}; nanos_wd_t wd1 = 0; nanos_device_t test_devices_1[1] = { NANOS_SMP_DESC( test_device_arg_6 ) }; NANOS_SAFE( nanos_create_wd ( &wd1, 1,test_devices_1, sizeof(my_args2), __alignof__(my_args2), (void**)&args1, nanos_current_wd(), &props, 0, NULL) ); args1->p_i = my_value; args1->index = size; NANOS_SAFE( nanos_submit( wd1,1,&deps1,0 ) ); for ( j = 0; j < size; j++ ) { my_args2 *args1=0; nanos_dependence_t deps1 = {(void **)&value_ref,0, {1,0,0,0}, 0}; nanos_wd_t wd1 = 0; nanos_device_t test_devices_1[1] = { NANOS_SMP_DESC( test_device_arg_8 ) }; NANOS_SAFE( nanos_create_wd ( &wd1, 1,test_devices_1, sizeof(my_args2), __alignof__(my_args2), (void**)&args1, nanos_current_wd(), &props, 0, NULL) ); args1->p_i = my_value; args1->index = j; NANOS_SAFE( nanos_submit( wd1,1,&deps1,0 ) ); } for ( j = 0; j < size; j++ ) { my_args2 *args1=0; nanos_dependence_t deps1 = {(void **)&value_ref,0, {1,1,0,1}, 0}; nanos_wd_t wd1 = 0; nanos_device_t test_devices_1[1] = { NANOS_SMP_DESC( test_device_arg_5 ) }; NANOS_SAFE( nanos_create_wd ( &wd1, 1,test_devices_1, sizeof(my_args2), __alignof__(my_args2), (void**)&args1, nanos_current_wd(), &props, 0, NULL) ); args1->p_i = my_value; args1->index = j; NANOS_SAFE( nanos_submit( wd1,1,&deps1,0 ) ); } for ( j = 0; j < size; j++ ) { my_args3 *args1=0; nanos_dependence_t deps1 = {(void **)&value_ref,0, {1,0,0,0}, 0}; nanos_wd_t wd1 = 0; nanos_device_t test_devices_1[1] = { 
NANOS_SMP_DESC( test_device_arg_7 ) }; NANOS_SAFE( nanos_create_wd ( &wd1, 1,test_devices_1, sizeof(my_args3), __alignof__(my_args3), (void**)&args1, nanos_current_wd(), &props, 0, NULL) ); args1->p_i = my_value; args1->p_result = &my_results[j]; args1->index = j; NANOS_SAFE( nanos_submit( wd1,1,&deps1,0 ) ); } NANOS_SAFE( nanos_wg_wait_completion( nanos_current_wd() ) ); for ( j = 0; j < size; j++ ) { if ( my_results[j] < 0 ) return false; } return true; } int main ( int argc, char **argv ) { printf("Single dependency test... \n"); fflush(stdout); if ( single_dependency() ) { printf("PASS\n"); fflush(stdout); } else { printf("FAIL\n"); fflush(stdout); } printf("Single inout chain test... \n"); fflush(stdout); if ( single_inout_chain() ) { printf("PASS\n"); fflush(stdout); } else { printf("FAIL\n"); fflush(stdout); } printf("Multiple inout chains test... \n"); fflush(stdout); if ( multiple_inout_chains() ) { printf("PASS\n"); fflush(stdout); } else { printf("FAIL\n"); fflush(stdout); return 1; } printf("task with multiple predecessors... \n"); fflush(stdout); if ( multiple_predecessors() ) { printf("PASS\n"); fflush(stdout); } else { printf("FAIL\n"); fflush(stdout); return 1; } printf("task with multiple anti-dependencies... \n"); fflush(stdout); if ( multiple_antidependencies() ) { printf("PASS\n"); fflush(stdout); } else { printf("FAIL\n"); fflush(stdout); return 1; } printf("Out dependencies chain... 
\n"); fflush(stdout); if ( out_dep_chain() ) { printf("PASS\n"); fflush(stdout); } else { printf("FAIL\n"); fflush(stdout); return 1; } printf("Wait on test...\n"); fflush(stdout); if ( wait_on_test() ) { printf("PASS\n"); fflush(stdout); } else { printf("FAIL\n"); fflush(stdout); return 1; } printf("create and run test...\n"); fflush(stdout); if ( create_and_run_test() ) { printf("PASS\n"); fflush(stdout); } else { printf("FAIL\n"); fflush(stdout); return 1; } printf("commutative tasks test...\n"); fflush(stdout); if ( commutative_task_1() ) { printf("PASS\n"); fflush(stdout); } else { printf("FAIL\n"); fflush(stdout); return 1; } printf("commutative tasks 2 test...\n"); fflush(stdout); if ( commutative_task_2() ) { printf("PASS\n"); fflush(stdout); } else { printf("FAIL\n"); fflush(stdout); return 1; } printf("commutative tasks 3 test...\n"); fflush(stdout); if ( commutative_task_3() ) { printf("PASS\n"); fflush(stdout); } else { printf("FAIL\n"); fflush(stdout); return 1; } return 0; }
int main(int argc, char **argv) { int i; int a[16]; int it; int rv = 0; for (i = 0; i < 16; i++) { a[i] = 0; } for (it = 0; it < 4; it++) { { nanos_err_t nanos_err; nanos_wd_dyn_props_t dyn_props; unsigned int nth_i; struct nanos_args_1_t imm_args; nanos_data_access_t dependences[1]; static nanos_smp_args_t smp_ol_main_1_args = {.outline = (void (*)(void *))(void (*)(struct nanos_args_1_t *))&smp_ol_main_1}; static struct nanos_const_wd_definition_1 nanos_wd_const_data = {.base = {.props = {.mandatory_creation = 1, .tied = 1, .clear_chunk = 0, .reserved0 = 0, .reserved1 = 0, .reserved2 = 0, .reserved3 = 0, .reserved4 = 0}, .data_alignment = __alignof__(struct nanos_args_1_t), .num_copies = 0, .num_devices = 1, .num_dimensions = 0, .description = 0}, .devices = {[0] = {.factory = &nanos_smp_factory, .arg = &smp_ol_main_1_args}}}; unsigned int nanos_num_threads = nanos_omp_get_num_threads_next_parallel(0); nanos_team_t nanos_team = (void *)0; nanos_thread_t nanos_team_threads[nanos_num_threads]; nanos_err = nanos_create_team(&nanos_team, (void *)0, &nanos_num_threads, (nanos_constraint_t *)0, 1, nanos_team_threads, &nanos_wd_const_data.base); if (nanos_err != NANOS_OK) { nanos_handle_error(nanos_err); } dyn_props.tie_to = (void *)0; dyn_props.priority = 0; dyn_props.flags.is_final = 0; for (nth_i = 1; nth_i < nanos_num_threads; nth_i = nth_i + 1) { dyn_props.tie_to = nanos_team_threads[nth_i]; struct nanos_args_1_t *ol_args = 0; nanos_wd_t nanos_wd_ = (void *)0; nanos_err = nanos_create_wd_compact(&nanos_wd_, &nanos_wd_const_data.base, &dyn_props, sizeof(struct nanos_args_1_t), (void **)&ol_args, nanos_current_wd(), (nanos_copy_data_t **)0, (nanos_region_dimension_internal_t **)0); if (nanos_err != NANOS_OK) { nanos_handle_error(nanos_err); } (*ol_args).i = &i; (*ol_args).a = &a; nanos_err = nanos_submit(nanos_wd_, 0, (nanos_data_access_t *)0, (void *)0); if (nanos_err != NANOS_OK) { nanos_handle_error(nanos_err); } } dyn_props.tie_to = nanos_team_threads[0]; 
imm_args.i = &i; imm_args.a = &a; nanos_err = nanos_create_wd_and_run_compact(&nanos_wd_const_data.base, &dyn_props, sizeof(struct nanos_args_1_t), &imm_args, 0, dependences, (nanos_copy_data_t *)0, (nanos_region_dimension_internal_t *)0, (void (*)(void *, nanos_wd_t))0); if (nanos_err != NANOS_OK) { nanos_handle_error(nanos_err); } nanos_err = nanos_end_team(nanos_team); if (nanos_err != NANOS_OK) { nanos_handle_error(nanos_err); } } }
void NANOS_parallel( void ( * func ) ( void * ), void * data, unsigned numThreads, long data_size, long ( *get_data_align )( void ), void * ( * get_empty_data )( void ), void ( * init_func ) ( void *, void * ) ) { nanos_err_t err; // Compute copy data (For SMP devices there are no copies. Just CUDA device requires copy data) int num_copies = 0; // TODO Compute dimensions int num_dimensions = 0; // Compute device descriptor (at the moment, only SMP is supported) int num_devices = 1; // TODO No dependencies for parallel construct in SMP devices int num_data_accesses = 0; nanos_data_access_t dependences[1]; // Create the Device descriptor (at the moment, only SMP is supported) nanos_smp_args_t _smp_args = { func }; char * parallel_name; asprintf( ¶llel_name, "parallel_%d", parallel_id++ ); struct nanos_const_wd_definition nanos_wd_const_data = { { { 1, // mandatory creation 1, // tied 0, 0, 0, 0, 0, 0 }, // properties ( *get_data_align )( ), // data alignment num_copies, num_devices, num_dimensions, parallel_name // description }, { { &nanos_smp_factory, // device description &_smp_args } // outlined function } }; // Compute properties of the WD: mandatory creation, priority, tiedness, real-time info and copy declarations nanos_wd_dyn_props_t dyn_props; dyn_props.tie_to = ( void * ) 0; dyn_props.priority = 0; dyn_props.flags.is_final = 0; // Create the working team if( numThreads == 0 ) numThreads = nanos_omp_get_num_threads_next_parallel( 0 ); void * nanos_team = ( void * ) 0; const unsigned int nthreads_vla = numThreads; void * team_threads[nthreads_vla]; err = nanos_create_team( &nanos_team, ( void * ) 0, &numThreads, (nanos_constraint_t *) 0, /*reuse current*/ 1, team_threads ); if( err != NANOS_OK ) nanos_handle_error( err ); // Create a wd tied to each thread unsigned nth_i; for( nth_i = 1; nth_i < numThreads; nth_i++ ) { // Set properties to the current wd of the team dyn_props.tie_to = team_threads[nth_i]; // Create the current WD of the team void * empty_data 
= ( *get_empty_data )( ); void * wd = ( void * ) 0; err = nanos_create_wd_compact( &wd, &nanos_wd_const_data.base, &dyn_props, data_size, ( void** ) &empty_data, nanos_current_wd( ), ( nanos_copy_data_t ** ) 0, ( nanos_region_dimension_internal_t ** ) 0 ); if (err != NANOS_OK) nanos_handle_error(err); // Initialize outlined data ( *init_func )( empty_data, data ); // Submit work to the WD err = nanos_submit( wd, num_data_accesses, ( nanos_data_access_t * ) 0, ( void * ) 0 ); if( err != NANOS_OK ) nanos_handle_error( err ); } // Create the wd for the master thread, which will run the team dyn_props.tie_to = team_threads[0]; err = nanos_create_wd_and_run_compact( &nanos_wd_const_data.base, &dyn_props, data_size, data, num_data_accesses, dependences, ( nanos_copy_data_t * ) 0, ( nanos_region_dimension_internal_t * ) 0, ( void ( * )( void *, void * ) ) 0 ); if( err != NANOS_OK ) nanos_handle_error( err ); // End the team err = nanos_end_team( nanos_team ); if( err != NANOS_OK ) nanos_handle_error( err ); }
static void NANOS_worksharing( int lb, int ub, int step, int chunk, char * description, void ( * func ) ( void * data, nanos_ws_desc_t * wsd ), void * data, long data_size, long ( * get_data_align )( void ), void * empty_data, void ( * init_func ) ( void *, void * ), void * ws_policy, bool wait ) { nanos_err_t err; // Create the Worksharing bool single_guard; nanos_ws_desc_t * wsd; nanos_ws_info_loop_t ws_info_loop; ws_info_loop.lower_bound = lb; ws_info_loop.upper_bound = ub; ws_info_loop.loop_step = step; ws_info_loop.chunk_size = chunk; err = nanos_worksharing_create( &wsd, ws_policy, ( void ** ) &ws_info_loop, &single_guard ); if( err != NANOS_OK ) nanos_handle_error( err ); if( single_guard ) { int sup_threads; err = nanos_team_get_num_supporting_threads( &sup_threads ); if( err != NANOS_OK ) nanos_handle_error( err ); if( sup_threads > 0 ) { // Configure the Worksahring err = nanos_malloc( ( void ** ) &( *wsd ).threads, sizeof( void * ) * sup_threads, /*filename*/"", /*fileline*/0 ); if( err != NANOS_OK ) nanos_handle_error( err ); err = nanos_team_get_supporting_threads( &( *wsd ).nths, ( *wsd ).threads ); if( err != NANOS_OK ) nanos_handle_error( err ); // Create the WD and its properties void * wd = ( void * ) 0; nanos_wd_dyn_props_t props; props.tie_to = ( void * ) 0; props.priority = 0; props.flags.is_final = 0; // Compute copy data (For SMP devices there are no copies. 
Just CUDA device requires copy data) int num_copies = 0; // Compute dependencies (ROSE is not currently supporting dependencies among the tasks) int num_data_accesses = 0; // TODO Compute dimensions int num_dimensions = 0; // Compute device descriptor (at the moment, only SMP is supported) int num_devices = 1; // Create the slicer nanos_smp_args_t _smp_args = { func }; struct nanos_const_wd_definition nanos_wd_const_data = { { { 1, // mandatory creation 1, // tied 0, 0, 0, 0, 0, 0 }, // properties ( *get_data_align )( ), // data alignment num_copies, num_devices, num_dimensions, description // description }, { { &nanos_smp_factory, // device description &_smp_args } // outlined function } }; void * slicer = nanos_find_slicer( "replicate" ); if( slicer == (void *)0 ) nanos_handle_error( NANOS_UNIMPLEMENTED ); struct sections_data_t* empty_data = ( struct sections_data_t * ) 0; err = nanos_create_sliced_wd( &wd, nanos_wd_const_data.base.num_devices, nanos_wd_const_data.devices, data_size, nanos_wd_const_data.base.data_alignment, ( void ** ) &empty_data, ( void ** ) 0, slicer, &nanos_wd_const_data.base.props, &props, num_copies, ( nanos_copy_data_t ** ) 0, num_dimensions, ( nanos_region_dimension_internal_t ** ) 0 ); if( err != NANOS_OK ) nanos_handle_error( err ); // Initialize outlined data ( *init_func )( empty_data, data ); // Submit the work to the runtime system err = nanos_submit( wd, num_data_accesses, ( nanos_data_access_t * ) 0, ( nanos_team_t ) 0 ); if( err != NANOS_OK ) nanos_handle_error( err ); err = nanos_free( ( * wsd ).threads ); if( err != NANOS_OK ) nanos_handle_error( err ); } } ( * func )( data, wsd ); // Wait in case it is necessary if( wait ) { err = nanos_omp_barrier( ); if( err != NANOS_OK ) nanos_handle_error( err ); } }
/**
 * Create and submit (or immediately run) a task through the Nanos++ runtime.
 *
 * If the runtime reports that the caller is inside a final task, `func` is
 * simply called on `data`. Otherwise a work descriptor is created; when the
 * runtime returns one, the outlined data is initialized with `init_func` and
 * the WD is submitted with its dependences. When no WD is returned, the task
 * is created and run immediately with `data` as its argument block.
 *
 * @param func            Outlined task body, registered as the SMP outline.
 * @param data            Argument block of the task.
 * @param data_size       Size passed to the runtime for the WD data buffer.
 * @param get_data_align  Callback returning the alignment of the data buffer.
 * @param empty_data      Placeholder overwritten with the buffer returned by
 *                        nanos_create_wd_compact.
 * @param init_func       Called as init_func(empty_data, data) before submit.
 * @param if_clause       Not used by this function.
 * @param untied          Non-zero makes the task untied (tied = !untied).
 * @param num_deps        Number of entries in the deps_* arrays; values <= 0
 *                        are treated as "no dependences".
 * @param deps_dir        Per-dependence direction bits (e_dep_dir_*).
 * @param deps_data       Per-dependence base address.
 * @param deps_n_dims     Per-dependence number of dimensions.
 * @param deps_dims       Per-dependence dimension descriptors.
 * @param deps_offset     Per-dependence offset.
 *
 * Every runtime error is reported through nanos_handle_error.
 */
void NANOS_task( void ( * func ) ( void * ), void *data, long data_size,
                 long ( * get_data_align ) ( void ), void * empty_data,
                 void ( * init_func ) ( void *, void * ), bool if_clause, unsigned untied,
                 int num_deps, int * deps_dir, void ** deps_data, int * deps_n_dims,
                 nanos_region_dimension_t ** deps_dims, long int * deps_offset )
{
    nanos_err_t err;

    // Initialized so the flag is never read with an indeterminate value, and
    // the query result is checked like every other runtime call here.
    bool nanos_is_in_final = false;
    err = nanos_in_final( &nanos_is_in_final );
    if( err != NANOS_OK )
        nanos_handle_error( err );

    if( nanos_is_in_final )
    {
        ( *func )( data );
    }
    else
    {
        // Compute copy data (For SMP devices there are no copies. Just CUDA device requires copy data)
        int num_copies = 0;

        // TODO Compute dimensions (for devices other than SMP)
        int num_dimensions = 0;

        // Compute device descriptor (at the moment, only SMP is supported)
        int num_devices = 1;

        // Compute dependencies.
        // A non-positive count means no dependences. The array keeps at least
        // one element because a zero-length VLA is undefined behavior.
        const unsigned int num_data_accesses = ( num_deps > 0 ) ? ( unsigned int ) num_deps : 0u;
        nanos_data_access_t dependences[( num_data_accesses > 0u ) ? num_data_accesses : 1u];
        unsigned int i;
        for( i = 0; i < num_data_accesses; ++i )
        {
            int in = ( deps_dir[i] & ( e_dep_dir_in | e_dep_dir_inout ) );
            int out = ( deps_dir[i] & ( e_dep_dir_out | e_dep_dir_inout ) );
            nanos_access_type_internal_t flags = {
                ( in != 0 ),    // input
                ( out != 0 ),   // output
                0,              // can rename
                0,              // concurrent
                0,              // commutative
            };
            nanos_data_access_t dep = { deps_data[i], flags, deps_n_dims[i], deps_dims[i], deps_offset[i] };
            dependences[i] = dep;
        }

        // Create the Device descriptor (at the moment, only SMP is supported)
        nanos_smp_args_t _smp_args = { func };

        // On failure asprintf leaves the pointer unspecified, so fall back to
        // a null description instead of passing garbage to the runtime.
        // NOTE(review): the string is never freed here; presumably the runtime
        // keeps the description pointer beyond this call — confirm before
        // adding a free().
        char * task_name = ( char * ) 0;
        if( asprintf( &task_name, "task_%d", task_id++ ) < 0 )
            task_name = ( char * ) 0;

        struct nanos_const_wd_definition nanos_wd_const_data = {
            {
                { 0,                        // mandatory creation
                  !untied,                  // tied
                  0, 0, 0, 0, 0, 0 },       // properties
                ( *get_data_align )( ),     // data alignment
                num_copies,
                num_devices,
                num_dimensions,
                task_name                   // description
            },
            {
                { &nanos_smp_factory,       // device description
                  &_smp_args }              // outlined function
            }
        };

        // Compute properties of the WD: mandatory creation, priority, tiedness,
        // real-time info and copy declarations.
        // Zero-initialize so that fields not set explicitly below are not
        // handed to the runtime with indeterminate values.
        nanos_wd_dyn_props_t dyn_props = { 0 };
        dyn_props.tie_to = 0;
        dyn_props.priority = 0;
        dyn_props.flags.is_final = 0;

        // Create the WD
        nanos_wd_t wd = ( nanos_wd_t ) 0;
        err = nanos_create_wd_compact( &wd, &nanos_wd_const_data.base, &dyn_props, data_size,
                                       ( void ** ) &empty_data, nanos_current_wd( ),
                                       ( nanos_copy_data_t ** ) 0,
                                       ( nanos_region_dimension_internal_t ** ) 0 );
        if( err != NANOS_OK )
            nanos_handle_error( err );

        if( wd != ( void * ) 0 )
        {
            // Submit the task to the existing actual working group

            // Initialize outlined data
            ( *init_func )( empty_data, data );

            err = nanos_submit( wd, num_data_accesses, dependences, ( void * ) 0 );
            if( err != NANOS_OK )
                nanos_handle_error( err );
        }
        else
        {
            // The task must be run immediately
            err = nanos_create_wd_and_run_compact( &nanos_wd_const_data.base, &dyn_props,
                                                   data_size, data, num_data_accesses, dependences,
                                                   ( nanos_copy_data_t * ) 0,
                                                   ( nanos_region_dimension_internal_t * ) 0,
                                                   ( void ( * )( void *, void * ) ) 0 );
            if( err != NANOS_OK )
                nanos_handle_error( err );
        }
    }
}