Beispiel #1
0
int main ( int argc, char **argv )
{
   char text[10] = "123456789";
   char text2[10] = "987654321";
   char* dummy1 = text;
   
   my_args* args = 0;
   nanos_wd_props_t props = {
     .mandatory_creation = true,
     .tied = false,
     .tie_to = false,
   };

   nanos_copy_data_t *cd = 0;

   nanos_wd_t wd1=0;
   nanos_device_t test_devices_1[1] = { NANOS_SMP_DESC( test_device_arg_1) };
   NANOS_SAFE( nanos_create_wd ( &wd1, 1,test_devices_1, sizeof(my_args), __alignof__(my_args), (void**)&args, nanos_current_wd(), &props, 2, &cd) );

   args->a = 1;
   args->b = dummy1;

   cd[0] = (nanos_copy_data_t) {(uint64_t)&(args->a), NANOS_PRIVATE, {true, false}, sizeof(args->a)};
   cd[1] = (nanos_copy_data_t) {(uint64_t)args->b, NANOS_SHARED, {true, true}, sizeof(char)*10}; 

   NANOS_SAFE( nanos_submit( wd1,0,0,0 ) );

   NANOS_SAFE( nanos_wg_wait_completion( nanos_current_wd() ) );

   if ( strcmp( text2, dummy1 ) == 0 ) {
      printf( "Checking for copy-back correctness...  PASS\n" );
   } else {
      printf( "Checking for copy-back correctness...  FAIL\n" );
      printf( "expecting '%s', copied back: '%s'\n", text2, dummy1 );
      return 1;
   }

   return 0;
}
/*
 * Task-spawning Fibonacci step (the mcc_/nanos_ naming suggests compiler-generated
 * code - TODO confirm).
 *
 * For n < 2 the result n is stored through res. Otherwise the visible code
 * arranges for fib(n - 1) to be computed into the local res1, either serially
 * (when the runtime reports a "final" context) or as a Nanos++ work descriptor.
 *
 * NOTE(review): this snippet is truncated in the file - the else branch and the
 * function body are never closed, res2 is never used in the visible part and
 * nothing is stored through res on the recursive path.
 */
void fib(int n, int *res)
{
  if (n < 2)
    {
      *res = n;
    }
  else
    {
      int res1 = 0;
      int res2 = 0;
      {
        /* Arguments of the first child call: fib(n - 1, &res1). */
        int mcc_arg_0 = n - 1;
        int *mcc_arg_1 = &res1;
        {
          _Bool mcc_is_in_final;
          nanos_err_t mcc_err_in_final = nanos_in_final(&mcc_is_in_final);
          if (mcc_err_in_final != NANOS_OK)
            {
              nanos_handle_error(mcc_err_in_final);
            }
          /* In a final context no task is created; the serial variant is called directly. */
          if (mcc_is_in_final)
            {
              fib_mcc_serial(n - 1, &res1);
            }
          else
            {
              {
                nanos_wd_dyn_props_t nanos_wd_dyn_props;
                int memo_dimensions[1];
                struct nanos_args_0_t *ol_args;
                nanos_err_t err;
                struct nanos_args_0_t imm_args;
                /* Static descriptors: SMP outline function and constant WD definition (1 copy, 1 device, 1 dimension). */
                static nanos_smp_args_t smp_ol_fib_1_args = {.outline = (void (*)(void *))(void (*)(struct nanos_args_0_t *))&smp_ol_fib_1};
                static struct nanos_const_wd_definition_1 nanos_wd_const_data = {.base = {.props = {.mandatory_creation = 0, .tied = 0, .clear_chunk = 0, .reserved0 = 0, .reserved1 = 0, .reserved2 = 0, .reserved3 = 0, .reserved4 = 0}, .data_alignment = __alignof__(struct nanos_args_0_t), .num_copies = 1, .num_devices = 1, .num_dimensions = 1, .description = 0}, .devices = {[0] = {.factory = &nanos_smp_factory, .arg = &smp_ol_fib_1_args}}};
                nanos_wd_dyn_props.tie_to = 0;
                nanos_wd_dyn_props.priority = 0;
                nanos_wd_dyn_props.flags.is_final = 1;
                /* mcc_arg_0 is n - 1, so the single memo dimension is n. */
                memo_dimensions[0] = mcc_arg_0 + 1;
                nanos_wd_dyn_props.memo.num_dimensions = 1;
                nanos_wd_dyn_props.memo.dimensions = memo_dimensions;
                ol_args = (struct nanos_args_0_t *)0;
                nanos_wd_t nanos_wd_ = (void *)0;
                nanos_copy_data_t *ol_copy_data = (nanos_copy_data_t *)0;
                nanos_region_dimension_internal_t *ol_copy_dimensions = (nanos_region_dimension_internal_t *)0;
                err = nanos_create_wd_compact(&nanos_wd_, &nanos_wd_const_data.base, &nanos_wd_dyn_props, sizeof(struct nanos_args_0_t), (void **)&ol_args, nanos_current_wd(), &ol_copy_data, &ol_copy_dimensions);
                if (err != NANOS_OK)
                  {
                    nanos_handle_error(err);
                  }
                /* One output-only dependence on the int that mcc_arg_1 (&res1) points to. */
                nanos_region_dimension_t dimensions_0[1] = {[0] = {.size = sizeof(int), .lower_bound = 0, .accessed_length = sizeof(int)}};
                nanos_data_access_t dependences[1] = {[0] = {.address = (void *)mcc_arg_1, .flags = {.input = 0, .output = 1, .can_rename = 0, .concurrent = 0, .commutative = 0}, .dimension_count = (short int)1, .dimensions = dimensions_0, .offset = 0}};
                ;
                /* A non-null WD was created: fill its argument block and copy descriptor, then submit it. */
                if (nanos_wd_ != (void *)0)
                  {
                    (*ol_args).n = mcc_arg_0;
                    (*ol_args).res = mcc_arg_1;
                    ol_copy_dimensions[0].size = 1 * sizeof(int);
                    ol_copy_dimensions[0].lower_bound = 0 * sizeof(int);
                    ol_copy_dimensions[0].accessed_length = (0 - 0 + 1) * sizeof(int);
                    ol_copy_data[0].sharing = NANOS_SHARED;
                    ol_copy_data[0].address = (void *)mcc_arg_1;
                    ol_copy_data[0].flags.input = 0;
                    ol_copy_data[0].flags.output = 1;
                    ol_copy_data[0].dimension_count = (short int)1;
                    ol_copy_data[0].dimensions = &ol_copy_dimensions[0];
                    ol_copy_data[0].offset = 0;
                    err = nanos_set_translate_function(nanos_wd_, (void (*)(void *, nanos_wd_t))nanos_xlate_fun_fibompmemoc_0);
                    if (err != NANOS_OK)
                      {
                        nanos_handle_error(err);
                      }
                    err = nanos_submit(nanos_wd_, 1, dependences, (void *)0);
                    if (err != NANOS_OK)
                      {
                        nanos_handle_error(err);
                      }
                  }
                else
                  {
                    /* No WD was handed back: build the same arguments on the stack and use the create-and-run entry point. */
                    nanos_region_dimension_internal_t imm_copy_dimensions[1];
                    nanos_copy_data_t imm_copy_data[1];
                    imm_args.n = mcc_arg_0;
                    imm_args.res = mcc_arg_1;
                    imm_copy_dimensions[0].size = 1 * sizeof(int);
                    imm_copy_dimensions[0].lower_bound = 0 * sizeof(int);
                    imm_copy_dimensions[0].accessed_length = (0 - 0 + 1) * sizeof(int);
                    imm_copy_data[0].sharing = NANOS_SHARED;
                    imm_copy_data[0].address = (void *)mcc_arg_1;
                    imm_copy_data[0].flags.input = 0;
                    imm_copy_data[0].flags.output = 1;
                    imm_copy_data[0].dimension_count = (short int)1;
                    imm_copy_data[0].dimensions = &imm_copy_dimensions[0];
                    imm_copy_data[0].offset = 0;
                    err = nanos_create_wd_and_run_compact(&nanos_wd_const_data.base, &nanos_wd_dyn_props, sizeof(struct nanos_args_0_t), &imm_args, 1, dependences, imm_copy_data, imm_copy_dimensions, (void (*)(void *, nanos_wd_t))nanos_xlate_fun_fibompmemoc_0);
                    if (err != NANOS_OK)
                      {
                        nanos_handle_error(err);
                      }
                  }
              }
            }
        }
      }
/*
 * Partial-sums example.
 *
 * Usage: <prog> number_of_processors   (must be in [1, 16])
 *
 * Allocates one integer array per "processor", fills it with alternating
 * 1/0 values, creates one Nanos++ work descriptor per array (outlined
 * function _smp__ol_main_0), waits for all of them and prints every partial
 * sum plus the grand total.
 *
 * Returns 0 on success and on usage/range errors (unchanged from the
 * original), 1 if memory cannot be allocated.
 */
int main(int argc, char * argv[])
{
    /* Elements per array; value kept exactly as in the original code. */
    enum { L_ARRAY_LENGTH = 16834 };

    int * * l_array_of_arrays;
    int * l_partial_sums;
    int l_num_procs;
    int l_total;
    int l_i, l_j;
    if (argc != 2)
    {
        printf("Usage: %s number_of_processors\n", argv[0]);
        return 0;
    }
    l_num_procs = atoi(argv[1]);
    /* Must be ||: with && the condition could never hold, so zero, negative
       and oversized counts slipped through to the allocations below. */
    if (l_num_procs < 1 || l_num_procs > 16)
    {
        printf("The number of processors must be between 1 and 16\n");
        return 0;
    }
    l_partial_sums = malloc(l_num_procs * sizeof *l_partial_sums);
    l_array_of_arrays = malloc(l_num_procs * sizeof *l_array_of_arrays);
    if (l_partial_sums == NULL || l_array_of_arrays == NULL)
    {
        fprintf(stderr, "Out of memory\n");
        free(l_array_of_arrays);
        free(l_partial_sums);
        return 1;
    }
    for (l_i = 0;
        l_i < l_num_procs;
        l_i++)
    {
        l_array_of_arrays[l_i] = malloc(L_ARRAY_LENGTH * sizeof **l_array_of_arrays);
        if (l_array_of_arrays[l_i] == NULL)
        {
            fprintf(stderr, "Out of memory\n");
            /* Release the rows allocated so far before bailing out. */
            while (l_i-- > 0)
                free(l_array_of_arrays[l_i]);
            free(l_array_of_arrays);
            free(l_partial_sums);
            return 1;
        }
        /* Even indices hold 1, odd indices hold 0. */
        for (l_j = 0;
            l_j < L_ARRAY_LENGTH;
            l_j++)
        {
            if ((l_j % 2) == 0)
                l_array_of_arrays[l_i][l_j] = 1;
            else
                l_array_of_arrays[l_i][l_j] = 0;
        }
    }
    /* One work descriptor per array. */
    for (l_i = 0;
        l_i < l_num_procs;
        l_i++)
    {
        {
            nanos_smp_args_t _ol_main_0_smp_args = {
                (void (*)(void *)) _smp__ol_main_0
            };
            _nx_data_env_0_t * ol_args = (_nx_data_env_0_t *) 0;
            nanos_wd_t wd = (nanos_wd_t) 0;
            const_data1.data_alignment = __alignof__(_nx_data_env_0_t);
            const_data1.devices[0].arg = &_ol_main_0_smp_args;
            nanos_wd_dyn_props_t dyn_data1 = { 0 };
            nanos_err_t err;
            err = nanos_create_wd_compact(&wd, (nanos_const_wd_definition_t *) &const_data1, &dyn_data1, sizeof(_nx_data_env_0_t), (void **) &ol_args, nanos_current_wd(), (nanos_copy_data_t **) 0, NULL);
            if (err != NANOS_OK)
                nanos_handle_error(err);
            if (wd != (nanos_wd_t) 0)
            {
                /* A WD was created: fill its argument block and submit it. */
                ol_args->l_array_of_arrays_0 = l_array_of_arrays;
                ol_args->l_partial_sums_0 = l_partial_sums;
                ol_args->l_i_0 = l_i;
                err = nanos_submit(wd, 0, (nanos_data_access_t *) 0, (nanos_team_t) 0);
                if (err != NANOS_OK)
                    nanos_handle_error(err);
            }
            else
            {
                /* No WD was handed back: pass stack arguments to the create-and-run entry point. */
                _nx_data_env_0_t imm_args;
                imm_args.l_array_of_arrays_0 = l_array_of_arrays;
                imm_args.l_partial_sums_0 = l_partial_sums;
                imm_args.l_i_0 = l_i;
                err = nanos_create_wd_and_run_compact((nanos_const_wd_definition_t *) &const_data1, &dyn_data1,  sizeof(_nx_data_env_0_t),
                       &imm_args, 0, (nanos_data_access_t *) 0, (nanos_copy_data_t *) 0, 0, NULL);
                if (err != NANOS_OK)
                    nanos_handle_error(err);
            }
        }
    }
    nanos_wg_wait_completion( nanos_current_wd(), 0 );
    l_total = 0;
    for (l_i = 0;
        l_i < l_num_procs;
        l_i++)
    {
        printf("%d -> %d\n", l_i, l_partial_sums[l_i]);
        l_total += l_partial_sums[l_i];
    }
    printf("Result = %d\n", l_total);
    /* Buffers are released only after the wait above and after the results were read.
       NOTE(review): assumes the wait covers every task submitted in the loop - confirm. */
    for (l_i = 0;
        l_i < l_num_procs;
        l_i++)
    {
        free(l_array_of_arrays[l_i]);
    }
    free(l_array_of_arrays);
    free(l_partial_sums);
    return 0;
}
bool single_dependency()
{
   int my_value;
   int * dep_addr = &my_value;
   my_args *args1=0;
   nanos_dependence_t deps1 = {(void **)&dep_addr,0, {0,1,0,0}, 0};
   nanos_wd_props_t props = {
     .mandatory_creation = true,
     .tied = false,
     .tie_to = false,
   };
   nanos_wd_t wd1=0;
   nanos_device_t test_devices_1[1] = { NANOS_SMP_DESC( test_device_arg_1) };
   NANOS_SAFE( nanos_create_wd ( &wd1, 1,test_devices_1, sizeof(my_args), __alignof__(my_args), (void**)&args1, nanos_current_wd(), &props, 0, NULL) );
   args1->p_i = dep_addr;
   NANOS_SAFE( nanos_submit( wd1,1,&deps1,0 ) );

   my_args *args2=0;
   nanos_dependence_t deps2 = {(void **)&dep_addr,0, {1,1,0,0}, 0};
   nanos_wd_t wd2 = 0;
   nanos_device_t test_devices_2[1] = { NANOS_SMP_DESC( test_device_arg_2 ) };
   NANOS_SAFE( nanos_create_wd ( &wd2, 1,test_devices_2, sizeof(my_args), __alignof__(my_args), (void**)&args2, nanos_current_wd(), &props, 0, NULL) );
   args2->p_i = dep_addr;
   NANOS_SAFE( nanos_submit( wd2,1,&deps2,0 ) );

   NANOS_SAFE( nanos_wg_wait_completion( nanos_current_wd() ) );
   
   return (my_value == 1);
}

bool single_inout_chain()
{
   int i;
   int my_value;
   int * dep_addr = &my_value;
   my_args *args1=0;
   nanos_dependence_t deps1 = {(void **)&dep_addr,0, {0,1,0,0}, 0};
   nanos_wd_props_t props = {
     .mandatory_creation = true,
     .tied = false,
     .tie_to = false,
   };
   nanos_wd_t wd1=0;
   nanos_device_t test_devices_1[1] = { NANOS_SMP_DESC( test_device_arg_1) };
   NANOS_SAFE( nanos_create_wd ( &wd1, 1,test_devices_1, sizeof(my_args), __alignof__(my_args), (void**)&args1, nanos_current_wd(), &props, 0, NULL) );
   args1->p_i = dep_addr;
   NANOS_SAFE( nanos_submit( wd1,1,&deps1,0 ) );

   for ( i = 0; i < 100; i++ ) {
      my_args *args2=0;
      nanos_dependence_t deps2 = {(void **)&dep_addr,0, {1,1,0,0}, 0};
      nanos_wd_t wd2 = 0;
      nanos_device_t test_devices_2[1] = { NANOS_SMP_DESC( test_device_arg_2 ) };
      NANOS_SAFE( nanos_create_wd ( &wd2, 1,test_devices_2, sizeof(my_args), __alignof__(my_args), (void**)&args2, nanos_current_wd(), &props, 0, NULL) );
      args2->p_i = dep_addr;
      NANOS_SAFE( nanos_submit( wd2,1,&deps2,0 ) );
   }

   NANOS_SAFE( nanos_wg_wait_completion( nanos_current_wd() ) );
   
   return (my_value == 100);
}

bool multiple_inout_chains()
{
   int i, j;
   int size = 10;
   int my_value[size];

   for ( i = 0; i < size; i++ ) {
      int * dep_addr = &my_value[i];
      my_args *args1=0;
      nanos_dependence_t deps1 = {(void **)&dep_addr,0, {0,1,0,0}, 0};
      nanos_wd_props_t props = {
        .mandatory_creation = true,
        .tied = false,
        .tie_to = false,
      };
      nanos_wd_t wd1=0;
      nanos_device_t test_devices_1[1] = { NANOS_SMP_DESC( test_device_arg_1) };
      NANOS_SAFE( nanos_create_wd ( &wd1, 1,test_devices_1, sizeof(my_args), __alignof__(my_args), (void**)&args1, nanos_current_wd(), &props, 0, NULL) );
      args1->p_i = dep_addr;
      NANOS_SAFE( nanos_submit( wd1,1,&deps1,0 ) );

      for ( j = 0; j < size; j++ ) {
         my_args *args2=0;
         nanos_dependence_t deps2 = {(void **)&dep_addr,0, {1,1,0,0}, 0};
         nanos_wd_t wd2 = 0;
         nanos_device_t test_devices_2[1] = { NANOS_SMP_DESC( test_device_arg_2 ) };
         NANOS_SAFE( nanos_create_wd ( &wd2, 1,test_devices_2, sizeof(my_args), __alignof__(my_args), (void**)&args2, nanos_current_wd(), &props, 0, NULL) );
         args2->p_i = dep_addr;
         NANOS_SAFE( nanos_submit( wd2,1,&deps2,0 ) );
      }
   }

   NANOS_SAFE( nanos_wg_wait_completion( nanos_current_wd() ) );

   for ( i = 0; i < size; i++ ) {
      if ( my_value[i] != size ) return false;
   }
   return true;
}

bool multiple_predecessors()
{
   int j;
   int size=100;
   int my_value[size];
   nanos_wd_props_t props = {
     .mandatory_creation = true,
     .tied = false,
     .tie_to = false,
   };

   for ( j = 0; j < size; j++ ) {
      int * dep_addr1 = &my_value[j];
      my_args *args1=0;
      nanos_dependence_t deps1 = {(void **)&dep_addr1,0, {0,1,0,0}, 0};
      nanos_wd_t wd1 = 0;

      nanos_device_t test_devices_1[1] = { NANOS_SMP_DESC( test_device_arg_1 ) };
      NANOS_SAFE( nanos_create_wd ( &wd1, 1,test_devices_1, sizeof(my_args), __alignof(my_args), (void**)&args1, nanos_current_wd(), &props, 0, NULL) );
      args1->p_i = dep_addr1;
      NANOS_SAFE( nanos_submit( wd1,1,&deps1,0 ) );
   }

   nanos_dependence_t deps2[size];
   int *dep_addr2[size];
   my_args *args2=0;
   for ( j = 0; j < size; j++ ) {
      dep_addr2[j] = &my_value[j];
      deps2[j] = (nanos_dependence_t){(void **) &dep_addr2[j],0, {1,1,0,0},0};
   }

   nanos_wd_t wd2=0;
   nanos_device_t test_devices_3[1] = { NANOS_SMP_DESC( test_device_arg_3) };
   NANOS_SAFE( nanos_create_wd ( &wd2, 1,test_devices_3, sizeof(my_args)*size, __alignof__(my_args), (void**)&args2, nanos_current_wd(), &props, 0, NULL) );
   for ( j = 0; j < size; j++)
      args2[j].p_i = dep_addr2[j];
   NANOS_SAFE( nanos_submit( wd2,size,&deps2[0],0 ) );

   NANOS_SAFE( nanos_wg_wait_completion( nanos_current_wd() ) );
   for ( j = 0; j < size; j++ ) {
      if ( my_value[j] != 1 ) return false;
   }
   return true;
}

bool multiple_antidependencies()
{
   int j;
   int my_value=1500;
   int my_reslt[100];
   nanos_wd_props_t props = {
     .mandatory_creation = true,
     .tied = false,
     .tie_to = false,
   };

   for ( j = 0; j < 100; j++ ) {
      int * dep_addr1 = &my_value;
      int * reslt_addr =&my_reslt[j];
      my_args *args1=0;
      nanos_dependence_t deps1 = {(void **)&dep_addr1,0, {1,0,0,0}, 0};

      nanos_wd_t wd1 = 0;
      nanos_device_t test_devices_4[1] = { NANOS_SMP_DESC( test_device_arg_4 ) };
      NANOS_SAFE( nanos_create_wd ( &wd1, 1,test_devices_4, sizeof(my_args)*2, __alignof__(my_args), (void**)&args1, nanos_current_wd(), &props, 0, NULL) );
      args1[0].p_i = dep_addr1;
      args1[1].p_i = reslt_addr;
      NANOS_SAFE( nanos_submit( wd1,1,&deps1,0 ) );
   }

   int *dep_addr2 = &my_value;
   nanos_dependence_t deps2 = (nanos_dependence_t){(void **) &dep_addr2,0, {1,1,0,0},0};
   my_args *args2=0;

   nanos_wd_t wd2=0;
   nanos_device_t test_devices_2[1] = { NANOS_SMP_DESC( test_device_arg_2) };
   NANOS_SAFE( nanos_create_wd ( &wd2, 1,test_devices_2, sizeof(my_args), __alignof__(my_args), (void**)&args2, nanos_current_wd(), &props, 0, NULL) );
   args2->p_i = dep_addr2;
   NANOS_SAFE( nanos_submit( wd2,1,&deps2,0 ) );

   NANOS_SAFE( nanos_wg_wait_completion( nanos_current_wd() ) );
   for ( j = 0; j < 100; j++ ) {
      if ( my_reslt[j] != 1500 ) return false;
   }
   if (my_value != 1501) return false;
   return true;
}

bool out_dep_chain()
{
   int i;
   int my_value;
   int * dep_addr = &my_value;
   nanos_wd_props_t props = {
     .mandatory_creation = true,
     .tied = false,
     .tie_to = false,
   };

   for ( i = 0; i < 100; i++ ) {
      my_args *args2=0;
      nanos_dependence_t deps2 = {(void **)&dep_addr,0, {0,1,0,0}, 0};
      nanos_wd_t wd2 = 0;
      nanos_device_t test_devices_1[1] = { NANOS_SMP_DESC( test_device_arg_1 ) };
      NANOS_SAFE( nanos_create_wd ( &wd2, 1,test_devices_1, sizeof(my_args), __alignof__(my_args),(void**)&args2, nanos_current_wd(), &props, 0, NULL) );
      args2->p_i = dep_addr;
      NANOS_SAFE( nanos_submit( wd2,1,&deps2,0 ) );
   }

   int input=500;
   int * input_addr = &input;
   nanos_dependence_t deps1 = {(void **)&dep_addr,0, {0,1,0,0}, 0};
   my_args *args1=0;
   nanos_wd_t wd1=0;
   nanos_device_t test_devices_1[1] = { NANOS_SMP_DESC( test_device_arg_4) };
   NANOS_SAFE( nanos_create_wd ( &wd1, 1,test_devices_1, sizeof(my_args)*2, __alignof__(my_args), (void**)&args1, nanos_current_wd(), &props, 0, NULL) );
   args1[0].p_i = input_addr;
   args1[1].p_i = dep_addr;
   NANOS_SAFE( nanos_submit( wd1,1,&deps1,0 ) );

   NANOS_SAFE( nanos_wg_wait_completion( nanos_current_wd() ) );
   
   return (my_value == 500);
}

bool wait_on_test()
{
   int j;
   int size=10;
   int my_value[size];
   nanos_wd_props_t props = {
     .mandatory_creation = true,
     .tied = false,
     .tie_to = false,
   };

   for ( j = 0; j < size; j++ ) {
      my_value[j] = 500;
      int * dep_addr1 = &my_value[j];
      my_args *args1=0;
      nanos_dependence_t deps1 = {(void **)&dep_addr1,0, {0,1,0,0}, 0};
      nanos_wd_t wd1 = 0;

      nanos_device_t test_devices_1[1] = { NANOS_SMP_DESC( test_device_arg_1 ) };
      NANOS_SAFE( nanos_create_wd ( &wd1, 1,test_devices_1, sizeof(my_args), __alignof__(my_args), (void**)&args1, nanos_current_wd(), &props, 0, NULL) );
      args1->p_i = dep_addr1;
      NANOS_SAFE( nanos_submit( wd1,1,&deps1,0 ) );
   }

   nanos_dependence_t deps2[size];
   int *dep_addr2[size];
   for ( j = 0; j < size; j++ ) {
      dep_addr2[j] = &my_value[j];
      deps2[j] = (nanos_dependence_t){(void **) &dep_addr2[j],0, {1,0,0,0},0};
   }
   
   NANOS_SAFE( nanos_wait_on( size, &deps2[0] ));

   for ( j = 0; j < size; j++ ) {
    if ( my_value[j] != 0 ) return false;
   }
   return true;
}

bool create_and_run_test()
{
   int j;
   int my_value[100];
   int other_value=0;
   nanos_wd_props_t props = {
     .mandatory_creation = true,
     .tied = false,
     .tie_to = false,
   };

   for ( j = 0; j < 100; j++ ) {
      my_value[j] = 500;
      int * dep_addr1 = &my_value[j];
      my_args *args1=0;
      nanos_dependence_t deps1 = {(void **)&dep_addr1,0, {0,1,0,0}, 0};
      nanos_wd_t wd1 = 0;

      nanos_device_t test_devices_1[1] = { NANOS_SMP_DESC( test_device_arg_1 ) };
      NANOS_SAFE( nanos_create_wd ( &wd1, 1,test_devices_1, sizeof(my_args), __alignof__(my_args), (void**)&args1, nanos_current_wd(), &props, 0, NULL) );
      args1->p_i = dep_addr1;
      NANOS_SAFE( nanos_submit( wd1,1,&deps1,0 ) );
   }

   nanos_dependence_t deps2[100];
   int *dep_addr2[100];
   for ( j = 0; j < 100; j++ ) {
      dep_addr2[j] = &my_value[j];
      deps2[j] = (nanos_dependence_t){(void **) &dep_addr2[j],0, {1,0,0,0},0};
   }

   my_args arg;
   arg.p_i = &other_value;
   nanos_device_t test_devices_2[1] = { NANOS_SMP_DESC( test_device_arg_1 ) };

   NANOS_SAFE( nanos_create_wd_and_run( 1, test_devices_2, sizeof(my_args), __alignof__(my_args),  (void *)&arg, 100, &deps2[0], &props , 0, NULL, NULL ) );

   for ( j = 0; j < 100; j++ ) {
    if ( my_value[j] != 0 ) return false;
   }
   return true;
}

// Test commutative tasks, this test creates a task with an inout dependency on an array an then
// a bunch of commutative (reduction) tasks that update it. Finally it waits for them all to finish and
// checks the result
bool commutative_task_1()
{
   int i, j;
   int size = 100;
   int my_value[size];
   int *value_ref = (int *)&my_value;

   for ( i = 0; i < size; i++ ) {
      my_value[i] = 0;
   }

   nanos_wd_props_t props = {
     .mandatory_creation = true,
     .tied = false,
     .tie_to = false,
   };

   my_args2 *args1=0;
   nanos_dependence_t deps1 = {(void **)&value_ref,0, {0,1,0,0}, 0};
   nanos_wd_t wd1 = 0;

   nanos_device_t test_devices_1[1] = { NANOS_SMP_DESC( test_device_arg_6 ) };
   NANOS_SAFE( nanos_create_wd ( &wd1, 1,test_devices_1, sizeof(my_args2), __alignof__(my_args2), (void**)&args1, nanos_current_wd(), &props, 0, NULL) );
   args1->p_i = my_value;
   args1->index = size;
   NANOS_SAFE( nanos_submit( wd1,1,&deps1,0 ) );

   for ( j = 0; j < size; j++ ) {
      my_args2 *args1=0;
      nanos_dependence_t deps1 = {(void **)&value_ref,0, {1,1,0,1}, 0};
      nanos_wd_t wd1 = 0;

      nanos_device_t test_devices_1[1] = { NANOS_SMP_DESC( test_device_arg_5 ) };
      NANOS_SAFE( nanos_create_wd ( &wd1, 1,test_devices_1, sizeof(my_args2), __alignof__(my_args2), (void**)&args1, nanos_current_wd(), &props, 0, NULL) );
      args1->p_i = my_value;
      args1->index = j;
      NANOS_SAFE( nanos_submit( wd1,1,&deps1,0 ) );
   }

   NANOS_SAFE( nanos_wg_wait_completion( nanos_current_wd() ) );
   for ( j = 0; j < 100; j++ ) {
      if ( my_value[j] != 2 ) return false;
   }
   return true;
}

// Test commutative tasks, this test creates a task with an inout dependency on an array an then
// a bunch of commutative (reduction) tasks that update it. Then, another set of tasks are successors
// of the commutative ones. This checks that the commutation task behaves correctly
bool commutative_task_2()
{
   int i, j;
   int size = 100;
   int my_value[size];
   int *value_ref = (int *)&my_value;
   int my_results[size];

   for ( i = 0; i < size; i++ ) {
      my_value[i] = 0;
      my_results[i] = 0;
   }

   nanos_wd_props_t props = {
     .mandatory_creation = true,
     .tied = false,
     .tie_to = false,
   };

   my_args2 *args1=0;
   nanos_dependence_t deps1 = {(void **)&value_ref,0, {0,1,0,0}, 0};
   nanos_wd_t wd1 = 0;

   nanos_device_t test_devices_1[1] = { NANOS_SMP_DESC( test_device_arg_6 ) };
   NANOS_SAFE( nanos_create_wd ( &wd1, 1,test_devices_1, sizeof(my_args2), __alignof__(my_args2), (void**)&args1, nanos_current_wd(), &props, 0, NULL) );
   args1->p_i = my_value;
   args1->index = size;
   NANOS_SAFE( nanos_submit( wd1,1,&deps1,0 ) );

   for ( j = 0; j < size; j++ ) {
      my_args2 *args1=0;
      nanos_dependence_t deps1 = {(void **)&value_ref,0, {1,1,0,1}, 0};
      nanos_wd_t wd1 = 0;

      nanos_device_t test_devices_1[1] = { NANOS_SMP_DESC( test_device_arg_5 ) };
      NANOS_SAFE( nanos_create_wd ( &wd1, 1,test_devices_1, sizeof(my_args2), __alignof__(my_args2), (void**)&args1, nanos_current_wd(), &props, 0, NULL) );
      args1->p_i = my_value;
      args1->index = j;
      NANOS_SAFE( nanos_submit( wd1,1,&deps1,0 ) );
   }

   for ( j = 0; j < size; j++ ) {
      my_args3 *args1=0;
      nanos_dependence_t deps1 = {(void **)&value_ref,0, {1,0,0,0}, 0};
      nanos_wd_t wd1 = 0;

      nanos_device_t test_devices_1[1] = { NANOS_SMP_DESC( test_device_arg_7 ) };
      NANOS_SAFE( nanos_create_wd ( &wd1, 1,test_devices_1, sizeof(my_args3), __alignof__(my_args3), (void**)&args1, nanos_current_wd(), &props, 0, NULL) );
      args1->p_i = my_value;
      args1->p_result = &my_results[j];
      args1->index = j;
      NANOS_SAFE( nanos_submit( wd1,1,&deps1,0 ) );
   }

   NANOS_SAFE( nanos_wg_wait_completion( nanos_current_wd() ) );

   for ( j = 0; j < size; j++ ) {
      if ( my_results[j] < 0 ) return false;
   }
   return true;
}

// Test commutative tasks, this test creates a task with an inout dependency on an array an then
// a bunch of tasks that read the dependency, then, again, a bunch of commutative (reduction) tasks 
// that update it. Then, another set of tasks are successors
// of the commutative ones. This checks that the commutation task behaves correctly
bool commutative_task_3()
{
   int i, j;
   int size = 100;
   int my_value[size];
   int *value_ref = (int *)&my_value;
   int my_results[size];

   for ( i = 0; i < size; i++ ) {
      my_value[i] = 0;
      my_results[i] = 0;
   }

   nanos_wd_props_t props = {
     .mandatory_creation = true,
     .tied = false,
     .tie_to = false,
   };

   my_args2 *args1=0;
   nanos_dependence_t deps1 = {(void **)&value_ref,0, {0,1,0,0}, 0};
   nanos_wd_t wd1 = 0;

   nanos_device_t test_devices_1[1] = { NANOS_SMP_DESC( test_device_arg_6 ) };
   NANOS_SAFE( nanos_create_wd ( &wd1, 1,test_devices_1, sizeof(my_args2), __alignof__(my_args2), (void**)&args1, nanos_current_wd(), &props, 0, NULL) );
   args1->p_i = my_value;
   args1->index = size;
   NANOS_SAFE( nanos_submit( wd1,1,&deps1,0 ) );

   for ( j = 0; j < size; j++ ) {
      my_args2 *args1=0;
      nanos_dependence_t deps1 = {(void **)&value_ref,0, {1,0,0,0}, 0};
      nanos_wd_t wd1 = 0;

      nanos_device_t test_devices_1[1] = { NANOS_SMP_DESC( test_device_arg_8 ) };
      NANOS_SAFE( nanos_create_wd ( &wd1, 1,test_devices_1, sizeof(my_args2), __alignof__(my_args2), (void**)&args1, nanos_current_wd(), &props, 0, NULL) );
      args1->p_i = my_value;
      args1->index = j;
      NANOS_SAFE( nanos_submit( wd1,1,&deps1,0 ) );
   }

   for ( j = 0; j < size; j++ ) {
      my_args2 *args1=0;
      nanos_dependence_t deps1 = {(void **)&value_ref,0, {1,1,0,1}, 0};
      nanos_wd_t wd1 = 0;

      nanos_device_t test_devices_1[1] = { NANOS_SMP_DESC( test_device_arg_5 ) };
      NANOS_SAFE( nanos_create_wd ( &wd1, 1,test_devices_1, sizeof(my_args2), __alignof__(my_args2), (void**)&args1, nanos_current_wd(), &props, 0, NULL) );
      args1->p_i = my_value;
      args1->index = j;
      NANOS_SAFE( nanos_submit( wd1,1,&deps1,0 ) );
   }

   for ( j = 0; j < size; j++ ) {
      my_args3 *args1=0;
      nanos_dependence_t deps1 = {(void **)&value_ref,0, {1,0,0,0}, 0};
      nanos_wd_t wd1 = 0;

      nanos_device_t test_devices_1[1] = { NANOS_SMP_DESC( test_device_arg_7 ) };
      NANOS_SAFE( nanos_create_wd ( &wd1, 1,test_devices_1, sizeof(my_args3), __alignof__(my_args3), (void**)&args1, nanos_current_wd(), &props, 0, NULL) );
      args1->p_i = my_value;
      args1->p_result = &my_results[j];
      args1->index = j;
      NANOS_SAFE( nanos_submit( wd1,1,&deps1,0 ) );
   }

   NANOS_SAFE( nanos_wg_wait_completion( nanos_current_wd() ) );

   for ( j = 0; j < size; j++ ) {
      if ( my_results[j] < 0 ) return false;
   }
   return true;
}



int main ( int argc, char **argv )
{
   printf("Single dependency test... \n");
   fflush(stdout);
   if ( single_dependency() ) {
      printf("PASS\n");
      fflush(stdout);
   } else {
      printf("FAIL\n");
      fflush(stdout);
   }
   
   printf("Single inout chain test... \n");
   fflush(stdout);
   if ( single_inout_chain() ) {
      printf("PASS\n");
      fflush(stdout);
   } else {
      printf("FAIL\n");
      fflush(stdout);
   }

   printf("Multiple inout chains test... \n");
   fflush(stdout);
   if ( multiple_inout_chains() ) {
      printf("PASS\n");
      fflush(stdout);
   } else {
      printf("FAIL\n");
      fflush(stdout);
      return 1;
   }

   printf("task with multiple predecessors... \n");
   fflush(stdout);
   if ( multiple_predecessors() ) {
      printf("PASS\n");
      fflush(stdout);
   } else {
      printf("FAIL\n");
      fflush(stdout);
      return 1;
   }
   printf("task with multiple anti-dependencies... \n");
   fflush(stdout);
   if ( multiple_antidependencies() ) {
      printf("PASS\n");
      fflush(stdout);
   } else {
      printf("FAIL\n");
      fflush(stdout);
      return 1;
   }

   printf("Out dependencies chain... \n");
   fflush(stdout);
   if ( out_dep_chain() ) {
      printf("PASS\n");
      fflush(stdout);
   } else {
      printf("FAIL\n");
      fflush(stdout);
      return 1;
   }

   printf("Wait on test...\n");
   fflush(stdout);
   if ( wait_on_test() ) {
      printf("PASS\n");
      fflush(stdout);
   } else {
      printf("FAIL\n");
      fflush(stdout);
      return 1;
   }

   printf("create and run test...\n");
   fflush(stdout);
   if ( create_and_run_test() ) {
      printf("PASS\n");
      fflush(stdout);
   } else {
      printf("FAIL\n");
      fflush(stdout);
      return 1;
   }

   printf("commutative tasks test...\n");
   fflush(stdout);
   if ( commutative_task_1() ) {
      printf("PASS\n");
      fflush(stdout);
   } else {
      printf("FAIL\n");
      fflush(stdout);
      return 1;
   }

   printf("commutative tasks 2 test...\n");
   fflush(stdout);
   if ( commutative_task_2() ) {
      printf("PASS\n");
      fflush(stdout);
   } else {
      printf("FAIL\n");
      fflush(stdout);
      return 1;
   }

   printf("commutative tasks 3 test...\n");
   fflush(stdout);
   if ( commutative_task_3() ) {
      printf("PASS\n");
      fflush(stdout);
   } else {
      printf("FAIL\n");
      fflush(stdout);
      return 1;
   }

   return 0;
}
Beispiel #5
0
/*
 * Team-creation example (the nanos_/ol_ naming suggests compiler-generated code -
 * TODO confirm).
 *
 * Zeroes a 16-element array, then repeats four times: create a team, create and
 * submit one work descriptor tied to each team thread with index >= 1, run the
 * same work for team thread 0 through nanos_create_wd_and_run_compact, and end
 * the team. Every work descriptor receives the addresses of i and a.
 *
 * NOTE(review): this snippet is truncated in the file - there is no return
 * statement or closing brace for main, and rv is never used in the visible part.
 */
int main(int argc, char **argv)
{
  int i;
  int a[16];
  int it;
  int rv = 0;
  for (i = 0; i < 16; i++)
    {
      a[i] = 0;
    }
  for (it = 0; it < 4; it++)
    {
      {
        nanos_err_t nanos_err;
        nanos_wd_dyn_props_t dyn_props;
        unsigned int nth_i;
        struct nanos_args_1_t imm_args;
        /* Never initialized; only passed below together with a dependence count of 0. */
        nanos_data_access_t dependences[1];
        /* Static descriptors: SMP outline function and constant WD definition (mandatory creation, tied, no copies). */
        static nanos_smp_args_t smp_ol_main_1_args = {.outline = (void (*)(void *))(void (*)(struct nanos_args_1_t *))&smp_ol_main_1};
        static struct nanos_const_wd_definition_1 nanos_wd_const_data = {.base = {.props = {.mandatory_creation = 1, .tied = 1, .clear_chunk = 0, .reserved0 = 0, .reserved1 = 0, .reserved2 = 0, .reserved3 = 0, .reserved4 = 0}, .data_alignment = __alignof__(struct nanos_args_1_t), .num_copies = 0, .num_devices = 1, .num_dimensions = 0, .description = 0}, .devices = {[0] = {.factory = &nanos_smp_factory, .arg = &smp_ol_main_1_args}}};
        unsigned int nanos_num_threads = nanos_omp_get_num_threads_next_parallel(0);
        nanos_team_t nanos_team = (void *)0;
        /* Variable-length array sized by the requested thread count; passed to nanos_create_team. */
        nanos_thread_t nanos_team_threads[nanos_num_threads];
        nanos_err = nanos_create_team(&nanos_team, (void *)0, &nanos_num_threads, (nanos_constraint_t *)0, 1, nanos_team_threads, &nanos_wd_const_data.base);
        if (nanos_err != NANOS_OK)
          {
            nanos_handle_error(nanos_err);
          }
        dyn_props.tie_to = (void *)0;
        dyn_props.priority = 0;
        dyn_props.flags.is_final = 0;
        /* Starts at 1: team thread 0 is handled after the loop. */
        for (nth_i = 1; nth_i < nanos_num_threads; nth_i = nth_i + 1)
          {
            dyn_props.tie_to = nanos_team_threads[nth_i];
            struct nanos_args_1_t *ol_args = 0;
            nanos_wd_t nanos_wd_ = (void *)0;
            nanos_err = nanos_create_wd_compact(&nanos_wd_, &nanos_wd_const_data.base, &dyn_props, sizeof(struct nanos_args_1_t), (void **)&ol_args, nanos_current_wd(), (nanos_copy_data_t **)0, (nanos_region_dimension_internal_t **)0);
            if (nanos_err != NANOS_OK)
              {
                nanos_handle_error(nanos_err);
              }
            /* ol_args is dereferenced without a null check. */
            (*ol_args).i = &i;
            (*ol_args).a = &a;
            nanos_err = nanos_submit(nanos_wd_, 0, (nanos_data_access_t *)0, (void *)0);
            if (nanos_err != NANOS_OK)
              {
                nanos_handle_error(nanos_err);
              }
          }
        /* Team thread 0: arguments on this stack frame, passed to the create-and-run entry point. */
        dyn_props.tie_to = nanos_team_threads[0];
        imm_args.i = &i;
        imm_args.a = &a;
        nanos_err = nanos_create_wd_and_run_compact(&nanos_wd_const_data.base, &dyn_props, sizeof(struct nanos_args_1_t), &imm_args, 0, dependences, (nanos_copy_data_t *)0, (nanos_region_dimension_internal_t *)0, (void (*)(void *, nanos_wd_t))0);
        if (nanos_err != NANOS_OK)
          {
            nanos_handle_error(nanos_err);
          }
        nanos_err = nanos_end_team(nanos_team);
        if (nanos_err != NANOS_OK)
          {
            nanos_handle_error(nanos_err);
          }
      }
    }
Beispiel #6
0
/*
 * Runs an outlined parallel region on a Nanos++ team.
 *
 * A team is created (reusing the current thread), one work descriptor (WD) is
 * created and submitted for every team thread with index 1..numThreads-1, each
 * tied to its thread, and finally a WD tied to team thread 0 is created and run
 * in place with `data`. The team is ended before returning.
 *
 * func           outlined function executed by every WD (SMP device only).
 * data           argument block given to the WD that is run in place; also the
 *                second argument of every init_func call.
 * numThreads     requested team size; 0 means "ask
 *                nanos_omp_get_num_threads_next_parallel".
 * data_size      size in bytes of the argument block of each WD.
 * get_data_align callback returning the alignment stored in the WD definition.
 * get_empty_data callback whose result seeds the per-WD argument pointer that is
 *                passed by address to nanos_create_wd_compact.
 *                NOTE(review): the runtime presumably overwrites that pointer
 *                with WD-owned storage -- confirm, otherwise the value returned
 *                by the callback is what init_func fills in.
 * init_func      called as init_func(wd_args, data) for every submitted WD.
 *
 * Every runtime error is reported through nanos_handle_error.
 */
void NANOS_parallel( void ( * func ) ( void * ), void * data, unsigned numThreads, long data_size, long ( *get_data_align )( void ), 
                     void * ( * get_empty_data )( void ), void ( * init_func ) ( void *, void * ) )
{
    nanos_err_t err;
    
    // Compute copy data (For SMP devices there are no copies. Just CUDA device requires copy data)
    int num_copies = 0;
    // TODO Compute dimensions
    int num_dimensions = 0;
    // Compute device descriptor (at the moment, only SMP is supported)
    int num_devices = 1;
    // TODO No dependencies for parallel construct in SMP devices
    int num_data_accesses = 0;
    nanos_data_access_t dependences[1];
  
    // Create the Device descriptor (at the moment, only SMP is supported)
    nanos_smp_args_t _smp_args = { func };

    // Build the WD description. asprintf leaves its output pointer indeterminate
    // on failure, so fall back to a fixed name instead of handing garbage to the
    // runtime.
    // NOTE(review): the generated name is never freed here; the runtime may keep
    // the description pointer, so confirm its lifetime before adding a free.
    static char default_parallel_name[] = "parallel";
    char * parallel_name = ( char * ) 0;
    if( asprintf( &parallel_name, "parallel_%d", parallel_id++ ) < 0 )
        parallel_name = default_parallel_name;

    struct nanos_const_wd_definition nanos_wd_const_data = {
        { { 1,          // mandatory creation
            1,          // tied
            0, 0, 0, 0, 0, 0 },                     // properties 
            ( *get_data_align )( ),                 // data alignment
            num_copies, num_devices, num_dimensions,                            
            parallel_name                           // description
        }, 
        { { &nanos_smp_factory,                     // device description
            &_smp_args }                            // outlined function
        }
    };

    // Dynamic properties of the WDs: tie target, priority and final flag
    nanos_wd_dyn_props_t dyn_props;
    dyn_props.tie_to = ( void * ) 0;
    dyn_props.priority = 0;
    dyn_props.flags.is_final = 0;

    // Create the working team
    if( numThreads == 0 )
        numThreads = nanos_omp_get_num_threads_next_parallel( 0 );
    void * nanos_team = ( void * ) 0;
    // A variable length array must have at least one element, even if the
    // runtime reports zero threads.
    const unsigned int nthreads_vla = ( numThreads > 0 ) ? numThreads : 1;
    void * team_threads[nthreads_vla];
    err = nanos_create_team( &nanos_team, ( void * ) 0, &numThreads,
                             (nanos_constraint_t *) 0, /*reuse current*/ 1, team_threads );
    if( err != NANOS_OK )
        nanos_handle_error( err );
    
    // Create a wd tied to each thread (thread 0 is handled after the loop)
    unsigned nth_i;
    for( nth_i = 1; nth_i < numThreads; nth_i++ )
    {
        // Set properties to the current wd of the team
        dyn_props.tie_to = team_threads[nth_i];
        
        // Create the current WD of the team
        void * empty_data = ( *get_empty_data )( );
        void * wd = ( void * ) 0;
        err = nanos_create_wd_compact( &wd, &nanos_wd_const_data.base, &dyn_props, 
                                       data_size, ( void** ) &empty_data, 
                                       nanos_current_wd( ), ( nanos_copy_data_t ** ) 0, 
                                       ( nanos_region_dimension_internal_t ** ) 0 );
        if (err != NANOS_OK) 
            nanos_handle_error(err);
        
        // Initialize outlined data
        ( *init_func )( empty_data, data );
    
        // Submit work to the WD
        err = nanos_submit( wd, num_data_accesses, ( nanos_data_access_t * ) 0, ( void * ) 0 );
        if( err != NANOS_OK ) 
            nanos_handle_error( err );
    }

    // Create the wd for the master thread, which will run the team
    dyn_props.tie_to = team_threads[0];
    err = nanos_create_wd_and_run_compact( &nanos_wd_const_data.base, &dyn_props, data_size, data, 
                                           num_data_accesses, dependences, ( nanos_copy_data_t * ) 0,
                                           ( nanos_region_dimension_internal_t * ) 0, 
                                           ( void ( * )( void *, void * ) ) 0 );
    if( err != NANOS_OK )
        nanos_handle_error( err );

    // End the team
    err = nanos_end_team( nanos_team );
    if( err != NANOS_OK )
        nanos_handle_error( err );
}
Beispiel #7
0
/*
 * Executes a loop worksharing construct through the Nanos++ runtime.
 *
 * A worksharing descriptor is created for the iteration space
 * [lb, ub] with stride `step` and chunk size `chunk` under `ws_policy`.
 * When the runtime sets the single guard and the team has supporting threads,
 * a sliced work descriptor using the "replicate" slicer is created, its
 * argument block is filled by init_func and it is submitted. The calling
 * thread then always runs func(data, wsd) itself, and finally waits on an OpenMP
 * barrier when `wait` is true.
 *
 * description    text stored as the WD description.
 * data_size      size in bytes of the sliced WD argument block.
 * get_data_align callback returning the alignment stored in the WD definition.
 * empty_data     not read by this function.
 *
 * Every runtime error is reported through nanos_handle_error.
 */
static void NANOS_worksharing( int lb, int ub, int step, int chunk, char * description,
                               void ( * func ) ( void * data, nanos_ws_desc_t * wsd ), void * data, long data_size, long ( * get_data_align )( void ), 
                               void * empty_data, void ( * init_func ) ( void *, void * ), void * ws_policy, bool wait )
{
    nanos_err_t status;
    bool is_single_guard;
    nanos_ws_desc_t * ws_desc;

    // Describe the iteration space and create the worksharing
    nanos_ws_info_loop_t loop_info;
    loop_info.lower_bound = lb;
    loop_info.upper_bound = ub;
    loop_info.loop_step = step;
    loop_info.chunk_size = chunk;
    status = nanos_worksharing_create( &ws_desc, ws_policy, ( void ** ) &loop_info, &is_single_guard );
    if( status != NANOS_OK )
    {
        nanos_handle_error( status );
    }

    if( is_single_guard )
    {
        int n_supporting;
        status = nanos_team_get_num_supporting_threads( &n_supporting );
        if( status != NANOS_OK )
        {
            nanos_handle_error( status );
        }

        if( n_supporting > 0 )
        {
            // Configure the worksharing with the team's supporting threads
            status = nanos_malloc( ( void ** ) &ws_desc->threads, sizeof( void * ) * n_supporting, /*filename*/"", /*fileline*/0 );
            if( status != NANOS_OK )
            {
                nanos_handle_error( status );
            }
            status = nanos_team_get_supporting_threads( &ws_desc->nths, ws_desc->threads );
            if( status != NANOS_OK )
            {
                nanos_handle_error( status );
            }

            // Dynamic properties of the sliced WD
            void * sliced_wd = ( void * ) 0;
            nanos_wd_dyn_props_t dyn_props;
            dyn_props.tie_to = ( void * ) 0;
            dyn_props.priority = 0;
            dyn_props.flags.is_final = 0;

            // No copies, dependences or dimensions; a single (SMP) device
            int copy_count = 0;
            int access_count = 0;
            int dimension_count = 0;
            int device_count = 1;

            // Constant WD definition wrapping the outlined function
            nanos_smp_args_t smp_outline = { func };
            struct nanos_const_wd_definition wd_definition = {
                { { 1,          // mandatory creation
                    1,          // tied
                    0, 0, 0, 0, 0, 0 },                         // properties
                    ( *get_data_align )( ),                     // data alignment
                    copy_count, device_count, dimension_count,
                    description                                 // description
                },
                { { &nanos_smp_factory,                         // device description
                    &smp_outline }                              // outlined function
                }
            };

            // Look up the slicer that replicates the WD
            void * replicate_slicer = nanos_find_slicer( "replicate" );
            if( !replicate_slicer )
            {
                nanos_handle_error( NANOS_UNIMPLEMENTED );
            }

            // Argument block pointer filled in through the create call
            struct sections_data_t * outlined_args = ( struct sections_data_t * ) 0;
            status = nanos_create_sliced_wd( &sliced_wd, wd_definition.base.num_devices, wd_definition.devices,
                                             data_size, wd_definition.base.data_alignment, ( void ** ) &outlined_args,
                                             ( void ** ) 0, replicate_slicer, &wd_definition.base.props, &dyn_props,
                                             copy_count, ( nanos_copy_data_t ** ) 0,
                                             dimension_count, ( nanos_region_dimension_internal_t ** ) 0 );
            if( status != NANOS_OK )
            {
                nanos_handle_error( status );
            }

            // Fill the outlined arguments from the caller's data
            ( *init_func )( outlined_args, data );

            // Hand the sliced WD to the runtime
            status = nanos_submit( sliced_wd, access_count, ( nanos_data_access_t * ) 0, ( nanos_team_t ) 0 );
            if( status != NANOS_OK )
            {
                nanos_handle_error( status );
            }

            // Release the supporting-thread array allocated above
            status = nanos_free( ws_desc->threads );
            if( status != NANOS_OK )
            {
                nanos_handle_error( status );
            }
        }
    }

    // The calling thread always executes its share of the loop
    func( data, ws_desc );

    // Barrier only when the caller asked for it
    if( wait )
    {
        status = nanos_omp_barrier( );
        if( status != NANOS_OK )
        {
            nanos_handle_error( status );
        }
    }
}
Beispiel #8
0
/*
 * Creates and submits (or runs in place) an OpenMP-style task on Nanos++.
 *
 * When the current context is final, func(data) is called directly. Otherwise
 * a work descriptor (WD) is created; if the runtime returns a WD, its argument
 * block is filled by init_func and the WD is submitted with the computed
 * dependences. If no WD is returned, the task is created and run in place
 * with `data`.
 *
 * func           outlined task function (SMP device only).
 * data           caller-side argument block.
 * data_size      size in bytes of the WD argument block.
 * get_data_align callback returning the alignment stored in the WD definition.
 * empty_data     pointer passed by address to nanos_create_wd_compact and then
 *                to init_func as the block to fill.
 * init_func      called as init_func(empty_data, data) before submission.
 * if_clause      not read by this function.
 * untied         non-zero makes the WD untied (tied flag is !untied).
 * num_deps       number of entries read from each deps_* array; values <= 0
 *                are treated as "no dependences".
 * deps_dir       per-dependence direction, tested against e_dep_dir_in,
 *                e_dep_dir_out and e_dep_dir_inout.
 * deps_data, deps_n_dims, deps_dims, deps_offset
 *                per-dependence address, dimension count, dimension array and
 *                offset copied into each nanos_data_access_t.
 *
 * Every runtime error is reported through nanos_handle_error.
 */
void NANOS_task( void ( * func ) ( void * ), void *data, 
                 long data_size, long ( * get_data_align ) ( void ), 
                 void * empty_data, void ( * init_func ) ( void *, void * ),
                 bool if_clause, unsigned untied,
                 int num_deps, int * deps_dir, void ** deps_data, 
                 int * deps_n_dims, nanos_region_dimension_t ** deps_dims, 
                 long int * deps_offset )
{
    nanos_err_t err;
    
    // Default to "not final" so the flag is never read uninitialized, and
    // report a failing query instead of silently ignoring it.
    bool nanos_is_in_final = false;
    err = nanos_in_final( &nanos_is_in_final );
    if( err != NANOS_OK )
        nanos_handle_error( err );

    if( nanos_is_in_final )
    {
        ( *func )( data );
    }
    else
    {
        // Compute copy data (For SMP devices there are no copies. Just CUDA device requires copy data)
        int num_copies = 0;
        // TODO Compute dimensions (for devices other than SMP)
        int num_dimensions = 0;
        // Compute device descriptor (at the moment, only SMP is supported)
        int num_devices = 1;
        // Compute dependencies; a negative count must not wrap to a huge unsigned value
        const unsigned int num_data_accesses = ( num_deps > 0 ) ? ( unsigned int ) num_deps : 0u;
        // A variable length array must have at least one element
        nanos_data_access_t dependences[( num_data_accesses > 0 ) ? num_data_accesses : 1];
        unsigned int i;
        for( i = 0; i < num_data_accesses; ++i )
        {
            int in = ( deps_dir[i] & ( e_dep_dir_in | e_dep_dir_inout ) );
            int out = ( deps_dir[i] & ( e_dep_dir_out | e_dep_dir_inout ) );
            nanos_access_type_internal_t flags = {
                ( in != 0 ), // input
                ( out != 0 ), // output
                0 , // can rename
                0 , // concurrent
                0 , // commutative
            };
            nanos_data_access_t dep = { deps_data[i], flags, deps_n_dims[i], deps_dims[i], deps_offset[i] };
            dependences[i] = dep;
        }
        
        // Create the Device descriptor (at the moment, only SMP is supported)
        nanos_smp_args_t _smp_args = { func };

        // Build the WD description. asprintf leaves its output pointer
        // indeterminate on failure, so fall back to a fixed name.
        // NOTE(review): the generated name is never freed here; the runtime may
        // keep the description pointer, so confirm its lifetime before adding a free.
        static char default_task_name[] = "task";
        char * task_name = ( char * ) 0;
        if( asprintf( &task_name, "task_%d", task_id++ ) < 0 )
            task_name = default_task_name;

        struct nanos_const_wd_definition nanos_wd_const_data = {
            { { 0,          // mandatory creation
                !untied,    // tied 
                0, 0, 0, 0, 0, 0 },                     // properties 
            ( *get_data_align )( ),                     // data alignment
            num_copies, num_devices, num_dimensions,
            task_name                                 // description
            }, 
            { { &nanos_smp_factory,                     // device description
                &_smp_args }                            // outlined function
            }
        };
        
        // Dynamic properties of the WD: tie target, priority and final flag
        nanos_wd_dyn_props_t dyn_props;
        dyn_props.tie_to = 0;
        dyn_props.priority = 0;
        dyn_props.flags.is_final = 0;
    
        // Create the WD
        nanos_wd_t wd = (nanos_wd_t) 0;
        err = nanos_create_wd_compact( &wd, &nanos_wd_const_data.base, &dyn_props, 
                                    data_size, ( void ** ) &empty_data,
                                    nanos_current_wd( ), ( nanos_copy_data_t ** ) 0, 
                                    ( nanos_region_dimension_internal_t ** ) 0 );
        if( err != NANOS_OK ) 
            nanos_handle_error( err );
        
        if( wd != ( void * ) 0 )
        {   // Submit the task to the existing actual working group
            // Initialize outlined data
            ( *init_func )( empty_data, data );
    
            err = nanos_submit( wd, num_data_accesses, dependences, ( void * ) 0 );
            if( err != NANOS_OK ) 
                nanos_handle_error( err );
        }
        else
        { // No WD was returned (creation is not mandatory): run the task immediately
            err = nanos_create_wd_and_run_compact( &nanos_wd_const_data.base, &dyn_props, 
                                                data_size, data, num_data_accesses,
                                                dependences, ( nanos_copy_data_t * ) 0, 
                                                ( nanos_region_dimension_internal_t * ) 0, 
                                                ( void ( * )( void *, void * ) ) 0 );
            if( err != NANOS_OK ) 
                nanos_handle_error( err );
        }
    }
}