void soclShutdown() { static int shutdown = 0; if (!shutdown) { shutdown = 1; starpu_pthread_mutex_lock(&_socl_mutex); if( _starpu_init ) starpu_task_wait_for_all(); gc_stop(); if( _starpu_init ) starpu_task_wait_for_all(); int active_entities = gc_active_entity_count(); if (active_entities != 0) { DEBUG_MSG("Unreleased entities: %d\n", active_entities); gc_print_remaining_entities(); } if( _starpu_init && _starpu_init_failed != -ENODEV) starpu_shutdown(); starpu_pthread_mutex_unlock(&_socl_mutex); if (socl_devices != NULL) { free(socl_devices); socl_devices = NULL; } } }
int main(int argc, char **argv) { int ret; starpu_init(NULL); starpu_data_malloc_pinned_if_possible((void **)&buffer, VECTORSIZE); starpu_vector_data_register(&v_handle, 0, (uintptr_t)buffer, VECTORSIZE, sizeof(char)); struct starpu_data_filter f = { .filter_func = starpu_vector_divide_in_2_filter_func, /* there are only 2 children */ .nchildren = 2, /* the length of the first part */ .filter_arg = VECTORSIZE/2, .get_nchildren = NULL, .get_child_ops = NULL }; unsigned iter; for (iter = 0; iter < NITER; iter++) { starpu_data_map_filters(v_handle, 1, &f); ret = use_handle(starpu_data_get_sub_data(v_handle, 1, 0)); if (ret == -ENODEV) goto enodev; ret = use_handle(starpu_data_get_sub_data(v_handle, 1, 1)); if (ret == -ENODEV) goto enodev; starpu_task_wait_for_all(); starpu_data_unpartition(v_handle, 0); ret = use_handle(v_handle); if (ret == -ENODEV) goto enodev; starpu_task_wait_for_all(); } starpu_data_unregister(v_handle); starpu_shutdown(); return 0; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ return 0; }
int main(int argc, char **argv) { double timing; struct timeval start; struct timeval end; starpu_init(NULL); fprintf(stderr, "#tasks : %d\n", ntasks); unsigned i; for (i = 0; i < ntasks; i++) { struct starpu_task *task = starpu_task_create(); /* We check if the function is valid from the codelet or from * the callback */ task->cl = &dummy_cl; task->cl_arg = task; task->callback_func = check_task_callback; task->callback_arg = task; int ret = starpu_task_submit(task, NULL); STARPU_ASSERT(!ret); } starpu_task_wait_for_all(); fprintf(stderr, "#empty tasks : %d\n", ntasks); /* We repeat the same experiment with null codelets */ for (i = 0; i < ntasks; i++) { struct starpu_task *task = starpu_task_create(); task->cl = NULL; /* We check if the function is valid from the callback */ task->callback_func = check_task_callback; task->callback_arg = task; int ret = starpu_task_submit(task, NULL); STARPU_ASSERT(!ret); } starpu_task_wait_for_all(); starpu_shutdown(); return 0; }
int main(int argc, char **argv) { int ntasks = NTASKS; int ret; struct starpu_conf conf; starpu_conf_init(&conf); conf.sched_policy = &dummy_sched_policy, ret = starpu_init(&conf); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #ifdef STARPU_QUICK_CHECK ntasks /= 100; #endif int i; for (i = 0; i < ntasks; i++) { struct starpu_task *task = starpu_task_create(); task->cl = &dummy_codelet; task->cl_arg = NULL; ret = starpu_task_submit(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_task_wait_for_all(); starpu_shutdown(); return 0; }
static int run(struct starpu_sched_policy *policy) { int ret; struct starpu_conf conf; int i; starpu_conf_init(&conf); conf.sched_policy = policy; ret = starpu_init(&conf); if (ret != 0) exit(STARPU_TEST_SKIPPED); starpu_profiling_status_set(1); struct starpu_codelet clA = { .cpu_funcs = {A}, .nbuffers = 0 }; struct starpu_codelet clB = { .cpu_funcs = {B}, .nbuffers = 0 }; starpu_srand48(0); for (i = 0; i < NTASKS; i++) { struct starpu_task *task = starpu_task_create(); if (((int)(starpu_drand48()*2))%2) { task->cl = &clA; task->priority=STARPU_MIN_PRIO; } else { task->cl = &clB; task->priority=STARPU_MAX_PRIO; } task->detach=1; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_task_wait_for_all(); FPRINTF(stdout,"\n"); starpu_shutdown(); return 0; enodev: starpu_shutdown(); return -ENODEV; }
int main(int argc, char **argv) { int ret; unsigned loop, nloops=NLOOPS; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); struct starpu_task *taskA, *taskB; for (loop = 0; loop < nloops; loop++) { taskA = create_dummy_task(); taskB = create_dummy_task(); /* By default, dynamically allocated tasks are destroyed at * termination, we cannot declare a dependency on something * that does not exist anymore. */ taskA->destroy = 0; taskA->synchronous = 1; ret = starpu_task_submit(taskA); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); starpu_task_declare_deps_array(taskB, 1, &taskA); taskB->synchronous = 1; ret = starpu_task_submit(taskB); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); starpu_task_destroy(taskA); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); starpu_shutdown(); return EXIT_SUCCESS; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; }
/*main program*/ int main(int argc, char * argv[]) { /* Init */ int ret; int mpi_rank, mpi_size; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_mpi_init(NULL, NULL, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init"); /*element initialization : domains are connected as a ring for this test*/ int num_elements=NUM_EL; struct element * el_left=malloc(num_elements*sizeof(el_left[0])); struct element * el_right=malloc(num_elements*sizeof(el_right[0])); int i; for(i=0; i<num_elements; i++) { init_element(el_left+i,i+1,((mpi_rank-1)+mpi_size)%mpi_size); init_element(el_right+i,i+1,(mpi_rank+1)%mpi_size); } /* Communication loop */ for (i=0; i<NUM_LOOPS; i++) //number of "computations loops" { int e; for (e=0; e<num_elements; e++) //Do something for each elements { insert_work_for_one_element(el_right+e); insert_work_for_one_element(el_left+e); } } /* End */ starpu_task_wait_for_all(); for(i=0; i<num_elements; i++) { free_element(el_left+i); free_element(el_right+i); } starpu_mpi_shutdown(); starpu_shutdown(); MPI_Finalize(); FPRINTF(stderr, "No assert until end\n"); return 0; }
int main(int argc, char **argv) { int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); unsigned b; for (b = 0; b < NBUFFERS; b++) { buffers[b].index = b; starpu_variable_data_register(&buffers[b].handle, STARPU_MAIN_RAM, (uintptr_t)&buffers[b].val, sizeof(unsigned)); } unsigned iter; for (iter = 0; iter < NITER; iter++) { for (b = 0; b < NBUFFERS; b++) { ret = starpu_data_acquire_cb(buffers[b].handle, STARPU_RW, callback_sync_data, &buffers[b]); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire_cb"); } } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); /* do some cleanup */ ret = EXIT_SUCCESS; for (b = 0; b < NBUFFERS; b++) { starpu_data_unregister(buffers[b].handle); /* check result */ if (buffers[b].val != NITER) { FPRINTF(stderr, "buffer[%u] = %u should be %d\n", b, buffers[b].val, NITER); ret = EXIT_FAILURE; } } starpu_shutdown(); return ret; }
int main(int argc, char **argv) { int v=40; int ret; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&v, sizeof(int)); double *x = (double*)malloc(sizeof(double)); struct starpu_task *task = starpu_task_create(); task->cl = &cl; task->prologue_callback_func = callback_func; task->prologue_callback_arg = NULL; task->prologue_callback_pop_func = pop_prologue_callback_func; task->prologue_callback_pop_arg = (void*) 5; task->handles[0] = handle; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); *x = -999.0; ret = starpu_task_insert(&cl, STARPU_RW, handle, STARPU_PROLOGUE_CALLBACK, prologue_callback_func, STARPU_PROLOGUE_CALLBACK_ARG, x, STARPU_PROLOGUE_CALLBACK_POP, pop_prologue_callback_func, STARPU_PROLOGUE_CALLBACK_POP_ARG, 5, 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_task_wait_for_all(); enodev: starpu_data_unregister(handle); free(x); FPRINTF(stderr, "v -> %d\n", v); starpu_shutdown(); return (ret == -ENODEV) ? 77 : 0; }
int main(int argc, char **argv) { starpu_init(&conf); unsigned i; for (i = 0; i < NTASKS; i++) { struct starpu_task *task = starpu_task_create(); task->cl = &dummy_codelet; task->cl_arg = NULL; starpu_task_submit(task, NULL); } starpu_task_wait_for_all(); starpu_shutdown(); return 0; }
int main(int argc, char **argv) { starpu_init(NULL); /* Allocate all buffers and register them to StarPU */ unsigned b; for (b = 0; b < NBUFFERS; b++) { starpu_data_malloc_pinned_if_possible((void **)&buffer[b], VECTORSIZE); starpu_vector_data_register(&v_handle[b], 0, (uintptr_t)buffer[b], VECTORSIZE, sizeof(char)); } unsigned iter; for (iter = 0; iter < NITER; iter++) { /* Use the buffers on the different workers so that it may not * be in main memory anymore */ for (b = 0; b < NBUFFERS; b++) use_handle(v_handle[b]); starpu_task_wait_for_all(); /* Grab the different pieces of data into main memory */ for (b = 0; b < NBUFFERS; b++) starpu_data_acquire(v_handle[b], STARPU_RW); /* Release them */ for (b = 0; b < NBUFFERS; b++) starpu_data_release(v_handle[b]); } /* do some cleanup */ for (b = 0; b < NBUFFERS; b++) starpu_data_unregister(v_handle[b]); starpu_shutdown(); return 0; }
int main(int argc, char **argv) { int ret; int var = 42; starpu_data_handle_t handle; ret = starpu_init(NULL); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); int copy = starpu_asynchronous_copy_disabled(); FPRINTF(stderr, "copy %d\n", copy); starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var)); ret = starpu_task_insert(&cl, STARPU_R, handle, 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_task_wait_for_all(); starpu_data_unregister(handle); starpu_shutdown(); return 0; enodev: starpu_data_unregister(handle); starpu_shutdown(); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ fprintf(stderr, "WARNING: No one can execute this task\n"); return STARPU_TEST_SKIPPED; }
int main(int argc, char **argv) { int ret; #ifdef STARPU_QUICK_CHECK nbuffers /= 4; niter /= 4; vectorsize /= 8; #endif ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* Allocate all buffers and register them to StarPU */ int b; for (b = 0; b < nbuffers; b++) { ret = starpu_malloc((void **)&buffer[b], vectorsize); STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc"); starpu_vector_data_register(&v_handle[b], STARPU_MAIN_RAM, (uintptr_t)buffer[b], vectorsize, sizeof(char)); } int iter; for (iter = 0; iter < niter; iter++) { /* Use the buffers on the different workers so that it may not * be in main memory anymore */ for (b = 0; b < nbuffers; b++) { ret = use_handle(v_handle[b]); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); /* Grab the different pieces of data into main memory */ for (b = 0; b < nbuffers; b++) { ret = starpu_data_acquire(v_handle[b], STARPU_RW); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); } /* Release them */ for (b = 0; b < nbuffers; b++) starpu_data_release(v_handle[b]); } /* do some cleanup */ for (b = 0; b < nbuffers; b++) { starpu_data_unregister(v_handle[b]); starpu_free(buffer[b]); } starpu_shutdown(); return EXIT_SUCCESS; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; }
int main(int argc, char **argv) { unsigned i; float foo; starpu_data_handle float_array_handle; starpu_codelet cl; starpu_init(NULL); if (argc == 2) niter = atoi(argv[1]); foo = 0.0f; starpu_variable_data_register(&float_array_handle, 0 /* home node */, (uintptr_t)&foo, sizeof(float)); #ifdef STARPU_USE_OPENCL starpu_opencl_load_opencl_from_file("examples/basic_examples/variable_kernels_opencl_codelet.cl", &opencl_code); #endif cl.where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL; cl.cpu_func = cpu_codelet; #ifdef STARPU_USE_CUDA cl.cuda_func = cuda_codelet; #endif #ifdef STARPU_USE_OPENCL cl.opencl_func = opencl_codelet; #endif cl.nbuffers = 1; cl.model = NULL; for (i = 0; i < niter; i++) { struct starpu_task *task = starpu_task_create(); int ret; task->cl = &cl; task->callback_func = NULL; task->buffers[0].handle = float_array_handle; task->buffers[0].mode = STARPU_RW; ret = starpu_task_submit(task, NULL); if (STARPU_UNLIKELY(ret == -ENODEV)) { fprintf(stderr, "No worker may execute this task\n"); exit(0); } } starpu_task_wait_for_all(); /* update the array in RAM */ starpu_data_acquire(float_array_handle, STARPU_R); fprintf(stderr, "variable -> %f\n", foo); starpu_data_release(float_array_handle); starpu_shutdown(); return 0; }
int main(int argc, char **argv) { int size, x; int color; MPI_Comm newcomm; int rank, newrank; int ret; starpu_data_handle_t data[3]; int value = 90; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); if (size < 4) { FPRINTF(stderr, "We need at least 4 processes.\n"); MPI_Finalize(); return STARPU_TEST_SKIPPED; } color = rank%2; MPI_Comm_split(MPI_COMM_WORLD, color, rank, &newcomm); MPI_Comm_rank(newcomm, &newrank); FPRINTF(stderr, "[%d][%d] color %d\n", rank, newrank, color); if (newrank == 0) { FPRINTF(stderr, "[%d][%d] sending %d\n", rank, newrank, rank); MPI_Send(&rank, 1, MPI_INT, 1, 10, newcomm); } else if (newrank == 1) { MPI_Recv(&x, 1, MPI_INT, 0, 10, newcomm, MPI_STATUS_IGNORE); FPRINTF(stderr, "[%d][%d] received %d\n", rank, newrank, x); } ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_mpi_init(NULL, NULL, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init"); if (rank == 0) { starpu_variable_data_register(&data[2], STARPU_MAIN_RAM, (uintptr_t)&value, sizeof(int)); } else starpu_variable_data_register(&data[2], -1, (uintptr_t)NULL, sizeof(int)); starpu_mpi_data_register_comm(data[2], 44, 0, MPI_COMM_WORLD); if (newrank == 0) { starpu_variable_data_register(&data[0], STARPU_MAIN_RAM, (uintptr_t)&rank, sizeof(int)); starpu_variable_data_register(&data[1], STARPU_MAIN_RAM, (uintptr_t)&rank, sizeof(int)); starpu_mpi_data_register_comm(data[1], 22, 0, newcomm); } else starpu_variable_data_register(&data[0], -1, (uintptr_t)NULL, sizeof(int)); starpu_mpi_data_register_comm(data[0], 12, 0, newcomm); if (newrank == 0) { starpu_mpi_req req[2]; starpu_mpi_issend(data[1], &req[0], 1, 22, newcomm); starpu_mpi_isend(data[0], &req[1], 1, 12, newcomm); starpu_mpi_wait(&req[0], MPI_STATUS_IGNORE); starpu_mpi_wait(&req[1], MPI_STATUS_IGNORE); } else if (newrank == 1) { int *xx; starpu_mpi_recv(data[0], 0, 12, newcomm, MPI_STATUS_IGNORE); starpu_data_acquire(data[0], STARPU_RW); xx = (int *)starpu_variable_get_local_ptr(data[0]); starpu_data_release(data[0]); FPRINTF(stderr, "[%d][%d] received %d\n", rank, newrank, *xx); STARPU_ASSERT_MSG(x==*xx, "Received value %d is incorrect (should be %d)\n", *xx, x); starpu_variable_data_register(&data[1], -1, (uintptr_t)NULL, sizeof(int)); starpu_mpi_data_register_comm(data[1], 22, 0, newcomm); starpu_mpi_recv(data[0], 0, 22, newcomm, MPI_STATUS_IGNORE); starpu_data_acquire(data[0], STARPU_RW); xx = (int *)starpu_variable_get_local_ptr(data[0]); starpu_data_release(data[0]); FPRINTF(stderr, "[%d][%d] received %d\n", rank, newrank, *xx); STARPU_ASSERT_MSG(x==*xx, "Received value %d is incorrect (should be %d)\n", *xx, x); } if (rank == 0) { starpu_data_acquire(data[2], STARPU_RW); int rvalue = *((int *)starpu_variable_get_local_ptr(data[2])); starpu_data_release(data[2]); FPRINTF_MPI(stderr, "sending value %d to %d and receiving from %d\n", rvalue, 1, size-1); starpu_mpi_send(data[2], 1, 44, MPI_COMM_WORLD); starpu_mpi_recv(data[2], size-1, 44, MPI_COMM_WORLD, MPI_STATUS_IGNORE); starpu_data_acquire(data[2], STARPU_RW); int *xx = (int *)starpu_variable_get_local_ptr(data[2]); starpu_data_release(data[2]); FPRINTF_MPI(stderr, "Value back is %d\n", *xx); STARPU_ASSERT_MSG(*xx == rvalue + (2*(size-1)), "Received value %d is incorrect (should be %d)\n", *xx, rvalue + (2*(size-1))); } else { int next = (rank == size-1) ? 0 : rank+1; starpu_mpi_recv(data[2], rank-1, 44, MPI_COMM_WORLD, MPI_STATUS_IGNORE); starpu_data_acquire(data[2], STARPU_RW); int *xx = (int *)starpu_variable_get_local_ptr(data[2]); FPRINTF_MPI(stderr, "receiving %d from %d and sending %d to %d\n", *xx, rank-1, *xx+2, next); *xx = *xx + 2; starpu_data_release(data[2]); starpu_mpi_send(data[2], next, 44, MPI_COMM_WORLD); } if (newrank == 0 || newrank == 1) { starpu_mpi_insert_task(newcomm, &mycodelet, STARPU_RW, data[0], STARPU_VALUE, &x, sizeof(x), STARPU_EXECUTE_ON_NODE, 1, 0); starpu_task_wait_for_all(); starpu_data_unregister(data[0]); starpu_data_unregister(data[1]); } starpu_data_unregister(data[2]); starpu_mpi_shutdown(); starpu_shutdown(); MPI_Comm_free(&newcomm); MPI_Finalize(); return 0; }
int main(int argc, char **argv) { int ret, rank, size, err, node; long x0=32; int x1=23; starpu_data_handle_t data_handlesx0; starpu_data_handle_t data_handlesx1; ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_mpi_init(&argc, &argv, 1); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init"); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); if (rank != 0 && rank != 1) goto end; if (rank == 0) { starpu_variable_data_register(&data_handlesx0, STARPU_MAIN_RAM, (uintptr_t)&x0, sizeof(x0)); starpu_mpi_data_register(data_handlesx0, 0, rank); starpu_variable_data_register(&data_handlesx1, -1, (uintptr_t)NULL, sizeof(x1)); starpu_mpi_data_register(data_handlesx1, 1, 1); } else if (rank == 1) { starpu_variable_data_register(&data_handlesx1, STARPU_MAIN_RAM, (uintptr_t)&x1, sizeof(x1)); starpu_mpi_data_register(data_handlesx1, 1, rank); starpu_variable_data_register(&data_handlesx0, -1, (uintptr_t)NULL, sizeof(x0)); starpu_mpi_data_register(data_handlesx0, 0, 0); } node = starpu_mpi_data_get_rank(data_handlesx1); err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_r_w, STARPU_VALUE, &node, sizeof(node), STARPU_R, data_handlesx0, STARPU_W, data_handlesx1, 0); assert(err == 0); node = starpu_mpi_data_get_rank(data_handlesx0); err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_rw_r, STARPU_VALUE, &node, sizeof(node), STARPU_RW, data_handlesx0, STARPU_R, data_handlesx1, 0); assert(err == 0); err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_rw_rw, STARPU_VALUE, &node, sizeof(node), STARPU_RW, data_handlesx0, STARPU_RW, data_handlesx1, 0); assert(err == 0); node = 1; err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_rw_rw, STARPU_VALUE, &node, sizeof(node), STARPU_RW, data_handlesx0, STARPU_RW, data_handlesx1, STARPU_EXECUTE_ON_NODE, node, 0); assert(err == 0); node = 0; err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_rw_rw, STARPU_VALUE, &node, sizeof(node), STARPU_RW, data_handlesx0, STARPU_RW, data_handlesx1, STARPU_EXECUTE_ON_NODE, node, 0); assert(err == 0); node = 0; err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_r_r, STARPU_VALUE, &node, sizeof(node), STARPU_R, data_handlesx0, STARPU_R, data_handlesx1, STARPU_EXECUTE_ON_NODE, node, 0); assert(err == 0); /* Here the value specified by the property STARPU_EXECUTE_ON_NODE is going to overwrite the node even though the data model clearly specifies which node is going to execute the codelet */ node = 0; err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_r_w, STARPU_VALUE, &node, sizeof(node), STARPU_R, data_handlesx0, STARPU_W, data_handlesx1, STARPU_EXECUTE_ON_NODE, node, 0); assert(err == 0); /* Here the value specified by the property STARPU_EXECUTE_ON_NODE is going to overwrite the node even though the data model clearly specifies which node is going to execute the codelet */ node = 0; err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet_w_r, STARPU_VALUE, &node, sizeof(node), STARPU_W, data_handlesx0, STARPU_R, data_handlesx1, STARPU_EXECUTE_ON_NODE, node, 0); assert(err == 0); FPRINTF_MPI(stderr, "Waiting ...\n"); starpu_task_wait_for_all(); starpu_data_unregister(data_handlesx0); starpu_data_unregister(data_handlesx1); end: starpu_mpi_shutdown(); starpu_shutdown(); return 0; }
int main(int argc, char **argv) { int ret = 0; starpu_data_handle_t handle1; starpu_data_handle_t handle2; double real = 45.0; double imaginary = 12.0; double copy_real = 78.0; double copy_imaginary = 78.0; int compare; int *compare_ptr = &compare; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_load_opencl_from_file("examples/interface/complex_kernels.cl", &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); #endif starpu_complex_data_register(&handle1, STARPU_MAIN_RAM, &real, &imaginary, 1); starpu_complex_data_register(&handle2, STARPU_MAIN_RAM, ©_real, ©_imaginary, 1); ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle1", strlen("handle1")+1, STARPU_R, handle1, 0); if (ret == -ENODEV) goto end; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle2", strlen("handle2")+1, STARPU_R, handle2, 0); if (ret == -ENODEV) goto end; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_insert(&cl_compare, STARPU_R, handle1, STARPU_R, handle2, STARPU_VALUE, &compare_ptr, sizeof(compare_ptr), 0); if (ret == -ENODEV) goto end; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_task_wait_for_all(); if (compare != 0) { FPRINTF(stderr, "Complex numbers should NOT be similar\n"); goto end; } ret = starpu_task_insert(&cl_copy, STARPU_R, handle1, STARPU_W, handle2, 0); if (ret == -ENODEV) goto end; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle1", strlen("handle1")+1, STARPU_R, handle1, 0); if (ret == -ENODEV) goto end; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_insert(&cl_display, STARPU_VALUE, "handle2", strlen("handle2")+1, STARPU_R, handle2, 0); if (ret == -ENODEV) goto end; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); ret = starpu_task_insert(&cl_compare, STARPU_R, handle1, STARPU_R, handle2, STARPU_VALUE, &compare_ptr, sizeof(compare_ptr), 0); if (ret == -ENODEV) goto end; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_task_wait_for_all(); if (compare != 1) { FPRINTF(stderr, "Complex numbers should be similar\n"); } end: #ifdef STARPU_USE_OPENCL { int ret2 = starpu_opencl_unload_opencl(&opencl_program); STARPU_CHECK_RETURN_VALUE(ret2, "starpu_opencl_unload_opencl"); } #endif starpu_data_unregister(handle1); starpu_data_unregister(handle2); starpu_shutdown(); if (ret == -ENODEV) return 77; else return !compare; }
int main(int argc, char **argv) { int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_load_opencl_from_file("tests/datawizard/scratch_opencl_kernel.cl", &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); #endif A = (unsigned *) calloc(VECTORSIZE, sizeof(unsigned)); starpu_vector_data_register(&A_handle, STARPU_MAIN_RAM, (uintptr_t)A, VECTORSIZE, sizeof(unsigned)); starpu_vector_data_register(&B_handle, -1, (uintptr_t)NULL, VECTORSIZE, sizeof(unsigned)); unsigned loop; for (loop = 0; loop < NLOOPS; loop++) { struct starpu_task *task_f = starpu_task_create(); task_f->cl = &cl_f; task_f->handles[0] = A_handle; task_f->handles[1] = B_handle; ret = starpu_task_submit(task_f); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); starpu_data_unregister(A_handle); starpu_data_unregister(B_handle); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_unload_opencl(&opencl_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); #endif starpu_shutdown(); /* Check result */ unsigned i; ret = EXIT_SUCCESS; for (i = 0; i < VECTORSIZE; i++) { if (A[i] != NLOOPS) { FPRINTF(stderr, "Error: Incorrect value A[%u] = %u != %d\n", i, A[i], NLOOPS); ret = EXIT_FAILURE; break; } } free(A); STARPU_RETURN(ret); enodev: starpu_data_unregister(A_handle); starpu_data_unregister(B_handle); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_unload_opencl(&opencl_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); #endif starpu_shutdown(); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ fprintf(stderr, "WARNING: No one can execute this task\n"); return STARPU_TEST_SKIPPED; }
int main(int argc, char **argv) { int rank, nodes; int ret=0; int compare=0; ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_mpi_init(&argc, &argv, 1); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &nodes); if (nodes < 2) { fprintf(stderr, "This program needs at least 2 nodes (%d available)\n", nodes); ret = 77; } else { starpu_data_handle_t handle; starpu_data_handle_t handle2; double real[2] = {4.0, 2.0}; double imaginary[2] = {7.0, 9.0}; double real2[2] = {14.0, 12.0}; double imaginary2[2] = {17.0, 19.0}; if (rank == 1) { real[0] = 0.0; real[1] = 0.0; imaginary[0] = 0.0; imaginary[1] = 0.0; } starpu_complex_data_register(&handle, STARPU_MAIN_RAM, real, imaginary, 2); starpu_complex_data_register(&handle2, -1, real2, imaginary2, 2); if (rank == 0) { int *compare_ptr = &compare; starpu_task_insert(&cl_display, STARPU_VALUE, "node0 initial value", strlen("node0 initial value")+1, STARPU_R, handle, 0); starpu_mpi_isend_detached(handle, 1, 10, MPI_COMM_WORLD, NULL, NULL); starpu_mpi_irecv_detached(handle2, 1, 20, MPI_COMM_WORLD, NULL, NULL); starpu_task_insert(&cl_display, STARPU_VALUE, "node0 received value", strlen("node0 received value")+1, STARPU_R, handle2, 0); starpu_task_insert(&cl_compare, STARPU_R, handle, STARPU_R, handle2, STARPU_VALUE, &compare_ptr, sizeof(compare_ptr), 0); } else if (rank == 1) { starpu_mpi_irecv_detached(handle, 0, 10, MPI_COMM_WORLD, NULL, NULL); starpu_task_insert(&cl_display, STARPU_VALUE, "node1 received value", strlen("node1 received value")+1, STARPU_R, handle, 0); starpu_mpi_isend_detached(handle, 0, 20, MPI_COMM_WORLD, NULL, NULL); } starpu_task_wait_for_all(); starpu_data_unregister(handle); starpu_data_unregister(handle2); } starpu_mpi_shutdown(); starpu_shutdown(); if (rank == 0) return !compare; else return ret; }
int do_test(int rank, int sdetached, int rdetached) { int ret, i; int val[2]; starpu_data_handle_t data[2]; ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_mpi_init(NULL, NULL, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init"); if (rank == 1) { val[0] = VAL0; val[1] = VAL1; } else { val[0] = -1; val[1] = -1; } starpu_variable_data_register(&data[0], STARPU_MAIN_RAM, (uintptr_t)&val[0], sizeof(val[0])); starpu_variable_data_register(&data[1], STARPU_MAIN_RAM, (uintptr_t)&val[1], sizeof(val[1])); starpu_mpi_data_register(data[0], 77, 1); starpu_mpi_data_register(data[1], 88, 1); if (rank == 1) { for(i=1 ; i>=0 ; i--) { if (sdetached) starpu_mpi_isend_detached(data[i], 0, starpu_data_get_tag(data[i]), MPI_COMM_WORLD, NULL, NULL); else starpu_mpi_send(data[i], 0, starpu_data_get_tag(data[i]), MPI_COMM_WORLD); } } else if (rank == 0) { int received = 0; for(i=0 ; i<2 ; i++) FPRINTF_MPI(stderr, "Value[%d] = %d\n", i, val[i]); for(i=0 ; i<2 ; i++) { if (rdetached) starpu_mpi_irecv_detached(data[i], 1, starpu_data_get_tag(data[i]), MPI_COMM_WORLD, callback, &received); else starpu_mpi_recv(data[i], 1, starpu_data_get_tag(data[i]), MPI_COMM_WORLD, MPI_STATUS_IGNORE); } if (rdetached) { STARPU_PTHREAD_MUTEX_LOCK(&mutex); while (received != 2) { FPRINTF_MPI(stderr, "Received %d messages\n", received); STARPU_PTHREAD_COND_WAIT(&cond, &mutex); } STARPU_PTHREAD_MUTEX_UNLOCK(&mutex); } for(i=0 ; i<2 ; i++) starpu_data_acquire(data[i], STARPU_R); for(i=0 ; i<2 ; i++) FPRINTF_MPI(stderr, "Value[%d] = %d\n", i, val[i]); for(i=0 ; i<2 ; i++) starpu_data_release(data[i]); } FPRINTF_MPI(stderr, "Waiting ...\n"); starpu_task_wait_for_all(); starpu_data_unregister(data[0]); starpu_data_unregister(data[1]); if (rank == 0) { ret = (val[0] == VAL0 && val[1] == VAL1) ? 0 : 1; } starpu_mpi_shutdown(); starpu_shutdown(); return ret; }
int main(int argc, char **argv) { unsigned *foo; starpu_data_handle_t handle; int ret; unsigned n, i, size; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_load_opencl_from_file("tests/datawizard/scal_opencl.cl", &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); #endif n = starpu_worker_get_count(); if (n == 1) { starpu_shutdown(); return STARPU_TEST_SKIPPED; } size = 10 * n; foo = (unsigned *) calloc(size, sizeof(*foo)); for (i = 0; i < size; i++) foo[i] = i; starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)foo, size, sizeof(*foo)); /* Broadcast the data to force in-place partitioning */ for (i = 0; i < n; i++) starpu_data_prefetch_on_node(handle, starpu_worker_get_memory_node(i), 0); struct starpu_data_filter f = { .filter_func = starpu_vector_filter_block, .nchildren = n, }; starpu_data_partition(handle, &f); for (i = 0; i < f.nchildren; i++) { struct starpu_task *task = starpu_task_create(); task->handles[0] = starpu_data_get_sub_data(handle, 1, i); task->cl = &scal_codelet; task->execute_on_a_specific_worker = 1; task->workerid = i; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); starpu_data_unpartition(handle, STARPU_MAIN_RAM); starpu_data_unregister(handle); starpu_shutdown(); ret = EXIT_SUCCESS; for (i = 0; i < size; i++) { if (foo[i] != i*2) { FPRINTF(stderr,"value %u is %u instead of %u\n", i, foo[i], 2*i); ret = EXIT_FAILURE; } } return ret; enodev: starpu_data_unregister(handle); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; }
int main(int argc, char **argv) { int ret, rank, size; ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_mpi_init(NULL, NULL, 1); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init"); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); if (size < 2) { if (rank == 0) FPRINTF(stderr, "We need at least 2 processes.\n"); MPI_Finalize(); return STARPU_TEST_SKIPPED; } starpu_vector_data_register(&token_handle, STARPU_MAIN_RAM, (uintptr_t)&token, 1, sizeof(token)); int nloops = NITER; int loop; int last_loop = nloops - 1; int last_rank = size - 1; for (loop = 0; loop < nloops; loop++) { int tag = loop*size + rank; if (loop == 0 && rank == 0) { token = 0; FPRINTF(stdout, "Start with token value %u\n", token); } else { starpu_mpi_irecv_detached(token_handle, (rank+size-1)%size, tag, MPI_COMM_WORLD, NULL, NULL); } increment_token(); if (loop == last_loop && rank == last_rank) { starpu_data_acquire(token_handle, STARPU_R); FPRINTF(stdout, "Finished : token value %u\n", token); starpu_data_release(token_handle); } else { starpu_mpi_isend_detached(token_handle, (rank+1)%size, tag+1, MPI_COMM_WORLD, NULL, NULL); } } starpu_task_wait_for_all(); starpu_data_unregister(token_handle); starpu_mpi_shutdown(); starpu_shutdown(); if (rank == last_rank) { FPRINTF(stderr, "[%d] token = %u == %u * %d ?\n", rank, token, nloops, size); STARPU_ASSERT(token == nloops*size); } return 0; }
int main(int argc, char **argv) { double start, end; int ret; parse_args(argc, argv); #ifdef STARPU_QUICK_CHECK niter /= 10; #endif ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_cublas_init(); init_problem_data(); partition_mult_data(); if (bound) starpu_bound_start(0, 0); start = starpu_timing_now(); unsigned x, y, iter; for (iter = 0; iter < niter; iter++) { for (x = 0; x < nslicesx; x++) for (y = 0; y < nslicesy; y++) { struct starpu_task *task = starpu_task_create(); task->cl = &cl; task->handles[0] = starpu_data_get_sub_data(A_handle, 1, y); task->handles[1] = starpu_data_get_sub_data(B_handle, 1, x); task->handles[2] = starpu_data_get_sub_data(C_handle, 2, x, y); task->flops = 2ULL * (xdim/nslicesx) * (ydim/nslicesy) * zdim; ret = starpu_task_submit(task); if (ret == -ENODEV) { ret = 77; goto enodev; } STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_task_wait_for_all(); } end = starpu_timing_now(); if (bound) starpu_bound_stop(); double timing = end - start; double min, min_int; double flops = 2.0*((unsigned long long)niter)*((unsigned long long)xdim) *((unsigned long long)ydim)*((unsigned long long)zdim); if (bound) starpu_bound_compute(&min, &min_int, 1); PRINTF("# x\ty\tz\tms\tGFlops"); if (bound) PRINTF("\tTms\tTGFlops\tTims\tTiGFlops"); PRINTF("\n"); PRINTF("%u\t%u\t%u\t%.0f\t%.1f", xdim, ydim, zdim, timing/niter/1000.0, flops/timing/1000.0); if (bound) PRINTF("\t%.0f\t%.1f\t%.0f\t%.1f", min, flops/min/1000000.0, min_int, flops/min_int/1000000.0); PRINTF("\n"); enodev: starpu_data_unpartition(C_handle, STARPU_MAIN_RAM); starpu_data_unpartition(B_handle, STARPU_MAIN_RAM); starpu_data_unpartition(A_handle, STARPU_MAIN_RAM); starpu_data_unregister(A_handle); starpu_data_unregister(B_handle); starpu_data_unregister(C_handle); if (check) check_output(); starpu_free(A); starpu_free(B); starpu_free(C); starpu_cublas_shutdown(); starpu_shutdown(); return ret; }
int main(int argc, char **argv) { int rank, size, err; int x[2]; int ret, i; starpu_data_handle_t data_handles[2]; int values[2]; ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_mpi_init(&argc, &argv, 1); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init"); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); if (rank == 0) { x[0] = 11; starpu_variable_data_register(&data_handles[0], STARPU_MAIN_RAM, (uintptr_t)&x[0], sizeof(x[0])); starpu_variable_data_register(&data_handles[1], -1, (uintptr_t)NULL, sizeof(x[1])); } else if (rank == 1) { x[1] = 12; starpu_variable_data_register(&data_handles[0], -1, (uintptr_t)NULL, sizeof(x[0])); starpu_variable_data_register(&data_handles[1], STARPU_MAIN_RAM, (uintptr_t)&x[1], sizeof(x[1])); } else { starpu_variable_data_register(&data_handles[0], -1, (uintptr_t)NULL, sizeof(x[0])); starpu_variable_data_register(&data_handles[1], -1, (uintptr_t)NULL, sizeof(x[1])); } starpu_mpi_data_register(data_handles[0], 0, 0); starpu_mpi_data_register(data_handles[1], 1, 1); err = starpu_mpi_task_insert(MPI_COMM_WORLD, &mycodelet, STARPU_RW, data_handles[0], STARPU_RW, data_handles[1], STARPU_EXECUTE_ON_DATA, data_handles[1], 0); assert(err == 0); starpu_task_wait_for_all(); for(i=0 ; i<2 ; i++) { starpu_mpi_get_data_on_node_detached(MPI_COMM_WORLD, data_handles[i], 0, NULL, NULL); if (rank == 0) { starpu_data_acquire(data_handles[i], STARPU_R); values[i] = *((int *)starpu_data_get_local_ptr(data_handles[i])); starpu_data_release(data_handles[i]); } } ret = 0; if (rank == 0) { FPRINTF(stderr, "[%d][local ptr] VALUES: %d %d\n", rank, values[0], values[1]); if (values[0] != 12 || values[1] != 144) { ret = EXIT_FAILURE; } } starpu_data_unregister(data_handles[0]); starpu_data_unregister(data_handles[1]); starpu_mpi_shutdown(); starpu_shutdown(); return ret; }
int main(int argc, char **argv) { int ret, exit_value = 0; /* Initialize StarPU */ ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_load_opencl_from_file("examples/axpy/axpy_opencl_kernel.cl", &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); #endif starpu_cublas_init(); /* This is equivalent to vec_a = malloc(N*sizeof(TYPE)); vec_b = malloc(N*sizeof(TYPE)); */ starpu_malloc((void **)&_vec_x, N*sizeof(TYPE)); assert(_vec_x); starpu_malloc((void **)&_vec_y, N*sizeof(TYPE)); assert(_vec_y); unsigned i; for (i = 0; i < N; i++) { _vec_x[i] = 1.0f; /*(TYPE)starpu_drand48(); */ _vec_y[i] = 4.0f; /*(TYPE)starpu_drand48(); */ } FPRINTF(stderr, "BEFORE x[0] = %2.2f\n", _vec_x[0]); FPRINTF(stderr, "BEFORE y[0] = %2.2f\n", _vec_y[0]); /* Declare the data to StarPU */ starpu_vector_data_register(&_handle_x, STARPU_MAIN_RAM, (uintptr_t)_vec_x, N, sizeof(TYPE)); starpu_vector_data_register(&_handle_y, STARPU_MAIN_RAM, (uintptr_t)_vec_y, N, sizeof(TYPE)); /* Divide the vector into blocks */ struct starpu_data_filter block_filter = { .filter_func = starpu_vector_filter_block, .nchildren = NBLOCKS }; starpu_data_partition(_handle_x, &block_filter); starpu_data_partition(_handle_y, &block_filter); double start; double end; start = starpu_timing_now(); unsigned b; for (b = 0; b < NBLOCKS; b++) { struct starpu_task *task = starpu_task_create(); task->cl = &axpy_cl; task->cl_arg = &_alpha; task->cl_arg_size = sizeof(_alpha); task->handles[0] = starpu_data_get_sub_data(_handle_x, 1, b); task->handles[1] = starpu_data_get_sub_data(_handle_y, 1, b); task->tag_id = b; ret = starpu_task_submit(task); if (ret == -ENODEV) { exit_value = 77; goto enodev; } STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_task_wait_for_all(); enodev: starpu_data_unpartition(_handle_x, STARPU_MAIN_RAM); starpu_data_unpartition(_handle_y, STARPU_MAIN_RAM); starpu_data_unregister(_handle_x); starpu_data_unregister(_handle_y); end = starpu_timing_now(); double timing = end - start; FPRINTF(stderr, "timing -> %2.2f us %2.2f MB/s\n", timing, 3*N*sizeof(TYPE)/timing); FPRINTF(stderr, "AFTER y[0] = %2.2f (ALPHA = %2.2f)\n", _vec_y[0], _alpha); if (exit_value != 77) exit_value = check(); starpu_free((void *)_vec_x); starpu_free((void *)_vec_y); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_unload_opencl(&opencl_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); #endif /* Stop StarPU */ starpu_shutdown(); return exit_value; }
static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks) { double start; double end; int ret; /* create all the DAG nodes */ unsigned i,j,k; if (bound) starpu_bound_start(bounddeps, boundprio); start = starpu_timing_now(); for (k = 0; k < nblocks; k++) { ret = create_task_11(dataA, k); if (ret == -ENODEV) return ret; for (i = k+1; i<nblocks; i++) { ret = create_task_12(dataA, k, i); if (ret == -ENODEV) return ret; ret = create_task_21(dataA, k, i); if (ret == -ENODEV) return ret; } starpu_data_wont_use(starpu_data_get_sub_data(dataA, 2, k, k)); for (i = k+1; i<nblocks; i++) for (j = k+1; j<nblocks; j++) { ret = create_task_22(dataA, k, i, j); if (ret == -ENODEV) return ret; } for (i = k+1; i<nblocks; i++) { starpu_data_wont_use(starpu_data_get_sub_data(dataA, 2, k, i)); starpu_data_wont_use(starpu_data_get_sub_data(dataA, 2, i, k)); } } /* stall the application until the end of computations */ starpu_task_wait_for_all(); end = starpu_timing_now(); if (bound) starpu_bound_stop(); double timing = end - start; unsigned n = starpu_matrix_get_nx(dataA); double flop = (2.0f*n*n*n)/3.0f; PRINTF("# size\tms\tGFlops"); if (bound) PRINTF("\tTms\tTGFlops"); PRINTF("\n"); PRINTF("%u\t%.0f\t%.1f", n, timing/1000, flop/timing/1000.0f); if (bound) { double min; starpu_bound_compute(&min, NULL, 0); PRINTF("\t%.0f\t%.1f", min, flop/min/1000000.0f); } PRINTF("\n"); return 0; }
static int dw_codelet_facto_pivot(starpu_data_handle_t *dataAp, struct piv_s *piv_description, unsigned nblocks, starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned), double *timing) { double start; double end; int ret; /* create all the DAG nodes */ unsigned i,j,k; if (bound) starpu_bound_start(bounddeps, boundprio); start = starpu_timing_now(); for (k = 0; k < nblocks; k++) { ret = create_task_11_pivot(dataAp, nblocks, k, piv_description, get_block); if (ret == -ENODEV) return ret; for (i = 0; i < nblocks; i++) { if (i != k) { ret = create_task_pivot(dataAp, nblocks, piv_description, k, i, get_block); if (ret == -ENODEV) return ret; } } for (i = k+1; i<nblocks; i++) { ret = create_task_12(dataAp, nblocks, k, i, get_block); if (ret == -ENODEV) return ret; ret = create_task_21(dataAp, nblocks, k, i, get_block); if (ret == -ENODEV) return ret; } starpu_data_wont_use(get_block(dataAp, nblocks, k, k)); for (i = k+1; i<nblocks; i++) for (j = k+1; j<nblocks; j++) { ret = create_task_22(dataAp, nblocks, k, i, j, get_block); if (ret == -ENODEV) return ret; } for (i = k+1; i<nblocks; i++) { starpu_data_wont_use(get_block(dataAp, nblocks, k, i)); starpu_data_wont_use(get_block(dataAp, nblocks, i, k)); } } /* stall the application until the end of computations */ starpu_task_wait_for_all(); end = starpu_timing_now(); if (bound) starpu_bound_stop(); *timing = end - start; return 0; }
int main(int argc, char **argv) { int my_rank, size, x, y, loop; float mean=0; float matrix[X][Y]; starpu_data_handle_t data_handles[X][Y]; int ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_mpi_init(&argc, &argv, 1); MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &size); parse_args(argc, argv); /* Initial data values */ starpu_srand48((long int)time(NULL)); for(x = 0; x < X; x++) { for (y = 0; y < Y; y++) { matrix[x][y] = (float)starpu_drand48(); mean += matrix[x][y]; } } mean /= (X*Y); if (display) { FPRINTF_MPI(stdout, "mean=%2.2f\n", mean); for(x = 0; x < X; x++) { fprintf(stdout, "[%d] ", my_rank); for (y = 0; y < Y; y++) { fprintf(stdout, "%2.2f ", matrix[x][y]); } fprintf(stdout, "\n"); } } /* Initial distribution */ for(x = 0; x < X; x++) { for (y = 0; y < Y; y++) { int mpi_rank = my_distrib(x, y, size); if (mpi_rank == my_rank) { //FPRINTF(stderr, "[%d] Owning data[%d][%d]\n", my_rank, x, y); starpu_variable_data_register(&data_handles[x][y], 0, (uintptr_t)&(matrix[x][y]), sizeof(float)); } else if (my_rank == my_distrib(x+1, y, size) || my_rank == my_distrib(x-1, y, size) || my_rank == my_distrib(x, y+1, size) || my_rank == my_distrib(x, y-1, size)) { /* I don't own that index, but will need it for my computations */ //FPRINTF(stderr, "[%d] Neighbour of data[%d][%d]\n", my_rank, x, y); starpu_variable_data_register(&data_handles[x][y], -1, (uintptr_t)NULL, sizeof(float)); } else { /* I know it's useless to allocate anything for this */ data_handles[x][y] = NULL; } if (data_handles[x][y]) { starpu_mpi_data_register(data_handles[x][y], (y*X)+x, mpi_rank); } } } /* First computation with initial distribution */ for(loop=0 ; loop<niter; loop++) { for (x = 1; x < X-1; x++) { for (y = 1; y < Y-1; y++) { starpu_mpi_task_insert(MPI_COMM_WORLD, &stencil5_cl, STARPU_RW, data_handles[x][y], STARPU_R, data_handles[x-1][y], STARPU_R, data_handles[x+1][y], STARPU_R, data_handles[x][y-1], STARPU_R, data_handles[x][y+1], 0); } } } FPRINTF(stderr, "Waiting ...\n"); starpu_task_wait_for_all(); /* Now migrate data to a new distribution */ /* First register newly needed data */ for(x = 0; x < X; x++) { for (y = 0; y < Y; y++) { int mpi_rank = my_distrib2(x, y, size); if (!data_handles[x][y] && (mpi_rank == my_rank || my_rank == my_distrib2(x+1, y, size) || my_rank == my_distrib2(x-1, y, size) || my_rank == my_distrib2(x, y+1, size) || my_rank == my_distrib2(x, y-1, size))) { /* Register newly-needed data */ starpu_variable_data_register(&data_handles[x][y], -1, (uintptr_t)NULL, sizeof(float)); starpu_mpi_data_register(data_handles[x][y], (y*X)+x, mpi_rank); } if (data_handles[x][y] && mpi_rank != starpu_mpi_data_get_rank(data_handles[x][y])) { /* Migrate the data */ starpu_mpi_get_data_on_node_detached(MPI_COMM_WORLD, data_handles[x][y], mpi_rank, NULL, NULL); /* And register new rank of the matrix */ starpu_mpi_data_set_rank(data_handles[x][y], mpi_rank); } } } /* Second computation with new distribution */ for(loop=0 ; loop<niter; loop++) { for (x = 1; x < X-1; x++) { for (y = 1; y < Y-1; y++) { starpu_mpi_task_insert(MPI_COMM_WORLD, &stencil5_cl, STARPU_RW, data_handles[x][y], STARPU_R, data_handles[x-1][y], STARPU_R, data_handles[x+1][y], STARPU_R, data_handles[x][y-1], STARPU_R, data_handles[x][y+1], 0); } } } FPRINTF(stderr, "Waiting ...\n"); starpu_task_wait_for_all(); /* Unregister data */ for(x = 0; x < X; x++) { for (y = 0; y < Y; y++) { if (data_handles[x][y]) { int mpi_rank = my_distrib(x, y, size); /* Get back data to original place where the user-provided buffer is. */ starpu_mpi_get_data_on_node_detached(MPI_COMM_WORLD, data_handles[x][y], mpi_rank, NULL, NULL); /* Register original rank of the matrix (although useless) */ starpu_mpi_data_set_rank(data_handles[x][y], mpi_rank); /* And unregister it */ starpu_data_unregister(data_handles[x][y]); } } } starpu_mpi_shutdown(); starpu_shutdown(); if (display) { FPRINTF(stdout, "[%d] mean=%2.2f\n", my_rank, mean); for(x = 0; x < X; x++) { FPRINTF(stdout, "[%d] ", my_rank); for (y = 0; y < Y; y++) { FPRINTF(stdout, "%2.2f ", matrix[x][y]); } FPRINTF(stdout, "\n"); } } return 0; }
int main(int argc, char **argv) { int i, j, ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_initialize"); float *data; starpu_malloc((void**)&data, sizeof(*data) * NB_BUNDLE); float factors[NB_BUNDLE]; starpu_data_handle_t handles[NB_BUNDLE]; struct starpu_task *task[NB_ITERATION]; starpu_task_bundle_t bundles[NB_BUNDLE]; for (i = 0; i < NB_BUNDLE; i++) { data[i] = i + 1; factors[i] = NB_BUNDLE - i; } for (i = 0; i < NB_BUNDLE; i++) starpu_variable_data_register(&handles[i], STARPU_MAIN_RAM, (uintptr_t)&data[i], sizeof(float)); FPRINTF(stderr, "VALUES:"); for (i = 0; i < NB_BUNDLE; i++) FPRINTF(stderr, " %f (%f)", data[i], factors[i]); FPRINTF(stderr, "\n"); for (i = 0; i < NB_BUNDLE; i++) { starpu_task_bundle_create(&bundles[i]); for (j = 0; j < NB_ITERATION; j++) { task[j] = starpu_task_create(); task[j]->cl = &codelet; task[j]->cl_arg = &factors[i]; task[j]->cl_arg_size = sizeof(float); task[j]->handles[0] = handles[i]; ret = starpu_task_bundle_insert(bundles[i], task[j]); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } ret = starpu_task_bundle_remove(bundles[i], task[NB_ITERATION / 2]); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_remove"); for (j = 0; j < NB_ITERATION; j++) { ret = starpu_task_submit(task[j]); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_task_bundle_close(bundles[i]); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); for(i = 0; i < NB_BUNDLE ; i++) { ret = starpu_data_acquire(handles[i], STARPU_R); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); } FPRINTF(stderr, "VALUES:"); for (i = 0; i < NB_BUNDLE; i++) FPRINTF(stderr, " %f (%f)", data[i], factors[i]); FPRINTF(stderr, "\n"); for(i = 0; i < NB_BUNDLE ; i++) { starpu_data_release(handles[i]); starpu_data_unregister(handles[i]); } starpu_free(data); starpu_shutdown(); return EXIT_SUCCESS; enodev: starpu_shutdown(); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ return STARPU_TEST_SKIPPED; }
int main(int argc, char **argv) { int ret; unsigned part; double timing; double start, end; unsigned row, pos; unsigned ind; /* CSR matrix description */ float *nzval; uint32_t nnz; uint32_t *colind; uint32_t *rowptr; /* Input and Output vectors */ float *vector_in_ptr; float *vector_out_ptr; /* * Parse command-line arguments */ parse_args(argc, argv); /* * Launch StarPU */ ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* * Create a 3-band sparse matrix as input example */ nnz = 3*size-2; starpu_malloc((void **)&nzval, nnz*sizeof(float)); starpu_malloc((void **)&colind, nnz*sizeof(uint32_t)); starpu_malloc((void **)&rowptr, (size+1)*sizeof(uint32_t)); assert(nzval && colind && rowptr); /* fill the matrix */ for (row = 0, pos = 0; row < size; row++) { rowptr[row] = pos; if (row > 0) { nzval[pos] = 1.0f; colind[pos] = row-1; pos++; } nzval[pos] = 5.0f; colind[pos] = row; pos++; if (row < size - 1) { nzval[pos] = 1.0f; colind[pos] = row+1; pos++; } } STARPU_ASSERT(pos == nnz); rowptr[size] = nnz; /* initiate the 2 vectors */ starpu_malloc((void **)&vector_in_ptr, size*sizeof(float)); starpu_malloc((void **)&vector_out_ptr, size*sizeof(float)); assert(vector_in_ptr && vector_out_ptr); /* fill them */ for (ind = 0; ind < size; ind++) { vector_in_ptr[ind] = 2.0f; vector_out_ptr[ind] = 0.0f; } /* * Register the CSR matrix and the 2 vectors */ starpu_csr_data_register(&sparse_matrix, STARPU_MAIN_RAM, nnz, size, (uintptr_t)nzval, colind, rowptr, 0, sizeof(float)); starpu_vector_data_register(&vector_in, STARPU_MAIN_RAM, (uintptr_t)vector_in_ptr, size, sizeof(float)); starpu_vector_data_register(&vector_out, STARPU_MAIN_RAM, (uintptr_t)vector_out_ptr, size, sizeof(float)); /* * Partition the CSR matrix and the output vector */ csr_f.nchildren = nblocks; vector_f.nchildren = nblocks; starpu_data_partition(sparse_matrix, &csr_f); starpu_data_partition(vector_out, &vector_f); /* * If we use OpenCL, we need to compile the SpMV kernel */ #ifdef STARPU_USE_OPENCL compile_spmv_opencl_kernel(); #endif start = starpu_timing_now(); /* * Create and submit StarPU tasks */ for (part = 0; part < nblocks; part++) { struct starpu_task *task = starpu_task_create(); task->cl = &spmv_cl; task->handles[0] = starpu_data_get_sub_data(sparse_matrix, 1, part); task->handles[1] = vector_in; task->handles[2] = starpu_data_get_sub_data(vector_out, 1, part); ret = starpu_task_submit(task); if (STARPU_UNLIKELY(ret == -ENODEV)) { FPRINTF(stderr, "No worker may execute this task\n"); exit(0); } } starpu_task_wait_for_all(); end = starpu_timing_now(); /* * Unregister the CSR matrix and the output vector */ starpu_data_unpartition(sparse_matrix, STARPU_MAIN_RAM); starpu_data_unpartition(vector_out, STARPU_MAIN_RAM); /* * Unregister data */ starpu_data_unregister(sparse_matrix); starpu_data_unregister(vector_in); starpu_data_unregister(vector_out); /* * Display the result */ for (row = 0; row < STARPU_MIN(size, 16); row++) { FPRINTF(stdout, "%2.2f\t%2.2f\n", vector_in_ptr[row], vector_out_ptr[row]); } starpu_free(nzval); starpu_free(colind); starpu_free(rowptr); starpu_free(vector_in_ptr); starpu_free(vector_out_ptr); /* * Stop StarPU */ starpu_shutdown(); timing = end - start; FPRINTF(stderr, "Computation took (in ms)\n"); FPRINTF(stdout, "%2.2f\n", timing/1000); return 0; }