/*
 * Unregister the four StarPU data handles held by an element: its recv and
 * send handles, followed by the two ensure_submitted_order_* handles.
 */
void free_element(struct element *el)
{
    starpu_data_unregister(el->recv);
    starpu_data_unregister(el->send);

    starpu_data_unregister(el->ensure_submitted_order_send);
    starpu_data_unregister(el->ensure_submitted_order_recv);
}
int main(int argc, char **argv) { int ret, rank, size; starpu_data_handle_t handle; int var; MPI_Init(&argc, &argv); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &size); ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_mpi_init(NULL, NULL, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init"); if (size<3) { FPRINTF(stderr, "We need more than 2 processes.\n"); starpu_mpi_shutdown(); starpu_shutdown(); MPI_Finalize(); return STARPU_TEST_SKIPPED; } if (rank == 0) { int n; for(n=1 ; n<size ; n++) { MPI_Status status; FPRINTF_MPI(stderr, "receiving from node %d\n", n); starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var)); starpu_mpi_recv(handle, n, 42, MPI_COMM_WORLD, &status); starpu_data_acquire(handle, STARPU_R); STARPU_ASSERT_MSG(var == n, "Received incorrect value <%d> from node <%d>\n", var, n); FPRINTF_MPI(stderr, "received <%d> from node %d\n", var, n); starpu_data_release(handle); starpu_data_unregister(handle); } } else { FPRINTF_MPI(stderr, "sending to node %d\n", 0); var = rank; starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var)); starpu_mpi_send(handle, 0, 42, MPI_COMM_WORLD); starpu_data_unregister(handle); } starpu_mpi_shutdown(); starpu_shutdown(); MPI_Finalize(); return 0; }
/*
 * Completion callback for a read/write buffer request.
 *
 * `data` points to a struct starpu_readwrite_buffer_args.  The callback
 * calls _starpu_event_complete() and _starpu_event_release_private() on the
 * request's event, unregisters dst_handle when `direction` is non-zero and
 * src_handle otherwise, and finally frees the argument structure.
 */
static void enqueue_readwrite_callback_callback(void*data)
{
    struct starpu_readwrite_buffer_args *args = data;

    _starpu_event_complete(args->event);
    _starpu_event_release_private(args->event);

    if (!args->direction)
    {
        starpu_data_unregister(args->src_handle);
    }
    else
    {
        starpu_data_unregister(args->dst_handle);
    }

    free(args);
}
/*
 * Unpartition each of the three handles (sparse_matrix, vector_in,
 * vector_out) into main RAM and then unregister it.
 */
void unregister_data(void)
{
    /* sparse matrix */
    starpu_data_unpartition(sparse_matrix, STARPU_MAIN_RAM);
    starpu_data_unregister(sparse_matrix);

    /* input vector */
    starpu_data_unpartition(vector_in, STARPU_MAIN_RAM);
    starpu_data_unregister(vector_in);

    /* output vector */
    starpu_data_unpartition(vector_out, STARPU_MAIN_RAM);
    starpu_data_unregister(vector_out);
}
static int submit(struct starpu_codelet *codelet, struct starpu_perfmodel *model) { int nloops = 123; int loop; starpu_data_handle_t handle; struct starpu_perfmodel lmodel; int ret; int old_nsamples, new_nsamples; struct starpu_conf conf; unsigned archid, archtype, devid, ncore; starpu_conf_init(&conf); conf.sched_policy_name = "eager"; conf.calibrate = 1; ret = starpu_init(&conf); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); codelet->model = model; starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, 100, sizeof(int)); for (loop = 0; loop < nloops; loop++) { ret = starpu_task_insert(codelet, STARPU_W, handle, 0); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_data_unregister(handle); starpu_shutdown(); return EXIT_SUCCESS; }
/*
 * Register matA (size x size elements, leading dimension ld) with StarPU,
 * partition it with two nested filters of nblocks children each, run
 * dw_codelet_facto_v3() on the partitioned handle, then unpartition and
 * unregister it.
 *
 * Returns the value returned by dw_codelet_facto_v3().
 */
int STARPU_LU(lu_decomposition)(TYPE *matA, unsigned size, unsigned ld, unsigned nblocks)
{
    starpu_data_handle_t dataA;

    /* monitor and partition the A matrix into blocks :
     * one block is now determined by 2 unsigned (i,j) */
    starpu_matrix_data_register(&dataA, STARPU_MAIN_RAM, (uintptr_t)matA, ld, size, size, sizeof(TYPE));

    struct starpu_data_filter vertical_split =
    {
        .filter_func = starpu_matrix_filter_vertical_block,
        .nchildren = nblocks
    };
    struct starpu_data_filter block_split =
    {
        .filter_func = starpu_matrix_filter_block,
        .nchildren = nblocks
    };

    starpu_data_map_filters(dataA, 2, &vertical_split, &block_split);

    int status = dw_codelet_facto_v3(dataA, nblocks);

    /* gather all the data */
    starpu_data_unpartition(dataA, STARPU_MAIN_RAM);
    starpu_data_unregister(dataA);

    return status;
}
/*
 * Final step of the computation: unpartition and unregister C_handle, take
 * the end timestamp, and pass the elapsed time in microseconds (relative to
 * the global `start`) to display_stats().
 *
 * When CHECK_OUTPUT is defined, the result is verified by computing
 * C = C - A*B with SGEMM and checking that the sum of absolute values of C
 * is below xdim*ydim*0.001.
 */
void terminate(void)
{
    fprintf(stderr, "unpartition !!\n");
    starpu_data_unpartition(C_handle, 0);
    starpu_data_unregister(C_handle);

    gettimeofday(&end, NULL);

    /* Do the arithmetic in double: multiplying the integer tv_sec
     * difference by 1000000 overflows after ~35 minutes where time_t/long
     * is 32 bits wide. */
    double timing = (double)(end.tv_sec - start.tv_sec) * 1000000.0
                  + (double)(end.tv_usec - start.tv_usec);

    display_stats(timing);

#ifdef CHECK_OUTPUT
    /* check results */
    /* compute C = C - AB */
    SGEMM("N", "N", ydim, xdim, zdim, -1.0f, A, ydim, B, zdim, 1.0f, C, ydim);

    /* make sure C = 0 */
    float err;
    err = SASUM(xdim*ydim, C, 1);

    if (err < xdim*ydim*0.001)
    {
        fprintf(stderr, "Results are OK\n");
    }
    else
    {
        fprintf(stderr, "There were errors ... err = %f\n", err);
    }
#endif /* CHECK_OUTPUT */
}
static void cholesky(float *matA, unsigned size, unsigned ld, unsigned nblocks) { starpu_data_handle_t dataA; /* monitor and partition the A matrix into blocks : * one block is now determined by 2 unsigned (i,j) */ starpu_matrix_data_register(&dataA, STARPU_MAIN_RAM, (uintptr_t)matA, ld, size, size, sizeof(float)); starpu_data_set_sequential_consistency_flag(dataA, 0); struct starpu_data_filter f = { .filter_func = starpu_matrix_filter_vertical_block, .nchildren = nblocks }; struct starpu_data_filter f2 = { .filter_func = starpu_matrix_filter_block, .nchildren = nblocks }; starpu_data_map_filters(dataA, 2, &f, &f2); _cholesky(dataA, nblocks); starpu_data_unregister(dataA); }
int main(int argc, char **argv) { int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_malloc((void**)&data, sizeof(*data)); *data = 42; /* register a piece of data */ starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)data, 1, sizeof(unsigned)); struct starpu_task *task = starpu_task_create(); task->cl = &wrong_codelet; task->handles[0] = handle; task->use_tag = 1; task->tag_id = TAG; task->callback_func = wrong_callback; task->detach = 0; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_tag_wait(TAG); STARPU_CHECK_RETURN_VALUE(ret, "starpu_tag_wait"); /* This call is valid as it is done by the application outside a * callback */ ret = starpu_data_acquire(handle, STARPU_RW); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); starpu_data_release(handle); ret = starpu_task_wait(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); starpu_data_unregister(handle); starpu_free(data); starpu_shutdown(); return EXIT_SUCCESS; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; }
/*
 * LU decomposition with pivoting on a matrix stored as nblocks x nblocks
 * independently allocated blocks (matA[bi + nblocks*bj]).
 *
 * Each block is registered as its own StarPU matrix of
 * (size/nblocks) x (size/nblocks) elements, ipiv is initialized to the
 * identity permutation, one piv_s descriptor is built per block row, and
 * dw_codelet_facto_pivot() is run.  On success the timing (and, when
 * `bound` is set, the computed bound) is printed, all handles are
 * unregistered and both temporary arrays are freed.
 *
 * `ld` is not used by this function.
 *
 * Returns the value returned by dw_codelet_facto_pivot().
 */
int STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks)
{
    starpu_data_handle_t *dataAp = malloc(nblocks*nblocks*sizeof(starpu_data_handle_t));

    /* monitor and partition the A matrix into blocks :
     * one block is now determined by 2 unsigned (i,j) */
    unsigned bi, bj;
    for (bj = 0; bj < nblocks; bj++)
    for (bi = 0; bi < nblocks; bi++)
    {
        starpu_matrix_data_register(&dataAp[bi+nblocks*bj], STARPU_MAIN_RAM,
            (uintptr_t)matA[bi+nblocks*bj], size/nblocks,
            size/nblocks, size/nblocks, sizeof(TYPE));
    }

    unsigned i;
    for (i = 0; i < size; i++)
        ipiv[i] = i;

    struct piv_s *piv_description = malloc(nblocks*sizeof(struct piv_s));
    unsigned block;
    for (block = 0; block < nblocks; block++)
    {
        piv_description[block].piv = ipiv;
        piv_description[block].first = block * (size / nblocks);
        piv_description[block].last = (block + 1) * (size / nblocks);
    }

    double timing;
    int ret = dw_codelet_facto_pivot(dataAp, piv_description, nblocks, get_block_with_no_striding, &timing);
    if (ret)
    {
        /* NOTE(review): as in the original, nothing is released on this
         * path; tasks submitted before the failure may still reference the
         * handles and piv_description -- confirm whether a safe cleanup is
         * possible here. */
        return ret;
    }

    unsigned n = starpu_matrix_get_nx(dataAp[0])*nblocks;
    double flop = (2.0f*n*n*n)/3.0f;

    PRINTF("# size\tms\tGFlops");
    if (bound)
        PRINTF("\tTms\tTGFlops");
    PRINTF("\n");
    PRINTF("%u\t%.0f\t%.1f", n, timing/1000, flop/timing/1000.0f);
    if (bound)
    {
        double min;
        starpu_bound_compute(&min, NULL, 0);
        PRINTF("\t%.0f\t%.1f", min, flop/min/1000000.0f);
    }
    PRINTF("\n");

    for (bj = 0; bj < nblocks; bj++)
    for (bi = 0; bi < nblocks; bi++)
    {
        starpu_data_unregister(dataAp[bi+nblocks*bj]);
    }

    free(dataAp);
    /* The pivot descriptors were allocated above and are not handed to the
     * caller; release them now that every handle has been unregistered. */
    free(piv_description);

    return ret;
}
static int test_codelet(struct starpu_codelet *codelet, int task_insert, int args, int x, float f) { starpu_data_handle_t data_handles[2]; int xx = x; float ff = f; int i, ret; starpu_variable_data_register(&data_handles[0], STARPU_MAIN_RAM, (uintptr_t)&xx, sizeof(xx)); starpu_variable_data_register(&data_handles[1], STARPU_MAIN_RAM, (uintptr_t)&ff, sizeof(ff)); FPRINTF(stderr, "values: %d (%d) %f (%f)\n", xx, _ifactor, ff, _ffactor); if (task_insert) { if (args) ret = starpu_task_insert(codelet, STARPU_VALUE, &_ifactor, sizeof(_ifactor), STARPU_VALUE, &_ffactor, sizeof(_ffactor), STARPU_RW, data_handles[0], STARPU_RW, data_handles[1], 0); else ret = starpu_task_insert(codelet, STARPU_RW, data_handles[0], STARPU_RW, data_handles[1], 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } else { struct starpu_task *task; if (args) task = starpu_task_build(codelet, STARPU_VALUE, &_ifactor, sizeof(_ifactor), STARPU_VALUE, &_ffactor, sizeof(_ffactor), STARPU_RW, data_handles[0], STARPU_RW, data_handles[1], 0); else task = starpu_task_build(codelet, STARPU_RW, data_handles[0], STARPU_RW, data_handles[1], 0); task->cl_arg_free = 1; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } enodev: for(i=0 ; i<2 ; i++) { starpu_data_unregister(data_handles[i]); } FPRINTF(stderr, "values: %d (should be %d) %f (should be %f)\n\n", xx, x*_ifactor, ff, f*_ffactor); return (ret == -ENODEV ? ret : xx == x*_ifactor && ff == f*_ffactor); }
int main(int argc, char **argv) { int ret; starpu_init(NULL); starpu_data_malloc_pinned_if_possible((void **)&buffer, VECTORSIZE); starpu_vector_data_register(&v_handle, 0, (uintptr_t)buffer, VECTORSIZE, sizeof(char)); struct starpu_data_filter f = { .filter_func = starpu_vector_divide_in_2_filter_func, /* there are only 2 children */ .nchildren = 2, /* the length of the first part */ .filter_arg = VECTORSIZE/2, .get_nchildren = NULL, .get_child_ops = NULL }; unsigned iter; for (iter = 0; iter < NITER; iter++) { starpu_data_map_filters(v_handle, 1, &f); ret = use_handle(starpu_data_get_sub_data(v_handle, 1, 0)); if (ret == -ENODEV) goto enodev; ret = use_handle(starpu_data_get_sub_data(v_handle, 1, 1)); if (ret == -ENODEV) goto enodev; starpu_task_wait_for_all(); starpu_data_unpartition(v_handle, 0); ret = use_handle(v_handle); if (ret == -ENODEV) goto enodev; starpu_task_wait_for_all(); } starpu_data_unregister(v_handle); starpu_shutdown(); return 0; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ return 0; }
int main(int argc, char **argv) { int ret, rank, size; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); if (size%2 != 0) { if (rank == 0) FPRINTF(stderr, "We need a even number of processes.\n"); MPI_Finalize(); return STARPU_TEST_SKIPPED; } ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_mpi_init(NULL, NULL, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init"); tab = malloc(SIZE*sizeof(float)); starpu_vector_data_register(&tab_handle, STARPU_MAIN_RAM, (uintptr_t)tab, SIZE, sizeof(float)); int nloops = NITER; int loop; int other_rank = rank%2 == 0 ? rank+1 : rank-1; for (loop = 0; loop < nloops; loop++) { starpu_tag_t tag = (starpu_tag_t)loop; if ((loop % 2) == (rank%2)) { starpu_mpi_isend_detached_unlock_tag(tab_handle, other_rank, loop, MPI_COMM_WORLD, tag); } else { starpu_mpi_irecv_detached_unlock_tag(tab_handle, other_rank, loop, MPI_COMM_WORLD, tag); } starpu_tag_wait(tag); } starpu_data_unregister(tab_handle); free(tab); starpu_mpi_shutdown(); starpu_shutdown(); MPI_Finalize(); return 0; }
int main(int argc, char **argv) { int ret; int var = 42; starpu_data_handle_t handle; ret = starpu_init(NULL); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); int copy = starpu_asynchronous_copy_disabled(); FPRINTF(stderr, "copy %d\n", copy); starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&var, sizeof(var)); ret = starpu_task_insert(&cl, STARPU_R, handle, 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_task_wait_for_all(); starpu_data_unregister(handle); starpu_shutdown(); return 0; enodev: starpu_data_unregister(handle); starpu_shutdown(); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ fprintf(stderr, "WARNING: No one can execute this task\n"); return STARPU_TEST_SKIPPED; }
int main(int argc, char **argv) { int v=40; int ret; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_variable_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)&v, sizeof(int)); double *x = (double*)malloc(sizeof(double)); struct starpu_task *task = starpu_task_create(); task->cl = &cl; task->prologue_callback_func = callback_func; task->prologue_callback_arg = NULL; task->prologue_callback_pop_func = pop_prologue_callback_func; task->prologue_callback_pop_arg = (void*) 5; task->handles[0] = handle; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); *x = -999.0; ret = starpu_task_insert(&cl, STARPU_RW, handle, STARPU_PROLOGUE_CALLBACK, prologue_callback_func, STARPU_PROLOGUE_CALLBACK_ARG, x, STARPU_PROLOGUE_CALLBACK_POP, pop_prologue_callback_func, STARPU_PROLOGUE_CALLBACK_POP_ARG, 5, 0); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); starpu_task_wait_for_all(); enodev: starpu_data_unregister(handle); free(x); FPRINTF(stderr, "v -> %d\n", v); starpu_shutdown(); return (ret == -ENODEV) ? 77 : 0; }
int main(int argc, char **argv) { int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); unsigned b; for (b = 0; b < NBUFFERS; b++) { buffers[b].index = b; starpu_variable_data_register(&buffers[b].handle, STARPU_MAIN_RAM, (uintptr_t)&buffers[b].val, sizeof(unsigned)); } unsigned iter; for (iter = 0; iter < NITER; iter++) { for (b = 0; b < NBUFFERS; b++) { ret = starpu_data_acquire_cb(buffers[b].handle, STARPU_RW, callback_sync_data, &buffers[b]); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire_cb"); } } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); /* do some cleanup */ ret = EXIT_SUCCESS; for (b = 0; b < NBUFFERS; b++) { starpu_data_unregister(buffers[b].handle); /* check result */ if (buffers[b].val != NITER) { FPRINTF(stderr, "buffer[%u] = %u should be %d\n", b, buffers[b].val, NITER); ret = EXIT_FAILURE; } } starpu_shutdown(); return ret; }
/*
 * LU decomposition with pivoting on a matrix stored as nblocks x nblocks
 * independently allocated blocks (matA[bi + nblocks*bj]).
 *
 * Each block is registered as its own StarPU matrix of
 * (size/nblocks) x (size/nblocks) elements with sequential consistency
 * disabled, ipiv is initialized to the identity permutation, one piv_s
 * descriptor is built per block row, and dw_codelet_facto_pivot() is run.
 * Its return value is treated as the elapsed time and printed to stderr
 * together with the derived GFlops figure.  All handles are then
 * unregistered and both temporary arrays are freed.
 *
 * `ld` is not used by this function.
 */
void STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks)
{
    starpu_data_handle *dataAp = malloc(nblocks*nblocks*sizeof(starpu_data_handle));

    /* monitor and partition the A matrix into blocks :
     * one block is now determined by 2 unsigned (i,j) */
    unsigned bi, bj;
    for (bj = 0; bj < nblocks; bj++)
    for (bi = 0; bi < nblocks; bi++)
    {
        starpu_matrix_data_register(&dataAp[bi+nblocks*bj], 0,
            (uintptr_t)matA[bi+nblocks*bj], size/nblocks,
            size/nblocks, size/nblocks, sizeof(TYPE));

        /* We already enforce deps by hand */
        starpu_data_set_sequential_consistency_flag(dataAp[bi+nblocks*bj], 0);
    }

    unsigned i;
    for (i = 0; i < size; i++)
        ipiv[i] = i;

    struct piv_s *piv_description = malloc(nblocks*sizeof(struct piv_s));
    unsigned block;
    for (block = 0; block < nblocks; block++)
    {
        piv_description[block].piv = ipiv;
        piv_description[block].first = block * (size / nblocks);
        piv_description[block].last = (block + 1) * (size / nblocks);
    }

    double timing;
    timing = dw_codelet_facto_pivot(dataAp, piv_description, nblocks, get_block_with_no_striding);

    fprintf(stderr, "Computation took (in ms)\n");
    fprintf(stderr, "%2.2f\n", timing/1000);

    unsigned n = starpu_matrix_get_nx(dataAp[0])*nblocks;
    double flop = (2.0f*n*n*n)/3.0f;
    fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));

    for (bj = 0; bj < nblocks; bj++)
    for (bi = 0; bi < nblocks; bi++)
    {
        starpu_data_unregister(dataAp[bi+nblocks*bj]);
    }

    /* Both arrays were allocated above and are not handed to the caller;
     * release them now that every handle has been unregistered. */
    free(dataAp);
    free(piv_description);
}
/*
 * Register NB void data handles (each also registered with StarPU-MPI
 * using its index and `rank`), run exchange() on them with the check_void
 * checker, then unregister them.
 *
 * Returns the value returned by exchange().
 */
int exchange_void(int rank, int detached)
{
    int ret, k;
    starpu_data_handle_t tab_handle[NB];

    FPRINTF_MPI(stderr, "Exchanging void data with detached=%d\n", detached);

    for (k = 0; k < NB; k++)
    {
        starpu_void_data_register(&tab_handle[k]);
        starpu_mpi_data_register(tab_handle[k], k, rank);
    }

    ret = exchange(rank, tab_handle, check_void, detached);

    for (k = 0; k < NB; k++)
    {
        starpu_data_unregister(tab_handle[k]);
    }

    return ret;
}
void dw_factoLU_tag(float *matA, unsigned size, unsigned ld, unsigned nblocks, unsigned _no_prio) { #ifdef CHECK_RESULTS FPRINTF(stderr, "Checking results ...\n"); float *Asaved; Asaved = malloc((size_t)ld*ld*sizeof(float)); memcpy(Asaved, matA, (size_t)ld*ld*sizeof(float)); #endif no_prio = _no_prio; starpu_data_handle_t dataA; /* monitor and partition the A matrix into blocks : * one block is now determined by 2 unsigned (i,j) */ starpu_matrix_data_register(&dataA, STARPU_MAIN_RAM, (uintptr_t)matA, ld, size, size, sizeof(float)); struct starpu_data_filter f = { .filter_func = starpu_matrix_filter_vertical_block, .nchildren = nblocks }; struct starpu_data_filter f2 = { .filter_func = starpu_matrix_filter_block, .nchildren = nblocks }; starpu_data_map_filters(dataA, 2, &f, &f2); dw_codelet_facto_v3(dataA, nblocks); /* gather all the data */ starpu_data_unpartition(dataA, STARPU_MAIN_RAM); starpu_data_unregister(dataA); #ifdef CHECK_RESULTS compare_A_LU(Asaved, matA, size, ld); #endif }
/*
 * Register NB int variables, the i-th initialized to i*rank (each handle
 * also registered with StarPU-MPI using its index and `rank`), run
 * exchange() on them with the check_variable checker, then unregister them.
 *
 * Returns the value returned by exchange().
 */
int exchange_variable(int rank, int detached)
{
    int ret, k;
    starpu_data_handle_t tab_handle[NB];
    int value[NB];

    FPRINTF_MPI(stderr, "Exchanging variable data with detached=%d\n", detached);

    for (k = 0; k < NB; k++)
    {
        value[k] = k*rank;
        starpu_variable_data_register(&tab_handle[k], STARPU_MAIN_RAM, (uintptr_t)&value[k], sizeof(int));
        starpu_mpi_data_register(tab_handle[k], k, rank);
    }

    ret = exchange(rank, tab_handle, check_variable, detached);

    for (k = 0; k < NB; k++)
    {
        starpu_data_unregister(tab_handle[k]);
    }

    return ret;
}
int main(int argc, char **argv) { starpu_init(NULL); /* Allocate all buffers and register them to StarPU */ unsigned b; for (b = 0; b < NBUFFERS; b++) { starpu_data_malloc_pinned_if_possible((void **)&buffer[b], VECTORSIZE); starpu_vector_data_register(&v_handle[b], 0, (uintptr_t)buffer[b], VECTORSIZE, sizeof(char)); } unsigned iter; for (iter = 0; iter < NITER; iter++) { /* Use the buffers on the different workers so that it may not * be in main memory anymore */ for (b = 0; b < NBUFFERS; b++) use_handle(v_handle[b]); starpu_task_wait_for_all(); /* Grab the different pieces of data into main memory */ for (b = 0; b < NBUFFERS; b++) starpu_data_acquire(v_handle[b], STARPU_RW); /* Release them */ for (b = 0; b < NBUFFERS; b++) starpu_data_release(v_handle[b]); } /* do some cleanup */ for (b = 0; b < NBUFFERS; b++) starpu_data_unregister(v_handle[b]); starpu_shutdown(); return 0; }
/*
 * Pseudo-code sketch, not compilable C: the `...` placeholders and the
 * `codelets::` names are illustrative only.
 *
 * Outline: `seeds` is registered once and an init_seeds task is inserted.
 * Then, while `iteration` is below config.max_iters, `eye_paths` and
 * `hit_points` are registered, tasks using them are inserted, and both are
 * released with starpu_data_unregister_submit() before the counter is
 * incremented.  `seeds` is released with starpu_data_unregister() after
 * the loop.
 */
void render() { starpu_data_register(seeds, ...) starpu_insert_task(codelets::init_seeds, seeds); int iteration = 0; while(iteration < config.max_iters) { starpu_data_register(eye_paths, ...) starpu_insert_task(codelets::generate_eye_paths, eye_paths, seeds); starpu_data_register(hit_points, ...) starpu_insert_task(codelets::advance_eye_paths, eye_paths, seeds, hit_points); starpu_data_unregister_submit(eye_paths); ... // all other tasks for this iteration starpu_data_unregister_submit(hit_points); iteration++; } starpu_data_unregister(seeds); }
/*
 * Register NB single-element complex values, the i-th with real part
 * i*rank+12 and imaginary part i*rank+45 (each handle also registered with
 * StarPU-MPI using its index and `rank`), run exchange() on them with the
 * check_complex checker, then unregister them.
 *
 * Returns the value returned by exchange().
 */
int exchange_complex(int rank, int detached)
{
    int ret, k;
    starpu_data_handle_t handle[NB];
    double real[NB];
    double imaginary[NB];

    FPRINTF_MPI(stderr, "Exchanging complex data with detached=%d\n", detached);

    for (k = 0; k < NB; k++)
    {
        real[k] = (k*rank)+12;
        imaginary[k] = (k*rank)+45;
        starpu_complex_data_register(&handle[k], STARPU_MAIN_RAM, &real[k], &imaginary[k], 1);
        starpu_mpi_data_register(handle[k], k, rank);
    }

    ret = exchange(rank, handle, check_complex, detached);

    for (k = 0; k < NB; k++)
    {
        starpu_data_unregister(handle[k]);
    }

    return ret;
}
/*
 * Register a vector of `nelems` ints with home node -1 and no user buffer,
 * submit 200 tasks running memset_cl on it, then unregister it.
 *
 * Exits the process with STARPU_TEST_SKIPPED if a submission reports
 * -ENODEV.
 */
static void test_memset(int nelems)
{
    starpu_data_handle_t handle;

    starpu_vector_data_register(&handle, -1, (uintptr_t)NULL, nelems, sizeof(int));

    int remaining = 200;
    while (remaining-- > 0)
    {
        struct starpu_task *task = starpu_task_create();

        task->cl = &memset_cl;
        task->handles[0] = handle;

        int ret = starpu_task_submit(task);
        if (ret == -ENODEV)
            exit(STARPU_TEST_SKIPPED);
        STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
    }

    starpu_data_unregister(handle);
}
int main(int argc, char **argv) { int i, j, ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_initialize"); float *data; starpu_malloc((void**)&data, sizeof(*data) * NB_BUNDLE); float factors[NB_BUNDLE]; starpu_data_handle_t handles[NB_BUNDLE]; struct starpu_task *task[NB_ITERATION]; starpu_task_bundle_t bundles[NB_BUNDLE]; for (i = 0; i < NB_BUNDLE; i++) { data[i] = i + 1; factors[i] = NB_BUNDLE - i; } for (i = 0; i < NB_BUNDLE; i++) starpu_variable_data_register(&handles[i], STARPU_MAIN_RAM, (uintptr_t)&data[i], sizeof(float)); FPRINTF(stderr, "VALUES:"); for (i = 0; i < NB_BUNDLE; i++) FPRINTF(stderr, " %f (%f)", data[i], factors[i]); FPRINTF(stderr, "\n"); for (i = 0; i < NB_BUNDLE; i++) { starpu_task_bundle_create(&bundles[i]); for (j = 0; j < NB_ITERATION; j++) { task[j] = starpu_task_create(); task[j]->cl = &codelet; task[j]->cl_arg = &factors[i]; task[j]->cl_arg_size = sizeof(float); task[j]->handles[0] = handles[i]; ret = starpu_task_bundle_insert(bundles[i], task[j]); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } ret = starpu_task_bundle_remove(bundles[i], task[NB_ITERATION / 2]); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_remove"); for (j = 0; j < NB_ITERATION; j++) { ret = starpu_task_submit(task[j]); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_task_bundle_close(bundles[i]); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); for(i = 0; i < NB_BUNDLE ; i++) { ret = starpu_data_acquire(handles[i], STARPU_R); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); } FPRINTF(stderr, "VALUES:"); for (i = 0; i < NB_BUNDLE; i++) FPRINTF(stderr, " %f (%f)", data[i], factors[i]); FPRINTF(stderr, "\n"); for(i = 0; i < NB_BUNDLE ; i++) { starpu_data_release(handles[i]); starpu_data_unregister(handles[i]); } starpu_free(data); starpu_shutdown(); return EXIT_SUCCESS; enodev: 
starpu_shutdown(); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ return STARPU_TEST_SKIPPED; }
int dotest(struct starpu_disk_ops *ops, void *param) { double *A,*B,*C,*D,*E,*F; int ret; /* limit main ram to force to push in disk */ setenv("STARPU_LIMIT_CPU_MEM", RAM, 1); /* Initialize StarPU without GPU devices to make sure the memory of the GPU devices will not be used */ struct starpu_conf conf; ret = starpu_conf_init(&conf); if (ret == -EINVAL) return EXIT_FAILURE; conf.ncuda = 0; conf.nopencl = 0; ret = starpu_init(&conf); if (ret == -ENODEV) goto enodev; /* register a disk */ int new_dd = starpu_disk_register(ops, param, 1024*1024*DISK); /* can't write on /tmp/ */ if (new_dd == -ENOENT) goto enoent; /* allocate two memory spaces */ starpu_malloc_flags((void **)&A, NX*sizeof(double), STARPU_MALLOC_COUNT); starpu_malloc_flags((void **)&F, NX*sizeof(double), STARPU_MALLOC_COUNT); FPRINTF(stderr, "TEST DISK MEMORY \n"); unsigned int j; /* initialization with bad values */ for(j = 0; j < NX; ++j) { A[j] = j; F[j] = -j; } starpu_data_handle_t vector_handleA, vector_handleB, vector_handleC, vector_handleD, vector_handleE, vector_handleF; /* register vector in starpu */ starpu_vector_data_register(&vector_handleA, STARPU_MAIN_RAM, (uintptr_t)A, NX, sizeof(double)); starpu_vector_data_register(&vector_handleB, -1, (uintptr_t) NULL, NX, sizeof(double)); starpu_vector_data_register(&vector_handleC, -1, (uintptr_t) NULL, NX, sizeof(double)); starpu_vector_data_register(&vector_handleD, -1, (uintptr_t) NULL, NX, sizeof(double)); starpu_vector_data_register(&vector_handleE, -1, (uintptr_t) NULL, NX, sizeof(double)); starpu_vector_data_register(&vector_handleF, STARPU_MAIN_RAM, (uintptr_t)F, NX, sizeof(double)); /* copy vector A->B, B->C... 
*/ starpu_data_cpy(vector_handleB, vector_handleA, 0, NULL, NULL); starpu_data_cpy(vector_handleC, vector_handleB, 0, NULL, NULL); starpu_data_cpy(vector_handleD, vector_handleC, 0, NULL, NULL); starpu_data_cpy(vector_handleE, vector_handleD, 0, NULL, NULL); starpu_data_cpy(vector_handleF, vector_handleE, 0, NULL, NULL); /* StarPU does not need to manipulate the array anymore so we can stop * monitoring it */ /* free them */ starpu_data_unregister(vector_handleA); starpu_data_unregister(vector_handleB); starpu_data_unregister(vector_handleC); starpu_data_unregister(vector_handleD); starpu_data_unregister(vector_handleE); starpu_data_unregister(vector_handleF); /* check if computation is correct */ int try = 1; for (j = 0; j < NX; ++j) if (A[j] != F[j]) { FPRINTF(stderr, "Fail A %f != F %f \n", A[j], F[j]); try = 0; }
/* Unregister the file-level `handle` from StarPU. */
static void unregister_handle(void)
{
    starpu_data_unregister(handle);
}
int main(int argc, char **argv) { int ret; #ifdef STARPU_QUICK_CHECK nbuffers /= 4; niter /= 4; vectorsize /= 8; #endif ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* Allocate all buffers and register them to StarPU */ int b; for (b = 0; b < nbuffers; b++) { ret = starpu_malloc((void **)&buffer[b], vectorsize); STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc"); starpu_vector_data_register(&v_handle[b], STARPU_MAIN_RAM, (uintptr_t)buffer[b], vectorsize, sizeof(char)); } int iter; for (iter = 0; iter < niter; iter++) { /* Use the buffers on the different workers so that it may not * be in main memory anymore */ for (b = 0; b < nbuffers; b++) { ret = use_handle(v_handle[b]); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); /* Grab the different pieces of data into main memory */ for (b = 0; b < nbuffers; b++) { ret = starpu_data_acquire(v_handle[b], STARPU_RW); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); } /* Release them */ for (b = 0; b < nbuffers; b++) starpu_data_release(v_handle[b]); } /* do some cleanup */ for (b = 0; b < nbuffers; b++) { starpu_data_unregister(v_handle[b]); starpu_free(buffer[b]); } starpu_shutdown(); return EXIT_SUCCESS; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; }
int main(int argc, char **argv) { int ret; /* Not supported yet */ if (starpu_get_env_number_default("STARPU_GLOBAL_ARBITER", 0) > 0) return 77; ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_load_opencl_from_file("examples/reductions/dot_product_opencl_kernels.cl", &_opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); #endif #ifdef STARPU_USE_CUDA /* cublasSdot has synchronization issues when using a non-blocking stream */ cublasGetVersion(&cublas_version); if (cublas_version >= 7050) starpu_cublas_init(); #endif unsigned long nelems = _nblocks*_entries_per_block; size_t size = nelems*sizeof(float); _x = (float *) malloc(size); _y = (float *) malloc(size); _x_handles = (starpu_data_handle_t *) calloc(_nblocks, sizeof(starpu_data_handle_t)); _y_handles = (starpu_data_handle_t *) calloc(_nblocks, sizeof(starpu_data_handle_t)); assert(_x && _y); starpu_srand48(0); DOT_TYPE reference_dot = 0.0; unsigned long i; for (i = 0; i < nelems; i++) { _x[i] = (float)starpu_drand48(); _y[i] = (float)starpu_drand48(); reference_dot += (DOT_TYPE)_x[i]*(DOT_TYPE)_y[i]; } unsigned block; for (block = 0; block < _nblocks; block++) { starpu_vector_data_register(&_x_handles[block], STARPU_MAIN_RAM, (uintptr_t)&_x[_entries_per_block*block], _entries_per_block, sizeof(float)); starpu_vector_data_register(&_y_handles[block], STARPU_MAIN_RAM, (uintptr_t)&_y[_entries_per_block*block], _entries_per_block, sizeof(float)); } starpu_variable_data_register(&_dot_handle, STARPU_MAIN_RAM, (uintptr_t)&_dot, sizeof(DOT_TYPE)); /* * Compute dot product with StarPU */ starpu_data_set_reduction_methods(_dot_handle, &redux_codelet, &init_codelet); for (block = 0; block < _nblocks; block++) { struct starpu_task *task = starpu_task_create(); task->cl = &dot_codelet; task->destroy = 1; task->handles[0] = _x_handles[block]; task->handles[1] = _y_handles[block]; 
task->handles[2] = _dot_handle; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_ASSERT(!ret); } for (block = 0; block < _nblocks; block++) { starpu_data_unregister(_x_handles[block]); starpu_data_unregister(_y_handles[block]); } starpu_data_unregister(_dot_handle); FPRINTF(stderr, "Reference : %e vs. %e (Delta %e)\n", reference_dot, _dot, reference_dot - _dot); #ifdef STARPU_USE_CUDA if (cublas_version >= 7050) starpu_cublas_shutdown(); #endif #ifdef STARPU_USE_OPENCL ret = starpu_opencl_unload_opencl(&_opencl_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); #endif starpu_shutdown(); free(_x); free(_y); free(_x_handles); free(_y_handles); if (fabs(reference_dot - _dot) < reference_dot * 1e-6) return EXIT_SUCCESS; else return EXIT_FAILURE; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ return 77; }
int main(int argc, char **argv) { int ret, rank, size; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); if (size<3) { FPRINTF(stderr, "We need more than 2 processes.\n"); MPI_Finalize(); return STARPU_TEST_SKIPPED; } ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_mpi_init(NULL, NULL, 0); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init"); if (rank == 0) { int n; for(n=1 ; n<size ; n++) { int i, var[2]; MPI_Status status[3]; starpu_data_handle_t handle[2]; FPRINTF_MPI(stderr, "receiving from node %d\n", n); for(i=0 ; i<2 ; i++) starpu_variable_data_register(&handle[i], STARPU_MAIN_RAM, (uintptr_t)&var[i], sizeof(var[i])); starpu_mpi_recv(handle[0], n, 42, MPI_COMM_WORLD, &status[0]); starpu_data_acquire(handle[0], STARPU_R); STARPU_ASSERT_MSG(var[0] == n, "Received incorrect value <%d> from node <%d>\n", var[0], n); FPRINTF_MPI(stderr, "received <%d> from node %d\n", var[0], n); starpu_data_release(handle[0]); starpu_mpi_recv(handle[0], n, 42, MPI_COMM_WORLD, &status[1]); starpu_mpi_recv(handle[1], n, 44, MPI_COMM_WORLD, &status[2]); for(i=0 ; i<2 ; i++) starpu_data_acquire(handle[i], STARPU_R); STARPU_ASSERT_MSG(var[0] == n*2, "Received incorrect value <%d> from node <%d>\n", var[0], n); STARPU_ASSERT_MSG(var[1] == n*4, "Received incorrect value <%d> from node <%d>\n", var[0], n); FPRINTF_MPI(stderr, "received <%d> and <%d> from node %d\n", var[0], var[1], n); for(i=0 ; i<2 ; i++) starpu_data_release(handle[i]); for(i=0 ; i<2 ; i++) starpu_data_unregister(handle[i]); } } else { int i, var[3]; starpu_data_handle_t handle[3]; FPRINTF_MPI(stderr, "sending to node %d\n", 0); var[0] = rank; var[1] = var[0] * 2; var[2] = var[0] * 4; for(i=0 ; i<3 ; i++) starpu_variable_data_register(&handle[i], STARPU_MAIN_RAM, (uintptr_t)&var[i], sizeof(var[i])); starpu_mpi_send(handle[0], 0, 42, MPI_COMM_WORLD); starpu_mpi_send(handle[1], 0, 42, MPI_COMM_WORLD); starpu_mpi_send(handle[2], 0, 44, 
MPI_COMM_WORLD); for(i=0 ; i<3 ; i++) starpu_data_unregister(handle[i]); } starpu_mpi_shutdown(); starpu_shutdown(); MPI_Finalize(); return 0; }