int main(int argc, char **argv) { int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_malloc((void**)&data, sizeof(*data)); *data = 42; /* register a piece of data */ starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)data, 1, sizeof(unsigned)); struct starpu_task *task = starpu_task_create(); task->cl = &wrong_codelet; task->handles[0] = handle; task->use_tag = 1; task->tag_id = TAG; task->callback_func = wrong_callback; task->detach = 0; ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); ret = starpu_tag_wait(TAG); STARPU_CHECK_RETURN_VALUE(ret, "starpu_tag_wait"); /* This call is valid as it is done by the application outside a * callback */ ret = starpu_data_acquire(handle, STARPU_RW); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); starpu_data_release(handle); ret = starpu_task_wait(task); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait"); starpu_data_unregister(handle); starpu_free(data); starpu_shutdown(); return EXIT_SUCCESS; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; }
/*
 * Release the matrix buffer and stop the runtime.
 *
 * matA:   address of the pointer to the buffer to release
 * pinned: non-zero when the buffer must be released with starpu_free(),
 *         zero when it must be released with free()
 *
 * The CUBLAS helper is shut down before StarPU itself.
 */
static void shutdown_system(float **matA, unsigned pinned)
{
	float *matrix = *matA;

	if (!pinned)
	{
		free(matrix);
	}
	else
	{
		starpu_free(matrix);
	}

	starpu_cublas_shutdown();
	starpu_shutdown();
}
/*
 * Free a nblocks x nblocks table of blocks.
 *
 * bmat:             address of the table; (*bmat)[x][y] is one block
 * rank:             rank compared against the owner returned by my_distrib()
 * nodes:            forwarded to my_distrib()
 * alloc_everywhere: when non-zero, every block is released regardless of its
 *                   owner
 *
 * A block is passed to starpu_free() when alloc_everywhere is set or when
 * my_distrib(x, y, nodes) equals rank. Each row array and then the top-level
 * array are released with free().
 */
void matrix_free(float ****bmat, int rank, int nodes, int alloc_everywhere)
{
	float ***rows = *bmat;
	unsigned row, col;

	for (row = 0; row < nblocks; row++)
	{
		float **blocks = rows[row];

		for (col = 0; col < nblocks; col++)
		{
			int owner = my_distrib(row, col, nodes);

			/* Blocks owned by another rank are skipped unless
			 * everything is to be released */
			if (!alloc_everywhere && owner != rank)
				continue;

			starpu_free((void *)blocks[col]);
		}

		free(blocks);
	}

	free(rows);
}
int main(int argc, char **argv) { int i, j, ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_initialize"); float *data; starpu_malloc((void**)&data, sizeof(*data) * NB_BUNDLE); float factors[NB_BUNDLE]; starpu_data_handle_t handles[NB_BUNDLE]; struct starpu_task *task[NB_ITERATION]; starpu_task_bundle_t bundles[NB_BUNDLE]; for (i = 0; i < NB_BUNDLE; i++) { data[i] = i + 1; factors[i] = NB_BUNDLE - i; } for (i = 0; i < NB_BUNDLE; i++) starpu_variable_data_register(&handles[i], STARPU_MAIN_RAM, (uintptr_t)&data[i], sizeof(float)); FPRINTF(stderr, "VALUES:"); for (i = 0; i < NB_BUNDLE; i++) FPRINTF(stderr, " %f (%f)", data[i], factors[i]); FPRINTF(stderr, "\n"); for (i = 0; i < NB_BUNDLE; i++) { starpu_task_bundle_create(&bundles[i]); for (j = 0; j < NB_ITERATION; j++) { task[j] = starpu_task_create(); task[j]->cl = &codelet; task[j]->cl_arg = &factors[i]; task[j]->cl_arg_size = sizeof(float); task[j]->handles[0] = handles[i]; ret = starpu_task_bundle_insert(bundles[i], task[j]); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); } ret = starpu_task_bundle_remove(bundles[i], task[NB_ITERATION / 2]); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_remove"); for (j = 0; j < NB_ITERATION; j++) { ret = starpu_task_submit(task[j]); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_task_bundle_close(bundles[i]); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); for(i = 0; i < NB_BUNDLE ; i++) { ret = starpu_data_acquire(handles[i], STARPU_R); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); } FPRINTF(stderr, "VALUES:"); for (i = 0; i < NB_BUNDLE; i++) FPRINTF(stderr, " %f (%f)", data[i], factors[i]); FPRINTF(stderr, "\n"); for(i = 0; i < NB_BUNDLE ; i++) { starpu_data_release(handles[i]); starpu_data_unregister(handles[i]); } starpu_free(data); starpu_shutdown(); return EXIT_SUCCESS; enodev: 
starpu_shutdown(); fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ return STARPU_TEST_SKIPPED; }
int main(int argc, char **argv) { int ret; #ifdef STARPU_QUICK_CHECK nbuffers /= 4; niter /= 4; vectorsize /= 8; #endif ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* Allocate all buffers and register them to StarPU */ int b; for (b = 0; b < nbuffers; b++) { ret = starpu_malloc((void **)&buffer[b], vectorsize); STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc"); starpu_vector_data_register(&v_handle[b], STARPU_MAIN_RAM, (uintptr_t)buffer[b], vectorsize, sizeof(char)); } int iter; for (iter = 0; iter < niter; iter++) { /* Use the buffers on the different workers so that it may not * be in main memory anymore */ for (b = 0; b < nbuffers; b++) { ret = use_handle(v_handle[b]); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); /* Grab the different pieces of data into main memory */ for (b = 0; b < nbuffers; b++) { ret = starpu_data_acquire(v_handle[b], STARPU_RW); STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire"); } /* Release them */ for (b = 0; b < nbuffers; b++) starpu_data_release(v_handle[b]); } /* do some cleanup */ for (b = 0; b < nbuffers; b++) { starpu_data_unregister(v_handle[b]); starpu_free(buffer[b]); } starpu_shutdown(); return EXIT_SUCCESS; enodev: fprintf(stderr, "WARNING: No one can execute this task\n"); /* yes, we do not perform the computation but we did detect that no one * could perform the kernel, so this is not an error from StarPU */ starpu_shutdown(); return STARPU_TEST_SKIPPED; }
int main(int argc, char **argv) { int ret, exit_value = 0; /* Initialize StarPU */ ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_load_opencl_from_file("examples/axpy/axpy_opencl_kernel.cl", &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); #endif starpu_cublas_init(); /* This is equivalent to vec_a = malloc(N*sizeof(TYPE)); vec_b = malloc(N*sizeof(TYPE)); */ starpu_malloc((void **)&_vec_x, N*sizeof(TYPE)); assert(_vec_x); starpu_malloc((void **)&_vec_y, N*sizeof(TYPE)); assert(_vec_y); unsigned i; for (i = 0; i < N; i++) { _vec_x[i] = 1.0f; /*(TYPE)starpu_drand48(); */ _vec_y[i] = 4.0f; /*(TYPE)starpu_drand48(); */ } FPRINTF(stderr, "BEFORE x[0] = %2.2f\n", _vec_x[0]); FPRINTF(stderr, "BEFORE y[0] = %2.2f\n", _vec_y[0]); /* Declare the data to StarPU */ starpu_vector_data_register(&_handle_x, STARPU_MAIN_RAM, (uintptr_t)_vec_x, N, sizeof(TYPE)); starpu_vector_data_register(&_handle_y, STARPU_MAIN_RAM, (uintptr_t)_vec_y, N, sizeof(TYPE)); /* Divide the vector into blocks */ struct starpu_data_filter block_filter = { .filter_func = starpu_vector_filter_block, .nchildren = NBLOCKS }; starpu_data_partition(_handle_x, &block_filter); starpu_data_partition(_handle_y, &block_filter); double start; double end; start = starpu_timing_now(); unsigned b; for (b = 0; b < NBLOCKS; b++) { struct starpu_task *task = starpu_task_create(); task->cl = &axpy_cl; task->cl_arg = &_alpha; task->cl_arg_size = sizeof(_alpha); task->handles[0] = starpu_data_get_sub_data(_handle_x, 1, b); task->handles[1] = starpu_data_get_sub_data(_handle_y, 1, b); task->tag_id = b; ret = starpu_task_submit(task); if (ret == -ENODEV) { exit_value = 77; goto enodev; } STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_task_wait_for_all(); enodev: starpu_data_unpartition(_handle_x, STARPU_MAIN_RAM); starpu_data_unpartition(_handle_y, STARPU_MAIN_RAM); 
starpu_data_unregister(_handle_x); starpu_data_unregister(_handle_y); end = starpu_timing_now(); double timing = end - start; FPRINTF(stderr, "timing -> %2.2f us %2.2f MB/s\n", timing, 3*N*sizeof(TYPE)/timing); FPRINTF(stderr, "AFTER y[0] = %2.2f (ALPHA = %2.2f)\n", _vec_y[0], _alpha); if (exit_value != 77) exit_value = check(); starpu_free((void *)_vec_x); starpu_free((void *)_vec_y); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_unload_opencl(&opencl_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl"); #endif /* Stop StarPU */ starpu_shutdown(); return exit_value; }
int main(int argc, char **argv) { double start, end; int ret; parse_args(argc, argv); #ifdef STARPU_QUICK_CHECK niter /= 10; #endif ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); starpu_cublas_init(); init_problem_data(); partition_mult_data(); if (bound) starpu_bound_start(0, 0); start = starpu_timing_now(); unsigned x, y, iter; for (iter = 0; iter < niter; iter++) { for (x = 0; x < nslicesx; x++) for (y = 0; y < nslicesy; y++) { struct starpu_task *task = starpu_task_create(); task->cl = &cl; task->handles[0] = starpu_data_get_sub_data(A_handle, 1, y); task->handles[1] = starpu_data_get_sub_data(B_handle, 1, x); task->handles[2] = starpu_data_get_sub_data(C_handle, 2, x, y); task->flops = 2ULL * (xdim/nslicesx) * (ydim/nslicesy) * zdim; ret = starpu_task_submit(task); if (ret == -ENODEV) { ret = 77; goto enodev; } STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } starpu_task_wait_for_all(); } end = starpu_timing_now(); if (bound) starpu_bound_stop(); double timing = end - start; double min, min_int; double flops = 2.0*((unsigned long long)niter)*((unsigned long long)xdim) *((unsigned long long)ydim)*((unsigned long long)zdim); if (bound) starpu_bound_compute(&min, &min_int, 1); PRINTF("# x\ty\tz\tms\tGFlops"); if (bound) PRINTF("\tTms\tTGFlops\tTims\tTiGFlops"); PRINTF("\n"); PRINTF("%u\t%u\t%u\t%.0f\t%.1f", xdim, ydim, zdim, timing/niter/1000.0, flops/timing/1000.0); if (bound) PRINTF("\t%.0f\t%.1f\t%.0f\t%.1f", min, flops/min/1000000.0, min_int, flops/min_int/1000000.0); PRINTF("\n"); enodev: starpu_data_unpartition(C_handle, STARPU_MAIN_RAM); starpu_data_unpartition(B_handle, STARPU_MAIN_RAM); starpu_data_unpartition(A_handle, STARPU_MAIN_RAM); starpu_data_unregister(A_handle); starpu_data_unregister(B_handle); starpu_data_unregister(C_handle); if (check) check_output(); starpu_free(A); starpu_free(B); starpu_free(C); starpu_cublas_shutdown(); starpu_shutdown(); return ret; }
int main(int argc, char **argv) { int ret; unsigned part; double timing; double start, end; unsigned row, pos; unsigned ind; /* CSR matrix description */ float *nzval; uint32_t nnz; uint32_t *colind; uint32_t *rowptr; /* Input and Output vectors */ float *vector_in_ptr; float *vector_out_ptr; /* * Parse command-line arguments */ parse_args(argc, argv); /* * Launch StarPU */ ret = starpu_init(NULL); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); /* * Create a 3-band sparse matrix as input example */ nnz = 3*size-2; starpu_malloc((void **)&nzval, nnz*sizeof(float)); starpu_malloc((void **)&colind, nnz*sizeof(uint32_t)); starpu_malloc((void **)&rowptr, (size+1)*sizeof(uint32_t)); assert(nzval && colind && rowptr); /* fill the matrix */ for (row = 0, pos = 0; row < size; row++) { rowptr[row] = pos; if (row > 0) { nzval[pos] = 1.0f; colind[pos] = row-1; pos++; } nzval[pos] = 5.0f; colind[pos] = row; pos++; if (row < size - 1) { nzval[pos] = 1.0f; colind[pos] = row+1; pos++; } } STARPU_ASSERT(pos == nnz); rowptr[size] = nnz; /* initiate the 2 vectors */ starpu_malloc((void **)&vector_in_ptr, size*sizeof(float)); starpu_malloc((void **)&vector_out_ptr, size*sizeof(float)); assert(vector_in_ptr && vector_out_ptr); /* fill them */ for (ind = 0; ind < size; ind++) { vector_in_ptr[ind] = 2.0f; vector_out_ptr[ind] = 0.0f; } /* * Register the CSR matrix and the 2 vectors */ starpu_csr_data_register(&sparse_matrix, STARPU_MAIN_RAM, nnz, size, (uintptr_t)nzval, colind, rowptr, 0, sizeof(float)); starpu_vector_data_register(&vector_in, STARPU_MAIN_RAM, (uintptr_t)vector_in_ptr, size, sizeof(float)); starpu_vector_data_register(&vector_out, STARPU_MAIN_RAM, (uintptr_t)vector_out_ptr, size, sizeof(float)); /* * Partition the CSR matrix and the output vector */ csr_f.nchildren = nblocks; vector_f.nchildren = nblocks; starpu_data_partition(sparse_matrix, &csr_f); starpu_data_partition(vector_out, &vector_f); /* * If we use OpenCL, we need to compile the 
SpMV kernel */ #ifdef STARPU_USE_OPENCL compile_spmv_opencl_kernel(); #endif start = starpu_timing_now(); /* * Create and submit StarPU tasks */ for (part = 0; part < nblocks; part++) { struct starpu_task *task = starpu_task_create(); task->cl = &spmv_cl; task->handles[0] = starpu_data_get_sub_data(sparse_matrix, 1, part); task->handles[1] = vector_in; task->handles[2] = starpu_data_get_sub_data(vector_out, 1, part); ret = starpu_task_submit(task); if (STARPU_UNLIKELY(ret == -ENODEV)) { FPRINTF(stderr, "No worker may execute this task\n"); exit(0); } } starpu_task_wait_for_all(); end = starpu_timing_now(); /* * Unregister the CSR matrix and the output vector */ starpu_data_unpartition(sparse_matrix, STARPU_MAIN_RAM); starpu_data_unpartition(vector_out, STARPU_MAIN_RAM); /* * Unregister data */ starpu_data_unregister(sparse_matrix); starpu_data_unregister(vector_in); starpu_data_unregister(vector_out); /* * Display the result */ for (row = 0; row < STARPU_MIN(size, 16); row++) { FPRINTF(stdout, "%2.2f\t%2.2f\n", vector_in_ptr[row], vector_out_ptr[row]); } starpu_free(nzval); starpu_free(colind); starpu_free(rowptr); starpu_free(vector_in_ptr); starpu_free(vector_out_ptr); /* * Stop StarPU */ starpu_shutdown(); timing = end - start; FPRINTF(stderr, "Computation took (in ms)\n"); FPRINTF(stdout, "%2.2f\n", timing/1000); return 0; }