/* * Function: updo_trsm_starpu_common * * Parameters: * buffers - Data handlers : * 1 - L column block * 2 - Right-hand-side block facing the column block. * _args - Codelet arguments: * sopalin_data - global PaStiX internal data. * cblknum - Current column block index. */ static inline void updo_trsm_starpu_common(void * buffers[], void * _args, int arch) { starpu_updo_trsm_data_t * args = (starpu_updo_trsm_data_t*)_args; Sopalin_Data_t * sopalin_data = args->sopalin_data; SolverMatrix * datacode = sopalin_data->datacode; PASTIX_FLOAT * L = (PASTIX_FLOAT*)STARPU_MATRIX_GET_PTR(buffers[0]); PASTIX_FLOAT * RHS = (PASTIX_FLOAT*)STARPU_MATRIX_GET_PTR(buffers[1]); PASTIX_INT stride = STARPU_MATRIX_GET_LD(buffers[0]); PASTIX_INT rhsnbr = STARPU_MATRIX_GET_NY(buffers[1]); PASTIX_INT rhssze = STARPU_MATRIX_GET_LD(buffers[1]); PASTIX_INT cblknum = args->cblknum; char * transpose = &(args->transpose); char * diag = &(args->diag); PASTIX_INT colnbr = CBLK_COLNBR(cblknum); PASTIX_FLOAT fun = 1.0; ASSERTDBG(UPDOWN_SM2XNBR == rhsnbr, MOD_SOPALIN); ASSERTDBG(UPDOWN_SM2XSZE == rhssze, MOD_SOPALIN); switch(arch) { case ARCH_CPU: SOPALIN_TRSM("L","L",transpose,diag,colnbr,rhsnbr,fun,L,stride,RHS,rhssze); break; case ARCH_CUDA: default: errorPrint("Unknown Architecture"); assert(0); break; } }
void ds_kernel_cpu(void *descr[], STARPU_ATTRIBUTE_UNUSED void *arg) { uint8_t *input = (uint8_t *)STARPU_MATRIX_GET_PTR(descr[0]); const unsigned input_ld = STARPU_MATRIX_GET_LD(descr[0]); uint8_t *output = (uint8_t *)STARPU_MATRIX_GET_PTR(descr[1]); const unsigned output_ld = STARPU_MATRIX_GET_LD(descr[1]); const unsigned ncols = STARPU_MATRIX_GET_NX(descr[0]); const unsigned nlines = STARPU_MATRIX_GET_NY(descr[0]); unsigned line, col; for (line = 0; line < nlines; line+=FACTOR) for (col = 0; col < ncols; col+=FACTOR) { unsigned sum = 0; unsigned lline, lcol; for (lline = 0; lline < FACTOR; lline++) for (lcol = 0; lcol < FACTOR; lcol++) { unsigned in_index = (lcol + col) + (lline + line)*input_ld; sum += input[in_index]; } unsigned out_index = (col / FACTOR) + (line / FACTOR)*output_ld; output[out_index] = (uint8_t)(sum/(FACTOR*FACTOR)); } }
static void strmm_cuda(void *descr[], void *args) { float *a = (float *)STARPU_MATRIX_GET_PTR(descr[0]); float *b = (float *)STARPU_MATRIX_GET_PTR(descr[1]); float *c = (float *)STARPU_MATRIX_GET_PTR(descr[2]); unsigned w = STARPU_MATRIX_GET_NY(descr[0]); unsigned h = STARPU_MATRIX_GET_NX(descr[1]); unsigned lda = STARPU_MATRIX_GET_LD(descr[0]); unsigned ldb = STARPU_MATRIX_GET_LD(descr[1]); unsigned ldc = STARPU_MATRIX_GET_LD(descr[2]); struct strmm_arg * arg = (struct strmm_arg *)args; cublasSideMode_t side = arg->side ? CUBLAS_SIDE_RIGHT : CUBLAS_SIDE_LEFT; cublasFillMode_t uplo = arg->uplo ? CUBLAS_FILL_MODE_LOWER : CUBLAS_FILL_MODE_UPPER; cublasDiagType_t diag = arg->unit ? CUBLAS_DIAG_UNIT : CUBLAS_DIAG_NON_UNIT; cublasOperation_t trans = CUBLAS_OP_T; const float factor = 1.0f; cublasSetStream(cublas_handle, starpu_cuda_get_local_stream()); cublasStrmm(cublas_handle, side, uplo, trans, diag, w, h, &factor, a, lda, b, ldb, c, ldc); cudaStreamSynchronize(starpu_cuda_get_local_stream()); free(arg); }
static inline void common_block_spmv(void *descr[], int s, STARPU_ATTRIBUTE_UNUSED void *_args) { /* printf("22\n"); */ float *block = (float *)STARPU_MATRIX_GET_PTR(descr[0]); float *in = (float *)STARPU_VECTOR_GET_PTR(descr[1]); float *out = (float *)STARPU_VECTOR_GET_PTR(descr[2]); unsigned dx = STARPU_MATRIX_GET_NX(descr[0]); unsigned dy = STARPU_MATRIX_GET_NY(descr[0]); unsigned ld = STARPU_MATRIX_GET_LD(descr[0]); switch (s) { case 0: cblas_sgemv(CblasRowMajor, CblasNoTrans, dx, dy, 1.0f, block, ld, in, 1, 1.0f, out, 1); break; #ifdef STARPU_USE_CUDA case 1: cublasSgemv ('t', dx, dy, 1.0f, block, ld, in, 1, 1.0f, out, 1); break; #endif default: STARPU_ABORT(); break; } }
static void cublas_mult(void *descr[], STARPU_ATTRIBUTE_UNUSED void *arg) { TYPE *subA = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); TYPE *subB = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); TYPE *subC = (TYPE *)STARPU_MATRIX_GET_PTR(descr[2]); unsigned nxC = STARPU_MATRIX_GET_NX(descr[2]); unsigned nyC = STARPU_MATRIX_GET_NY(descr[2]); unsigned nyA = STARPU_MATRIX_GET_NY(descr[0]); unsigned ldA = STARPU_MATRIX_GET_LD(descr[0]); unsigned ldB = STARPU_MATRIX_GET_LD(descr[1]); unsigned ldC = STARPU_MATRIX_GET_LD(descr[2]); CUBLAS_GEMM('n', 'n', nxC, nyC, nyA, (TYPE)1.0, subA, ldA, subB, ldB, (TYPE)0.0, subC, ldC); }
static void sgemm_cpu(void *descr[], void *args) { float *a = (float *)STARPU_MATRIX_GET_PTR(descr[0]); float *b = (float *)STARPU_MATRIX_GET_PTR(descr[1]); float *c = (float *)STARPU_MATRIX_GET_PTR(descr[2]); unsigned w = STARPU_MATRIX_GET_NY(descr[2]); unsigned h = STARPU_MATRIX_GET_NX(descr[2]); unsigned ks = STARPU_MATRIX_GET_NY(descr[0]); unsigned lda = STARPU_MATRIX_GET_LD(descr[0]); unsigned ldb = STARPU_MATRIX_GET_LD(descr[1]); unsigned ldc = STARPU_MATRIX_GET_LD(descr[2]); struct sgemm_arg * arg = (struct sgemm_arg*)args; sgemm_("N", "N", (int*)&h, (int*)&w, (int*)&ks, &arg->alpha, a, (int*)&lda, b, (int*)&ldb, &arg->beta, c, (int*)&ldc); free(arg); }
static void getri(void *descr[], int type) { // Computes the Echelon form of A, stores the corresponding inverted multiples // in the lower part // ------------------------------ // | A | | | | | | | // |----------------------------| // | | | | | | | | // |----------------------------| // | | | | | | | | // |----------------------------| // | | | | | | | | // ------------------------------ unsigned int i, j, k; unsigned int *sub_a = (unsigned int *)STARPU_MATRIX_GET_PTR(descr[0]); unsigned int x_dim = STARPU_MATRIX_GET_NX(descr[0]); unsigned int y_dim = STARPU_MATRIX_GET_NY(descr[0]); unsigned int offset_a = STARPU_MATRIX_GET_OFFSET(descr[0]); unsigned int ld_a = STARPU_MATRIX_GET_LD(descr[0]); unsigned int mult = 0; #if DEBUG0 printf("\n --- GETRI ---\n"); #endif #if DEBUG printf("x_dim = %u\n", x_dim); printf("y_dim = %u\n", y_dim); printf("ld_a = %u\n", ld_a); #endif for (i = 0; i < y_dim; ++i) { // compute inverse neg_inv_piv[i+offset_a] = negInverseModP(sub_a[i+i*ld_a], prime); #if DEBUG printf("sub_a[%u] = %u\n", i+i*ld_a,sub_a[i+i*ld_a]); printf("inv = %u\n", neg_inv_piv[i+offset_a]); #endif for (j = i+1; j < x_dim; ++j) { // multiply by corresponding coeff mult = (neg_inv_piv[i+offset_a] * sub_a[i+j*ld_a]); //% prime; #if DEBUG printf("sub_a[%u] = %u\n", i+j*ld_a,sub_a[i+j*ld_a]); printf("mult = %u\n", mult); #endif sub_a[i+j*ld_a] = mult; for (k = i+1; k < y_dim; ++k) { sub_a[k+j*ld_a] += (sub_a[k+i*ld_a] * mult); //sub_a[k+j*ld_a] %= prime; } } } #if DEBUG0 printf("\n --- GETRI DONE ---\n"); printf("TASKS READY %d\n", starpu_task_nready()); printf("TASKS SUBMITTED %d\n", starpu_task_nsubmitted()); #endif }
static inline void dw_common_codelet_update_u11(void *descr[], int s, STARPU_ATTRIBUTE_UNUSED void *_args) { float *sub11; sub11 = (float *)STARPU_MATRIX_GET_PTR(descr[0]); unsigned long nx = STARPU_MATRIX_GET_NX(descr[0]); unsigned long ld = STARPU_MATRIX_GET_LD(descr[0]); unsigned long z; switch (s) { case 0: for (z = 0; z < nx; z++) { float pivot; pivot = sub11[z+z*ld]; STARPU_ASSERT(pivot != 0.0f); STARPU_SSCAL(nx - z - 1, (1.0f/pivot), &sub11[z+(z+1)*ld], ld); STARPU_SGER(nx - z - 1, nx - z - 1, -1.0f, &sub11[z+(z+1)*ld], ld, &sub11[(z+1)+z*ld], 1, &sub11[(z+1) + (z+1)*ld],ld); } break; #ifdef STARPU_USE_CUDA case 1: for (z = 0; z < nx; z++) { float pivot; cudaMemcpyAsync(&pivot, &sub11[z+z*ld], sizeof(float), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream()); cudaStreamSynchronize(starpu_cuda_get_local_stream()); STARPU_ASSERT(pivot != 0.0f); cublasSscal(nx - z - 1, 1.0f/pivot, &sub11[z+(z+1)*ld], ld); cublasSger(nx - z - 1, nx - z - 1, -1.0f, &sub11[z+(z+1)*ld], ld, &sub11[(z+1)+z*ld], 1, &sub11[(z+1) + (z+1)*ld],ld); } cudaStreamSynchronize(starpu_cuda_get_local_stream()); break; #endif default: STARPU_ABORT(); break; } }
static inline void dw_common_cpu_codelet_update_u22(void *descr[], int s, STARPU_ATTRIBUTE_UNUSED void *_args) { float *left = (float *)STARPU_MATRIX_GET_PTR(descr[0]); float *right = (float *)STARPU_MATRIX_GET_PTR(descr[1]); float *center = (float *)STARPU_MATRIX_GET_PTR(descr[2]); unsigned dx = STARPU_MATRIX_GET_NX(descr[2]); unsigned dy = STARPU_MATRIX_GET_NY(descr[2]); unsigned dz = STARPU_MATRIX_GET_NY(descr[0]); unsigned ld12 = STARPU_MATRIX_GET_LD(descr[0]); unsigned ld21 = STARPU_MATRIX_GET_LD(descr[1]); unsigned ld22 = STARPU_MATRIX_GET_LD(descr[2]); #ifdef STARPU_USE_CUDA cublasStatus status; #endif switch (s) { case 0: STARPU_SGEMM("N", "N", dy, dx, dz, -1.0f, left, ld21, right, ld12, 1.0f, center, ld22); break; #ifdef STARPU_USE_CUDA case 1: cublasSgemm('n', 'n', dx, dy, dz, -1.0f, left, ld21, right, ld12, 1.0f, center, ld22); status = cublasGetError(); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); break; #endif default: STARPU_ABORT(); break; } }
static void sgemm_cuda(void *descr[], void *args) { float *a = (float *)STARPU_MATRIX_GET_PTR(descr[0]); float *b = (float *)STARPU_MATRIX_GET_PTR(descr[1]); float *c = (float *)STARPU_MATRIX_GET_PTR(descr[2]); unsigned w = STARPU_MATRIX_GET_NY(descr[2]); unsigned h = STARPU_MATRIX_GET_NX(descr[2]); unsigned k = STARPU_MATRIX_GET_NY(descr[0]); unsigned lda = STARPU_MATRIX_GET_LD(descr[0]); unsigned ldb = STARPU_MATRIX_GET_LD(descr[1]); unsigned ldc = STARPU_MATRIX_GET_LD(descr[2]); struct sgemm_arg * arg = (struct sgemm_arg*)args; cublasSetStream(cublas_handle, starpu_cuda_get_local_stream()); cublasSgemm(cublas_handle, CUBLAS_OP_N, CUBLAS_OP_N, h, w, k, &arg->alpha, a, lda, b, ldb, &arg->beta, c, ldc); cudaStreamSynchronize(starpu_cuda_get_local_stream()); free(arg); }
static inline void dw_common_codelet_update_u12(void *descr[], int s, STARPU_ATTRIBUTE_UNUSED void *_args) { float *sub11; float *sub12; sub11 = (float *)STARPU_MATRIX_GET_PTR(descr[0]); sub12 = (float *)STARPU_MATRIX_GET_PTR(descr[1]); unsigned ld11 = STARPU_MATRIX_GET_LD(descr[0]); unsigned ld12 = STARPU_MATRIX_GET_LD(descr[1]); unsigned nx12 = STARPU_MATRIX_GET_NX(descr[1]); unsigned ny12 = STARPU_MATRIX_GET_NY(descr[1]); #ifdef STARPU_USE_CUDA cublasStatus status; #endif /* solve L11 U12 = A12 (find U12) */ switch (s) { case 0: STARPU_STRSM("L", "L", "N", "N", nx12, ny12, 1.0f, sub11, ld11, sub12, ld12); break; #ifdef STARPU_USE_CUDA case 1: cublasStrsm('L', 'L', 'N', 'N', ny12, nx12, 1.0f, sub11, ld11, sub12, ld12); status = cublasGetError(); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); break; #endif default: STARPU_ABORT(); break; } }
/* * Function: updo_diag_starpu_common * * Divide the right-hand-side(s) by the diagonal * * Parameters: * buffers - Data handlers : * 0 - L column block * 1 - Right-hand-side block facing the column block. * _args - Codelet arguments: * sopalin_data - global PaStiX internal data. * cblknum - Current column block index. * arch - Type of architecture : ARCH_CPU | ARCH_CUDA */ static inline void updo_diag_starpu_common(void * buffers[], void * _args, int arch) { starpu_updo_diag_data_t * args = (starpu_updo_diag_data_t*)_args; Sopalin_Data_t * sopalin_data = args->sopalin_data; SolverMatrix * datacode = sopalin_data->datacode; PASTIX_FLOAT * L = (PASTIX_FLOAT*)STARPU_MATRIX_GET_PTR(buffers[0]); PASTIX_FLOAT * RHS = (PASTIX_FLOAT*)STARPU_MATRIX_GET_PTR(buffers[1]); PASTIX_INT stride = STARPU_MATRIX_GET_LD(buffers[0]); PASTIX_INT rhsnbr = STARPU_MATRIX_GET_NY(buffers[1]); PASTIX_INT rhssze = STARPU_MATRIX_GET_LD(buffers[1]); PASTIX_INT cblknum = args->cblknum; PASTIX_INT colnbr = CBLK_COLNBR(cblknum); ASSERTDBG(UPDOWN_SM2XNBR == rhsnbr, MOD_SOPALIN); ASSERTDBG(UPDOWN_SM2XSZE == rhssze, MOD_SOPALIN); switch(arch) { case ARCH_CPU: { PASTIX_INT i, j; PASTIX_FLOAT * myRHS = RHS; for (j = 0; j < rhsnbr; j++) { for (i = 0; i < colnbr; i++) { myRHS[i] /= L[i*(stride+1)]; } myRHS += rhssze; } break; } case ARCH_CUDA: default: errorPrint("Unknown Architecture"); assert(0); break; } }
/* * Function: updo_up_gemm_starpu_common * * Parameters: * buffers - Data handlers : * 1 - L column block * 2 - Right-hand-side block facing the column block. * _args - Codelet arguments: * sopalin_data - global PaStiX internal data. * cblknum - Current column block index. */ static inline void updo_up_gemm_starpu_common(void * buffers[], void * _args, int arch) { starpu_updo_gemm_data_t * args = (starpu_updo_gemm_data_t*)_args; Sopalin_Data_t * sopalin_data = args->sopalin_data; SolverMatrix * datacode = sopalin_data->datacode; PASTIX_FLOAT * L = (PASTIX_FLOAT*)STARPU_MATRIX_GET_PTR(buffers[0]); PASTIX_FLOAT * RHS = (PASTIX_FLOAT*)STARPU_MATRIX_GET_PTR(buffers[1]); PASTIX_FLOAT * RHS2 = (PASTIX_FLOAT*)STARPU_MATRIX_GET_PTR(buffers[2]); PASTIX_INT stride = STARPU_MATRIX_GET_LD(buffers[0]); PASTIX_INT rhsnbr = STARPU_MATRIX_GET_NY(buffers[1]); PASTIX_INT rhssze = STARPU_MATRIX_GET_LD(buffers[1]); PASTIX_INT cblknum = args->cblknum; PASTIX_INT bloknum = args->bloknum; char * transpose = &(args->transpose); PASTIX_INT fcblknum = SYMB_CBLKNUM(bloknum); PASTIX_INT colnbr = CBLK_COLNBR(cblknum); PASTIX_INT rownbr = BLOK_ROWNBR(bloknum); PASTIX_FLOAT fun = 1.0; PASTIX_FLOAT * ga = L + SOLV_COEFIND(bloknum); PASTIX_FLOAT * gc = RHS2 + SYMB_FROWNUM(bloknum) - SYMB_FCOLNUM(fcblknum); ASSERTDBG(UPDOWN_SM2XNBR == rhsnbr, MOD_SOPALIN); ASSERTDBG(UPDOWN_SM2XSZE == rhssze, MOD_SOPALIN); switch(arch) { case ARCH_CPU: SOPALIN_GEMM(transpose,"N",colnbr,rhsnbr,rownbr,-fun,ga,stride, gc,rhssze,fun,RHS, UPDOWN_SM2XSZE); break; case ARCH_CUDA: default: errorPrint("Unknown Architecture"); assert(0); break; } }
static inline void dw_common_codelet_update_u21(void *descr[], int s, STARPU_ATTRIBUTE_UNUSED void *_args) { float *sub11; float *sub21; sub11 = (float *)STARPU_MATRIX_GET_PTR(descr[0]); sub21 = (float *)STARPU_MATRIX_GET_PTR(descr[1]); unsigned ld11 = STARPU_MATRIX_GET_LD(descr[0]); unsigned ld21 = STARPU_MATRIX_GET_LD(descr[1]); unsigned nx21 = STARPU_MATRIX_GET_NX(descr[1]); unsigned ny21 = STARPU_MATRIX_GET_NY(descr[1]); #ifdef STARPU_USE_CUDA cublasStatus status; #endif switch (s) { case 0: STARPU_STRSM("R", "U", "N", "U", nx21, ny21, 1.0f, sub11, ld11, sub21, ld21); break; #ifdef STARPU_USE_CUDA case 1: cublasStrsm('R', 'U', 'N', 'U', ny21, nx21, 1.0f, sub11, ld11, sub21, ld21); status = cublasGetError(); if (status != CUBLAS_STATUS_SUCCESS) STARPU_CUBLAS_REPORT_ERROR(status); break; #endif default: STARPU_ABORT(); break; } }
static void cpu_mult(void *descr[], STARPU_ATTRIBUTE_UNUSED void *arg) { TYPE *subA = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]); TYPE *subB = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]); TYPE *subC = (TYPE *)STARPU_MATRIX_GET_PTR(descr[2]); unsigned nxC = STARPU_MATRIX_GET_NX(descr[2]); unsigned nyC = STARPU_MATRIX_GET_NY(descr[2]); unsigned nyA = STARPU_MATRIX_GET_NY(descr[0]); unsigned ldA = STARPU_MATRIX_GET_LD(descr[0]); unsigned ldB = STARPU_MATRIX_GET_LD(descr[1]); unsigned ldC = STARPU_MATRIX_GET_LD(descr[2]); int worker_size = starpu_combined_worker_get_size(); if (worker_size == 1) { /* Sequential CPU task */ CPU_GEMM("N", "N", nxC, nyC, nyA, (TYPE)1.0, subA, ldA, subB, ldB, (TYPE)0.0, subC, ldC); } else { /* Parallel CPU task */ unsigned rank = starpu_combined_worker_get_rank(); unsigned block_size = (nyC + worker_size - 1)/worker_size; unsigned new_nyC = STARPU_MIN(nyC, block_size*(rank+1)) - block_size*rank; STARPU_ASSERT(nyC = STARPU_MATRIX_GET_NY(descr[1])); TYPE *new_subB = &subB[block_size*rank]; TYPE *new_subC = &subC[block_size*rank]; CPU_GEMM("N", "N", nxC, new_nyC, nyA, (TYPE)1.0, subA, ldA, new_subB, ldB, (TYPE)0.0, new_subC, ldC); } }
{ char name[32]; starpu_worker_get_name(worker, name, 32); fprintf(stderr, "\t\t%s -> %d / %d (%2.2f %%)\n", name, count_22_per_worker[worker], count_22_total, (100.0*count_22_per_worker[worker])/count_22_total); } } } /* * U22 */ static inline void dw_common_cpu_codelet_update_u22(void *descr[], int s, __attribute__((unused)) void *_args) { float *left = (float *)STARPU_MATRIX_GET_PTR(descr[0]); float *right = (float *)STARPU_MATRIX_GET_PTR(descr[1]); float *center = (float *)STARPU_MATRIX_GET_PTR(descr[2]); unsigned dx = STARPU_MATRIX_GET_NX(descr[2]); unsigned dy = STARPU_MATRIX_GET_NY(descr[2]); unsigned dz = STARPU_MATRIX_GET_NY(descr[0]); unsigned ld12 = STARPU_MATRIX_GET_LD(descr[0]); unsigned ld21 = STARPU_MATRIX_GET_LD(descr[1]); unsigned ld22 = STARPU_MATRIX_GET_LD(descr[2]); #ifdef STARPU_USE_CUDA cublasStatus status; #endif
for (i=0;i<xdim;i++) { C[j+i*ydim]=(float)(0); } } } //Cpu function static void cpu_mult(void *descr[], __attribute__((unused)) void *arg) { float *subA, *subB, *subC; uint32_t nxC, nyC, nyA; uint32_t ldA, ldB, ldC; //The below function would return pointer to the block subA = (float *)STARPU_MATRIX_GET_PTR(descr[0]); subB = (float *)STARPU_MATRIX_GET_PTR(descr[1]); subC = (float *)STARPU_MATRIX_GET_PTR(descr[2]); //nx = return the number of elements in x -axis of the handle //ny = return the number of elements in y- axis of the handle nxC = STARPU_MATRIX_GET_NX(descr[2]); nyC = STARPU_MATRIX_GET_NY(descr[2]); nyA = STARPU_MATRIX_GET_NY(descr[0]); //LD returns the number of elements in each row of matrix //It will be useful in calculating the offset to read and write in memory address ldA = STARPU_MATRIX_GET_LD(descr[0]); ldB = STARPU_MATRIX_GET_LD(descr[1]); ldC = STARPU_MATRIX_GET_LD(descr[2]);
#define NX 6 #define NY 6 #define PARTS 2 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) void matrix_fill(void *buffers[], void *cl_arg STARPU_ATTRIBUTE_UNUSED) { unsigned i, j; /* length of the matrix */ unsigned nx = STARPU_MATRIX_GET_NX(buffers[0]); unsigned ny = STARPU_MATRIX_GET_NY(buffers[0]); unsigned ld = STARPU_MATRIX_GET_LD(buffers[0]); int *val = (int *)STARPU_MATRIX_GET_PTR(buffers[0]); for(j=0; j<ny ; j++) { for(i=0; i<nx ; i++) val[(j*ld)+i] = i+100*j; } } struct starpu_codelet cl_fill = { .cpu_funcs = {matrix_fill}, .cpu_funcs_name = {"matrix_fill"}, .nbuffers = 1, .modes = {STARPU_W}, .name = "matrix_fill"
static unsigned int parts = 2; static unsigned xdim = 3; static unsigned ydim = 3; static int no_of_iteration=1; static void cpu_jacobi(void *descr[], __attribute__((unused)) void *arg) { float *subB; uint32_t nxB, nyB; uint32_t ldB; float east,west,north,south; uint32_t offset,dimension,offset_row_shift; subB = (float *)STARPU_MATRIX_GET_PTR(descr[0]); nxB = STARPU_MATRIX_GET_NX(descr[0]); nyB = STARPU_MATRIX_GET_NY(descr[0]); ldB = STARPU_MATRIX_GET_LD(descr[0]); unsigned i,j; unsigned int x,y; offset=STARPU_MATRIX_GET_OFFSET(descr[0]); //Based on offset we need to calculate the location of north,south,east and west in matrix A. offset_row_shift=offset; for(i=0;i<nxB;i++) { for(j=0;j<nyB;j++) {