/*
 * Sequential driver for a blocked sparse LU factorisation.
 *
 * BENCH is treated as a row-major bots_arg_size x bots_arg_size table of
 * block pointers.  Entries that are null are skipped, except that a null
 * target of a bmod update is first replaced by allocate_clean_block().
 *
 * For each elimination step the call order is:
 *   1. lu0  on the diagonal block,
 *   2. fwd  on every non-null block to the right of it in the same row,
 *   3. bdiv on every non-null block below it in the same column,
 *   4. bmod on every (row, col) pair whose column-side and row-side blocks
 *      from steps 3 and 2 are both non-null.
 */
void sparselu_seq_call(float **BENCH)
{
    int step;

    for (step = 0; step < bots_arg_size; step++) {
        int row;
        int col;
        float *pivot = BENCH[(step * bots_arg_size) + step];

        lu0(pivot);

        /* Blocks right of the diagonal in block-row `step`. */
        for (col = step + 1; col < bots_arg_size; col++) {
            float *right = BENCH[(step * bots_arg_size) + col];

            if (!right)
                continue;
            fwd(pivot, right);
        }

        /* Blocks below the diagonal in block-column `step`. */
        for (row = step + 1; row < bots_arg_size; row++) {
            float *below = BENCH[(row * bots_arg_size) + step];

            if (!below)
                continue;
            bdiv(pivot, below);
        }

        /* Trailing update; runs only after every bdiv of this step. */
        for (row = step + 1; row < bots_arg_size; row++) {
            float *below = BENCH[(row * bots_arg_size) + step];

            if (!below)
                continue;

            for (col = step + 1; col < bots_arg_size; col++) {
                float *right = BENCH[(step * bots_arg_size) + col];

                if (!right)
                    continue;

                /* Create the target block on demand before updating it. */
                if (!BENCH[(row * bots_arg_size) + col])
                    BENCH[(row * bots_arg_size) + col] = allocate_clean_block();

                bmod(below, right, BENCH[(row * bots_arg_size) + col]);
            }
        }
    }
}
/*
 * Task-based driver for a blocked LU factorisation of the NB x NB table of
 * block pointers M, with task ordering expressed through depend clauses.
 *
 * For every step kk:
 *   - lu0 is called on M[kk][kk];
 *   - fwd is called for each non-NULL M[kk][jj], jj > kk;
 *   - for each non-NULL M[ii][kk], ii > kk, bdiv is called, followed by one
 *     bmod per jj > kk whose M[kk][jj] is non-NULL.  A NULL M[ii][jj] is
 *     replaced by allocate_clean_block() before the bmod task is created.
 *
 * After a taskwait the difference of two mysecond() readings is printed.
 *
 * The "[BSIZE][BSIZE]ptr" form inside the depend clauses is a shaping
 * extension, not standard OpenMP array-section syntax (presumably
 * OmpSs/Mercurium -- confirm against the build).
 */
void lu_dependencies( double* M[NB][NB] )
{
    /*
     * Timestamps are kept in double.  A float carries only 24 significant
     * bits, so a large absolute clock reading would be rounded before the
     * subtraction and the reported interval would be meaningless.
     * (mysecond() is defined elsewhere; its return type is not visible here.)
     */
    double t_start, t_end;
    double elapsed;
    int ii, jj, kk;

    t_start = mysecond();

    for (kk = 0; kk < NB; kk++) {

        {
            double *diag = M[kk][kk];
            #pragma omp task depend(inout: [BSIZE][BSIZE]diag)
            lu0(diag);
        }

        for (jj = kk + 1; jj < NB; jj++) {
            if (M[kk][jj] != NULL) {
                double *diag = M[kk][kk];
                double *col = M[kk][jj];
                #pragma omp task depend(in: [BSIZE][BSIZE]diag) depend(inout: [BSIZE][BSIZE]col)
                fwd(diag, col);
            }
        }

        for (ii = kk + 1; ii < NB; ii++) {
            if (M[ii][kk] != NULL) {

                {
                    /*
                     * NOTE(review): bdiv receives the sub-diagonal block
                     * first and the diagonal block second, and it is the
                     * diagonal block that is declared inout.  That is the
                     * reverse of the fwd() call above -- confirm against
                     * bdiv's definition that this order is intended.
                     */
                    double *pivot = M[kk][kk];
                    double *below = M[ii][kk];
                    #pragma omp task depend(in: [BSIZE][BSIZE]below) depend(inout: [BSIZE][BSIZE]pivot)
                    bdiv (below, pivot);
                }

                for (jj = kk + 1; jj < NB; jj++) {
                    if (M[kk][jj] != NULL) {
                        /* Allocate in the creating thread so the task sees a valid block. */
                        if (M[ii][jj] == NULL)
                            M[ii][jj] = allocate_clean_block();

                        {
                            double *row = M[ii][kk];
                            double *col = M[kk][jj];
                            double *inner = M[ii][jj];
                            #pragma omp task depend(in: [BSIZE][BSIZE]row, [BSIZE][BSIZE]col) depend(inout: [BSIZE][BSIZE]inner)
                            bmod(row, col, inner);
                        }
                    }
                }
            }
        }
    }

    /* Wait for every task created above before taking the end timestamp. */
    #pragma omp taskwait
    t_end = mysecond();
    elapsed = t_end - t_start;
    printf("Dependencies time to compute = %f usec\n", elapsed);
}
static void OUT__1__1527__(void *__out_argv) { float ***BENCH = (float ***)(((struct OUT__1__1527___data *)__out_argv) -> OUT__1__1527___data::BENCH_p); int ii = (int )(((struct OUT__1__1527___data *)__out_argv) -> OUT__1__1527___data::ii); int jj = (int )(((struct OUT__1__1527___data *)__out_argv) -> OUT__1__1527___data::jj); int kk = (int )(((struct OUT__1__1527___data *)__out_argv) -> OUT__1__1527___data::kk); int _p_ii = ii; int _p_jj = jj; int _p_kk = kk; if ((( *BENCH)[(_p_ii * bots_arg_size) + _p_jj]) == ((0L))) ( *BENCH)[(_p_ii * bots_arg_size) + _p_jj] = allocate_clean_block(); bmod((( *BENCH)[(_p_ii * bots_arg_size) + _p_kk]),(( *BENCH)[(_p_kk * bots_arg_size) + _p_jj]),(( *BENCH)[(_p_ii * bots_arg_size) + _p_jj])); }
/*
 * Serial driver for a blocked LU factorisation of the NB x NB table of
 * block pointers M.
 *
 * For every step kk:
 *   - lu0 is called on M[kk][kk];
 *   - fwd is called for each non-NULL M[kk][jj], jj > kk;
 *   - for each non-NULL M[ii][kk], ii > kk, bdiv is called, followed by one
 *     bmod per jj > kk whose M[kk][jj] is non-NULL.  A NULL M[ii][jj] is
 *     replaced by allocate_clean_block() before bmod runs.
 *
 * The difference of two mysecond() readings taken around the loop nest is
 * printed at the end.
 */
void lu_serial( double* M[NB][NB] )
{
    /*
     * Timestamps are kept in double.  A float carries only 24 significant
     * bits, so a large absolute clock reading would be rounded before the
     * subtraction and the reported interval would be meaningless.
     * (mysecond() is defined elsewhere; its return type is not visible here.)
     */
    double t_start, t_end;
    double elapsed;
    int ii, jj, kk;

    t_start = mysecond();

    for (kk = 0; kk < NB; kk++) {

        {
            double *diag = M[kk][kk];
            lu0(diag);
        }

        for (jj = kk + 1; jj < NB; jj++) {
            if (M[kk][jj] != NULL) {
                double *diag = M[kk][kk];
                double *col = M[kk][jj];
                fwd(diag, col);
            }
        }

        for (ii = kk + 1; ii < NB; ii++) {
            if (M[ii][kk] != NULL) {

                {
                    /*
                     * NOTE(review): bdiv receives the sub-diagonal block
                     * first and the diagonal block second, the reverse of
                     * the fwd() call above -- confirm against bdiv's
                     * definition that this order is intended.
                     */
                    double *pivot = M[kk][kk];
                    double *below = M[ii][kk];
                    bdiv (below, pivot);
                }

                for (jj = kk + 1; jj < NB; jj++) {
                    if (M[kk][jj] != NULL) {
                        /* Create the target block on demand. */
                        if (M[ii][jj] == NULL)
                            M[ii][jj] = allocate_clean_block();

                        {
                            double *row = M[ii][kk];
                            double *col = M[kk][jj];
                            double *inner = M[ii][jj];
                            bmod(row, col, inner);
                        }
                    }
                }
            }
        }
    }

    t_end = mysecond();
    elapsed = t_end - t_start;
    printf("Serial time to compute = %f usec\n", elapsed);
}
/*
 * Task-parallel driver for a blocked sparse LU factorisation, ordered by
 * OpenMP depend clauses.
 *
 * BENCH           row-major matrix_size x matrix_size table of block
 *                 pointers; NULL entries are skipped, and a NULL target of a
 *                 bmod update is replaced by
 *                 allocate_clean_block(submatrix_size).
 * matrix_size     number of blocks per table row/column.
 * submatrix_size  forwarded unchanged to lu0/fwd/bdiv/bmod and to
 *                 allocate_clean_block.
 *
 * One thread (omp single) creates all tasks.  For every step kk it emits a
 * lu0 task on the diagonal block, one fwd task per non-NULL block right of
 * it, one bdiv task per non-NULL block below it, and one bmod task per
 * (ii, jj) pair whose (ii, kk) and (kk, jj) blocks are both non-NULL.
 *
 * Dependences are keyed on the table slot BENCH[idx] of each block.  An
 * array section BENCH[idx : n*n] would describe n*n consecutive *slots* of
 * the pointer table, so sections of neighbouring blocks would partially
 * overlap (and run past the end of the table), which OpenMP does not allow
 * for depend list items: they must be identical or disjoint.  A single slot
 * per block is unique and keeps the same base address.
 */
void sparselu_par_call(float **BENCH, int matrix_size, int submatrix_size)
{
    int ii, jj, kk;

    #pragma omp parallel private(kk,ii,jj) shared(BENCH)
    #pragma omp single
    {
        for (kk = 0; kk < matrix_size; kk++) {
            #pragma omp task firstprivate(kk) shared(BENCH) \
                depend(inout: BENCH[kk*matrix_size+kk])
            lu0(BENCH[kk*matrix_size+kk], submatrix_size);

            for (jj = kk + 1; jj < matrix_size; jj++) {
                if (BENCH[kk*matrix_size+jj] != NULL) {
                    #pragma omp task firstprivate(kk, jj) shared(BENCH) \
                        depend(in: BENCH[kk*matrix_size+kk]) \
                        depend(inout: BENCH[kk*matrix_size+jj])
                    fwd(BENCH[kk*matrix_size+kk], BENCH[kk*matrix_size+jj], submatrix_size);
                }
            }

            for (ii = kk + 1; ii < matrix_size; ii++) {
                if (BENCH[ii*matrix_size+kk] != NULL) {
                    #pragma omp task firstprivate(kk, ii) shared(BENCH) \
                        depend(in: BENCH[kk*matrix_size+kk]) \
                        depend(inout: BENCH[ii*matrix_size+kk])
                    bdiv (BENCH[kk*matrix_size+kk], BENCH[ii*matrix_size+kk], submatrix_size);
                }
            }

            for (ii = kk + 1; ii < matrix_size; ii++) {
                if (BENCH[ii*matrix_size+kk] != NULL) {
                    for (jj = kk + 1; jj < matrix_size; jj++) {
                        if (BENCH[kk*matrix_size+jj] != NULL) {
                            /*
                             * Allocate in the task-creating thread, before
                             * the task exists, so the slot is non-NULL by
                             * the time bmod runs.
                             */
                            if (BENCH[ii*matrix_size+jj] == NULL)
                                BENCH[ii*matrix_size+jj] = allocate_clean_block(submatrix_size);

                            #pragma omp task firstprivate(kk, jj, ii) shared(BENCH) \
                                depend(in: BENCH[ii*matrix_size+kk], BENCH[kk*matrix_size+jj]) \
                                depend(inout: BENCH[ii*matrix_size+jj])
                            bmod(BENCH[ii*matrix_size+kk], BENCH[kk*matrix_size+jj], BENCH[ii*matrix_size+jj], submatrix_size);
                        }
                    }
                }
            }
        }

        /* Wait for every task created above before leaving the single region. */
        #pragma omp taskwait
    }
}
/*
 * Parallel driver for a blocked sparse LU factorisation that combines
 * worksharing loops with untied tasks.
 *
 * BENCH is a row-major bots_arg_size x bots_arg_size table of block
 * pointers; NULL entries are skipped, and a NULL target of a bmod update is
 * replaced by allocate_clean_block() inside the task that updates it.
 *
 * Every thread of the parallel region walks the kk loop.  Per step:
 *   - one thread runs lu0 on the diagonal block (omp single);
 *   - the fwd loop (nowait) and the bdiv loop are shared across the team,
 *     each iteration spawning a task for a non-NULL block;
 *   - the bmod loop is shared across the team and spawns one task per
 *     (ii, jj) pair whose (ii, kk) and (kk, jj) blocks are non-NULL.
 * Only the fwd loop carries nowait; the other worksharing constructs keep
 * their closing barrier, which is what separates the phases.
 *
 * A progress message is emitted through bots_message before and after.
 */
void sparselu_par_call(float **BENCH)
{
    int ii, jj, kk;

    bots_message("Computing SparseLU Factorization (%dx%d matrix with %dx%d blocks) ",
                 bots_arg_size, bots_arg_size, bots_arg_size_1, bots_arg_size_1);

    #pragma omp parallel private(kk)
    {
        for (kk = 0; kk < bots_arg_size; kk++) {

            #pragma omp single
            {
                lu0(BENCH[kk * bots_arg_size + kk]);
            }

            /* Row of blocks to the right of the diagonal. */
            #pragma omp for nowait
            for (jj = kk + 1; jj < bots_arg_size; jj++) {
                if (BENCH[kk * bots_arg_size + jj] != NULL) {
                    #pragma omp task untied firstprivate(kk, jj) shared(BENCH)
                    {
                        fwd(BENCH[kk * bots_arg_size + kk],
                            BENCH[kk * bots_arg_size + jj]);
                    }
                }
            }

            /* Column of blocks below the diagonal. */
            #pragma omp for
            for (ii = kk + 1; ii < bots_arg_size; ii++) {
                if (BENCH[ii * bots_arg_size + kk] != NULL) {
                    #pragma omp task untied firstprivate(kk, ii) shared(BENCH)
                    {
                        bdiv (BENCH[kk * bots_arg_size + kk],
                              BENCH[ii * bots_arg_size + kk]);
                    }
                }
            }

            /* Trailing update of the remaining blocks. */
            #pragma omp for private(jj)
            for (ii = kk + 1; ii < bots_arg_size; ii++) {
                if (BENCH[ii * bots_arg_size + kk] != NULL) {
                    for (jj = kk + 1; jj < bots_arg_size; jj++) {
                        if (BENCH[kk * bots_arg_size + jj] != NULL) {
                            #pragma omp task untied firstprivate(kk, jj, ii) shared(BENCH)
                            {
                                /* Create the target block on demand. */
                                if (BENCH[ii * bots_arg_size + jj] == NULL)
                                    BENCH[ii * bots_arg_size + jj] = allocate_clean_block();

                                bmod(BENCH[ii * bots_arg_size + kk],
                                     BENCH[kk * bots_arg_size + jj],
                                     BENCH[ii * bots_arg_size + jj]);
                            }
                        }
                    }
                }
            }
        }
    }

    bots_message(" completed!\n");
}
/*
 * Sequential driver for a blocked sparse LU factorisation.
 *
 * BENCH is a row-major bots_arg_size x bots_arg_size table of block
 * pointers.  NULL entries are skipped; a NULL target of a bmod update is
 * replaced by allocate_clean_block() first.
 *
 * Per step kk: lu0 on the diagonal block, fwd across the non-NULL blocks to
 * its right, bdiv down the non-NULL blocks below it, then bmod on every
 * (ii, jj) pair whose (ii, kk) and (kk, jj) blocks are both non-NULL.
 */
void sparselu_seq_call(float **BENCH)
{
    int ii, jj, kk;

    for (kk = 0; kk < bots_arg_size; kk++) {
        /* First slot of block-row kk in the flat table. */
        float **pivot_row = &BENCH[kk * bots_arg_size];

        lu0(pivot_row[kk]);

        for (jj = kk + 1; jj < bots_arg_size; jj++) {
            if (pivot_row[jj] != NULL) {
                fwd(pivot_row[kk], pivot_row[jj]);
            }
        }

        for (ii = kk + 1; ii < bots_arg_size; ii++) {
            float **cur_row = &BENCH[ii * bots_arg_size];

            if (cur_row[kk] != NULL) {
                bdiv (pivot_row[kk], cur_row[kk]);
            }
        }

        /* Kept as a separate pass so every bdiv of this step precedes the updates. */
        for (ii = kk + 1; ii < bots_arg_size; ii++) {
            float **cur_row = &BENCH[ii * bots_arg_size];

            if (cur_row[kk] == NULL) {
                continue;
            }

            for (jj = kk + 1; jj < bots_arg_size; jj++) {
                if (pivot_row[jj] == NULL) {
                    continue;
                }

                /* Create the target block on demand. */
                if (cur_row[jj] == NULL) {
                    cur_row[jj] = allocate_clean_block();
                }

                bmod(cur_row[kk], pivot_row[jj], cur_row[jj]);
            }
        }
    }
}