/***************************************************************************//**
 *  Parallel per-tile matrix generation with CORE_dplgsy - static scheduling
 *
 *  Unpacks (bump, A, seed, sequence, request) from the PLASMA context and
 *  calls CORE_dplgsy once for every tile of A that belongs to the calling
 *  thread.  Tiles are numbered column by column (index = n * A.mt + m); the
 *  thread starts at index PLASMA_RANK and advances by PLASMA_SIZE each step.
 *
 *  Returns immediately, without touching A, when the sequence status is not
 *  PLASMA_SUCCESS.
 *
 *  NOTE(review): the previous header called this a "Cholesky factorization",
 *  but nothing here factorizes anything; the only kernel invoked is
 *  CORE_dplgsy (presumably a seeded symmetric-matrix generator - confirm
 *  against the CORE_dplgsy documentation).
 **/
void plasma_pdplgsy(plasma_context_t *plasma)
{
    double bump;
    PLASMA_desc A;
    unsigned long long int seed;
    PLASMA_sequence *sequence;
    PLASMA_request *request;   /* unpacked with the other arguments; not read below */

    int m, n;
    int next_m;
    int next_n;
    int ldam;
    int tempmm, tempnn;

    plasma_unpack_args_5(bump, A, seed, sequence, request);
    if (sequence->status != PLASMA_SUCCESS)
        return;

    /*
     * Map the starting linear index (PLASMA_RANK) to a tile coordinate
     * (m, n).  The "n < A.nt" bound mirrors the stepping loop below: without
     * it, a descriptor with A.mt == 0 would never let m drop below A.mt and
     * the loop would spin until n overflowed.
     */
    n = 0;
    m = PLASMA_RANK;
    while (m >= A.mt && n < A.nt) {
        n++;
        m = m - A.mt;
    }

    while (n < A.nt) {
        /* Work out the next tile of this thread before processing the
         * current one: advance PLASMA_SIZE positions and wrap rows into
         * columns. */
        next_n = n;
        next_m = m;

        next_m += PLASMA_SIZE;
        while (next_m >= A.mt && next_n < A.nt) {
            next_n++;
            next_m = next_m - A.mt;
        }

        /* The last tile row / column only holds the remainder of the matrix. */
        tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
        tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
        ldam = BLKLDD(A, m);

        /* m*A.mb and n*A.nb are the element offsets of this tile's first
         * row and column within the full matrix. */
        CORE_dplgsy(
            bump, tempmm, tempnn, A(m, n), ldam,
            A.m, m*A.mb, n*A.nb, seed );

        m = next_m;
        n = next_n;
    }
}
/***************************************************************************//**
 *  Parallel per-tile matrix generation with CORE_dplgsy - dynamic scheduling
 *
 *  Walks every tile (m, n) of A, row of tiles by row of tiles, and issues one
 *  CORE_dplgsy call per tile.  Each call is preceded by an
 *  hclib_pragma_marker describing an OpenMP task whose output dependence is
 *  the tile storage dA[0 : ldam*tempnn].
 *
 *  bump and seed are forwarded unchanged to CORE_dplgsy.
 *
 *  NOTE(review): the previous header called this a "Cholesky factorization",
 *  but the only kernel invoked here is CORE_dplgsy (presumably a seeded
 *  symmetric-matrix generator - confirm against its documentation).
 **/
void plasma_pdplgsy_quark( double bump, PLASMA_desc A,
                           unsigned long long int seed)
{
    for (int m = 0; m < A.mt; m++) {
        int tempmm;
        int ldam;

        /* The last tile row only holds the remaining rows of the matrix. */
        if (m == A.mt-1)
            tempmm = A.m - m*A.mb;
        else
            tempmm = A.mb;

        ldam = BLKLDD(A, m);

        for (int n = 0; n < A.nt; n++) {
            int tempnn;

            /* Likewise, the last tile column holds the remaining columns. */
            if (n == A.nt-1)
                tempnn = A.n - n*A.nb;
            else
                tempnn = A.nb;

            double *dA = A(m, n);

#if defined(USE_OMPEXT)
            /* Affinity hint in the range 0..23 derived from the tile
             * coordinates.  NOTE(review): the 4 and 6 look tuned to a
             * specific core layout - confirm. */
            omp_set_task_affinity( (n%4)*6+(m%6) );
#endif

            /*
             * The marker must stay directly in front of the kernel call, and
             * the names dA, ldam and tempnn inside its string must match the
             * locals above (presumably it is rewritten into
             * "#pragma omp task ..." by external tooling - confirm).
             */
            hclib_pragma_marker("omp", "task depend(out:dA[0:ldam*tempnn])", "pragma44_omp_task");
            CORE_dplgsy(
                bump, tempmm, tempnn, dA, ldam,
                A.m, m*A.mb, n*A.nb, seed );
        }
    }
}