PT_TREE_T ATL_Strmm ( const PT_LVL3_TYPE_T * PLVL3, const unsigned int NODE, const unsigned int NTHREADS, pthread_attr_t * ATTR, const int NB, const enum ATLAS_SIDE SIDE, const enum ATLAS_UPLO UPLO, const enum ATLAS_TRANS TRANS, const enum ATLAS_DIAG DIAG, const int M, const int N, const void * ALPHA, const void * A, const int LDA, void * B, const int LDB ) { /* * .. Local Variables .. */ #if 0 PT_TREE_T left, right, root = NULL; PT_TRMM_ARGS_T * a_mm = NULL; size_t size; unsigned int next = ( NODE << 1 ), nt1, nt2; int incA, incB, m1, m2, n1, n2; #else PT_TREE_T left, right, tree = NULL; PT_TRMM_ARGS_T * a_mm = NULL; double rm, rn; DIM_1DSPLIT_T splitM, splitN; size_t size; unsigned int next = ( NODE << 1 ), ntm1, ntm2, ntn1, ntn2; int incB, m1, m2, n1, n2; #endif /* .. * .. Executable Statements .. * */ if( NTHREADS < 1 ) return( tree ); size = PLVL3->size; if( SIDE == AtlasLeft ) { #if 0 if( M > N ) { ATL_gesplit( NTHREADS, M, NB, &nt1, &nt2, &m1, &m2 ); incA = m1 + m1 * LDA; incB = m1; root = PLVL3->pttrmm( NTHREADS, ATTR, SIDE, UPLO, TRANS, DIAG, m2, N, ALPHA, Mvpt3( A, incA, size ), LDA, Mvpt3( B, incB, size ), LDB ); ATL_join_tree ( root ); ATL_free_tree ( root ); root = PLVL3->ptgemm( NTHREADS, ATTR, AtlasNoTrans, AtlasNoTrans, m2, N, m1, ALPHA, Mvpt3( A, m1, size ), LDA, B, LDB, PLVL3->one, Mvpt3( B, m1, size ), LDB ); ATL_join_tree ( root ); ATL_free_tree ( root ); root = PLVL3->pttrmm( NTHREADS, ATTR, SIDE, UPLO, TRANS, DIAG, m1, N, ALPHA, A, LDA, B, LDB ); ATL_join_tree ( root ); ATL_free_tree ( root ); return( NULL ); } else { ATL_gesplit( NTHREADS, N, NB, &nt1, &nt2, &n1, &n2 ); left = ATL_Strmm( PLVL3, next+1, nt1, ATTR, NB, SIDE, UPLO, TRANS, DIAG, M, n1, ALPHA, A, LDA, B, LDB ); incB = n1 * LDB; right = ATL_Strmm( PLVL3, next+2, nt2, ATTR, NB, SIDE, UPLO, TRANS, DIAG, M, n2, ALPHA, A, LDA, Mvpt3( B, incB, size ), LDB ); } #else splitN = ATL_1dsplit( NTHREADS, N, NB, &ntn1, &ntn2, &n1, &n2, &rn ); if( splitN == Atlas1dSplit ) { left = ATL_Strmm( PLVL3, next+1, ntn1, ATTR, NB, SIDE, UPLO, TRANS, DIAG, M, n1, ALPHA, A, LDA, B, LDB ); incB = n1 * LDB; right = ATL_Strmm( PLVL3, next+2, ntn2, ATTR, NB, SIDE, UPLO, TRANS, DIAG, M, n2, ALPHA, A, LDA, Mvpt3( B, incB, size ), LDB ); tree = ATL_init_node( NODE, left, right, NULL, NULL, NULL, NULL ); } else { a_mm = (PT_TRMM_ARGS_T *)malloc( sizeof( PT_TRMM_ARGS_T ) ); ATL_assert( a_mm != NULL ); a_mm->si = SIDE; a_mm->up = UPLO; a_mm->tr = TRANS; a_mm->di = DIAG; a_mm->m = M; a_mm->n = N; a_mm->al = ALPHA; a_mm->a = A; a_mm->la = LDA; a_mm->b = B; a_mm->lb = LDB; tree = ATL_init_node( NODE, NULL, NULL, NULL, NULL, PLVL3->trmm0, (void *)(a_mm) ); } #endif } else { splitM = ATL_1dsplit( NTHREADS, M, NB, &ntm1, &ntm2, &m1, &m2, &rm ); if( splitM == Atlas1dSplit ) { left = ATL_Strmm( PLVL3, next+1, ntm1, ATTR, NB, SIDE, UPLO, TRANS, DIAG, m1, N, ALPHA, A, LDA, B, LDB ); incB = m1; right = ATL_Strmm( PLVL3, next+2, ntm2, ATTR, NB, SIDE, UPLO, TRANS, DIAG, m2, N, ALPHA, A, LDA, Mvpt3( B, incB, size ), LDB ); tree = ATL_init_node( NODE, left, right, NULL, NULL, NULL, NULL ); } else { a_mm = (PT_TRMM_ARGS_T *)malloc( sizeof( PT_TRMM_ARGS_T ) ); ATL_assert( a_mm != NULL ); a_mm->si = SIDE; a_mm->up = UPLO; a_mm->tr = TRANS; a_mm->di = DIAG; a_mm->m = M; a_mm->n = N; a_mm->al = ALPHA; a_mm->a = A; a_mm->la = LDA; a_mm->b = B; a_mm->lb = LDB; tree = ATL_init_node( NODE, NULL, NULL, NULL, NULL, PLVL3->trmm0, (void *)(a_mm) ); } } return( tree ); /* * End of ATL_Strmm */ }
PT_TREE_T ATL_Ssyr2k ( const PT_LVL3_TYPE_T * PLVL3, const unsigned int NODE, const unsigned int NTHREADS, pthread_attr_t * ATTR, const int NB, const enum ATLAS_UPLO UPLO, const enum ATLAS_TRANS TRANS, const enum ATLAS_TRANS TGEMM, const int M, const int N, const int K, const int L, const void * ALPHA, const void * ALPHC, const void * A, const int LDA, const void * B, const int LDB, const void * BETA, void * C, const int LDC ) { /* * .. Local Variables .. */ PT_TREE_T left, right, tree = NULL, root; PT_SYR2K_ARGS_T * a_syr2k = NULL; #if 1 double rn; DIM_1DSPLIT_T splitN; size_t size; unsigned int next = ( NODE << 1 ), nt1, nt2; int failed, incA, incB, incC, mnk1, mnk2; #else size_t size; DIM_TZSPLIT_T split = AtlasTzNoSplit; unsigned int next = ( NODE << 1 ), nt1, nt2; int failed, incA, incB, incC, mnk1, mnk2; #endif /* .. * .. Executable Statements .. * */ if( NTHREADS < 1 ) return( tree ); size = PLVL3->size; if( M < 0 || N < 0 ) return( tree ); /* temporary */ #if 1 if( TGEMM == AtlasTrans ) { failed = PLVL3->ptsyr2k0( NTHREADS, ATTR, UPLO, TRANS, TGEMM, K, L, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); } else { failed = PLVL3->pther2k0( NTHREADS, ATTR, UPLO, TRANS, TGEMM, K, L, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); } if( failed ) { splitN = ATL_1dsplit( NTHREADS, K, NB, &nt1, &nt2, &mnk1, &mnk2, &rn ); if( splitN == Atlas1dSplit ) { left = ATL_Ssyr2k( PLVL3, next+1, NTHREADS, ATTR, NB, UPLO, TRANS, TGEMM, 0, 0, mnk1, L, ALPHA, ALPHC, A, LDA, B, LDB, BETA, C, LDC ); if( UPLO == AtlasLower ) { if( TRANS == AtlasNoTrans ) { incA = mnk1; incB = mnk1; incC = mnk1; root = PLVL3->ptgemm( NTHREADS, ATTR, AtlasNoTrans, TGEMM, mnk2, mnk1, L, ALPHA, Mvpt3( A, incA, size ), LDA, B, LDB, BETA, Mvpt3( C, incC, size ), LDC ); ATL_join_tree ( root ); ATL_free_tree ( root ); root = PLVL3->ptgemm( NTHREADS, ATTR, AtlasNoTrans, TGEMM, mnk2, mnk1, L, ALPHC, Mvpt3( B, incB, size ), LDB, A, LDA, PLVL3->one, Mvpt3( C, incC, size ), LDC ); ATL_join_tree ( root ); ATL_free_tree ( root ); incC += mnk1 * LDC; } else { incA = mnk1 * LDA; incB = mnk1 * LDB; incC = mnk1; root = PLVL3->ptgemm( NTHREADS, ATTR, TGEMM, AtlasNoTrans, mnk2, mnk1, L, ALPHA, Mvpt3( A, incA, size ), LDA, B, LDB, BETA, Mvpt3( C, incC, size ), LDC ); ATL_join_tree ( root ); ATL_free_tree ( root ); root = PLVL3->ptgemm( NTHREADS, ATTR, TGEMM, AtlasNoTrans, mnk2, mnk1, L, ALPHC, Mvpt3( B, incB, size ), LDB, A, LDA, PLVL3->one, Mvpt3( C, incC, size ), LDC ); ATL_join_tree ( root ); ATL_free_tree ( root ); incC += mnk1 * LDC; } } else { if( TRANS == AtlasNoTrans ) { incA = mnk1; incB = mnk1; incC = mnk1 * LDC; root = PLVL3->ptgemm( NTHREADS, ATTR, AtlasNoTrans, TGEMM, mnk1, mnk2, L, ALPHA, A, LDA, Mvpt3( B, incB, size ), LDB, BETA, Mvpt3( C, incC, size ), LDC ); ATL_join_tree ( root ); ATL_free_tree ( root ); root = PLVL3->ptgemm( NTHREADS, ATTR, AtlasNoTrans, TGEMM, mnk1, mnk2, L, ALPHC, B, LDB, Mvpt3( A, incA, size ), LDA, PLVL3->one, Mvpt3( C, incC, size ), LDC ); ATL_join_tree ( root ); ATL_free_tree ( root ); incC += mnk1; } else { incA = mnk1 * LDA; incB = mnk1 * LDB; incC = mnk1 * LDC; root = PLVL3->ptgemm( NTHREADS, ATTR, TGEMM, AtlasNoTrans, mnk1, mnk2, L, ALPHA, A, LDA, Mvpt3( B, incB, size ), LDB, BETA, Mvpt3( C, incC, size ), LDC ); ATL_join_tree ( root ); ATL_free_tree ( root ); root = PLVL3->ptgemm( NTHREADS, ATTR, TGEMM, AtlasNoTrans, mnk1, mnk2, L, ALPHC, B, LDB, Mvpt3( A, incA, size ), LDA, PLVL3->one, Mvpt3( C, incC, size ), LDC ); ATL_join_tree ( root ); ATL_free_tree ( root ); incC += mnk1; } } right = ATL_Ssyr2k( PLVL3, next+2, NTHREADS, ATTR, NB, UPLO, TRANS, TGEMM, 0, 0, mnk2, L, ALPHA, ALPHC, Mvpt3( A, incA, size ), LDA, Mvpt3( B, incB, size ), LDB, BETA, Mvpt3( C, incC, size ), LDC ); } else { a_syr2k = (PT_SYR2K_ARGS_T *)malloc( sizeof( PT_SYR2K_ARGS_T ) ); ATL_assert( a_syr2k != NULL ); a_syr2k->up = UPLO; a_syr2k->tr = TRANS; a_syr2k->m = 0; a_syr2k->n = 0; a_syr2k->k = K; a_syr2k->l = L; a_syr2k->al = ALPHA; a_syr2k->ac = ALPHA; a_syr2k->a = A; a_syr2k->la = LDA; a_syr2k->b = B; a_syr2k->lb = LDB; a_syr2k->be = BETA; a_syr2k->c = C; a_syr2k->lc = LDC; tree = ATL_init_node( NODE, NULL, NULL, NULL, NULL, ( TGEMM == AtlasTrans ? PLVL3->syr2k0 : PLVL3->her2k0 ), (void *)(a_syr2k) ); } } return( tree ); #else split = ATL_tzsplit( UPLO, NTHREADS, M, N, K, NB, &nt1, &nt2, &mnk1, &mnk2 ); if( split == AtlasSplitKrow ) { if( UPLO == AtlasLower ) { left = ATL_Ssyr2k( PLVL3, next+1, nt1, ATTR, NB, UPLO, TRANS, TGEMM, 0, N, mnk1, L, ALPHA, ALPHC, A, LDA, B, LDB, BETA, C, LDC ); incA = ( TRANS == AtlasNoTrans ? mnk1 : 0 ); incB = ( TRANS == AtlasNoTrans ? mnk1 : 0 ); incC = mnk1; right = ATL_Ssyr2k( PLVL3, next+2, nt2, ATTR, NB, UPLO, TRANS, TGEMM, M, N + mnk1, mnk2, L, ALPHA, ALPHC, Mvpt3( A, incA, size ), LDA, Mvpt3( B, incB, size ), LDB, BETA, Mvpt3( C, incC, size ), LDC ); } else { left = ATL_Ssyr2k( PLVL3, next+1, nt1, ATTR, NB, UPLO, TRANS, TGEMM, M, N + mnk2, mnk1, L, ALPHA, ALPHC, A, LDA, B, LDB, BETA, C, LDC ); incA = ( TRANS == AtlasNoTrans ? mnk1 : mnk1 * LDA ); incB = ( TRANS == AtlasNoTrans ? mnk1 : mnk1 * LDB ); incC = mnk1 * ( LDC + 1 ); right = ATL_Ssyr2k( PLVL3, next+2, nt2, ATTR, NB, UPLO, TRANS, TGEMM, 0, N, mnk2, L, ALPHA, ALPHC, Mvpt3( A, incA, size ), LDA, Mvpt3( B, incB, size ), LDB, BETA, Mvpt3( C, incC, size ), LDC ); } tree = ATL_init_node( NODE, left, right, NULL, NULL, NULL, NULL ); } else if( split == AtlasSplitKcol ) { if( UPLO == AtlasLower ) { left = ATL_Ssyr2k( PLVL3, next+1, nt1, ATTR, NB, UPLO, TRANS, TGEMM, M + mnk2, N, mnk1, L, ALPHA, ALPHC, A, LDA, B, LDB, BETA, C, LDC ); incA = ( TRANS == AtlasNoTrans ? mnk1 : ( N + mnk1 ) * LDA ); incB = ( TRANS == AtlasNoTrans ? mnk1 : ( N + mnk1 ) * LDB ); incC = mnk1 + ( N + mnk1 ) * LDC; right = ATL_Ssyr2k( PLVL3, next+2, nt2, ATTR, NB, UPLO, TRANS, TGEMM, M, 0, mnk2, L, ALPHA, ALPHC, Mvpt3( A, incA, size ), LDA, Mvpt3( B, incB, size ), LDB, BETA, Mvpt3( C, incC, size ), LDC ); } else { left = ATL_Ssyr2k( PLVL3, next+1, nt1, ATTR, NB, UPLO, TRANS, TGEMM, M, 0, mnk1, L, ALPHA, ALPHC, A, LDA, B, LDB, BETA, C, LDC ); incA = ( TRANS == AtlasNoTrans ? 0 : mnk1 * LDA ); incB = ( TRANS == AtlasNoTrans ? 0 : mnk1 * LDB ); incC = mnk1 * LDC; right = ATL_Ssyr2k( PLVL3, next+2, nt2, ATTR, NB, UPLO, TRANS, TGEMM, M + mnk1, N, mnk2, L, ALPHA, ALPHC, Mvpt3( A, incA, size ), LDA, Mvpt3( B, incB, size ), LDB, BETA, Mvpt3( C, incC, size ), LDC ); } tree = ATL_init_node( NODE, left, right, NULL, NULL, NULL, NULL ); } else { a_syr2k = (PT_SYR2K_ARGS_T *)malloc( sizeof( PT_SYR2K_ARGS_T ) ); ATL_assert( a_syr2k != NULL ); a_syr2k->up = UPLO; a_syr2k->tr = TRANS; a_syr2k->m = M; a_syr2k->n = N; a_syr2k->k = K; a_syr2k->l = L; a_syr2k->al = ALPHA; a_syr2k->ac = ALPHA; a_syr2k->a = A; a_syr2k->la = LDA; a_syr2k->b = B; a_syr2k->lb = LDB; a_syr2k->be = BETA; a_syr2k->c = C; a_syr2k->lc = LDC; tree = ATL_init_node( NODE, NULL, NULL, NULL, NULL, ( TGEMM == AtlasTrans ? PLVL3->syr2k0 : PLVL3->her2k0 ), (void *)(a_syr2k) ); } return( tree ); #endif /* * End of ATL_Ssyr2k */ }
PT_TREE_T ATL_Sgezero ( const PT_MISC_TYPE_T * PTYPE, const unsigned int NODE, const unsigned int THREADS, pthread_attr_t * ATTR, const int NB, const int M, const int N, void * A, const int LDA ) { /* * Purpose * ======= * * ATL_Sgezero recursively builds the binary tasks tree to be threaded * for the matrix zero operation. * * Arguments * ========= * * PTYPE (input) const PT_MISC_TYPE_T * * On entry, PTYPE points to the data structure containing the * type information. * * NODE (input) const unsigned int * On entry, NODE specifies the current node number. * * THREADS (input) const unsigned int * On entry, THREADS specifies the number of threads to be used * for the current operation. * * ATTR (input) pthread_attr_t * * On entry, ATTR specifies the thread attribute object to be * used for the node functions to be threaded. * * NB (input) const int * On entry, NB specifies the blocksize to be used for the * problem size partitioning. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ PT_TREE_T left, right, tree = NULL; PT_GEZERO_ARGS_T * a_zero = NULL; double rm, rn; DIM_1DSPLIT_T splitM, splitN; size_t size; int split, m1, m2, n1, n2; unsigned int next = (NODE << 1), ntm1, ntm2, ntn1, ntn2; /* .. * .. Executable Statements .. * */ if( THREADS < 1 ) return( tree ); size = PTYPE->size; splitM = ATL_1dsplit( THREADS, M, NB, &ntm1, &ntm2, &m1, &m2, &rm ); splitN = ATL_1dsplit( THREADS, N, NB, &ntn1, &ntn2, &n1, &n2, &rn ); if( ( splitM == Atlas1dSplit ) && ( splitN == Atlas1dSplit ) ) { split = ( rm < rn ? SPLIT_M : SPLIT_N ); } else if( splitM == Atlas1dSplit ) { split = SPLIT_M; } else if( splitN == Atlas1dSplit ) { split = SPLIT_N; } else { split = NOSPLIT; } if( split == SPLIT_N ) { left = ATL_Sgezero( PTYPE, next+1, ntn1, ATTR, NB, M, n1, A, LDA ); right = ATL_Sgezero( PTYPE, next+2, ntn2, ATTR, NB, M, n2, Mvptm( A, n1*LDA, size ), LDA ); tree = ATL_init_node( NODE, left, right, NULL, NULL, NULL, NULL ); } else if( split == SPLIT_M ) { left = ATL_Sgezero( PTYPE, next+1, ntm1, ATTR, NB, m1, N, A, LDA ); right = ATL_Sgezero( PTYPE, next+2, ntm2, ATTR, NB, m2, N, Mvptm( A, m1, size ), LDA ); tree = ATL_init_node( NODE, left, right, NULL, NULL, NULL, NULL ); } else { a_zero = (PT_GEZERO_ARGS_T *)malloc( sizeof( PT_GEZERO_ARGS_T ) ); ATL_assert( a_zero != NULL ); a_zero->m = M; a_zero->n = N; a_zero->a = A; a_zero->la = LDA; tree = ATL_init_node( NODE, NULL, NULL, NULL, NULL, PTYPE->fun, (void *)(a_zero) ); } return( tree ); /* * End of ATL_Sgezero */ }