void Mjoin( PATL, pthescal )
(
   const enum ATLAS_UPLO      UPLO,
   const int                  M,
   const int                  N,
   const TYPE                 ALPHA,
   TYPE                       * A,
   const int                  LDA
)
{
/*
 * Purpose
 * =======
 *
 * Mjoin( PATL, pthescal ) multiplies the trapezoidal Hermitian m-by-n
 * matrix A by the real scalar alpha. The imaginary parts of the diago-
 * nal entries of A are taken to be zero on entry, whether or not they
 * were set, and they are zero on exit.
 *
 * This is the multi-threaded driver: the scaling itself is delegated to
 * Mjoin( PATL, pthescal_nt ), which hands back a thread tree that is
 * joined and then released here.
 *
 * Notes
 * =====
 *
 * Nothing is done when M or N is not positive.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   PT_TREE_T                  tree = NULL;
   pthread_attr_t             thr_attr;
   TYPE                       zalpha[2];
/* ..
 * .. Executable Statements ..
 *
 */
   if( ( M > 0 ) && ( N > 0 ) )
   {
/*
 * The worker takes the scalar through a void pointer as a
 * (real, imaginary) pair; the imaginary part of a real alpha is zero.
 */
      zalpha[1] = ATL_rzero;
      zalpha[0] = ALPHA;

      ATL_thread_init( &thr_attr );
      tree = Mjoin( PATL, pthescal_nt )( ATL_NTHREADS, &thr_attr, UPLO,
                                         M, N, (void *)(zalpha),
                                         (void *)(A), LDA );
      ATL_join_tree  ( tree );
      ATL_free_tree  ( tree );
      ATL_thread_exit( &thr_attr );
   }
/*
 * End of Mjoin( PATL, pthescal )
 */
}
void Mjoin( PATL, ptherk )
(
   const enum ATLAS_UPLO      UPLO,
   const enum ATLAS_TRANS     TRANS,
   const int                  N,
   const int                  K,
   const TYPE                 ALPHA,
   const TYPE                 * A,
   const int                  LDA,
   const TYPE                 BETA,
   TYPE                       * C,
   const int                  LDC
)
{
/*
 * Purpose
 * =======
 *
 * Mjoin( PATL, ptherk ) carries out one of the Hermitian rank k updates
 *
 *    C := alpha * A * conjg( A' ) + beta * C,
 *
 * or
 *
 *    C := alpha * conjg( A' ) * A + beta * C,
 *
 * with real scalars alpha and beta, an n by n Hermitian matrix C, and a
 * matrix A that is n by k in the first form and k by n in the second.
 *
 * Quick returns handled here:
 *    - N == 0: nothing to do;
 *    - alpha == 0 or K == 0: the update reduces to C := beta * C, which
 *      is skipped when beta == 1 and otherwise performed by
 *      Mjoin( PATL, pthescal ).
 * All remaining cases are forwarded to Mjoin( PATL, ptherk_nt ), whose
 * thread tree is joined and freed before returning.
 *
 * For a more detailed description of the arguments of this function,
 * see the reference implementation in the ATLAS/src/blas/reference di-
 * rectory.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   pthread_attr_t             thr_attr;
   PT_TREE_T                  tree = NULL;
   TYPE                       zalpha[2], zbeta[2];
/* ..
 * .. Executable Statements ..
 *
 */
   if( N == 0 ) return;

   if( ( ALPHA == ATL_rzero ) || ( K == 0 ) )
   {
/*
 * No rank-k contribution: only the scaling of C by beta is left.
 */
      if( BETA != ATL_rone )
      {
         Mjoin( PATL, pthescal )( UPLO, N, N, BETA, C, LDC );
      }
      return;
   }
/*
 * The worker receives both scalars through void pointers as
 * (real, imaginary) pairs with a zero imaginary part.
 */
   zalpha[0] = ALPHA; zalpha[1] = ATL_rzero;
   zbeta [0] = BETA;  zbeta [1] = ATL_rzero;

   ATL_thread_init( &thr_attr );
   tree = Mjoin( PATL, ptherk_nt )( ATL_NTHREADS, &thr_attr, UPLO, TRANS,
                                    N, K, (void *)(zalpha), (void *)(A),
                                    LDA, (void *)(zbeta), (void *)(C),
                                    LDC );
   ATL_join_tree  ( tree );
   ATL_free_tree  ( tree );
   ATL_thread_exit( &thr_attr );
/*
 * End of Mjoin( PATL, ptherk )
 */
}
void *ATL_dyntlaunch(void *vp)
#endif
{
/*
 * Thread entry point: helps start the remaining threads, runs this
 * thread's share of the work through lp->DoWork, and then takes part
 * in a pairwise combine of the per-thread results.
 *
 * vp is the ATL_thread_t record of the calling thread; its vp field
 * points to the ATL_LAUNCHSTRUCT_t describing the operation.
 * Always returns NULL.
 */
   ATL_thread_t *tp = vp, *btp;
   ATL_LAUNCHSTRUCT_t *lp;
   const int iam = tp->rank, P = tp->P;
   int i, src, dest, nthrP2, mask, abit;
   void *acnt;

   lp = tp->vp;
   acnt = lp->acounts[0];
/*
 * Step back by my rank to reach entry 0 of the thread records
 * (assumes tp is element iam of one contiguous array -- TODO confirm)
 */
   btp = tp - iam;
/*
 * Set my affinity if I haven't already
 */
   #ifdef ATL_PAFF_SELF
      if (!tp->paff_set)
          ATL_setmyaffinity(tp);
   #endif
/*
 * Keep drawing from the shared counter; every nonzero value drawn is
 * mapped to thread index P - dest, and that thread is started running
 * this same routine.  A zero draw ends the launching phase.
 * NOTE(review): presumably the counter hands out each remaining thread
 * exactly once -- confirm against ATL_DecGlobalAtomicCount.
 */
   dest = ATL_DecGlobalAtomicCount(acnt, iam);
   while(dest)
   {
      dest = tp->P - dest;
      ATL_thread_start(btp+dest, dest, 0, ATL_dyntlaunch, btp+dest);
      dest = ATL_DecGlobalAtomicCount(acnt, iam);
   }
/*
 * Do the operation
 */
   lp->DoWork(lp, tp);
/*
 * Do combine in minimum spanning tree, combining results as required
 */
/*
 * nthrP2 = smallest i such that (1<<i) >= P (the loop body is empty)
 */
   for (i=0; (1<<i) < P; i++);
   nthrP2 = i;
   mask = 0;
   for (i=0; i < nthrP2; i++)
   {
/*
 *    Only ranks whose bits below position i are all zero act at level i
 */
      if (!(iam & mask))
      {
         abit = (1<<i);
         if (!(iam & abit))
         {
/*
 *          Bit i clear: I am the receiver; my partner differs in bit i.
 *          Partners with rank >= P do not exist and are skipped.
 */
            src = iam ^ abit;
            if (src < P)
            {
/*
 *             Spin until the partner has checked in, then fold its
 *             result into mine if a combine function was supplied
 */
               while (lp->chkin[src] != ATL_CHK_DONE_OP)
                  ATL_POLL;
               if (lp->DoComb)
                  lp->DoComb(lp->opstruct, iam, src);
            }
         }
         else
         {
/*
 *          Bit i set: publish that my result is ready and leave
 *          (presumably ATL_thread_exit does not return -- TODO confirm)
 */
            lp->chkin[iam] = ATL_CHK_DONE_OP;
            ATL_thread_exit(NULL);
         }
      }
/*
 *    abit keeps the value from the last level at which this rank acted
 */
      mask |= abit;
   }
   return(NULL);
}
void Mjoin( PATL, ptgeadd )
(
   const int                  M,
   const int                  N,
   const SCALAR               ALPHA,
   const TYPE                 * A,
   const int                  LDA,
   const SCALAR               BETA,
   TYPE                       * C,
   const int                  LDC
)
{
/*
 * Purpose
 * =======
 *
 * Mjoin( PATL, ptgeadd ) is the multi-threaded driver for the general
 * matrix add: the m-by-n matrix A, scaled by alpha, is combined into
 * the m-by-n matrix C, scaled by beta.
 * NOTE(review): presumably C := alpha * A + beta * C, which agrees with
 * the quick return for alpha == 0 and beta == 1; the computation itself
 * lives in Mjoin( PATL, ptgeadd_nt ) -- confirm there.
 *
 * Arguments
 * =========
 *
 * M       (input)                       const int
 *         Row count passed on to the worker. Nothing is done when M is
 *         not positive.
 *
 * N       (input)                       const int
 *         Column count passed on to the worker. Nothing is done when N
 *         is not positive.
 *
 * ALPHA   (input)                       const SCALAR
 *         Scalar alpha. When TREAL is defined it is copied into a local
 *         TYPE and the address of that copy is given to the worker;
 *         otherwise ALPHA itself is passed on as a pointer.
 *
 * A       (input)                       const TYPE *
 *         Matrix A, forwarded unchanged to the worker.
 *
 * LDA     (input)                       const int
 *         Leading dimension of A, forwarded unchanged to the worker.
 *
 * BETA    (input)                       const SCALAR
 *         Scalar beta, handled in the same way as ALPHA.
 *
 * C       (input/output)                TYPE *
 *         Matrix C, forwarded unchanged to the worker.
 *
 * LDC     (input)                       const int
 *         Leading dimension of C, forwarded unchanged to the worker.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   PT_TREE_T                  tree = NULL;
   pthread_attr_t             thr_attr;
   void                       * palpha, * pbeta;
#ifdef TREAL
   TYPE                       ralpha = (TYPE)(ALPHA),
                              rbeta  = (TYPE)(BETA);
#endif
/* ..
 * .. Executable Statements ..
 *
 */
   if( ( M <= 0 ) || ( N <= 0 ) ) return;
   if( SCALAR_IS_ZERO( ALPHA ) && SCALAR_IS_ONE( BETA ) ) return;
/*
 * The worker takes both scalars through void pointers.
 */
#ifdef TREAL
   palpha = (void *)(&ralpha);
   pbeta  = (void *)(&rbeta);
#else
   palpha = (void *)(ALPHA);
   pbeta  = (void *)(BETA);
#endif

   ATL_thread_init( &thr_attr );
   tree = Mjoin( PATL, ptgeadd_nt )( ATL_NTHREADS, &thr_attr, M, N,
                                     palpha, (void *)(A), LDA, pbeta,
                                     (void *)(C), LDC );
   ATL_join_tree  ( tree );
   ATL_free_tree  ( tree );
   ATL_thread_exit( &thr_attr );
/*
 * End of Mjoin( PATL, ptgeadd )
 */
}