int checkY(int N, TYPE *Yg, int incYg, TYPE *Yc, int incYc) { int i, iret=0; TYPE rdiff, idiff, eps, maxerr; TYPE Mjoin(PATL, epsilon)(void); eps = Mjoin(PATL,epsilon)(); maxerr = 8*eps; incYg *= 2; incYc *= 2; for (i=0; i < N; i++, Yg += incYg, Yc += incYc) { rdiff = *Yg - *Yc; idiff = Yg[1] - Yc[1]; rdiff = Mabs(rdiff); idiff = Mabs(idiff); if (rdiff > maxerr) { iret = i; fprintf(stderr, "ERROR: Y[%d], real, correct=%e, computed=%e\n", i, *Yg, *Yc); } if (idiff > maxerr) { iret = i; fprintf(stderr, "ERROR: Y[%d], imag, correct=%e, computed=%e\n", i, Yg[1], Yc[1]); } } return(iret); }
void Mjoin(PATL,cplxinvert)(ATL_CINT N, TYPE *X, ATL_CINT incx, TYPE *Y, ATL_CINT incy) /* * Y(:) = 1 / X(:) * Invert N complex scalars held in X, and write answer to Y. * X & Y can be same space */ { int i; const TYPE one=1.0, none=(-1.0); ATL_CINT incX=incx+incx, incY=incy+incy; register TYPE rtmp, itmp, t0; for (i=N; i; i--, X += incX, Y += incY) { rtmp = *X; itmp = X[1]; if (Mabs(itmp) <= Mabs(rtmp)) { t0 = itmp / rtmp; *Y = rtmp = one / (rtmp + itmp*t0); Y[1] = -rtmp * t0; } else { t0 = rtmp / itmp; Y[1] = rtmp = none / (itmp + rtmp*t0); *Y = -t0 * rtmp; } } }
TYPE Mjoin(PATL,gbnrm1)(const int M, const int N, const int KL, const int KU, const TYPE *A, const int LDA) /* * Calculates the 1-norm of a general band rectangular matrix */ { int i, i0, i1, iaij, j, jaj, k, lda2 = ( LDA SHIFT ); TYPE max=ATL_rzero, t0; for( j = 0, jaj = 0; j < N; j++, jaj += lda2 ) { k = KU - j; i0 = ( j - KU > 0 ? j - KU : 0 ); i1 = ( M - 1 > j + KL ? j + KL : M - 1 ); t0 = ATL_rzero; for( i = i0, iaij = ((k+i0) SHIFT)+jaj; i <= i1; i++, iaij += (1 SHIFT) ) { #ifdef TREAL t0 += Mabs( A[iaij] ); #else t0 += Mabs( A[iaij] ) + Mabs( A[iaij+1] ); #endif } if (t0 > max) max = t0; } return(max); }
TYPE Mjoin(PATL,hbnrm) (const enum ATLAS_UPLO UPLO, const int N, const int K, const TYPE *A, const int LDA) { int i, i0, i1, iaij, iy, j, jaj, ky = 0, l, lda2 = (LDA SHIFT); TYPE max=ATL_rzero, t0, * work= NULL; if( N <= 0 ) return( ATL_rzero ); work = (TYPE *)malloc( N * sizeof( TYPE ) ); if( work == NULL ) {fprintf( stderr, "mem alloc failed in [sb,hb]nrm, bye ...\n" ); exit( 1 );} else { for( i = 0; i < N; i++ ) work[i] = ATL_rzero; } if( UPLO == AtlasUpper ) { for( j = 0, jaj = 0; j < N; j++, jaj += lda2 ) { t0 = ATL_rzero; l = K - j; i0 = ( j - K > 0 ? j - K : 0 ); for( i = i0, iaij = ((l+i0) SHIFT)+jaj, iy = ky; i < j; i++, iaij += (1 SHIFT), iy += 1 ) { work[iy] += Mabs( A[iaij] ) + Mabs( A[iaij+1] ); t0 += Mabs( A[iaij] ) + Mabs( A[iaij+1] ); } work[j] += Mabs( A[iaij] ) + t0; if( j >= K ) { ky += 1; } } } else { for( j = 0, jaj = 0; j < N; j++, jaj += lda2 ) { t0 = ATL_rzero; work[j] = Mabs( A[jaj] ); i1 = ( N - 1 > j + K ? j + K : N - 1 ); for( i = j+1, iaij = (1 SHIFT)+jaj; i <= i1; i++, iaij += (1 SHIFT) ) { work[i] += Mabs( A[iaij] ) + Mabs( A[iaij+1] ); t0 += Mabs( A[iaij] ) + Mabs( A[iaij+1] ); } work[j] += t0; } } max = work[0]; for( j = 1; j < N; j++ ) if( max < work[j] ) max = work[j]; if( work ) free( work ); return( max ); }
void Mjoin( ATLUPF77WRAP, asum ) ( F77_INTEGER * N, TYPE * X, F77_INTEGER * INCX, TYPE * ASUM ) { /* * Purpose * ======= * * ATL_F77wrap_asum computes the sum of absolute values of the entries * of an n-vector x. * * Notes * ===== * * This routine is an internal wrapper function written in C called by * the corresponding Fortran 77 user callable subroutine. It calls the * appropriate ATLAS routine performing the actual computation. * * This wrapper layer resolves the following portability issues: * * - the routines' name sheme translation imposed by the Fortran / C * compilers of your target computer, * - the translation of Fortran characters into the ATLAS correspon- * ding C enumerated type (in cooperation with the Fortan user cal- * lable subroutine), * - the translation of Fortran integers into the proper C correspon- * ding native type; * * and the following ease-of-programming issue: * * - a pointer to the the first entry of vector operands (when appli- * cable) is passed to the ATLAS computational routine even if the * corresponding input increment value is negative. This allows for * a more natural expression in C of the computation performed by * these ATLAS functions. * * --------------------------------------------------------------------- */ /* .. * .. Executable Statements .. * */ #ifdef TREAL *ASUM = Mjoin( PATL, asum )( *N, X, Mabs( *INCX ) ); #else *ASUM = Mjoin( Mjoin( PATLU, PRE ), asum )( *N, X, Mabs( *INCX ) ); #endif /* * End of Mjoin( ATLUPF77WRAP, asum ) */ }
TYPE Mjoin(PATL,infnrm)(const int N, const TYPE *X, const int incX) { int i; i = Mjoin(Mjoin(ATL_i,PRE),amax)(N, X, incX); #ifdef TREAL return(Mabs(X[i*incX])); #else i *= (incX<<1); return(Mabs(X[i]) + Mabs(X[i+1])); #endif }
static int CheckY(int npad, TYPE padval, int N, TYPE *Yg, int incYg, TYPE *Yt, int incYt) { int i0, i1; incYg = Mabs(incYg); incYt = Mabs(incYt); i0 = checkY(N, Yg+(npad SHIFT), incYg, Yt+(npad SHIFT), incYt); i1 = CheckPad(npad, padval, N, Yt, incYt); if (!i0 && !i1) return(0); return(1); }
static int LU1(ATL_CINT M, ATL_CINT N, ATL_CINT j, TYPE *A, ATL_CINT lda, int *ipiv) /* * Performs an LU factorization on jth column. N is the full width of * column panel, A is ptr to beginning of panel. * RETURNS: 0 on success, non-zero if no non-zero pivot exists */ { #ifdef TCPLX ATL_CINT lda2 = lda+lda; TYPE invs[2]; const TYPE none[2] = {ATL_rnone, ATL_rzero}; #else #define lda2 lda #define none ATL_rnone #endif TYPE *Ac = A + j*lda2; /* active column */ TYPE pivval=Ac[j]; ATL_INT ip; ipiv[j] = ip = j + cblas_iamax(M-j, Ac+(j SHIFT), 1); #ifdef TCPLX pivval = Mabs(Ac[ip+ip]) + Mabs(Ac[ip+ip+1]); #else pivval = Ac[ip]; #endif if (pivval != ATL_rzero) { if (ip != j) cblas_swap(N, A+(j SHIFT), lda, A+(ip SHIFT), lda); #ifdef TCPLX if (pivval >= ATL_laSAFMIN) { TYPE invs[2]; Mjoin(PATL,cplxinvert)(1, Ac+j+j, 1, invs, 1); cblas_scal(M-j-1, invs, Ac+j+j+2, 1); } else Mjoin(PATL,cplxdivide)(M-j-1, Ac+j+j, Ac+j+j+2, 1, Ac+j+j+2, 1); #else if (Mabs(pivval) >= ATL_laSAFMIN) cblas_scal(M-j-1, ATL_rone/pivval, Ac+j+1, 1); else { ATL_INT i; for (i=j+1; i < M; i++) Ac[j] /= pivval; } #endif return(0); } return(1); }
TYPE Mjoin(PATL,hpnrm) (const enum ATLAS_UPLO UPLO, const int N, const TYPE *A) { int i, iaij, j; TYPE max=ATL_rzero, t0, * work= NULL; if( N <= 0 ) return( ATL_rzero ); work = (TYPE *)malloc( N * sizeof( TYPE ) ); if( work == NULL ) {fprintf( stderr, "mem alloc failed in [sp,hp]nrm, bye ...\n" ); exit( 1 );} else { for( i = 0; i < N; i++ ) work[i] = ATL_rzero; } if( UPLO == AtlasUpper ) { for( j = 0, iaij = 0; j < N; j++ ) { work[j] = t0 = ATL_rzero; for( i = 0; i < j; i++, iaij += (1 SHIFT) ) { work[i] += Mabs( A[iaij] ) + Mabs( A[iaij+1] ); t0 += Mabs( A[iaij] ) + Mabs( A[iaij+1] ); } work[j] += Mabs( A[iaij] ) + t0; iaij += (1 SHIFT); } } else { for( j = 0, iaij = 0; j < N; j++ ) { t0 = ATL_rzero; work[j] = Mabs( A[iaij] ); iaij += (1 SHIFT); for( i = j+1; i < N; i++, iaij += (1 SHIFT) ) { work[i] += Mabs( A[iaij] ) + Mabs( A[iaij+1] ); t0 += Mabs( A[iaij] ) + Mabs( A[iaij+1] ); } work[j] += t0; } } max = work[0]; for( j = 1; j < N; j++ ) if( max < work[j] ) max = work[j]; if( work ) free( work ); return( max ); }
TYPE good_asum(const int N, const TYPE *X, const int incx) { int i; const int incX=incx+incx; register TYPE rx, ix, t0=ATL_rzero; for (i=0; i < N; i++, X += incX) { rx = Mabs(*X); ix = Mabs(X[1]); t0 += rx + ix; } return(t0); }
TYPE good_asum(const int N, const TYPE *X, const int incX) { int i; TYPE asum=ATL_rzero; for (i=0; i < N; i++, X += incX) asum += Mabs(*X); return(asum); }
int DoTest(int N, TYPE *alpha0, int incY) { int iret; const int npad=Mmax(4*Mabs(incY), 16); const TYPE padval=(-2271.0); TYPE *Yg, *Yt, *y; #ifdef TREAL TYPE alpha = *alpha0; #else TYPE *alpha = alpha0; #endif Yg = getvec(npad, padval, N, incY); Yt = dupvec(npad, N, Yg, incY); y = Yg + (npad SHIFT); if (incY < 1) y -= ((N-1)SHIFT) * incY; good_set(N, alpha, y, incY); y = Yt + (npad SHIFT); if (incY < 1) y -= ((N-1)SHIFT) * incY; TEST_SET(N, alpha, Yt+(npad SHIFT), incY); iret = CheckY(npad, padval, N, Yg, incY, Yt, incY); free(Yg); free(Yt); return(iret); }
int DoTest(int N, TYPE *alpha0, int incX, int incY) { int iret; const int npad=Mmax(4*Mabs(incY), 16); const TYPE padval=(-2271.0); TYPE *Yg, *Yt, *X, *x, *y; #ifdef TREAL TYPE alpha = *alpha0; #else TYPE *alpha = alpha0; #endif Yg = getvec(npad, padval, N, incY); Yt = dupvec(npad, N, Yg, incY); X = getvec(0, padval, N, incX); /* no padding for read-only X */ x = X; y = Yg + (npad SHIFT); if (incX < 1) x -= ((N-1)SHIFT) * incX; if (incY < 1) y -= ((N-1)SHIFT) * incY; good_axpy(N, alpha, x, incX, y, incY); y = Yt + (npad SHIFT); if (incY < 1) y -= ((N-1)SHIFT) * incY; TEST_AXPY(N, alpha, x, incX, y, incY); iret = CheckY(npad, padval, N, Yg, incY, Yt, incY); free(X); free(Yg); free(Yt); return(iret); }
int DoTest(int N, int incX) { int iret=0; const TYPE padval=(-2271.0); TYPE *X, *x, eps, diff, maxdiff; TYPE ansG, ansT; TYPE Mjoin(PATL, epsilon)(void); eps = Mjoin(PATL,epsilon)(); maxdiff = (2 SHIFT)*N*eps; x = X = getvec(0, padval, N, incX); if (incX < 0) x -= ((N-1)SHIFT) * incX; ansG = good_asum(N, x, incX); ansT = TEST_ASUM(N, x, incX); maxdiff *= 0.5 * ansG; diff = ansG - ansT; diff = Mabs(diff); if (diff > maxdiff) { fprintf(stderr, " asum ERROR: N=%d, correct=%e, computed=%e, diff=%e!!\n", N, ansG, ansT, diff); iret = 1; } free(X); return(iret); }
static TYPE *getvec(int npad, TYPE padval, int N, int incX) { TYPE *X, *x; int i, n; if (N <= 0) return(NULL); incX = Mabs(incX); n = 2*npad + 1+(N-1)*incX; X = malloc( ATL_sizeof*n ); assert(X); vecset(n, padval, X); #ifdef TCPLX npad *= 2; incX *= 2; #endif x = X + npad; for (i=0; i < N; i++, x += incX) { #ifdef TREAL *x = dumb_rand(); #else *x = dumb_rand(); x[1] = dumb_rand(); #endif } return(X); }
void Mjoin( ATLPUF77WRAP, scal ) ( F77_INTEGER * N, TYPE * ALPHA, TYPE * X, F77_INTEGER * INCX ) { /* * Purpose * ======= * * ATL_F77wrap_rscal scales an n-vector x by a real scalar alpha. * * Notes * ===== * * This routine is an internal wrapper function written in C called by * the corresponding Fortran 77 user callable subroutine. It calls the * appropriate ATLAS routine performing the actual computation. * * This wrapper layer resolves the following portability issues: * * - the routines' name sheme translation imposed by the Fortran / C * compilers of your target computer, * - the translation of Fortran characters into the ATLAS correspon- * ding C enumerated type (in cooperation with the Fortan user cal- * lable subroutine), * - the translation of Fortran integers into the proper C correspon- * ding native type; * * and the following ease-of-programming issue: * * - a pointer to the the first entry of vector operands (when appli- * cable) is passed to the ATLAS computational routine even if the * corresponding input increment value is negative. This allows for * a more natural expression in C of the computation performed by * these ATLAS functions. * * --------------------------------------------------------------------- */ /* * .. Local Arrays .. */ TYPE Calpha[2]; /* .. * .. Executable Statements .. * */ Calpha[0] = *ALPHA; Calpha[1] = ATL_rzero; Mjoin( PATL, scal )( *N, Calpha, X, Mabs( *INCX ) ); /* * End of Mjoin( ATLPUF77WRAP, scal ) */ }
TYPE ATL_lapy2(TYPE X, TYPE Y) { TYPE ONE=1.0, ZERO=0.0, W, Z, XABS, YABS, TEMP; /* Find absolute values */ XABS = Mabs(X); YABS = Mabs(Y); W = (XABS<YABS)?YABS:XABS; Z = (XABS<YABS)?XABS:YABS; if (Z == ZERO) return(W); /* NOTE: If Z != 0, then W != 0 */ TEMP = Z/W; TEMP = ONE + TEMP*TEMP; #if defined(SREAL) || defined(SCPLX) return(W * sqrtf(TEMP)); /* Use single precision sqrt. */ #else return(W * sqrt(TEMP)); /* Use double precision sqrt. */ #endif } /* END ATL_?lapy2 */
void Mjoin(PATL,cplxdivide) (ATL_CINT N, TYPE *beta, TYPE *X, ATL_CINT incx, TYPE *Y, ATL_CINT incy) /* * Y(:) = X(:)/beta, wt division done with safe complex arithmetic. * It is OK for Y & X to be the same pointer * This code is straight adaptation of LAPACK's DLADIV, which comes from * the algorithm developed by Robert L. Smith (Art of Comp Prog, Vol.2 p.195) */ { ATL_CINT incY=incy+incy, incX = incx+incx; ATL_INT i; const register TYPE rb = beta[0], ib = beta[1]; register TYPE rx, ix, e, f; if (Mabs(ib) < Mabs(rb)) { e = ib / rb; f = rb + ib*e; for (i=N; i; i--, X += incX, Y += incY) { rx = *X; ix = X[1]; Y[0] = (rx + ix*e) / f; Y[1] = (ix - rx*e) / f; } } else { e = rb / ib; f = ib + rb*e; for (i=N; i; i--, X += incX, Y += incY) { rx = *X; ix = X[1]; Y[0] = (ix + rx*e) / f; Y[1] = (ix*e - rx) / f; } } }
int CheckPad(int npad, TYPE padval, int N, TYPE *Y, int incY) { int i, n, iret=0; incY = Mabs(incY); npad *= 2; for (i=0; i < npad; i++) { if (Y[i] != padval) { iret = i; fprintf(stderr, "OVERWRITE %f IN PREPAD %d before beginning of Y!!\n", Y[i], npad-i); } } Y += npad; if (incY != 1) { for (i=0; i < N*incY; i++) { if (i%incY) { if (Y[2*i] != padval) { iret = i; fprintf(stderr, "INTERNAL REAL OVERWRITE %f AT POSITION %d!!\n", Y[2*i], i); } if (Y[2*i+1] != padval) { iret = i+1; fprintf(stderr, "INTERNAL IMAG OVERWRITE %f AT POSITION %d!!\n", Y[2*i+1], i); } } } } Y += 2 + 2*(N-1)*incY; for (i=0; i < npad; i++) { if (Y[i] != padval) { iret = i; fprintf(stderr, "OVERWRITE %f IN POSTPAD %d past end of Y!!\n", Y[i], i+1); } } return(iret); }
static TYPE *dupvec(int npad, int N, TYPE *X, int incX) { int i, n; TYPE *y; incX = Mabs(incX); n = 1+(N-1)*incX + 2*npad; y = malloc(ATL_sizeof*n); assert(y); #ifdef TCPLX n *= 2; #endif for (i=0; i < n; i++) y[i] = X[i]; return(y); }
void Mjoin( PATL, Mjoin( UPR, f77scal ) ) ( const int N, const TYPE ALPHA, TYPE * X, const int INCX ) { const F77_INTEGER F77N = N, F77incx = Mabs(INCX); TYPE alpha = ALPHA; if( INCX < 0 ) X -= ( ( 1 - N ) * INCX ) SHIFT; F77rscal( &F77N, &alpha, X, &F77incx ); }
void ATL_ladiv(const TYPE *X, const TYPE *Y, TYPE *Z) { TYPE E, F; /* If X[0], X[1], Y[0], Y[1], &Z[0], &Z[1] is mapped to */ /* real numbers A, B, C, D, *P, *Q */ /* the computation is as below */ /* */ /* if ( fabs(D) < fabs( C) ) { */ /* E = D / C ; */ /* F = C + D*E ; */ /* *P = ( A+B*E ) / F ; */ /* *Q = ( B-A*E ) / F ; */ /* */ /* } else{ */ /* E = C / D ; */ /* F = D + C*E ; */ /* *P = ( B+A*E ) / F ; */ /* *Q = ( -A+B*E ) / F ; */ /* } */ if ( Mabs(Y[1]) < Mabs(Y[0]) ) { E = Y[1]/Y[0]; F = Y[0] + Y[1]*E; *(Z) = ( X[0] + X[1]*E ) / F ; *(Z+1) = ( X[1] - X[0]*E ) / F ; } else { E = Y[0]/Y[1]; F = Y[1] + Y[0]*E; *(Z) = (X[1]+ X[0]*E ) / F ; *(Z+1) = (-X[0] + X[1]*E ) / F ; } } /* END AL_ladiv */
TYPE Mjoin( PATLU, Mjoin( PRE, f77nrm2 ) ) #endif ( const int N, const TYPE * X, const int INCX ) { TYPE nrm2; const F77_INTEGER F77N=N, F77incx = Mabs(INCX); if( INCX < 0 ) X -= ( ( 1 - N ) * INCX ) SHIFT; F77nrm2( &F77N, X, &F77incx, &nrm2 ); return( nrm2 ); }
TYPE Mjoin( PATLU, Mjoin( PRE, f77asum ) ) #endif ( const int N, const TYPE * X, const int INCX ) { TYPE asum; const F77_INTEGER F77N = N, F77incx = Mabs(INCX); if( INCX < 0 ) X -= ( ( 1 - N ) * INCX ) SHIFT; asum=F77asum( &F77N, X, &F77incx ); return( asum ); }
int checkY(int N, TYPE *Yg, int incYg, TYPE *Yc, int incYc) { int i, iret=0; TYPE eps, diff; TYPE Mjoin(PATL, epsilon)(void); eps = Mjoin(PATL,epsilon)(); for (i=0; i < N; i++, Yg += incYg, Yc += incYc) { diff = *Yg - *Yc; diff = Mabs(diff); if (diff > 3*eps) { iret = i; fprintf(stderr, "ERROR: Y[%d], correct=%f, computed=%f\n", i, *Yg, *Yc); } } return(iret); }
TYPE Mjoin(PATL,tpnrm1)(const enum ATLAS_UPLO UPLO, const enum ATLAS_DIAG DIAG, const int N, const TYPE *A) /* * Calculates the 1-norm of a triangular packed matrix */ { int i, iaij, j; TYPE max=0.0, t0; if( UPLO == AtlasUpper ) { for( j = 0, iaij= 0; j < N; j++ ) { t0 = ATL_rzero; for( i = 0; i < j; i++, iaij += (1 SHIFT) ) { #ifdef TREAL t0 += Mabs( A[iaij] ); #else t0 += Mabs( A[iaij] ) + Mabs( A[iaij+1] ); #endif } if( DIAG == AtlasNonUnit ) t0 += ATL_rone; if (t0 > max) max = t0; iaij += (1 SHIFT); } } else { for( j = N-1, iaij = ((((N-1)*(N+2)) >> 1) SHIFT); j >= 0; j-- ) { t0 = ATL_rzero; if( DIAG == AtlasNonUnit ) t0 += ATL_rone; iaij += (1 SHIFT); for( i = j+1; i < N; i++, iaij += (1 SHIFT) ) { #ifdef TREAL t0 += Mabs( A[iaij] ); #else t0 += Mabs( A[iaij] ) + Mabs( A[iaij+1] ); #endif } if (t0 > max) max = t0; iaij -= ( ( N - j ) << (1 SHIFT) ) + (1 SHIFT); } } return( max ); }
void Mjoin( PATL, f77scal ) ( const int N, const SCALAR ALPHA, TYPE * X, const int INCX ) { const F77_INTEGER F77N = N, F77incx = Mabs(INCX); #ifdef TCPLX TYPE alpha[2]; *alpha = *ALPHA; alpha[1] = ALPHA[1]; #else TYPE alpha = ALPHA; #endif if( INCX < 0 ) X -= ( ( 1 - N ) * INCX ) SHIFT; F77scal( &F77N, SADD alpha, X, &F77incx ); }
TYPE Mjoin(PATL,gediffnrm1) (const int M, const int N, const TYPE *A, const int lda, const TYPE *B, const int ldb) /* * Calculates the 1-norm of (A-B) */ { const int lda2 = lda SHIFT, ldb2 = ldb SHIFT; const int M2 = M SHIFT; int i, j; TYPE max=0.0, t0; for (j=0; j < N; j++) { t0 = ATL_rzero; for (i=0; i != M2; i++) t0 += Mabs(A[i] - B[i]); if (t0 > max) max = t0; A += lda2; B += ldb2; } return(max); }
int Mjoin( PATL, f77amin ) ( const int N, const TYPE * X, const int INCX ) { const F77_INTEGER F77N = N, F77incx = Mabs(INCX); int imin = 0; int i ; if( N > 0 ) { if( INCX < 0 ) X -= ( ( 1 - N ) * INCX ) SHIFT; /* for(i=0; i<N; i++) */ /* printf("%f\n",X[i]); */ imin = F77amin( &F77N, X, &F77incx ); /* printf("%d\n",imin); */ } return( imin ); }
int mmtst(void) { char fnam[80]; #if defined(LDA) && LDA != 0 const int lda=LDA; #else const int lda=2*LDA2; #endif #if defined(LDB) && LDB != 0 const int ldb=LDB; #else const int ldb=2*LDB2; #endif #if defined(LDC) && LDC != 0 const int ldc=LDC; #else const int ldc=2*LDC2; #endif int nA, nB; #ifdef TCPLX int inca, incb, incc; const TYPE one=1.0, none=(-1.0); #if (ALPHA == 1) TYPE alpha[2] = {1.0, 0.0}; #elif (ALPHA == -1) TYPE alpha[2] = {-1.0, 0.0}; #else TYPE alpha[2] = {2.3, 0.0}; #endif #if (BETA == 1) TYPE beta[2] = {1.0, 0.0}; #elif (BETA == -1) TYPE beta[2] = {-1.0, 0.0}; #elif (BETA == 0) TYPE beta[2] = {0.0, 0.0}; #else TYPE beta[2] = {1.3, 0.0}; #endif #else #ifdef ALPHA TYPE alpha=ALPHA; #else TYPE alpha=1.0; #endif #ifdef BETA TYPE beta=BETA; #else TYPE beta=1.0; #endif #endif const TYPE rone=1.0, rnone=(-1.0); void *va=NULL, *vb=NULL, *vc=NULL; TYPE *C0, *C1, *A, *B; TYPE diff, tmp; int i, j, k, n, nerr; int M=MB, N=NB, K=KB; TYPE ErrBound; if (!M) M = MB0; if (!N) N = NB0; if (!K) K = KB0; #ifdef TREAL ErrBound = 2.0 * (Mabs(alpha) * 2.0*K*EPS + Mabs(beta) * EPS) + EPS; #else diff = Mabs(*alpha) + Mabs(alpha[1]); tmp = Mabs(*beta) + Mabs(beta[1]); ErrBound = 2.0 * (diff*8.0*K*EPS + tmp*EPS) + EPS; #endif #ifdef NoTransA nA = K; #else nA = M; #endif #ifdef NoTransB nB = N; #else nB = K; #endif #ifdef TCPLX inca = lda*nA; incb = ldb*nB; #endif #ifdef ATL_MinMMAlign va = malloc(ATL_MinMMAlign + lda*nA*ATL_sizeof); vb = malloc(ATL_MinMMAlign + ldb*nB*ATL_sizeof); vc = C0 = malloc(2*ldc*N*ATL_sizeof); assert(va && vb && C0); A = (TYPE *) ( ( ((size_t) va)/ATL_MinMMAlign ) * ATL_MinMMAlign + ATL_MinMMAlign ); B = (TYPE *) ( ( ((size_t) vb)/ATL_MinMMAlign ) * ATL_MinMMAlign + ATL_MinMMAlign ); #else C0 = vc = malloc( (2*ldc*N + lda*nA + ldb*nB) * ATL_sizeof); assert(vc); A = C1 + (ldc * N SHIFT); B = A + (lda * nA SHIFT); #endif C1 = C0 + (ldc * N SHIFT); for (n=lda*nA SHIFT, i=0; i < n; i++) A[i] = dumb_rand(); for (n=ldb*nB SHIFT, i=0; i < n; i++) B[i] = dumb_rand(); for (n=ldc*N SHIFT, i=0; i < n; i++) C0[i] = C1[i] = dumb_rand(); tst_mm(M, N, K, alpha, A, lda, B, ldb, beta, C0, ldc); NBmm(M, N, K, alpha, A, lda, B, ldb, beta, C1, ldc); nerr = 0; for (j=0; j < N; j++) { for (i=0; i < M SHIFT; i++) { k = i + j*(ldc SHIFT); diff = C0[k] - C1[k]; if (diff < 0.0) diff = -diff; if (diff > ErrBound) { fprintf(stderr, "C(%d,%d) : expected=%f, got=%f\n", i, j, C0[k], C1[k]); nerr++; } } } free(vc); if (va) free(va); if (vb) free(vb); return(nerr); }