C++ (Cpp) magma_cblas_cdotuの例

プログラミング言語: C++ (Cpp)

メソッド/関数: magma_cblas_cdotu

hotexamples.comのコード掲載数: 2

C++ (Cpp) magma_cblas_cdotu - 2件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたC++ (Cpp)のmagma_cblas_cdotuの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

ファイル: clatrsd.cpp プロジェクト: maxhutch/magma

magma_int_t magma_clatrsd(
    magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_bool_t normin,
    magma_int_t n, const magmaFloatComplex *A, magma_int_t lda,
    magmaFloatComplex lambda,
    magmaFloatComplex *x,
    float *scale, float *cnorm,
    magma_int_t *info)
{
#define A(i,j) (A + (i) + (j)*lda)

    /* constants */
    const magma_int_t ione = 1;
    const float d_half = 0.5;
    const magmaFloatComplex c_zero = MAGMA_C_ZERO;
    const magmaFloatComplex c_one  = MAGMA_C_ONE;

    /* System generated locals */
    magma_int_t len;
    magmaFloatComplex ztmp;

    /* Local variables */
    magma_int_t i, j;
    float xj, rec, tjj;
    magma_int_t jinc;
    float xbnd;
    magma_int_t imax;
    float tmax;
    magmaFloatComplex tjjs;
    float xmax, grow;

    float tscal;
    magmaFloatComplex uscal;
    magma_int_t jlast;
    magmaFloatComplex csumj;

    float bignum;
    magma_int_t jfirst;
    float smlnum;

    /* Function Body */
    *info = 0;
    magma_int_t upper  = (uplo  == MagmaUpper);
    magma_int_t notran = (trans == MagmaNoTrans);
    magma_int_t nounit = (diag  == MagmaNonUnit);

    /* Test the input parameters. */
    if ( ! upper && uplo != MagmaLower ) {
        *info = -1;
    }
    else if (! notran &&
             trans != MagmaTrans &&
             trans != MagmaConjTrans) {
        *info = -2;
    }
    else if ( ! nounit && diag != MagmaUnit ) {
        *info = -3;
    }
    else if ( ! (normin == MagmaTrue) &&
              ! (normin == MagmaFalse) ) {
        *info = -4;
    }
    else if ( n < 0 ) {
        *info = -5;
    }
    else if ( lda < max(1,n) ) {
        *info = -7;
    }
    if ( *info != 0 ) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }

    /* Quick return if possible */
    if ( n == 0 ) {
        return *info;
    }

    /* Determine machine dependent parameters to control overflow. */
    smlnum = lapackf77_slamch( "Safe minimum" );
    bignum = 1. / smlnum;
    lapackf77_slabad( &smlnum, &bignum );
    smlnum /= lapackf77_slamch( "Precision" );
    bignum = 1. / smlnum;
    *scale = 1.;

    if ( normin == MagmaFalse ) {
        /* Compute the 1-norm of each column, not including the diagonal. */
        if ( upper ) {
            /* A is upper triangular. */
            cnorm[0] = 0.;
            for( j = 1; j < n; ++j ) {
                cnorm[j] = magma_cblas_scasum( j, A(0,j), ione );
            }
        }
        else {
            /* A is lower triangular. */
            for( j = 0; j < n-1; ++j ) {
                cnorm[j] = magma_cblas_scasum( n-(j+1), A(j+1,j), ione );
            }
            cnorm[n-1] = 0.;
        }
    }

    /* Scale the column norms by TSCAL if the maximum element in CNORM is */
    /* greater than BIGNUM/2. */
    imax = blasf77_isamax( &n, &cnorm[0], &ione ) - 1;
    tmax = cnorm[imax];
    if ( tmax <= bignum * 0.5 ) {
        tscal = 1.;
    }
    else {
        tscal = 0.5 / (smlnum * tmax);
        blasf77_sscal( &n, &tscal, &cnorm[0], &ione );
    }

    /* ================================================================= */
    /* Compute a bound on the computed solution vector to see if the */
    /* Level 2 BLAS routine CTRSV can be used. */
    xmax = 0.;
    for( j = 0; j < n; ++j ) {
        xmax = max( xmax, 0.5*MAGMA_C_ABS1( x[j] ));
    }
    xbnd = xmax;

    if ( notran ) {
        /* ---------------------------------------- */
        /* Compute the growth in A * x = b. */
        if ( upper ) {
            jfirst = n-1;
            jlast  = 0;
            jinc   = -1;
        }
        else {
            jfirst = 0;
            jlast  = n;
            jinc   = 1;
        }

        if ( tscal != 1. ) {
            grow = 0.;
            goto L60;
        }

        /* A is non-unit triangular. */
        /* Compute GROW = 1/G(j) and XBND = 1/M(j). */
        /* Initially, G(0) = max{x(i), i=1,...,n}. */
        grow = 0.5 / max( xbnd, smlnum );
        xbnd = grow;
        for( j = jfirst; (jinc < 0 ? j >= jlast : j < jlast); j += jinc ) {
            /* Exit the loop if the growth factor is too small. */
            if ( grow <= smlnum ) {
                goto L60;
            }

            if ( nounit ) {
                tjjs = *A(j,j) - lambda;
            }
            else {
                tjjs = c_one - lambda;
            }
            tjj = MAGMA_C_ABS1( tjjs );

            if ( tjj >= smlnum ) {
                /* M(j) = G(j-1) / abs(A(j,j)) */
                xbnd = min( xbnd, min(1.,tjj)*grow );
            }
            else {
                /* M(j) could overflow, set XBND to 0. */
                xbnd = 0.;
            }

            if ( tjj + cnorm[j] >= smlnum ) {
                /* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) */
                grow *= (tjj / (tjj + cnorm[j]));
            }
            else {
                /* G(j) could overflow, set GROW to 0. */
                grow = 0.;
            }
        }
        grow = xbnd;
L60:
        ;
    }
    else {
        /* ---------------------------------------- */
        /* Compute the growth in A**T * x = b  or  A**H * x = b. */
        if ( upper ) {
            jfirst = 0;
            jlast  = n;
            jinc   = 1;
        }
        else {
            jfirst = n-1;
            jlast  = 0;
            jinc   = -1;
        }

        if ( tscal != 1. ) {
            grow = 0.;
            goto L90;
        }

        /* A is non-unit triangular. */
        /* Compute GROW = 1/G(j) and XBND = 1/M(j). */
        /* Initially, M(0) = max{x(i), i=1,...,n}. */
        grow = 0.5 / max( xbnd, smlnum );
        xbnd = grow;
        for( j = jfirst; (jinc < 0 ? j >= jlast : j < jlast); j += jinc ) {
            /* Exit the loop if the growth factor is too small. */
            if ( grow <= smlnum ) {
                goto L90;
            }

            /* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) */
            xj = 1. + cnorm[j];
            grow = min( grow, xbnd / xj );

            if ( nounit ) {
                tjjs = *A(j,j) - lambda;
            }
            else {
                tjjs = c_one - lambda;
            }
            tjj = MAGMA_C_ABS1( tjjs );

            if ( tjj >= smlnum ) {
                /* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) */
                if ( xj > tjj ) {
                    xbnd *= (tjj / xj);
                }
            }
            else {
                /* M(j) could overflow, set XBND to 0. */
                xbnd = 0.;
            }
        }
        grow = min( grow, xbnd );
L90:
        ;
    }
        
    /* ================================================================= */
    /* Due to modified diagonal, we can't use regular BLAS ctrsv. */
    
    /* Use a Level 1 BLAS solve, scaling intermediate results. */
    if ( xmax > bignum * 0.5 ) {
        /* Scale X so that its components are less than or equal to */
        /* BIGNUM in absolute value. */
        *scale = (bignum * 0.5) / xmax;
        blasf77_csscal( &n, scale, &x[0], &ione );
        xmax = bignum;
    }
    else {
        xmax *= 2.;
    }

    if ( notran ) {
        /* ---------------------------------------- */
        /* Solve A * x = b */
        for( j = jfirst; (jinc < 0 ? j >= jlast : j < jlast); j += jinc ) {
            /* Compute x(j) = b(j) / A(j,j), scaling x if necessary. */
            xj = MAGMA_C_ABS1( x[j] );
            if ( nounit ) {
                tjjs = (*A(j,j) - lambda ) * tscal;
            }
            else {
                tjjs = (c_one - lambda) * tscal;
                if ( tscal == 1. ) {
                    goto L110;
                }
            }
            tjj = MAGMA_C_ABS1( tjjs );
            if ( tjj > smlnum ) {
                /* abs(A(j,j)) > SMLNUM: */
                if ( tjj < 1. ) {
                    if ( xj > tjj * bignum ) {
                        /* Scale x by 1/b(j). */
                        rec = 1. / xj;
                        blasf77_csscal( &n, &rec, &x[0], &ione );
                        *scale *= rec;
                        xmax *= rec;
                    }
                }
                x[j] = x[j] / tjjs;
                xj   = MAGMA_C_ABS1( x[j] );
            }
            else if ( tjj > 0. ) {
                /* 0 < abs(A(j,j)) <= SMLNUM: */
                if ( xj > tjj * bignum ) {
                    /* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM */
                    /* to avoid overflow when dividing by A(j,j). */
                    rec = (tjj * bignum) / xj;
                    if ( cnorm[j] > 1. ) {
                        /* Scale by 1/CNORM(j) to avoid overflow when */
                        /* multiplying x(j) times column j. */
                        rec /= cnorm[j];
                    }
                    blasf77_csscal( &n, &rec, &x[0], &ione );
                    *scale *= rec;
                    xmax *= rec;
                }
                x[j] = x[j] / tjjs;
                xj   = MAGMA_C_ABS1( x[j] );
            }
            else {
                /* A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and */
                /* scale = 0, and compute a solution to A*x = 0. */
                for( i = 0; i < n; ++i ) {
                    x[i] = c_zero;
                }
                x[j]   = c_one;
                xj     = 1.;
                *scale = 0.;
                xmax   = 0.;
            }
L110:

            /* Scale x if necessary to avoid overflow when adding a */
            /* multiple of column j of A. */
            if ( xj > 1. ) {
                rec = 1. / xj;
                if ( cnorm[j] > (bignum - xmax) * rec ) {
                    /* Scale x by 1/(2*abs(x(j))). */
                    rec *= 0.5;
                    blasf77_csscal( &n, &rec, &x[0], &ione );
                    *scale *= rec;
                }
            }
            else if ( xj * cnorm[j] > bignum - xmax ) {
                /* Scale x by 1/2. */
                blasf77_csscal( &n, &d_half, &x[0], &ione );
                *scale *= 0.5;
            }

            if ( upper ) {
                if ( j > 0 ) {
                    /* Compute the update */
                    /* x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) */
                    len = j;
                    ztmp = -tscal * x[j];
                    blasf77_caxpy( &len, &ztmp, A(0,j), &ione, &x[0], &ione );
                    i = blasf77_icamax( &len, &x[0], &ione ) - 1;
                    xmax = MAGMA_C_ABS1( x[i] );
                }
            }
            else {
                if ( j < n-1 ) {
                    /* Compute the update */
                    /* x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) */
                    len = n - (j+1);
                    ztmp = -tscal * x[j];
                    blasf77_caxpy( &len, &ztmp, A(j+1,j), &ione, &x[j + 1], &ione );
                    i = j + blasf77_icamax( &len, &x[j + 1], &ione );
                    xmax = MAGMA_C_ABS1( x[i] );
                }
            }
        }
    }
    else if ( trans == MagmaTrans ) {
        /* ---------------------------------------- */
        /* Solve A**T * x = b */
        for( j = jfirst; (jinc < 0 ? j >= jlast : j < jlast); j += jinc ) {
            /* Compute x(j) = b(j) - sum A(k,j)*x(k). */
            /*                       k<>j             */
            xj = MAGMA_C_ABS1( x[j] );
            uscal = MAGMA_C_MAKE( tscal, 0. );
            rec = 1. / max( xmax, 1. );
            if ( cnorm[j] > (bignum - xj) * rec ) {
                /* If x(j) could overflow, scale x by 1/(2*XMAX). */
                rec *= 0.5;
                if ( nounit ) {
                    tjjs = (*A(j,j) - lambda) * tscal;
                }
                else {
                    tjjs = (c_one - lambda) * tscal;
                }
                tjj = MAGMA_C_ABS1( tjjs );
                if ( tjj > 1. ) {
                    /* Divide by A(j,j) when scaling x if A(j,j) > 1. */
                    rec = min( 1., rec * tjj );
                    uscal = uscal / tjjs;
                }
                if ( rec < 1. ) {
                    blasf77_csscal( &n, &rec, &x[0], &ione );
                    *scale *= rec;
                    xmax *= rec;
                }
            }

            csumj = c_zero;
            if ( uscal == c_one ) {
                /* If the scaling needed for A in the dot product is 1, */
                /* call CDOTU to perform the dot product. */
                if ( upper ) {
                    csumj = magma_cblas_cdotu( j, A(0,j), ione, &x[0], ione );
                }
                else if ( j < n-1 ) {
                    csumj = magma_cblas_cdotu( n-(j+1), A(j+1,j), ione, &x[j+1], ione );
                }
            }
            else {
                /* Otherwise, use in-line code for the dot product. */
                if ( upper ) {
                    for( i = 0; i < j; ++i ) {
                        csumj += (*A(i,j) * uscal) * x[i];
                    }
                }
                else if ( j < n-1 ) {
                    for( i = j+1; i < n; ++i ) {
                        csumj += (*A(i,j) * uscal) * x[i];
                    }
                }
            }

            if ( uscal == MAGMA_C_MAKE( tscal, 0. )) {
                /* Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) */
                /* was not used to scale the dotproduct. */
                x[j] -= csumj;
                xj = MAGMA_C_ABS1( x[j] );
                if ( nounit ) {
                    tjjs = (*A(j,j) - lambda) * tscal;
                }
                else {
                    tjjs = (c_one - lambda) * tscal;
                    if ( tscal == 1. ) {
                        goto L160;
                    }
                }

                /* Compute x(j) = x(j) / A(j,j), scaling if necessary. */
                tjj = MAGMA_C_ABS1( tjjs );
                if ( tjj > smlnum ) {
                    /* abs(A(j,j)) > SMLNUM: */
                    if ( tjj < 1. ) {
                        if ( xj > tjj * bignum ) {
                            /* Scale X by 1/abs(x(j)). */
                            rec = 1. / xj;
                            blasf77_csscal( &n, &rec, &x[0], &ione );
                            *scale *= rec;
                            xmax   *= rec;
                        }
                    }
                    x[j] = x[j] / tjjs;
                }
                else if ( tjj > 0. ) {
                    /* 0 < abs(A(j,j)) <= SMLNUM: */
                    if ( xj > tjj * bignum ) {
                        /* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */
                        rec = (tjj * bignum) / xj;
                        blasf77_csscal( &n, &rec, &x[0], &ione );
                        *scale *= rec;
                        xmax   *= rec;
                    }
                    x[j] = x[j] / tjjs;
                }
                else {
                    /* A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and */
                    /* scale = 0 and compute a solution to A**T *x = 0. */
                    for( i = 0; i < n; ++i ) {
                        x[i] = c_zero;
                    }
                    x[j]   = c_one;
                    *scale = 0.;
                    xmax   = 0.;
                }
L160:
                ;
            }
            else {
                /* Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot */
                /* product has already been divided by 1/A(j,j). */
                x[j] = (x[j] / tjjs) - csumj;
            }
            xmax = max( xmax, MAGMA_C_ABS1( x[j] ));
        }
    }
    else {
        /* ---------------------------------------- */
        /* Solve A**H * x = b */
        for( j = jfirst; (jinc < 0 ? j >= jlast : j < jlast); j += jinc ) {
            /* Compute x(j) = b(j) - sum A(k,j)*x(k). */
            /*                       k<>j             */
            xj = MAGMA_C_ABS1( x[j] );
            uscal = MAGMA_C_MAKE( tscal, 0. );
            rec = 1. / max(xmax, 1.);
            if ( cnorm[j] > (bignum - xj) * rec ) {
                /* If x(j) could overflow, scale x by 1/(2*XMAX). */
                rec *= 0.5;
                if ( nounit ) {
                    tjjs = MAGMA_C_CONJ( *A(j,j) - lambda ) * tscal;
                }
                else {
                    tjjs = (c_one - lambda) * tscal;
                }
                tjj = MAGMA_C_ABS1( tjjs );
                if ( tjj > 1. ) {
                    /* Divide by A(j,j) when scaling x if A(j,j) > 1. */
                    rec = min( 1., rec * tjj );
                    uscal = uscal / tjjs;
                }
                if ( rec < 1. ) {
                    blasf77_csscal( &n, &rec, &x[0], &ione );
                    *scale *= rec;
                    xmax   *= rec;
                }
            }

            csumj = c_zero;
            if ( uscal == c_one ) {
                /* If the scaling needed for A in the dot product is 1, */
                /* call CDOTC to perform the dot product. */
                if ( upper ) {
                    csumj = magma_cblas_cdotc( j, A(0,j), ione, &x[0], ione );
                }
                else if ( j < n-1 ) {
                    csumj = magma_cblas_cdotc( n-(j+1), A(j+1,j), ione, &x[j+1], ione );
                }
            }
            else {
                /* Otherwise, use in-line code for the dot product. */
                if ( upper ) {
                    for( i = 0; i < j; ++i ) {
                        csumj += (MAGMA_C_CONJ( *A(i,j) ) * uscal) * x[i];
                    }
                }
                else if ( j < n-1 ) {
                    for( i = j + 1; i < n; ++i ) {
                        csumj += (MAGMA_C_CONJ( *A(i,j) ) * uscal) * x[i];
                    }
                }
            }

            if ( uscal == tscal ) {
                /* Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) */
                /* was not used to scale the dotproduct. */
                x[j] -= csumj;
                xj = MAGMA_C_ABS1( x[j] );
                if ( nounit ) {
                    tjjs = MAGMA_C_CONJ( *A(j,j) - lambda ) * tscal;
                }
                else {
                    tjjs = (c_one - lambda) * tscal;
                    if ( tscal == 1. ) {
                        goto L210;
                    }
                }

                /* Compute x(j) = x(j) / A(j,j), scaling if necessary. */
                tjj = MAGMA_C_ABS1( tjjs );
                if ( tjj > smlnum ) {
                    /* abs(A(j,j)) > SMLNUM: */
                    if ( tjj < 1. ) {
                        if ( xj > tjj * bignum ) {
                            /* Scale X by 1/abs(x(j)). */
                            rec = 1. / xj;
                            blasf77_csscal( &n, &rec, &x[0], &ione );
                            *scale *= rec;
                            xmax   *= rec;
                        }
                    }
                    x[j] = x[j] / tjjs;
                }
                else if ( tjj > 0. ) {
                    /* 0 < abs(A(j,j)) <= SMLNUM: */
                    if ( xj > tjj * bignum ) {
                        /* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */
                        rec = (tjj * bignum) / xj;
                        blasf77_csscal( &n, &rec, &x[0], &ione );
                        *scale *= rec;
                        xmax   *= rec;
                    }
                    x[j] = x[j] / tjjs;
                }
                else {
                    /* A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and */
                    /* scale = 0 and compute a solution to A**H *x = 0. */
                    for( i = 0; i < n; ++i ) {
                        x[i] = c_zero;
                    }
                    x[j] = c_one;
                    *scale = 0.;
                    xmax   = 0.;
                }
L210:
                ;
            }
            else {
                /* Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot */
                /* product has already been divided by 1/A(j,j). */
                x[j] = (x[j] / tjjs) - csumj;
            }
            xmax = max( xmax, MAGMA_C_ABS1( x[j] ));
        }
    }
    *scale /= tscal;
    
    /* Scale the column norms by 1/TSCAL for return. */
    if ( tscal != 1. ) {
        float d = 1. / tscal;
        blasf77_sscal( &n, &d, &cnorm[0], &ione );
    }

    return *info;
} /* end clatrsd */

コード例 #2

ファイルを表示

ファイル: testing_cblas_c.cpp プロジェクト: cjy7117/DVFS-MAGMA

// ----------------------------------------
int main( int argc, char** argv )
{
    TESTING_INIT();
    
    //real_Double_t   t_m, t_c, t_f;
    magma_int_t ione = 1;
    
    magmaFloatComplex  *A, *B;
    float diff, error;
    magma_int_t ISEED[4] = {0,0,0,1};
    magma_int_t m, n, k, size, maxn, ld;
    magmaFloatComplex x2_m, x2_c;  // complex x for magma, cblas/fortran blas respectively
    float x_m, x_c;  // x for magma, cblas/fortran blas respectively
    
    magma_opts opts;
    parse_opts( argc, argv, &opts );
    
    opts.tolerance = max( 100., opts.tolerance );
    float tol = opts.tolerance * lapackf77_slamch("E");
    gTol = tol;
    
    printf( "!! Calling these CBLAS and Fortran BLAS sometimes crashes (segfault), which !!\n"
            "!! is why we use wrappers. It does not necesarily indicate a bug in MAGMA.  !!\n"
            "\n"
            "Diff  compares MAGMA wrapper        to CBLAS and BLAS function; should be exactly 0.\n"
            "Error compares MAGMA implementation to CBLAS and BLAS function; should be ~ machine epsilon.\n"
            "\n" );
    
    float total_diff  = 0.;
    float total_error = 0.;
    int inc[] = { 1 };  //{ -2, -1, 1, 2 };  //{ 1 };  //{ -1, 1 };
    int ninc = sizeof(inc)/sizeof(*inc);
    
    for( int itest = 0; itest < opts.ntest; ++itest ) {
        m = opts.msize[itest];
        n = opts.nsize[itest];
        k = opts.ksize[itest];
        
    for( int iincx = 0; iincx < ninc; ++iincx ) {
        magma_int_t incx = inc[iincx];
        
    for( int iincy = 0; iincy < ninc; ++iincy ) {
        magma_int_t incy = inc[iincy];
        
        printf("=========================================================================\n");
        printf( "m=%d, n=%d, k=%d, incx = %d, incy = %d\n",
                (int) m, (int) n, (int) k, (int) incx, (int) incy );
        printf( "Function              MAGMA     CBLAS     BLAS        Diff      Error\n"
                "                      msec      msec      msec\n" );
        
        // allocate matrices
        // over-allocate so they can be any combination of
        // {m,n,k} * {abs(incx), abs(incy)} by
        // {m,n,k} * {abs(incx), abs(incy)}
        maxn = max( max( m, n ), k ) * max( abs(incx), abs(incy) );
        ld = max( 1, maxn );
        size = ld*maxn;
        magma_cmalloc_pinned( &A,  size );  assert( A   != NULL );
        magma_cmalloc_pinned( &B,  size );  assert( B   != NULL );
        
        // initialize matrices
        lapackf77_clarnv( &ione, ISEED, &size, A );
        lapackf77_clarnv( &ione, ISEED, &size, B );
        
        printf( "Level 1 BLAS ----------------------------------------------------------\n" );
        
        
        // ----- test SCASUM
        // get one-norm of column j of A
        if ( incx > 0 && incx == incy ) {  // positive, no incy
            diff  = 0;
            error = 0;
            for( int j = 0; j < k; ++j ) {
                x_m = magma_cblas_scasum( m, A(0,j), incx );
                
                x_c = cblas_scasum( m, A(0,j), incx );
                diff += fabs( x_m - x_c );
                
                x_c = blasf77_scasum( &m, A(0,j), &incx );
                error += fabs( (x_m - x_c) / (m*x_c) );
            }
            output( "scasum", diff, error );
            total_diff  += diff;
            total_error += error;
        }
        
        // ----- test SCNRM2
        // get two-norm of column j of A
        if ( incx > 0 && incx == incy ) {  // positive, no incy
            diff  = 0;
            error = 0;
            for( int j = 0; j < k; ++j ) {
                x_m = magma_cblas_scnrm2( m, A(0,j), incx );
                
                x_c = cblas_scnrm2( m, A(0,j), incx );
                diff += fabs( x_m - x_c );
                
                x_c = blasf77_scnrm2( &m, A(0,j), &incx );
                error += fabs( (x_m - x_c) / (m*x_c) );
            }
            output( "scnrm2", diff, error );
            total_diff  += diff;
            total_error += error;
        }
        
        // ----- test CDOTC
        // dot columns, Aj^H Bj
        diff  = 0;
        error = 0;
        for( int j = 0; j < k; ++j ) {
            // MAGMA implementation, not just wrapper
            x2_m = magma_cblas_cdotc( m, A(0,j), incx, B(0,j), incy );
            
            // crashes on MKL 11.1.2, ILP64
            #if ! defined( MAGMA_WITH_MKL )
                #ifdef COMPLEX
                cblas_cdotc_sub( m, A(0,j), incx, B(0,j), incy, &x2_c );
                #else
                x2_c = cblas_cdotc( m, A(0,j), incx, B(0,j), incy );
                #endif
                error += fabs( x2_m - x2_c ) / fabs( m*x2_c );
            #endif
            
            // crashes on MacOS 10.9
            #if ! defined( __APPLE__ )
                x2_c = blasf77_cdotc( &m, A(0,j), &incx, B(0,j), &incy );
                error += fabs( x2_m - x2_c ) / fabs( m*x2_c );
            #endif
        }
        output( "cdotc", diff, error );
        total_diff  += diff;
        total_error += error;
        total_error += error;
        
        // ----- test CDOTU
        // dot columns, Aj^T * Bj
        diff  = 0;
        error = 0;
        for( int j = 0; j < k; ++j ) {
            // MAGMA implementation, not just wrapper
            x2_m = magma_cblas_cdotu( m, A(0,j), incx, B(0,j), incy );
            
            // crashes on MKL 11.1.2, ILP64
            #if ! defined( MAGMA_WITH_MKL )
                #ifdef COMPLEX
                cblas_cdotu_sub( m, A(0,j), incx, B(0,j), incy, &x2_c );
                #else
                x2_c = cblas_cdotu( m, A(0,j), incx, B(0,j), incy );
                #endif
                error += fabs( x2_m - x2_c ) / fabs( m*x2_c );
            #endif
            
            // crashes on MacOS 10.9
            #if ! defined( __APPLE__ )
                x2_c = blasf77_cdotu( &m, A(0,j), &incx, B(0,j), &incy );
                error += fabs( x2_m - x2_c ) / fabs( m*x2_c );
            #endif
        }
        output( "cdotu", diff, error );
        total_diff  += diff;
        total_error += error;
        
        // tell user about disabled functions
        #if defined( MAGMA_WITH_MKL )
            printf( "cblas_cdotc and cblas_cdotu disabled with MKL (segfaults)\n" );
        #endif
        
        #if defined( __APPLE__ )
            printf( "blasf77_cdotc and blasf77_cdotu disabled on MacOS (segfaults)\n" );
        #endif
            
        // cleanup
        magma_free_pinned( A );
        magma_free_pinned( B );
        fflush( stdout );
    }}}  // itest, incx, incy
    
    // TODO use average error?
    printf( "sum diffs  = %8.2g, MAGMA wrapper        compared to CBLAS and Fortran BLAS; should be exactly 0.\n"
            "sum errors = %8.2e, MAGMA implementation compared to CBLAS and Fortran BLAS; should be ~ machine epsilon.\n\n",
            total_diff, total_error );
    if ( total_diff != 0. ) {
        printf( "some tests failed diff == 0.; see above.\n" );
    }
    else {
        printf( "all tests passed diff == 0.\n" );
    }
    
    TESTING_FINALIZE();
    
    int status = (total_diff != 0.);
    return status;
}