Ejemplo n.º 1
0
void THBlas_(swap)(long n, real *x, long incx, real *y, long incy)
{
  if(n == 1)
  {
    incx = 1;
    incy = 1;
  }

#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
  if( (n <= INT_MAX) && (incx <= INT_MAX) && (incy <= INT_MAX) )
  {
    int i_n = (int)n;
    int i_incx = (int)incx;
    int i_incy = (int)incy;

#if defined(TH_REAL_IS_DOUBLE)
    cblas_dswap(i_n, x, i_incx, y, i_incy);
#else
    cblas_sswap(i_n, x, i_incx, y, i_incy);
#endif
    return;
  }
#endif
  {
    long i;
    for(i = 0; i < n; i++)
    {
      real z = x[i*incx];
      x[i*incx] = y[i*incy];
      y[i*incy] = z;
    }
  }
}
Ejemplo n.º 2
0
void THBlas_swap(long size, real *x, long xStride, real *y, long yStride)
{
  if(size == 1)
  {
    xStride = 1;
    yStride = 1;
  }

#if USE_CBLAS
  if( (size < INT_MAX) && (xStride < INT_MAX) && (yStride < INT_MAX) )
  {
#ifdef USE_DOUBLE
    cblas_dswap(size, x, xStride, y, yStride);
#else
    cblas_sswap(size, x, xStride, y, yStride);
#endif
    return;
  }
#endif
  {
    long i;
    for(i = 0; i < size; i++)
    {
      real z = x[i*xStride];
      x[i*xStride] = y[i*yStride];
      y[i*yStride] = z;
    }
  }
}
Ejemplo n.º 3
0
int main(int argc, char const *argv[])
{
    cblas_dswap(N, X, incx, Y, incy);
    printResult();

    ContextDestroy();

    return 0;
}
Ejemplo n.º 4
0
JNIEXPORT void JNICALL Java_uncomplicate_neanderthal_CBLAS_dswap
(JNIEnv *env, jclass clazz, jint N,
 jobject X, jint offsetX, jint incX,
 jobject Y, jint offsetY, jint incY) {

    double *cX = (double *) (*env)->GetDirectBufferAddress(env, X);
    double *cY = (double *) (*env)->GetDirectBufferAddress(env, Y);
    cblas_dswap(N, cX + offsetX, incX, cY + offsetY, incY);

};
Ejemplo n.º 5
0
void bli_dswap( int n, double* x, int incx, double* y, int incy )
{
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
	cblas_dswap( n,
	             x, incx, 
	             y, incy );
#else
	F77_dswap( &n,
	           x, &incx, 
	           y, &incy );
#endif
}
Ejemplo n.º 6
0
int CORE_dlaswp_ontile(PLASMA_desc descA, int i1, int i2, const int *ipiv, int inc)
{
    int i, j, ip, it;
    double *A1;
    int lda1, lda2;

    /* Change i1 to C notation */
    i1--;

    /* Check parameters */
    if ( descA.nt > 1 ) {
        coreblas_error(1, "Illegal value of descA.nt");
        return -1;
    }
    if ( i1 < 0 ) {
        coreblas_error(2, "Illegal value of i1");
        return -2;
    }
    if ( (i2 <= i1) || (i2 > descA.m) ) {
        coreblas_error(3, "Illegal value of i2");
        return -3;
    }
    if ( ! ( (i2 - i1 - i1%descA.mb -1) < descA.mb ) ) {
        coreblas_error(2, "Illegal value of i1,i2. They have to be part of the same block.");
        return -3;
    }

    if (inc > 0) {
        it = i1 / descA.mb;
        A1 = A(it, 0);
        lda1 = BLKLDD(descA, 0);

        for (j = i1; j < i2; ++j, ipiv+=inc) {
            ip = (*ipiv) - descA.i - 1;
            if ( ip != j )
            {
                it = ip / descA.mb;
                i  = ip % descA.mb;
                lda2 = BLKLDD(descA, it);
                cblas_dswap(descA.n, A1       + j, lda1,
                            A(it, 0) + i, lda2 );
            }
        }
    }
    else
    {
        it = (i2-1) / descA.mb;
        A1 = A(it, 0);
        lda1 = BLKLDD(descA, it);

        i1--;
        ipiv = &ipiv[(1-i2)*inc];
        for (j = i2-1; j > i1; --j, ipiv+=inc) {
            ip = (*ipiv) - descA.i - 1;
            if ( ip != j )
            {
                it = ip / descA.mb;
                i  = ip % descA.mb;
                lda2 = BLKLDD(descA, it);
                cblas_dswap(descA.n, A1       + j, lda1,
                            A(it, 0) + i, lda2 );
            }
        }
    }

    return PLASMA_SUCCESS;
}
Ejemplo n.º 7
0
static void
CORE_dgetrf_rectil_rec(const PLASMA_desc A, int *IPIV, int *info,
                       double *pivot,
                       int thidx,  int thcnt,
                       int column, int width,
                       int ft,     int lt)
{
    int ld, jp, n1, n2, lm, tmpM, piv_sf;
    int ip, j, it, i, ldft;
    int max_i, max_it, thwin;
    double zone  = 1.0;
    double mzone = -1.0;
    double tmp1;
    double tmp2 = 0.;
    double pivval;
    double *Atop, *Atop2, *U, *L;
    double             abstmp1;
    int offset = A.i;

    ldft = BLKLDD(A, 0);
    Atop = A(0, 0) + column * ldft;

    if ( width > 1 ) {
        /* Assumption: N = min( M, N ); */
        n1 = width / 2;
        n2 = width - n1;

        Atop2 = Atop + n1 * ldft;

        CORE_dgetrf_rectil_rec( A, IPIV, info, pivot,
                                thidx, thcnt, column, n1, ft, lt );

        if ( *info != 0 )
            return;

        if (thidx == 0)
        {
            /* Swap to the right */
            int *lipiv = IPIV+column;
            int idxMax = column+n1;
            for (j = column; j < idxMax; ++j, ++lipiv) {
                ip = (*lipiv) - offset - 1;
                if ( ip != j )
                {
                    it = ip / A.mb;
                    i  = ip % A.mb;
                    ld = BLKLDD(A, it);
                    cblas_dswap(n2, Atop2                     + j, ldft,
                                    A(it, 0) + (column+n1)*ld + i, ld   );
                }
            }
            /* Trsm on the upper part */
            U = Atop2 + column;
            cblas_dtrsm( CblasColMajor, CblasLeft, CblasLower,
                         CblasNoTrans, CblasUnit,
                         n1, n2, (zone),
                         Atop  + column, ldft,
                         U,              ldft );

            /* SIgnal to other threads that they can start update */
            CORE_dbarrier_thread( thidx, thcnt );
            pivval = *pivot;
            if ( pivval == 0.0 ) {
                *info = column+n1;
                return;
            } else {
                if ( fabs(pivval) >= sfmin ) {
                    piv_sf = 1;
                    pivval = 1.0 / pivval;
                } else {
                    piv_sf = 0;
                }
            }

            /* First tile */
            {
                L = Atop + column + n1;
                tmpM = min(ldft, A.m) - column - n1;

                /* Scale last column of L */
                if ( piv_sf ) {
                    cblas_dscal( tmpM, (pivval), L+(n1-1)*ldft, 1 );
                } else {
                    int i;
                    Atop2 = L+(n1-1)*ldft;
                    for( i=0; i < tmpM; i++, Atop2++)
                        *Atop2 = *Atop2 / pivval;
                }

                /* Apply the GEMM */
                cblas_dgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
                             tmpM, n2, n1,
                             (mzone), L,      ldft,
                                                 U,      ldft,
                             (zone),  U + n1, ldft );

                /* Search Max in first column of U+n1 */
                tmp2    = U[n1];
                max_it  = ft;
                max_i   = cblas_idamax( tmpM, U+n1, 1 ) + n1;
                tmp1    = U[max_i];
                abstmp1 = fabs(tmp1);
                max_i   += column;
            }
        }
        else
        {
            pivval = *pivot;
            if ( pivval == 0.0 ) {
                *info = column+n1;
                return;
            } else {
                if ( fabs(pivval) >= sfmin ) {
                    piv_sf = 1;
                    pivval = 1.0 / pivval;
                } else {
                    piv_sf = 0;
                }
            }

            ld = BLKLDD( A, ft );
            L  = A( ft, 0 ) + column * ld;
            lm = ft == A.mt-1 ? A.m - ft * A.mb : A.mb;

            U = Atop2 + column;

            /* First tile */
            /* Scale last column of L */
            if ( piv_sf ) {
                cblas_dscal( lm, (pivval), L+(n1-1)*ld, 1 );
            } else {
                int i;
                Atop2 = L+(n1-1)*ld;
                for( i=0; i < lm; i++, Atop2++)
                    *Atop2 = *Atop2 / pivval;
            }

            /* Wait for pivoting and triangular solve to be finished
             * before to really start the update */
            CORE_dbarrier_thread( thidx, thcnt );

            /* Apply the GEMM */
            cblas_dgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
                         lm, n2, n1,
                         (mzone), L,         ld,
                                             U,         ldft,
                         (zone),  L + n1*ld, ld );

            /* Search Max in first column of L+n1*ld */
            max_it  = ft;
            max_i   = cblas_idamax( lm, L+n1*ld, 1 );
            tmp1    = L[n1*ld+max_i];
            abstmp1 = fabs(tmp1);
        }

        /* Update the other blocks */
        for( it = ft+1; it < lt; it++)
        {
            ld = BLKLDD( A, it );
            L  = A( it, 0 ) + column * ld;
            lm = it == A.mt-1 ? A.m - it * A.mb : A.mb;

            /* Scale last column of L */
            if ( piv_sf ) {
                cblas_dscal( lm, (pivval), L+(n1-1)*ld, 1 );
            } else {
                int i;
                Atop2 = L+(n1-1)*ld;
                for( i=0; i < lm; i++, Atop2++)
                    *Atop2 = *Atop2 / pivval;
            }

            /* Apply the GEMM */
            cblas_dgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
                         lm, n2, n1,
                         (mzone), L,          ld,
                                             U,          ldft,
                         (zone),  L + n1*ld,  ld );

            /* Search the max on the first column of L+n1*ld */
            jp = cblas_idamax( lm, L+n1*ld, 1 );
            if ( fabs( L[n1*ld+jp] ) > abstmp1 ) {
                tmp1 = L[n1*ld+jp];
                abstmp1 = fabs(tmp1);
                max_i  = jp;
                max_it = it;
            }
        }

        jp = offset + max_it*A.mb + max_i;
        CORE_damax1_thread( tmp1, thidx, thcnt, &thwin,
                            &tmp2, pivot, jp + 1, IPIV + column + n1 );

        if ( thidx == 0 ) {
            U[n1] = *pivot; /* all threads have the pivot element: no need for synchronization */
        }
        if (thwin == thidx) { /* the thread that owns the best pivot */
            if ( jp-offset != column+n1 ) /* if there is a need to exchange the pivot */
            {
                ld = BLKLDD(A, max_it);
                Atop2 = A( max_it, 0 ) + (column + n1 )* ld + max_i;
                *Atop2 = tmp2;
            }
        }

        CORE_dgetrf_rectil_rec( A, IPIV, info, pivot,
                                thidx, thcnt, column+n1, n2, ft, lt );
        if ( *info != 0 )
            return;

        if ( thidx == 0 )
        {
            /* Swap to the left */
            int *lipiv = IPIV+column+n1;
            int idxMax = column+width;
            for (j = column+n1; j < idxMax; ++j, ++lipiv) {
                ip = (*lipiv) - offset - 1;
                if ( ip != j )
                {
                    it = ip / A.mb;
                    i  = ip % A.mb;
                    ld = BLKLDD(A, it);
                    cblas_dswap(n1, Atop + j,                 ldft,
                                    A(it, 0) + column*ld + i, ld  );
                }
            }
        }

    } else if ( width == 1 ) {

        /* Search maximum for column 0 */
        if ( column == 0 )
        {
            if ( thidx == 0 )
              tmp2 = Atop[column];

            /* First tmp1 */
            ld = BLKLDD(A, ft);
            Atop2   = A( ft, 0 );
            lm      = ft == A.mt-1 ? A.m - ft * A.mb : A.mb;
            max_it  = ft;
            max_i   = cblas_idamax( lm, Atop2, 1 );
            tmp1    = Atop2[max_i];
            abstmp1 = fabs(tmp1);

            /* Update */
            for( it = ft+1; it < lt; it++)
            {
                Atop2= A( it, 0 );
                lm   = it == A.mt-1 ? A.m - it * A.mb : A.mb;
                jp   = cblas_idamax( lm, Atop2, 1 );
                if (  fabs(Atop2[jp]) > abstmp1 ) {
                    tmp1 = Atop2[jp];
                    abstmp1 = fabs(tmp1);
                    max_i  = jp;
                    max_it = it;
                }
            }

            jp = offset + max_it*A.mb + max_i;
            CORE_damax1_thread( tmp1, thidx, thcnt, &thwin,
                                &tmp2, pivot, jp + 1, IPIV + column );

            if ( thidx == 0 ) {
                Atop[0] = *pivot; /* all threads have the pivot element: no need for synchronization */
            }
            if (thwin == thidx) { /* the thread that owns the best pivot */
                if ( jp-offset != 0 ) /* if there is a need to exchange the pivot */
                {
                    Atop2 = A( max_it, 0 ) + max_i;
                    *Atop2 = tmp2;
                }
            }
        }

        CORE_dbarrier_thread( thidx, thcnt );

        /* If it is the last column, we just scale */
        if ( column == (min(A.m, A.n))-1 ) {

            pivval = *pivot;
            if ( pivval != 0.0 ) {
                if ( thidx == 0 ) {
                    if ( fabs(pivval) >= sfmin ) {
                        pivval = 1.0 / pivval;

                        /*
                         * We guess than we never enter the function with m == A.mt-1
                         * because it means that there is only one thread
                         */
                        lm = ft == A.mt-1 ? A.m - ft * A.mb : A.mb;
                        cblas_dscal( lm - column - 1, (pivval), Atop+column+1, 1 );

                        for( it = ft+1; it < lt; it++)
                        {
                            ld = BLKLDD(A, it);
                            Atop2 = A( it, 0 ) + column * ld;
                            lm = it == A.mt-1 ? A.m - it * A.mb : A.mb;
                            cblas_dscal( lm, (pivval), Atop2, 1 );
                        }
                    } else {
                        /*
                         * We guess than we never enter the function with m == A.mt-1
                         * because it means that there is only one thread
                         */
                        int i;
                        Atop2 = Atop + column + 1;
                        lm = ft == A.mt-1 ? A.m - ft * A.mb : A.mb;

                        for( i=0; i < lm-column-1; i++, Atop2++)
                            *Atop2 = *Atop2 / pivval;

                        for( it = ft+1; it < lt; it++)
                            {
                                ld = BLKLDD(A, it);
                                Atop2 = A( it, 0 ) + column * ld;
                                lm = it == A.mt-1 ? A.m - it * A.mb : A.mb;

                                for( i=0; i < lm; i++, Atop2++)
                                    *Atop2 = *Atop2 / pivval;
                            }
                    }
                }
                else
                {
                    if ( fabs(pivval) >= sfmin ) {
                        pivval = 1.0 / pivval;

                        for( it = ft; it < lt; it++)
                        {
                            ld = BLKLDD(A, it);
                            Atop2 = A( it, 0 ) + column * ld;
                            lm = it == A.mt-1 ? A.m - it * A.mb : A.mb;
                            cblas_dscal( lm, (pivval), Atop2, 1 );
                        }
                    } else {
                        /*
                         * We guess than we never enter the function with m == A.mt-1
                         * because it means that there is only one thread
                         */
                        int i;
                        for( it = ft; it < lt; it++)
                        {
                            ld = BLKLDD(A, it);
                            Atop2 = A( it, 0 ) + column * ld;
                            lm = it == A.mt-1 ? A.m - it * A.mb : A.mb;

                            for( i=0; i < lm; i++, Atop2++)
                                *Atop2 = *Atop2 / pivval;
                        }
                    }
                }
            }
            else {
                *info = column + 1;
                return;
            }
        }
    }
}
Ejemplo n.º 8
0
static inline void
CORE_dgetrf_rectil_update(const PLASMA_desc A, int *IPIV,
                          int column, int n1,     int n2,
                          int thidx,  int thcnt,
                          int ft,     int lt)
{
    int ld, lm, tmpM;
    int ip, j, it, i, ldft;
    double zone  = 1.0;
    double mzone = -1.0;
    double *Atop, *Atop2, *U, *L;
    int offset = A.i;

    ldft = BLKLDD(A, 0);
    Atop  = A(0, 0) + column * ldft;
    Atop2 = Atop    + n1     * ldft;

    if (thidx == 0)
    {
        /* Swap to the right */
        int *lipiv = IPIV+column;
        int idxMax = column+n1;
        for (j = column; j < idxMax; ++j, ++lipiv) {
            ip = (*lipiv) - offset - 1;
            if ( ip != j )
            {
                it = ip / A.mb;
                i  = ip % A.mb;
                ld = BLKLDD(A, it);
                cblas_dswap(n2, Atop2                     + j, ldft,
                            A(it, 0) + (column+n1)*ld + i, ld   );
            }
        }

        /* Trsm on the upper part */
        U = Atop2 + column;
        cblas_dtrsm( CblasColMajor, CblasLeft, CblasLower,
                     CblasNoTrans, CblasUnit,
                     n1, n2, (zone),
                     Atop  + column, ldft,
                     U,              ldft );

        /* Signal to other threads that they can start update */
        CORE_dbarrier_thread( thidx, thcnt );

        /* First tile */
        L = Atop + column + n1;
        tmpM = min(ldft, A.m) - column - n1;

        /* Apply the GEMM */
        cblas_dgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
                     tmpM, n2, n1,
                     (mzone), L,      ldft,
                     U,      ldft,
                     (zone),  U + n1, ldft );

    }
    else
    {
        ld = BLKLDD( A, ft );
        L  = A( ft, 0 ) + column * ld;
        lm = ft == A.mt-1 ? A.m - ft * A.mb : A.mb;

        U = Atop2 + column;

        /* Wait for pivoting and triangular solve to be finished
         * before to really start the update */
        CORE_dbarrier_thread( thidx, thcnt );

        /* First tile */
        /* Apply the GEMM */
        cblas_dgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
                     lm, n2, n1,
                     (mzone), L,         ld,
                                         U,         ldft,
                     (zone),  L + n1*ld, ld );
    }

    /* Update the other blocks */
    for( it = ft+1; it < lt; it++)
    {
        ld = BLKLDD( A, it );
        L  = A( it, 0 ) + column * ld;
        lm = it == A.mt-1 ? A.m - it * A.mb : A.mb;

        /* Apply the GEMM */
        cblas_dgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
                     lm, n2, n1,
                     (mzone), L,          ld,
                                         U,          ldft,
                     (zone),  L + n1*ld,  ld );
    }
}
Ejemplo n.º 9
0
void
test_swap (void) {
const double flteps = 1e-4, dbleps = 1e-6;
  {
   int N = 1;
   float X[] = { 0.539f };
   int incX = 1;
   float Y[] = { -0.262f };
   int incY = -1;
   float expected1[] = { -0.262f };
   float expected2[] = { 0.539f };
   cblas_sswap(N, X, incX, Y, incY);
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(X[i], expected1[i], flteps, "sswap(case 88)");
     }
   };
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(Y[i], expected2[i], flteps, "sswap(case 89)");
     }
   };
  };


  {
   int N = 1;
   double X[] = { 0.906 };
   int incX = 1;
   double Y[] = { 0.373 };
   int incY = -1;
   double expected1[] = { 0.373 };
   double expected2[] = { 0.906 };
   cblas_dswap(N, X, incX, Y, incY);
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(X[i], expected1[i], dbleps, "dswap(case 90)");
     }
   };
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(Y[i], expected2[i], dbleps, "dswap(case 91)");
     }
   };
  };


  {
   int N = 1;
   float X[] = { -0.316f, -0.529f };
   int incX = 1;
   float Y[] = { -0.313f, 0.363f };
   int incY = -1;
   float expected1[] = { -0.313f, 0.363f };
   float expected2[] = { -0.316f, -0.529f };
   cblas_cswap(N, X, incX, Y, incY);
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(X[2*i], expected1[2*i], flteps, "cswap(case 92) real");
       gsl_test_rel(X[2*i+1], expected1[2*i+1], flteps, "cswap(case 92) imag");
     };
   };
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(Y[2*i], expected2[2*i], flteps, "cswap(case 93) real");
       gsl_test_rel(Y[2*i+1], expected2[2*i+1], flteps, "cswap(case 93) imag");
     };
   };
  };


  {
   int N = 1;
   double X[] = { 0.512, -0.89 };
   int incX = 1;
   double Y[] = { -0.225, -0.511 };
   int incY = -1;
   double expected1[] = { -0.225, -0.511 };
   double expected2[] = { 0.512, -0.89 };
   cblas_zswap(N, X, incX, Y, incY);
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(X[2*i], expected1[2*i], dbleps, "zswap(case 94) real");
       gsl_test_rel(X[2*i+1], expected1[2*i+1], dbleps, "zswap(case 94) imag");
     };
   };
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(Y[2*i], expected2[2*i], dbleps, "zswap(case 95) real");
       gsl_test_rel(Y[2*i+1], expected2[2*i+1], dbleps, "zswap(case 95) imag");
     };
   };
  };


  {
   int N = 1;
   float X[] = { 0.336f };
   int incX = -1;
   float Y[] = { -0.431f };
   int incY = 1;
   float expected1[] = { -0.431f };
   float expected2[] = { 0.336f };
   cblas_sswap(N, X, incX, Y, incY);
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(X[i], expected1[i], flteps, "sswap(case 96)");
     }
   };
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(Y[i], expected2[i], flteps, "sswap(case 97)");
     }
   };
  };


  {
   int N = 1;
   double X[] = { 0.764 };
   int incX = -1;
   double Y[] = { -0.293 };
   int incY = 1;
   double expected1[] = { -0.293 };
   double expected2[] = { 0.764 };
   cblas_dswap(N, X, incX, Y, incY);
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(X[i], expected1[i], dbleps, "dswap(case 98)");
     }
   };
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(Y[i], expected2[i], dbleps, "dswap(case 99)");
     }
   };
  };


  {
   int N = 1;
   float X[] = { -0.239f, 0.361f };
   int incX = -1;
   float Y[] = { 0.149f, 0.347f };
   int incY = 1;
   float expected1[] = { 0.149f, 0.347f };
   float expected2[] = { -0.239f, 0.361f };
   cblas_cswap(N, X, incX, Y, incY);
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(X[2*i], expected1[2*i], flteps, "cswap(case 100) real");
       gsl_test_rel(X[2*i+1], expected1[2*i+1], flteps, "cswap(case 100) imag");
     };
   };
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(Y[2*i], expected2[2*i], flteps, "cswap(case 101) real");
       gsl_test_rel(Y[2*i+1], expected2[2*i+1], flteps, "cswap(case 101) imag");
     };
   };
  };


  {
   int N = 1;
   double X[] = { -0.171, -0.936 };
   int incX = -1;
   double Y[] = { 0.495, -0.835 };
   int incY = 1;
   double expected1[] = { 0.495, -0.835 };
   double expected2[] = { -0.171, -0.936 };
   cblas_zswap(N, X, incX, Y, incY);
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(X[2*i], expected1[2*i], dbleps, "zswap(case 102) real");
       gsl_test_rel(X[2*i+1], expected1[2*i+1], dbleps, "zswap(case 102) imag");
     };
   };
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(Y[2*i], expected2[2*i], dbleps, "zswap(case 103) real");
       gsl_test_rel(Y[2*i+1], expected2[2*i+1], dbleps, "zswap(case 103) imag");
     };
   };
  };


  {
   int N = 1;
   float X[] = { -0.405f };
   int incX = -1;
   float Y[] = { -0.213f };
   int incY = -1;
   float expected1[] = { -0.213f };
   float expected2[] = { -0.405f };
   cblas_sswap(N, X, incX, Y, incY);
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(X[i], expected1[i], flteps, "sswap(case 104)");
     }
   };
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(Y[i], expected2[i], flteps, "sswap(case 105)");
     }
   };
  };


  {
   int N = 1;
   double X[] = { -0.761 };
   int incX = -1;
   double Y[] = { -0.585 };
   int incY = -1;
   double expected1[] = { -0.585 };
   double expected2[] = { -0.761 };
   cblas_dswap(N, X, incX, Y, incY);
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(X[i], expected1[i], dbleps, "dswap(case 106)");
     }
   };
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(Y[i], expected2[i], dbleps, "dswap(case 107)");
     }
   };
  };


  {
   int N = 1;
   float X[] = { 0.853f, 0.146f };
   int incX = -1;
   float Y[] = { 0.009f, -0.178f };
   int incY = -1;
   float expected1[] = { 0.009f, -0.178f };
   float expected2[] = { 0.853f, 0.146f };
   cblas_cswap(N, X, incX, Y, incY);
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(X[2*i], expected1[2*i], flteps, "cswap(case 108) real");
       gsl_test_rel(X[2*i+1], expected1[2*i+1], flteps, "cswap(case 108) imag");
     };
   };
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(Y[2*i], expected2[2*i], flteps, "cswap(case 109) real");
       gsl_test_rel(Y[2*i+1], expected2[2*i+1], flteps, "cswap(case 109) imag");
     };
   };
  };


  {
   int N = 1;
   double X[] = { -0.228, 0.386 };
   int incX = -1;
   double Y[] = { 0.988, -0.084 };
   int incY = -1;
   double expected1[] = { 0.988, -0.084 };
   double expected2[] = { -0.228, 0.386 };
   cblas_zswap(N, X, incX, Y, incY);
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(X[2*i], expected1[2*i], dbleps, "zswap(case 110) real");
       gsl_test_rel(X[2*i+1], expected1[2*i+1], dbleps, "zswap(case 110) imag");
     };
   };
   {
     int i;
     for (i = 0; i < 1; i++) {
       gsl_test_rel(Y[2*i], expected2[2*i], dbleps, "zswap(case 111) real");
       gsl_test_rel(Y[2*i+1], expected2[2*i+1], dbleps, "zswap(case 111) imag");
     };
   };
  };


}
Ejemplo n.º 10
0
int CORE_dtstrf(int M, int N, int IB, int NB,
                double *U, int LDU,
                double *A, int LDA,
                double *L, int LDL,
                int *IPIV,
                double *WORK, int LDWORK,
                int *INFO)
{
    static double zzero = 0.0;
    static double mzone =-1.0;

    double alpha;
    int i, j, ii, sb;
    int im, ip;

    /* Check input arguments */
    *INFO = 0;
    if (M < 0) {
        coreblas_error(1, "Illegal value of M");
        return -1;
    }
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if (IB < 0) {
        coreblas_error(3, "Illegal value of IB");
        return -3;
    }
    if ((LDU < max(1,NB)) && (NB > 0)) {
        coreblas_error(6, "Illegal value of LDU");
        return -6;
    }
    if ((LDA < max(1,M)) && (M > 0)) {
        coreblas_error(8, "Illegal value of LDA");
        return -8;
    }
    if ((LDL < max(1,IB)) && (IB > 0)) {
        coreblas_error(10, "Illegal value of LDL");
        return -10;
    }

    /* Quick return */
    if ((M == 0) || (N == 0) || (IB == 0))
        return PLASMA_SUCCESS;

    /* Set L to 0 */
    memset(L, 0, LDL*N*sizeof(double));

    ip = 0;
    for (ii = 0; ii < N; ii += IB) {
        sb = min(N-ii, IB);

        for (i = 0; i < sb; i++) {
            im = cblas_idamax(M, &A[LDA*(ii+i)], 1);
            IPIV[ip] = ii+i+1;

            if (fabs(A[LDA*(ii+i)+im]) > fabs(U[LDU*(ii+i)+ii+i])) {
                /*
                 * Swap behind.
                 */
                cblas_dswap(i, &L[LDL*ii+i], LDL, &WORK[im], LDWORK );
                /*
                 * Swap ahead.
                 */
                cblas_dswap(sb-i, &U[LDU*(ii+i)+ii+i], LDU, &A[LDA*(ii+i)+im], LDA );
                /*
                 * Set IPIV.
                 */
                IPIV[ip] = NB + im + 1;

                for (j = 0; j < i; j++) {
                    A[LDA*(ii+j)+im] = zzero;
                }
            }

            if ((*INFO == 0) && (fabs(A[LDA*(ii+i)+im]) == zzero)
                && (fabs(U[LDU*(ii+i)+ii+i]) == zzero)) {
                *INFO = ii+i+1;
            }

            alpha = ((double)1. / U[LDU*(ii+i)+ii+i]);
            cblas_dscal(M, (alpha), &A[LDA*(ii+i)], 1);
            cblas_dcopy(M, &A[LDA*(ii+i)], 1, &WORK[LDWORK*i], 1);
            cblas_dger(
                CblasColMajor, M, sb-i-1,
                (mzone), &A[LDA*(ii+i)], 1,
                &U[LDU*(ii+i+1)+ii+i], LDU,
                &A[LDA*(ii+i+1)], LDA );
            ip = ip+1;
        }
        /*
         * Apply the subpanel to the rest of the panel.
         */
        if(ii+i < N) {
            for(j = ii; j < ii+sb; j++) {
                if (IPIV[j] <= NB) {
                    IPIV[j] = IPIV[j] - ii;
                }
            }

            CORE_dssssm(
                NB, N-(ii+sb), M, N-(ii+sb), sb, sb,
                &U[LDU*(ii+sb)+ii], LDU,
                &A[LDA*(ii+sb)], LDA,
                &L[LDL*ii], LDL,
                WORK, LDWORK, &IPIV[ii]);

            for(j = ii; j < ii+sb; j++) {
                if (IPIV[j] <= NB) {
                    IPIV[j] = IPIV[j] + ii;
                }
            }
        }
    }
    return PLASMA_SUCCESS;
}
Ejemplo n.º 11
0
__cminpack_attr__
void __cminpack_func__(lmpar)(int n, real *r, int ldr, 
	const int *ipvt, const real *diag, const real *qtb, real delta, 
	real *par, real *x, real *sdiag, real *wa1, 
	real *wa2)
{
    /* Initialized data */

#define p1 .1
#define p001 .001

    /* System generated locals */
    real d1, d2;

    /* Local variables */
    int j, l;
    real fp;
    real parc, parl;
    int iter;
    real temp, paru, dwarf;
    int nsing;
    real gnorm;
    real dxnorm;

/*     ********** */

/*     subroutine lmpar */

/*     given an m by n matrix a, an n by n nonsingular diagonal */
/*     matrix d, an m-vector b, and a positive number delta, */
/*     the problem is to determine a value for the parameter */
/*     par such that if x solves the system */

/*           a*x = b ,     sqrt(par)*d*x = 0 , */

/*     in the least squares sense, and dxnorm is the euclidean */
/*     norm of d*x, then either par is zero and */

/*           (dxnorm-delta) .le. 0.1*delta , */

/*     or par is positive and */

/*           abs(dxnorm-delta) .le. 0.1*delta . */

/*     this subroutine completes the solution of the problem */
/*     if it is provided with the necessary information from the */
/*     qr factorization, with column pivoting, of a. that is, if */
/*     a*p = q*r, where p is a permutation matrix, q has orthogonal */
/*     columns, and r is an upper triangular matrix with diagonal */
/*     elements of nonincreasing magnitude, then lmpar expects */
/*     the full upper triangle of r, the permutation matrix p, */
/*     and the first n components of (q transpose)*b. on output */
/*     lmpar also provides an upper triangular matrix s such that */

/*            t   t                   t */
/*           p *(a *a + par*d*d)*p = s *s . */

/*     s is employed within lmpar and may be of separate interest. */

/*     only a few iterations are generally needed for convergence */
/*     of the algorithm. if, however, the limit of 10 iterations */
/*     is reached, then the output par will contain the best */
/*     value obtained so far. */

/*     the subroutine statement is */

/*       subroutine lmpar(n,r,ldr,ipvt,diag,qtb,delta,par,x,sdiag, */
/*                        wa1,wa2) */

/*     where */

/*       n is a positive integer input variable set to the order of r. */

/*       r is an n by n array. on input the full upper triangle */
/*         must contain the full upper triangle of the matrix r. */
/*         on output the full upper triangle is unaltered, and the */
/*         strict lower triangle contains the strict upper triangle */
/*         (transposed) of the upper triangular matrix s. */

/*       ldr is a positive integer input variable not less than n */
/*         which specifies the leading dimension of the array r. */

/*       ipvt is an integer input array of length n which defines the */
/*         permutation matrix p such that a*p = q*r. column j of p */
/*         is column ipvt(j) of the identity matrix. */

/*       diag is an input array of length n which must contain the */
/*         diagonal elements of the matrix d. */

/*       qtb is an input array of length n which must contain the first */
/*         n elements of the vector (q transpose)*b. */

/*       delta is a positive input variable which specifies an upper */
/*         bound on the euclidean norm of d*x. */

/*       par is a nonnegative variable. on input par contains an */
/*         initial estimate of the levenberg-marquardt parameter. */
/*         on output par contains the final estimate. */

/*       x is an output array of length n which contains the least */
/*         squares solution of the system a*x = b, sqrt(par)*d*x = 0, */
/*         for the output par. */

/*       sdiag is an output array of length n which contains the */
/*         diagonal elements of the upper triangular matrix s. */

/*       wa1 and wa2 are work arrays of length n. */

/*     subprograms called */

/*       minpack-supplied ... dpmpar,enorm,qrsolv */

/*       fortran-supplied ... dabs,dmax1,dmin1,dsqrt */

/*     argonne national laboratory. minpack project. march 1980. */
/*     burton s. garbow, kenneth e. hillstrom, jorge j. more */

/*     ********** */

/*     dwarf is the smallest positive magnitude. */

    dwarf = __cminpack_func__(dpmpar)(2);

/*     compute and store in x the gauss-newton direction. if the */
/*     jacobian is rank-deficient, obtain a least squares solution. */

    nsing = n;
    for (j = 0; j < n; ++j) {
	wa1[j] = qtb[j];
	if (r[j + j * ldr] == 0. && nsing == n) {
	    nsing = j;
	}
	if (nsing < n) {
	    wa1[j] = 0.;
	}
    }
# ifdef USE_CBLAS
    cblas_dtrsv(CblasColMajor, CblasUpper, CblasNoTrans, CblasNonUnit, nsing, r, ldr, wa1, 1);
# else
    if (nsing >= 1) {
        int k;
        for (k = 1; k <= nsing; ++k) {
            j = nsing - k;
            wa1[j] /= r[j + j * ldr];
            temp = wa1[j];
            if (j >= 1) {
                int i;
                for (i = 0; i < j; ++i) {
                    wa1[i] -= r[i + j * ldr] * temp;
                }
            }
        }
    }
# endif
    for (j = 0; j < n; ++j) {
	l = ipvt[j]-1;
	x[l] = wa1[j];
    }

/*     initialize the iteration counter. */
/*     evaluate the function at the origin, and test */
/*     for acceptance of the gauss-newton direction. */

    iter = 0;
    for (j = 0; j < n; ++j) {
	wa2[j] = diag[j] * x[j];
    }
    dxnorm = __cminpack_enorm__(n, wa2);
    fp = dxnorm - delta;
    if (fp <= p1 * delta) {
	goto TERMINATE;
    }

/*     if the jacobian is not rank deficient, the newton */
/*     step provides a lower bound, parl, for the zero of */
/*     the function. otherwise set this bound to zero. */

    parl = 0.;
    if (nsing >= n) {
        for (j = 0; j < n; ++j) {
            l = ipvt[j]-1;
            wa1[j] = diag[l] * (wa2[l] / dxnorm);
        }
#     ifdef USE_CBLAS
        cblas_dtrsv(CblasColMajor, CblasUpper, CblasTrans, CblasNonUnit, n, r, ldr, wa1, 1);
#     else
        for (j = 0; j < n; ++j) {
            real sum = 0.;
            if (j >= 1) {
                int i;
                for (i = 0; i < j; ++i) {
                    sum += r[i + j * ldr] * wa1[i];
                }
            }
            wa1[j] = (wa1[j] - sum) / r[j + j * ldr];
        }
#     endif
        temp = __cminpack_enorm__(n, wa1);
        parl = fp / delta / temp / temp;
    }

/*     calculate an upper bound, paru, for the zero of the function. */

    for (j = 0; j < n; ++j) {
        real sum;
#     ifdef USE_CBLAS
        sum = cblas_ddot(j+1, &r[j*ldr], 1, qtb, 1);
#     else
	sum = 0.;
        int i;
	for (i = 0; i <= j; ++i) {
	    sum += r[i + j * ldr] * qtb[i];
	}
#     endif
	l = ipvt[j]-1;
	wa1[j] = sum / diag[l];
    }
    gnorm = __cminpack_enorm__(n, wa1);
    paru = gnorm / delta;
    if (paru == 0.) {
	paru = dwarf / min(delta,(real)p1) /* / p001 ??? */;
    }

/*     if the input par lies outside of the interval (parl,paru), */
/*     set par to the closer endpoint. */

    *par = max(*par,parl);
    *par = min(*par,paru);
    if (*par == 0.) {
	*par = gnorm / dxnorm;
    }

/*     beginning of an iteration. */

    for (;;) {
        ++iter;

/*        evaluate the function at the current value of par. */

        if (*par == 0.) {
            /* Computing MAX */
            d1 = dwarf, d2 = p001 * paru;
            *par = max(d1,d2);
        }
        temp = sqrt(*par);
        for (j = 0; j < n; ++j) {
            wa1[j] = temp * diag[j];
        }
        __cminpack_func__(qrsolv)(n, r, ldr, ipvt, wa1, qtb, x, sdiag, wa2);
        for (j = 0; j < n; ++j) {
            wa2[j] = diag[j] * x[j];
        }
        dxnorm = __cminpack_enorm__(n, wa2);
        temp = fp;
        fp = dxnorm - delta;

/*        if the function is small enough, accept the current value */
/*        of par. also test for the exceptional cases where parl */
/*        is zero or the number of iterations has reached 10. */

        if (fabs(fp) <= p1 * delta || (parl == 0. && fp <= temp && temp < 0.) || iter == 10) {
            goto TERMINATE;
        }

/*        compute the newton correction. */

#     ifdef USE_CBLAS
        for (j = 0; j < nsing; ++j) {
            l = ipvt[j]-1;
            wa1[j] = diag[l] * (wa2[l] / dxnorm);
        }
        for (j = nsing; j < n; ++j) {
            wa1[j] = 0.;
        }
        /* exchange the diagonal of r with sdiag */
        cblas_dswap(n, r, ldr+1, sdiag, 1);
        /* solve lower(r).x = wa1, result id put in wa1 */
        cblas_dtrsv(CblasColMajor, CblasLower, CblasNoTrans, CblasNonUnit, nsing, r, ldr, wa1, 1);
        /* exchange the diagonal of r with sdiag */
        cblas_dswap( n, r, ldr+1, sdiag, 1);
#     else /* !USE_CBLAS */
        for (j = 0; j < n; ++j) {
            l = ipvt[j]-1;
            wa1[j] = diag[l] * (wa2[l] / dxnorm);
        }
        for (j = 0; j < n; ++j) {
            wa1[j] /= sdiag[j];
            temp = wa1[j];
            if (n > j+1) {
                int i;
                for (i = j+1; i < n; ++i) {
                    wa1[i] -= r[i + j * ldr] * temp;
                }
            }
        }
#     endif /* !USE_CBLAS */
        temp = __cminpack_enorm__(n, wa1);
        parc = fp / delta / temp / temp;

/*        depending on the sign of the function, update parl or paru. */

        if (fp > 0.) {
            parl = max(parl,*par);
        }
        if (fp < 0.) {
            paru = min(paru,*par);
        }

/*        compute an improved estimate for par. */

        /* Computing MAX */
        d1 = parl, d2 = *par + parc;
        *par = max(d1,d2);

/*        end of an iteration. */

    }
TERMINATE:

/*     termination. */

    if (iter == 0) {
	*par = 0.;
    }

/*     last card of subroutine lmpar. */

} /* lmpar_ */
Ejemplo n.º 12
0
// flips the left-right ordering of the columns of a matrix stored in rowmajor format
void flipcolslr(double * A, long m, long n) {
    for(long idx = 0; idx < n/2; idx = idx + 1) 
        cblas_dswap(m, A + idx, n, A + (n - 1 - idx), n);
}
Ejemplo n.º 13
0
void dswap_(int *n, double *X,int *incx, double *Y, int *incy)
{
    Blasx_Debug_Output("Calling dswap_\n ");
    cblas_dswap(*n,X,*incx,Y,*incy);
}
Ejemplo n.º 14
0
//
// Overloaded function for dispatching to
// * CBLAS backend, and
// * double value-type.
//
inline void swap( const int n, double* x, const int incx, double* y,
        const int incy ) {
    cblas_dswap( n, x, incx, y, incy );
}
Ejemplo n.º 15
0
void F77_dswap( const int *N, double *X, const int *incX,
                          double *Y, const int *incY)
{
   cblas_dswap(*N,X,*incX,Y,*incY);
   return;
}
void My_dswap(gsl_vector* x, gsl_vector* y)
{
	cblas_dswap(x->size, x->data, x->stride, y->data, y->stride);
}
Ejemplo n.º 17
0
void dlaswp( long n, double a[], long lda, long k1, long k2,
            long ipiv[], long incx )
{
  /**
   *  -- LAPACK auxiliary routine (version 1.1) --
   *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
   *     Courant Institute, Argonne National Lab, and Rice University
   *     October 31, 1992
   *
   *     .. Scalar Arguments ..*/
  /**     ..
   *     .. Array Arguments ..*/
#undef ipiv_1
#define ipiv_1(a1) ipiv[a1-1]
#undef a_2
#define a_2(a1,a2) a[a1-1+lda*(a2-1)]
  /**     ..
   *
   *  Purpose
   *  =======
   *
   *  DLASWP performs a series of row interchanges on the matrix A.
   *  One row interchange is initiated for each of rows K1 through K2 of A.
   *
   *  Arguments
   *  =========
   *
   *  N       (input) INTEGER
   *          The number of columns of the matrix A.
   *
   *  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
   *          On entry, the matrix of column dimension N to which the row
   *          interchanges will be applied.
   *          On exit, the permuted matrix.
   *
   *  LDA     (input) INTEGER
   *          The leading dimension of the array A.
   *
   *  K1      (input) INTEGER
   *          The first element of IPIV for which a row interchange will
   *          be done.
   *
   *  K2      (input) INTEGER
   *          The last element of IPIV for which a row interchange will
   *          be done.
   *
   *  IPIV    (input) INTEGER array, dimension (M*abs(INCX))
   *          The vector of pivot indices.  Only the elements in positions
   *          K1 through K2 of IPIV are accessed.
   *          IPIV(K) = L implies rows K and L are to be interchanged.
   *
   *  INCX    (input) INTEGER
   *          The increment between successive values of IPIV.  If IPIV
   *          is negative, the pivots are applied in reverse order.
   *
   * =====================================================================
   *
   *     .. Local Scalars ..*/
  long            i, ip, ix;
  /**     ..
   *     .. Executable Statements ..
   *
   *     Interchange row I with row IPIV(I) for each of rows K1 through K2.
   **/
  /*-----implicit-declarations-----*/
  /*-----end-of-declarations-----*/
  if( incx==0 )
    return;
  if( incx>0 ) {
    ix = k1;
  } else {
    ix = 1 + ( 1-k2 )*incx;
  }
  if( incx==1 ) {
    for (i=k1 ; i<=k2 ; i+=1) {
      ip = ipiv_1( i );
      if( ip!=i )
        cblas_dswap( n, &a_2( i, 1 ), lda, &a_2( ip, 1 ), lda );
    }
  } else if( incx>1 ) {
    for (i=k1 ; i<=k2 ; i+=1) {
      ip = ipiv_1( ix );
      if( ip!=i )
        cblas_dswap( n, &a_2( i, 1 ), lda, &a_2( ip, 1 ), lda );
      ix = ix + incx;
    }
  } else if( incx<0 ) {
    for (i=k2 ; i>=k1 ; i+=-1) {
      ip = ipiv_1( ix );
      if( ip!=i )
        cblas_dswap( n, &a_2( i, 1 ), lda, &a_2( ip, 1 ), lda );
      ix = ix + incx;
    }
  }

  return;
  /**
   *     End of DLASWP
   **/
}
Ejemplo n.º 18
0
void dgetf2( long m, long n, double a[], long lda, long ipiv[], long *info )
{
  /**
   *  -- LAPACK routine (version 1.1) --
   *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
   *     Courant Institute, Argonne National Lab, and Rice University
   *     June 30, 1992
   *
   *     .. Scalar Arguments ..*/
  /**     ..
   *     .. Array Arguments ..*/
#undef ipiv_1
#define ipiv_1(a1) ipiv[a1-1]
#undef a_2
#define a_2(a1,a2) a[a1-1+lda*(a2-1)]
  /**     ..
   *
   *  Purpose
   *  =======
   *
   *  DGETF2 computes an LU factorization of a general m-by-n matrix A
   *  using partial pivoting with row interchanges.
   *
   *  The factorization has the form
   *     A = P * L * U
   *  where P is a permutation matrix, L is lower triangular with unit
   *  diagonal elements (lower trapezoidal if m > n), and U is upper
   *  triangular (upper trapezoidal if m < n).
   *
   *  This is the right-looking Level 2 BLAS version of the algorithm.
   *
   *  Arguments
   *  =========
   *
   *  M       (input) INTEGER
   *          The number of rows of the matrix A.  M >= 0.
   *
   *  N       (input) INTEGER
   *          The number of columns of the matrix A.  N >= 0.
   *
   *  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
   *          On entry, the m by n matrix to be factored.
   *          On exit, the factors L and U from the factorization
   *          A = P*L*U; the unit diagonal elements of L are not stored.
   *
   *  LDA     (input) INTEGER
   *          The leading dimension of the array A.  LDA >= max(1,M).
   *
   *  IPIV    (output) INTEGER array, dimension (min(M,N))
   *          The pivot indices; for 1 <= i <= min(M,N), row i of the
   *          matrix was interchanged with row IPIV(i).
   *
   *  INFO    (output) INTEGER
   *          = 0: successful exit
   *          < 0: if INFO = -k, the k-th argument had an illegal value
   *          > 0: if INFO = k, U(k,k) is exactly zero. The factorization
   *               has been completed, but the factor U is exactly
   *               singular, and division by zero will occur if it is used
   *               to solve a system of equations.
   *
   *  =====================================================================
   *
   *     .. Parameters ..*/
#undef one
#define one 1.0e+0
#undef zero
#define zero 0.0e+0
  /**     ..
   *     .. Local Scalars ..*/
  long            j, jp;
  /**     ..
   *     .. Intrinsic Functions ..*/
  /*      intrinsic          max, min;*/
  /**     ..
   *     .. Executable Statements ..
   *
   *     Test the input parameters.
   **/
  /*-----implicit-declarations-----*/
  /*-----end-of-declarations-----*/
  *info = 0;
  if( m<0 ) {
    *info = -1;
  } else if( n<0 ) {
    *info = -2;
  } else if( lda<max( 1, m ) ) {
    *info = -4;
  }
  if( *info!=0 ) {
    xerbla( "dgetf2", -*info );
    return;
  }
  /**
   *     Quick return if possible
   **/
  if( m==0 || n==0 )
    return;

  for (j=1 ; j<=min( m, n ) ; j+=1) {
    /**
     *        Find pivot and test for singularity.
     **/
    jp = j + cblas_idamax( m-j+1, &a_2( j, j ), 1 );
    ipiv_1( j ) = jp;
    if( a_2( jp, j )!=zero ) {
      /**
       *           Apply the interchange to columns 1:N.
       **/
      if( jp!=j )
        cblas_dswap( n, &a_2( j, 1 ), lda, &a_2( jp, 1 ), lda );
      /**
       *           Compute elements J+1:M of J-th column.
       **/
      if( j<m )
        cblas_dscal( m-j, one / a_2( j, j ), &a_2( j+1, j ), 1 );

    } else if( *info==0 ) {

      *info = j;
    }

    if( j<min( m, n ) ) {
      /**
       *           Update trailing submatrix.
       **/
      cblas_dger(CblasColMajor, m-j, n-j, -one, &a_2( j+1, j ), 1,
                 &a_2( j, j+1 ), lda, &a_2( j+1, j+1 ), lda );
    }
  }
  return;
  /**
   *     End of DGETF2
   **/
}
void STARPU_DSWAP(const int n, double *x, const int incx, double *y, const int incy)
{
	cblas_dswap(n, x, incx, y, incy);
}