示例#1
0
/**
 * Solves a linear system using an already computed factorization and its
 * pivot indices (presumably the LU factors produced by a prior call to an
 * LU routine -- confirm against callers).
 *
 * @param A       factorized coefficient matrix; its first dimension is used
 *                as the system order.
 * @param pivot   pivot indices, copied into host memory before the solve.
 * @param b       right-hand side(s); its second dimension is the number of
 *                right-hand sides. It is not modified: the solve runs on a copy.
 * @param options only forwarded to the CPU implementation; the device path
 *                below always passes MagmaNoTrans.
 * @return a new array holding the result written by the solver.
 */
Array<T> solveLU(const Array<T> &A, const Array<int> &pivot,
                 const Array<T> &b, const af_mat_prop options)
{
    // Delegate entirely to the CPU backend when offloading is requested.
    if (OpenCLCPUOffload()) { return cpu::solveLU(A, pivot, b, options); }

    const int order    = A.dims()[0];
    const int rhsCount = b.dims()[1];

    // The solver receives the pivots through a pointer into this host vector.
    std::vector<int> hostPivots(order);
    copyData(&hostPivots[0], pivot);

    // Work on a copy so that the caller's right-hand side stays untouched.
    Array<T> solution = copyArray<T>(b);

    const cl::Buffer *factorBuf = A.get();
    cl::Buffer *solutionBuf     = solution.get();

    // NOTE(review): the status code written by the solver is not inspected.
    int status = 0;
    magma_getrs_gpu<T>(MagmaNoTrans, order, rhsCount,
                       (*factorBuf)(), A.getOffset(), A.strides()[1],
                       &hostPivots[0],
                       (*solutionBuf)(), solution.getOffset(), solution.strides()[1],
                       getQueue()(), &status);

    return solution;
}
/**
 * Out-of-place Cholesky factorization.
 *
 * Copies the input, factorizes the copy with cholesky_inplace() and then
 * applies the triangle helper matching the requested half to the result.
 *
 * @param info     receives the status code returned by cholesky_inplace()
 *                 (or set by the CPU implementation when offloading).
 * @param in       matrix to factorize; left unmodified.
 * @param is_upper selects the upper (true) or lower (false) variant.
 * @return the array produced by the factorization and triangle extraction.
 */
Array<T> cholesky(int *info, const Array<T> &in, const bool is_upper)
{
    // Delegate entirely to the CPU backend when offloading is requested.
    if (OpenCLCPUOffload()) { return cpu::cholesky(info, in, is_upper); }

    Array<T> factor = copyArray<T>(in);
    *info           = cholesky_inplace(factor, is_upper);

    // The triangle helper is invoked with the same array as source and
    // destination, selecting the half that corresponds to is_upper.
    if (is_upper) {
        triangle<T, true, false>(factor, factor);
    } else {
        triangle<T, false, false>(factor, factor);
    }

    return factor;
}
/**
 * In-place Cholesky factorization of `in`.
 *
 * @param in       matrix to factorize; its buffer is passed to the solver
 *                 and its first dimension is used as the matrix order.
 * @param is_upper selects MagmaUpper (true) or MagmaLower (false).
 * @return the status code written by magma_potrf_gpu (or the value returned
 *         by the CPU implementation when offloading).
 */
int cholesky_inplace(Array<T> &in, const bool is_upper)
{
    // Delegate entirely to the CPU backend when offloading is requested.
    if (OpenCLCPUOffload()) { return cpu::cholesky_inplace(in, is_upper); }

    const int order = in.dims()[0];

    magma_uplo_t triangleKind = MagmaLower;
    if (is_upper) { triangleKind = MagmaUpper; }

    int status         = 0;
    cl::Buffer *buffer = in.get();
    magma_potrf_gpu<T>(triangleKind, order,
                       (*buffer)(), in.getOffset(), in.strides()[1],
                       getQueue()(), &status);

    return status;
}
示例#4
0
/**
 * Solves a linear system, choosing the solver from `options` and the shape
 * of `a`.
 *
 * Dispatch order:
 *   1. CPU implementation when offloading is requested;
 *   2. triangleSolve when AF_MAT_UPPER or AF_MAT_LOWER is set in `options`;
 *   3. generalSolve when the first two dimensions of `a` are equal;
 *   4. leastSquares otherwise.
 *
 * @param a       coefficient matrix.
 * @param b       right-hand side(s).
 * @param options matrix property flags.
 * @return the array returned by the selected solver.
 *
 * Any cl::Error raised inside is handed to CL_TO_AF_ERROR.
 */
Array<T> solve(const Array<T> &a, const Array<T> &b, const af_mat_prop options)
{
    try {
        if (OpenCLCPUOffload()) { return cpu::solve(a, b, options); }

        initBlas();

        const bool isTriangular =
            (options & AF_MAT_UPPER) || (options & AF_MAT_LOWER);
        if (isTriangular) { return triangleSolve<T>(a, b, options); }

        const bool isSquare = (a.dims()[0] == a.dims()[1]);
        if (isSquare) { return generalSolve<T>(a, b); }

        return leastSquares<T>(a, b);
    } catch(cl::Error &err) {
        CL_TO_AF_ERROR(err);
    }
}
示例#5
0
/**
 * Batched matrix product of `lhs` and `rhs`.
 *
 * The output has dimensions (M, N, d2, d3), where M and N come from the
 * (optionally transposed) operands and d2 / d3 are the larger of the two
 * operands' extents along dimensions 2 and 3. One BLAS call is issued per
 * (d2, d3) slice: gemv when the right operand has a single column, gemm
 * otherwise. alpha = 1 and beta = 0 are passed as the scaling factors.
 *
 * An operand whose extent along a batch dimension differs from the output's
 * is addressed with a zero stride along that dimension, i.e. the same slice
 * is reused for every batch entry.
 *
 * @param lhs    left operand.
 * @param rhs    right operand.
 * @param optLhs transpose option for `lhs`, mapped via toBlasTranspose().
 * @param optRhs transpose option for `rhs`, mapped via toBlasTranspose().
 * @return a newly created array holding the product.
 */
Array<T> matmul(const Array<T> &lhs, const Array<T> &rhs,
                af_mat_prop optLhs, af_mat_prop optRhs)
{
#if defined(WITH_LINEAR_ALGEBRA)
    if(OpenCLCPUOffload(false)) {   // Do not force offload gemm on OSX Intel devices
        return cpu::matmul(lhs, rhs, optLhs, optRhs);
    }
#endif
    const auto lOpts = toBlasTranspose(optLhs);
    const auto rOpts = toBlasTranspose(optRhs);

    // Which dimension supplies rows / columns depends on the transpose flag.
    const auto aRowDim = (lOpts == OPENCL_BLAS_NO_TRANS) ? 0 : 1;
    const auto aColDim = (lOpts == OPENCL_BLAS_NO_TRANS) ? 1 : 0;
    const auto bColDim = (rOpts == OPENCL_BLAS_NO_TRANS) ? 1 : 0;

    const dim4 lDims = lhs.dims();
    const dim4 rDims = rhs.dims();
    const int M = lDims[aRowDim];
    const int N = rDims[bColDim];
    const int K = lDims[aColDim];

    dim_t d2 = std::max(lDims[2], rDims[2]);
    dim_t d3 = std::max(lDims[3], rDims[3]);
    dim4 oDims = af::dim4(M, N, d2, d3);
    Array<T> out = createEmptyArray<T>(oDims);

    const auto alpha = scalar<T>(1);
    const auto beta  = scalar<T>(0);

    const dim4 lStrides = lhs.strides();
    const dim4 rStrides = rhs.strides();
    const dim4 oStrides = out.strides();

    int batchSize = oDims[2] * oDims[3];

    bool is_l_d2_batched = oDims[2] == lDims[2];
    bool is_l_d3_batched = oDims[3] == lDims[3];
    bool is_r_d2_batched = oDims[2] == rDims[2];
    bool is_r_d3_batched = oDims[3] == rDims[3];

    // Effective batch strides: zero for an operand that is broadcast along
    // the dimension. These are loop-invariant, so compute them once.
    const dim_t lStep2 = is_l_d2_batched ? lStrides[2] : 0;
    const dim_t lStep3 = is_l_d3_batched ? lStrides[3] : 0;
    const dim_t rStep2 = is_r_d2_batched ? rStrides[2] : 0;
    const dim_t rStep3 = is_r_d3_batched ? rStrides[3] : 0;

    for (int n = 0; n < batchSize; n++) {
        // Split the flat batch index into (z, w) coordinates of the OUTPUT.
        // The split must use oDims[2]: using rDims[2] is wrong whenever rhs
        // is broadcast along dimension 2 (rDims[2] != oDims[2]), because w
        // would then run past oDims[3] and the offsets would leave the
        // buffers.
        const dim_t w = n / oDims[2];
        const dim_t z = n - w * oDims[2];

        // Offsets are kept in dim_t so large strides are not truncated.
        const dim_t lOffset = lhs.getOffset() + z * lStep2 + w * lStep3;
        const dim_t rOffset = rhs.getOffset() + z * rStep2 + w * rStep3;
        const dim_t oOffset = out.getOffset() + z * oStrides[2] + w * oStrides[3];

        cl::Event event;
        if(rDims[bColDim] == 1) {
            // Single-column right operand: matrix-vector product. The vector
            // increment follows whichever dimension the elements run along.
            dim_t incr = (optRhs == AF_MAT_NONE) ? rStrides[0] : rStrides[1];
            gpu_blas_gemv_func<T> gemv;
            OPENCL_BLAS_CHECK(
                gemv(lOpts, lDims[0], lDims[1],
                     alpha,
                     (*lhs.get())(), lOffset, lStrides[1],
                     (*rhs.get())(), rOffset, incr,
                     beta,
                     (*out.get())(), oOffset, 1,
                     1, &getQueue()(), 0, nullptr, &event())
                );
        } else {
            gpu_blas_gemm_func<T> gemm;
            OPENCL_BLAS_CHECK(
                gemm(lOpts, rOpts, M, N, K,
                     alpha,
                     (*lhs.get())(), lOffset, lStrides[1],
                     (*rhs.get())(), rOffset, rStrides[1],
                     beta,
                     (*out.get())(), oOffset, out.dims()[0],
                     1, &getQueue()(), 0, nullptr, &event())
                );
        }
    }

    return out;
}