CBLAS_TRANSPOSE
toCblasTranspose(af_mat_prop opt)
{
    CBLAS_TRANSPOSE out = CblasNoTrans;
    switch(opt) {
        case AF_MAT_NONE        : out = CblasNoTrans;   break;
        case AF_MAT_TRANS       : out = CblasTrans;     break;
        case AF_MAT_CTRANS      : out = CblasConjTrans; break;
        default                 : AF_ERROR("INVALID af_mat_prop", AF_ERR_ARG);
    }
    return out;
}
Example #2
0
SparseArray<T> sparseConvertStorageToStorage(const SparseArray<T> &in)
{
    // Dummy function
    // TODO finish this function when support is required
    AF_ERROR("CPU Backend only supports Dense to CSR or COO", AF_ERR_NOT_SUPPORTED);

    in.eval();

    SparseArray<T> dense = createEmptySparseArray<T>(in.dims(), (int)in.getNNZ(), dest);

    return dense;
}
Example #3
0
Array<T> morph3d(const Array<T> &in, const Array<T> &mask)
{
    const dim4 mdims    = mask.dims();

    if (mdims[0]!=mdims[1] || mdims[0]!=mdims[2])
        AF_ERROR("Only cube masks are supported in opencl morph currently", AF_ERR_SIZE);
    if (mdims[0]>7)
        AF_ERROR("Upto 7x7x7 kernels are only supported in opencl currently", AF_ERR_SIZE);

    const dim4 dims= in.dims();
    Array<T> out   = createEmptyArray<T>(dims);

    switch(mdims[0]) {
        case  3: kernel::morph3d<T, isDilation,  3>(out, in, mask); break;
        case  5: kernel::morph3d<T, isDilation,  5>(out, in, mask); break;
        case  7: kernel::morph3d<T, isDilation,  7>(out, in, mask); break;
        default: kernel::morph3d<T, isDilation,  3>(out, in, mask); break;
    }

    return out;
}
Example #4
0
    void rotate_(T *out, const T *in, const float theta,
                 const af::dim4 &odims, const af::dim4 &idims,
                 const af::dim4 &ostrides, const af::dim4 &istrides)
    {
        dim_t nimages = idims[2];

        void (*t_fn)(T *, const T *, const float *, const af::dim4 &,
                     const af::dim4 &, const af::dim4 &,
                     const dim_t, const dim_t, const dim_t, const dim_t);

        const float c = cos(-theta), s = sin(-theta);
        float tx, ty;
        {
            const float nx = 0.5 * (idims[0] - 1);
            const float ny = 0.5 * (idims[1] - 1);
            const float mx = 0.5 * (odims[0] - 1);
            const float my = 0.5 * (odims[1] - 1);
            const float sx = (mx * c + my *-s);
            const float sy = (mx * s + my * c);
            tx = -(sx - nx);
            ty = -(sy - ny);
        }

        const float tmat[6] = {std::round( c * 1000) / 1000.0f,
                               std::round(-s * 1000) / 1000.0f,
                               std::round(tx * 1000) / 1000.0f,
                               std::round( s * 1000) / 1000.0f,
                               std::round( c * 1000) / 1000.0f,
                               std::round(ty * 1000) / 1000.0f,
                              };

        switch(method) {
            case AF_INTERP_NEAREST:
                t_fn = &transform_n;
                break;
            case AF_INTERP_BILINEAR:
                t_fn = &transform_b;
                break;
            default:
                AF_ERROR("Unsupported interpolation type", AF_ERR_ARG);
                break;
        }


        // Do transform for image
        for(int yy = 0; yy < (int)odims[1]; yy++) {
            for(int xx = 0; xx < (int)odims[0]; xx++) {
                t_fn(out, in, tmat, idims, ostrides, istrides, nimages, 0, xx, yy);
            }
        }
    }
Example #5
0
Array<T>  morph(const Array<T> &in, const Array<T> &mask)
{
    const dim4 mdims = mask.dims();

    if (mdims[0] != mdims[1])
        AF_ERROR("Only square masks are supported in cuda morph currently", AF_ERR_SIZE);
    if (mdims[0] > 19)
        AF_ERROR("Upto 19x19 square kernels are only supported in cuda currently", AF_ERR_SIZE);

    Array<T> out = createEmptyArray<T>(in.dims());

    CUDA_CHECK(cudaMemcpyToSymbolAsync(kernel::cFilter, mask.get(),
                                       mdims[0] * mdims[1] * sizeof(T),
                                       0, cudaMemcpyDeviceToDevice,
                                       cuda::getStream(cuda::getActiveDeviceId())));

    if (isDilation)
        kernel::morph<T, true >(out, in, mdims[0]);
    else
        kernel::morph<T, false>(out, in, mdims[0]);

    return out;
}
Example #6
0
    T* memAlloc(const size_t &elements)
    {
        managerInit();

        T* ptr = NULL;
        size_t alloc_bytes = divup(sizeof(T) * elements, memory_resolution) * memory_resolution;

        if (elements > 0) {
            std::lock_guard<std::mutex> lock(memory_map_mutex);

            // FIXME: Add better checks for garbage collection
            // Perhaps look at total memory available as a metric
            if (memory_map.size() > MAX_BUFFERS ||
                used_bytes >= MAX_BYTES) {

                garbageCollect();
            }

            for(mem_iter iter = memory_map.begin();
                iter != memory_map.end(); ++iter) {

                mem_info info = iter->second;

                if ( info.is_free &&
                    !info.is_unlinked &&
                     info.bytes == alloc_bytes) {

                    iter->second.is_free = false;
                    used_bytes += alloc_bytes;
                    used_buffers++;
                    return (T *)iter->first;
                }
            }

            // Perform garbage collection if memory can not be allocated
            ptr = (T *)malloc(alloc_bytes);

            if (ptr == NULL) {
                AF_ERROR("Can not allocate memory", AF_ERR_NO_MEM);
            }

            mem_info info = {false, false, alloc_bytes};
            memory_map[ptr] = info;

            used_bytes += alloc_bytes;
            used_buffers++;
            total_bytes += alloc_bytes;
        }
        return ptr;
    }
Example #7
0
void Array<T>::eval() {
    if (isReady()) return;
    if (getQueue().is_worker())
        AF_ERROR("Array not evaluated", AF_ERR_INTERNAL);

    this->setId(getActiveDeviceId());

    data = shared_ptr<T>(memAlloc<T>(elements()).release(), memFree<T>);

    getQueue().enqueue(kernel::evalArray<T>, *this, this->node);
    // Reset shared_ptr
    this->node = bufferNodePtr<T>();
    ready      = true;
}
Example #8
0
SparseArray<T> sparseConvertDenseToStorage(const Array<T> &in_)
{
    in_.eval();

    // MKL only has dns->csr.
    // CSR <-> CSC is only supported if input is square
    uint nNZ = reduce_all<af_notzero_t, T, uint>(in_);

    SparseArray<T> sparse_ = createEmptySparseArray<T>(in_.dims(), nNZ, AF_STORAGE_CSR);
    sparse_.eval();

    auto func = [=] (SparseArray<T> sparse, const Array<T> in) {
        // Read: https://software.intel.com/en-us/node/520848
        // But job description is incorrect with regards to job[1]
        // 0 implies row major and 1 implies column major
        int j1 = 1, j2 = 0;
        const int job[] = {0, j1, j2, 2, (int)sparse.elements(), 1};

        const int M = in.dims()[0];
        const int N = in.dims()[1];

        int ldd = in.strides()[1];

        int info = 0;

        // Have to mess up all const correctness because MKL dnscsr function
        // is bidirectional and has input/output on all pointers
        Array<T  > &values = sparse.getValues();
        Array<int> &rowIdx = sparse.getRowIdx();
        Array<int> &colIdx = sparse.getColIdx();

        dnscsr_func<T>()(
                job, &M, &N,
                reinterpret_cast<ptr_type<T>>(const_cast<T*>(in.get())), &ldd,
                reinterpret_cast<ptr_type<T>>(values.get()),
                colIdx.get(),
                rowIdx.get(),
                &info);
    };

    getQueue().enqueue(func, sparse_, in_);

    if(stype == AF_STORAGE_CSR)
        return sparse_;
    else
        AF_ERROR("CPU Backend only supports Dense to CSR or COO", AF_ERR_NOT_SUPPORTED);

    return sparse_;
}
    void sort_index(Array<T> &okey, Array<uint> &oval, const Array<T> &in, const uint dim, bool isAscending)
    {
        try {
            // okey contains values, oval contains indices
            okey = copyArray<T>(in);
            oval = range<uint>(in.dims(), dim);
            oval.eval();

            switch(dim) {
                case 0: kernel::sort0ByKey<T, uint>(okey, oval, isAscending); break;
                case 1:
                case 2:
                case 3: kernel::sortByKeyBatched<T, uint>(okey, oval, dim, isAscending); break;
                default: AF_ERROR("Not Supported", AF_ERR_NOT_SUPPORTED);
            }

            if(dim != 0) {
                af::dim4 preorderDims = okey.dims();
                af::dim4 reorderDims(0, 1, 2, 3);
                reorderDims[dim] = 0;
                preorderDims[0] = okey.dims()[dim];
                for(int i = 1; i <= (int)dim; i++) {
                    reorderDims[i - 1] = i;
                    preorderDims[i] = okey.dims()[i - 1];
                }

                okey.setDataDims(preorderDims);
                oval.setDataDims(preorderDims);

                okey = reorder<T>(okey, reorderDims);
                oval = reorder<uint>(oval, reorderDims);
            }
        } catch (std::exception &ex) {
            AF_ERROR(ex.what(), AF_ERR_INTERNAL);
        }
    }
Example #10
0
    void sort_by_key(Array<Tk> &okey, Array<Tv> &oval,
               const Array<Tk> &ikey, const Array<Tv> &ival, const unsigned dim)
    {
        if ((std::is_same<Tk, double>::value || std::is_same<Tk, cdouble>::value) &&
            !isDoubleSupported(getActiveDeviceId())) {
            OPENCL_NOT_SUPPORTED();
        }
        if ((std::is_same<Tv, double>::value || std::is_same<Tv, cdouble>::value) &&
            !isDoubleSupported(getActiveDeviceId())) {
            OPENCL_NOT_SUPPORTED();
        }

        try {
            okey = copyArray<Tk>(ikey);
            oval = copyArray<Tv>(ival);
            switch(dim) {
            case 0: kernel::sort0_by_key<Tk, Tv, isAscending>(okey, oval);
                break;
            default: AF_ERROR("Not Supported", AF_ERR_NOT_SUPPORTED);
            }
        }catch(std::exception &ex) {
            AF_ERROR(ex.what(), AF_ERR_INTERNAL);
        }
    }
Example #11
0
    Array<T> rotate(const Array<T> &in, const float theta, const af::dim4 &odims,
                     const af_interp_type method)
    {
        Array<T> out = createEmptyArray<T>(odims);

        switch(method) {
            case AF_INTERP_NEAREST:
                kernel::rotate<T, AF_INTERP_NEAREST> (out, in, theta);
                break;
            case AF_INTERP_BILINEAR:
                kernel::rotate<T, AF_INTERP_BILINEAR> (out, in, theta);
                break;
            default:
                AF_ERROR("Unsupported interpolation type", AF_ERR_ARG);
                break;
        }

        return out;
    }
Example #12
0
    Array<T> rotate(const Array<T> &in, const float theta, const af::dim4 &odims,
                     const af_interp_type method)
    {
        Array<T> out = createEmptyArray<T>(odims);
        const af::dim4 idims = in.dims();

        switch(method) {
            case AF_INTERP_NEAREST:
                rotate_<T, AF_INTERP_NEAREST>
                       (out.get(), in.get(), theta, odims, idims, out.strides(), in.strides());
                break;
            case AF_INTERP_BILINEAR:
                rotate_<T, AF_INTERP_BILINEAR>
                       (out.get(), in.get(), theta, odims, idims, out.strides(), in.strides());
                break;
            default:
                AF_ERROR("Unsupported interpolation type", AF_ERR_ARG);
                break;
        }

        return out;
    }
Example #13
0
    Array<T> rotate(const Array<T> &in, const float theta, const af::dim4 &odims,
                     const af_interp_type method)
    {
        if ((std::is_same<T, double>::value || std::is_same<T, cdouble>::value) &&
            !isDoubleSupported(getActiveDeviceId())) {
            OPENCL_NOT_SUPPORTED();
        }

        Array<T> out = createEmptyArray<T>(odims);

        switch(method) {
            case AF_INTERP_NEAREST:
                kernel::rotate<T, AF_INTERP_NEAREST> (out, in, theta);
                break;
            case AF_INTERP_BILINEAR:
                kernel::rotate<T, AF_INTERP_BILINEAR> (out, in, theta);
                break;
            default:
                AF_ERROR("Unsupported interpolation type", AF_ERR_ARG);
                break;
        }

        return out;
    }
Example #14
0
Array<T> sparseConvertStorageToDense(const SparseArray<T> &in_)
{
    in_.eval();

    Array<T> dense_ = createValueArray<T>(in_.dims(), scalar<T>(0));
    dense_.eval();

    auto func = [=] (Array<T> dense, const SparseArray<T> in) {
        Array<T  > values = in.getValues();
        Array<int> rowIdx = in.getRowIdx();
        Array<int> colIdx = in.getColIdx();

        kernel::csr_dense<T>()(dense, values, rowIdx, colIdx);
    };

    getQueue().enqueue(func, dense_, in_);

    if(stype == AF_STORAGE_CSR)
        return dense_;
    else
        AF_ERROR("CPU Backend only supports Dense to CSR or COO", AF_ERR_NOT_SUPPORTED);

    return dense_;
}
Example #15
0
void swapdblk(int n, int nb, cl_mem dA, size_t dA_offset, int ldda, int inca,
              cl_mem dB, size_t dB_offset, int lddb, int incb,
              cl_command_queue queue) {
    std::string refName =
        std::string("swapdblk_") + std::string(dtype_traits<T>::getName());

    int device       = getActiveDeviceId();
    kc_entry_t entry = kernelCache(device, refName);

    if (entry.prog == 0 && entry.ker == 0) {
        std::ostringstream options;

        options << " -D T=" << dtype_traits<T>::getName();
        if (std::is_same<T, double>::value || std::is_same<T, cdouble>::value)
            options << " -D USE_DOUBLE";

        const char* ker_strs[] = {swapdblk_cl};
        const int ker_lens[]   = {swapdblk_cl_len};
        Program prog;
        buildProgram(prog, 1, ker_strs, ker_lens, options.str());
        entry.prog = new Program(prog);
        entry.ker  = new Kernel(*entry.prog, "swapdblk");

        addKernelToCache(device, refName, entry);
    }

    int nblocks = n / nb;

    if (nblocks == 0) return;

    int info = 0;
    if (n < 0) {
        info = -1;
    } else if (nb < 1 || nb > 1024) {
        info = -2;
    } else if (ldda < (nblocks - 1) * nb * inca + nb) {
        info = -4;
    } else if (inca < 0) {
        info = -5;
    } else if (lddb < (nblocks - 1) * nb * incb + nb) {
        info = -7;
    } else if (incb < 0) {
        info = -8;
    }

    if (info != 0) {
        AF_ERROR("Invalid configuration", AF_ERR_INTERNAL);
        return;
    }

    NDRange local(nb);
    NDRange global(nblocks * nb);

    cl::Buffer dAObj(dA, true);
    cl::Buffer dBObj(dB, true);

    auto swapdOp =
        KernelFunctor<int, Buffer, unsigned long long, int, int, Buffer,
                      unsigned long long, int, int>(*entry.ker);

    cl::CommandQueue q(queue);
    swapdOp(EnqueueArgs(q, global, local), nb, dAObj, dA_offset, ldda, inca,
            dBObj, dB_offset, lddb, incb);
}
Example #16
0
void swapdblk(int n, int nb,
              cl_mem dA, size_t dA_offset, int ldda, int inca,
              cl_mem dB, size_t dB_offset, int lddb, int incb)
{

    static std::once_flag compileFlags[DeviceManager::MAX_DEVICES];
    static std::map<int, Program*>  swpProgs;
    static std::map<int, Kernel*> swpKernels;

    int device = getActiveDeviceId();

    std::call_once(compileFlags[device], [device] () {

            std::ostringstream options;
            options << " -D T=" << dtype_traits<T>::getName();

            if (std::is_same<T, double>::value ||
                std::is_same<T, cdouble>::value) {
                options << " -D USE_DOUBLE";
            }

            cl::Program prog;
            buildProgram(prog, swapdblk_cl, swapdblk_cl_len, options.str());
            swpProgs[device] = new Program(prog);

            swpKernels[device] = new Kernel(*swpProgs[device], "swapdblk");
        });

    int nblocks = n / nb;

    if(nblocks == 0)
        return;

    int info = 0;
    if (n < 0) {
        info = -1;
    } else if (nb < 1 || nb > 1024) {
        info = -2;
    } else if (ldda < (nblocks-1)*nb*inca + nb) {
        info = -4;
    } else if (inca < 0) {
        info = -5;
    } else if (lddb < (nblocks-1)*nb*incb + nb) {
        info = -7;
    } else if (incb < 0) {
        info = -8;
    }

    if (info != 0) {
        AF_ERROR("Invalid configuration", AF_ERR_INTERNAL);
        return;
    }

    NDRange local(nb);
    NDRange global(nblocks * nb);
    auto swapdOp = make_kernel<int,
                               cl_mem, unsigned long long, int, int,
                               cl_mem, unsigned long long, int, int>(*swpKernels[device]);

    swapdOp(EnqueueArgs(getQueue(), global, local),
            nb,
            dA, dA_offset, ldda, inca,
            dB, dB_offset, lddb, incb);

}
Array<T> cholesky(int *info, const Array<T> &in, const bool is_upper)
{
    AF_ERROR("Linear Algebra is disabled on OpenCL", AF_ERR_NOT_CONFIGURED);
}
Example #18
0
Array<T> solve(const Array<T> &a, const Array<T> &b, const af_mat_prop options)
{
    AF_ERROR("Linear Algebra is diabled on OpenCL",
              AF_ERR_NOT_CONFIGURED);
}
int cholesky_inplace(Array<T> &in, const bool is_upper)
{
    AF_ERROR("Linear Algebra is disabled on OpenCL", AF_ERR_NOT_CONFIGURED);
}
Example #20
0
    Array<T> transform(const Array<T> &in, const Array<float> &transform,
                       const af::dim4 &odims, const af_interp_type method,
                       const bool inverse, const bool perspective)
    {
        Array<T> out = createEmptyArray<T>(odims);

        if(inverse) {
            if (perspective) {
                switch(method) {
                    case AF_INTERP_NEAREST:
                        kernel::transform<T, true, true, AF_INTERP_NEAREST>
                                         (out, in, transform);
                        break;
                    case AF_INTERP_BILINEAR:
                        kernel::transform<T, true, true, AF_INTERP_BILINEAR>
                                         (out, in, transform);
                        break;
                    case AF_INTERP_LOWER:
                        kernel::transform<T, true, true, AF_INTERP_LOWER>
                                         (out, in, transform);
                        break;
                    default:
                        AF_ERROR("Unsupported interpolation type", AF_ERR_ARG);
                        break;
                }
            } else {
                switch(method) {
                    case AF_INTERP_NEAREST:
                        kernel::transform<T, true, false, AF_INTERP_NEAREST>
                                         (out, in, transform);
                        break;
                    case AF_INTERP_BILINEAR:
                        kernel::transform<T, true, false, AF_INTERP_BILINEAR>
                                         (out, in, transform);
                        break;
                    case AF_INTERP_LOWER:
                        kernel::transform<T, true, false, AF_INTERP_LOWER>
                                         (out, in, transform);
                        break;
                    default:
                        AF_ERROR("Unsupported interpolation type", AF_ERR_ARG);
                        break;
                }
            }
        } else {
            if (perspective) {
                switch(method) {
                    case AF_INTERP_NEAREST:
                        kernel::transform<T, false, true, AF_INTERP_NEAREST>
                                         (out, in, transform);
                        break;
                    case AF_INTERP_BILINEAR:
                        kernel::transform<T, false, true, AF_INTERP_BILINEAR>
                                         (out, in, transform);
                        break;
                    case AF_INTERP_LOWER:
                        kernel::transform<T, false, true, AF_INTERP_LOWER>
                                         (out, in, transform);
                        break;
                    default:
                        AF_ERROR("Unsupported interpolation type", AF_ERR_ARG);
                        break;
                }
            } else {
                switch(method) {
                    case AF_INTERP_NEAREST:
                        kernel::transform<T, false, false, AF_INTERP_NEAREST>
                                         (out, in, transform);
                        break;
                    case AF_INTERP_BILINEAR:
                        kernel::transform<T, false, false, AF_INTERP_BILINEAR>
                                         (out, in, transform);
                        break;
                    case AF_INTERP_LOWER:
                        kernel::transform<T, false, false, AF_INTERP_LOWER>
                                         (out, in, transform);
                        break;
                    default:
                        AF_ERROR("Unsupported interpolation type", AF_ERR_ARG);
                        break;
                }
            }
        }

        return out;
    }