void lu(Array<T> &lower, Array<T> &upper, Array<int> &pivot, const Array<T> &in) { dim4 iDims = in.dims(); int M = iDims[0]; int N = iDims[1]; Array<T> in_copy = copyArray<T>(in); ////////////////////////////////////////// // LU inplace int *pivotPtr = pinnedAlloc<int>(min(M, N)); T *inPtr = pinnedAlloc<T> (in_copy.elements()); copyData(inPtr, in); getrf_func<T>()(AF_LAPACK_COL_MAJOR, M, N, inPtr, in_copy.strides()[1], pivotPtr); convertPivot(&pivotPtr, M, min(M, N)); pivot = createHostDataArray<int>(af::dim4(M), pivotPtr); ////////////////////////////////////////// // SPLIT into lower and upper dim4 ldims(M, min(M, N)); dim4 udims(min(M, N), N); T *lowerPtr = pinnedAlloc<T>(ldims.elements()); T *upperPtr = pinnedAlloc<T>(udims.elements()); dim4 lst(1, ldims[0], ldims[0] * ldims[1], ldims[0] * ldims[1] * ldims[2]); dim4 ust(1, udims[0], udims[0] * udims[1], udims[0] * udims[1] * udims[2]); lu_split<T>(lowerPtr, upperPtr, inPtr, ldims, udims, iDims, lst, ust, in_copy.strides()); lower = createHostDataArray<T>(ldims, lowerPtr); upper = createHostDataArray<T>(udims, upperPtr); lower.eval(); upper.eval(); pinnedFree(lowerPtr); pinnedFree(upperPtr); pinnedFree(pivotPtr); pinnedFree(inPtr); }
// Computes an LU factorization of `in` and writes the results to the three
// output arrays.
//
//   lower : replaced by a new M x min(M, N) array filled by the split kernel
//   upper : replaced by a new min(M, N) x N array filled by the split kernel
//   pivot : replaced by the value returned from lu_inplace
//   in    : input array; factorization runs on a copy of it
//
// NOTE(review): T is not declared in this block; presumably an enclosing
// template header supplies it — confirm against the full file.
void lu(Array<T> &lower, Array<T> &upper, Array<int> &pivot, const Array<T> &in)
{
    // Evaluate every argument before doing any work.
    lower.eval();
    upper.eval();
    pivot.eval();
    in.eval();

    dim4 inDims = in.dims();
    int rows = inDims[0];
    int cols = inDims[1];
    const int minDim = min(rows, cols);

    // Factorize a copy so that `in` itself is not modified by lu_inplace.
    Array<T> factored = copyArray<T>(in);
    pivot = lu_inplace(factored);

    // Allocate the outputs, then queue the kernel that splits the factored
    // copy into its lower and upper parts.
    dim4 ldims(rows, minDim);
    dim4 udims(minDim, cols);
    lower = createEmptyArray<T>(ldims);
    upper = createEmptyArray<T>(udims);

    getQueue().enqueue(kernel::lu_split<T>, lower, upper, factored);
}