/// return the extremal eigenvalues of Ax=cBx std::pair<double, double> generalized_extreme_eigenvalues(const Eigen::SparseMatrix<double> &Ain, const Eigen::SparseMatrix<double> &Bin) { assert(Ain.rows() == Ain.cols()); assert(Ain.rows() == Ain.cols()); assert(Ain.rows() == Bin.rows()); assert(Ain.isCompressed()); assert(Bin.isCompressed()); const int N = static_cast<int>(Ain.rows()); /* mkl_sparse_d_gv input parameters */ char which = 'S'; /* Which eigenvalues to calculate. ('L' - largest (algebraic) eigenvalues, 'S' - smallest (algebraic) eigenvalues) */ int pm[128]; /* This array is used to pass various parameters to Extended Eigensolver Extensions routines. */ int k0 = 1; /* Desired number of max/min eigenvalues */ /* mkl_sparse_d_gv output parameters */ int k; /* Number of eigenvalues found (might be less than k0). */ double E_small[3]; /* Eigenvalues */ double E_large[3]; /* Eigenvalues */ double X[3]; /* Eigenvectors */ double res[3]; /* Residual */ /* Local variables */ int compute_vectors = 0; /* Flag to compute eigenvectors */ int tol = 7; /* Tolerance */ /* Sparse BLAS IE variables */ sparse_status_t status; ConvertToMklResult A = to_mkl(Ain, status); // TODO: check A.status; ConvertToMklResult B = to_mkl(Bin, status); // TODO: check B.status; /* Step 2. Call mkl_sparse_ee_init to define default input values */ mkl_sparse_ee_init(pm); pm[1] = tol; /* Set tolerance */ pm[6] = compute_vectors; /* Step 3. Solve the standard Ax = ex eigenvalue problem. */ which = 'S'; const int infoS = mkl_sparse_d_gv(&which, pm, A.matrix, A.descr, B.matrix, B.descr, k0, &k, E_small, X, res); assert(infoS == 0); which = 'L'; const int infoL = mkl_sparse_d_gv(&which, pm, A.matrix, A.descr, B.matrix, B.descr, k0, &k, E_large, X, res); assert(infoL == 0); mkl_sparse_destroy(A.matrix); mkl_sparse_destroy(B.matrix); return {E_small[0], E_large[0]}; // todo: return the right thing }
void mkl_apply( KernelHandle *handle, typename KernelHandle::row_lno_t m, typename KernelHandle::row_lno_t n, typename KernelHandle::row_lno_t k, in_row_index_view_type row_mapA, in_nonzero_index_view_type entriesA, in_nonzero_value_view_type valuesA, bool transposeA, in_row_index_view_type row_mapB, in_nonzero_index_view_type entriesB, in_nonzero_value_view_type valuesB, bool transposeB, typename in_row_index_view_type::non_const_type &row_mapC, typename in_nonzero_index_view_type::non_const_type &entriesC, typename in_nonzero_value_view_type::non_const_type &valuesC){ #ifdef KERNELS_HAVE_MKL typedef typename KernelHandle::row_lno_t idx; typedef in_row_index_view_type idx_array_type; typedef typename KernelHandle::nnz_scalar_t value_type; typedef typename in_row_index_view_type::device_type device1; typedef typename in_nonzero_index_view_type::device_type device2; typedef typename in_nonzero_value_view_type::device_type device3; typedef typename KernelHandle::HandleExecSpace MyExecSpace; std::cout << "RUNNING MKL" << std::endl; #if defined( KOKKOS_HAVE_CUDA ) if (!Kokkos::Impl::is_same<Kokkos::Cuda, device1 >::value){ std::cerr << "MEMORY IS NOT ALLOCATED IN HOST DEVICE for MKL" << std::endl; return; } if (!Kokkos::Impl::is_same<Kokkos::Cuda, device2 >::value){ std::cerr << "MEMORY IS NOT ALLOCATED IN HOST DEVICE for MKL" << std::endl; return; } if (!Kokkos::Impl::is_same<Kokkos::Cuda, device3 >::value){ std::cerr << "MEMORY IS NOT ALLOCATED IN HOST DEVICE for MKL" << std::endl; return; } #endif if (Kokkos::Impl::is_same<idx, int>::value){ int *a_xadj = (int *)row_mapA.ptr_on_device(); int *b_xadj = (int *)row_mapB.ptr_on_device(); int *c_xadj = (int *)row_mapC.ptr_on_device(); int *a_adj = (int *)entriesA.ptr_on_device(); int *b_adj = (int *)entriesB.ptr_on_device(); int *c_adj = (int *)entriesC.ptr_on_device(); int nnzA = entriesA.dimension_0(); int nnzB = entriesB.dimension_0(); value_type *a_ew = valuesA.ptr_on_device(); value_type *b_ew = 
valuesB.ptr_on_device(); value_type *c_ew = valuesC.ptr_on_device(); sparse_matrix_t A; sparse_matrix_t B; sparse_matrix_t C; if (Kokkos::Impl::is_same<value_type, float>::value){ if (SPARSE_STATUS_SUCCESS != mkl_sparse_s_create_csr (&A, SPARSE_INDEX_BASE_ZERO, m, n, a_xadj, a_xadj + 1, a_adj, (float *)a_ew)){ std::cerr << "CANNOT CREATE mkl_sparse_s_create_csr A" << std::endl; return; } if (SPARSE_STATUS_SUCCESS != mkl_sparse_s_create_csr (&B, SPARSE_INDEX_BASE_ZERO, n, k, b_xadj, b_xadj + 1, b_adj, (float *)b_ew)){ std::cerr << "CANNOT CREATE mkl_sparse_s_create_csr B" << std::endl; return; } sparse_operation_t operation; if (transposeA && transposeB){ operation = SPARSE_OPERATION_TRANSPOSE; } else if (!(transposeA || transposeB)){ operation = SPARSE_OPERATION_NON_TRANSPOSE; } else { std::cerr << "Ask both to transpose or non transpose for MKL SPGEMM" << std::endl; return; } Kokkos::Impl::Timer timer1; bool success = SPARSE_STATUS_SUCCESS != mkl_sparse_spmm (operation, A, B, &C); std::cout << "Actual FLOAT MKL SPMM Time:" << timer1.seconds() << std::endl; if (success){ std::cerr << "CANNOT multiply mkl_sparse_spmm " << std::endl; return; } else{ sparse_index_base_t c_indexing; MKL_INT c_rows, c_cols, *rows_start, *rows_end, *columns; float *values; if (SPARSE_STATUS_SUCCESS != mkl_sparse_s_export_csr (C, &c_indexing, &c_rows, &c_cols, &rows_start, &rows_end, &columns, &values)){ std::cerr << "CANNOT export result matrix " << std::endl; return; } if (SPARSE_INDEX_BASE_ZERO != c_indexing){ std::cerr << "C is not zero based indexed." 
<< std::endl; return; } row_mapC = typename in_row_index_view_type::non_const_type(Kokkos::ViewAllocateWithoutInitializing("rowmapC"), c_rows + 1); entriesC = typename in_nonzero_index_view_type::non_const_type (Kokkos::ViewAllocateWithoutInitializing("EntriesC") , rows_end[m - 1] ); valuesC = typename in_nonzero_value_view_type::non_const_type (Kokkos::ViewAllocateWithoutInitializing("valuesC") , rows_end[m - 1]); KokkosKernels::Experimental::Util::copy_vector<MKL_INT *, typename in_row_index_view_type::non_const_type, MyExecSpace> (m, rows_start, row_mapC); idx nnz = row_mapC(m) = rows_end[m - 1]; KokkosKernels::Experimental::Util::copy_vector<MKL_INT *, typename in_nonzero_index_view_type::non_const_type , MyExecSpace> (nnz, columns, entriesC); KokkosKernels::Experimental::Util::copy_vector<float *, typename in_nonzero_value_view_type::non_const_type, MyExecSpace> (m, values, valuesC); } if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy (A)){ std::cerr << "CANNOT DESTROY mkl_sparse_destroy A" << std::endl; return; } if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy (B)){ std::cerr << "CANNOT DESTROY mkl_sparse_destroy B" << std::endl; return; } if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy (C)){ std::cerr << "CANNOT DESTROY mkl_sparse_destroy C" << std::endl; return; } } else if (Kokkos::Impl::is_same<value_type, double>::value){ /* std::cout << "create a" << std::endl; std::cout << "m:" << m << " n:" << n << std::endl; std::cout << "a_xadj[0]:" << a_xadj[0] << " a_xadj[m]:" << a_xadj[m] << std::endl; std::cout << "a_adj[a_xadj[m] - 1]:" << a_adj[a_xadj[m] - 1] << " a_ew[a_xadj[m] - 1]:" << a_ew[a_xadj[m] - 1] << std::endl; */ if (SPARSE_STATUS_SUCCESS != mkl_sparse_d_create_csr (&A, SPARSE_INDEX_BASE_ZERO, m, n, a_xadj, a_xadj + 1, a_adj, (double *)a_ew)){ std::cerr << "CANNOT CREATE mkl_sparse_d_create_csr A" << std::endl; return; } //std::cout << "create b" << std::endl; if (SPARSE_STATUS_SUCCESS != mkl_sparse_d_create_csr (&B, SPARSE_INDEX_BASE_ZERO, n, k, 
b_xadj, b_xadj + 1, b_adj, (double *) b_ew)){ std::cerr << "CANNOT CREATE mkl_sparse_d_create_csr B" << std::endl; return; } sparse_operation_t operation; if (transposeA && transposeB){ operation = SPARSE_OPERATION_TRANSPOSE; } else if (!(transposeA || transposeB)){ operation = SPARSE_OPERATION_NON_TRANSPOSE; } else { std::cerr << "Ask both to transpose or non transpose for MKL SPGEMM" << std::endl; return; } Kokkos::Impl::Timer timer1; bool success = SPARSE_STATUS_SUCCESS != mkl_sparse_spmm (operation, A, B, &C); std::cout << "Actual DOUBLE MKL SPMM Time:" << timer1.seconds() << std::endl; if (success){ std::cerr << "CANNOT multiply mkl_sparse_spmm " << std::endl; return; } else{ sparse_index_base_t c_indexing; MKL_INT c_rows, c_cols, *rows_start, *rows_end, *columns; double *values; if (SPARSE_STATUS_SUCCESS != mkl_sparse_d_export_csr (C, &c_indexing, &c_rows, &c_cols, &rows_start, &rows_end, &columns, &values)){ std::cerr << "CANNOT export result matrix " << std::endl; return; } if (SPARSE_INDEX_BASE_ZERO != c_indexing){ std::cerr << "C is not zero based indexed." 
<< std::endl; return; } { Kokkos::Impl::Timer copy_time; row_mapC = typename in_row_index_view_type::non_const_type(Kokkos::ViewAllocateWithoutInitializing("rowmapC"), c_rows + 1); entriesC = typename in_nonzero_index_view_type::non_const_type (Kokkos::ViewAllocateWithoutInitializing("EntriesC") , rows_end[m - 1] ); valuesC = typename in_nonzero_value_view_type::non_const_type (Kokkos::ViewAllocateWithoutInitializing("valuesC") , rows_end[m - 1]); KokkosKernels::Experimental::Util::copy_vector<MKL_INT *, typename in_row_index_view_type::non_const_type, MyExecSpace> (m, rows_start, row_mapC); idx nnz = row_mapC(m) = rows_end[m - 1]; KokkosKernels::Experimental::Util::copy_vector<MKL_INT *, typename in_nonzero_index_view_type::non_const_type, MyExecSpace> (nnz, columns, entriesC); KokkosKernels::Experimental::Util::copy_vector<double *, typename in_nonzero_value_view_type::non_const_type, MyExecSpace> (m, values, valuesC); double copy_time_d = copy_time.seconds(); std::cout << "MKL COPYTIME:" << copy_time_d << std::endl; } } if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy (A)){ std::cerr << "CANNOT DESTROY mkl_sparse_destroy A" << std::endl; return; } if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy (B)){ std::cerr << "CANNOT DESTROY mkl_sparse_destroy B" << std::endl; return; } if (SPARSE_STATUS_SUCCESS != mkl_sparse_destroy (C)){ std::cerr << "CANNOT DESTROY mkl_sparse_destroy C" << std::endl; return; } } else { std::cerr << "CUSPARSE requires float or double values. cuComplex and cuDoubleComplex are not implemented yet." 
<< std::endl; return; } } else { //int *a_xadj = row_mapA.ptr_on_device(); std::cerr << "MKL requires integer values" << std::endl; if (Kokkos::Impl::is_same<idx, unsigned int>::value){ std::cerr << "MKL is given unsigned integer" << std::endl; } else if (Kokkos::Impl::is_same<idx, long>::value){ std::cerr << "MKL is given long" << std::endl; } else if (Kokkos::Impl::is_same<idx, const int>::value){ std::cerr << "MKL is given const int" << std::endl; } else if (Kokkos::Impl::is_same<idx, unsigned long>::value){ std::cerr << "MKL is given unsigned long" << std::endl; } else if (Kokkos::Impl::is_same<idx, const unsigned long>::value){ std::cerr << "MKL is given const unsigned long" << std::endl; } else{ std::cerr << "MKL is given something else" << std::endl; } return; } #else std::cerr << "MKL IS NOT DEFINED" << std::endl; return; #endif }
/// Multiplies a sparse matrix (lhs) by a dense array (rhs) through the MKL
/// inspector-executor Sparse BLAS.
///
/// The CSR arrays of lhs are wrapped in an MKL handle (zero-based indexing)
/// and multiplied with the mv routine when rhs has a single column, or with
/// the mm routine (column-major layout) otherwise.
///
/// @param lhs     sparse left operand.
/// @param rhs     dense right operand.
/// @param optLhs  transpose option applied to lhs.
/// @param optRhs  not used: MKL CSRMM does not support an option on rhs.
/// @return dense array of size M x N, where M is the row count of op(lhs) and
///         N is the second dimension of rhs.
Array<T> matmul(const common::SparseArray<T> lhs, const Array<T> rhs,
                af_mat_prop optLhs, af_mat_prop optRhs)
{
    // MKL: CSRMM Does not support optRhs
    lhs.eval();
    rhs.eval();

    // Similar Operations to GEMM
    sparse_operation_t lOpts = toSparseTranspose(optLhs);

    // Row dimension of op(lhs): dim 0 when not transposed, dim 1 otherwise.
    int lRowDim = (lOpts == SPARSE_OPERATION_NON_TRANSPOSE) ? 0 : 1;

    static const int rColDim = 1;

    dim4 lDims = lhs.dims();
    dim4 rDims = rhs.dims();
    int M = lDims[lRowDim];
    int N = rDims[rColDim];

    // The output starts as zeros and beta is 0, so the product overwrites it.
    Array<T> out = createValueArray<T>(af::dim4(M, N, 1, 1), scalar<T>(0));
    out.eval();

    auto alpha = getScale<T, 1>();
    auto beta  = getScale<T, 0>();

    int ldb = rhs.strides()[1];
    int ldc = out.strides()[1];

    // get host pointers from mapped memory
    auto rhsPtr = rhs.getMappedPtr();
    auto outPtr = out.getMappedPtr();

    Array<T  > values = lhs.getValues();
    Array<int> rowIdx = lhs.getRowIdx();
    Array<int> colIdx = lhs.getColIdx();

    auto vPtr = values.getMappedPtr();
    auto rPtr = rowIdx.getMappedPtr();
    auto cPtr = colIdx.getMappedPtr();

    // Row begin/end arrays are the same offsets array shifted by one entry.
    int* pB = rPtr.get();
    int* pE = rPtr.get() + 1;

    sparse_matrix_t csrLhs;
    create_csr_func<T>()(&csrLhs, SPARSE_INDEX_BASE_ZERO,
                         lDims[0], lDims[1],
                         pB, pE, cPtr.get(),
                         reinterpret_cast<ptr_type<T>>(vPtr.get()));

    // Zero-initialize so the fields that are not set below (fill mode, diag)
    // are not passed to MKL with indeterminate values.
    struct matrix_descr descrLhs = {};
    descrLhs.type = SPARSE_MATRIX_TYPE_GENERAL;

    if (rDims[rColDim] == 1) {
        // The hint is registered before mkl_sparse_optimize so that the
        // optimization step can take it into account.
        mkl_sparse_set_mv_hint(csrLhs, lOpts, descrLhs, 1);
        mkl_sparse_optimize(csrLhs);

        mv_func<T>()(
            lOpts, alpha,
            csrLhs, descrLhs,
            reinterpret_cast<cptr_type<T>>(rhsPtr.get()),
            beta,
            reinterpret_cast<ptr_type<T>>(outPtr.get()));
    } else {
        mkl_sparse_set_mm_hint(csrLhs, lOpts, descrLhs, SPARSE_LAYOUT_COLUMN_MAJOR, N, 1);
        mkl_sparse_optimize(csrLhs);

        mm_func<T>()(
            lOpts, alpha,
            csrLhs, descrLhs, SPARSE_LAYOUT_COLUMN_MAJOR,
            reinterpret_cast<cptr_type<T>>(rhsPtr.get()),
            N, ldb, beta,
            reinterpret_cast<ptr_type<T>>(outPtr.get()), ldc);
    }

    mkl_sparse_destroy(csrLhs);

    return out;
}