Beispiel #1
0
    void initialize_gpu_buffer()
    {
        cudaError_t err = cudaMemcpy(device_row_offsets, &row_offsets[0], row_offsets.size() * sizeof(int), cudaMemcpyHostToDevice);
        CUDA_V_THROW(err, "cudaMalloc device_row_offsets");

        err = cudaMemcpy(device_col_indices, &col_indices[0], col_indices.size() * sizeof(int), cudaMemcpyHostToDevice);
        CUDA_V_THROW(err, "cudaMalloc device_col_indices");

        err = cudaMemcpy(device_values, &values[0], values.size() * sizeof(T), cudaMemcpyHostToDevice);
        CUDA_V_THROW(err, "cudaMalloc device_values");

        err = cudaMemset(device_A, 0x0, n_rows * n_cols * sizeof(T));
        CUDA_V_THROW(err, "cudaMalloc device_A");

        // call csr2dense to get input in dense format
        csr2dense_Function(true);

        int nnzA;
        // Compute number of nonzero elements per row
        if (typeid(T) == typeid(float))
        {
            cuSparseStatus = cusparseSnnz(handle,
                CUSPARSE_DIRECTION_ROW,
                n_rows,
                n_cols,
                descrA,
                reinterpret_cast< float*> (device_A),
                n_rows,
                nnzPerRow,
                &nnzA);
            CUDA_V_THROW(cuSparseStatus, "cusparseSnnz");
        }
        else if (typeid(T) == typeid(double))
        {
            cuSparseStatus = cusparseDnnz(handle,
                CUSPARSE_DIRECTION_ROW,
                n_rows,
                n_cols,
                descrA,
               reinterpret_cast< double*> (device_A),
                n_rows,
                nnzPerRow,
                &nnzA);
            CUDA_V_THROW(cuSparseStatus, "cusparseDnnz");
        }
        else
        {
            // error
        }

        if (nnzA != n_vals)
        {
            // error
        }
        cudaDeviceSynchronize();
        // Once we get input in dense format, no-loner input csr values are needed.
        CUDA_V_THROW(cudaFree(device_values), "cudafree device_values");
        CUDA_V_THROW(cudaFree(device_row_offsets), "cudafree device_row_offsets");
        CUDA_V_THROW(cudaFree(device_col_indices), "cudafree device_col_indices");

    }// end
Beispiel #2
0
		// Double-precision entry point for the cuSPARSE dense non-zero count.
		// Forwards every argument unchanged to cusparseDnnz, supplying the
		// cuSPARSE handle held in g_context, and returns the status that
		// cusparseDnnz reports.
		cusparseStatus_t cusparseXnnz(cusparseDirection_t dirA,
									  int m,
									  int n,
									  const cusparseMatDescr_t descrA,
									  const double *A,
									  int lda,
									  int *nnzPerRowColumn,
									  int *nnzTotalDevHostPtr)
		{
			const cusparseStatus_t status = cusparseDnnz(
				g_context->cusparseHandle,
				dirA,
				m,
				n,
				descrA,
				A,
				lda,
				nnzPerRowColumn,
				nnzTotalDevHostPtr);
			return status;
		}