Example #1
    void setup_buffer( double alpha, double beta, const std::string& path )
    {
        int fileError = sparseHeaderfromFile(&n_vals, &n_rows, &n_cols, path.c_str());
        if (fileError != 0)
        {
            throw clsparse::io_exception( "Could not read matrix market header from disk: " + path);
        }

        if (csrMatrixfromFile(row_offsets, col_indices, values, path.c_str(), explicit_zeroes))
        {
            throw clsparse::io_exception( "Could not read matrix market from disk: " + path);
        }

        //n_rows = row_offsets.size( );
        //n_cols = col_indices.size( );
        //n_vals = values.size( );

        cudaError_t err = cudaMalloc( (void**) &device_row_offsets, (n_rows + 1) * sizeof( clsparseIdx_t ) );
        CUDA_V_THROW( err, "cudaMalloc device_row_offsets" );

        err = cudaMalloc( (void**) &device_col_indices, n_vals * sizeof( clsparseIdx_t ) );
        CUDA_V_THROW( err, "cudaMalloc device_col_indices" );

        err = cudaMalloc( (void**) &device_values, n_vals * sizeof( T ) );
        CUDA_V_THROW( err, "cudaMalloc device_values" );

        err = cudaMalloc( (void**) &device_A, n_rows * n_cols * sizeof( T ) );
        CUDA_V_THROW( err, "cudaMalloc device_A" );
    }
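
All of these examples lean on a CUDA_V_THROW error-checking macro that is defined elsewhere in the benchmark harness and never shown here. The snippet below is only a minimal sketch of how such a macro could be implemented, assuming hypothetical cuda_check overloads for both cudaError_t and cusparseStatus_t; it is not the project's actual definition.

    #include <cuda_runtime.h>
    #include <cusparse_v2.h>
    #include <stdexcept>
    #include <string>

    // Hypothetical helpers: one overload for CUDA runtime errors, one for
    // cuSPARSE status codes, so the same macro can wrap both kinds of call.
    inline void cuda_check( cudaError_t err, const std::string& msg )
    {
        if( err != cudaSuccess )
            throw std::runtime_error( msg + ": " + cudaGetErrorString( err ) );
    }

    inline void cuda_check( cusparseStatus_t status, const std::string& msg )
    {
        if( status != CUSPARSE_STATUS_SUCCESS )
            throw std::runtime_error( msg + " (cusparseStatus_t = "
                                      + std::to_string( static_cast<int>( status ) ) + ")" );
    }

    #define CUDA_V_THROW( res, msg ) cuda_check( (res), (msg) )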
Example #2
    xDense2Csr(StatisticalTimer& timer) : cusparseFunc(timer)
    {
        cusparseStatus_t err = cusparseCreateMatDescr(&descrA);
        CUDA_V_THROW(err, "cusparseCreateMatDescr failed");

        err = cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL);
        CUDA_V_THROW(err, "cusparseSetMatType failed");

        err = cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ZERO);
        CUDA_V_THROW(err, "cusparseSetMatIndexBase failed");

        n_rows = 0;
        n_cols = 0;
        n_vals = 0;

        device_col_indices = nullptr;
        device_row_offsets = nullptr;

        device_values = nullptr;
        device_A      = nullptr;
        nnzPerRow     = nullptr;

        devRowOffsets = nullptr;
        devColIndices = nullptr;
        devValues     = nullptr;
    }// end
Example #3
    void releaseGPUBuffer_deleteCPUBuffer( )
    {
        CUDA_V_THROW( cudaFree( deviceCSRRowOffsets ), "cudafree deviceCSRRowOffsets" );
        CUDA_V_THROW( cudaFree( deviceCooRowInd ), "cudafree deviceCooRowInd" );

        row_indices.clear( );
        col_indices.clear( );
        values.clear( );
    }
Example #4
    void initialize_gpu_buffer( )
    {
        cudaError_t err = cudaMemcpy( deviceCooRowInd, &row_indices[ 0 ], row_indices.size( ) * sizeof( clsparseIdx_t ), cudaMemcpyHostToDevice );
        CUDA_V_THROW( err, "cudaMalloc deviceCSRRowOffsets" );

        err = cudaMemset(deviceCSRRowOffsets, 0x0, (n_rows + 1) * sizeof( clsparseIdx_t ));
        CUDA_V_THROW( err, "cudaMemset deviceCSRRowOffsets" );

    }// end of function
Example #5
    void reset_gpu_write_buffer()
    {
        cudaError_t err = cudaMemset(devRowOffsets, 0x0, (n_rows + 1) * sizeof(int));
        CUDA_V_THROW(err, "cudaMemset reset_gpu_write_buffer: devRowOffsets");

        err = cudaMemset(devColIndices, 0x0, n_vals * sizeof(int));
        CUDA_V_THROW(err, "cudaMemset reset_gpu_write_buffer: devColIndices");

        err = cudaMemset(devValues, 0x0, n_vals * sizeof(T));
        CUDA_V_THROW(err, "cudaMemset reset_gpu_write_buffer: devValues");
    }
Example #6
    void releaseGPUBuffer_deleteCPUBuffer( )
    {
        // This is necessary because we run multiple test iterations and compute the average time (in client.cpp),
        // so the GPU buffers have to be released before we eventually hit the destructor.
        CUDA_V_THROW( cudaFree( device_values  ), "cudafree device_values" );
        CUDA_V_THROW( cudaFree( device_row_offsets ), "cudafree device_row_offsets" );
        CUDA_V_THROW( cudaFree( device_col_indices ), "cudafree device_col_indices" );
        CUDA_V_THROW( cudaFree( device_A ), "cudafree device_A" );

        row_offsets.clear( );
        col_indices.clear( );
        values.clear( );
    }
Example #7
    void initialize_gpu_buffer( )
    {
        cudaError_t err = cudaMemcpy( device_row_offsets, &row_offsets[ 0 ], row_offsets.size( ) * sizeof( clsparseIdx_t ), cudaMemcpyHostToDevice );
        CUDA_V_THROW( err, "cudaMalloc device_row_offsets" );

        err = cudaMemcpy( device_col_indices, &col_indices[ 0 ], col_indices.size( ) * sizeof( clsparseIdx_t ), cudaMemcpyHostToDevice );
        CUDA_V_THROW( err, "cudaMalloc device_col_indices" );

        err = cudaMemcpy( device_values, &values[ 0 ], values.size( ) * sizeof( T ), cudaMemcpyHostToDevice );
        CUDA_V_THROW( err, "cudaMalloc device_values" );

        err = cudaMemset( device_A, 0x0, n_rows * n_cols * sizeof( T ) );
        CUDA_V_THROW( err, "cudaMalloc device_A" );
    }
Example #8
    void reset_gpu_write_buffer( )
    {
        cudaError_t err = cudaMemset(deviceCSRRowOffsets, 0x0, (n_rows + 1) * sizeof( clsparseIdx_t ));
        CUDA_V_THROW( err, "cudaMemset deviceCSRRowOffsets" );

        cudaDeviceSynchronize( );
    }// end of function
Example #9
    xCsr2Dense( StatisticalTimer& timer, bool read_explicit_zeroes = true ): cusparseFunc( timer )
    {
        cusparseStatus_t err = cusparseCreateMatDescr( &descrA );
        CUDA_V_THROW( err, "cusparseCreateMatDescr failed" );

        err = cusparseSetMatType( descrA, CUSPARSE_MATRIX_TYPE_GENERAL );
        CUDA_V_THROW( err, "cusparseSetMatType failed" );

        err = cusparseSetMatIndexBase( descrA, CUSPARSE_INDEX_BASE_ZERO );
        CUDA_V_THROW( err, "cusparseSetMatIndexBase failed" );

        n_rows = 0;
        n_cols = 0;
        n_vals = 0;
        explicit_zeroes = read_explicit_zeroes;
    }
Example #10
    void setup_buffer( double alpha, double beta, const std::string& path )
    {
        int fileError = sparseHeaderfromFile( &n_vals, &n_rows, &n_cols, path.c_str( ) );
        if( fileError != 0 )
        {
            throw clsparse::io_exception( "Could not read matrix market header from disk" + path);
        }

        if( cooMatrixfromFile( row_indices, col_indices, values, path.c_str( ), explicit_zeroes ) )
        {
            throw clsparse::io_exception( "Could not read matrix market from disk: " + path );
        }

        // Input: COO Row Indices
        err = cudaMalloc( (void**)&deviceCooRowInd, n_vals * sizeof( clsparseIdx_t ) );
        CUDA_V_THROW( err, "cudaMalloc deviceCooRowInd" );

        // Output: CSR
        cudaError_t err = cudaMalloc( (void**)&deviceCSRRowOffsets, ( n_rows + 1 ) * sizeof( clsparseIdx_t ) );
        CUDA_V_THROW( err, "cudaMalloc deviceCSRRowOffsets" );

    }// End of function
Example #11
void
xCoo2Csr<double>::
xCoo2Csr_Function( bool flush )
{
    cuSparseStatus = cusparseXcoo2csr( handle,
                                       deviceCooRowInd,
                                       n_vals,
                                       n_rows,
                                       deviceCSRRowOffsets,
                                       CUSPARSE_INDEX_BASE_ZERO );

    CUDA_V_THROW( cuSparseStatus, "cusparseCoo2Csr" );

    cudaDeviceSynchronize( );

}
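
For context, here is a self-contained sketch of the same cusparseXcoo2csr conversion outside the benchmark harness. The tiny 3x3 matrix and all buffer names are made up for illustration, and error checking is omitted for brevity.

    #include <cuda_runtime.h>
    #include <cusparse_v2.h>
    #include <cstdio>
    #include <vector>

    int main( )
    {
        // 3x3 matrix with 4 non-zeros; COO row indices must be sorted by row.
        std::vector<int> coo_rows = { 0, 1, 1, 2 };
        const int nnz = static_cast<int>( coo_rows.size( ) );
        const int m   = 3;

        int* d_coo_rows    = nullptr;
        int* d_csr_row_ptr = nullptr;
        cudaMalloc( (void**) &d_coo_rows, nnz * sizeof( int ) );
        cudaMalloc( (void**) &d_csr_row_ptr, ( m + 1 ) * sizeof( int ) );
        cudaMemcpy( d_coo_rows, coo_rows.data( ), nnz * sizeof( int ), cudaMemcpyHostToDevice );

        cusparseHandle_t handle;
        cusparseCreate( &handle );

        // Only the row array changes between COO and CSR; the column indices
        // and values are shared by both formats.
        cusparseXcoo2csr( handle, d_coo_rows, nnz, m, d_csr_row_ptr, CUSPARSE_INDEX_BASE_ZERO );
        cudaDeviceSynchronize( );

        std::vector<int> csr_row_ptr( m + 1 );
        cudaMemcpy( csr_row_ptr.data( ), d_csr_row_ptr, ( m + 1 ) * sizeof( int ), cudaMemcpyDeviceToHost );
        for( int v : csr_row_ptr )
            std::printf( "%d ", v );   // expected output: 0 1 3 4
        std::printf( "\n" );

        cusparseDestroy( handle );
        cudaFree( d_coo_rows );
        cudaFree( d_csr_row_ptr );
        return 0;
    }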
Example #12
void
xDense2Csr<double>::
csr2dense_Function(bool flush)
{
    cuSparseStatus = cusparseDcsr2dense(handle,
                                         n_rows,
                                         n_cols,
                                         descrA,
                                         device_values,
                                         device_row_offsets,
                                         device_col_indices,
                                         device_A,
                                         n_rows); // dense matrix A is stored in column-major format; lda = n_rows
    CUDA_V_THROW(cuSparseStatus, "cusparseDcsr2dense");

    cudaDeviceSynchronize();
}// end of function
Example #13
void
xCsr2Dense<double>::
xCsr2Dense_Function( bool flush )
{
    cuSparseStatus = cusparseDcsr2dense( handle,
                                         n_rows,
                                         n_cols,
                                         descrA,
                                         device_values,
                                         device_row_offsets,
                                         device_col_indices,
                                         device_A,
                                         n_rows );
    CUDA_V_THROW( cuSparseStatus, "cusparseDcsr2dense" );

    cudaDeviceSynchronize( );
}
Example #14
void
xDense2Csr<double>::
xDense2Csr_Function(bool flush)
{
    cuSparseStatus = cusparseDdense2csr(handle,
        n_rows,
        n_cols,
        descrA,
        device_A,
        n_rows,  // lda: leading dimension of the column-major dense matrix (number of rows)
        nnzPerRow,
        devValues,
        devRowOffsets,
        devColIndices);

    CUDA_V_THROW(cuSparseStatus, "cusparseDdense2csr");
    cudaDeviceSynchronize();
}// end of function
Example #15
    void setup_buffer(double alpha, double beta, const std::string& path)
    {
        int fileError = sparseHeaderfromFile(&n_vals, &n_rows, &n_cols, path.c_str());
        if (fileError != 0)
        {
            throw clsparse::io_exception("Could not read matrix market header from disk");
        }

        if (csrMatrixfromFile(row_offsets, col_indices, values, path.c_str()))
        {
            throw clsparse::io_exception("Could not read matrix market header from disk");
        }

        cudaError_t err = cudaMalloc((void**)&device_row_offsets, (n_rows + 1) * sizeof(int));
        CUDA_V_THROW(err, "cudaMalloc device_row_offsets");

        err = cudaMalloc((void**)&device_col_indices, n_vals * sizeof(int));
        CUDA_V_THROW(err, "cudaMalloc device_col_indices");

        err = cudaMalloc((void**)&device_values, n_vals * sizeof(T));
        CUDA_V_THROW(err, "cudaMalloc device_values");

        err = cudaMalloc((void**)&device_A, n_rows * n_cols * sizeof(T));
        CUDA_V_THROW(err, "cudaMalloc device_A");

        // Output CSR
        err = cudaMalloc((void**)&devRowOffsets, (n_rows + 1) * sizeof(int));
        CUDA_V_THROW(err, "cudaMalloc devRowOffsets");

        err = cudaMalloc((void**)&devColIndices, n_vals * sizeof(int));
        CUDA_V_THROW(err, "cudaMalloc devColIndices");

        err = cudaMalloc((void**)&devValues, n_vals * sizeof(T));
        CUDA_V_THROW(err, "cudaMalloc devValues");

        // Allocate memory for nnzPerRow
        err = cudaMalloc((void**)&nnzPerRow, n_rows * sizeof(int));
        CUDA_V_THROW(err, "cudaMalloc nnzPerRow");

    }// end
Example #16
    void reset_gpu_write_buffer( )
    {
        cudaError_t err = cudaMemset( device_A, 0x0, n_rows * n_cols * sizeof( T ) );
        CUDA_V_THROW( err, "cudaMemset reset_gpu_write_buffer" );

    }
Example #17
    void initialize_gpu_buffer()
    {
        cudaError_t err = cudaMemcpy(device_row_offsets, &row_offsets[0], row_offsets.size() * sizeof(int), cudaMemcpyHostToDevice);
        CUDA_V_THROW(err, "cudaMalloc device_row_offsets");

        err = cudaMemcpy(device_col_indices, &col_indices[0], col_indices.size() * sizeof(int), cudaMemcpyHostToDevice);
        CUDA_V_THROW(err, "cudaMalloc device_col_indices");

        err = cudaMemcpy(device_values, &values[0], values.size() * sizeof(T), cudaMemcpyHostToDevice);
        CUDA_V_THROW(err, "cudaMalloc device_values");

        err = cudaMemset(device_A, 0x0, n_rows * n_cols * sizeof(T));
        CUDA_V_THROW(err, "cudaMalloc device_A");

        // call csr2dense to get input in dense format
        csr2dense_Function(true);

        int nnzA;
        // Compute number of nonzero elements per row
        if (typeid(T) == typeid(float))
        {
            cuSparseStatus = cusparseSnnz(handle,
                CUSPARSE_DIRECTION_ROW,
                n_rows,
                n_cols,
                descrA,
                reinterpret_cast< float*> (device_A),
                n_rows,
                nnzPerRow,
                &nnzA);
            CUDA_V_THROW(cuSparseStatus, "cusparseSnnz");
        }
        else if (typeid(T) == typeid(double))
        {
            cuSparseStatus = cusparseDnnz(handle,
                CUSPARSE_DIRECTION_ROW,
                n_rows,
                n_cols,
                descrA,
               reinterpret_cast< double*> (device_A),
                n_rows,
                nnzPerRow,
                &nnzA);
            CUDA_V_THROW(cuSparseStatus, "cusparseDnnz");
        }
        else
        {
            // error
        }

        if (nnzA != n_vals)
        {
            // error
        }
        cudaDeviceSynchronize();
        // Once we have the input in dense format, the input CSR buffers are no longer needed.
        CUDA_V_THROW(cudaFree(device_values), "cudafree device_values");
        CUDA_V_THROW(cudaFree(device_row_offsets), "cudafree device_row_offsets");
        CUDA_V_THROW(cudaFree(device_col_indices), "cudafree device_col_indices");

    }// end
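
Putting the dense-to-CSR pieces above together, the sketch below shows the two-step flow in one free-standing function, using the same legacy cuSPARSE conversion API as these examples: cusparse<t>nnz to count non-zeros per row, then cusparse<t>dense2csr to fill the CSR arrays. The function name dense_to_csr and its parameters are illustrative only, and error checking is again left out.

    #include <cuda_runtime.h>
    #include <cusparse_v2.h>
    #include <vector>

    // Convert a column-major dense matrix already resident on the device into
    // host-side CSR arrays (double precision, zero-based indexing).
    void dense_to_csr( cusparseHandle_t handle, cusparseMatDescr_t descrA,
                       const double* d_A, int n_rows, int n_cols,
                       std::vector<double>& csr_vals,
                       std::vector<int>& csr_row_ptr,
                       std::vector<int>& csr_col_ind )
    {
        int* d_nnz_per_row = nullptr;
        cudaMalloc( (void**) &d_nnz_per_row, n_rows * sizeof( int ) );

        // Step 1: count non-zeros per row and in total (the default host
        // pointer mode lets nnz_total live on the host).
        int nnz_total = 0;
        cusparseDnnz( handle, CUSPARSE_DIRECTION_ROW, n_rows, n_cols, descrA,
                      d_A, n_rows /* lda for column-major storage */,
                      d_nnz_per_row, &nnz_total );

        double* d_vals    = nullptr;
        int*    d_row_ptr = nullptr;
        int*    d_col_ind = nullptr;
        cudaMalloc( (void**) &d_vals, nnz_total * sizeof( double ) );
        cudaMalloc( (void**) &d_row_ptr, ( n_rows + 1 ) * sizeof( int ) );
        cudaMalloc( (void**) &d_col_ind, nnz_total * sizeof( int ) );

        // Step 2: write the CSR value, row-offset, and column-index arrays.
        cusparseDdense2csr( handle, n_rows, n_cols, descrA, d_A, n_rows,
                            d_nnz_per_row, d_vals, d_row_ptr, d_col_ind );
        cudaDeviceSynchronize( );

        csr_vals.resize( nnz_total );
        csr_row_ptr.resize( n_rows + 1 );
        csr_col_ind.resize( nnz_total );
        cudaMemcpy( csr_vals.data( ), d_vals, nnz_total * sizeof( double ), cudaMemcpyDeviceToHost );
        cudaMemcpy( csr_row_ptr.data( ), d_row_ptr, ( n_rows + 1 ) * sizeof( int ), cudaMemcpyDeviceToHost );
        cudaMemcpy( csr_col_ind.data( ), d_col_ind, nnz_total * sizeof( int ), cudaMemcpyDeviceToHost );

        cudaFree( d_nnz_per_row );
        cudaFree( d_vals );
        cudaFree( d_row_ptr );
        cudaFree( d_col_ind );
    }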