Exemplo n.º 1
0
    /**
     * Constructs a vector with the same size, queue and buffer flags as
     * @p other, optionally duplicating its contents.
     *
     * @param other  vector whose size, queue and CL_MEM_FLAGS are reused.
     * @param copy   when true (default) the contents of @p other are copied
     *               into the newly created buffer and the copy event is
     *               waited on; when false only the buffer is created.
     */
    vector(const vector& other, bool copy = true) :
        BASE::_size(other.size()), queue(other.queue)
    {
        cl_int status;
        cl::Event controlEvent;

        const BUFF_TYPE& src = other.data();
        BUFF_TYPE& dst = BASE::data();

        // The new buffer is created with the same memory flags as the source.
        cl_mem_flags flags = src.getInfo<CL_MEM_FLAGS>(&status);

        CLSPARSE_V(status, "Vector cpy constr, getInfo<CL_MEM_FLAGS>");

        assert (BASE::_size > 0);

        dst = create_buffer(BASE::_size, flags);

        if (copy)
        {
            status = queue.enqueueCopyBuffer(src, dst, 0, 0,
                                         sizeof(value_type) * other.size(),
                                         NULL, &controlEvent);
            // Diagnostics name the copy constructor (they used to say "operator=").
            CLSPARSE_V(status, "Vector cpy constr, queue.enqueueCopyBuffer");
            status = controlEvent.wait();
            CLSPARSE_V(status, "Vector cpy constr, controlEvent.wait");
        }

    }
Exemplo n.º 2
0
    // Copy assignment (deep copy): resizes this vector to the length of
    // `other` when the lengths differ, then copies the contents of other's
    // buffer into this one and waits for the copy event.
    // Self-assignment does nothing. Returns *this.
    vector& operator= (const vector& other)
    {
        if (this == &other)
            return *this;

        assert(other.size() > 0);

        const bool sameLength = (size() == other.size());
        if (!sameLength)
            resize(other.size());

        cl::Event copyDone;

        const cl::Buffer& source = other.data();
        cl::Buffer& target = BASE::data();

        cl_int err = queue.enqueueCopyBuffer(source, target, 0, 0,
                                             sizeof(value_type) * other.size(),
                                             NULL, &copyDone);
        CLSPARSE_V(err, "operator= queue.enqueueCopyBuffer");

        err = copyDone.wait();
        CLSPARSE_V(err, "operator= controlEvent.wait");

        return *this;
    }
Exemplo n.º 3
0
void clsparseDeviceTimer::queryOpenCL( size_t id )
{
    for( size_t s = 0; s < timerData.at( id ).size( ); ++s )
    {
        for( size_t n = 0; n < timerData.at( id ).at( s ).size( ); ++n )
        {
            StatData& sd = timerData[ id ][ s ][ n ];

            cl_ulong profStart, profEnd = 0;
            cl_int err = 0;
            sd.deltaNanoSec = 0;

            for( size_t i = 0; i < sd.outEvents.size( ); ++i )
            {
                profStart = sd.outEvents[ i ].getProfilingInfo<CL_PROFILING_COMMAND_START>( &err );
                CLSPARSE_V( err, "clsparseDeviceTimer::queryOpenCL" );

                profEnd = sd.outEvents[ i ].getProfilingInfo<CL_PROFILING_COMMAND_END>( &err );
                CLSPARSE_V( err, "clsparseDeviceTimer::queryOpenCL" );

                sd.deltaNanoSec += ( profEnd - profStart );
            }

            sd.doubleNanoSec = static_cast<cl_double>( sd.deltaNanoSec );
        }
    }
}
Exemplo n.º 4
0
    void initialize_gpu_buffer()
    {
        CLSPARSE_V(::clEnqueueFillBuffer(queue, a.value, &alpha, sizeof(T), 0,
            sizeof(T) * 1, 0, NULL, NULL), "::clEnqueueFillBuffer alpha.value");

        CLSPARSE_V(::clEnqueueFillBuffer(queue, b.value, &beta, sizeof(T), 0,
            sizeof(T) * 1, 0, NULL, NULL), "::clEnqueueFillBuffer beta.value");

    }// end of function
Exemplo n.º 5
0
 // When the reference goes out of scope, unmap the host pointer (if one is
 // set) from the container's buffer and wait for the unmap event, so the
 // device memory is updated.
 ~reference_base()
 {
     if (!host_buffer)
         return;

     ::cl::Event unmapEvent;

     cl_int status = queue.enqueueUnmapMemObject( container.data(), host_buffer, NULL, &unmapEvent );
     CLSPARSE_V( status, "Array failed to unmap host buffer back to device memory" );

     status = unmapEvent.wait( );
     CLSPARSE_V( status, "Failed to wait for unmap event" );
 }
Exemplo n.º 6
0
    // Releases the device buffers currently held by the output matrix csrMtxC
    // and re-initializes the structure with clsparseInitCsrMatrix().
    //
    // Handles that are still null are skipped: passing a null cl_mem to
    // clReleaseMemObject fails with CL_INVALID_MEM_OBJECT, which would turn a
    // reset on a not-yet-populated output matrix into an error.
    void reset_gpu_write_buffer()
    {
        // Every call to clsparseScsrSpGemm() allocates memory to csrMtxC, therefore freeing the memory
        if (csrMtxC.values != nullptr)
        {
            CLSPARSE_V(::clReleaseMemObject(csrMtxC.values), "clReleaseMemObject csrMtxC.values");
        }

        if (csrMtxC.col_indices != nullptr)
        {
            CLSPARSE_V(::clReleaseMemObject(csrMtxC.col_indices), "clReleaseMemObject csrMtxC.col_indices");
        }

        if (csrMtxC.row_pointer != nullptr)
        {
            CLSPARSE_V(::clReleaseMemObject(csrMtxC.row_pointer), "clReleaseMemObject csrMtxC.row_pointer");
        }

        // Initialize the output CSR matrix
        clsparseInitCsrMatrix(&csrMtxC);

    }// end of function
Exemplo n.º 7
0
    // Destructor: releases the control object (printing a message to stdout
    // if that fails), calls clsparseTeardown(), then releases the command
    // queue and the context.
    virtual ~clsparseFunc( )
    {
        if( clsparseSuccess != clsparseReleaseControl( control ) )
            std::cout << "Problem with releasing control object" << std::endl;

        clsparseTeardown( );

        cl_int status = ::clReleaseCommandQueue( queue );
        CLSPARSE_V( status, "releasing command queue" );

        status = ::clReleaseContext( ctx );
        CLSPARSE_V( status, "releasing context" );
    }
Exemplo n.º 8
0
    // Reads the Matrix Market file at `path` into the CSR matrix csrMtx and
    // creates the buffer of the dense output matrix denseMtx with the same
    // row/column counts. pAlpha and pBeta are not used by this function.
    //
    // Throws std::runtime_error when the header or the matrix data cannot be
    // read (also when T is neither float nor double).
    void setup_buffer(double pAlpha, double pBeta, const std::string& path)
    {
        sparseFile = path;

        // Obtain the matrix dimensions from the file header.
        int nnz;
        int row;
        int col;
        clsparseStatus fileError = clsparseHeaderfromFile(&nnz, &row, &col, sparseFile.c_str());
        if (fileError != clsparseSuccess)
            throw std::runtime_error("Could not read matrix market header from disk");

        // Describe the CSR matrix.
        // VK we have to handle other cases if input mtx file is not in CSR format
        clsparseInitCsrMatrix(&csrMtx);
        csrMtx.num_nonzeros = nnz;
        csrMtx.num_rows     = row;
        csrMtx.num_cols     = col;
        // NOTE(review): presumably this call fills in csrMtx.rowBlockSize, which is read below - confirm.
        clsparseCsrMetaSize( &csrMtx, control );

        // Create the read-only device buffers of the CSR matrix.
        cl_int status;

        const size_t valueBytes = csrMtx.num_nonzeros * sizeof(T);
        csrMtx.values = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, valueBytes, NULL, &status);
        CLSPARSE_V(status, "::clCreateBuffer csrMtx.values");

        const size_t colIndexBytes = csrMtx.num_nonzeros * sizeof(cl_int);
        csrMtx.colIndices = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, colIndexBytes, NULL, &status);
        CLSPARSE_V(status, "::clCreateBuffer csrMtx.colIndices");

        const size_t rowOffsetBytes = (csrMtx.num_rows + 1) * sizeof(cl_int);
        csrMtx.rowOffsets = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, rowOffsetBytes, NULL, &status);
        CLSPARSE_V(status, "::clCreateBuffer csrMtx.rowOffsets");

        const size_t rowBlockBytes = csrMtx.rowBlockSize * sizeof(cl_ulong);
        csrMtx.rowBlocks = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, rowBlockBytes, NULL, &status);
        CLSPARSE_V(status, "::clCreateBuffer csrMtx.rowBlocks");

        // Pick the reader that matches the element type T.
        const bool isSingle = (typeid(T) == typeid(float));
        const bool isDouble = !isSingle && (typeid(T) == typeid(double));
        if (isSingle)
        {
            fileError = clsparseSCsrMatrixfromFile(&csrMtx, sparseFile.c_str(), control);
        }
        else if (isDouble)
        {
            fileError = clsparseDCsrMatrixfromFile(&csrMtx, sparseFile.c_str(), control);
        }
        else
        {
            fileError = clsparseInvalidType;
        }

        if (fileError != clsparseSuccess)
            throw std::runtime_error("Could not read matrix market data from disk");

        // Describe and allocate the output dense matrix.
        cldenseInitMatrix(&denseMtx);
        denseMtx.major    = rowMajor;
        denseMtx.num_rows = row;
        denseMtx.num_cols = col;
        denseMtx.lead_dim = col;  // To Check!! VK;

        const size_t denseBytes = denseMtx.num_rows * denseMtx.num_cols * sizeof(T);
        denseMtx.values = ::clCreateBuffer(ctx, CL_MEM_WRITE_ONLY, denseBytes, NULL, &status);
        CLSPARSE_V(status, "::clCreateBuffer denseMtx.values");
    }// end
Exemplo n.º 9
0
    void reset_gpu_write_buffer( )
    {
		
		int scalar_i = 0;
		T scalar_f = 0;
		CLSPARSE_V( ::clEnqueueFillBuffer( queue, csrMtx.rowOffsets, &scalar_i, sizeof( int ), 0,
                              sizeof( int ) * (csrMtx.num_rows + 1), 0, NULL, NULL ), "::clEnqueueFillBuffer row" ); 
		CLSPARSE_V( ::clEnqueueFillBuffer( queue, csrMtx.colIndices, &scalar_i, sizeof( int ), 0,
                              sizeof( int ) * csrMtx.num_nonzeros, 0, NULL, NULL ), "::clEnqueueFillBuffer col" );
		CLSPARSE_V( ::clEnqueueFillBuffer( queue, csrMtx.values, &scalar_f, sizeof( T ), 0,
                              sizeof( T ) * csrMtx.num_nonzeros, 0, NULL, NULL ), "::clEnqueueFillBuffer values" );
    }
Exemplo n.º 10
0
    void cleanup()
    {
        if (gpuTimer && cpuTimer)
        {
            std::cout << "clSPARSE matrix: " << sparseFile << std::endl;
            cpuTimer->pruneOutliers(3.0);
            cpuTimer->Print(flopCnt, "GFlop/s");
            cpuTimer->Reset();

            gpuTimer->pruneOutliers(3.0);
            gpuTimer->Print(flopCnt, "GFlop/s");
            gpuTimer->Reset();
        }

        //this is necessary since we are running a iteration of tests and calculate the average time. (in client.cpp)
        //need to do this before we eventually hit the destructor
        CLSPARSE_V(::clReleaseMemObject(csrMtx.values),     "clReleaseMemObject csrMtx.values");
        CLSPARSE_V(::clReleaseMemObject(csrMtx.col_indices), "clReleaseMemObject csrMtx.col_indices");
        CLSPARSE_V(::clReleaseMemObject(csrMtx.row_pointer), "clReleaseMemObject csrMtx.row_pointer");
        //CLSPARSE_V(::clReleaseMemObject(csrMtx.rowBlocks),  "clReleaseMemObject csrMtx.rowBlocks");

        if (csrMtxC.values != nullptr)
        CLSPARSE_V(::clReleaseMemObject(csrMtxC.values),     "clReleaseMemObject csrMtxC.values");

        if (csrMtxC.col_indices != nullptr)
        CLSPARSE_V(::clReleaseMemObject(csrMtxC.col_indices), "clReleaseMemObject csrMtxC.col_indices");

        if (csrMtxC.row_pointer != nullptr)
        CLSPARSE_V(::clReleaseMemObject(csrMtxC.row_pointer), "clReleaseMemObject csrMtxC.row_pointer");
        //CLSPARSE_V(::clReleaseMemObject(csrMtxC.rowBlocks),  "clReleaseMemObject csrMtxC.rowBlocks");

        CLSPARSE_V(::clReleaseMemObject(a.value), "clReleaseMemObject alpha.value");
        CLSPARSE_V(::clReleaseMemObject(b.value), "clReleaseMemObject beta.value");
    }
Exemplo n.º 11
0
    void setup_buffer(double pAlpha, double pBeta, const std::string& path)
    {
        sparseFile = path;

        // Read sparse data from file and construct a CSR matrix from it
        clsparseIdx_t nnz;
        clsparseIdx_t row;
        clsparseIdx_t col;
        clsparseStatus fileError = clsparseHeaderfromFile(&nnz, &row, &col, sparseFile.c_str());
        if (clsparseSuccess != fileError)
             throw clsparse::io_exception( "Could not read matrix market header from disk: " + sparseFile );

        // Now initialize a CSR matrix from the CSR matrix
        clsparseInitCsrMatrix(&csrMtx);
        csrMtx.num_nonzeros = nnz;
        csrMtx.num_rows     = row;
        csrMtx.num_cols     = col;

        cl_int status;
        csrMtx.values = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, csrMtx.num_nonzeros * sizeof(T), NULL, &status);
        CLSPARSE_V(status, "::clCreateBuffer csrMtx.values");

        csrMtx.col_indices = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, csrMtx.num_nonzeros * sizeof(clsparseIdx_t), NULL, &status);
        CLSPARSE_V(status, "::clCreateBuffer csrMtx.col_indices");

        csrMtx.row_pointer = ::clCreateBuffer(ctx, CL_MEM_READ_ONLY, (csrMtx.num_rows + 1) * sizeof(clsparseIdx_t), NULL, &status);
        CLSPARSE_V(status, "::clCreateBuffer csrMtx.row_pointer");

		if (typeid(T) == typeid(float))
			fileError = clsparseSCsrMatrixfromFile( &csrMtx, sparseFile.c_str(), control, explicit_zeroes );
		else if (typeid(T) == typeid(double))
			fileError = clsparseDCsrMatrixfromFile( &csrMtx, sparseFile.c_str(), control, explicit_zeroes );
		else
			fileError = clsparseInvalidType;

        if (fileError != clsparseSuccess)
            throw std::runtime_error("Could not read matrix market data from disk: " + sparseFile);

        clsparseCsrMetaCreate(&csrMtx, control);

        // Initialize the output dense matrix
        cldenseInitMatrix(&denseMtx);
        denseMtx.major    = rowMajor;
        denseMtx.num_rows = row;
        denseMtx.num_cols = col;
        denseMtx.lead_dim = col;  // To Check!! VK;
        denseMtx.values = ::clCreateBuffer(ctx, CL_MEM_WRITE_ONLY,
                                            denseMtx.num_rows * denseMtx.num_cols * sizeof(T), NULL, &status);
        CLSPARSE_V(status, "::clCreateBuffer denseMtx.values");
    }// end
Exemplo n.º 12
0
    void cleanup( )
    {
        if( gpuTimer && cpuTimer )
        {
          std::cout << "clSPARSE matrix: " << sparseFile << std::endl;
          size_t sparseBytes = sizeof( cl_int )*( csrMtx.num_nonzeros + csrMtx.num_rows ) + sizeof( T ) * ( csrMtx.num_nonzeros + csrMtx.num_cols + csrMtx.num_rows );
          cpuTimer->pruneOutliers( 3.0 );
          cpuTimer->Print( sparseBytes, "GiB/s" );
          cpuTimer->Reset( );

          gpuTimer->pruneOutliers( 3.0 );
          gpuTimer->Print( sparseBytes, "GiB/s" );
          gpuTimer->Reset( );
        }

        //this is necessary since we are running a iteration of tests and calculate the average time. (in client.cpp)
        //need to do this before we eventually hit the destructor
        CLSPARSE_V( ::clReleaseMemObject( csrMtx.values ), "clReleaseMemObject csrMtx.values" );
        CLSPARSE_V( ::clReleaseMemObject( csrMtx.colIndices ), "clReleaseMemObject csrMtx.colIndices" );
        CLSPARSE_V( ::clReleaseMemObject( csrMtx.rowOffsets ), "clReleaseMemObject csrMtx.rowOffsets" );
        CLSPARSE_V( ::clReleaseMemObject( csrMtx.rowBlocks ), "clReleaseMemObject csrMtx.rowBlocks" );

        CLSPARSE_V( ::clReleaseMemObject( x.values ), "clReleaseMemObject x.values" );
        CLSPARSE_V( ::clReleaseMemObject( y.values ), "clReleaseMemObject y.values" );
        CLSPARSE_V( ::clReleaseMemObject( a.value ), "clReleaseMemObject alpha.value" );
        CLSPARSE_V( ::clReleaseMemObject( b.value ), "clReleaseMemObject beta.value" );
    }
Exemplo n.º 13
0
	void reset_gpu_write_buffer()
	{
		T scalar = 0;
		CLSPARSE_V(::clEnqueueFillBuffer(queue, cooMtx.values, &scalar, sizeof(T), 0,
			cooMtx.num_nonzeros * sizeof(T), 0, NULL, NULL), "::clEnqueueFillBuffer cooMtx.values");

		cl_int scalarIntZero = 0;
		CLSPARSE_V(::clEnqueueFillBuffer(queue, cooMtx.rowIndices, &scalarIntZero, sizeof(cl_int), 0,
			cooMtx.num_nonzeros * sizeof(cl_int), 0, NULL, NULL), "::clEnqueueFillBuffer cooMtx.rowIndices");


		CLSPARSE_V(::clEnqueueFillBuffer(queue, cooMtx.colIndices, &scalarIntZero, sizeof(cl_int), 0,
			cooMtx.num_nonzeros * sizeof(cl_int), 0, NULL, NULL), "::clEnqueueFillBuffer cooMtx.colIndices");
	}// end
Exemplo n.º 14
0
    void initialize_gpu_buffer( )
    {
        T scalarOne = 1.0;
        CLSPARSE_V( ::clEnqueueFillBuffer( queue, x.values, &scalarOne, sizeof( T ), 0,
            sizeof( T ) * x.num_values, 0, NULL, NULL ), "::clEnqueueFillBuffer x.values" );

        T scalarZero = 0.0;
        CLSPARSE_V( ::clEnqueueFillBuffer( queue, y.values, &scalarZero, sizeof( T ), 0,
            sizeof( T ) * y.num_values, 0, NULL, NULL ), "::clEnqueueFillBuffer y.values" );

        CLSPARSE_V( ::clEnqueueFillBuffer( queue, a.value, &alpha, sizeof( T ), 0,
            sizeof( T ) * 1, 0, NULL, NULL ), "::clEnqueueFillBuffer alpha.value" );

        CLSPARSE_V( ::clEnqueueFillBuffer( queue, b.value, &beta, sizeof( T ), 0,
            sizeof( T ) * 1, 0, NULL, NULL ), "::clEnqueueFillBuffer beta.value" );
    }
Exemplo n.º 15
0
    void initialize_gpu_buffer()
    {
        T scalarZero = 0.0;
        CLSPARSE_V(::clEnqueueFillBuffer(queue, denseMtx.values, &scalarZero, sizeof(T), 0,
            denseMtx.num_rows * denseMtx.num_cols * sizeof(T), 0, NULL, NULL), "::clEnqueueFillBuffer denseMtx.values");     

    }// end
Exemplo n.º 16
0
    void initialize_gpu_buffer( )
    {
        // We will solve A*x = y,
        // where initial guess of x will be vector of zeros 0,
        // and y will be vector of ones;

        T xValue = 0.0;
        T yValue = 1.0;

        CLSPARSE_V( ::clEnqueueFillBuffer( queue, x.values, &xValue, sizeof( T ), 0,
            sizeof( T ) * x.num_values, 0, NULL, NULL ), "::clEnqueueFillBuffer x.values" );

        CLSPARSE_V( ::clEnqueueFillBuffer( queue, y.values, &yValue, sizeof( T ), 0,
            sizeof( T ) * y.num_values, 0, NULL, NULL ), "::clEnqueueFillBuffer y.values" );


    }
Exemplo n.º 17
0
 // Applies the preconditioner to x and stores the result in y
 // (element-wise product: y = x * invDiag_A).
 void operator ()(const clsparse::vector<T>& x,
                  clsparse::vector<T>& y,
                  clsparseControl control)
 {
     const clsparseStatus status = elementwise_transform<T, EW_MULTIPLY>(y, x, invDiag_A, control);

     CLSPARSE_V(status, "Diagonal operator()");
 }
Exemplo n.º 18
0
    // Writes `rhs` into the referenced element: maps the element at `index`
    // of the container's buffer for writing (blocking map), stores the value,
    // then unmaps it and waits for the unmap event. Returns *this.
    reference_base< Container >& operator=(const value_type& rhs )
    {
        const size_t elementBytes = sizeof( value_type );

        cl_int status = CL_SUCCESS;
        void* mapped = queue.enqueueMapBuffer(container.data(), true, CL_MAP_WRITE_INVALIDATE_REGION,
                                              index * elementBytes, elementBytes,
                                              NULL, NULL, &status );
        naked_pointer element = reinterpret_cast< naked_pointer >( mapped );
        CLSPARSE_V( status, "Array failed map device memory to host memory for operator[]" );

        *element = rhs;

        ::cl::Event unmapEvent;
        status = queue.enqueueUnmapMemObject( container.data(), element, NULL, &unmapEvent );
        CLSPARSE_V( status, "Array failed to unmap host memory back to device memory" );

        status = unmapEvent.wait( );
        CLSPARSE_V( status, "Failed to wait for unmap event" );

        return *this;
    }
Exemplo n.º 19
0
// Fills every element of container `c` with `value` using the container's own
// queue, and waits for the fill event.
//
// c      container providing size(), data() and getQueue().
// value  pattern written to each of the c.size() elements.
//
// Returns the status of the last OpenCL call made. An empty container is a
// precondition violation (asserted); when asserts are compiled out nothing is
// enqueued and CL_SUCCESS is returned.
cl_int fill(Container& c, const T & value)
{
    cl::Event controlEvent;
    // Initialized so the empty-container path never returns an indeterminate value.
    cl_int status = CL_SUCCESS;

    assert (c.size() > 0);
    if (c.size() > 0)
    {
        status = c.getQueue().enqueueFillBuffer(c.data(), value, 0,
                                         c.size() * sizeof(T),
                                         NULL, &controlEvent);
        CLSPARSE_V(status, "queue.enqueueFillBuffer");

        status = controlEvent.wait();
        CLSPARSE_V(status, "controlEvent.wait");
    }

    return status;
}
Exemplo n.º 20
0
    void reset_gpu_write_buffer( )
    {
        // we will solve A*x = y, where initial guess of x will be 0
        T scalar = 0;
        CLSPARSE_V( ::clEnqueueFillBuffer( queue, x.values, &scalar, sizeof( T ), 0,
                             sizeof( T ) * x.num_values, 0, NULL, NULL ), "::clEnqueueFillBuffer x.values" );

        // reset solverControl for next call
        clsparseSetSolverParams(solverControl, NOPRECOND, 100, 1e-2, 1e-8);
    }
Exemplo n.º 21
0
    void resize(size_t size)
    {
        if(this->size() != size)
        {
            cl_mem_flags flags;
            cl_int status = BASE::data().getInfo(CL_MEM_FLAGS, &flags);
            CLSPARSE_V(status, "buffer get info flags");

            BASE::data() = create_buffer(size, flags);
        }
    }
Exemplo n.º 22
0
    //  Conversion of the reference object to a value_type: maps the element
    //  at `index` of the container's buffer for reading (blocking map),
    //  copies it, then unmaps it, waits for the unmap event and returns the copy.
    operator value_type() const
    {
        const size_t elementBytes = sizeof( value_type );

        cl_int status = CL_SUCCESS;
        void* mapped = queue.enqueueMapBuffer( container.data(), true, CL_MAP_READ,
                                               index * elementBytes,
                                               elementBytes,
                                               NULL, NULL, &status);
        naked_pointer element = reinterpret_cast< naked_pointer >( mapped );

        CLSPARSE_V( status, "Array failed map device memory to host memory for operator[]" );

        // Take the copy while the element is still mapped.
        value_type copied = *element;

        ::cl::Event unmapEvent;
        status = queue.enqueueUnmapMemObject( container.data(), element, NULL, &unmapEvent );
        CLSPARSE_V( status, "Array failed to unmap host memory back to device memory" );

        status = unmapEvent.wait( );
        CLSPARSE_V( status, "Failed to wait for unmap event" );

        return copied;
    }
Exemplo n.º 23
0
    // Constructs a vector on the queue of `control`: the buffer comes from
    // create_buffer(size, flags) and, unless `init` is false, it is filled
    // with `value`.
    vector(clsparseControl control, size_t size, const value_type& value = value_type(),
          cl_mem_flags flags = CL_MEM_READ_WRITE, cl_bool init = true) :
        queue(control->queue)
    {
        BASE::data() = create_buffer(size, flags);

        if (!init)
            return;

        const cl_int status = fill(control, value);
        CLSPARSE_V(status, "vector.fill");
    }
Exemplo n.º 24
0
    // Prints and resets both timers (only when both exist), then releases the
    // device buffers of csrMtx and cooMatx.
    void cleanup( )
    {	
        if( gpuTimer && cpuTimer )
        {
          std::cout << "clSPARSE matrix: " << sparseFile << std::endl;
          // A byte count of 0 is handed to Print() here.
          size_t sparseBytes = 0;
          cpuTimer->pruneOutliers( 3.0 );
          cpuTimer->Print( sparseBytes, "GiB/s" );
          cpuTimer->Reset( );

          gpuTimer->pruneOutliers( 3.0 );
          gpuTimer->Print( sparseBytes, "GiB/s" );
          gpuTimer->Reset( );
        }

        //this is necessary since we are running a iteration of tests and calculate the average time. (in client.cpp)
        //need to do this before we eventually hit the destructor
        CLSPARSE_V( ::clReleaseMemObject( csrMtx.values ), "clReleaseMemObject csrMtx.values" );
        CLSPARSE_V( ::clReleaseMemObject( csrMtx.colIndices ), "clReleaseMemObject csrMtx.colIndices" );
        CLSPARSE_V( ::clReleaseMemObject( csrMtx.rowOffsets ), "clReleaseMemObject csrMtx.rowOffsets" );

        CLSPARSE_V( ::clReleaseMemObject( cooMatx.values ), "clReleaseMemObject cooMtx.values" );
        CLSPARSE_V( ::clReleaseMemObject( cooMatx.colIndices ), "clReleaseMemObject cooMtx.colIndices" );
        // The diagnostic names the buffer actually released (it used to say "rowOffsets").
        CLSPARSE_V( ::clReleaseMemObject( cooMatx.rowIndices ), "clReleaseMemObject cooMtx.rowIndices" );
    }
Exemplo n.º 25
0
    // Computes the operation count for C = A * A from the sparsity pattern of
    // csrMtx: for every nonzero of A, the number of entries in the row of A
    // selected by that nonzero's column index is added up, and the total is
    // doubled (one multiply and one add each).
    // The column indices and row pointers are read back from the device with
    // blocking reads.
    clsparseIdx_t xSpMSpM_Getflopcount(void)
    {
        // C = A * B, but here B is A itself.
        const clsparseIdx_t nnzA = csrMtx.num_nonzeros;
        const clsparseIdx_t rowPtrCount = csrMtx.num_rows + 1; // Number of row offsets

        std::vector<clsparseIdx_t> hostColIdx(nnzA, 0);
        std::vector<clsparseIdx_t> hostRowPtr(rowPtrCount, 0);

        // Column indices of A.
        cl_int run_status = clEnqueueReadBuffer(queue,
                                                csrMtx.col_indices,
                                                CL_TRUE, 0,
                                                nnzA*sizeof(clsparseIdx_t),
                                                hostColIdx.data(), 0, nullptr, nullptr);
        CLSPARSE_V(run_status, "Reading col_indices from GPU failed");

        // Row pointers of B (== A).
        run_status = clEnqueueReadBuffer(queue,
                                         csrMtx.row_pointer,
                                         CL_TRUE, 0,
                                         rowPtrCount*sizeof(clsparseIdx_t),
                                         hostRowPtr.data(), 0, nullptr, nullptr);
        CLSPARSE_V(run_status, "Reading row offsets from GPU failed");

        clsparseIdx_t products = 0;
        for (clsparseIdx_t bRow : hostColIdx)
        {
            // nnz in row 'bRow' of B
            products += hostRowPtr[bRow + 1] - hostRowPtr[bRow];
        }

        // Two operations per product - Multiply & Add
        return 2 * products;
    }// end of function
Exemplo n.º 26
0
    // Reads the Matrix Market file at `path` into the CSR matrix csrMtx and
    // creates the device buffers of the dense vectors x and y.
    // pAlpha and pBeta are not used by this function.
    //
    // Throws clsparse::io_exception when the header cannot be read and
    // std::runtime_error when the matrix data cannot be read (also when T is
    // neither float nor double).
    void setup_buffer( double pAlpha, double pBeta, const std::string& path )
    {
        sparseFile = path;

        // Obtain the matrix dimensions from the file header.
        int nnz, row, col;
        clsparseStatus fileError = clsparseHeaderfromFile( &nnz, &row, &col, sparseFile.c_str( ) );
        if( clsparseSuccess != fileError )
            throw clsparse::io_exception( "Could not read matrix market header from disk" );

        // Describe the CSR matrix.
        clsparseInitCsrMatrix( &csrMtx );
        csrMtx.num_nonzeros = nnz;
        csrMtx.num_rows = row;
        csrMtx.num_cols = col;
        // NOTE(review): presumably this call fills in csrMtx.rowBlockSize, which is read below - confirm.
        clsparseCsrMetaSize( &csrMtx, control );

        // Create the read-only device buffers of the CSR matrix.
        cl_int status;

        const size_t valueBytes = csrMtx.num_nonzeros * sizeof( T );
        csrMtx.values = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, valueBytes, NULL, &status );
        CLSPARSE_V( status, "::clCreateBuffer csrMtx.values" );

        const size_t colIndexBytes = csrMtx.num_nonzeros * sizeof( cl_int );
        csrMtx.colIndices = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, colIndexBytes, NULL, &status );
        CLSPARSE_V( status, "::clCreateBuffer csrMtx.colIndices" );

        const size_t rowOffsetBytes = ( csrMtx.num_rows + 1 ) * sizeof( cl_int );
        csrMtx.rowOffsets = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, rowOffsetBytes, NULL, &status );
        CLSPARSE_V( status, "::clCreateBuffer csrMtx.rowOffsets" );

        const size_t rowBlockBytes = csrMtx.rowBlockSize * sizeof( cl_ulong );
        csrMtx.rowBlocks = ::clCreateBuffer( ctx, CL_MEM_READ_ONLY, rowBlockBytes, NULL, &status );
        CLSPARSE_V( status, "::clCreateBuffer csrMtx.rowBlocks" );

        // Pick the reader that matches the element type T.
        const bool isSingle = ( typeid(T) == typeid(float) );
        const bool isDouble = !isSingle && ( typeid(T) == typeid(double) );
        if( isSingle )
        {
            fileError = clsparseSCsrMatrixfromFile( &csrMtx, sparseFile.c_str( ), control );
        }
        else if( isDouble )
        {
            fileError = clsparseDCsrMatrixfromFile( &csrMtx, sparseFile.c_str( ), control );
        }
        else
        {
            fileError = clsparseInvalidType;
        }

        if( clsparseSuccess != fileError )
            throw std::runtime_error( "Could not read matrix market data from disk" );

        // Dense vectors used together with the sparse matrix.
        // NOTE(review): x is sized by num_rows and y by num_cols; for a product
        // A*x = y one would expect the opposite. Equivalent for square matrices - confirm intent.
        clsparseInitVector( &x );
        x.num_values = csrMtx.num_rows;
        const size_t xBytes = x.num_values * sizeof( T );
        x.values = ::clCreateBuffer( ctx, CL_MEM_READ_WRITE, xBytes, NULL, &status );
        CLSPARSE_V( status, "::clCreateBuffer x.values" );

        clsparseInitVector( &y );
        y.num_values = csrMtx.num_cols;
        const size_t yBytes = y.num_values * sizeof( T );
        y.values = ::clCreateBuffer( ctx, CL_MEM_READ_WRITE, yBytes, NULL, &status );
        CLSPARSE_V( status, "::clCreateBuffer y.values" );
    }
Exemplo n.º 27
0
    DiagonalPreconditioner(const clsparseCsrMatrixPrivate* A,
                           clsparseControl control) :
                           invDiag_A( control, std::min( A->num_rows, A->num_cols ), 0, CL_MEM_READ_WRITE, false )
    {

        cl_int status;

        // extract inverse diagonal from matrix A and store it in invDiag_A
        // easy to check with poisson matrix;
        status = extract_diagonal<T, true>(invDiag_A, A, control);
        CLSPARSE_V(status, "Invalid extract_diagonal kernel execution");

    }
Exemplo n.º 28
0
    void cleanup(void)
    {
        if (gpuTimer && cpuTimer)
        {
            std::cout << "clSPARSE matrix: " << sparseFile << std::endl;
#if 0
            // Need to verify this calculation VK
            //size_t sparseBytes = sizeof(cl_int) * (csrMtx.nnz + csrMtx.m) + sizeof(T) * (csrMtx.nnz + csrMtx.n + csrMtx.m);
			//Host to GPU: CSR-> [rowOffsets(num_rows + 1) + Column Indices] * sizeof(int) + sizeof(T) * (num_nonzero)
			//GPU to Host: Dense - > [sizeof(T) * denseMtx.num_rows * denseMTx.num_cols]
            size_t sparseBytes = sizeof(cl_int) * (csrMtx.num_nonzeros + csrMtx.num_rows + 1) + sizeof(T) * (csrMtx.num_nonzeros) + sizeof(T) * (denseMtx.num_rows * denseMtx.num_cols);
            cpuTimer->pruneOutliers(3.0);
            cpuTimer->Print( sparseBytes, "GiB/s" );
            cpuTimer->Reset();

            gpuTimer->pruneOutliers( 3.0 );
            gpuTimer->Print( sparseBytes, "GiB/s" );
            gpuTimer->Reset();
#endif
			// Calculate Number of Elements transformed per unit time
			size_t sparseElements = csrMtx.num_nonzeros;
			cpuTimer->pruneOutliers(3.0);
			cpuTimer->Print(sparseElements, "GiElements/s");
			cpuTimer->Reset();

			gpuTimer->pruneOutliers(3.0);
			gpuTimer->Print(sparseElements, "GiElements/s");
			gpuTimer->Reset();
        }

        //this is necessary since we are running a iteration of tests and calculate the average time. (in client.cpp)
        //need to do this before we eventually hit the destructor
        CLSPARSE_V(::clReleaseMemObject(csrMtx.values), "clReleaseMemObject csrMtx.values");
        CLSPARSE_V(::clReleaseMemObject(csrMtx.colIndices), "clReleaseMemObject csrMtx.colIndices");
        CLSPARSE_V(::clReleaseMemObject(csrMtx.rowOffsets), "clReleaseMemObject csrMtx.rowOffsets");
        CLSPARSE_V(::clReleaseMemObject(csrMtx.rowBlocks), "clReleaseMemObject csrMtx.rowBlocks");

        CLSPARSE_V(::clReleaseMemObject(denseMtx.values), "clReleaseMemObject denseMtx.values");
    }
Exemplo n.º 29
0
    /**
     * Creates a reference to `range` consecutive elements of `rhs` starting
     * at `index` by mapping that region of the container's buffer into host
     * memory for reading and writing (blocking map). The mapped pointer is
     * kept in host_buffer.
     *
     * @param rhs    container whose buffer is mapped.
     * @param index  first element of the mapped region.
     * @param range  number of elements in the mapped region.
     * @param queue  command queue used for the map operation.
     */
    reference_base(Container &rhs, difference_type index,
                   difference_type range, cl::CommandQueue queue):
        container( rhs ), index( index ), range ( range ), queue(queue)
    {
        cl_int status = CL_SUCCESS;


        //should we throw or map until container.size()?
        // The region [index, index + range) may end exactly at the last element,
        // so the bound is inclusive; a strict '<' would reject referencing the
        // final element of the container.
        assert( (index + range) <= container.size() );

        host_buffer = reinterpret_cast< naked_pointer >(
                    queue.enqueueMapBuffer( container.data(), true, CL_MAP_READ | CL_MAP_WRITE,
                                            index * sizeof( value_type ),
                                            range * sizeof( value_type ),
                                            NULL, NULL, &status)
                    );

        CLSPARSE_V( status, "Mapping device buffer on host failed" );
    }
Exemplo n.º 30
0
// Converts a double-precision COO matrix into CSR form.
//
// The dimensions and nonzero count are copied from `coo` to `csr`, the column
// indices and values are copied buffer to buffer, and the row offsets are
// computed from the COO row indices by indices_to_offsets().
//
// Returns clsparseNotInitialized when the library is not initialized,
// clsparseInvalidControlObject when `control` is null, otherwise the status
// of indices_to_offsets().
// NOTE(review): `coo` and `csr` are dereferenced without a null check - callers
// presumably guarantee valid pointers; confirm.
clsparseStatus
clsparseDcoo2csr ( const clsparseCooMatrix* coo,
                   clsparseCsrMatrix* csr,
                   const clsparseControl control)
{
    if (!clsparseInitialized)
        return clsparseNotInitialized;

    //check opencl elements
    if (control == nullptr)
        return clsparseInvalidControlObject;

    csr->num_rows = coo->num_rows;
    csr->num_cols = coo->num_cols;
    csr->num_nonzeros = coo->num_nonzeros;

    // how to obtain proper type of the matrix indices? int assumed
    typedef clsparse::vector<int> IndexVector;
    typedef clsparse::vector<cl_double> ValueVector;

    // Wrappers around the destination (CSR) buffers.
    IndexVector csr_row_offsets (control, csr->rowOffsets, csr->num_rows + 1);
    IndexVector csr_col_indices (control, csr->colIndices, csr->num_nonzeros);
    ValueVector csr_values (control, csr->values, csr->num_nonzeros);

    // Wrappers around the source (COO) buffers.
    IndexVector coo_row_indices (control, coo->rowIndices, coo->num_nonzeros);
    IndexVector coo_col_indices (control, coo->colIndices, coo->num_nonzeros);
    ValueVector coo_values (control, coo->values, coo->num_nonzeros);

    // Column indices and values are carried over with plain copies.
    csr_col_indices = coo_col_indices;
    csr_values = coo_values;

    const clsparseStatus status = indices_to_offsets(csr_row_offsets, coo_row_indices, control);
    CLSPARSE_V(status, "Error: coo2csr indices to offsets");

    return status;

}