Example #1
0
 // Returns an operation-count estimate divided by time_in_ns().
 // The count is m*(m+1)*n when buffer_.side_ equals clblasLeft and
 // m*(n+1)*n otherwise.
 // NOTE(review): time_in_ns() presumably reports nanoseconds, which would make
 // the ratio read as GFLOP/s -- confirm against its definition.
 double gflops()
 {
     // Promote the dimensions to double before multiplying: if they are stored
     // as integers, the triple product could otherwise wrap before the
     // division converts it to floating point.
     const double m = static_cast<double>(buffer_.m_);
     const double n = static_cast<double>(buffer_.n_);

     const double operations = (buffer_.side_ == clblasLeft)
         ? m * (m + 1.0) * n
         : m * (n + 1.0) * n;

     return operations / time_in_ns();
 }
Example #2
0
    // Reports throughput as the number of CSR non-zero elements divided by
    // time_in_ns(). A byte-based estimate is kept below, compiled out.
    double bandwidth()
    {
#if 0
        // Disabled byte-based estimate (original note: "Check VK").
        // Host to GPU: CSR -> [rowOffsets(num_rows + 1) + column indices] * sizeof(int) + sizeof(T) * (num_nonzero)
        // GPU to Host: dense -> [sizeof(T) * denseMtx.num_rows * denseMtx.num_cols]
        size_t sparseBytes = sizeof(cl_int) * (csrMtx.num_nonzeros + csrMtx.num_rows + 1)
                           + sizeof(T) * (csrMtx.num_nonzeros)
                           + sizeof(T) * (denseMtx.num_rows * denseMtx.num_cols);
        return (sparseBytes / time_in_ns());
#endif
        // Elements converted per unit of time.
        const auto convertedElements = csrMtx.num_nonzeros;
        return convertedElements / time_in_ns();
    }// end
Example #3
0
// cl_double2 specialization: returns 4 * m * (m+1) * n when buffer_.side_
// equals clblasLeft, otherwise 4 * m * (n+1) * n, divided by time_in_ns().
double
xTrsm<cl_double2>::
gflops()
{
    const bool leftSide = (buffer_.side_ == clblasLeft);

    // The leading 4.0 makes the whole product floating point.
    const auto flopCount = leftSide
        ? 4.0 * buffer_.m_ * (buffer_.m_ + 1) * buffer_.n_
        : 4.0 * buffer_.m_ * (buffer_.n_ + 1) * buffer_.n_;

    return flopCount / time_in_ns();
}
Example #4
0
	// Reports throughput as the number of CSR non-zero elements divided by
	// time_in_ns(). The byte-based variant below is compiled out.
	double bandwidth()
	{
#if 0
		// Disabled byte-based estimate (original note: "Check VK").
		// Host to GPU: CSR -> [rowOffsets(num_rows + 1) + column indices] * sizeof(int) + sizeof(T) * (num_nonzero)
		// GPU to Host: COO -> row_indices + col_indices + values -> [sizeof(T) * num_nonzero] + sizeof(int)
		size_t sparseBytes = sizeof(cl_int) * (csrMtx.num_nonzeros + csrMtx.num_rows + 1)
			+ sizeof(T) * (csrMtx.num_nonzeros)
			+ sizeof(T) * (cooMtx.num_nonzeros)
			+ sizeof(cl_int) * (cooMtx.num_nonzeros * 2);
		return (sparseBytes / time_in_ns());
#endif
		// Elements converted per unit of time.
		const auto elapsed = time_in_ns();
		return csrMtx.num_nonzeros / elapsed;
	}// end
Example #5
0
    // Reports throughput as (num_cols * num_rows) of csrMtx divided by
    // time_in_ns(). A byte-based estimate is kept below, compiled out.
    double bandwidth( )
    {
#if 0
		//  Assuming that accesses to the vector always hit in the cache after the first access
        //  There are NNZ integers in the cols[ ] array
        //  You access each integer value in row_delimiters[ ] once.
        //  There are NNZ float_types in the vals[ ] array
        //  You read num_cols floats from the vector, afterwards they cache perfectly.
        //  Finally, you write num_rows floats out to DRAM at the end of the kernel.
        return ( sizeof( cl_int )*( csrMtx.num_nonzeros + csrMtx.num_rows ) + sizeof( T ) * ( csrMtx.num_nonzeros + csrMtx.num_cols + csrMtx.num_rows ) ) / time_in_ns( );
#endif
        // Number of elements converted in unit time.
        // Promote before multiplying: if the dimensions are stored in an
        // integral index type, rows * cols can overflow it for large matrices
        // before the division converts the result to floating point.
        const double elementCount = static_cast<double>( csrMtx.num_cols )
                                  * static_cast<double>( csrMtx.num_rows );
        return elementCount / time_in_ns( );
    }
Example #6
0
// cl_double2 specialization: 8 * m * n * k divided by the time per API call,
// where the time per call is time_in_ns() / buffer_.apiCallCount.
double
xGemm<cl_double2>::
gflops()
{
    const auto flopCount   = 8.0 * buffer_.m_ * buffer_.n_ * buffer_.k_;
    const auto timePerCall = time_in_ns() / buffer_.apiCallCount;
    return flopCount / timePerCall;
}
Example #7
0
 // Throughput metric: n_vals divided by time_in_ns(), i.e. the number of
 // elements converted in unit time.
 double bandwidth( )
 {
     const auto elapsed = time_in_ns( );
     return n_vals / elapsed;
 }
Example #8
0
 // Returns n*(n+1)*n / time_in_ns() + n*(n+1) / time_in_ns(), where n is
 // buffer_.n_.
 double gflops()
 {
     // Work in double from the start: if buffer_.n_ is an integer, the cubic
     // term could wrap before the division turns it into a floating-point
     // value.
     const double n = static_cast<double>(buffer_.n_);

     return n * (n + 1.0) * n / time_in_ns() +
         n * (n + 1.0) / time_in_ns();
 }
Example #9
0
 // Returns m * m divided by time_in_ns(), where m is buffer_.m_.
 double gflops()
 {
   // Cast each operand rather than the finished product: casting after the
   // multiplication does nothing to stop an integral m * m from wrapping.
   const double m = static_cast<double>(buffer_.m_);
   return (m * m) / time_in_ns();
 }
Example #10
0
// cl_double2 specialization: returns 4 * m * m divided by time_in_ns(),
// where m is buffer_.m_.
double
xTrmv<cl_double2>::
gflops()
{
  // Promote before multiplying; a cast applied to the finished product would
  // come too late if 4 * m * m has already wrapped in an integral type.
  const double m = static_cast<double>(buffer_.m_);
  return (4.0 * m * m) / time_in_ns();
}
Example #11
0
 // Byte-count model divided by time_in_ns().
 // Original author's note: this needs to be modified later.
 double bandwidth()
 {
     //  Model assumptions (from the original author):
     //   - accesses to the vector always hit in the cache after the first access
     //   - there are NNZ integers in the cols[ ] array
     //   - each integer value in row_delimiters[ ] is accessed once
     //   - there are NNZ float_types in the vals[ ] array
     //   - num_cols floats are read from the vector, afterwards they cache perfectly
     //   - num_rows floats are written out to DRAM at the end of the kernel
     const auto indexBytes = sizeof(clsparseIdx_t)*(csrMtx.num_nonzeros + csrMtx.num_rows);
     const auto valueBytes = sizeof(T) * (csrMtx.num_nonzeros + csrMtx.num_cols + csrMtx.num_rows);

     return (indexBytes + valueBytes) / time_in_ns();
 } // end of function
// cl_double2 specialization: returns (8 * k * n * n + 2 * n) divided by
// time_in_ns(), where k and n are buffer_.k_ and buffer_.n_.
double
xSyr2k<cl_double2>::gflops()
{
        // Use double operands so the count is accumulated in floating point;
        // with integer literals the product stays in the members' own type
        // and could wrap if that type is integral.
        const double k = static_cast<double>(buffer_.k_);
        const double n = static_cast<double>(buffer_.n_);

        return (8.0*k*n*n + 2.0*n)/time_in_ns();
}
 // Returns (2 * k * n * n + n) divided by time_in_ns(), where k and n are
 // buffer_.k_ and buffer_.n_.
 double gflops()
 {
     // Promote to double first so the product cannot wrap if the dimensions
     // are stored as integers.
     const double k = static_cast<double>(buffer_.k_);
     const double n = static_cast<double>(buffer_.n_);

     return (2.0*k*n*n + n)/time_in_ns();
 }
 // Returns 8 * K * N * N / time_in_ns() + 2 * N / time_in_ns(), where K and N
 // are buffer_.K_ and buffer_.N_.
 double gflops()
 {
   // Convert the dimensions themselves to double. A cast wrapped around the
   // whole expression is applied only after the products have been formed, so
   // it would not prevent an integral K * N * N from wrapping.
   const double k = static_cast<double>(buffer_.K_);
   const double n = static_cast<double>(buffer_.N_);

   return 8.0*(k * n * n)/time_in_ns() + 2.0*n/time_in_ns();
 }
Example #15
0
    // Returns 2 * m * n * k divided by the time per API call, where the time
    // per call is time_in_ns() / buffer_.apiCallCount.
    double gflops()
    {
        const auto flopCount   = 2.0 * buffer_.m_ * buffer_.n_ * buffer_.k_;
        const auto timePerCall = time_in_ns() / buffer_.apiCallCount;
        return flopCount / timePerCall;
    }
Example #16
0
 // Returns 2 * n * n divided by time_in_ns(), where n is buffer_.n_.
 double gflops()
 {
     // The 2.0 literal makes the product floating point.
     const auto flopCount = 2.0 * buffer_.n_ * buffer_.n_;
     return flopCount / time_in_ns();
 }
// cl_double2 specialization: returns 4 * n * (n+1) * n divided by
// time_in_ns(), where n is buffer_.n_.
double
xSyrk<cl_double2>::gflops()
{
        // Compute in double: with the integer literal 4 the cubic product
        // stays in the member's own type and could wrap if that type is
        // integral.
        const double n = static_cast<double>(buffer_.n_);

        return 4.0*n*(n + 1.0)*n/time_in_ns();
}
Example #18
0
 // Reports throughput as (n_rows * n_cols) divided by time_in_ns(), i.e. the
 // number of elements processed in unit time.
 double bandwidth()
 {
     // Promote before multiplying: if the dimensions are integral, their
     // product can overflow for large matrices before the division converts
     // it to floating point.
     const double elementCount = static_cast<double>(n_rows)
                               * static_cast<double>(n_cols);
     return elementCount / time_in_ns();
 }
 // Returns N * (N+1) divided by time_in_ns(), where N is buffer.N.
 double gflops()
 {
   // Promote to double first so the product cannot wrap if buffer.N is
   // stored as an integer.
   const double n = static_cast<double>(buffer.N);
   return (n * (n + 1.0)) / time_in_ns();
 }