示例#1
0
    // Convolution kernel: each thread computes one output pixel of `out`.
    //
    // Parameters:
    //   (unnamed I)  - never read in the body; presumably present only so the
    //                  input image type I can be deduced for
    //                  conv_input_tex<I>::tex() -- TODO confirm at the call site.
    //   out          - destination image; also used (via out.has) as the domain
    //                  test for both the output pixel and every sampled neighbour.
    //   kernelsize   - number of (weight, offset) taps read from the
    //                  tex_weights / tex_dpoints textures.
    //
    // Threads whose position is rejected by out.has() write nothing.
    __global__ void convolve_kernel(I, kernel_image2d<O> out, unsigned kernelsize)
    {
      i_int2 pos = thread_pos2d();

      if (out.has(pos))
      {
        // Accumulator uses the value type of O multiplied by float, so the
        // weighted sum is not accumulated in O's own value type.
        bt_change_vtype(O, type_mult(bt_vtype(O), float)) acc = zero();

        for (int tap = 0; tap < kernelsize; ++tap)
        {
          float weight = tex1Dfetch(tex_weights, tap);
          point2d<int> neighbour = i_int2(tex1Dfetch(tex_dpoints, tap)) + pos;

          // Taps that land outside the domain of `out` contribute nothing.
          if (!out.has(neighbour))
            continue;

          acc += O(tex2D(conv_input_tex<I>::tex(), neighbour)) * weight;
        }

        out(pos) = acc;
      }
    }
示例#2
0
// Computes the dot product of one sparse-matrix row with the vector that is
// read through the texture reference `mainVecTexRef`.
//
// Parameters:
//   vals      - matrix values; entry k of row r is read from vals[numRows*k + r].
//   colIdx    - column indices, addressed with the same numRows*k + r layout.
//   rowLength - rowLength[row] gives how many entries are accumulated for `row`.
//   row       - index of the row to process (not range-checked here).
//   numRows   - stride between consecutive entries of the same row.
//
// Returns the accumulated sum of vals[...] * vector[colIdx[...]] for the row.
__device__ float SpMV_Ellpack_device(const float * vals,
                                     const int * colIdx,
                                     const int * rowLength,
                                     const int row,
                                     const int numRows)
{
    const int entryCount = rowLength[row];
    float acc = 0;

    for (int k = 0; k < entryCount; ++k)
    {
        // Values and column indices share one layout, so a single offset
        // addresses both arrays.
        const int slot = numRows * k + row;
        acc += vals[slot] * tex1Dfetch(mainVecTexRef, colIdx[slot]);
    }

    return acc;
}