Example #1
0
        /**
         * Affine transform kernel using nearest-index sampling.
         *
         * Each thread produces one output element. The launch is expected to
         * cover out.dims[0] * nimages threads along x and
         * out.dims[1] * ntransforms threads along y; the quotient of the
         * global thread index by the output extent selects the image /
         * transform, and the remainder is the pixel coordinate inside it.
         *
         * The 2x3 transform coefficients are read from c_tmat, six floats per
         * transform. `inverse` and `T` are declared outside this block
         * (presumably template parameters -- confirm against the enclosing
         * declaration).
         *
         * @param out         output array descriptor (ptr, dims, strides)
         * @param in          input array descriptor (ptr, dims, strides)
         * @param nimages     number of images laid out along the x grid
         * @param ntransforms number of transforms laid out along the y grid
         */
        __global__ static void
        transform_kernel(Param<T> out, CParam<T> in,
                         const dim_type nimages, const dim_type ntransforms)
        {
            // Get thread indices
            const dim_type xx = blockIdx.x * blockDim.x + threadIdx.x;
            const dim_type yy = blockIdx.y * blockDim.y + threadIdx.y;

            // Threads past the end of the (image x transform) grid have no work.
            if(xx >= out.dims[0] * nimages || yy >= out.dims[1] * ntransforms)
                return;

            // Index of channel of images and transform
            const dim_type i_idx = xx / out.dims[0];
            const dim_type t_idx = yy / out.dims[1];

            // Index in local channel -> This is output index
            const dim_type xido = xx - i_idx * out.dims[0];
            const dim_type yido = yy - t_idx * out.dims[1];

            // Global offset
            //          Offset for transform channel + Offset for image channel.
            T *optr = out.ptr + t_idx * nimages * out.strides[2] + i_idx * out.strides[2];
            const T *iptr = in.ptr + i_idx * in.strides[2];

            // Transform is in constant memory.
            const float *tmat_ptr = c_tmat + t_idx * 6;
            float tmat[6];

            // We expect an inverse transform matrix by default.
            // If it is a forward transform, then we need its inverse.
            if(inverse) {
                #pragma unroll
                for(int i = 0; i < 6; i++)
                    tmat[i] = tmat_ptr[i];
            } else {
                calc_affine_inverse(tmat, tmat_ptr);
            }

            // Defensive bounds check on the local output coordinate. A thread
            // is out of range when EITHER coordinate exceeds the extent, so
            // this must be a logical OR (the previous AND only rejected
            // threads that were out of range on both axes).
            if (xido >= out.dims[0] || yido >= out.dims[1]) return;

            // Compute input index by applying the affine map to the output
            // coordinate and rounding to the nearest integer location.
            const dim_type xidi = round(xido * tmat[0]
                                      + yido * tmat[1]
                                             + tmat[2]);
            const dim_type yidi = round(xido * tmat[3]
                                      + yido * tmat[4]
                                             + tmat[5]);

            // Compute memory location of indices
            dim_type loci = (yidi * in.strides[1]  + xidi);
            dim_type loco = (yido * out.strides[1] + xido);

            // Copy to output; locations that map outside the input stay zero.
            T val = 0;
            if (xidi < in.dims[0] && yidi < in.dims[1] && xidi >= 0 && yidi >= 0) val = iptr[loci];

            optr[loco] = val;
        }
Example #2
0
        /**
         * Affine transform kernel that dispatches to an interpolation helper.
         *
         * Grid layout along x: blockIdx.x is split into an image-set index
         * (blockIdx.x / blocksXPerImage) and a block index within one image
         * row. Along y, the global thread index covers
         * out.dims[1] * ntransforms rows; the quotient by out.dims[1] selects
         * the transform and the remainder is the output row.
         *
         * The 2x3 transform coefficients are read from c_tmat, six floats per
         * transform. `inverse`, `method` and `T` are declared outside this
         * block (presumably template parameters -- confirm against the
         * enclosing declaration).
         *
         * @param out             output array descriptor (ptr, dims, strides)
         * @param in              input array descriptor (ptr, dims, strides)
         * @param nimages         number of images handled per image set
         * @param ntransforms     number of transforms laid out along the y grid
         * @param blocksXPerImage number of x blocks that span one image width
         */
        __global__ static void
        transform_kernel(Param<T> out, CParam<T> in, const int nimages,
                         const int ntransforms, const int blocksXPerImage)
        {
            // Compute which image set
            const int setId = blockIdx.x / blocksXPerImage;
            const int blockIdx_x = blockIdx.x - setId * blocksXPerImage;

            // Get thread indices
            const int xx = blockIdx_x * blockDim.x + threadIdx.x;
            const int yy = blockIdx.y * blockDim.y + threadIdx.y;

            // Images remaining for this set; the last set may hold fewer
            // than nimages.
            const int limages = min(out.dims[2] - setId * nimages, nimages);

            // Threads past the end of the output / transform grid have no work.
            if(xx >= out.dims[0] || yy >= out.dims[1] * ntransforms)
                return;

            // Index of transform
            const int t_idx = yy / out.dims[1];

            // Index in local channel -> This is output index
            const int xido = xx;
            const int yido = yy - t_idx * out.dims[1];

            // Global offset
            //          Offset for transform channel + Offset for image channel.
            // NOTE(review): both terms are multiples of nimages * out.strides[2],
            // so (t_idx = 1, setId = 0) and (t_idx = 0, setId = 1) resolve to the
            // same offset -- confirm the host never launches with several
            // transforms AND several image sets at once.
                  T *optr = out.ptr + t_idx * nimages * out.strides[2] + setId * nimages * out.strides[2];
            const T *iptr = in.ptr  + setId * nimages * in.strides[2];

            // Transform is in constant memory.
            const float *tmat_ptr = c_tmat + t_idx * 6;
            float tmat[6];

            // We expect an inverse transform matrix by default.
            // If it is a forward transform, then we need its inverse.
            if(inverse) {
                #pragma unroll
                for(int i = 0; i < 6; i++)
                    tmat[i] = tmat_ptr[i];
            } else {
                calc_affine_inverse(tmat, tmat_ptr);
            }

            // Defensive bounds check on the local output coordinate. A thread
            // is out of range when EITHER coordinate exceeds the extent, so
            // this must be a logical OR (the previous AND only rejected
            // threads that were out of range on both axes).
            if (xido >= out.dims[0] || yido >= out.dims[1]) return;

            // Hand off to the interpolation routine selected by `method`;
            // unrecognised methods write nothing.
            switch(method) {
                case AF_INTERP_NEAREST:
                    transform_n(optr, out, iptr, in, tmat, xido, yido, limages); break;
                case AF_INTERP_BILINEAR:
                    transform_b(optr, out, iptr, in, tmat, xido, yido, limages); break;
                case AF_INTERP_LOWER:
                    transform_l(optr, out, iptr, in, tmat, xido, yido, limages); break;
                default: break;
            }
        }