コード例 #1
0
ファイル: problem.c プロジェクト: 376473984/fftw3
/*
 * "Do what I mean": fill in every input/output stride of tensor t that
 * was left as zero.
 *
 * Dimensions are visited from the last one down to the first.  An unset
 * stride becomes the corresponding stride of the previously visited
 * iodim multiplied by transform_n() of that iodim's length.  The
 * nti/nto transform kinds are applied only when the previously visited
 * iodim is dt; otherwise SAME is passed.
 *
 * *last_iodim supplies the starting "previous" iodim and, on return,
 * points at the last dimension that was visited (t->dims[0]).  It is
 * left untouched when the rank is non-finite or smaller than 1.
 *
 * Returns t itself in all cases.
 */
static bench_tensor *dwim(bench_tensor *t, bench_iodim **last_iodim,
                          n_transform nti, n_transform nto,
                          bench_iodim *dt)
{
     bench_iodim *prev;
     int k;

     /* Nothing to fill in for a non-finite or empty rank. */
     if (!FINITE_RNK(t->rnk) || t->rnk < 1)
          return t;

     prev = *last_iodim;

     for (k = t->rnk - 1; k >= 0; --k) {
          bench_iodim *cur = &t->dims[k];

          /* Only strides that are still zero get a derived value. */
          if (cur->is == 0)
               cur->is = prev->is * transform_n(prev->n, (prev == dt) ? nti : SAME);
          if (cur->os == 0)
               cur->os = prev->os * transform_n(prev->n, (prev == dt) ? nto : SAME);

          prev = cur;
     }

     *last_iodim = prev;
     return t;
}
コード例 #2
0
ファイル: transform.hpp プロジェクト: hxiaox/arrayfire
        // Applies one of `ntransforms` affine transforms to a set of images,
        // one output pixel per thread.
        //
        // Launch layout, as implied by the index arithmetic below:
        //   * blockIdx.x packs (image set, x-block inside the image) using
        //     `blocksXPerImage` blocks per image along x.
        //   * The y range of the grid covers out.dims[1] * ntransforms rows;
        //     row yy belongs to transform yy / out.dims[1].
        //
        // `T`, `inverse` and `method` are declared outside this excerpt
        // (presumably template parameters -- confirm against the full file).
        //
        // out, in         : output / input image descriptors (ptr, dims, strides).
        // nimages         : number of images handled per image set.
        // ntransforms     : number of 6-float transforms stored in c_tmat.
        // blocksXPerImage : number of x-blocks that cover one image.
        __global__ static void
        transform_kernel(Param<T> out, CParam<T> in, const int nimages,
                         const int ntransforms, const int blocksXPerImage)
        {
            // Compute which image set
            const int setId = blockIdx.x / blocksXPerImage;
            const int blockIdx_x = blockIdx.x - setId * blocksXPerImage;

            // Get thread indices
            const int xx = blockIdx_x * blockDim.x + threadIdx.x;
            const int yy = blockIdx.y * blockDim.y + threadIdx.y;

            // The last image set may hold fewer than nimages images.
            const int limages = min(out.dims[2] - setId * nimages, nimages);

            // Single bounds guard for the whole kernel: threads past the image
            // width or past the last transform's rows have nothing to do.
            if(xx >= out.dims[0] || yy >= out.dims[1] * ntransforms)
                return;

            // Index of the transform this row belongs to
            const int t_idx = yy / out.dims[1];

            // Index in local channel -> This is output index.
            // xido == xx < out.dims[0] and yido is the remainder of yy by
            // out.dims[1], so both are in range after the guard above; the
            // former second check (which also used `&&` instead of `||`)
            // could never fire and has been dropped.
            const int xido = xx;
            const int yido = yy - t_idx * out.dims[1];

            // Global offset
            //          Offset for transform channel + Offset for image channel.
                  T *optr = out.ptr + t_idx * nimages * out.strides[2] + setId * nimages * out.strides[2];
            const T *iptr = in.ptr  + setId * nimages * in.strides[2];

            // Transform is in constant memory, 6 floats per transform.
            const float *tmat_ptr = c_tmat + t_idx * 6;
            float tmat[6];

            // We expect an inverse transform matrix by default.
            // If it is a forward transform, then we need its inverse.
            if(inverse) {
                #pragma unroll
                for(int i = 0; i < 6; i++)
                    tmat[i] = tmat_ptr[i];
            } else {
                calc_affine_inverse(tmat, tmat_ptr);
            }

            // Dispatch on the interpolation method; unknown methods write nothing.
            switch(method) {
                case AF_INTERP_NEAREST:
                    transform_n(optr, out, iptr, in, tmat, xido, yido, limages); break;
                case AF_INTERP_BILINEAR:
                    transform_b(optr, out, iptr, in, tmat, xido, yido, limages); break;
                case AF_INTERP_LOWER:
                    transform_l(optr, out, iptr, in, tmat, xido, yido, limages); break;
                default: break;
            }
        }
コード例 #3
0
ファイル: rotate.hpp プロジェクト: hxiaox/arrayfire
        // Rotation kernel: each thread produces the output pixel (col, row)
        // for one image set and one batch, using the transform carried in
        // `t.tmat`.
        //
        // blockIdx.x packs (image set, x-block inside the image) and
        // blockIdx.y packs (batch, y-block inside the image), using
        // `blocksXPerImage` / `blocksYPerImage` blocks per image.
        // `T` and `method` are declared outside this excerpt.
        __global__ static void
        rotate_kernel(Param<T> out, CParam<T> in, const tmat_t t,
                      const int nimages, const int nbatches,
                      const int blocksXPerImage, const int blocksYPerImage)
        {
            // Split the x block index into image set and block column.
            const int setId   = blockIdx.x / blocksXPerImage;
            const int localBx = blockIdx.x - setId * blocksXPerImage;

            // Split the y block index into batch and block row.
            const int batch   = blockIdx.y / blocksYPerImage;
            const int localBy = blockIdx.y - batch * blocksYPerImage;

            // Pixel coordinates handled by this thread.
            const int col = localBx * blockDim.x + threadIdx.x;
            const int row = localBy * blockDim.y + threadIdx.y;

            // Threads that fall outside the output image do nothing.
            if (col >= out.dims[0]) return;
            if (row >= out.dims[1]) return;

            // The last image set may hold fewer than nimages images.
            const int imagesInSet = min(out.dims[2] - setId * nimages, nimages);

            // Advance both base pointers to this image set, then to this batch.
            T *dst = out.ptr;
            dst += setId * nimages * out.strides[2];
            dst += batch * out.strides[3];

            const T *src = in.ptr;
            src += setId * nimages * in.strides[2];
            src += batch * in.strides[3];

            // Dispatch on the interpolation method; unknown methods write nothing.
            if (method == AF_INTERP_NEAREST) {
                transform_n(dst, out, src, in, t.tmat, col, row, imagesInSet);
            } else if (method == AF_INTERP_BILINEAR) {
                transform_b(dst, out, src, in, t.tmat, col, row, imagesInSet);
            } else if (method == AF_INTERP_LOWER) {
                transform_l(dst, out, src, in, t.tmat, col, row, imagesInSet);
            }
        }