// Bilinear sample of `src` at the fractional position (x, y).
//
// The four pixels at the integer corners of the unit cell containing (x, y)
// are read and blended; each pixel is weighted by the product of its
// horizontal and vertical distance-complement to the sample point, so the
// four weights sum to 1.
//
// y, x    : fractional row / column of the sample point.
// returns : the blended value, accumulated in a float vector with the same
//           channel count as elem_type and converted back via saturate_cast.
//
// NOTE(review): corner reads can fall one pixel outside the sampled cell's
// image (e.g. right/bottom); this relies on `src` handling such coordinates,
// which is not visible from here - confirm.
__device__ __forceinline__ elem_type operator ()(float y, float x) const
{
    typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;

    // Integer corners of the enclosing cell (conversion rounds down).
    const int left   = __float2int_rd(x);
    const int top    = __float2int_rd(y);
    const int right  = left + 1;
    const int bottom = top + 1;

    // Per-axis weights: the closer the sample is to a column/row, the larger
    // that column's/row's weight.
    const float wLeftCol   = right - x;
    const float wRightCol  = x - left;
    const float wTopRow    = bottom - y;
    const float wBottomRow = y - top;

    work_type acc = VecTraits<work_type>::all(0);

    elem_type texel = src(top, left);
    acc = acc + texel * (wLeftCol * wTopRow);

    texel = src(top, right);
    acc = acc + texel * (wRightCol * wTopRow);

    texel = src(bottom, left);
    acc = acc + texel * (wLeftCol * wBottomRow);

    texel = src(bottom, right);
    acc = acc + texel * (wRightCol * wBottomRow);

    return saturate_cast<elem_type>(acc);
}
// Weighted sample of `src` at the fractional position (x, y) using the
// bicubicCoeff kernel.
//
// Every integer coordinate within distance 2 of (x, y) on each axis
// contributes, weighted by bicubicCoeff(dx) * bicubicCoeff(dy). The weighted
// sum is divided by the total weight; if the total weight is exactly zero the
// result is all zeros instead.
//
// y, x    : fractional row / column of the sample point.
// returns : the normalized weighted sum converted with saturate_cast.
//
// NOTE(review): the loops step float counters by 1.0f, which assumes the
// coordinates are small enough for that increment to be representable.
__device__ elem_type operator ()(float y, float x) const
{
    // Inclusive integer-valued bounds of the sampling window.
    const float firstCol = ::ceilf(x - 2.0f);
    const float lastCol  = ::floorf(x + 2.0f);
    const float firstRow = ::ceilf(y - 2.0f);
    const float lastRow  = ::floorf(y + 2.0f);

    work_type acc = VecTraits<work_type>::all(0);
    float weightTotal = 0.0f;

    for (float row = firstRow; row <= lastRow; row += 1.0f)
    {
        const int rowIdx = __float2int_rd(row);

        for (float col = firstCol; col <= lastCol; col += 1.0f)
        {
            const float weight = bicubicCoeff(x - col) * bicubicCoeff(y - row);

            acc = acc + weight * src(rowIdx, __float2int_rd(col));
            weightTotal += weight;
        }
    }

    // Guard the normalization against a zero total weight.
    work_type result = VecTraits<work_type>::all(0);
    if (weightTotal != 0.0f)
    {
        result = acc / weightTotal;
    }

    return saturate_cast<elem_type>(result);
}
// Area sample over whole source pixels.
//
// The destination coordinate (x, y) is mapped to the source rectangle
// [x * scale_x, x * scale_x + scale_x) x [y * scale_y, y * scale_y + scale_y).
// Every source pixel whose integer index lies in the rounded-inward range is
// read, multiplied by the member `scale`, and summed.
//
// y, x    : destination row / column.
// returns : the accumulated value converted with saturate_cast.
//
// NOTE(review): `scale` is declared outside this view; presumably it is the
// reciprocal of the footprint area so that the sum is an average - confirm.
__device__ __forceinline__ elem_type operator ()(float y, float x) const
{
    typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;

    // Source-space footprint of this destination pixel.
    const float srcLeft   = x * scale_x;
    const float srcRight  = srcLeft + scale_x;
    const float srcTop    = y * scale_y;
    const float srcBottom = srcTop + scale_y;

    // Round the start up and the end down: only fully covered pixels remain.
    const int colBegin = __float2int_ru(srcLeft);
    const int colEnd   = __float2int_rd(srcRight);
    const int rowBegin = __float2int_ru(srcTop);
    const int rowEnd   = __float2int_rd(srcBottom);

    work_type acc = VecTraits<work_type>::all(0.f);

    for (int row = rowBegin; row < rowEnd; ++row)
    {
        for (int col = colBegin; col < colEnd; ++col)
        {
            acc = acc + src(row, col) * scale;
        }
    }

    return saturate_cast<elem_type>(acc);
}
// Area sample with fractional coverage at the footprint edges.
//
// The destination coordinate (x, y) is mapped to the source rectangle
// [x * scale_x, x * scale_x + scale_x) x [y * scale_y, y * scale_y + scale_y).
// Fully covered pixels contribute with weight `norm`; pixels cut by the left,
// right, top or bottom edge contribute with `norm` times the covered fraction
// along that axis, and the four corner pixels with the product of both
// fractions.
//
// `norm` is the reciprocal of the footprint area, where each footprint side
// is limited to the distance remaining to src.width / src.height.
//
// y, x    : destination row / column.
// returns : the accumulated value converted with saturate_cast.
//
// The accumulation order below is kept fixed (interior rows with their
// left/right partials, top edge, bottom edge, then corners) because float
// summation is order-sensitive.
__device__ __forceinline__ elem_type operator ()(float y, float x) const
{
    typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;

    // Horizontal extent of the footprint and its fully covered column range.
    const float srcLeft  = x * scale_x;
    const float srcRight = srcLeft + scale_x;
    const int colBegin   = __float2int_ru(srcLeft);
    const int colEnd     = __float2int_rd(srcRight);

    // Vertical extent of the footprint and its fully covered row range.
    const float srcTop    = y * scale_y;
    const float srcBottom = srcTop + scale_y;
    const int rowBegin    = __float2int_ru(srcTop);
    const int rowEnd      = __float2int_rd(srcBottom);

    const float norm = 1.f / (fminf(scale_x, src.width - srcLeft) * fminf(scale_y, src.height - srcTop));

    // Which sides of the footprint cut through a pixel.
    const bool hasLeft   = colBegin > srcLeft;
    const bool hasRight  = colEnd < srcRight;
    const bool hasTop    = rowBegin > srcTop;
    const bool hasBottom = rowEnd < srcBottom;

    // Covered fraction of the partial pixel on each side.
    const float leftFrac   = colBegin - srcLeft;
    const float rightFrac  = srcRight - colEnd;
    const float topFrac    = rowBegin - srcTop;
    const float bottomFrac = srcBottom - rowEnd;

    work_type acc = VecTraits<work_type>::all(0.f);

    for (int row = rowBegin; row < rowEnd; ++row)
    {
        // Fully covered pixels of this row.
        for (int col = colBegin; col < colEnd; ++col)
        {
            acc = acc + src(row, col) * norm;
        }

        // Partially covered pixel left of the row.
        if (hasLeft)
        {
            acc = acc + src(row, colBegin - 1) * (leftFrac * norm);
        }

        // Partially covered pixel right of the row.
        if (hasRight)
        {
            acc = acc + src(row, colEnd) * (rightFrac * norm);
        }
    }

    // Partially covered row above the interior.
    if (hasTop)
    {
        for (int col = colBegin; col < colEnd; ++col)
        {
            acc = acc + src(rowBegin - 1, col) * (topFrac * norm);
        }
    }

    // Partially covered row below the interior.
    if (hasBottom)
    {
        for (int col = colBegin; col < colEnd; ++col)
        {
            acc = acc + src(rowEnd, col) * (bottomFrac * norm);
        }
    }

    // Corners: top-left, top-right, bottom-right, bottom-left (in that order).
    if (hasTop)
    {
        if (hasLeft)
        {
            acc = acc + src(rowBegin - 1, colBegin - 1) * (topFrac * leftFrac * norm);
        }

        if (hasRight)
        {
            acc = acc + src(rowBegin - 1, colEnd) * (topFrac * rightFrac * norm);
        }
    }

    if (hasBottom)
    {
        if (hasRight)
        {
            acc = acc + src(rowEnd, colEnd) * (bottomFrac * rightFrac * norm);
        }

        if (hasLeft)
        {
            acc = acc + src(rowEnd, colBegin - 1) * (bottomFrac * leftFrac * norm);
        }
    }

    return saturate_cast<elem_type>(acc);
}
calc_j(curr_cache_t curr_cache, particle_real_t *xm, particle_real_t *xp, int *lf, int *lg, particle_t *prt, particle_real_t *vxi) { assert(0); } #endif #endif // DIM // ====================================================================== // TBD: piece to save block_idx as we go for following sort #if 0 // save block_idx for new particle position at x^(n+1.5) unsigned int block_pos_y = __float2int_rd(prt->xi[1] * prm.b_dxi[1]); unsigned int block_pos_z = __float2int_rd(prt->xi[2] * prm.b_dxi[2]); int nr_blocks = prm.b_mx[1] * prm.b_mx[2]; int block_idx; if (block_pos_y >= prm.b_mx[1] || block_pos_z >= prm.b_mx[2]) { block_idx = CUDA_BND_S_OOB; } else { int bidx = block_pos_z * prm.b_mx[1] + block_pos_y + p_nr * nr_blocks; int b_diff = bid - bidx + prm.b_mx[1] + 1; int d1 = b_diff % prm.b_mx[1]; int d2 = b_diff / prm.b_mx[1]; block_idx = d2 * 3 + d1; } d_bidx[n] = block_idx; #endif