KOKKOS_INLINE_FUNCTION
 double shfl_up(const double &val, const int& delta, const int& width ) {
   int lo = __double2loint(val);
   int hi = __double2hiint(val);
   lo = __shfl_up(lo,delta,width);
   hi = __shfl_up(hi,delta,width);
   return __hiloint2double(hi,lo);
 }
 KOKKOS_INLINE_FUNCTION
 double shfl(const double &val, const int& srcLane, const int& width) {
   int lo = __double2loint(val);
   int hi = __double2hiint(val);
   lo = __shfl(lo,srcLane,width);
   hi = __shfl(hi,srcLane,width);
   return __hiloint2double(hi,lo);
 }
 KOKKOS_INLINE_FUNCTION
 Scalar shfl_up(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) == 8) , int >::type & width) {
   int lo = __double2loint(*reinterpret_cast<const double*>(&val));
   int hi = __double2hiint(*reinterpret_cast<const double*>(&val));
   lo = __shfl_up(lo,delta,width);
   hi = __shfl_up(hi,delta,width);
   const double tmp = __hiloint2double(hi,lo);
   return *(reinterpret_cast<const Scalar*>(&tmp));
 }
Esempio n. 4
0
__device__ double shfl_xor(double val, int laneMask, int width = warpSize)
{
    int lo = __double2loint(val);
    int hi = __double2hiint(val);

    lo = __shfl_xor(lo, laneMask, width);
    hi = __shfl_xor(hi, laneMask, width);

    return __hiloint2double(hi, lo);
}
Esempio n. 5
0
__device__ double shfl_down(double val, uint delta, int width = warpSize)
{
    int lo = __double2loint(val);
    int hi = __double2hiint(val);

    lo = __shfl_down(lo, delta, width);
    hi = __shfl_down(hi, delta, width);

    return __hiloint2double(hi, lo);
}
Esempio n. 6
0
    __device__ __forceinline__ double shfl_down(double val, unsigned int delta, int width = warpSize)
    {
    #if __CUDA_ARCH__ >= 300
        int lo = __double2loint(val);
        int hi = __double2hiint(val);

        lo = __shfl_down(lo, delta, width);
        hi = __shfl_down(hi, delta, width);

        return __hiloint2double(hi, lo);
    #else
        return 0.0;
    #endif
    }
Esempio n. 7
0
    __device__ __forceinline__ double shfl(double val, int srcLane, int width = warpSize)
    {
    #if __CUDA_ARCH__ >= 300
        int lo = __double2loint(val);
        int hi = __double2hiint(val);

        lo = __shfl(lo, srcLane, width);
        hi = __shfl(hi, srcLane, width);

        return __hiloint2double(hi, lo);
    #else
        return 0.0;
    #endif
    }
KOKKOS_INLINE_FUNCTION
double shfl_down<double>(const double &val, const int& delta, const int& width){
#ifdef __CUDA_ARCH__
  #if (__CUDA_ARCH__ >= 300)
    int lo = __double2loint(val);
    int hi = __double2hiint(val);
    lo = __shfl_down(lo,delta,width);
    hi = __shfl_down(hi,delta,width);
    return __hiloint2double(hi,lo);
  #else
    return val;
  #endif
#else
  return val;
#endif
}
KOKKOS_INLINE_FUNCTION
unsigned long shfl_down<unsigned long>(const unsigned long &val, const int& delta, const int& width){
#ifdef __CUDA_ARCH__
  #if (__CUDA_ARCH__ >= 300)
    int lo = __double2loint(*reinterpret_cast<const double*>(&val));
    int hi = __double2hiint(*reinterpret_cast<const double*>(&val));
    lo = __shfl_down(lo,delta,width);
    hi = __shfl_down(hi,delta,width);
    const double tmp = __hiloint2double(hi,lo);
    return *(reinterpret_cast<const unsigned long*>(&tmp));
  #else
    return val;
  #endif
#else
  return val;
#endif
}