// Copy the contents of one cpu tensor into another cpu tensor of equal shape.
//   _dst : destination tensor, must already have its data pointer set
//   _src : source tensor, its shape must compare equal to _dst.shape
// Fails through utils::Assert with "Copy:shape mismatch" when the shapes differ.
// NOTE(review): `dim` is not declared inside this span - presumably a template
// parameter introduced on a line that is not visible here; confirm.
inline void Copy(Tensor<cpu,dim> _dst, const Tensor<cpu,dim> &_src ){
    utils::Assert( _dst.shape == _src.shape, "Copy:shape mismatch" );

    // work on 2D views so that every tensor rank is handled by the same loop
    Tensor<cpu,2> out2d = _dst.FlatTo2D();
    Tensor<cpu,2> in2d  = _src.FlatTo2D();

    // each row transfers exactly shape[0] elements
    const size_t row_bytes = sizeof(real_t) * out2d.shape[0];

    // rows are copied one at a time rather than in a single bulk memcpy;
    // presumably this lets the two tensors use different row strides
    // (confirm against the row-indexing operator of Tensor)
    for( index_t row = 0; row < out2d.shape[1]; ++row ){
        memcpy( out2d[row].dptr, in2d[row].dptr, row_bytes );
    }
}
// Copy between two tensors of equal shape with cudaMemcpy2D.
//   _dst : destination tensor on device type A
//   _src : source tensor on device type B
//   kind : transfer direction, forwarded unchanged to cudaMemcpy2D
// Fails through utils::Assert with "Copy:shape mismatch" when the shapes differ,
// and with the CUDA error string when the copy itself reports an error.
// NOTE(review): `A`, `B` and `dim` are not declared inside this span - presumably
// template parameters introduced on a line that is not visible here; confirm.
inline void Copy(Tensor<A,dim> _dst, Tensor<B,dim> _src, cudaMemcpyKind kind){
    utils::Assert( _dst.shape == _src.shape, "Copy:shape mismatch" );

    Tensor<A,2> out2d = _dst.FlatTo2D();
    Tensor<B,2> in2d  = _src.FlatTo2D();

    // pitches are derived from each tensor's own stride_, so the two sides
    // may have different row pitches
    const size_t dst_pitch = out2d.shape.stride_ * sizeof(real_t);
    const size_t src_pitch = in2d.shape.stride_ * sizeof(real_t);
    // extent of the region actually transferred: shape[0] elements per row,
    // shape[1] rows
    const size_t row_bytes = out2d.shape[0] * sizeof(real_t);
    const size_t num_rows  = out2d.shape[1];

    const cudaError_t status = cudaMemcpy2D( out2d.dptr, dst_pitch,
                                             in2d.dptr, src_pitch,
                                             row_bytes, num_rows, kind );
    utils::Assert( status == cudaSuccess, cudaGetErrorString(status) );
}
// Evaluate an expression plan element by element and store the results into
// a cpu tensor.
//   _dst : destination tensor, accessed through its 2D view
//   plan : expression plan; plan.Eval( y, x ) supplies the value for row y, column x
// How each value is combined with the destination element is decided by
// Saver::Save.
// NOTE(review): `Saver`, `E` and `dim` are not declared inside this span -
// presumably template parameters introduced on a line that is not visible
// here; confirm.
inline void MapPlan(Tensor<cpu,dim> _dst, const expr::Plan<E> &plan){
    Tensor<cpu,2> out2d = _dst.FlatTo2D();
    // outer loop walks rows (shape[1]), inner loop walks the elements of a row (shape[0])
    for( index_t row = 0; row < out2d.shape[1]; ++row ){
        for( index_t col = 0; col < out2d.shape[0]; ++col ){
            // trust your compiler! -_- they will optimize it
            Saver::Save( out2d[row][col], plan.Eval( row, col ) );
        }
    }
}
// Allocate storage for a cpu tensor and record the resulting row stride.
//   obj : tensor whose shape is already set; dptr and shape.stride_ are written
//   pad : when true, allocate row by row and take stride_ from the pitch the
//         allocator reports; when false, stride_ is set to shape[0] and one
//         block of shape.Size() elements is allocated
// NOTE(review): `dim` is not declared inside this span - presumably a template
// parameter introduced on a line that is not visible here; confirm.
inline void AllocSpace(Tensor<cpu,dim> &obj, bool pad ){
    // presumably filled in by AlignedMallocPitch (confirm its signature)
    size_t pitch;

    if( !pad ){
        // stride_ is written before Size() is called, exactly as in the
        // original statement order
        obj.shape.stride_ = obj.shape[0];
        obj.dptr = (real_t*)sse2::AlignedMallocPitch
            ( pitch, obj.shape.Size() * sizeof(real_t), 1 );
        return;
    }

    // padded layout: one allocation row per row of the 2D view
    obj.dptr = (real_t*)sse2::AlignedMallocPitch
        ( pitch, obj.shape[0] * sizeof(real_t), obj.FlatTo2D().shape[1] );
    // the pitch is in bytes, stride_ is stored in elements
    obj.shape.stride_ = static_cast<index_t>( pitch / sizeof(real_t) );
}
// Allocate device storage for a gpu tensor and record the resulting row stride.
//   obj : tensor whose shape is already set; dptr and shape.stride_ are written
//   pad : request a pitched (row-padded) layout; it is only honoured when
//         shape[0] is at least MSHADOW_MIN_PAD_RATIO * 32, otherwise a
//         layout with stride_ == shape[0] is used
// Any error reported by cudaMallocPitch is raised through utils::Assert with
// the CUDA error string.
// NOTE(review): `dim` is not declared inside this span - presumably a template
// parameter introduced on a line that is not visible here; confirm.
inline void AllocSpace(Tensor<gpu,dim> &obj, bool pad){
    size_t pitch;
    // common choice for cuda mem align unit is 32
    const bool use_pitch = pad && obj.shape[0] >= MSHADOW_MIN_PAD_RATIO * 32;

    if( !use_pitch ){
        // stride_ is written before Size() is called, exactly as in the
        // original statement order; the pitch reported for this single-row
        // allocation is not used
        obj.shape.stride_ = obj.shape[0];
        cudaError_t err = cudaMallocPitch( (void**)&obj.dptr, &pitch,
                                           obj.shape.Size() * sizeof(real_t), 1 );
        utils::Assert( err == cudaSuccess, cudaGetErrorString(err) );
        return;
    }

    // pitched layout: one allocation row per row of the 2D view
    cudaError_t err = cudaMallocPitch( (void**)&obj.dptr, &pitch,
                                       obj.shape[0] * sizeof(real_t),
                                       obj.FlatTo2D().shape[1] );
    utils::Assert( err == cudaSuccess, cudaGetErrorString(err) );
    // the pitch is in bytes, stride_ is stored in elements
    obj.shape.stride_ = static_cast<index_t>( pitch / sizeof(real_t) );
}
// Evaluate an expression plan into a gpu tensor by forwarding to cuda::MapPlan.
//   _dst : destination tensor; its 2D view is what gets passed on
//   plan : expression plan, forwarded unchanged
// NOTE(review): `Saver`, `E` and `dim` are not declared inside this span -
// presumably template parameters introduced on a line that is not visible
// here; confirm.
inline void MapPlan(Tensor<gpu,dim> _dst, const expr::Plan<E> &plan){
    // flatten first so the callee only ever sees a 2D tensor
    Tensor<gpu,2> flat = _dst.FlatTo2D();
    cuda::MapPlan<Saver>( flat, plan );
}