void copy(Array<outType> &dst, const Array<inType> &src, outType default_value, double factor) { dim4 src_dims = src.dims(); dim4 dst_dims = dst.dims(); dim4 src_strides = src.strides(); dim4 dst_strides = dst.strides(); const inType * src_ptr = src.get(); outType * dst_ptr = dst.get(); dim_type trgt_l = std::min(dst_dims[3], src_dims[3]); dim_type trgt_k = std::min(dst_dims[2], src_dims[2]); dim_type trgt_j = std::min(dst_dims[1], src_dims[1]); dim_type trgt_i = std::min(dst_dims[0], src_dims[0]); for(dim_type l=0; l<dst_dims[3]; ++l) { dim_type src_loff = l*src_strides[3]; dim_type dst_loff = l*dst_strides[3]; bool isLvalid = l<trgt_l; for(dim_type k=0; k<dst_dims[2]; ++k) { dim_type src_koff = k*src_strides[2]; dim_type dst_koff = k*dst_strides[2]; bool isKvalid = k<trgt_k; for(dim_type j=0; j<dst_dims[1]; ++j) { dim_type src_joff = j*src_strides[1]; dim_type dst_joff = j*dst_strides[1]; bool isJvalid = j<trgt_j; for(dim_type i=0; i<dst_dims[0]; ++i) { outType temp = default_value; if (isLvalid && isKvalid && isJvalid && i<trgt_i) { dim_type src_idx = i*src_strides[0] + src_joff + src_koff + src_loff; temp = outType(src_ptr[src_idx])*outType(factor); } dim_type dst_idx = i*dst_strides[0] + dst_joff + dst_koff + dst_loff; dst_ptr[dst_idx] = temp; } } } } }
/**
 * Builds an array of shape `dims` from `in`, converting elements to outType.
 *
 * The result is first created with every element set to `default_value`, then
 * handed to copy() together with `in`, `default_value` and `factor`.
 *
 * @param in             source array
 * @param dims           dimensions of the returned array
 * @param default_value  initial fill value, also forwarded to copy()
 * @param factor         scale factor forwarded to copy()
 * @return the newly created array
 */
Array<outType> padArray(Array<inType> const &in, dim4 const &dims, outType default_value, double factor)
{
    Array<outType> padded = createValueArray<outType>(dims, default_value);

    copy<inType, outType>(padded, in, default_value, factor);

    return padded;
}
/**
 * Emits the generated Lua code for one message field, chosen by msg.exsit.
 *
 *  - REQUIRED / OPTIONAL: delegates to outType(msg).
 *  - REPEATED: writes to s_out_file a Lua `for ... ipairs(self._msg.<var_name> or {})`
 *    loop whose body is either
 *      `LCPB_<var_type>.new(data):printTitle():print()`  when isInnerType(var_type) is false, or
 *      `self:printItem("<var_name>")`                    otherwise.
 *  - any other value: nothing is written.
 *
 * The text is written with fprintf directly to s_out_file. The earlier
 * implementation formatted into a fixed 1024-byte stack buffer with sprintf,
 * which overflowed when var_name / var_type were long; writing straight to
 * the stream removes that limit while producing the same bytes.
 *
 * @param msg  field description; reads exsit, var_name and var_type
 */
void outRRO(MSG msg)
{
    if (msg.exsit == REQUIRED || msg.exsit == OPTIONAL) {
        outType(msg);
        return;
    }

    if (msg.exsit != REPEATED) {
        return;
    }

    // Loop header iterating over the repeated field (empty table when absent).
    fprintf(s_out_file, "\t\tfor _, data in ipairs(self._msg.%s or {}) do\n", msg.var_name);

    // Loop body plus the closing `end`.
    if (!isInnerType(msg.var_type)) {
        fprintf(s_out_file, "\t\t\tLCPB_%s.new(data):printTitle():print()\n\t\tend\n", msg.var_type);
    } else {
        fprintf(s_out_file, "\t\t\tself:printItem(\"%s\")\n\t\tend\n", msg.var_name);
    }
}
/**
 * Builds an array of shape `dims` from `in`, converting elements to outType.
 *
 * The result is created with every element set to `default_value`; eval() is
 * then called on the result and on `in`, and kernel::copyElemwise is submitted
 * to the queue returned by getQueue() with the result, `in`, `default_value`
 * and `factor` as arguments.
 *
 * @param in             source array
 * @param dims           dimensions of the returned array
 * @param default_value  initial fill value, also passed to the kernel
 * @param factor         scale factor passed to the kernel
 * @return the newly created array
 */
Array<outType> padArray(Array<inType> const &in, dim4 const &dims, outType default_value, double factor)
{
    Array<outType> padded = createValueArray<outType>(dims, default_value);

    // eval() is invoked on both operands before the kernel is enqueued.
    padded.eval();
    in.eval();

    getQueue().enqueue(kernel::copyElemwise<outType, inType>,
                       padded, in, outType(default_value), factor);

    return padded;
}
namespace cpu
{
    // NOTE(review): the definitions of copyData and both copyArray overloads
    // are not visible from here; the summaries below are inferred from the
    // signatures only — confirm against the implementation.

    // Presumably copies the contents of A into the buffer `data`.
    template<typename T>
    void copyData(T *data, const Array<T> &A);

    // Presumably returns a copy of A.
    template<typename T>
    Array<T> copyArray(const Array<T> &A);

    // Two-array overload; presumably copies `in` into `out`, converting
    // inType to outType.
    template<typename inType, typename outType>
    void copyArray(Array<outType> &out, const Array<inType> &in);

    // Returns an Array<outType> built from `in` for the requested `dims`.
    // default_value defaults to outType(0) and factor to 1.0 when omitted.
    template<typename inType, typename outType>
    Array<outType> padArray(const Array<inType> &in,
                            const dim4 &dims,
                            outType default_value = outType(0),
                            double factor = 1.0);
}
/**
 * CUDA kernel: for each position (gx, gy) of the search array, accumulates a
 * match metric between the template and the template-sized window of the
 * search array anchored at (gx, gy), and stores it in `out`.
 *
 * Search samples that fall outside dims[0] x dims[1] are read as inType(0).
 * The metric is selected by `mType`; the mean-adjusted variants use the
 * template mean and window mean, which are only computed when `needMean` is
 * true. `mType` and `needMean` are not declared in this block (presumably
 * template parameters — confirm against the surrounding file).
 *
 * @param out    result array; one value written per in-range thread
 * @param srch   search array
 * @param tmplt  template array
 * @param nBBS0  divisor splitting blockIdx.x into (index along dim 2, block along dim 0)
 * @param nBBS1  divisor splitting blockIdx.y into (index along dim 3, block along dim 1)
 */
__global__
void matchTemplate(Param<outType> out, CParam<inType> srch, CParam<inType> tmplt, int nBBS0, int nBBS1)
{
    // Indices along dims 2 and 3 are folded into the block index.
    unsigned batch2 = blockIdx.x / nBBS0;
    unsigned batch3 = blockIdx.y / nBBS1;

    int gx = threadIdx.x + (blockIdx.x - batch2*nBBS0) * blockDim.x;
    int gy = threadIdx.y + (blockIdx.y - batch3*nBBS1) * blockDim.y;

    // Threads that land outside the search array have nothing to do.
    if (gx >= srch.dims[0] || gy >= srch.dims[1]) {
        return;
    }

    const int tLen0 = tmplt.dims[0];
    const int tLen1 = tmplt.dims[1];
    const int sLen0 = srch.dims[0];
    const int sLen1 = srch.dims[1];

    const inType* tmpltData = (const inType*) tmplt.ptr;

    int windowCount = tLen0*tLen1;

    // Mean of the template (stays zero when needMean is false).
    outType tmpltMean = outType(0);
    if (needMean) {
        for (int tj = 0; tj < tLen1; tj++) {
            int tRowOff = tj*tmplt.strides[1];
            for (int ti = 0; ti < tLen0; ti++) {
                tmpltMean += outType(tmpltData[ tRowOff + ti*tmplt.strides[0] ]);
            }
        }
        tmpltMean /= windowCount;
    }

    // Base pointers for the slice selected by (batch2, batch3).
    const inType* srchData = (const inType*) srch.ptr + (batch2 * srch.strides[2] + batch3 * srch.strides[3]);
    outType*      outData  = (outType*) out.ptr + (batch2 * out.strides[2] + batch3 * out.strides[3]);

    // Mean of the search window anchored at (gx, gy); whether it is used
    // depends on mType.
    outType windowMean = outType(0);
    if (needMean) {
        for (int tj = 0; tj < tLen1; tj++) {
            const int j = gy + tj;
            int sRowOff = j*srch.strides[1];
            for (int ti = 0; ti < tLen0; ti++) {
                const int i = gx + ti;
                inType sample = inType(0);
                if (j < sLen1 && i < sLen0) {
                    sample = srchData[sRowOff + i*srch.strides[0]];
                }
                windowMean += outType(sample);
            }
        }
        windowMean /= windowCount;
    }

    // Accumulate the selected metric over the window.
    outType disparity = outType(0);
    for (int tj = 0; tj < tLen1; tj++) {
        const int j = gy + tj;
        int sRowOff = j*srch.strides[1];
        int tRowOff = tj*tmplt.strides[1];

        for (int ti = 0; ti < tLen0; ti++) {
            const int i = gx + ti;

            // Out-of-range search samples contribute as zero.
            inType sVal = inType(0);
            if (j < sLen1 && i < sLen0) {
                sVal = srchData[sRowOff + i*srch.strides[0]];
            }
            inType tVal = tmpltData[ tRowOff + ti*tmplt.strides[0] ];

            outType temp;
            switch (mType) {
                case AF_SAD:
                    // |s - t|
                    disparity += fabs(outType(sVal) - outType(tVal));
                    break;
                case AF_ZSAD:
                    // |(s - windowMean) - (t - tmpltMean)|
                    disparity += fabs(outType(sVal) - windowMean - outType(tVal) + tmpltMean);
                    break;
                case AF_LSAD:
                    // |s - (windowMean / tmpltMean) * t|
                    disparity += fabs(outType(sVal) - (windowMean/tmpltMean)*tVal);
                    break;
                case AF_SSD:
                    // (s - t)^2
                    temp = (outType(sVal) - outType(tVal));
                    disparity += temp*temp;
                    break;
                case AF_ZSSD:
                    // ((s - windowMean) - (t - tmpltMean))^2
                    temp = (outType(sVal) - windowMean - outType(tVal) + tmpltMean);
                    disparity += temp*temp;
                    break;
                case AF_LSSD:
                    // (s - (windowMean / tmpltMean) * t)^2
                    temp = (outType(sVal) - (windowMean/tmpltMean)*tVal);
                    disparity += temp*temp;
                    break;
                case AF_NCC:
                    // TODO: future implementation
                    break;
                case AF_ZNCC:
                    // TODO: future implementation
                    break;
                case AF_SHD:
                    // TODO: future implementation
                    break;
            }
        }
    }

    // NOTE(review): gx is used without out.strides[0], i.e. the output is
    // addressed as if its first stride were 1 — confirm that holds for callers.
    outData[gy*out.strides[1] + gx] = disparity;
}