inline __device__ void load2ShrdMem(T * shrd, const T * const in, int lx, int ly, int shrdStride, int dim0, int dim1, int gx, int gy, int inStride1, int inStride0) { T val = isDilation ? Binary<T, af_max_t>().init() : Binary<T, af_min_t>().init(); if (gx>=0 && gx<dim0 && gy>=0 && gy<dim1) { val = in[ lIdx(gx, gy, inStride1, inStride0) ]; } shrd[ lIdx(lx, ly, shrdStride, 1) ] = val; }
inline __device__ void load2ShrdMem(T * shrd, const T * in, int lx, int ly, int shrdStride, int schStride, int dim0, int dim1, int gx, int gy, int ichStride, int inStride1, int inStride0) { int gx_ = clamp(gx, 0, dim0-1); int gy_ = clamp(gy, 0, dim1-1); #pragma unroll for(int ch=0; ch<channels; ++ch) shrd[lIdx(lx, ly, shrdStride, 1)+ch*schStride] = in[lIdx(gx_, gy_, inStride1, inStride0)+ch*ichStride]; }
inline __device__ void load2ShrdMem(outType * shrd, const inType * const in, int lx, int ly, int shrdStride, int dim0, int dim1, int gx, int gy, int inStride1, int inStride0) { shrd[ly*shrdStride+lx] = in[lIdx(clamp(gx, 0, dim0-1), clamp(gy, 0, dim1-1), inStride1, inStride0)]; }
static __global__ void morphKernel(Param<T> out, CParam<T> in, int nBBS0, int nBBS1) { // get shared memory pointer SharedMemory<T> shared; T * shrdMem = shared.getPointer(); // calculate necessary offset and window parameters const int halo = windLen/2; const int padding= 2*halo; const int shrdLen = blockDim.x + padding + 1; const int shrdLen1 = blockDim.y + padding; // gfor batch offsets unsigned b2 = blockIdx.x / nBBS0; unsigned b3 = blockIdx.y / nBBS1; const T* iptr = (const T *) in.ptr + (b2 * in.strides[2] + b3 * in.strides[3]); T* optr = (T * )out.ptr + (b2 * out.strides[2] + b3 * out.strides[3]); const int lx = threadIdx.x; const int ly = threadIdx.y; // global indices const int gx = blockDim.x * (blockIdx.x-b2*nBBS0) + lx; const int gy = blockDim.y * (blockIdx.y-b3*nBBS1) + ly; // pull image to local memory for (int b=ly, gy2=gy; b<shrdLen1; b+=blockDim.y, gy2+=blockDim.y) { // move row_set get_local_size(1) along coloumns for (int a=lx, gx2=gx; a<shrdLen; a+=blockDim.x, gx2+=blockDim.x) { load2ShrdMem<T, isDilation>(shrdMem, iptr, a, b, shrdLen, in.dims[0], in.dims[1], gx2-halo, gy2-halo, in.strides[1], in.strides[0]); } } int i = lx + halo; int j = ly + halo; __syncthreads(); const T * d_filt = (const T *)cFilter; T acc = isDilation ? Binary<T, af_max_t>().init() : Binary<T, af_min_t>().init(); #pragma unroll for(int wj=0; wj<windLen; ++wj) { int joff = wj*windLen; int w_joff = (j+wj-halo)*shrdLen; #pragma unroll for(int wi=0; wi<windLen; ++wi) { if (d_filt[joff+wi] > (T)0) { T cur = shrdMem[w_joff + (i+wi-halo)]; if (isDilation) acc = max(acc, cur); else acc = min(acc, cur); } } } if (gx<in.dims[0] && gy<in.dims[1]) { int outIdx = lIdx(gx, gy, out.strides[1], out.strides[0]); optr[outIdx] = acc; } }
static __global__ void meanshiftKernel(Param<T> out, CParam<T> in, float space_, int radius, float cvar, uint iter, int nBBS0, int nBBS1) { SharedMemory<T> shared; T * shrdMem = shared.getPointer(); // calculate necessary offset and window parameters const int padding = 2*radius + 1; const int shrdLen = blockDim.x + padding; const int schStride = shrdLen*(blockDim.y + padding); // the variable ichStride will only effect when we have >1 // channels. in the other cases, the expression in question // will not use the variable const int ichStride = in.strides[2]; // gfor batch offsets unsigned b2 = blockIdx.x / nBBS0; unsigned b3 = blockIdx.y / nBBS1; const T* iptr = (const T *) in.ptr + (b2 * in.strides[2] + b3 * in.strides[3]); T* optr = (T * )out.ptr + (b2 * out.strides[2] + b3 * out.strides[3]); const int lx = threadIdx.x; const int ly = threadIdx.y; const int gx = blockDim.x * (blockIdx.x-b2*nBBS0) + lx; const int gy = blockDim.y * (blockIdx.y-b3*nBBS1) + ly; // pull image to local memory for (int b=ly, gy2=gy; b<shrdLen; b+=blockDim.y, gy2+=blockDim.y) { // move row_set get_local_size(1) along coloumns for (int a=lx, gx2=gx; a<shrdLen; a+=blockDim.x, gx2+=blockDim.x) { load2ShrdMem<T, channels>(shrdMem, iptr, a, b, shrdLen, schStride, in.dims[0], in.dims[1], gx2-radius, gy2-radius, ichStride, in.strides[1], in.strides[0]); } } int i = lx + radius; int j = ly + radius; __syncthreads(); if (gx>=in.dims[0] || gy>=in.dims[1]) return; float means[channels]; float centers[channels]; float tmpclrs[channels]; // clear means and centers for this pixel #pragma unroll for(int ch=0; ch<channels; ++ch) { means[ch] = 0.0f; centers[ch] = shrdMem[lIdx(i, j, shrdLen, 1)+ch*schStride]; } // scope of meanshift iterationd begin for(uint it=0; it<iter; ++it) { int count = 0; int shift_x = 0; int shift_y = 0; for(int wj=-radius; wj<=radius; ++wj) { int hit_count = 0; for(int wi=-radius; wi<=radius; ++wi) { int tj = j + wj; int ti = i + wi; // proceed float norm = 0.0f; #pragma unroll for(int ch=0; ch<channels; ++ch) { tmpclrs[ch] = shrdMem[lIdx(ti, tj, shrdLen, 1)+ch*schStride]; norm += (centers[ch]-tmpclrs[ch]) * (centers[ch]-tmpclrs[ch]); } if (norm<= cvar) { #pragma unroll for(int ch=0; ch<channels; ++ch) means[ch] += tmpclrs[ch]; shift_x += wi; ++hit_count; } } count+= hit_count; shift_y += wj*hit_count; } if (count==0) { break; } const float fcount = 1.f/count; const int mean_x = (int)(shift_x*fcount+0.5f); const int mean_y = (int)(shift_y*fcount+0.5f); #pragma unroll for(int ch=0; ch<channels; ++ch) means[ch] *= fcount; float norm = 0.f; #pragma unroll for(int ch=0; ch<channels; ++ch) norm += ((means[ch]-centers[ch])*(means[ch]-centers[ch])); bool stop = ((abs(shift_y-mean_y)+abs(shift_x-mean_x)) + norm) <= 1; shift_x = mean_x; shift_y = mean_y; #pragma unroll for(int ch=0; ch<channels; ++ch) centers[ch] = means[ch]; if (stop) { break; } } // scope of meanshift iterations end #pragma unroll for(int ch=0; ch<channels; ++ch) optr[lIdx(gx, gy, out.strides[1], out.strides[0])+ch*ichStride] = centers[ch]; }
void GLWidget::TraceOneStep() { if(_traceList.size() == 0) { _isTracingDone = false; AnIndex startIdx(0, 0); _traceList.push_back(startIdx); // put in list _cells[startIdx.x][startIdx.y]._isVisited = true; // mark _cells[startIdx.x][startIdx.y]._directionType = DirectionType::DIR_UPRIGHT; // (0,0) always upright or downleft _tilePainter->SetTiles(_cells, _traceList, _gridSpacing, _isTracingDone); this->repaint(); } else if(!_isTracingDone) { AnIndex curIdx = _traceList[_traceList.size() - 1]; _cells[curIdx.x][curIdx.y]._isVisited = true; DirectionType curDir =_cells[curIdx.x][curIdx.y]._directionType; AnIndex urIdx(curIdx.x + 1, curIdx.y - 1); // up right AnIndex drIdx(curIdx.x + 1, curIdx.y + 1); // down right AnIndex dlIdx(curIdx.x - 1, curIdx.y + 1); // down left AnIndex ulIdx(curIdx.x - 1, curIdx.y - 1); // up left AnIndex rIdx(curIdx.x + 1, curIdx.y ); // right AnIndex dIdx(curIdx.x , curIdx.y + 1); // down AnIndex lIdx(curIdx.x - 1, curIdx.y ); // left AnIndex uIdx(curIdx.x , curIdx.y - 1); // up // point-to-line intersection AVector endVec; if(curDir == DirectionType::DIR_UPRIGHT) { endVec = AVector(rIdx.x * _gridSpacing, rIdx.y * _gridSpacing); } else if(curDir == DirectionType::DIR_DOWNRIGHT) { endVec = AVector(drIdx.x * _gridSpacing, drIdx.y * _gridSpacing); } else if(curDir == DirectionType::DIR_DOWNLEFT) { endVec = AVector(dIdx.x * _gridSpacing, dIdx.y * _gridSpacing); } else if(curDir == DirectionType::DIR_UPLEFT) { endVec = AVector(curIdx.x * _gridSpacing, curIdx.y * _gridSpacing); } LineType hitType = GetLineIntersection(endVec); // enter if(curDir == DirectionType::DIR_RIGHT && IsValid(rIdx) && _cells[rIdx.x][rIdx.y]._straightness == Straightness::ST_HORIZONTAL) { //std::cout << "[a] " << "right --> right" << " - " << rIdx.x << " " << rIdx.y << "\n"; _traceList.push_back(rIdx); _cells[rIdx.x][rIdx.y]._directionType = DirectionType::DIR_RIGHT; _cells[rIdx.x][rIdx.y]._tempDirection = _cells[curIdx.x][curIdx.y]._tempDirection; } else if(curDir == DirectionType::DIR_LEFT && IsValid(lIdx) && _cells[lIdx.x][lIdx.y]._straightness == Straightness::ST_HORIZONTAL) { //std::cout << "[b] " << "left --> left" << " - " << lIdx.x << " " << lIdx.y << "\n"; _traceList.push_back(lIdx); _cells[lIdx.x][lIdx.y]._directionType = DirectionType::DIR_LEFT; _cells[lIdx.x][lIdx.y]._tempDirection = _cells[curIdx.x][curIdx.y]._tempDirection; } else if(curDir == DirectionType::DIR_UP && IsValid(uIdx) && _cells[uIdx.x][uIdx.y]._straightness == Straightness::ST_VERTICAL) { //std::cout << "[c] " << "up --> up" << " - " << uIdx.x << " " << uIdx.y << "\n"; _traceList.push_back(uIdx); _cells[uIdx.x][uIdx.y]._directionType = DirectionType::DIR_UP; _cells[uIdx.x][uIdx.y]._tempDirection = _cells[curIdx.x][curIdx.y]._tempDirection; } else if(curDir == DirectionType::DIR_DOWN && IsValid(dIdx) && _cells[dIdx.x][dIdx.y]._straightness == Straightness::ST_VERTICAL) { //std::cout << "[d] " << "down --> down" << " - " << dIdx.x << " " << dIdx.y << "\n"; _traceList.push_back(dIdx); _cells[dIdx.x][dIdx.y]._directionType = DirectionType::DIR_DOWN; _cells[dIdx.x][dIdx.y]._tempDirection = _cells[curIdx.x][curIdx.y]._tempDirection; } // out else if(curDir == DirectionType::DIR_RIGHT && IsValid(rIdx) && _cells[rIdx.x][rIdx.y]._straightness == Straightness::ST_DIAGONAL) { //std::cout << "[e] " << "right --> out" << " - " << rIdx.x << " " << rIdx.y << "\n"; _traceList.push_back(rIdx); _cells[rIdx.x][rIdx.y]._directionType = _cells[curIdx.x][curIdx.y]._tempDirection; } else if(curDir == DirectionType::DIR_LEFT && IsValid(lIdx) && _cells[lIdx.x][lIdx.y]._straightness == Straightness::ST_DIAGONAL) { //std::cout << "[f] " << "left --> out" << " - " << lIdx.x << " " << lIdx.y << "\n"; _traceList.push_back(lIdx); _cells[lIdx.x][lIdx.y]._directionType = _cells[curIdx.x][curIdx.y]._tempDirection; } else if(curDir == DirectionType::DIR_UP && IsValid(uIdx) && _cells[uIdx.x][uIdx.y]._straightness == Straightness::ST_DIAGONAL) { //std::cout << "[g] " << "up --> out" << " - " << uIdx.x << " " << uIdx.y << "\n"; _traceList.push_back(uIdx); _cells[uIdx.x][uIdx.y]._directionType = _cells[curIdx.x][curIdx.y]._tempDirection; } else if(curDir == DirectionType::DIR_DOWN && IsValid(dIdx) && _cells[dIdx.x][dIdx.y]._straightness == Straightness::ST_DIAGONAL) { //std::cout << "[h] " << "down --> out" << " - " << dIdx.x << " " << dIdx.y << "\n"; _traceList.push_back(dIdx); _cells[dIdx.x][dIdx.y]._directionType = _cells[curIdx.x][curIdx.y]._tempDirection; } else if(curDir == DirectionType::DIR_UPRIGHT && hitType == LineType::LINE_HORIZONTAL && IsValid(rIdx) && _cells[rIdx.x][rIdx.y]._straightness == Straightness::ST_HORIZONTAL) // upright --> right { //std::cout << "[1] " << "upright --> right" << " - " << rIdx.x << " " << rIdx.y << "\n"; // rIdx _traceList.push_back(rIdx); _cells[rIdx.x][rIdx.y]._directionType = DirectionType::DIR_RIGHT; _cells[rIdx.x][rIdx.y]._tempDirection = DirectionType::DIR_DOWNRIGHT; } else if(curDir == DirectionType::DIR_DOWNRIGHT && hitType == LineType::LINE_HORIZONTAL && IsValid(rIdx) && _cells[rIdx.x][rIdx.y]._straightness == Straightness::ST_HORIZONTAL) // downright --> right { //std::cout << "[2] " << "downright --> right" << " - " << rIdx.x << " " << rIdx.y << "\n"; //std::cout << curDir << " " << hitType << " " << IsValid(rIdx) << " " << _cells[rIdx.x][rIdx.y]._straightness << "\n"; // rIdx _traceList.push_back(rIdx); _cells[rIdx.x][rIdx.y]._directionType = DirectionType::DIR_RIGHT; _cells[rIdx.x][rIdx.y]._tempDirection = DirectionType::DIR_UPRIGHT; } else if(curDir == DirectionType::DIR_DOWNLEFT && hitType == LineType::LINE_HORIZONTAL && IsValid(lIdx) && _cells[lIdx.x][lIdx.y]._straightness == Straightness::ST_HORIZONTAL) // downleft --> left { //std::cout << "[3] " << "downleft --> left" << " - " << lIdx.x << " " << lIdx.y << "\n"; // lIdx _traceList.push_back(lIdx); _cells[lIdx.x][lIdx.y]._directionType = DirectionType::DIR_LEFT; _cells[lIdx.x][lIdx.y]._tempDirection = DirectionType::DIR_UPLEFT; } else if(curDir == DirectionType::DIR_UPLEFT && hitType == LineType::LINE_HORIZONTAL && IsValid(lIdx) && _cells[lIdx.x][lIdx.y]._straightness == Straightness::ST_HORIZONTAL) // upleft --> left { //std::cout << "[4] " << "// upleft --> left" << " - " << lIdx.x << " " << lIdx.y << "\n"; // lIdx _traceList.push_back(lIdx); _cells[lIdx.x][lIdx.y]._directionType = DirectionType::DIR_LEFT; _cells[lIdx.x][lIdx.y]._tempDirection = DirectionType::DIR_DOWNLEFT; } else if(curDir == DirectionType::DIR_UPRIGHT && hitType == LineType::LINE_VERTICAL && IsValid(uIdx) && _cells[uIdx.x][uIdx.y]._straightness == Straightness::ST_VERTICAL) // upright --> up { //std::cout << "[5] " << "upright --> up" << " - " << uIdx.x << " " << uIdx.y << "\n"; // uIdx _traceList.push_back(uIdx); _cells[uIdx.x][uIdx.y]._directionType = DirectionType::DIR_UP; _cells[uIdx.x][uIdx.y]._tempDirection = DirectionType::DIR_UPLEFT; } else if(curDir == DirectionType::DIR_DOWNRIGHT && hitType == LineType::LINE_VERTICAL && IsValid(dIdx) && _cells[dIdx.x][dIdx.y]._straightness == Straightness::ST_VERTICAL) // downright --> down { //std::cout << "[6] " << "downright --> down" << " - " << dIdx.x << " " << dIdx.y << "\n"; // dIdx _traceList.push_back(dIdx); _cells[dIdx.x][dIdx.y]._directionType = DirectionType::DIR_DOWN; _cells[dIdx.x][dIdx.y]._tempDirection = DirectionType::DIR_DOWNLEFT; } else if(curDir == DirectionType::DIR_DOWNLEFT && hitType == LineType::LINE_VERTICAL && IsValid(dIdx) && _cells[dIdx.x][dIdx.y]._straightness == Straightness::ST_VERTICAL) // downleft --> down { //std::cout << "[7] " << "downleft --> down" << " - " << dIdx.x << " " << dIdx.y << "\n"; // dIdx _traceList.push_back(dIdx); _cells[dIdx.x][dIdx.y]._directionType = DirectionType::DIR_DOWN; _cells[dIdx.x][dIdx.y]._tempDirection = DirectionType::DIR_DOWNRIGHT; } else if(curDir == DirectionType::DIR_UPLEFT && hitType == LineType::LINE_VERTICAL && IsValid(uIdx) && _cells[uIdx.x][uIdx.y]._straightness == Straightness::ST_VERTICAL) // upleft --> up { //std::cout << "[8] " << "upleft --> up" << " - " << uIdx.x << " " << uIdx.y << "\n"; // uIdx _traceList.push_back(uIdx); _cells[uIdx.x][uIdx.y]._directionType = DirectionType::DIR_UP; _cells[uIdx.x][uIdx.y]._tempDirection = DirectionType::DIR_UPRIGHT; } // old code else if(curDir == DirectionType::DIR_UPRIGHT && IsValid(urIdx) && hitType == LineType::LINE_NONE) { //std::cout << "[9] " << hitType << " - " << urIdx.x << " " << urIdx.y << "\n"; _traceList.push_back(urIdx); _cells[urIdx.x][urIdx.y]._directionType = DirectionType::DIR_UPRIGHT; } else if(curDir == DirectionType::DIR_DOWNRIGHT && IsValid(drIdx) && hitType == LineType::LINE_NONE) { //std::cout << "[10] " << hitType << " - " << drIdx.x << " " << drIdx.y << "\n"; _traceList.push_back(drIdx); _cells[drIdx.x][drIdx.y]._directionType = DirectionType::DIR_DOWNRIGHT; } else if(curDir == DirectionType::DIR_DOWNLEFT && IsValid(dlIdx) && hitType == LineType::LINE_NONE) { //std::cout << "[11] " << hitType << " - " << dlIdx.x << " " << dlIdx.y << "\n"; _traceList.push_back(dlIdx); _cells[dlIdx.x][dlIdx.y]._directionType = DirectionType::DIR_DOWNLEFT; } else if(curDir == DirectionType::DIR_UPLEFT && IsValid(ulIdx) && hitType == LineType::LINE_NONE) { //std::cout << "[12] " << hitType << " - " << ulIdx.x << " " << ulIdx.y << "\n"; _traceList.push_back(ulIdx); _cells[ulIdx.x][ulIdx.y]._directionType = DirectionType::DIR_UPLEFT; } else if(curDir == DirectionType::DIR_UPRIGHT && ( IsValid(rIdx) || IsValid(uIdx) ) ) { if(hitType == LineType::LINE_HORIZONTAL && IsValid(rIdx)) { //std::cout << "[13] " << hitType << " - " << rIdx.x << " " << rIdx.y << "\n"; _traceList.push_back(rIdx); _cells[rIdx.x][rIdx.y]._directionType = DirectionType::DIR_DOWNRIGHT; } else { //std::cout << "[14] " << hitType << " - " << uIdx.x << " " << uIdx.y << "\n"; _traceList.push_back(uIdx); _cells[uIdx.x][uIdx.y]._directionType = DirectionType::DIR_UPLEFT; } } else if(curDir == DirectionType::DIR_DOWNRIGHT && ( IsValid(rIdx) || IsValid(dIdx) )) { if(hitType == LineType::LINE_HORIZONTAL && IsValid(rIdx)) { //std::cout << "[15] " << hitType << " - " << rIdx.x << " " << rIdx.y << "\n"; _traceList.push_back(rIdx); _cells[rIdx.x][rIdx.y]._directionType = DirectionType::DIR_UPRIGHT; } else { //std::cout << "[16] " << hitType << " - " << dIdx.x << " " << dIdx.y << "\n"; //std::cout << curDir << " " << hitType << " " << IsValid(rIdx) << " " << _cells[rIdx.x][rIdx.y]._straightness << "\n"; _traceList.push_back(dIdx); _cells[dIdx.x][dIdx.y]._directionType = DirectionType::DIR_DOWNLEFT; } } else if(curDir == DirectionType::DIR_DOWNLEFT && ( IsValid(lIdx) || IsValid(dIdx) )) { if(hitType == LineType::LINE_HORIZONTAL && IsValid(lIdx)) // up left { //std::cout << "[17] " << hitType << " - " << lIdx.x << " " << lIdx.y << "\n"; _traceList.push_back(lIdx); _cells[lIdx.x][lIdx.y]._directionType = DirectionType::DIR_UPLEFT; } else // down right { //std::cout << "[18] " << hitType << " - " << dIdx.x << " " << dIdx.y << "\n"; _traceList.push_back(dIdx); _cells[dIdx.x][dIdx.y]._directionType = DirectionType::DIR_DOWNRIGHT; } } else if(curDir == DirectionType::DIR_UPLEFT && ( IsValid(lIdx) || IsValid(uIdx) )) { if(hitType == LineType::LINE_HORIZONTAL && IsValid(lIdx)) { //std::cout << "[19] " << hitType << " - " << lIdx.x << " " << lIdx.y << "\n"; _traceList.push_back(lIdx); _cells[lIdx.x][lIdx.y]._directionType = DirectionType::DIR_DOWNLEFT; } else { //std::cout << "[20] " << hitType << " - " << uIdx.x << " " << uIdx.y << "\n"; _traceList.push_back(uIdx); _cells[uIdx.x][uIdx.y]._directionType = DirectionType::DIR_UPRIGHT; } } // check if we revisit a cell which means done AnIndex nextIdx = _traceList[_traceList.size() - 1]; if(_cells[nextIdx.x][nextIdx.y]._isVisited) { //std::cout << "end here " << nextIdx.x << " " << nextIdx.y << "\n"; _isTracingDone = true; } _tilePainter->SetTiles(_cells, _traceList, _gridSpacing, _isTracingDone); this->repaint(); } }