void loadHWMesh(const void* file_data_ptr, u32 name_hash, GPUMesh& gpu_mesh) { auto file_data = static_cast<const char*>(file_data_ptr); auto mesh_header = reinterpret_cast<const HWMeshHeader*>(file_data); assert(std::memcmp(mesh_header->magic, "HWMESH", 6) == 0); assert(mesh_header->version == HWMeshHeader::HWMESH_VERSION); auto mesh_index = reinterpret_cast<const HWMeshIndex*>(file_data + sizeof(HWMeshHeader)); auto entry = std::find_if(mesh_index, mesh_index + mesh_header->num_meshes, [name_hash](const HWMeshIndex& i) { return i.name_hash == name_hash; }); assert(entry != mesh_index + mesh_header->num_meshes); auto mesh_data = reinterpret_cast<const HWMeshData*>(file_data + entry->file_offset); assert(mesh_data->num_submeshes <= HWMeshData::MAX_SUBMESHES); assert(mesh_data->vertex_format == vertex_fmt::FMT_POS3F_NORM3F_TEX2F); assert(mesh_data->num_submeshes == 1); // for now gpu_mesh.indices_count = mesh_data->submesh_indices_size[0]; auto vertex_data = reinterpret_cast<const char*>(file_data + entry->file_offset + sizeof(HWMeshData)); vertex_data = alignPtr(vertex_data, 16); auto index_data = vertex_data + mesh_data->vertex_data_size; index_data = alignPtr(index_data, 4); gpu_mesh.loadVertexData(vertex_data, mesh_data->vertex_data_size, (vertex_fmt::VertexFormat)mesh_data->vertex_format); gpu_mesh.loadIndices(index_data, mesh_data->index_data_size, mesh_data->index_type); }
bool TransTable::resize( size_t sizeBytes ) { size_t sizeEntries = (sizeBytes + sizeof(TransEntry)-1)/sizeof(TransEntry); if ( sizeEntries <= buckets ) { // do dummy alloc (buckets entries) dummyAlloc(); return 1; } if ( !roundPow2( sizeEntries ) ) return 0; // bad size if ( size == sizeEntries ) return 1; // realloc! dealloc(); allocEntries = new(std::nothrow) TransEntry[ sizeEntries + alignSize/sizeof(TransEntry) ]; if ( !allocEntries ) { dummyAlloc(); return 0; } // align entries entries = static_cast<TransEntry *>(alignPtr( allocEntries, alignSize )); size = sizeEntries; return 1; }
// Allocates `size` bytes whose start address is aligned to 32 bytes.
//
// The block obtained from malloc() is over-allocated by one pointer plus 32
// bytes; the pointer returned by malloc() is stored in the slot immediately
// before the returned address (adata[-1]).
//
// Returns 0 when the request cannot be satisfied, including when adding the
// bookkeeping overhead to `size` would overflow size_t.
static void* cvAlloc( size_t size)
{
    const size_t total = size + sizeof(void*) + 32;

    // Unsigned wrap-around check: without it a huge `size` yields a tiny
    // allocation that is handed back as if it held `size` bytes.
    if( total < size )
        return 0;

    uint8_t* udata = (uint8_t*)malloc(total);
    if(!udata)
        return 0;

    // Skip one pointer slot, then round up to the next 32-byte boundary.
    uint8_t** adata = (uint8_t**)alignPtr((uint8_t**)udata + 1, 32);
    adata[-1] = udata;
    return adata;
}
// Allocates `size` bytes aligned to CV_MALLOC_ALIGN.
//
// The block obtained from malloc() is over-allocated by one pointer plus
// CV_MALLOC_ALIGN bytes; the pointer returned by malloc() is stored in the
// slot immediately before the returned address (adata[-1]).
//
// Failure (malloc returning null, or the padded size overflowing size_t) is
// reported through OutOfMemoryError(size), whose result is returned.
void* fastMalloc( size_t size )
{
    const size_t total = size + sizeof(void*) + CV_MALLOC_ALIGN;

    // Unsigned wrap-around check: without it a huge `size` yields a tiny
    // allocation that is handed back as if it held `size` bytes.
    if( total < size )
        return OutOfMemoryError(size);

    uchar* udata = (uchar*)malloc(total);
    if(!udata)
        return OutOfMemoryError(size);

    // Skip one pointer slot, then round up to the alignment boundary.
    uchar** adata = alignPtr((uchar**)udata + 1, CV_MALLOC_ALIGN);
    adata[-1] = udata;
    return adata;
}
// Processes rows [range.start, range.end) of *dst in place.
//
// For every row the float values d[0..n-1] are copied to f[], a forward scan
// builds the index array v[] and the breakpoint array z[], and a second scan
// overwrites d[q] with sqrt(sqr_tab[|q - p|] + f[p]) for the selected p.
// NOTE(review): this looks like the lower-envelope (Felzenszwalb-Huttenlocher)
// 1-D squared distance transform - confirm against the enclosing class.
void operator()( const Range& range ) const
{
    const float inf = 1e15f;
    const int rowBegin = range.start, rowEnd = range.end;
    const int n = dst->cols;

    // One scratch buffer holds f (n floats), z (n+1 floats) and v (ints).
    AutoBuffer<uchar> _buf((n+2)*2*sizeof(float) + (n+2)*sizeof(int));
    float* f = (float*)(uchar*)_buf;
    float* z = f + n;
    int* v = alignPtr((int*)(z + n + 1), sizeof(int));

    for( int row = rowBegin; row < rowEnd; row++ )
    {
        float* d = dst->ptr<float>(row);

        v[0] = 0;
        z[0] = -inf;
        z[1] = inf;
        f[0] = d[0];

        // Forward scan: k is the index of the last entry in v[]/z[].
        int k = 0;
        for( int q = 1; q < n; q++ )
        {
            const float fq = d[q];
            f[q] = fq;

            for(;;)
            {
                const int p = v[k];
                const float s = (fq + sqr_tab[q] - d[p] - sqr_tab[p])*inv_tab[q - p];
                if( s > z[k] )
                {
                    k++;
                    v[k] = q;
                    z[k] = s;
                    z[k+1] = inf;
                    break;
                }
                // Entry k is discarded; z[0] == -inf guarantees termination.
                k--;
            }
        }

        // Backward substitution: walk the breakpoints and write the result.
        k = 0;
        for( int q = 0; q < n; q++ )
        {
            while( z[k+1] < q )
                k++;
            const int p = v[k];
            d[q] = std::sqrt(sqr_tab[std::abs(q - p)] + f[p]);
        }
    }
}
void cv::split(const Mat& src, Mat* mv) { int k, depth = src.depth(), cn = src.channels(); if( cn == 1 ) { src.copyTo(mv[0]); return; } SplitFunc func = splitTab[depth]; CV_Assert( func != 0 ); int esz = (int)src.elemSize(), esz1 = (int)src.elemSize1(); int blocksize0 = (BLOCK_SIZE + esz-1)/esz; AutoBuffer<uchar> _buf((cn+1)*(sizeof(Mat*) + sizeof(uchar*)) + 16); const Mat** arrays = (const Mat**)(uchar*)_buf; uchar** ptrs = (uchar**)alignPtr(arrays + cn + 1, 16); arrays[0] = &src; for( k = 0; k < cn; k++ ) { mv[k].create(src.dims, src.size, depth); arrays[k+1] = &mv[k]; } NAryMatIterator it(arrays, ptrs, cn+1); int total = (int)it.size, blocksize = cn <= 4 ? total : std::min(total, blocksize0); for( size_t i = 0; i < it.nplanes; i++, ++it ) { for( int j = 0; j < total; j += blocksize ) { int bsz = std::min(total - j, blocksize); func( ptrs[0], &ptrs[1], bsz, cn ); if( j + blocksize < total ) { ptrs[0] += bsz*esz; for( k = 0; k < cn; k++ ) ptrs[k+1] += bsz*esz1; } } } }
// Assigns `_value` to every element of this matrix, or only to the elements
// selected by `_mask` when a non-empty mask is given.
//
// The mask must be empty or of type CV_8U. A matrix without data is returned
// untouched. Returns *this.
Mat& Mat::setTo(InputArray _value, InputArray _mask)
{
    if( !data )
        return *this;

    Mat value = _value.getMat(), mask = _mask.getMat();

    CV_Assert( checkScalar(value, type(), _value.kind(), _InputArray::MAT ));
    CV_Assert( mask.empty() || mask.type() == CV_8U );

    size_t esz = elemSize();
    BinaryFunc copymask = getCopyMaskFunc(esz);

    const Mat* maskPtr = mask.empty() ? 0 : &mask;
    const Mat* arrays[] = { this, maskPtr, 0 };
    uchar* ptrs[2]={0,0};
    NAryMatIterator it(arrays, ptrs);

    const int total = (int)it.size;
    const int blockSize0 = std::min(total, (int)((BLOCK_SIZE + esz-1)/esz));

    // Scratch block holding the value repeated blockSize0 times.
    AutoBuffer<uchar> _scbuf(blockSize0*esz + 32);
    uchar* scbuf = alignPtr((uchar*)_scbuf, (int)sizeof(double));
    convertAndUnrollScalar( value, type(), scbuf, blockSize0 );

    for( size_t plane = 0; plane < it.nplanes; plane++, ++it )
    {
        for( int done = 0; done < total; done += blockSize0 )
        {
            Size sz(std::min(blockSize0, total - done), 1);
            const size_t blockSize = sz.width*esz;

            if( !ptrs[1] )
            {
                // No mask: plain copy of the pre-filled block.
                memcpy(ptrs[0], scbuf, blockSize);
            }
            else
            {
                copymask(scbuf, 0, ptrs[1], 0, ptrs[0], 0, sz, &esz);
                ptrs[1] += sz.width;
            }
            ptrs[0] += blockSize;
        }
    }
    return *this;
}
template<class CastOp, class VecOp> void pyrUp_( const Mat& _src, Mat& _dst, int) { const int PU_SZ = 3; typedef typename CastOp::type1 WT; typedef typename CastOp::rtype T; Size ssize = _src.size(), dsize = _dst.size(); int cn = _src.channels(); int bufstep = (int)alignSize((dsize.width+1)*cn, 16); AutoBuffer<WT> _buf(bufstep*PU_SZ + 16); WT* buf = alignPtr((WT*)_buf, 16); AutoBuffer<int> _dtab(ssize.width*cn); int* dtab = _dtab; WT* rows[PU_SZ]; CastOp castOp; VecOp vecOp; CV_Assert( std::abs(dsize.width - ssize.width*2) == dsize.width % 2 && std::abs(dsize.height - ssize.height*2) == dsize.height % 2); int k, x, sy0 = -PU_SZ/2, sy = sy0; ssize.width *= cn; dsize.width *= cn; for( x = 0; x < ssize.width; x++ ) dtab[x] = (x/cn)*2*cn + x % cn; for( int y = 0; y < ssize.height; y++ ) { T* dst0 = (T*)(_dst.data + _dst.step*y*2); T* dst1 = (T*)(_dst.data + _dst.step*(y*2+1)); WT *row0, *row1, *row2; if( y*2+1 >= dsize.height ) dst1 = dst0; // fill the ring buffer (horizontal convolution and decimation) for( ; sy <= y + 1; sy++ ) { WT* row = buf + ((sy - sy0) % PU_SZ)*bufstep; int _sy = borderInterpolate(sy*2, dsize.height, BORDER_REFLECT_101)/2; const T* src = (const T*)(_src.data + _src.step*_sy); if( ssize.width == cn ) { for( x = 0; x < cn; x++ ) row[x] = row[x + cn] = src[x]*8; continue; } for( x = 0; x < cn; x++ ) { int dx = dtab[x]; WT t0 = src[x]*6 + src[x + cn]*2; WT t1 = (src[x] + src[x + cn])*4; row[dx] = t0; row[dx + cn] = t1; dx = dtab[ssize.width - cn + x]; int sx = ssize.width - cn + x; t0 = src[sx - cn] + src[sx]*7; t1 = src[sx]*8; row[dx] = t0; row[dx + cn] = t1; } for( x = cn; x < ssize.width - cn; x++ ) { int dx = dtab[x]; WT t0 = src[x-cn] + src[x]*6 + src[x+cn]; WT t1 = (src[x] + src[x+cn])*4; row[dx] = t0; row[dx+cn] = t1; } } // do vertical convolution and decimation and write the result to the destination image for( k = 0; k < PU_SZ; k++ ) rows[k] = buf + ((y - PU_SZ/2 + k - sy0) % PU_SZ)*bufstep; row0 = rows[0]; row1 = rows[1]; row2 = rows[2]; 
x = vecOp(rows, dst0, (int)_dst.step, dsize.width); for( ; x < dsize.width; x++ ) { T t1 = castOp((row1[x] + row2[x])*4); T t0 = castOp(row0[x] + row1[x]*6 + row2[x]); dst1[x] = t1; dst0[x] = t0; } } }
template<class CastOp, class VecOp> void pyrDown_( const Mat& _src, Mat& _dst, int borderType ) { const int PD_SZ = 5; typedef typename CastOp::type1 WT; typedef typename CastOp::rtype T; CV_Assert( !_src.empty() ); Size ssize = _src.size(), dsize = _dst.size(); int cn = _src.channels(); int bufstep = (int)alignSize(dsize.width*cn, 16); AutoBuffer<WT> _buf(bufstep*PD_SZ + 16); WT* buf = alignPtr((WT*)_buf, 16); int tabL[CV_CN_MAX*(PD_SZ+2)], tabR[CV_CN_MAX*(PD_SZ+2)]; AutoBuffer<int> _tabM(dsize.width*cn); int* tabM = _tabM; WT* rows[PD_SZ]; CastOp castOp; VecOp vecOp; CV_Assert( ssize.width > 0 && ssize.height > 0 && std::abs(dsize.width*2 - ssize.width) <= 2 && std::abs(dsize.height*2 - ssize.height) <= 2 ); int k, x, sy0 = -PD_SZ/2, sy = sy0, width0 = std::min((ssize.width-PD_SZ/2-1)/2 + 1, dsize.width); for( x = 0; x <= PD_SZ+1; x++ ) { int sx0 = borderInterpolate(x - PD_SZ/2, ssize.width, borderType)*cn; int sx1 = borderInterpolate(x + width0*2 - PD_SZ/2, ssize.width, borderType)*cn; for( k = 0; k < cn; k++ ) { tabL[x*cn + k] = sx0 + k; tabR[x*cn + k] = sx1 + k; } } ssize.width *= cn; dsize.width *= cn; width0 *= cn; for( x = 0; x < dsize.width; x++ ) tabM[x] = (x/cn)*2*cn + x % cn; for( int y = 0; y < dsize.height; y++ ) { T* dst = (T*)(_dst.data + _dst.step*y); WT *row0, *row1, *row2, *row3, *row4; // fill the ring buffer (horizontal convolution and decimation) for( ; sy <= y*2 + 2; sy++ ) { WT* row = buf + ((sy - sy0) % PD_SZ)*bufstep; int _sy = borderInterpolate(sy, ssize.height, borderType); const T* src = (const T*)(_src.data + _src.step*_sy); int limit = cn; const int* tab = tabL; for( x = 0;;) { for( ; x < limit; x++ ) { row[x] = src[tab[x+cn*2]]*6 + (src[tab[x+cn]] + src[tab[x+cn*3]])*4 + src[tab[x]] + src[tab[x+cn*4]]; } if( x == dsize.width ) break; if( cn == 1 ) { for( ; x < width0; x++ ) row[x] = src[x*2]*6 + (src[x*2 - 1] + src[x*2 + 1])*4 + src[x*2 - 2] + src[x*2 + 2]; } else if( cn == 3 ) { for( ; x < width0; x += 3 ) { const T* s = src + x*2; 
WT t0 = s[0]*6 + (s[-3] + s[3])*4 + s[-6] + s[6]; WT t1 = s[1]*6 + (s[-2] + s[4])*4 + s[-5] + s[7]; WT t2 = s[2]*6 + (s[-1] + s[5])*4 + s[-4] + s[8]; row[x] = t0; row[x+1] = t1; row[x+2] = t2; } } else if( cn == 4 ) { for( ; x < width0; x += 4 ) { const T* s = src + x*2; WT t0 = s[0]*6 + (s[-4] + s[4])*4 + s[-8] + s[8]; WT t1 = s[1]*6 + (s[-3] + s[5])*4 + s[-7] + s[9]; row[x] = t0; row[x+1] = t1; t0 = s[2]*6 + (s[-2] + s[6])*4 + s[-6] + s[10]; t1 = s[3]*6 + (s[-1] + s[7])*4 + s[-5] + s[11]; row[x+2] = t0; row[x+3] = t1; } } else { for( ; x < width0; x++ ) { int sx = tabM[x]; row[x] = src[sx]*6 + (src[sx - cn] + src[sx + cn])*4 + src[sx - cn*2] + src[sx + cn*2]; } } limit = dsize.width; tab = tabR - x; } } // do vertical convolution and decimation and write the result to the destination image for( k = 0; k < PD_SZ; k++ ) rows[k] = buf + ((y*2 - PD_SZ/2 + k - sy0) % PD_SZ)*bufstep; row0 = rows[0]; row1 = rows[1]; row2 = rows[2]; row3 = rows[3]; row4 = rows[4]; x = vecOp(rows, dst, (int)_dst.step, dsize.width); for( ; x < dsize.width; x++ ) dst[x] = castOp(row2[x]*6 + (row1[x] + row3[x])*4 + row0[x] + row4[x]); } }
/* maskBoundingRect: bounding rectangle of the non-zero bytes of `img`.
 *
 * Preconditions (asserted): img.depth() <= CV_8S and a single channel.
 *
 * Every row is scanned in two parts. The bytes before the first 4-byte
 * aligned address (`offset` of them, capped at the row width) are tested one
 * by one. For the rest of the row, xmin, xmax and size.width are temporarily
 * shifted by `offset` so that column indices are relative to the aligned
 * pointer `ptr`, and shifted back at the end of the row. Within that part:
 *   - columns left of the current xmin are searched from the left, skipping
 *     four zero bytes at a time through aligned int reads;
 *   - columns right of max(j-1, xmax) are searched from the right, again four
 *     bytes at a time once k sits on the last byte of an aligned word;
 *   - if neither search found a non-zero byte, the remaining span [j, k] is
 *     scanned only to decide whether the row has any non-zero byte
 *     (have_nz), which is what updates ymin and ymax.
 *
 * Returns Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1). When no
 * non-zero byte was seen (xmin still >= width), xmin and ymin are set to 0,
 * so with xmax == ymax == -1 the rectangle has zero width and height.
 *
 * NOTE(review): the int reads go through a cast of a `const uchar*`; this
 * relies on the compiler tolerating that aliasing - confirm build flags.
 */
static Rect maskBoundingRect( const Mat& img ) { CV_Assert( img.depth() <= CV_8S && img.channels() == 1 ); Size size = img.size(); int xmin = size.width, ymin = -1, xmax = -1, ymax = -1, i, j, k; for( i = 0; i < size.height; i++ ) { const uchar* _ptr = img.ptr(i); const uchar* ptr = (const uchar*)alignPtr(_ptr, 4); int have_nz = 0, k_min, offset = (int)(ptr - _ptr); j = 0; offset = MIN(offset, size.width); for( ; j < offset; j++ ) if( _ptr[j] ) { have_nz = 1; break; } if( j < offset ) { if( j < xmin ) xmin = j; if( j > xmax ) xmax = j; } if( offset < size.width ) { xmin -= offset; xmax -= offset; size.width -= offset; j = 0; for( ; j <= xmin - 4; j += 4 ) if( *((int*)(ptr+j)) ) break; for( ; j < xmin; j++ ) if( ptr[j] ) { xmin = j; if( j > xmax ) xmax = j; have_nz = 1; break; } k_min = MAX(j-1, xmax); k = size.width - 1; for( ; k > k_min && (k&3) != 3; k-- ) if( ptr[k] ) break; if( k > k_min && (k&3) == 3 ) { for( ; k > k_min+3; k -= 4 ) if( *((int*)(ptr+k-3)) ) break; } for( ; k > k_min; k-- ) if( ptr[k] ) { xmax = k; have_nz = 1; break; } if( !have_nz ) { j &= ~3; for( ; j <= k - 3; j += 4 ) if( *((int*)(ptr+j)) ) break; for( ; j <= k; j++ ) if( ptr[j] ) { have_nz = 1; break; } } xmin += offset; xmax += offset; size.width += offset; } if( have_nz ) { if( ymin < 0 ) ymin = i; ymax = i; } } if( xmin >= size.width ) xmin = ymin = 0; return Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1); }
/*
** seeColon
** Walks a colon definition starting at pc and prints one line per cell
** through vmTextOut until the "(;)" word is reached, then prints ";".
** Each line starts with '>' when the cell is the VM's current ip (' '
** otherwise), followed by the cell's offset from the start of the
** definition. Cells that hold a dictionary word are classified with
** ficlWordClassify so that literals, string literals and the primitive
** control structures can be shown together with their operand cell(s);
** anything else is printed as a raw value.
**
** Offsets are pointer differences (ptrdiff_t). They are cast to int before
** being handed to the "%d" conversions, because passing a ptrdiff_t for
** "%d" is undefined where ptrdiff_t is wider than int.
*/
static void seeColon(FICL_VM *pVM, CELL *pc)
{
    char *cp;
    CELL *param0 = pc;
    FICL_DICT *pd = vmGetDict(pVM);
    FICL_WORD *pSemiParen = ficlLookup(pVM->pSys, "(;)");
    assert(pSemiParen);

    for (; pc->p != pSemiParen; pc++)
    {
        FICL_WORD *pFW = (FICL_WORD *)(pc->p);

        /* Line prefix: current-instruction marker and cell offset. */
        cp = pVM->pad;
        if ((void *)pc == (void *)pVM->ip)
            *cp++ = '>';
        else
            *cp++ = ' ';
        cp += sprintf(cp, "%3d ", (int)(pc-param0));

        if (isAFiclWord(pd, pFW))
        {
            WORDKIND kind = ficlWordClassify(pFW);
            CELL c;

            switch (kind)
            {
            case LITERAL:
                /* The literal value lives in the next cell. */
                c = *++pc;
                if (isAFiclWord(pd, c.p))
                {
                    FICL_WORD *pLit = (FICL_WORD *)c.p;
                    sprintf(cp, "%.*s ( %#lx literal )",
                        pLit->nName, pLit->name, (unsigned long)c.u);
                }
                else
                    sprintf(cp, "literal %ld (%#lx)", (long)c.i, (unsigned long)c.u);
                break;
            case STRINGLIT:
                {
                    /* Counted string stored inline; skip to the last cell it occupies. */
                    FICL_STRING *sp = (FICL_STRING *)(void *)++pc;
                    pc = (CELL *)alignPtr(sp->text + sp->count + 1) - 1;
                    sprintf(cp, "s\" %.*s\"", sp->count, sp->text);
                }
                break;
            case CSTRINGLIT:
                {
                    /* Same layout as STRINGLIT, different prefix in the output. */
                    FICL_STRING *sp = (FICL_STRING *)(void *)++pc;
                    pc = (CELL *)alignPtr(sp->text + sp->count + 1) - 1;
                    sprintf(cp, "c\" %.*s\"", sp->count, sp->text);
                }
                break;
            case IF:
                c = *++pc;
                if (c.i > 0)
                    sprintf(cp, "if / while (branch %d)", (int)(pc+c.i-param0));
                else
                    sprintf(cp, "until (branch %d)", (int)(pc+c.i-param0));
                break;
            case BRANCH:
                c = *++pc;
                if (c.i == 0)
                    sprintf(cp, "repeat (branch %d)", (int)(pc+c.i-param0));
                else if (c.i == 1)
                    sprintf(cp, "else (branch %d)", (int)(pc+c.i-param0));
                else
                    sprintf(cp, "endof (branch %d)", (int)(pc+c.i-param0));
                break;
            case OF:
                c = *++pc;
                sprintf(cp, "of (branch %d)", (int)(pc+c.i-param0));
                break;
            case QDO:
                /* The operand cell holds an absolute address, not an offset. */
                c = *++pc;
                sprintf(cp, "?do (leave %d)", (int)((CELL *)c.p-param0));
                break;
            case DO:
                c = *++pc;
                sprintf(cp, "do (leave %d)", (int)((CELL *)c.p-param0));
                break;
            case LOOP:
                c = *++pc;
                sprintf(cp, "loop (branch %d)", (int)(pc+c.i-param0));
                break;
            case PLOOP:
                c = *++pc;
                sprintf(cp, "+loop (branch %d)", (int)(pc+c.i-param0));
                break;
            default:
                sprintf(cp, "%.*s", pFW->nName, pFW->name);
                break;
            }
        }
        else /* probably not a word - punt and print value */
        {
            sprintf(cp, "%ld ( %#lx )", (long)pc->i, (unsigned long)pc->u);
        }

        vmTextOut(pVM, pVM->pad, 1);
    }

    vmTextOut(pVM, ";", 1);
}
void FAST_t(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, bool nonmax_suppression) { Mat img = _img.getMat(); const int K = patternSize/2, N = patternSize + K + 1; int i, j, k, pixel[25]; makeOffsets(pixel, (int)img.step, patternSize); #if CV_SIMD128 const int quarterPatternSize = patternSize/4; v_uint8x16 delta = v_setall_u8(0x80), t = v_setall_u8((char)threshold), K16 = v_setall_u8((char)K); bool hasSimd = hasSIMD128(); #if CV_TRY_AVX2 Ptr<opt_AVX2::FAST_t_patternSize16_AVX2> fast_t_impl_avx2; if(CV_CPU_HAS_SUPPORT_AVX2) fast_t_impl_avx2 = opt_AVX2::FAST_t_patternSize16_AVX2::getImpl(img.cols, threshold, nonmax_suppression, pixel); #endif #endif keypoints.clear(); threshold = std::min(std::max(threshold, 0), 255); uchar threshold_tab[512]; for( i = -255; i <= 255; i++ ) threshold_tab[i+255] = (uchar)(i < -threshold ? 1 : i > threshold ? 2 : 0); AutoBuffer<uchar> _buf((img.cols+16)*3*(sizeof(int) + sizeof(uchar)) + 128); uchar* buf[3]; buf[0] = _buf.data(); buf[1] = buf[0] + img.cols; buf[2] = buf[1] + img.cols; int* cpbuf[3]; cpbuf[0] = (int*)alignPtr(buf[2] + img.cols, sizeof(int)) + 1; cpbuf[1] = cpbuf[0] + img.cols + 1; cpbuf[2] = cpbuf[1] + img.cols + 1; memset(buf[0], 0, img.cols*3); for(i = 3; i < img.rows-2; i++) { const uchar* ptr = img.ptr<uchar>(i) + 3; uchar* curr = buf[(i - 3)%3]; int* cornerpos = cpbuf[(i - 3)%3]; memset(curr, 0, img.cols); int ncorners = 0; if( i < img.rows - 3 ) { j = 3; #if CV_SIMD128 if( hasSimd ) { if( patternSize == 16 ) { #if CV_TRY_AVX2 if (fast_t_impl_avx2) fast_t_impl_avx2->process(j, ptr, curr, cornerpos, ncorners); #endif //vz if (j <= (img.cols - 27)) //it doesn't make sense using vectors for less than 8 elements { for (; j < img.cols - 16 - 3; j += 16, ptr += 16) { v_uint8x16 v = v_load(ptr); v_int8x16 v0 = v_reinterpret_as_s8((v + t) ^ delta); v_int8x16 v1 = v_reinterpret_as_s8((v - t) ^ delta); v_int8x16 x0 = v_reinterpret_as_s8(v_sub_wrap(v_load(ptr + pixel[0]), delta)); v_int8x16 x1 = 
v_reinterpret_as_s8(v_sub_wrap(v_load(ptr + pixel[quarterPatternSize]), delta)); v_int8x16 x2 = v_reinterpret_as_s8(v_sub_wrap(v_load(ptr + pixel[2*quarterPatternSize]), delta)); v_int8x16 x3 = v_reinterpret_as_s8(v_sub_wrap(v_load(ptr + pixel[3*quarterPatternSize]), delta)); v_int8x16 m0, m1; m0 = (v0 < x0) & (v0 < x1); m1 = (x0 < v1) & (x1 < v1); m0 = m0 | ((v0 < x1) & (v0 < x2)); m1 = m1 | ((x1 < v1) & (x2 < v1)); m0 = m0 | ((v0 < x2) & (v0 < x3)); m1 = m1 | ((x2 < v1) & (x3 < v1)); m0 = m0 | ((v0 < x3) & (v0 < x0)); m1 = m1 | ((x3 < v1) & (x0 < v1)); m0 = m0 | m1; int mask = v_signmask(m0); if( mask == 0 ) continue; if( (mask & 255) == 0 ) { j -= 8; ptr -= 8; continue; } v_int8x16 c0 = v_setzero_s8(); v_int8x16 c1 = v_setzero_s8(); v_uint8x16 max0 = v_setzero_u8(); v_uint8x16 max1 = v_setzero_u8(); for( k = 0; k < N; k++ ) { v_int8x16 x = v_reinterpret_as_s8(v_load((ptr + pixel[k])) ^ delta); m0 = v0 < x; m1 = x < v1; c0 = v_sub_wrap(c0, m0) & m0; c1 = v_sub_wrap(c1, m1) & m1; max0 = v_max(max0, v_reinterpret_as_u8(c0)); max1 = v_max(max1, v_reinterpret_as_u8(c1)); } max0 = v_max(max0, max1); int m = v_signmask(K16 < max0); for( k = 0; m > 0 && k < 16; k++, m >>= 1 ) { if(m & 1) { cornerpos[ncorners++] = j+k; if(nonmax_suppression) curr[j+k] = (uchar)cornerScore<patternSize>(ptr+k, pixel, threshold); } } } } } } #endif for( ; j < img.cols - 3; j++, ptr++ ) { int v = ptr[0]; const uchar* tab = &threshold_tab[0] - v + 255; int d = tab[ptr[pixel[0]]] | tab[ptr[pixel[8]]]; if( d == 0 ) continue; d &= tab[ptr[pixel[2]]] | tab[ptr[pixel[10]]]; d &= tab[ptr[pixel[4]]] | tab[ptr[pixel[12]]]; d &= tab[ptr[pixel[6]]] | tab[ptr[pixel[14]]]; if( d == 0 ) continue; d &= tab[ptr[pixel[1]]] | tab[ptr[pixel[9]]]; d &= tab[ptr[pixel[3]]] | tab[ptr[pixel[11]]]; d &= tab[ptr[pixel[5]]] | tab[ptr[pixel[13]]]; d &= tab[ptr[pixel[7]]] | tab[ptr[pixel[15]]]; if( d & 1 ) { int vt = v - threshold, count = 0; for( k = 0; k < N; k++ ) { int x = ptr[pixel[k]]; if(x < vt) { if( ++count > K 
) { cornerpos[ncorners++] = j; if(nonmax_suppression) curr[j] = (uchar)cornerScore<patternSize>(ptr, pixel, threshold); break; } } else count = 0; } } if( d & 2 ) { int vt = v + threshold, count = 0; for( k = 0; k < N; k++ ) { int x = ptr[pixel[k]]; if(x > vt) { if( ++count > K ) { cornerpos[ncorners++] = j; if(nonmax_suppression) curr[j] = (uchar)cornerScore<patternSize>(ptr, pixel, threshold); break; } } else count = 0; } } } } cornerpos[-1] = ncorners; if( i == 3 ) continue; const uchar* prev = buf[(i - 4 + 3)%3]; const uchar* pprev = buf[(i - 5 + 3)%3]; cornerpos = cpbuf[(i - 4 + 3)%3]; ncorners = cornerpos[-1]; for( k = 0; k < ncorners; k++ ) { j = cornerpos[k]; int score = prev[j]; if( !nonmax_suppression || (score > prev[j+1] && score > prev[j-1] && score > pprev[j-1] && score > pprev[j] && score > pprev[j+1] && score > curr[j-1] && score > curr[j] && score > curr[j+1]) ) { keypoints.push_back(KeyPoint((float)j, (float)(i-1), 7.f, -1, (float)score)); } } }
void FAST_t(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, bool nonmax_suppression) { Mat img = _img.getMat(); const int K = patternSize/2, N = patternSize + K + 1; #if CV_SSE2 const int quarterPatternSize = patternSize/4; (void)quarterPatternSize; #endif int i, j, k, pixel[25]; makeOffsets(pixel, (int)img.step, patternSize); keypoints.clear(); threshold = std::min(std::max(threshold, 0), 255); #if CV_SSE2 __m128i delta = _mm_set1_epi8(-128), t = _mm_set1_epi8((char)threshold), K16 = _mm_set1_epi8((char)K); (void)K16; (void)delta; (void)t; #endif uchar threshold_tab[512]; for( i = -255; i <= 255; i++ ) threshold_tab[i+255] = (uchar)(i < -threshold ? 1 : i > threshold ? 2 : 0); AutoBuffer<uchar> _buf((img.cols+16)*3*(sizeof(int) + sizeof(uchar)) + 128); uchar* buf[3]; buf[0] = _buf; buf[1] = buf[0] + img.cols; buf[2] = buf[1] + img.cols; int* cpbuf[3]; cpbuf[0] = (int*)alignPtr(buf[2] + img.cols, sizeof(int)) + 1; cpbuf[1] = cpbuf[0] + img.cols + 1; cpbuf[2] = cpbuf[1] + img.cols + 1; memset(buf[0], 0, img.cols*3); for(i = 3; i < img.rows-2; i++) { const uchar* ptr = img.ptr<uchar>(i) + 3; uchar* curr = buf[(i - 3)%3]; int* cornerpos = cpbuf[(i - 3)%3]; memset(curr, 0, img.cols); int ncorners = 0; if( i < img.rows - 3 ) { j = 3; #if CV_SSE2 if( patternSize == 16 ) { for(; j < img.cols - 16 - 3; j += 16, ptr += 16) { __m128i m0, m1; __m128i v0 = _mm_loadu_si128((const __m128i*)ptr); __m128i v1 = _mm_xor_si128(_mm_subs_epu8(v0, t), delta); v0 = _mm_xor_si128(_mm_adds_epu8(v0, t), delta); __m128i x0 = _mm_sub_epi8(_mm_loadu_si128((const __m128i*)(ptr + pixel[0])), delta); __m128i x1 = _mm_sub_epi8(_mm_loadu_si128((const __m128i*)(ptr + pixel[quarterPatternSize])), delta); __m128i x2 = _mm_sub_epi8(_mm_loadu_si128((const __m128i*)(ptr + pixel[2*quarterPatternSize])), delta); __m128i x3 = _mm_sub_epi8(_mm_loadu_si128((const __m128i*)(ptr + pixel[3*quarterPatternSize])), delta); m0 = _mm_and_si128(_mm_cmpgt_epi8(x0, v0), _mm_cmpgt_epi8(x1, v0)); m1 = 
_mm_and_si128(_mm_cmpgt_epi8(v1, x0), _mm_cmpgt_epi8(v1, x1)); m0 = _mm_or_si128(m0, _mm_and_si128(_mm_cmpgt_epi8(x1, v0), _mm_cmpgt_epi8(x2, v0))); m1 = _mm_or_si128(m1, _mm_and_si128(_mm_cmpgt_epi8(v1, x1), _mm_cmpgt_epi8(v1, x2))); m0 = _mm_or_si128(m0, _mm_and_si128(_mm_cmpgt_epi8(x2, v0), _mm_cmpgt_epi8(x3, v0))); m1 = _mm_or_si128(m1, _mm_and_si128(_mm_cmpgt_epi8(v1, x2), _mm_cmpgt_epi8(v1, x3))); m0 = _mm_or_si128(m0, _mm_and_si128(_mm_cmpgt_epi8(x3, v0), _mm_cmpgt_epi8(x0, v0))); m1 = _mm_or_si128(m1, _mm_and_si128(_mm_cmpgt_epi8(v1, x3), _mm_cmpgt_epi8(v1, x0))); m0 = _mm_or_si128(m0, m1); int mask = _mm_movemask_epi8(m0); if( mask == 0 ) continue; if( (mask & 255) == 0 ) { j -= 8; ptr -= 8; continue; } __m128i c0 = _mm_setzero_si128(), c1 = c0, max0 = c0, max1 = c0; for( k = 0; k < N; k++ ) { __m128i x = _mm_xor_si128(_mm_loadu_si128((const __m128i*)(ptr + pixel[k])), delta); m0 = _mm_cmpgt_epi8(x, v0); m1 = _mm_cmpgt_epi8(v1, x); c0 = _mm_and_si128(_mm_sub_epi8(c0, m0), m0); c1 = _mm_and_si128(_mm_sub_epi8(c1, m1), m1); max0 = _mm_max_epu8(max0, c0); max1 = _mm_max_epu8(max1, c1); } max0 = _mm_max_epu8(max0, max1); int m = _mm_movemask_epi8(_mm_cmpgt_epi8(max0, K16)); for( k = 0; m > 0 && k < 16; k++, m >>= 1 ) if(m & 1) { cornerpos[ncorners++] = j+k; if(nonmax_suppression) curr[j+k] = (uchar)cornerScore<patternSize>(ptr+k, pixel, threshold); } } } #endif for( ; j < img.cols - 3; j++, ptr++ ) { int v = ptr[0]; const uchar* tab = &threshold_tab[0] - v + 255; int d = tab[ptr[pixel[0]]] | tab[ptr[pixel[8]]]; if( d == 0 ) continue; d &= tab[ptr[pixel[2]]] | tab[ptr[pixel[10]]]; d &= tab[ptr[pixel[4]]] | tab[ptr[pixel[12]]]; d &= tab[ptr[pixel[6]]] | tab[ptr[pixel[14]]]; if( d == 0 ) continue; d &= tab[ptr[pixel[1]]] | tab[ptr[pixel[9]]]; d &= tab[ptr[pixel[3]]] | tab[ptr[pixel[11]]]; d &= tab[ptr[pixel[5]]] | tab[ptr[pixel[13]]]; d &= tab[ptr[pixel[7]]] | tab[ptr[pixel[15]]]; if( d & 1 ) { int vt = v - threshold, count = 0; for( k = 0; k < N; k++ ) { int x = 
ptr[pixel[k]]; if(x < vt) { if( ++count > K ) { cornerpos[ncorners++] = j; if(nonmax_suppression) curr[j] = (uchar)cornerScore<patternSize>(ptr, pixel, threshold); break; } } else count = 0; } } if( d & 2 ) { int vt = v + threshold, count = 0; for( k = 0; k < N; k++ ) { int x = ptr[pixel[k]]; if(x > vt) { if( ++count > K ) { cornerpos[ncorners++] = j; if(nonmax_suppression) curr[j] = (uchar)cornerScore<patternSize>(ptr, pixel, threshold); break; } } else count = 0; } } } } cornerpos[-1] = ncorners; if( i == 3 ) continue; const uchar* prev = buf[(i - 4 + 3)%3]; const uchar* pprev = buf[(i - 5 + 3)%3]; cornerpos = cpbuf[(i - 4 + 3)%3]; ncorners = cornerpos[-1]; for( k = 0; k < ncorners; k++ ) { j = cornerpos[k]; int score = prev[j]; if( !nonmax_suppression || (score > prev[j+1] && score > prev[j-1] && score > pprev[j-1] && score > pprev[j] && score > pprev[j+1] && score > curr[j-1] && score > curr[j] && score > curr[j+1]) ) { keypoints.push_back(KeyPoint((float)j, (float)(i-1), 7.f, -1, (float)score)); } } }
void cv::merge(const Mat* mv, size_t n, OutputArray _dst) { CV_Assert( mv && n > 0 ); int depth = mv[0].depth(); bool allch1 = true; int k, cn = 0; size_t i; for( i = 0; i < n; i++ ) { CV_Assert(mv[i].size == mv[0].size && mv[i].depth() == depth); allch1 = allch1 && mv[i].channels() == 1; cn += mv[i].channels(); } CV_Assert( 0 < cn && cn <= CV_CN_MAX ); _dst.create(mv[0].dims, mv[0].size, CV_MAKETYPE(depth, cn)); Mat dst = _dst.getMat(); if( n == 1 ) { mv[0].copyTo(dst); return; } if( !allch1 ) { AutoBuffer<int> pairs(cn*2); int j, ni=0; for( i = 0, j = 0; i < n; i++, j += ni ) { ni = mv[i].channels(); for( k = 0; k < ni; k++ ) { pairs[(j+k)*2] = j + k; pairs[(j+k)*2+1] = j + k; } } mixChannels( mv, n, &dst, 1, &pairs[0], cn ); return; } size_t esz = dst.elemSize(), esz1 = dst.elemSize1(); int blocksize0 = (int)((BLOCK_SIZE + esz-1)/esz); AutoBuffer<uchar> _buf((cn+1)*(sizeof(Mat*) + sizeof(uchar*)) + 16); const Mat** arrays = (const Mat**)(uchar*)_buf; uchar** ptrs = (uchar**)alignPtr(arrays + cn + 1, 16); arrays[0] = &dst; for( k = 0; k < cn; k++ ) arrays[k+1] = &mv[k]; NAryMatIterator it(arrays, ptrs, cn+1); int total = (int)it.size, blocksize = cn <= 4 ? total : std::min(total, blocksize0); MergeFunc func = mergeTab[depth]; for( i = 0; i < it.nplanes; i++, ++it ) { for( int j = 0; j < total; j += blocksize ) { int bsz = std::min(total - j, blocksize); func( (const uchar**)&ptrs[1], ptrs[0], bsz, cn ); if( j + blocksize < total ) { ptrs[0] += bsz*esz; for( int k = 0; k < cn; k++ ) ptrs[k+1] += bsz*esz1; } } } }
/**************************************************************************
                        d i c t A l i g n
** Replaces the dictionary's `here` pointer with the value alignPtr
** returns for it.
**************************************************************************/
void dictAlign(FICL_DICT *pDict)
{
    pDict->here = alignPtr(pDict->here);
}
void operator()(const Range &boundaries) const { CV_TRACE_FUNCTION(); Mat dx, dy; AutoBuffer<short> dxMax(0), dyMax(0); std::deque<uchar*> stack, borderPeaksLocal; const int rowStart = max(0, boundaries.start - 1), rowEnd = min(src.rows, boundaries.end + 1); int *_mag_p, *_mag_a, *_mag_n; short *_dx, *_dy, *_dx_a = NULL, *_dy_a = NULL, *_dx_n = NULL, *_dy_n = NULL; uchar *_pmap; double scale = 1.0; CV_TRACE_REGION("gradient") if(needGradient) { if (aperture_size == 7) { scale = 1 / 16.0; } Sobel(src.rowRange(rowStart, rowEnd), dx, CV_16S, 1, 0, aperture_size, scale, 0, BORDER_REPLICATE); Sobel(src.rowRange(rowStart, rowEnd), dy, CV_16S, 0, 1, aperture_size, scale, 0, BORDER_REPLICATE); } else { dx = src.rowRange(rowStart, rowEnd); dy = src2.rowRange(rowStart, rowEnd); } CV_TRACE_REGION_NEXT("magnitude"); if(cn > 1) { dxMax.allocate(2 * dx.cols); dyMax.allocate(2 * dy.cols); _dx_a = (short*)dxMax; _dx_n = _dx_a + dx.cols; _dy_a = (short*)dyMax; _dy_n = _dy_a + dy.cols; } // _mag_p: previous row, _mag_a: actual row, _mag_n: next row #if CV_SIMD128 AutoBuffer<int> buffer(3 * (mapstep * cn + CV_MALLOC_SIMD128)); _mag_p = alignPtr((int*)buffer + 1, CV_MALLOC_SIMD128); _mag_a = alignPtr(_mag_p + mapstep * cn, CV_MALLOC_SIMD128); _mag_n = alignPtr(_mag_a + mapstep * cn, CV_MALLOC_SIMD128); #else AutoBuffer<int> buffer(3 * (mapstep * cn)); _mag_p = (int*)buffer + 1; _mag_a = _mag_p + mapstep * cn; _mag_n = _mag_a + mapstep * cn; #endif // For the first time when just 2 rows are filled and for left and right borders if(rowStart == boundaries.start) memset(_mag_n - 1, 0, mapstep * sizeof(int)); else _mag_n[src.cols] = _mag_n[-1] = 0; _mag_a[src.cols] = _mag_a[-1] = _mag_p[src.cols] = _mag_p[-1] = 0; // calculate magnitude and angle of gradient, perform non-maxima suppression. 
// fill the map with one of the following values: // 0 - the pixel might belong to an edge // 1 - the pixel can not belong to an edge // 2 - the pixel does belong to an edge for (int i = rowStart; i <= boundaries.end; ++i) { // Scroll the ring buffer std::swap(_mag_n, _mag_a); std::swap(_mag_n, _mag_p); if(i < rowEnd) { // Next row calculation _dx = dx.ptr<short>(i - rowStart); _dy = dy.ptr<short>(i - rowStart); if (L2gradient) { int j = 0, width = src.cols * cn; #if CV_SIMD128 if (haveSIMD) { for ( ; j <= width - 8; j += 8) { v_int16x8 v_dx = v_load((const short*)(_dx + j)); v_int16x8 v_dy = v_load((const short*)(_dy + j)); v_int32x4 v_dxp_low, v_dxp_high; v_int32x4 v_dyp_low, v_dyp_high; v_expand(v_dx, v_dxp_low, v_dxp_high); v_expand(v_dy, v_dyp_low, v_dyp_high); v_store_aligned((int *)(_mag_n + j), v_dxp_low*v_dxp_low+v_dyp_low*v_dyp_low); v_store_aligned((int *)(_mag_n + j + 4), v_dxp_high*v_dxp_high+v_dyp_high*v_dyp_high); } } #endif for ( ; j < width; ++j) _mag_n[j] = int(_dx[j])*_dx[j] + int(_dy[j])*_dy[j]; } else { int j = 0, width = src.cols * cn; #if CV_SIMD128 if (haveSIMD) { for(; j <= width - 8; j += 8) { v_int16x8 v_dx = v_load((const short *)(_dx + j)); v_int16x8 v_dy = v_load((const short *)(_dy + j)); v_dx = v_reinterpret_as_s16(v_abs(v_dx)); v_dy = v_reinterpret_as_s16(v_abs(v_dy)); v_int32x4 v_dx_ml, v_dy_ml, v_dx_mh, v_dy_mh; v_expand(v_dx, v_dx_ml, v_dx_mh); v_expand(v_dy, v_dy_ml, v_dy_mh); v_store_aligned((int *)(_mag_n + j), v_dx_ml + v_dy_ml); v_store_aligned((int *)(_mag_n + j + 4), v_dx_mh + v_dy_mh); } } #endif for ( ; j < width; ++j) _mag_n[j] = std::abs(int(_dx[j])) + std::abs(int(_dy[j])); } if(cn > 1) { std::swap(_dx_n, _dx_a); std::swap(_dy_n, _dy_a); for(int j = 0, jn = 0; j < src.cols; ++j, jn += cn) { int maxIdx = jn; for(int k = 1; k < cn; ++k) if(_mag_n[jn + k] > _mag_n[maxIdx]) maxIdx = jn + k; _mag_n[j] = _mag_n[maxIdx]; _dx_n[j] = _dx[maxIdx]; _dy_n[j] = _dy[maxIdx]; } _mag_n[src.cols] = 0; } // at the very beginning we do 
not have a complete ring // buffer of 3 magnitude rows for non-maxima suppression if (i <= boundaries.start) continue; } else { memset(_mag_n - 1, 0, mapstep * sizeof(int)); if(cn > 1) { std::swap(_dx_n, _dx_a); std::swap(_dy_n, _dy_a); } } // From here actual src row is (i - 1) // Set left and right border to 1 #if CV_SIMD128 if(haveSIMD) _pmap = map.ptr<uchar>(i) + CV_MALLOC_SIMD128; else #endif _pmap = map.ptr<uchar>(i) + 1; _pmap[src.cols] =_pmap[-1] = 1; if(cn == 1) { _dx = dx.ptr<short>(i - rowStart - 1); _dy = dy.ptr<short>(i - rowStart - 1); } else { _dx = _dx_a; _dy = _dy_a; } const int TG22 = 13573; int j = 0; #if CV_SIMD128 if (haveSIMD) { const v_int32x4 v_low = v_setall_s32(low); const v_int8x16 v_one = v_setall_s8(1); for (; j <= src.cols - 32; j += 32) { v_int32x4 v_m1 = v_load_aligned((const int*)(_mag_a + j)); v_int32x4 v_m2 = v_load_aligned((const int*)(_mag_a + j + 4)); v_int32x4 v_m3 = v_load_aligned((const int*)(_mag_a + j + 8)); v_int32x4 v_m4 = v_load_aligned((const int*)(_mag_a + j + 12)); v_int32x4 v_cmp1 = v_m1 > v_low; v_int32x4 v_cmp2 = v_m2 > v_low; v_int32x4 v_cmp3 = v_m3 > v_low; v_int32x4 v_cmp4 = v_m4 > v_low; v_m1 = v_load_aligned((const int*)(_mag_a + j + 16)); v_m2 = v_load_aligned((const int*)(_mag_a + j + 20)); v_m3 = v_load_aligned((const int*)(_mag_a + j + 24)); v_m4 = v_load_aligned((const int*)(_mag_a + j + 28)); v_store_aligned((signed char*)(_pmap + j), v_one); v_store_aligned((signed char*)(_pmap + j + 16), v_one); v_int16x8 v_cmp80 = v_pack(v_cmp1, v_cmp2); v_int16x8 v_cmp81 = v_pack(v_cmp3, v_cmp4); v_cmp1 = v_m1 > v_low; v_cmp2 = v_m2 > v_low; v_cmp3 = v_m3 > v_low; v_cmp4 = v_m4 > v_low; v_int8x16 v_cmp = v_pack(v_cmp80, v_cmp81); v_cmp80 = v_pack(v_cmp1, v_cmp2); v_cmp81 = v_pack(v_cmp3, v_cmp4); unsigned int mask = v_signmask(v_cmp); v_cmp = v_pack(v_cmp80, v_cmp81); mask |= v_signmask(v_cmp) << 16; if (mask) { int k = j; do { int l = trailingZeros32(mask); k += l; mask >>= l; int m = _mag_a[k]; short xs = _dx[k]; 
short ys = _dy[k]; int x = (int)std::abs(xs); int y = (int)std::abs(ys) << 15; int tg22x = x * TG22; if (y < tg22x) { if (m > _mag_a[k - 1] && m >= _mag_a[k + 1]) { CANNY_CHECK_SIMD(m, high, (_pmap+k), stack); } } else { int tg67x = tg22x + (x << 16); if (y > tg67x) { if (m > _mag_p[k] && m >= _mag_n[k]) { CANNY_CHECK_SIMD(m, high, (_pmap+k), stack); } } else { int s = (xs ^ ys) < 0 ? -1 : 1; if(m > _mag_p[k - s] && m > _mag_n[k + s]) { CANNY_CHECK_SIMD(m, high, (_pmap+k), stack); } } } ++k; } while((mask >>= 1)); } } if (j <= src.cols - 16) { v_int32x4 v_m1 = v_load_aligned((const int*)(_mag_a + j)); v_int32x4 v_m2 = v_load_aligned((const int*)(_mag_a + j + 4)); v_int32x4 v_m3 = v_load_aligned((const int*)(_mag_a + j + 8)); v_int32x4 v_m4 = v_load_aligned((const int*)(_mag_a + j + 12)); v_store_aligned((signed char*)(_pmap + j), v_one); v_int32x4 v_cmp1 = v_m1 > v_low; v_int32x4 v_cmp2 = v_m2 > v_low; v_int32x4 v_cmp3 = v_m3 > v_low; v_int32x4 v_cmp4 = v_m4 > v_low; v_int16x8 v_cmp80 = v_pack(v_cmp1, v_cmp2); v_int16x8 v_cmp81 = v_pack(v_cmp3, v_cmp4); v_int8x16 v_cmp = v_pack(v_cmp80, v_cmp81); unsigned int mask = v_signmask(v_cmp); if (mask) { int k = j; do { int l = trailingZeros32(mask); k += l; mask >>= l; int m = _mag_a[k]; short xs = _dx[k]; short ys = _dy[k]; int x = (int)std::abs(xs); int y = (int)std::abs(ys) << 15; int tg22x = x * TG22; if (y < tg22x) { if (m > _mag_a[k - 1] && m >= _mag_a[k + 1]) { CANNY_CHECK_SIMD(m, high, (_pmap+k), stack); } } else { int tg67x = tg22x + (x << 16); if (y > tg67x) { if (m > _mag_p[k] && m >= _mag_n[k]) { CANNY_CHECK_SIMD(m, high, (_pmap+k), stack); } } else { int s = (xs ^ ys) < 0 ? 
-1 : 1; if(m > _mag_p[k - s] && m > _mag_n[k + s]) { CANNY_CHECK_SIMD(m, high, (_pmap+k), stack); } } } ++k; } while((mask >>= 1)); } j += 16; } } #endif for (; j < src.cols; j++) { int m = _mag_a[j]; if (m > low) { short xs = _dx[j]; short ys = _dy[j]; int x = (int)std::abs(xs); int y = (int)std::abs(ys) << 15; int tg22x = x * TG22; if (y < tg22x) { if (m > _mag_a[j - 1] && m >= _mag_a[j + 1]) { CANNY_CHECK(m, high, (_pmap+j), stack); } } else { int tg67x = tg22x + (x << 16); if (y > tg67x) { if (m > _mag_p[j] && m >= _mag_n[j]) { CANNY_CHECK(m, high, (_pmap+j), stack); } } else { int s = (xs ^ ys) < 0 ? -1 : 1; if(m > _mag_p[j - s] && m > _mag_n[j + s]) { CANNY_CHECK(m, high, (_pmap+j), stack); } } } } _pmap[j] = 1; } }