void InfEngineBackendNet::initPlugin(InferenceEngine::ICNNNetwork& net) { CV_Assert(!isInitialized()); try { auto pluginIt = sharedPlugins.find(targetDevice); if (pluginIt != sharedPlugins.end()) { enginePtr = pluginIt->second; } else { enginePtr = InferenceEngine::PluginDispatcher({""}).getSuitablePlugin(targetDevice); sharedPlugins[targetDevice] = enginePtr; if (targetDevice == InferenceEngine::TargetDevice::eCPU) { std::string suffixes[] = {"_avx2", "_sse4", ""}; bool haveFeature[] = { checkHardwareSupport(CPU_AVX2), checkHardwareSupport(CPU_SSE4_2), true }; for (int i = 0; i < 3; ++i) { if (!haveFeature[i]) continue; #ifdef _WIN32 std::string libName = "cpu_extension" + suffixes[i] + ".dll"; #else std::string libName = "libcpu_extension" + suffixes[i] + ".so"; #endif // _WIN32 try { InferenceEngine::IExtensionPtr extension = InferenceEngine::make_so_pointer<InferenceEngine::IExtension>(libName); enginePtr->AddExtension(extension, 0); break; } catch(...) {} } // Some of networks can work without a library of extra layers. } } plugin = InferenceEngine::InferencePlugin(enginePtr); netExec = plugin.LoadNetwork(net, {}); infRequest = netExec.CreateInferRequest(); infRequest.SetInput(inpBlobs); infRequest.SetOutput(outBlobs); } catch (const std::exception& ex) { CV_Error(Error::StsAssert, format("Failed to initialize Inference Engine backend: %s", ex.what())); } }
//----------------------------------------------------------------------------- String Technique::_compile(bool autoManageTextureUnits) { StringUtil::StrStreamType errors; mIsSupported = checkGPURules(errors); if (mIsSupported) { mIsSupported = checkHardwareSupport(autoManageTextureUnits, errors); } // Compile for categorised illumination on demand clearIlluminationPasses(); mIlluminationPassesCompilationPhase = IPS_NOT_COMPILED; return errors.str(); }
int operator()(float** src, float* dst, int, int width) const { if( !checkHardwareSupport(CV_CPU_SSE) ) return 0; int x = 0; const float *row0 = src[0], *row1 = src[1], *row2 = src[2], *row3 = src[3], *row4 = src[4]; __m128 _4 = _mm_set1_ps(4.f), _scale = _mm_set1_ps(1.f/256); for( ; x <= width - 8; x += 8 ) { __m128 r0, r1, r2, r3, r4, t0, t1; r0 = _mm_load_ps(row0 + x); r1 = _mm_load_ps(row1 + x); r2 = _mm_load_ps(row2 + x); r3 = _mm_load_ps(row3 + x); r4 = _mm_load_ps(row4 + x); r0 = _mm_add_ps(r0, r4); r1 = _mm_add_ps(_mm_add_ps(r1, r3), r2); r0 = _mm_add_ps(r0, _mm_add_ps(r2, r2)); t0 = _mm_add_ps(r0, _mm_mul_ps(r1, _4)); r0 = _mm_load_ps(row0 + x + 4); r1 = _mm_load_ps(row1 + x + 4); r2 = _mm_load_ps(row2 + x + 4); r3 = _mm_load_ps(row3 + x + 4); r4 = _mm_load_ps(row4 + x + 4); r0 = _mm_add_ps(r0, r4); r1 = _mm_add_ps(_mm_add_ps(r1, r3), r2); r0 = _mm_add_ps(r0, _mm_add_ps(r2, r2)); t1 = _mm_add_ps(r0, _mm_mul_ps(r1, _4)); t0 = _mm_mul_ps(t0, _scale); t1 = _mm_mul_ps(t1, _scale); _mm_storeu_ps(dst + x, t0); _mm_storeu_ps(dst + x + 4, t1); } return x; }
template<> void momentsInTile<uchar, int, int>( const cv::Mat& img, double* moments ) { typedef uchar T; typedef int WT; typedef int MT; Size size = img.size(); int y; MT mom[10] = {0,0,0,0,0,0,0,0,0,0}; bool useSIMD = checkHardwareSupport(CV_CPU_SSE2); for( y = 0; y < size.height; y++ ) { const T* ptr = img.ptr<T>(y); int x0 = 0, x1 = 0, x2 = 0, x3 = 0, x = 0; if( useSIMD ) { __m128i qx_init = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); __m128i dx = _mm_set1_epi16(8); __m128i z = _mm_setzero_si128(), qx0 = z, qx1 = z, qx2 = z, qx3 = z, qx = qx_init; for( ; x <= size.width - 8; x += 8 ) { __m128i p = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(ptr + x)), z); qx0 = _mm_add_epi32(qx0, _mm_sad_epu8(p, z)); __m128i px = _mm_mullo_epi16(p, qx); __m128i sx = _mm_mullo_epi16(qx, qx); qx1 = _mm_add_epi32(qx1, _mm_madd_epi16(p, qx)); qx2 = _mm_add_epi32(qx2, _mm_madd_epi16(p, sx)); qx3 = _mm_add_epi32(qx3, _mm_madd_epi16(px, sx)); qx = _mm_add_epi16(qx, dx); } int CV_DECL_ALIGNED(16) buf[4]; _mm_store_si128((__m128i*)buf, qx0); x0 = buf[0] + buf[1] + buf[2] + buf[3]; _mm_store_si128((__m128i*)buf, qx1); x1 = buf[0] + buf[1] + buf[2] + buf[3]; _mm_store_si128((__m128i*)buf, qx2); x2 = buf[0] + buf[1] + buf[2] + buf[3]; _mm_store_si128((__m128i*)buf, qx3); x3 = buf[0] + buf[1] + buf[2] + buf[3]; } for( ; x < size.width; x++ ) { WT p = ptr[x]; WT xp = x * p, xxp; x0 += p; x1 += xp; xxp = xp * x; x2 += xxp; x3 += xxp * x; } WT py = y * x0, sy = y*y; mom[9] += ((MT)py) * sy; // m03 mom[8] += ((MT)x1) * sy; // m12 mom[7] += ((MT)x2) * y; // m21 mom[6] += x3; // m30 mom[5] += x0 * sy; // m02 mom[4] += x1 * y; // m11 mom[3] += x2; // m20 mom[2] += py; // m01 mom[1] += x1; // m10 mom[0] += x0; // m00 } for(int x = 0; x < 10; x++ ) moments[x] = (double)mom[x]; }
int operator()(int** src, uchar* dst, int, int width) const { if( !checkHardwareSupport(CV_CPU_SSE2) ) return 0; int x = 0; const int *row0 = src[0], *row1 = src[1], *row2 = src[2], *row3 = src[3], *row4 = src[4]; __m128i delta = _mm_set1_epi16(128); for( ; x <= width - 16; x += 16 ) { __m128i r0, r1, r2, r3, r4, t0, t1; r0 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row0 + x)), _mm_load_si128((const __m128i*)(row0 + x + 4))); r1 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row1 + x)), _mm_load_si128((const __m128i*)(row1 + x + 4))); r2 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row2 + x)), _mm_load_si128((const __m128i*)(row2 + x + 4))); r3 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row3 + x)), _mm_load_si128((const __m128i*)(row3 + x + 4))); r4 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row4 + x)), _mm_load_si128((const __m128i*)(row4 + x + 4))); r0 = _mm_add_epi16(r0, r4); r1 = _mm_add_epi16(_mm_add_epi16(r1, r3), r2); r0 = _mm_add_epi16(r0, _mm_add_epi16(r2, r2)); t0 = _mm_add_epi16(r0, _mm_slli_epi16(r1, 2)); r0 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row0 + x + 8)), _mm_load_si128((const __m128i*)(row0 + x + 12))); r1 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row1 + x + 8)), _mm_load_si128((const __m128i*)(row1 + x + 12))); r2 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row2 + x + 8)), _mm_load_si128((const __m128i*)(row2 + x + 12))); r3 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row3 + x + 8)), _mm_load_si128((const __m128i*)(row3 + x + 12))); r4 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row4 + x + 8)), _mm_load_si128((const __m128i*)(row4 + x + 12))); r0 = _mm_add_epi16(r0, r4); r1 = _mm_add_epi16(_mm_add_epi16(r1, r3), r2); r0 = _mm_add_epi16(r0, _mm_add_epi16(r2, r2)); t1 = _mm_add_epi16(r0, _mm_slli_epi16(r1, 2)); t0 = _mm_srli_epi16(_mm_add_epi16(t0, delta), 8); t1 = _mm_srli_epi16(_mm_add_epi16(t1, delta), 8); _mm_storeu_si128((__m128i*)(dst + x), _mm_packus_epi16(t0, t1)); } for( ; x <= width - 4; x += 4 ) { __m128i r0, r1, r2, r3, r4, z = _mm_setzero_si128(); r0 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row0 + x)), z); r1 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row1 + x)), z); r2 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row2 + x)), z); r3 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row3 + x)), z); r4 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row4 + x)), z); r0 = _mm_add_epi16(r0, r4); r1 = _mm_add_epi16(_mm_add_epi16(r1, r3), r2); r0 = _mm_add_epi16(r0, _mm_add_epi16(r2, r2)); r0 = _mm_add_epi16(r0, _mm_slli_epi16(r1, 2)); r0 = _mm_srli_epi16(_mm_add_epi16(r0, delta), 8); *(int*)(dst + x) = _mm_cvtsi128_si32(_mm_packus_epi16(r0, r0)); } return x; }
static void thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type ) { int i, j; Size roi = _src.size(); roi.width *= _src.channels(); const float* src = (const float*)_src.data; float* dst = (float*)_dst.data; size_t src_step = _src.step/sizeof(src[0]); size_t dst_step = _dst.step/sizeof(dst[0]); #if CV_SSE2 volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE); #endif if( _src.isContinuous() && _dst.isContinuous() ) { roi.width *= roi.height; roi.height = 1; } #ifdef HAVE_TEGRA_OPTIMIZATION if (tegra::thresh_32f(_src, _dst, roi.width, roi.height, thresh, maxval, type)) return; #endif #if defined(HAVE_IPP) IppiSize sz = { roi.width, roi.height }; switch( type ) { case THRESH_TRUNC: if (0 <= ippiThreshold_GT_32f_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh)) return; setIppErrorStatus(); break; case THRESH_TOZERO: if (0 <= ippiThreshold_LTVal_32f_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh+FLT_EPSILON, 0)) return; setIppErrorStatus(); break; case THRESH_TOZERO_INV: if (0 <= ippiThreshold_GTVal_32f_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh, 0)) return; setIppErrorStatus(); break; } #endif switch( type ) { case THRESH_BINARY: for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step ) { j = 0; #if CV_SSE2 if( useSIMD ) { __m128 thresh4 = _mm_set1_ps(thresh), maxval4 = _mm_set1_ps(maxval); for( ; j <= roi.width - 8; j += 8 ) { __m128 v0, v1; v0 = _mm_loadu_ps( src + j ); v1 = _mm_loadu_ps( src + j + 4 ); v0 = _mm_cmpgt_ps( v0, thresh4 ); v1 = _mm_cmpgt_ps( v1, thresh4 ); v0 = _mm_and_ps( v0, maxval4 ); v1 = _mm_and_ps( v1, maxval4 ); _mm_storeu_ps( dst + j, v0 ); _mm_storeu_ps( dst + j + 4, v1 ); } } #endif for( ; j < roi.width; j++ ) dst[j] = src[j] > thresh ? maxval : 0; } break; case THRESH_BINARY_INV: for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step ) { j = 0; #if CV_SSE2 if( useSIMD ) { __m128 thresh4 = _mm_set1_ps(thresh), maxval4 = _mm_set1_ps(maxval); for( ; j <= roi.width - 8; j += 8 ) { __m128 v0, v1; v0 = _mm_loadu_ps( src + j ); v1 = _mm_loadu_ps( src + j + 4 ); v0 = _mm_cmple_ps( v0, thresh4 ); v1 = _mm_cmple_ps( v1, thresh4 ); v0 = _mm_and_ps( v0, maxval4 ); v1 = _mm_and_ps( v1, maxval4 ); _mm_storeu_ps( dst + j, v0 ); _mm_storeu_ps( dst + j + 4, v1 ); } } #endif for( ; j < roi.width; j++ ) dst[j] = src[j] <= thresh ? maxval : 0; } break; case THRESH_TRUNC: for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step ) { j = 0; #if CV_SSE2 if( useSIMD ) { __m128 thresh4 = _mm_set1_ps(thresh); for( ; j <= roi.width - 8; j += 8 ) { __m128 v0, v1; v0 = _mm_loadu_ps( src + j ); v1 = _mm_loadu_ps( src + j + 4 ); v0 = _mm_min_ps( v0, thresh4 ); v1 = _mm_min_ps( v1, thresh4 ); _mm_storeu_ps( dst + j, v0 ); _mm_storeu_ps( dst + j + 4, v1 ); } } #endif for( ; j < roi.width; j++ ) dst[j] = std::min(src[j], thresh); } break; case THRESH_TOZERO: for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step ) { j = 0; #if CV_SSE2 if( useSIMD ) { __m128 thresh4 = _mm_set1_ps(thresh); for( ; j <= roi.width - 8; j += 8 ) { __m128 v0, v1; v0 = _mm_loadu_ps( src + j ); v1 = _mm_loadu_ps( src + j + 4 ); v0 = _mm_and_ps(v0, _mm_cmpgt_ps(v0, thresh4)); v1 = _mm_and_ps(v1, _mm_cmpgt_ps(v1, thresh4)); _mm_storeu_ps( dst + j, v0 ); _mm_storeu_ps( dst + j + 4, v1 ); } } #endif for( ; j < roi.width; j++ ) { float v = src[j]; dst[j] = v > thresh ? v : 0; } } break; case THRESH_TOZERO_INV: for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step ) { j = 0; #if CV_SSE2 if( useSIMD ) { __m128 thresh4 = _mm_set1_ps(thresh); for( ; j <= roi.width - 8; j += 8 ) { __m128 v0, v1; v0 = _mm_loadu_ps( src + j ); v1 = _mm_loadu_ps( src + j + 4 ); v0 = _mm_and_ps(v0, _mm_cmple_ps(v0, thresh4)); v1 = _mm_and_ps(v1, _mm_cmple_ps(v1, thresh4)); _mm_storeu_ps( dst + j, v0 ); _mm_storeu_ps( dst + j + 4, v1 ); } } #endif for( ; j < roi.width; j++ ) { float v = src[j]; dst[j] = v <= thresh ? v : 0; } } break; default: return CV_Error( CV_StsBadArg, "" ); } }
static void thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type ) { int i, j; Size roi = _src.size(); roi.width *= _src.channels(); const short* src = (const short*)_src.data; short* dst = (short*)_dst.data; size_t src_step = _src.step/sizeof(src[0]); size_t dst_step = _dst.step/sizeof(dst[0]); #if CV_SSE2 volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE); #endif if( _src.isContinuous() && _dst.isContinuous() ) { roi.width *= roi.height; roi.height = 1; src_step = dst_step = roi.width; } #ifdef HAVE_TEGRA_OPTIMIZATION if (tegra::thresh_16s(_src, _dst, roi.width, roi.height, thresh, maxval, type)) return; #endif #if defined(HAVE_IPP) IppiSize sz = { roi.width, roi.height }; switch( type ) { case THRESH_TRUNC: if (0 <= ippiThreshold_GT_16s_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh)) return; setIppErrorStatus(); break; case THRESH_TOZERO: if (0 <= ippiThreshold_LTVal_16s_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh+1, 0)) return; setIppErrorStatus(); break; case THRESH_TOZERO_INV: if (0 <= ippiThreshold_GTVal_16s_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh, 0)) return; setIppErrorStatus(); break; } #endif switch( type ) { case THRESH_BINARY: for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step ) { j = 0; #if CV_SSE2 if( useSIMD ) { __m128i thresh8 = _mm_set1_epi16(thresh), maxval8 = _mm_set1_epi16(maxval); for( ; j <= roi.width - 16; j += 16 ) { __m128i v0, v1; v0 = _mm_loadu_si128( (const __m128i*)(src + j) ); v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) ); v0 = _mm_cmpgt_epi16( v0, thresh8 ); v1 = _mm_cmpgt_epi16( v1, thresh8 ); v0 = _mm_and_si128( v0, maxval8 ); v1 = _mm_and_si128( v1, maxval8 ); _mm_storeu_si128((__m128i*)(dst + j), v0 ); _mm_storeu_si128((__m128i*)(dst + j + 8), v1 ); } } #endif for( ; j < roi.width; j++ ) dst[j] = src[j] > thresh ? maxval : 0; } break; case THRESH_BINARY_INV: for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step ) { j = 0; #if CV_SSE2 if( useSIMD ) { __m128i thresh8 = _mm_set1_epi16(thresh), maxval8 = _mm_set1_epi16(maxval); for( ; j <= roi.width - 16; j += 16 ) { __m128i v0, v1; v0 = _mm_loadu_si128( (const __m128i*)(src + j) ); v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) ); v0 = _mm_cmpgt_epi16( v0, thresh8 ); v1 = _mm_cmpgt_epi16( v1, thresh8 ); v0 = _mm_andnot_si128( v0, maxval8 ); v1 = _mm_andnot_si128( v1, maxval8 ); _mm_storeu_si128((__m128i*)(dst + j), v0 ); _mm_storeu_si128((__m128i*)(dst + j + 8), v1 ); } } #endif for( ; j < roi.width; j++ ) dst[j] = src[j] <= thresh ? maxval : 0; } break; case THRESH_TRUNC: for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step ) { j = 0; #if CV_SSE2 if( useSIMD ) { __m128i thresh8 = _mm_set1_epi16(thresh); for( ; j <= roi.width - 16; j += 16 ) { __m128i v0, v1; v0 = _mm_loadu_si128( (const __m128i*)(src + j) ); v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) ); v0 = _mm_min_epi16( v0, thresh8 ); v1 = _mm_min_epi16( v1, thresh8 ); _mm_storeu_si128((__m128i*)(dst + j), v0 ); _mm_storeu_si128((__m128i*)(dst + j + 8), v1 ); } } #endif for( ; j < roi.width; j++ ) dst[j] = std::min(src[j], thresh); } break; case THRESH_TOZERO: for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step ) { j = 0; #if CV_SSE2 if( useSIMD ) { __m128i thresh8 = _mm_set1_epi16(thresh); for( ; j <= roi.width - 16; j += 16 ) { __m128i v0, v1; v0 = _mm_loadu_si128( (const __m128i*)(src + j) ); v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) ); v0 = _mm_and_si128(v0, _mm_cmpgt_epi16(v0, thresh8)); v1 = _mm_and_si128(v1, _mm_cmpgt_epi16(v1, thresh8)); _mm_storeu_si128((__m128i*)(dst + j), v0 ); _mm_storeu_si128((__m128i*)(dst + j + 8), v1 ); } } #endif for( ; j < roi.width; j++ ) { short v = src[j]; dst[j] = v > thresh ? v : 0; } } break; case THRESH_TOZERO_INV: for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step ) { j = 0; #if CV_SSE2 if( useSIMD ) { __m128i thresh8 = _mm_set1_epi16(thresh); for( ; j <= roi.width - 16; j += 16 ) { __m128i v0, v1; v0 = _mm_loadu_si128( (const __m128i*)(src + j) ); v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) ); v0 = _mm_andnot_si128(_mm_cmpgt_epi16(v0, thresh8), v0); v1 = _mm_andnot_si128(_mm_cmpgt_epi16(v1, thresh8), v1); _mm_storeu_si128((__m128i*)(dst + j), v0 ); _mm_storeu_si128((__m128i*)(dst + j + 8), v1 ); } } #endif for( ; j < roi.width; j++ ) { short v = src[j]; dst[j] = v <= thresh ? v : 0; } } break; default: return CV_Error( CV_StsBadArg, "" ); } }
static void thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type ) { int i, j; Size roi = _src.size(); roi.width *= _src.channels(); const short* src = _src.ptr<short>(); short* dst = _dst.ptr<short>(); size_t src_step = _src.step/sizeof(src[0]); size_t dst_step = _dst.step/sizeof(dst[0]); #if CV_SSE2 volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE); #endif if( _src.isContinuous() && _dst.isContinuous() ) { roi.width *= roi.height; roi.height = 1; src_step = dst_step = roi.width; } #ifdef HAVE_TEGRA_OPTIMIZATION if (tegra::thresh_16s(_src, _dst, roi.width, roi.height, thresh, maxval, type)) return; #endif #if defined(HAVE_IPP) CV_IPP_CHECK() { IppiSize sz = { roi.width, roi.height }; CV_SUPPRESS_DEPRECATED_START switch( type ) { case THRESH_TRUNC: #ifndef HAVE_IPP_ICV_ONLY if (_src.data == _dst.data && ippiThreshold_GT_16s_C1IR(dst, (int)dst_step*sizeof(dst[0]), sz, thresh) >= 0) { CV_IMPL_ADD(CV_IMPL_IPP); return; } #endif if (ippiThreshold_GT_16s_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh) >= 0) { CV_IMPL_ADD(CV_IMPL_IPP); return; } setIppErrorStatus(); break; case THRESH_TOZERO: #ifndef HAVE_IPP_ICV_ONLY if (_src.data == _dst.data && ippiThreshold_LTVal_16s_C1IR(dst, (int)dst_step*sizeof(dst[0]), sz, thresh + 1, 0) >= 0) { CV_IMPL_ADD(CV_IMPL_IPP); return; } #endif if (ippiThreshold_LTVal_16s_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh+1, 0) >= 0) { CV_IMPL_ADD(CV_IMPL_IPP); return; } setIppErrorStatus(); break; case THRESH_TOZERO_INV: #ifndef HAVE_IPP_ICV_ONLY if (_src.data == _dst.data && ippiThreshold_GTVal_16s_C1IR(dst, (int)dst_step*sizeof(dst[0]), sz, thresh, 0) >= 0) { CV_IMPL_ADD(CV_IMPL_IPP); return; } #endif if (ippiThreshold_GTVal_16s_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh, 0) >= 0) { CV_IMPL_ADD(CV_IMPL_IPP); return; } setIppErrorStatus(); break; } CV_SUPPRESS_DEPRECATED_END } #endif switch( type ) { case THRESH_BINARY: for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step ) { j = 0; #if CV_SSE2 if( useSIMD ) { __m128i thresh8 = _mm_set1_epi16(thresh), maxval8 = _mm_set1_epi16(maxval); for( ; j <= roi.width - 16; j += 16 ) { __m128i v0, v1; v0 = _mm_loadu_si128( (const __m128i*)(src + j) ); v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) ); v0 = _mm_cmpgt_epi16( v0, thresh8 ); v1 = _mm_cmpgt_epi16( v1, thresh8 ); v0 = _mm_and_si128( v0, maxval8 ); v1 = _mm_and_si128( v1, maxval8 ); _mm_storeu_si128((__m128i*)(dst + j), v0 ); _mm_storeu_si128((__m128i*)(dst + j + 8), v1 ); } } #elif CV_NEON int16x8_t v_thresh = vdupq_n_s16(thresh), v_maxval = vdupq_n_s16(maxval); for( ; j <= roi.width - 8; j += 8 ) { uint16x8_t v_mask = vcgtq_s16(vld1q_s16(src + j), v_thresh); vst1q_s16(dst + j, vandq_s16(vreinterpretq_s16_u16(v_mask), v_maxval)); } #endif for( ; j < roi.width; j++ ) dst[j] = src[j] > thresh ? maxval : 0; } break; case THRESH_BINARY_INV: for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step ) { j = 0; #if CV_SSE2 if( useSIMD ) { __m128i thresh8 = _mm_set1_epi16(thresh), maxval8 = _mm_set1_epi16(maxval); for( ; j <= roi.width - 16; j += 16 ) { __m128i v0, v1; v0 = _mm_loadu_si128( (const __m128i*)(src + j) ); v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) ); v0 = _mm_cmpgt_epi16( v0, thresh8 ); v1 = _mm_cmpgt_epi16( v1, thresh8 ); v0 = _mm_andnot_si128( v0, maxval8 ); v1 = _mm_andnot_si128( v1, maxval8 ); _mm_storeu_si128((__m128i*)(dst + j), v0 ); _mm_storeu_si128((__m128i*)(dst + j + 8), v1 ); } } #elif CV_NEON int16x8_t v_thresh = vdupq_n_s16(thresh), v_maxval = vdupq_n_s16(maxval); for( ; j <= roi.width - 8; j += 8 ) { uint16x8_t v_mask = vcleq_s16(vld1q_s16(src + j), v_thresh); vst1q_s16(dst + j, vandq_s16(vreinterpretq_s16_u16(v_mask), v_maxval)); } #endif for( ; j < roi.width; j++ ) dst[j] = src[j] <= thresh ? maxval : 0; } break; case THRESH_TRUNC: for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step ) { j = 0; #if CV_SSE2 if( useSIMD ) { __m128i thresh8 = _mm_set1_epi16(thresh); for( ; j <= roi.width - 16; j += 16 ) { __m128i v0, v1; v0 = _mm_loadu_si128( (const __m128i*)(src + j) ); v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) ); v0 = _mm_min_epi16( v0, thresh8 ); v1 = _mm_min_epi16( v1, thresh8 ); _mm_storeu_si128((__m128i*)(dst + j), v0 ); _mm_storeu_si128((__m128i*)(dst + j + 8), v1 ); } } #elif CV_NEON int16x8_t v_thresh = vdupq_n_s16(thresh); for( ; j <= roi.width - 8; j += 8 ) vst1q_s16(dst + j, vminq_s16(vld1q_s16(src + j), v_thresh)); #endif for( ; j < roi.width; j++ ) dst[j] = std::min(src[j], thresh); } break; case THRESH_TOZERO: for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step ) { j = 0; #if CV_SSE2 if( useSIMD ) { __m128i thresh8 = _mm_set1_epi16(thresh); for( ; j <= roi.width - 16; j += 16 ) { __m128i v0, v1; v0 = _mm_loadu_si128( (const __m128i*)(src + j) ); v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) ); v0 = _mm_and_si128(v0, _mm_cmpgt_epi16(v0, thresh8)); v1 = _mm_and_si128(v1, _mm_cmpgt_epi16(v1, thresh8)); _mm_storeu_si128((__m128i*)(dst + j), v0 ); _mm_storeu_si128((__m128i*)(dst + j + 8), v1 ); } } #elif CV_NEON int16x8_t v_thresh = vdupq_n_s16(thresh); for( ; j <= roi.width - 8; j += 8 ) { int16x8_t v_src = vld1q_s16(src + j); uint16x8_t v_mask = vcgtq_s16(v_src, v_thresh); vst1q_s16(dst + j, vandq_s16(vreinterpretq_s16_u16(v_mask), v_src)); } #endif for( ; j < roi.width; j++ ) { short v = src[j]; dst[j] = v > thresh ? v : 0; } } break; case THRESH_TOZERO_INV: for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step ) { j = 0; #if CV_SSE2 if( useSIMD ) { __m128i thresh8 = _mm_set1_epi16(thresh); for( ; j <= roi.width - 16; j += 16 ) { __m128i v0, v1; v0 = _mm_loadu_si128( (const __m128i*)(src + j) ); v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) ); v0 = _mm_andnot_si128(_mm_cmpgt_epi16(v0, thresh8), v0); v1 = _mm_andnot_si128(_mm_cmpgt_epi16(v1, thresh8), v1); _mm_storeu_si128((__m128i*)(dst + j), v0 ); _mm_storeu_si128((__m128i*)(dst + j + 8), v1 ); } } #elif CV_NEON int16x8_t v_thresh = vdupq_n_s16(thresh); for( ; j <= roi.width - 8; j += 8 ) { int16x8_t v_src = vld1q_s16(src + j); uint16x8_t v_mask = vcleq_s16(v_src, v_thresh); vst1q_s16(dst + j, vandq_s16(vreinterpretq_s16_u16(v_mask), v_src)); } #endif for( ; j < roi.width; j++ ) { short v = src[j]; dst[j] = v <= thresh ? v : 0; } } break; default: return CV_Error( CV_StsBadArg, "" ); } }
bool COgreManager::Init(bool bEditor, HWND externalHwnd, HWND hwndParent,int width, int height) { //资源配置文件和插件配置文件 String ResourceCfg, PluginCfg; #ifdef _DEBUG if (bEditor) ResourceCfg = "resources_editor_d.cfg"; else ResourceCfg = "resources_d.cfg"; PluginCfg = "plugins_d.cfg"; #else if (bEditor) ResourceCfg = "resources_editor.cfg"; else ResourceCfg = "resources.cfg"; PluginCfg = "plugins.cfg"; #endif mRoot = new Ogre::Root(PluginCfg); Ogre::ConfigFile cf; cf.load(ResourceCfg); // Go through all sections & settings in the file Ogre::ConfigFile::SectionIterator seci = cf.getSectionIterator(); Ogre::String secName, typeName, archName; while (seci.hasMoreElements()) { secName = seci.peekNextKey(); Ogre::ConfigFile::SettingsMultiMap *settings = seci.getNext(); Ogre::ConfigFile::SettingsMultiMap::iterator i; for (i = settings->begin(); i != settings->end(); ++i) { typeName = i->first; archName = i->second; Ogre::ResourceGroupManager::getSingleton().addResourceLocation(archName, typeName, secName); } } if (bEditor) { RenderSystem* rs = mRoot->getRenderSystemByName("Direct3D9 Rendering Subsystem"); assert(rs); mRoot->setRenderSystem(rs); mRoot->initialise(false); NameValuePairList params; params["externalWindowHandle"] = StringConverter::toString((unsigned int)externalHwnd); params["parentWindowHandle"] = StringConverter::toString((unsigned int)hwndParent); params["vsync"] = "true"; mWindow = mRoot->createRenderWindow("MainWindow", width, height, false, ¶ms); Ogre::ResourceGroupManager::getSingleton().initialiseAllResourceGroups(); } else { if(mRoot->restoreConfig() || mRoot->showConfigDialog()) mWindow = mRoot->initialise(true, "Game : MiniCraft"); else return false; } if(!checkHardwareSupport()) return false; m_pSceneMgr = mRoot->createSceneManager(ST_GENERIC, "DefaultSceneMgr"); m_pMainCamera = m_pSceneMgr->createCamera("MainCamera"); m_pMainCamera->setNearClipDistance(1); m_pMainCamera->setFarClipDistance(500); m_pViewport = mWindow->addViewport(m_pMainCamera); m_pViewport->setBackgroundColour(Ogre::ColourValue(0,0,0)); m_pMainCamera->setAspectRatio( (Ogre::Real)m_pViewport->getActualWidth()/(Ogre::Real)m_pViewport->getActualHeight()); Ogre::TextureManager::getSingleton().setDefaultNumMipmaps(5); //Set initial mouse clipping size windowResized(mWindow); Ogre::WindowEventUtilities::addWindowEventListener(mWindow, this); m_pDS = new DeferredShadingSystem(m_pViewport, m_pSceneMgr, m_pMainCamera); if(bEditor) m_pDS->initialize(); mLightMaterialGenerator = new LightMaterialGenerator; PSSMShadowCameraSetup* pssmSetup = new PSSMShadowCameraSetup; mPSSMSetup.bind(pssmSetup); m_pSceneMgr->setShadowCameraSetup(mPSSMSetup); m_pSceneMgr->getRenderQueue()->getQueueGroup(RENDER_QUEUE_OVERLAY)->setShadowsEnabled(false); m_Timer = new Ogre::Timer(); m_Timer->reset(); mWindow->setActive(true); mWindow->setDeactivateOnFocusChange(false); m_bHasInit = true; return true; }
MomentsInTile_SIMD() { useSIMD = checkHardwareSupport(CV_CPU_SSE2); }