/// Advances a heap item to the next point that passes the search-window test.
///
/// Scans ptsParams four floats at a time, starting at record hi.begin, until a record
/// passes the window test or a terminator record is reached. A terminator is a record
/// whose first lane has the bit pattern 0xFFFFFFFF.
///
/// @param hi  Heap item to advance. On return hi.begin is the record index where the scan
///            stopped. hi.rank is ptsRanks[hi.begin] on a hit, or the int32_t maximum when
///            the terminator was reached.
/// @return    true if a record inside the window was found, false if the terminator was hit.
NOINLINE bool SearchContext::WindHeapItem(HeapItem & hi)
{
    const __m128 window = _mm_load_ps(this->searchWindow);
    unsigned idx = hi.begin;

    for (;; ++idx)
    {
        const __m128 ptParam = _mm_load_ps(ptsParams + idx * 4);

        // Read the raw bits of lane 0 through intrinsics rather than the MSVC-only
        // __m128::m128_u32 member, so the check also builds with GCC/Clang.
        // All-ones in lane 0 (== -1 as a signed int) marks the end of the run.
        if (_mm_cvtsi128_si32(_mm_castps_si128(ptParam)) == -1)
        {
            break;
        }

        // window:  slx, sly, -shx, -shy
        // ptParam: x,   y,   -x,   -y
        // A lane compares "true" when the point is outside on that side.
        const __m128 outside = _mm_cmplt_ps(ptParam, window);

        // _mm_testz_ps returns 1 only when no lane has its sign bit set,
        // i.e. every comparison above was false and the point is inside.
        if (_mm_testz_ps(outside, outside))
        {
            hi.begin = idx;
            hi.rank = ptsRanks[idx];
            return true;
        }
    }

    // Run exhausted: park the item on the terminator with the largest possible rank.
    hi.begin = idx;
    hi.rank = std::numeric_limits<int32_t>::max();
    return false;
}
/// Tests the grid cells named by a list of index ranges against the search window and
/// emits one heap item for every cell that passes.
///
/// @param in    Sequence of (begin, end) index pairs, each a half-open range of cell
///              indices, ended by a single unsigned(-1) entry.
/// @param heap  First slot to write accepted cells into.
/// @return      Pointer one past the last heap item written (equal to heap if none passed).
NOINLINE HeapItem * SearchContext::ProcessLowestGrid(const unsigned * in, HeapItem * heap)
{
    const __m128 window = _mm_load_ps(this->searchWindow);
    HeapItem * cursor = heap;

    for (const unsigned * range = in; range[0] != unsigned(-1); range += 2)
    {
        const unsigned first = range[0];
        const unsigned last = range[1];

        for (unsigned cell = first; cell < last; ++cell)
        {
            // Original layout note:
            //   searchWindow: shx, slx, shy, sly
            //   gridWindow:   lx,  hx,  ly,  hy
            // NOTE(review): WindHeapItem documents a different lane order for the same
            // searchWindow member (slx, sly, -shx, -shy) - confirm which note is current.
            const __m128 cellBounds = _mm_load_ps(&gridParams[cell * 4]);
            const __m128 outside = _mm_cmplt_ps(cellBounds, window);

            // _mm_testz_ps is 1 only when no lane compared true; otherwise skip the cell.
            if (_mm_testz_ps(outside, outside) == 0)
            {
                continue;
            }

            // Only begin is filled in here; rank is set later by WindHeapItem.
            cursor->begin = gridPoints[cell];
            ++cursor;
        }
    }

    return cursor;
}
/// Tests grid cell 0 against the search window and writes its two gridChildren entries
/// to the output list if it passes.
///
/// @param out  Output list. Receives gridChildren[0] and gridChildren[1] when cell 0 passes
///             the test, followed in every case by an unsigned(-1) terminator.
///             (Presumably the two entries form a (begin, end) child range like the pairs
///             the other Process*Grid functions read - confirm.)
NOINLINE void SearchContext::ProcessTopGrid(unsigned * out)
{
    const __m128 window = _mm_load_ps(this->searchWindow);
    const __m128 cellBounds = _mm_load_ps(&gridParams[0]);

    // Original layout note:
    //   searchWindow: shx, slx, shy, sly
    //   gridWindow:   lx,  hx,  ly,  hy
    // NOTE(review): WindHeapItem documents a different lane order for the same
    // searchWindow member (slx, sly, -shx, -shy) - confirm which note is current.
    const __m128 outside = _mm_cmplt_ps(cellBounds, window);

    unsigned * cursor = out;

    // _mm_testz_ps is 1 only when no lane compared true.
    if (_mm_testz_ps(outside, outside) != 0)
    {
        cursor[0] = gridChildren[0];
        cursor[1] = gridChildren[1];
        cursor += 2;
    }

    // Always terminate the list, even when it is empty.
    *cursor = unsigned(-1);
}
/// Tests the grid cells named by a list of index ranges against the search window and, for
/// every cell that passes, appends that cell's two gridChildren entries to the output list.
///
/// @param in   Sequence of (begin, end) index pairs, each a half-open range of cell
///             indices, ended by a single unsigned(-1) entry.
/// @param out  Output list. Receives gridChildren[2*i] and gridChildren[2*i + 1] for each
///             accepted cell i, then an unsigned(-1) terminator.
NOINLINE void SearchContext::ProcessIntermediateGrid(const unsigned * in, unsigned * out)
{
    const __m128 window = _mm_load_ps(this->searchWindow);
    unsigned * cursor = out;

    for (const unsigned * range = in; range[0] != unsigned(-1); range += 2)
    {
        const unsigned first = range[0];
        const unsigned last = range[1];

        for (unsigned cell = first; cell < last; ++cell)
        {
            // Original layout note:
            //   searchWindow: shx, slx, shy, sly
            //   gridWindow:   lx,  hx,  ly,  hy
            // NOTE(review): WindHeapItem documents a different lane order for the same
            // searchWindow member (slx, sly, -shx, -shy) - confirm which note is current.
            const __m128 cellBounds = _mm_load_ps(&gridParams[cell * 4]);
            const __m128 outside = _mm_cmplt_ps(cellBounds, window);

            // _mm_testz_ps is 1 only when no lane compared true; otherwise skip the cell.
            if (_mm_testz_ps(outside, outside) == 0)
            {
                continue;
            }

            cursor[0] = gridChildren[cell * 2];
            cursor[1] = gridChildren[cell * 2 + 1];
            cursor += 2;
        }
    }

    // Always terminate the list, even when it is empty.
    *cursor = unsigned(-1);
}
// horizontal OR KFR_SINTRIN bool bittestany(const f32sse& x) { return !_mm_testz_ps(*x, *x); }