static void VL_XCAT(_vl_kmeans_sort_data_helper_, SFX) (VlKMeans * self, vl_uint32 * permutations, TYPE const * data, vl_size numData) { vl_uindex d, x ; for (d = 0 ; d < self->dimension ; ++d) { VlKMeansSortWrapper array ; array.permutation = permutations + d * numData ; array.data = data + d ; array.stride = self->dimension ; for (x = 0 ; x < numData ; ++x) { array.permutation[x] = x ; } VL_XCAT3(_vl_kmeans_, SFX, _qsort_sort)(&array, numData) ; } }
VL_EXPORT void VL_XCAT(vl_imconvcol_v, SFX) (T* dst, vl_size dst_stride, T const* src, vl_size src_width, vl_size src_height, vl_size src_stride, T const* filt, vl_index filt_begin, vl_index filt_end, int step, unsigned int flags) { vl_index x = 0 ; vl_index y ; vl_index dheight = (src_height - 1) / step + 1 ; vl_bool transp = flags & VL_TRANSPOSE ; vl_bool zeropad = (flags & VL_PAD_MASK) == VL_PAD_BY_ZERO ; /* dispatch to accelerated version */ #ifndef VL_DISABLE_SSE2 if (vl_cpu_has_sse2() && vl_get_simd_enabled()) { VL_XCAT3(_vl_imconvcol_v,SFX,_sse2) (dst,dst_stride, src,src_width,src_height,src_stride, filt,filt_begin,filt_end, step,flags) ; return ; } #endif /* let filt point to the last sample of the filter */ filt += filt_end - filt_begin ; while (x < (signed)src_width) { /* Calculate dest[x,y] = sum_p image[x,p] filt[y - p] * where supp(filt) = [filt_begin, filt_end] = [fb,fe]. * * CHUNK_A: y - fe <= p < 0 * completes VL_MAX(fe - y, 0) samples * CHUNK_B: VL_MAX(y - fe, 0) <= p < VL_MIN(y - fb, height - 1) * completes fe - VL_MAX(fb, height - y) + 1 samples * CHUNK_C: completes all samples */ T const *filti ; vl_index stop ; for (y = 0 ; y < (signed)src_height ; y += step) { T acc = 0 ; T v = 0, c ; T const* srci ; filti = filt ; stop = filt_end - y ; srci = src + x - stop * src_stride ; if (stop > 0) { if (zeropad) { v = 0 ; } else { v = *(src + x) ; } while (filti > filt - stop) { c = *filti-- ; acc += v * c ; srci += src_stride ; } } stop = filt_end - VL_MAX(filt_begin, y - (signed)src_height + 1) + 1 ; while (filti > filt - stop) { v = *srci ; c = *filti-- ; acc += v * c ; srci += src_stride ; } if (zeropad) v = 0 ; stop = filt_end - filt_begin + 1 ; while (filti > filt - stop) { c = *filti-- ; acc += v * c ; } if (transp) { *dst = acc ; dst += 1 ; } else { *dst = acc ; dst += dst_stride ; } } /* next y */ if (transp) { dst += 1 * dst_stride - dheight * 1 ; } else { dst += 1 * 1 - dheight * dst_stride ; } x += 1 ; } /* next x */ }