// AVX2 high-bitdepth "fp" quantizer entry point.
//
// Walks coeff_ptr in groups of 8 elements (one unaligned 256-bit load per
// group -- assumes tran_low_t is 32 bits wide; TODO confirm) and hands each
// group to quantize(), which receives qcoeff_ptr/dqcoeff_ptr as outputs and
// iscan for end-of-block tracking. The quantizer parameters are prepared by
// init_qp() from round_ptr, quant_ptr, dequant_ptr and log_scale.
//
// The first group is always processed, whatever the value of n_coeffs.
// update_qp() is then called exactly once, before any further group
// (presumably switching from DC to AC parameters -- verify against
// update_qp()).
//
// On return, *eob_ptr holds the largest signed 16-bit lane accumulated in the
// end-of-block vector that quantize() maintains.
//
// zbin_ptr, quant_shift_ptr and scan are accepted but never read.
void av1_highbd_quantize_fp_avx2(
    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
    const int16_t *round_ptr, const int16_t *quant_ptr,
    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
    const int16_t *scan, const int16_t *iscan, int log_scale) {
  // Unused arguments of the shared quantizer signature.
  (void)scan;
  (void)zbin_ptr;
  (void)quant_shift_ptr;

  // Number of coefficients consumed per 256-bit load.
  const unsigned int step = 8;

  __m256i qparams[3];
  init_qp(round_ptr, quant_ptr, dequant_ptr, log_scale, qparams);

  // Leading group: handled with the freshly initialised parameters.
  __m256i group = _mm256_loadu_si256((const __m256i *)coeff_ptr);
  __m256i eob_acc = _mm256_setzero_si256();
  quantize(qparams, &group, iscan, log_scale, qcoeff_ptr, dqcoeff_ptr,
           &eob_acc);

  // Parameters are advanced a single time; every later group shares them.
  update_qp(qparams);

  // Remaining groups: step all cursors forward, then quantize in place.
  for (intptr_t remaining = n_coeffs - step; remaining > 0;
       remaining -= step) {
    coeff_ptr += step;
    qcoeff_ptr += step;
    dqcoeff_ptr += step;
    iscan += step;

    group = _mm256_loadu_si256((const __m256i *)coeff_ptr);
    quantize(qparams, &group, iscan, log_scale, qcoeff_ptr, dqcoeff_ptr,
             &eob_acc);
  }

  // Horizontal max over the 16-bit lanes of eob_acc.
  // Fold the upper 64 bits of each 128-bit half onto the lower 64 bits.
  const __m256i upper_qword = _mm256_shuffle_epi32(eob_acc, 0xe);
  eob_acc = _mm256_max_epi16(eob_acc, upper_qword);

  // Fold words 2..3 onto words 0..1.
  const __m256i upper_dword = _mm256_shufflelo_epi16(eob_acc, 0xe);
  eob_acc = _mm256_max_epi16(eob_acc, upper_dword);

  // Fold word 1 onto word 0.
  const __m256i upper_word = _mm256_shufflelo_epi16(eob_acc, 1);
  eob_acc = _mm256_max_epi16(eob_acc, upper_word);

  // Combine the two 128-bit halves; the answer sits in word 0.
  const __m128i final_eob = _mm_max_epi16(_mm256_castsi256_si128(eob_acc),
                                          _mm256_extractf128_si256(eob_acc, 1));
  *eob_ptr = _mm_extract_epi16(final_eob, 0);
}
/*
Completes the most computationally intense part of the NMPC iteration; this
step is independent of the latest sensor measurements and so can be executed
as soon as possible after the previous iteration.

The entire step is delegated to update_qp().
*/
void OptimalControlProblem::preparation_step() { update_qp(); }