void test_vmulQ_nf32 (void)
{
  float32x4_t out_float32x4_t;
  float32x4_t arg0_float32x4_t;
  float32_t arg1_float32_t;

  out_float32x4_t = vmulq_n_f32 (arg0_float32x4_t, arg1_float32_t);
}
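/* For reference: vmulq_n_f32(a, s) multiplies every lane of the float32x4_t operand by
   the same scalar. A minimal scalar sketch of that semantics (the helper name below is
   hypothetical, for illustration only): */
static inline void mulq_n_f32_scalar_ref(const float a[4], float s, float out[4]) {
    for (int lane = 0; lane < 4; lane++)
        out[lane] = a[lane] * s;  /* same per-lane result as vmulq_n_f32(a, s) */
}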
void dotProd_i16_neon(const float *dataf, const float *weightsf, float *vals,
                      const int n, const int len, const float *istd) {
    const int16_t *data = (const int16_t *)dataf;
    const int16_t *weights = (const int16_t *)weightsf;
    weightsf += n * len / 2; // sizeof(float) / sizeof(int16_t)

    for (int i = 0; i < n; i += 4) {
        int32x4_t accum0 = { 0, 0, 0, 0 };
        int32x4_t accum1 = accum0;
        int32x4_t accum2 = accum0;
        int32x4_t accum3 = accum0;

        for (int j = 0; j < len; j += 8) {
            int16x4x2_t d0 = vld2_s16(data + j);

            int16x4x2_t w0 = vld2_s16(weights);
            int16x4x2_t w1 = vld2_s16(weights + 8);
            int16x4x2_t w2 = vld2_s16(weights + 16);
            int16x4x2_t w3 = vld2_s16(weights + 24);

            accum0 = vmlal_s16(accum0, d0.val[0], w0.val[0]);
            accum0 = vmlal_s16(accum0, d0.val[1], w0.val[1]);

            accum1 = vmlal_s16(accum1, d0.val[0], w1.val[0]);
            accum1 = vmlal_s16(accum1, d0.val[1], w1.val[1]);

            accum2 = vmlal_s16(accum2, d0.val[0], w2.val[0]);
            accum2 = vmlal_s16(accum2, d0.val[1], w2.val[1]);

            accum3 = vmlal_s16(accum3, d0.val[0], w3.val[0]);
            accum3 = vmlal_s16(accum3, d0.val[1], w3.val[1]);

            weights += 32;
        }

        int32x2_t sum0 = vpadd_s32(vget_low_s32(accum0), vget_high_s32(accum0));
        int32x2_t sum1 = vpadd_s32(vget_low_s32(accum1), vget_high_s32(accum1));
        int32x2_t sum2 = vpadd_s32(vget_low_s32(accum2), vget_high_s32(accum2));
        int32x2_t sum3 = vpadd_s32(vget_low_s32(accum3), vget_high_s32(accum3));
        sum0 = vpadd_s32(sum0, sum1);
        sum1 = vpadd_s32(sum2, sum3);
        int32x4_t sum = vcombine_s32(sum0, sum1);

        float32x4_t val = vcvtq_f32_s32(sum);
        val = vmulq_f32(val, vld1q_f32(weightsf + i*2));

        val = vmulq_n_f32(val, istd[0]);
        val = vaddq_f32(val, vld1q_f32(weightsf + i*2 + 4));
        vst1q_f32(vals + i, val);
    }
}
/**
 * @brief Vector scale and accumulate: A[] = alpha * B[] + beta * A[].
 *
 * @param[out] dst     the accumulating matrix A.
 * @param[in]  src     the input matrix B.
 * @param[in]  alpha   scale of B.
 * @param[in]  beta    scale of A.
 * @param[in]  elemCnt number of elements to process.
 *
 * @return void.
 */
void neon_axpby(float *dst, const float *src, const float alpha,
                const float beta, const int elemCnt) {
    int i;
    for (i = 0; i <= elemCnt - 16; i += 16) {
        float32x4_t q0 = vld1q_f32(src + i);
        float32x4_t q1 = vld1q_f32(src + i + 4);
        float32x4_t q2 = vld1q_f32(src + i + 8);
        float32x4_t q3 = vld1q_f32(src + i + 12);
        float32x4_t q4 = vld1q_f32(dst + i);
        float32x4_t q5 = vld1q_f32(dst + i + 4);
        float32x4_t q6 = vld1q_f32(dst + i + 8);
        float32x4_t q7 = vld1q_f32(dst + i + 12);

        q0 = vmulq_n_f32(q0, alpha);
        q1 = vmulq_n_f32(q1, alpha);
        q2 = vmulq_n_f32(q2, alpha);
        q3 = vmulq_n_f32(q3, alpha);

        q0 = vmlaq_n_f32(q0, q4, beta);
        q1 = vmlaq_n_f32(q1, q5, beta);
        q2 = vmlaq_n_f32(q2, q6, beta);
        q3 = vmlaq_n_f32(q3, q7, beta);

        vst1q_f32(dst + i, q0);
        vst1q_f32(dst + i + 4, q1);
        vst1q_f32(dst + i + 8, q2);
        vst1q_f32(dst + i + 12, q3);
    }
    for (; i < elemCnt; i++) {
        float a = src[i] * alpha + dst[i] * beta;
        dst[i] = a;
    }
}
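/* A small usage sketch for neon_axpby (the driver below is hypothetical, not part of the
   original source). elemCnt = 20 exercises one full 16-element NEON block plus the scalar
   tail, and the result matches dst[i] = src[i] * alpha + dst[i] * beta. */
void neon_axpby_example(void) {
    float dst[20], src[20];
    for (int i = 0; i < 20; i++) { dst[i] = 1.0f; src[i] = (float)i; }
    neon_axpby(dst, src, 2.0f, 0.5f, 20);
    /* dst[i] is now (float)i * 2.0f + 1.0f * 0.5f */
}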
void dotProd_neon(const float *data, const float *weights, float *vals,
                  const int n, const int len, const float *istd) {
    for (int i = 0; i < n; i += 4) {
        float32x4_t accum0 = { 0.0f, 0.0f, 0.0f, 0.0f };
        float32x4_t accum1 = accum0;
        float32x4_t accum2 = accum0;
        float32x4_t accum3 = accum0;

        for (int j = 0; j < len; j += 4) {
            float32x4_t d0 = vld1q_f32(data + j);
            float32x4_t d1 = d0;
            float32x4_t d2 = d0;
            float32x4_t d3 = d0;

            float32x4_t w0 = vld1q_f32(weights);
            float32x4_t w1 = vld1q_f32(weights + 4);
            float32x4_t w2 = vld1q_f32(weights + 8);
            float32x4_t w3 = vld1q_f32(weights + 12);

            accum0 = vaddq_f32(accum0, vmulq_f32(d0, w0));
            accum1 = vaddq_f32(accum1, vmulq_f32(d1, w1));
            accum2 = vaddq_f32(accum2, vmulq_f32(d2, w2));
            accum3 = vaddq_f32(accum3, vmulq_f32(d3, w3));

            weights += 16;
        }

        float32x2_t sum0 = vpadd_f32(vget_low_f32(accum0), vget_high_f32(accum0));
        float32x2_t sum1 = vpadd_f32(vget_low_f32(accum1), vget_high_f32(accum1));
        float32x2_t sum2 = vpadd_f32(vget_low_f32(accum2), vget_high_f32(accum2));
        float32x2_t sum3 = vpadd_f32(vget_low_f32(accum3), vget_high_f32(accum3));
        sum0 = vpadd_f32(sum0, sum1);
        sum1 = vpadd_f32(sum2, sum3);
        float32x4_t sum = vcombine_f32(sum0, sum1);

        sum = vmulq_n_f32(sum, istd[0]);
        sum = vaddq_f32(sum, vld1q_f32(weights + n*len + i));
        vst1q_f32(vals + i, sum);
    }
}
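/* The vpadd sequence used above (and in dotProd_i16_neon) reduces four independent
   accumulators to one vector whose lanes hold their horizontal sums. A standalone sketch
   of that reduction (the helper name is hypothetical): */
#include <arm_neon.h>

static inline float32x4_t horizontal_sums_4x(float32x4_t a0, float32x4_t a1,
                                             float32x4_t a2, float32x4_t a3) {
    float32x2_t s0 = vpadd_f32(vget_low_f32(a0), vget_high_f32(a0)); /* {a0[0]+a0[1], a0[2]+a0[3]} */
    float32x2_t s1 = vpadd_f32(vget_low_f32(a1), vget_high_f32(a1));
    float32x2_t s2 = vpadd_f32(vget_low_f32(a2), vget_high_f32(a2));
    float32x2_t s3 = vpadd_f32(vget_low_f32(a3), vget_high_f32(a3));
    s0 = vpadd_f32(s0, s1);          /* {sum(a0), sum(a1)} */
    s1 = vpadd_f32(s2, s3);          /* {sum(a2), sum(a3)} */
    return vcombine_f32(s0, s1);     /* {sum(a0), sum(a1), sum(a2), sum(a3)} */
}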
void x(float32x4_t v0, float32_t f) {
    float32x4_t vv = vmulq_n_f32(v0, f);
}
inline void lxt_v4_scale( lxt_v4* a_result, const lxt_v4* a_v, float a_scale )
{
    a_result->m_i = vmulq_n_f32( a_v->m_i, a_scale );
}
// Updates the following smoothed Power Spectral Densities (PSD):
//  - sd  : near-end
//  - se  : residual echo
//  - sx  : far-end
//  - sde : cross-PSD of near-end and residual echo
//  - sxd : cross-PSD of near-end and far-end
//
// In addition to updating the PSDs, the filter divergence state is
// determined and acted upon.
static void SmoothedPSD(AecCore* aec,
                        float efw[2][PART_LEN1],
                        float dfw[2][PART_LEN1],
                        float xfw[2][PART_LEN1],
                        int* extreme_filter_divergence) {
  // Power estimate smoothing coefficients.
  const float* ptrGCoh = aec->extended_filter_enabled
      ? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1]
      : WebRtcAec_kNormalSmoothingCoefficients[aec->mult - 1];
  int i;
  float sdSum = 0, seSum = 0;
  const float32x4_t vec_15 = vdupq_n_f32(WebRtcAec_kMinFarendPSD);
  float32x4_t vec_sdSum = vdupq_n_f32(0.0f);
  float32x4_t vec_seSum = vdupq_n_f32(0.0f);

  for (i = 0; i + 3 < PART_LEN1; i += 4) {
    const float32x4_t vec_dfw0 = vld1q_f32(&dfw[0][i]);
    const float32x4_t vec_dfw1 = vld1q_f32(&dfw[1][i]);
    const float32x4_t vec_efw0 = vld1q_f32(&efw[0][i]);
    const float32x4_t vec_efw1 = vld1q_f32(&efw[1][i]);
    const float32x4_t vec_xfw0 = vld1q_f32(&xfw[0][i]);
    const float32x4_t vec_xfw1 = vld1q_f32(&xfw[1][i]);
    float32x4_t vec_sd = vmulq_n_f32(vld1q_f32(&aec->sd[i]), ptrGCoh[0]);
    float32x4_t vec_se = vmulq_n_f32(vld1q_f32(&aec->se[i]), ptrGCoh[0]);
    float32x4_t vec_sx = vmulq_n_f32(vld1q_f32(&aec->sx[i]), ptrGCoh[0]);
    float32x4_t vec_dfw_sumsq = vmulq_f32(vec_dfw0, vec_dfw0);
    float32x4_t vec_efw_sumsq = vmulq_f32(vec_efw0, vec_efw0);
    float32x4_t vec_xfw_sumsq = vmulq_f32(vec_xfw0, vec_xfw0);
    vec_dfw_sumsq = vmlaq_f32(vec_dfw_sumsq, vec_dfw1, vec_dfw1);
    vec_efw_sumsq = vmlaq_f32(vec_efw_sumsq, vec_efw1, vec_efw1);
    vec_xfw_sumsq = vmlaq_f32(vec_xfw_sumsq, vec_xfw1, vec_xfw1);
    vec_xfw_sumsq = vmaxq_f32(vec_xfw_sumsq, vec_15);
    vec_sd = vmlaq_n_f32(vec_sd, vec_dfw_sumsq, ptrGCoh[1]);
    vec_se = vmlaq_n_f32(vec_se, vec_efw_sumsq, ptrGCoh[1]);
    vec_sx = vmlaq_n_f32(vec_sx, vec_xfw_sumsq, ptrGCoh[1]);

    vst1q_f32(&aec->sd[i], vec_sd);
    vst1q_f32(&aec->se[i], vec_se);
    vst1q_f32(&aec->sx[i], vec_sx);

    {
      float32x4x2_t vec_sde = vld2q_f32(&aec->sde[i][0]);
      float32x4_t vec_dfwefw0011 = vmulq_f32(vec_dfw0, vec_efw0);
      float32x4_t vec_dfwefw0110 = vmulq_f32(vec_dfw0, vec_efw1);
      vec_sde.val[0] = vmulq_n_f32(vec_sde.val[0], ptrGCoh[0]);
      vec_sde.val[1] = vmulq_n_f32(vec_sde.val[1], ptrGCoh[0]);
      vec_dfwefw0011 = vmlaq_f32(vec_dfwefw0011, vec_dfw1, vec_efw1);
      vec_dfwefw0110 = vmlsq_f32(vec_dfwefw0110, vec_dfw1, vec_efw0);
      vec_sde.val[0] = vmlaq_n_f32(vec_sde.val[0], vec_dfwefw0011, ptrGCoh[1]);
      vec_sde.val[1] = vmlaq_n_f32(vec_sde.val[1], vec_dfwefw0110, ptrGCoh[1]);
      vst2q_f32(&aec->sde[i][0], vec_sde);
    }

    {
      float32x4x2_t vec_sxd = vld2q_f32(&aec->sxd[i][0]);
      float32x4_t vec_dfwxfw0011 = vmulq_f32(vec_dfw0, vec_xfw0);
      float32x4_t vec_dfwxfw0110 = vmulq_f32(vec_dfw0, vec_xfw1);
      vec_sxd.val[0] = vmulq_n_f32(vec_sxd.val[0], ptrGCoh[0]);
      vec_sxd.val[1] = vmulq_n_f32(vec_sxd.val[1], ptrGCoh[0]);
      vec_dfwxfw0011 = vmlaq_f32(vec_dfwxfw0011, vec_dfw1, vec_xfw1);
      vec_dfwxfw0110 = vmlsq_f32(vec_dfwxfw0110, vec_dfw1, vec_xfw0);
      vec_sxd.val[0] = vmlaq_n_f32(vec_sxd.val[0], vec_dfwxfw0011, ptrGCoh[1]);
      vec_sxd.val[1] = vmlaq_n_f32(vec_sxd.val[1], vec_dfwxfw0110, ptrGCoh[1]);
      vst2q_f32(&aec->sxd[i][0], vec_sxd);
    }

    vec_sdSum = vaddq_f32(vec_sdSum, vec_sd);
    vec_seSum = vaddq_f32(vec_seSum, vec_se);
  }

  {
    float32x2_t vec_sdSum_total;
    float32x2_t vec_seSum_total;
    // A B C D
    vec_sdSum_total = vpadd_f32(vget_low_f32(vec_sdSum),
                                vget_high_f32(vec_sdSum));
    vec_seSum_total = vpadd_f32(vget_low_f32(vec_seSum),
                                vget_high_f32(vec_seSum));
    // A+B C+D
    vec_sdSum_total = vpadd_f32(vec_sdSum_total, vec_sdSum_total);
    vec_seSum_total = vpadd_f32(vec_seSum_total, vec_seSum_total);
    // A+B+C+D A+B+C+D
    sdSum = vget_lane_f32(vec_sdSum_total, 0);
    seSum = vget_lane_f32(vec_seSum_total, 0);
  }

  // Scalar code for the remaining items.
  for (; i < PART_LEN1; i++) {
    aec->sd[i] = ptrGCoh[0] * aec->sd[i] +
                 ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
    aec->se[i] = ptrGCoh[0] * aec->se[i] +
                 ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
    // We threshold here to protect against the ill-effects of a zero farend.
    // The threshold is not arbitrarily chosen, but balances protection and
    // adverse interaction with the algorithm's tuning.
    // TODO(bjornv): investigate further why this is so sensitive.
    aec->sx[i] = ptrGCoh[0] * aec->sx[i] +
                 ptrGCoh[1] * WEBRTC_SPL_MAX(
                     xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i],
                     WebRtcAec_kMinFarendPSD);

    aec->sde[i][0] = ptrGCoh[0] * aec->sde[i][0] +
                     ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]);
    aec->sde[i][1] = ptrGCoh[0] * aec->sde[i][1] +
                     ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]);

    aec->sxd[i][0] = ptrGCoh[0] * aec->sxd[i][0] +
                     ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]);
    aec->sxd[i][1] = ptrGCoh[0] * aec->sxd[i][1] +
                     ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]);

    sdSum += aec->sd[i];
    seSum += aec->se[i];
  }

  // Divergent filter safeguard update.
  aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum;

  // Signal extreme filter divergence if the error is significantly larger
  // than the nearend (13 dB).
  *extreme_filter_divergence = (seSum > (19.95f * sdSum));
}
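/* Aside on the constant above: the comment's 13 dB and the 19.95f factor agree, since a
   13 dB power ratio is 10^(13/10) ≈ 19.95. A tiny standalone check (not part of the
   original file): */
#include <math.h>

static float extreme_divergence_ratio_from_db(void) {
    return powf(10.0f, 13.0f / 10.0f);  /* ≈ 19.9526f, matching the 19.95f threshold */
}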
/**
 * @brief Matrix dot-multiplied by a vector: C[] = alpha * A[] * B[] + beta * C[].
 *
 * @param[out] dst      the result matrix C.
 * @param[in]  src1     the input matrix A.
 * @param[in]  src2     the input vector B.
 * @param[in]  alpha    scale of A * B.
 * @param[in]  beta     scale of C.
 * @param[in]  dimM     row number.
 * @param[in]  dimN     column number.
 * @param[in]  leadingN the aligned column number (row stride).
 *
 * @return void.
 */
void neon_dotMulVec(float *dst, const float *src1, const float *src2,
                    const float alpha, const float beta,
                    const int dimM, const int dimN, const int leadingN) {
    float *mat0 = (float *)src1;
    float *mat1 = (float *)src1 + leadingN;
    float *mat2 = (float *)src1 + leadingN*2;
    float *mat3 = (float *)src1 + leadingN*3;
    float *mat4 = (float *)src2;
    float *mat8 = dst;
    float *mat9 = dst + leadingN;
    float *matA = dst + leadingN*2;
    float *matB = dst + leadingN*3;

    int i = 0;
    for (i = 0; i <= dimM - 4; i += 4) {
        int j = 0;
        for (j = 0; j <= dimN - 4; j += 4) {
            float32x4_t q0 = vld1q_f32(mat0 + j);
            float32x4_t q1 = vld1q_f32(mat1 + j);
            float32x4_t q2 = vld1q_f32(mat2 + j);
            float32x4_t q3 = vld1q_f32(mat3 + j);
            float32x4_t q4 = vld1q_f32(mat4 + j);
            float32x4_t q8 = vld1q_f32(mat8 + j);
            float32x4_t q9 = vld1q_f32(mat9 + j);
            float32x4_t qA = vld1q_f32(matA + j);
            float32x4_t qB = vld1q_f32(matB + j);

            q0 = vmulq_n_f32(q0, alpha);
            q1 = vmulq_n_f32(q1, alpha);
            q2 = vmulq_n_f32(q2, alpha);
            q3 = vmulq_n_f32(q3, alpha);

            q0 = vmulq_f32(q0, q4);
            q1 = vmulq_f32(q1, q4);
            q2 = vmulq_f32(q2, q4);
            q3 = vmulq_f32(q3, q4);

            q0 = vmlaq_n_f32(q0, q8, beta);
            q1 = vmlaq_n_f32(q1, q9, beta);
            q2 = vmlaq_n_f32(q2, qA, beta);
            q3 = vmlaq_n_f32(q3, qB, beta);

            vst1q_f32(mat8 + j, q0);
            vst1q_f32(mat9 + j, q1);
            vst1q_f32(matA + j, q2);
            vst1q_f32(matB + j, q3);
        }
        for (; j < dimN; j++) {
            float a0 = mat8[j] * beta;
            float a1 = mat9[j] * beta;
            float a2 = matA[j] * beta;
            float a3 = matB[j] * beta;

            a0 += mat0[j] * mat4[j] * alpha;
            a1 += mat1[j] * mat4[j] * alpha;
            a2 += mat2[j] * mat4[j] * alpha;
            a3 += mat3[j] * mat4[j] * alpha;

            mat8[j] = a0;
            mat9[j] = a1;
            matA[j] = a2;
            matB[j] = a3;
        }

        mat0 += leadingN * 4;
        mat1 += leadingN * 4;
        mat2 += leadingN * 4;
        mat3 += leadingN * 4;
        mat8 += leadingN * 4;
        mat9 += leadingN * 4;
        matA += leadingN * 4;
        matB += leadingN * 4;
    }

    for (; i < dimM; i++) {
        int j = 0;
        for (j = 0; j <= dimN - 4; j += 4) {
            float32x4_t q0 = vld1q_f32(mat0 + j);
            float32x4_t q4 = vld1q_f32(mat4 + j);
            float32x4_t q8 = vld1q_f32(mat8 + j);

            q0 = vmulq_n_f32(q0, alpha);
            q0 = vmulq_f32(q0, q4);
            q0 = vmlaq_n_f32(q0, q8, beta);

            vst1q_f32(mat8 + j, q0);
        }
        for (; j < dimN; j++) {
            float a0 = mat0[j] * mat4[j] * alpha + mat8[j] * beta;
            mat8[j] = a0;
        }
        mat0 += leadingN;
        mat8 += leadingN;
    }
}
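/* A small usage sketch for neon_dotMulVec (the driver below is hypothetical, not part of
   the original source). The vector B is broadcast across the rows of A, leadingN is the
   row stride of A and C (here simply dimN), and dimM = 5 / dimN = 6 exercise both the
   4-wide NEON blocks and the scalar tails. */
void neon_dotMulVec_example(void) {
    enum { M = 5, N = 6 };
    float A[M * N], B[N], C[M * N];
    for (int i = 0; i < M * N; i++) { A[i] = 1.0f; C[i] = 2.0f; }
    for (int j = 0; j < N; j++) B[j] = (float)j;
    neon_dotMulVec(C, A, B, 3.0f, 0.5f, M, N, N);
    /* C[i * N + j] is now 3.0f * 1.0f * (float)j + 0.5f * 2.0f */
}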
void ConvexPolygonShapeSW::project_range(const Vector3& p_normal, const Transform& p_transform, real_t &r_min, real_t &r_max) const {

    int vertex_count=mesh.vertices.size();
    if (vertex_count==0)
        return;

    const Vector3 *vrts=&mesh.vertices[0];

#ifndef NEON
    for (int i=0;i<vertex_count;i++) {

        float d=p_normal.dot( p_transform.xform( vrts[i] ) );

        if (i==0 || d > r_max)
            r_max=d;
        if (i==0 || d < r_min)
            r_min=d;
    }
#else
    int i;
    Matrix3 m = p_transform.get_basis();
    Vector3 o = p_transform.get_origin();
    // Broadcast the three origin components to one vector each.
    float32x4_t vo[3] = {{o[0],o[0],o[0],o[0]}, {o[1],o[1],o[1],o[1]}, {o[2],o[2],o[2],o[2]}};

    for (i=0;i<vertex_count-4;i+=4) { // as long as 4 calculations at a time are possible

        /*_FORCE_INLINE_ Vector3 Transform::xform(const Vector3& p_vector) const {
            return Vector3(
                basis[0].dot(p_vector)+origin.x,
                basis[1].dot(p_vector)+origin.y,
                basis[2].dot(p_vector)+origin.z
            );
        }*/
        //print_line("yay");
        //float d1, d2, d3, d4;
        //float f1_1, f1_2, f1_3, f1_4, f2_1, f2_2, f2_3, f2_4, f3_1, f3_2, f3_3, f3_4;
        float32x4_t f1, f2, f3;
        float32x4_t d;
        float32x4_t vrts_x = {vrts[i].x, vrts[i+1].x, vrts[i+2].x, vrts[i+3].x};
        float32x4_t vrts_y = {vrts[i].y, vrts[i+1].y, vrts[i+2].y, vrts[i+3].y};
        float32x4_t vrts_z = {vrts[i].z, vrts[i+1].z, vrts[i+2].z, vrts[i+3].z};

        /*f1_1 = m[0][0]*vrts[i][0]; f1_2 = m[0][0]*vrts[i+1][0]; f1_3 = m[0][0]*vrts[i+2][0]; f1_4 = m[0][0]*vrts[i+3][0];*/
        //f1 = vrts_x * m[0][0];
        f1 = vmulq_n_f32(vrts_x, m[0][0]);

        /*f2_1 = m[1][0]*vrts[i][0]; f2_2 = m[1][0]*vrts[i+1][0]; f2_3 = m[1][0]*vrts[i+2][0]; f2_4 = m[1][0]*vrts[i+3][0];*/
        //f2 = m[1][0] * vrts_x;
        f2 = vmulq_n_f32(vrts_x, m[1][0]);

        /*f3_1 = m[2][0]*vrts[i][0]; f3_2 = m[2][0]*vrts[i+1][0]; f3_3 = m[2][0]*vrts[i+2][0]; f3_4 = m[2][0]*vrts[i+3][0];*/
        //f3 = m[2][0] * vrts_x;
        f3 = vmulq_n_f32(vrts_x, m[2][0]);

        /*f1_1 += m[0][1]*vrts[i][1]; f1_2 += m[0][1]*vrts[i+1][1]; f1_3 += m[0][1]*vrts[i+2][1]; f1_4 += m[0][1]*vrts[i+3][1];*/
        //f1 += m[0][1] * vrts_y;
        f1 += vmulq_n_f32(vrts_y, m[0][1]);

        /*f2_1 += m[1][1]*vrts[i][1]; f2_2 += m[1][1]*vrts[i+1][1]; f2_3 += m[1][1]*vrts[i+2][1]; f2_4 += m[1][1]*vrts[i+3][1];*/
        //f2 += m[1][1] * vrts_y;
        f2 += vmulq_n_f32(vrts_y, m[1][1]);

        /*f3_1 += m[2][1]*vrts[i][1]; f3_2 += m[2][1]*vrts[i+1][1]; f3_3 += m[2][1]*vrts[i+2][1]; f3_4 += m[2][1]*vrts[i+3][1];*/
        //f3 += m[2][1] * vrts_y;
        f3 += vmulq_n_f32(vrts_y, m[2][1]);

        /*f1_1 += m[0][2]*vrts[i][2]; f1_2 += m[0][2]*vrts[i+1][2]; f1_3 += m[0][2]*vrts[i+2][2]; f1_4 += m[0][2]*vrts[i+3][2];*/
        //f1 += m[0][2] * vrts_z;
        f1 += vmulq_n_f32(vrts_z, m[0][2]);

        /*f2_1 += m[1][2]*vrts[i][2]; f2_2 += m[1][2]*vrts[i+1][2]; f2_3 += m[1][2]*vrts[i+2][2]; f2_4 += m[1][2]*vrts[i+3][2];*/
        //f2 += m[1][2] * vrts_z;
        f2 += vmulq_n_f32(vrts_z, m[1][2]);

        /*f3_1 += m[2][2]*vrts[i][2]; f3_2 += m[2][2]*vrts[i+1][2]; f3_3 += m[2][2]*vrts[i+2][2]; f3_4 += m[2][2]*vrts[i+3][2];*/
        //f3 += m[2][2] * vrts_z;
        f3 += vmulq_n_f32(vrts_z, m[2][2]);

        /*f1_1 += o[0]; f1_2 += o[0]; f1_3 += o[0]; f1_4 += o[0];*/
        f1 += vo[0];

        /*f2_1 += o[1]; f2_2 += o[1]; f2_3 += o[1]; f2_4 += o[1];*/
        f2 += vo[1];

        /*f3_1 += o[2]; f3_2 += o[2]; f3_3 += o[2]; f3_4 += o[2];*/
        f3 += vo[2];

        /*d1 = f1_1*p_normal[0]; d2 = f1_2*p_normal[0]; d3 = f1_3*p_normal[0]; d4 = f1_4*p_normal[0];*/
        d = vmulq_n_f32(f1, p_normal[0]);

        /*d1 += f2_1*p_normal[1]; d2 += f2_2*p_normal[1]; d3 += f2_3*p_normal[1]; d4 += f2_4*p_normal[1];*/
        d += vmulq_n_f32(f2, p_normal[1]);

        /*d1 += f3_1*p_normal[2]; d2 += f3_2*p_normal[2]; d3 += f3_3*p_normal[2]; d4 += f3_4*p_normal[2];*/
        d += vmulq_n_f32(f3, p_normal[2]);

        float *fd = (float *)&d;

        // Only the very first lane may seed r_max/r_min; the remaining lanes
        // are compared against the running extremes.
        if (i==0 || fd[0] > r_max) r_max=fd[0];
        if (i==0 || fd[0] < r_min) r_min=fd[0];
        if (fd[1] > r_max) r_max=fd[1];
        if (fd[1] < r_min) r_min=fd[1];
        if (fd[2] > r_max) r_max=fd[2];
        if (fd[2] < r_min) r_min=fd[2];
        if (fd[3] > r_max) r_max=fd[3];
        if (fd[3] < r_min) r_min=fd[3];
    }

    for (;i<vertex_count;i++) { // rest

        /*_FORCE_INLINE_ Vector3 Transform::xform(const Vector3& p_vector) const {
            return Vector3(
                basis[0].dot(p_vector)+origin.x,
                basis[1].dot(p_vector)+origin.y,
                basis[2].dot(p_vector)+origin.z
            );
        }*/
        float d;
        float f1, f2, f3;

        f1 = m[0][0]*vrts[i][0];
        f2 = m[1][0]*vrts[i][0];
        f3 = m[2][0]*vrts[i][0];

        f1 += m[0][1]*vrts[i][1];
        f2 += m[1][1]*vrts[i][1];
        f3 += m[2][1]*vrts[i][1];

        f1 += m[0][2]*vrts[i][2];
        f2 += m[1][2]*vrts[i][2];
        f3 += m[2][2]*vrts[i][2];

        f1 += o[0];
        f2 += o[1];
        f3 += o[2];

        d = f1*p_normal[0];
        d += f2*p_normal[1];
        d += f3*p_normal[2];

        if (i==0 || d > r_max)
            r_max=d;
        if (i==0 || d < r_min)
            r_min=d;
    }
#endif
}