/**
 * @brief Shifts every element of a Q31 vector by a signed number of bits.
 *
 * A non-negative shiftBits shifts left; the 64-bit intermediate result is
 * passed through clip_q63_to_q31() (defined elsewhere, expected to saturate
 * to the Q31 range). A negative shiftBits shifts right by -shiftBits.
 *
 * Shift counts beyond the operand width are handled explicitly instead of
 * being left undefined:
 *  - a left shift of 32 or more is treated as a shift by 32, which already
 *    pushes every non-zero sample outside the Q31 range;
 *  - a right shift of 32 or more is treated as a shift by 31, i.e. the
 *    result is 0 for non-negative samples and -1 for negative samples.
 *
 * @param[in]  pSrc       points to the input vector
 * @param[in]  shiftBits  bits to shift; positive shifts left, negative right
 * @param[out] pDst       points to the output vector
 * @param[in]  blockSize  number of samples to process
 */
void arm_shift_q31(
  q31_t * pSrc,
  int8_t shiftBits,
  q31_t * pDst,
  uint32_t blockSize)
{
  uint32_t blkCnt;                               /* loop counter */

  if (shiftBits >= 0)
  {
    /* Left shift, expressed as a multiplication by 2^n so that negative
     * samples never go through '<<' (undefined for negative operands).
     * With n <= 32 the product of a 32-bit sample and 2^n always fits in
     * 64 bits, so no intermediate overflow is possible. */
    const q63_t scale = (q63_t) 1 << ((shiftBits > 32) ? 32 : shiftBits);

    /* Main part: 4 outputs per iteration */
    blkCnt = blockSize >> 2u;

    while (blkCnt > 0u)
    {
      pDst[0] = clip_q63_to_q31((q63_t) pSrc[0] * scale);
      pDst[1] = clip_q63_to_q31((q63_t) pSrc[1] * scale);
      pDst[2] = clip_q63_to_q31((q63_t) pSrc[2] * scale);
      pDst[3] = clip_q63_to_q31((q63_t) pSrc[3] * scale);

      pSrc += 4;
      pDst += 4;
      blkCnt--;
    }

    /* Remaining 0 to 3 samples */
    blkCnt = blockSize & 0x3u;

    while (blkCnt > 0u)
    {
      *pDst++ = clip_q63_to_q31((q63_t) *pSrc++ * scale);
      blkCnt--;
    }
  }
  else
  {
    /* Right shift. Negation happens after integer promotion, so
     * shiftBits == -128 is safe. Clamp to 31 because shifting a 32-bit
     * value by 32 or more bits is undefined. */
    const int32_t rightShift = (shiftBits < -31) ? 31 : -shiftBits;

    /* Main part: 4 outputs per iteration */
    blkCnt = blockSize >> 2u;

    while (blkCnt > 0u)
    {
      pDst[0] = pSrc[0] >> rightShift;
      pDst[1] = pSrc[1] >> rightShift;
      pDst[2] = pSrc[2] >> rightShift;
      pDst[3] = pSrc[3] >> rightShift;

      pSrc += 4;
      pDst += 4;
      blkCnt--;
    }

    /* Remaining 0 to 3 samples */
    blkCnt = blockSize & 0x3u;

    while (blkCnt > 0u)
    {
      *pDst++ = *pSrc++ >> rightShift;
      blkCnt--;
    }
  }
}
/**
 * @brief Initialization function for the Q31 PID controller instance.
 *
 * Derives the coefficients from the gains already stored in the instance:
 *   A0 = Kp + Ki + Kd
 *   A1 = -(2 * Kd + Kp)
 *   A2 = Kd
 * The sums are formed with saturating additions, and the negation for A1
 * saturates as well. Optionally clears the 3-sample state buffer.
 *
 * @param[in,out] S               points to the PID instance; Kp, Ki and Kd
 *                                are read, A0, A1, A2 (and optionally state)
 *                                are written
 * @param[in]     resetStateFlag  non-zero clears the state buffer, zero
 *                                leaves it untouched
 */
void arm_pid_init_q31(
  arm_pid_instance_q31 * S,
  int32_t resetStateFlag)
{
  q31_t sum;                                     /* saturated 2 * Kd + Kp */

#ifndef ARM_MATH_CM0_FAMILY

  /* Run the below code for Cortex-M4 and Cortex-M3 */

  /* Derived coefficient A0 */
  S->A0 = __QADD(__QADD(S->Kp, S->Ki), S->Kd);

  /* Magnitude of derived coefficient A1 */
  sum = __QADD(__QADD(S->Kd, S->Kd), S->Kp);

#else

  /* Run the below code for Cortex-M0 */

  /* Derived coefficient A0 */
  sum = clip_q63_to_q31((q63_t) S->Kp + S->Ki);
  S->A0 = clip_q63_to_q31((q63_t) sum + S->Kd);

  /* Magnitude of derived coefficient A1 */
  sum = clip_q63_to_q31((q63_t) S->Kd + S->Kd);
  sum = clip_q63_to_q31((q63_t) sum + S->Kp);

#endif /* #ifndef ARM_MATH_CM0_FAMILY */

  /* Derived coefficient A1 = -sum, negated with saturation.
   * When the sum has saturated to the most negative Q31 value, a plain
   * negation overflows (undefined behavior; two's-complement targets
   * typically return the same negative value, giving A1 the wrong sign).
   * Map that single case to the largest positive Q31 value instead. */
  S->A1 = (sum < -0x7FFFFFFF) ? (q31_t) 0x7FFFFFFF : -sum;

  /* Derived coefficient A2 */
  S->A2 = S->Kd;

  /* Check whether state needs reset or not */
  if (resetStateFlag)
  {
    /* Clear the state buffer. The size will be always 3 samples */
    memset(S->state, 0, 3u * sizeof(q31_t));
  }
}
void comb(uint16_t* output){ #ifdef AUDIO_BIGEND float* l = left; float* r = right; uint32_t blkCnt = AUDIO_BLOCK_SIZE; uint16_t* dst = output; int32_t qint; while(blkCnt > 0u){ #ifdef AUDIO_SATURATE_SAMPLES qint = clip_q63_to_q31((q63_t)(*l++ * 2147483648.0f)); *dst++ = qint >> 16; *dst++ = qint & 0xffff; qint = clip_q63_to_q31((q63_t)(*r++ * 2147483648.0f)); *dst++ = qint >> 16; *dst++ = qint & 0xffff; #else qint = *l++ * 2147483648.0f; *dst++ = qint >> 16; *dst++ = qint & 0xffff; qint = *r++ * 2147483648.0f; *dst++ = qint >> 16; *dst++ = qint & 0xffff; #endif /* AUDIO_SATURATE_SAMPLES */ blkCnt--; } #else // todo: test if this works in big-endian on ARM float* l = left; float* r = right; uint32_t blkCnt = AUDIO_BLOCK_SIZE>>1; int32_t* dst = (int32_t*)output; while(blkCnt > 0u){ *dst++ = *l++ * 2147483648.0f; *dst++ = *r++ * 2147483648.0f; *dst++ = *l++ * 2147483648.0f; *dst++ = *r++ * 2147483648.0f; blkCnt--; } #endif }
/**
 * @brief Initialization function for the Q31 PID controller instance.
 *
 * Derives the coefficients from the gains already stored in the instance:
 *   A0 = Kp + Ki + Kd
 *   A1 = -(2 * Kd + Kp)
 *   A2 = Kd
 * The sums are formed with saturating additions, and the negation for A1
 * saturates as well. Optionally resets the 3-sample state buffer to zero.
 *
 * @param[in,out] S               points to the PID instance; Kp, Ki and Kd
 *                                are read, A0, A1, A2 (and optionally state)
 *                                are written
 * @param[in]     resetStateFlag  non-zero resets the state buffer, zero
 *                                leaves it untouched
 */
void arm_pid_init_q31(
  arm_pid_instance_q31 * S,
  int32_t resetStateFlag)
{
  q31_t sum;                                     /* saturated 2 * Kd + Kp */

#if defined (ARM_MATH_DSP)

  /* Derived coefficient A0 */
  S->A0 = __QADD(__QADD(S->Kp, S->Ki), S->Kd);

  /* Magnitude of derived coefficient A1 */
  sum = __QADD(__QADD(S->Kd, S->Kd), S->Kp);

#else

  /* Derived coefficient A0 */
  sum = clip_q63_to_q31((q63_t) S->Kp + S->Ki);
  S->A0 = clip_q63_to_q31((q63_t) sum + S->Kd);

  /* Magnitude of derived coefficient A1 */
  sum = clip_q63_to_q31((q63_t) S->Kd + S->Kd);
  sum = clip_q63_to_q31((q63_t) sum + S->Kp);

#endif /* #if defined (ARM_MATH_DSP) */

  /* Derived coefficient A1 = -sum, negated with saturation.
   * When the sum has saturated to the most negative Q31 value, a plain
   * negation overflows (undefined behavior; two's-complement targets
   * typically return the same negative value, giving A1 the wrong sign).
   * Map that single case to the largest positive Q31 value instead. */
  S->A1 = (sum < -0x7FFFFFFF) ? (q31_t) 0x7FFFFFFF : -sum;

  /* Derived coefficient A2 */
  S->A2 = S->Kd;

  /* Check whether state needs reset or not */
  if (resetStateFlag)
  {
    /* Reset state to zero, The size will be always 3 samples */
    memset(S->state, 0, 3U * sizeof(q31_t));
  }
}
/**
 * @brief Converts the elements of a floating-point vector to Q31.
 *
 * Each output is computed as
 *   pDst[n] = clip_q63_to_q31((q63_t) (pSrc[n] * 2147483648))
 * With ARM_MATH_ROUNDING defined, 0.5 is added to positive scaled values
 * and subtracted from all others before the conversion to integer, so the
 * result is rounded instead of truncated.
 *
 * The rounding offset is a single-precision constant on every code path,
 * which keeps the whole computation in float32_t.
 *
 * @param[in]  pSrc       points to the floating-point input vector
 * @param[out] pDst       points to the Q31 output vector
 * @param[in]  blockSize  number of samples to process
 */
void arm_float_to_q31(
  float32_t * pSrc,
  q31_t * pDst,
  uint32_t blockSize)
{
  float32_t *pIn = pSrc;                         /* Src pointer */
  uint32_t blkCnt;                               /* loop counter */

#ifdef ARM_MATH_ROUNDING
  float32_t in;                                  /* scaled input sample */
#endif /* #ifdef ARM_MATH_ROUNDING */

#ifndef ARM_MATH_CM0_FAMILY

  /* Run the below code for Cortex-M4 and Cortex-M3 */

  /* Loop unrolling: compute 4 outputs at a time. The shared loop below
   * computes the remaining 0 to 3 samples. */
  blkCnt = blockSize >> 2u;

  while (blkCnt > 0u)
  {
    /* C = A * 2147483648 */
    /* Convert from float to Q31 and store the results in the destination buffer */
#ifdef ARM_MATH_ROUNDING

    in = *pIn++ * 2147483648.0f;
    in += (in > 0.0f) ? 0.5f : -0.5f;
    *pDst++ = clip_q63_to_q31((q63_t) (in));

    in = *pIn++ * 2147483648.0f;
    in += (in > 0.0f) ? 0.5f : -0.5f;
    *pDst++ = clip_q63_to_q31((q63_t) (in));

    in = *pIn++ * 2147483648.0f;
    in += (in > 0.0f) ? 0.5f : -0.5f;
    *pDst++ = clip_q63_to_q31((q63_t) (in));

    in = *pIn++ * 2147483648.0f;
    in += (in > 0.0f) ? 0.5f : -0.5f;
    *pDst++ = clip_q63_to_q31((q63_t) (in));

#else

    *pDst++ = clip_q63_to_q31((q63_t) (*pIn++ * 2147483648.0f));
    *pDst++ = clip_q63_to_q31((q63_t) (*pIn++ * 2147483648.0f));
    *pDst++ = clip_q63_to_q31((q63_t) (*pIn++ * 2147483648.0f));
    *pDst++ = clip_q63_to_q31((q63_t) (*pIn++ * 2147483648.0f));

#endif /* #ifdef ARM_MATH_ROUNDING */

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* Samples left over when blockSize is not a multiple of 4 */
  blkCnt = blockSize % 0x4u;

#else

  /* Run the below code for Cortex-M0 */

  /* Loop over blockSize number of values */
  blkCnt = blockSize;

#endif /* #ifndef ARM_MATH_CM0_FAMILY */

  /* One sample per iteration; shared by both configurations */
  while (blkCnt > 0u)
  {
    /* C = A * 2147483648 */
    /* Convert from float to Q31 and store the result in the destination buffer */
#ifdef ARM_MATH_ROUNDING

    in = *pIn++ * 2147483648.0f;
    in += (in > 0.0f) ? 0.5f : -0.5f;
    *pDst++ = clip_q63_to_q31((q63_t) (in));

#else

    *pDst++ = clip_q63_to_q31((q63_t) (*pIn++ * 2147483648.0f));

#endif /* #ifdef ARM_MATH_ROUNDING */

    /* Decrement the loop counter */
    blkCnt--;
  }
}
/**
 * @brief Element-wise Q31 vector subtraction: pDst[n] = pSrcA[n] - pSrcB[n].
 *
 * Without ARM_MATH_CM0_FAMILY the difference is formed with __QSUB, four
 * samples per iteration followed by a tail loop for the remaining 0 to 3
 * samples. With ARM_MATH_CM0_FAMILY the difference is computed in 64 bits
 * and passed through clip_q63_to_q31().
 *
 * @param[in]  pSrcA      points to the first input vector (minuend)
 * @param[in]  pSrcB      points to the second input vector (subtrahend)
 * @param[out] pDst       points to the output vector
 * @param[in]  blockSize  number of samples in each vector
 */
void arm_sub_q31(
  q31_t * pSrcA,
  q31_t * pSrcB,
  q31_t * pDst,
  uint32_t blockSize)
{
  uint32_t remaining;                            /* samples/groups still to process */

#ifndef ARM_MATH_CM0_FAMILY

  /* Cortex-M4 / Cortex-M3 path */
  q31_t a0, a1, a2, a3;                          /* minuend samples */
  q31_t b0, b1, b2, b3;                          /* subtrahend samples */

  /* Groups of four: fetch all eight operands first, then store */
  for (remaining = blockSize >> 2u; remaining > 0u; remaining--)
  {
    a0 = pSrcA[0];
    a1 = pSrcA[1];
    b0 = pSrcB[0];
    b1 = pSrcB[1];
    a2 = pSrcA[2];
    a3 = pSrcA[3];
    b2 = pSrcB[2];
    b3 = pSrcB[3];

    pSrcA += 4;
    pSrcB += 4;

    pDst[0] = __QSUB(a0, b0);
    pDst[1] = __QSUB(a1, b1);
    pDst[2] = __QSUB(a2, b2);
    pDst[3] = __QSUB(a3, b3);

    pDst += 4;
  }

  /* Tail: the 0 to 3 samples that did not fill a group of four */
  for (remaining = blockSize % 0x4u; remaining > 0u; remaining--)
  {
    a0 = *pSrcA++;
    b0 = *pSrcB++;
    *pDst++ = __QSUB(a0, b0);
  }

#else

  /* Cortex-M0 path: one sample per iteration */
  for (remaining = blockSize; remaining > 0u; remaining--)
  {
    /* Widen to 64 bits before subtracting, then clip back to Q31 */
    q63_t diff = (q63_t) *pSrcA++ - *pSrcB++;

    *pDst++ = (q31_t) clip_q63_to_q31(diff);
  }

#endif /* #ifndef ARM_MATH_CM0_FAMILY */
}
/**
 * @brief Adds a constant offset to every element of a Q31 vector:
 *        pDst[n] = pSrc[n] + offset.
 *
 * Without ARM_MATH_CM0_FAMILY the sum is formed with __QADD, four samples
 * per iteration followed by a tail loop for the remaining 0 to 3 samples.
 * With ARM_MATH_CM0_FAMILY the sum is computed in 64 bits and passed
 * through clip_q63_to_q31().
 *
 * @param[in]  pSrc       points to the input vector
 * @param[in]  offset     value added to every sample
 * @param[out] pDst       points to the output vector
 * @param[in]  blockSize  number of samples to process
 */
void arm_offset_q31(
  q31_t * pSrc,
  q31_t offset,
  q31_t * pDst,
  uint32_t blockSize)
{
  uint32_t remaining;                            /* samples/groups still to process */

#ifndef ARM_MATH_CM0_FAMILY

  /* Cortex-M4 / Cortex-M3 path */
  q31_t s0, s1, s2, s3;                          /* input samples */

  /* Groups of four: fetch all inputs first, then store */
  for (remaining = blockSize >> 2u; remaining > 0u; remaining--)
  {
    s0 = pSrc[0];
    s1 = pSrc[1];
    s2 = pSrc[2];
    s3 = pSrc[3];

    pSrc += 4;

    pDst[0] = __QADD(s0, offset);
    pDst[1] = __QADD(s1, offset);
    pDst[2] = __QADD(s2, offset);
    pDst[3] = __QADD(s3, offset);

    pDst += 4;
  }

  /* Tail: the 0 to 3 samples that did not fill a group of four */
  for (remaining = blockSize % 0x4u; remaining > 0u; remaining--)
  {
    s0 = *pSrc++;
    *pDst++ = __QADD(s0, offset);
  }

#else

  /* Cortex-M0 path: one sample per iteration */
  for (remaining = blockSize; remaining > 0u; remaining--)
  {
    /* Widen to 64 bits before adding, then clip back to Q31 */
    q63_t sum = (q63_t) *pSrc++ + offset;

    *pDst++ = (q31_t) clip_q63_to_q31(sum);
  }

#endif /* #ifndef ARM_MATH_CM0_FAMILY */
}
/**
 * @brief Scales every element of a Q31 matrix.
 *
 * Each element is multiplied by scaleFract in 64 bits, shifted right by
 * (31 - shift) bits, and passed through clip_q63_to_q31() before being
 * stored in the destination matrix.
 *
 * @param[in]  pSrc        points to the input matrix
 * @param[in]  scaleFract  fractional part of the scale factor
 * @param[in]  shift       shift amount; the product is shifted right by
 *                         (31 - shift)
 * @param[out] pDst        points to the output matrix
 * @return ARM_MATH_SUCCESS, or ARM_MATH_SIZE_MISMATCH when
 *         ARM_MATH_MATRIX_CHECK is defined and the row/column counts of
 *         the two matrices differ (nothing is written in that case).
 */
arm_status arm_mat_scale_q31(
  const arm_matrix_instance_q31 * pSrc,
  q31_t scaleFract,
  int32_t shift,
  arm_matrix_instance_q31 * pDst)
{
  q31_t *src = pSrc->pData;                      /* read cursor in the input matrix */
  q31_t *dst = pDst->pData;                      /* write cursor in the output matrix */
  const int32_t postShift = 31 - shift;          /* right shift applied to each product */
  q63_t scaled;                                  /* 64-bit scaled value before clipping */
  uint32_t total;                                /* number of matrix elements */
  uint32_t remaining;                            /* loop counter */

#ifdef ARM_MATH_MATRIX_CHECK

  /* Reject matrices whose dimensions differ */
  if ((pSrc->numRows != pDst->numRows) || (pSrc->numCols != pDst->numCols))
  {
    return (ARM_MATH_SIZE_MISMATCH);
  }

#endif /* #ifdef ARM_MATH_MATRIX_CHECK */

  total = (uint32_t) pSrc->numRows * pSrc->numCols;

#ifndef ARM_MATH_CM0

  /* Cortex-M4 / Cortex-M3 path: four elements per iteration */
  for (remaining = total >> 2u; remaining > 0u; remaining--)
  {
    /* C(m,n) = A(m,n) * k */
    scaled = ((q63_t) src[0] * scaleFract) >> postShift;
    dst[0] = clip_q63_to_q31(scaled);

    scaled = ((q63_t) src[1] * scaleFract) >> postShift;
    dst[1] = clip_q63_to_q31(scaled);

    scaled = ((q63_t) src[2] * scaleFract) >> postShift;
    dst[2] = clip_q63_to_q31(scaled);

    scaled = ((q63_t) src[3] * scaleFract) >> postShift;
    dst[3] = clip_q63_to_q31(scaled);

    src += 4;
    dst += 4;
  }

  /* The 0 to 3 elements that did not fill a group of four */
  remaining = total % 0x4u;

#else

  /* Cortex-M0 path: every element goes through the loop below */
  remaining = total;

#endif /* #ifndef ARM_MATH_CM0 */

  for (; remaining > 0u; remaining--)
  {
    /* C(m,n) = A(m,n) * k */
    scaled = ((q63_t) *src++ * scaleFract) >> postShift;
    *dst++ = clip_q63_to_q31(scaled);
  }

  return (ARM_MATH_SUCCESS);
}