/**
 * @brief Converts the elements of a Q7 vector to a Q31 vector.
 * @param[in]  pSrc       points to the Q7 input vector
 * @param[out] pDst       points to the Q31 output vector
 * @param[in]  blockSize  number of samples to convert
 *
 * Every output is the input sample moved into the top byte of a 32-bit word:
 *   pDst[n] = (q31_t) pSrc[n] << 24
 */
void arm_q7_to_q31(
  q7_t * pSrc,
  q31_t * pDst,
  uint32_t blockSize)
{
  q7_t *pIn = pSrc;                              /* Src pointer */
  uint32_t blkCnt;                               /* loop counter */

#ifndef ARM_MATH_CM0_FAMILY

  q31_t in;                                      /* four packed q7 samples */

  /* Run the below code for Cortex-M4 and Cortex-M3 */

  /* Loop unrolling: four outputs per iteration.
   ** The common loop at the end handles the remaining 1 to 3 samples. */
  blkCnt = blockSize >> 2u;

  while (blkCnt > 0u)
  {
    /* Read four q7 samples as one 32-bit word */
    in = *__SIMD32(pIn)++;

    /* Rotate each byte into bits 31..24 and mask away the other three;
     * the store order follows the byte order of the packed word. */
#ifndef ARM_MATH_BIG_ENDIAN

    *pDst++ = (__ROR(in, 8)) & 0xFF000000;
    *pDst++ = (__ROR(in, 16)) & 0xFF000000;
    *pDst++ = (__ROR(in, 24)) & 0xFF000000;
    *pDst++ = (in & 0xFF000000);

#else

    *pDst++ = (in & 0xFF000000);
    *pDst++ = (__ROR(in, 24)) & 0xFF000000;
    *pDst++ = (__ROR(in, 16)) & 0xFF000000;
    *pDst++ = (__ROR(in, 8)) & 0xFF000000;

#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* If the blockSize is not a multiple of 4, the remaining samples
   ** are converted one at a time below. */
  blkCnt = blockSize % 0x4u;

#else

  /* Run the below code for Cortex-M0 */

  /* Loop over blockSize number of values */
  blkCnt = blockSize;

#endif /* #ifndef ARM_MATH_CM0_FAMILY */

  while (blkCnt > 0u)
  {
    /* C = (q31_t) A << 24
     * The shift is performed on an unsigned value: left-shifting a negative
     * signed integer is undefined behaviour in C.  The bit pattern produced
     * is the same one the signed shift was intended to give. */
    *pDst++ = (q31_t) ((uint32_t) *pIn++ << 24);

    /* Decrement the loop counter */
    blkCnt--;
  }
}
/**
 * @brief Converts the elements of a Q7 vector to a Q15 vector.
 * @param[in]  pSrc       points to the Q7 input vector
 * @param[out] pDst       points to the Q15 output vector
 * @param[in]  blockSize  number of samples to convert
 *
 * Every output is the input sample moved into the top byte of a 16-bit word:
 *   pDst[n] = (q15_t) pSrc[n] << 8
 */
void arm_q7_to_q15(
  q7_t * pSrc,
  q15_t * pDst,
  uint32_t blockSize)
{
  q7_t *pIn = pSrc;                              /* Src pointer */
  uint32_t blkCnt;                               /* loop counter */

#ifndef ARM_MATH_CM0_FAMILY

  q31_t in;                                      /* four packed q7 samples */
  q31_t in1, in2;                                /* two widened samples each */
  q31_t out1, out2;                              /* two packed q15 results each */

  /* Run the below code for Cortex-M4 and Cortex-M3 */

  /* Loop unrolling: four outputs per iteration.
   ** The common loop at the end handles the remaining 1 to 3 samples. */
  blkCnt = blockSize >> 2u;

  while (blkCnt > 0u)
  {
    /* Read four q7 samples as one 32-bit word */
    in = *__SIMD32(pIn)++;

    /* Rotate by 8 and sign-extend two q7 values into 16-bit lanes */
    in1 = __SXTB16(__ROR(in, 8));

    /* Sign-extend the remaining two q7 values into 16-bit lanes */
    in2 = __SXTB16(in);

    /* Move each sample into the upper byte of its lane and clear the
     * lower byte (which also drops sign bits carried over from the
     * neighbouring lane).  The shift is done on an unsigned value because
     * left-shifting a negative signed integer is undefined behaviour. */
    in1 = (q31_t) (((uint32_t) in1 << 8u) & 0xFF00FF00u);
    in2 = (q31_t) (((uint32_t) in2 << 8u) & 0xFF00FF00u);

    /* Re-interleave the lanes into two packed pairs */
#ifndef ARM_MATH_BIG_ENDIAN

    out2 = __PKHTB(in1, in2, 16);
    out1 = __PKHBT(in2, in1, 16);

#else

    out1 = __PKHTB(in1, in2, 16);
    out2 = __PKHBT(in2, in1, 16);

#endif

    /* Store two q15 samples per 32-bit write */
    *__SIMD32(pDst)++ = out1;
    *__SIMD32(pDst)++ = out2;

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* If the blockSize is not a multiple of 4, the remaining samples
   ** are converted one at a time below. */
  blkCnt = blockSize % 0x4u;

#else

  /* Run the below code for Cortex-M0 */

  /* Loop over blockSize number of values */
  blkCnt = blockSize;

#endif /* #ifndef ARM_MATH_CM0_FAMILY */

  while (blkCnt > 0u)
  {
    /* C = (q15_t) A << 8
     * Shift as unsigned to avoid the undefined left shift of a negative
     * value; the low 16 bits kept by the cast are what the signed shift
     * was intended to give. */
    *pDst++ = (q15_t) ((uint32_t) *pIn++ << 8);

    /* Decrement the loop counter */
    blkCnt--;
  }
}
/**
 * @brief Dot product of two Q7 vectors.
 * @param[in]  pSrcA      points to the first input vector
 * @param[in]  pSrcB      points to the second input vector
 * @param[in]  blockSize  number of samples in each vector
 * @param[out] result     location the accumulated sum is written to
 *
 * Computes
 *   sum = A[0]*B[0] + A[1]*B[1] + ... + A[blockSize-1]*B[blockSize-1]
 * in a 32-bit accumulator and stores it in *result (18.14 format).
 */
void arm_dot_prod_q7(
  q7_t * pSrcA,
  q7_t * pSrcB,
  uint32_t blockSize,
  q31_t * result)
{
  uint32_t blkCnt;                               /* loop counter */
  q31_t sum = 0;                                 /* accumulator */

#ifndef ARM_MATH_CM0_FAMILY

  /* Run the below code for Cortex-M4 and Cortex-M3 */

  q31_t input1, input2;                          /* four packed q7 samples each */
  q31_t inA1, inA2, inB1, inB2;                  /* two widened samples each */

  /* Loop unrolling: four products per iteration.
   ** The common loop at the end handles the remaining 1 to 3 samples. */
  blkCnt = blockSize >> 2u;

  while (blkCnt > 0u)
  {
    /* Read four samples at a time from each source */
    input1 = *__SIMD32(pSrcA)++;
    input2 = *__SIMD32(pSrcB)++;

    /* Widen the q7 samples into 16-bit lanes, two per word */
    inA1 = __SXTB16(__ROR(input1, 8));
    inA2 = __SXTB16(input1);
    inB1 = __SXTB16(__ROR(input2, 8));
    inB2 = __SXTB16(input2);

    /* Multiply and accumulate two sample pairs per instruction */
    sum = __SMLAD(inA1, inB1, sum);
    sum = __SMLAD(inA2, inB2, sum);

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* If the blockSize is not a multiple of 4, the remaining samples
   ** are accumulated one at a time below. */
  blkCnt = blockSize % 0x4u;

#else

  /* Run the below code for Cortex-M0 */

  /* Initialize blkCnt with number of samples */
  blkCnt = blockSize;

#endif /* #ifndef ARM_MATH_CM0_FAMILY */

  while (blkCnt > 0u)
  {
    /* A plain multiply-accumulate is used here on every core.
     * __SMLAD must not be fed single sign-extended samples: it multiplies
     * both 16-bit halves of its operands, and a negative sample has 0xFFFF
     * in its upper half, so a pair of negative samples would add a
     * spurious +1 to the sum. */
    sum += (q31_t) ((q15_t) *pSrcA++ * *pSrcB++);

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* Store the result in the destination buffer in 18.14 format */
  *result = sum;
}
/**
 * @brief Sum of the squares of the elements of a Q7 vector.
 * @param[in]  pSrc       points to the input vector
 * @param[in]  blockSize  number of samples in the vector
 * @param[out] pResult    location the accumulated sum is written to
 *
 * Computes
 *   sum = A[0]*A[0] + A[1]*A[1] + ... + A[blockSize-1]*A[blockSize-1]
 * in a 32-bit accumulator and stores it in *pResult (18.14 format).
 */
void arm_power_q7(
  q7_t * pSrc,
  uint32_t blockSize,
  q31_t * pResult)
{
  q31_t sum = 0;                                 /* accumulator */
  q7_t in;                                       /* single input sample */
  uint32_t blkCnt;                               /* loop counter */

  /* The other kernels in this file select the scalar path with
   * ARM_MATH_CM0_FAMILY; this one tested ARM_MATH_CM0 only.  Both macros
   * are honoured so that a build defining just the family macro does not
   * compile the packed-word path. */
#if !defined(ARM_MATH_CM0) && !defined(ARM_MATH_CM0_FAMILY)

  /* Run the below code for Cortex-M4 and Cortex-M3 */

  q31_t input1;                                  /* four packed q7 samples */
  q31_t in1, in2;                                /* two widened samples each */

  /* Loop unrolling: four squares per iteration.
   ** The common loop at the end handles the remaining 1 to 3 samples. */
  blkCnt = blockSize >> 2u;

  while (blkCnt > 0u)
  {
    /* Read four samples as one word and widen them into 16-bit lanes */
    input1 = *__SIMD32(pSrc)++;

    in1 = __SXTB16(__ROR(input1, 8));
    in2 = __SXTB16(input1);

    /* Square and accumulate two samples per instruction */
    sum = __SMLAD(in1, in1, sum);
    sum = __SMLAD(in2, in2, sum);

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* If the blockSize is not a multiple of 4, the remaining samples
   ** are accumulated one at a time below. */
  blkCnt = blockSize % 0x4u;

#else

  /* Run the below code for Cortex-M0 */

  /* Loop over blockSize number of values */
  blkCnt = blockSize;

#endif /* #if !defined(ARM_MATH_CM0) && !defined(ARM_MATH_CM0_FAMILY) */

  while (blkCnt > 0u)
  {
    /* Square one sample and add it to the accumulator */
    in = *pSrc++;
    sum += ((q15_t) in * in);

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* Store the result in 18.14 format */
  *pResult = sum;
}
/**
  * @brief  AES CCM Authentication TAG generation.
  * @param  hcryp: pointer to a CRYP_HandleTypeDef structure that contains
  *         the configuration information for CRYP module
  * @param  AuthTag: Pointer to the authentication buffer; four 32-bit words
  *         are written to it
  * @param  Timeout: Timeout duration
  * @retval HAL status: HAL_OK once the tag has been read, HAL_ERROR when the
  *         handle is not ready, when the handle is not in the process phase,
  *         or when the output FIFO does not fill within Timeout. The matching
  *         bit is OR-ed into hcryp->ErrorCode in each error case.
  */
HAL_StatusTypeDef HAL_CRYPEx_AESCCM_GenerateAuthTAG(CRYP_HandleTypeDef *hcryp, uint32_t *AuthTag, uint32_t Timeout)
{
  uint32_t ctr0[4] = {0};
  uint32_t tickstart = 0U;
  uint32_t i;

  if (hcryp->State != HAL_CRYP_STATE_READY)
  {
    /* Busy error code field: accumulated like every other error in this
       driver so that previously recorded error bits are not discarded */
    hcryp->ErrorCode |= HAL_CRYP_ERROR_BUSY;
    return HAL_ERROR;
  }

  /* Process locked */
  __HAL_LOCK(hcryp);

  /* Change the CRYP peripheral state */
  hcryp->State = HAL_CRYP_STATE_BUSY;

  /* The final phase is only accepted directly after the process phase */
  if (hcryp->Phase != CRYPEx_PHASE_PROCESS)
  {
    /* Disable the peripheral */
    __HAL_CRYP_DISABLE(hcryp);

    /* Sequence error code field */
    hcryp->ErrorCode |= HAL_CRYP_ERROR_AUTH_TAG_SEQUENCE;

    /* Change the CRYP peripheral state */
    hcryp->State = HAL_CRYP_STATE_READY;

    /* Process unlocked */
    __HAL_UNLOCK(hcryp);
    return HAL_ERROR;
  }

  /* Change the CRYP phase */
  hcryp->Phase = CRYPEx_PHASE_FINAL;

  /* Disable CRYP to start the final phase */
  __HAL_CRYP_DISABLE(hcryp);

  /* Select final phase & ALGODIR bit must be set to '0'. */
  MODIFY_REG(hcryp->Instance->CR, CRYP_CR_GCM_CCMPH | CRYP_CR_ALGODIR, CRYP_PHASE_FINAL | CRYP_OPERATINGMODE_ENCRYPT);

  /* Enable the CRYP peripheral */
  __HAL_CRYP_ENABLE(hcryp);

  /* Build the counter block CTR0 from B0: the first and last words are
     masked, the two middle words are copied unchanged */
  ctr0[0] = (hcryp->Init.B0[0]) & CRYP_CCM_CTR0_0;
  ctr0[1] = hcryp->Init.B0[1];
  ctr0[2] = hcryp->Init.B0[2];
  ctr0[3] = hcryp->Init.B0[3] & CRYP_CCM_CTR0_3;

  /* Write the counter block in the IN FIFO, swapped according to the DATATYPE */
  if (hcryp->Init.DataType == CRYP_DATATYPE_8B)
  {
    for (i = 0U; i < 4U; i++)
    {
      hcryp->Instance->DIN = __REV(ctr0[i]);
    }
  }
  else if (hcryp->Init.DataType == CRYP_DATATYPE_16B)
  {
    for (i = 0U; i < 4U; i++)
    {
      hcryp->Instance->DIN = __ROR(ctr0[i], 16U);
    }
  }
  else if (hcryp->Init.DataType == CRYP_DATATYPE_1B)
  {
    for (i = 0U; i < 4U; i++)
    {
      hcryp->Instance->DIN = __RBIT(ctr0[i]);
    }
  }
  else
  {
    for (i = 0U; i < 4U; i++)
    {
      hcryp->Instance->DIN = ctr0[i];
    }
  }

  /* Wait for OFNE flag to be raised */
  tickstart = HAL_GetTick();
  while (HAL_IS_BIT_CLR(hcryp->Instance->SR, CRYP_FLAG_OFNE))
  {
    /* Check for the Timeout */
    if (Timeout != HAL_MAX_DELAY)
    {
      if ((Timeout == 0U) || ((HAL_GetTick() - tickstart) > Timeout))
      {
        /* Disable the CRYP peripheral */
        __HAL_CRYP_DISABLE(hcryp);

        /* Change state */
        hcryp->ErrorCode |= HAL_CRYP_ERROR_TIMEOUT;
        hcryp->State = HAL_CRYP_STATE_READY;

        /* Process unlocked */
        __HAL_UNLOCK(hcryp);
        return HAL_ERROR;
      }
    }
  }

  /* Read the Auth TAG from the OUT FIFO */
  for (i = 0U; i < 4U; i++)
  {
    AuthTag[i] = hcryp->Instance->DOUT;
  }

  /* Disable CRYP while the handle is still locked, matching the order used
     by HAL_CRYPEx_AESGCM_GenerateAuthTAG */
  __HAL_CRYP_DISABLE(hcryp);

  /* Change the CRYP peripheral state */
  hcryp->State = HAL_CRYP_STATE_READY;

  /* Process unlocked */
  __HAL_UNLOCK(hcryp);

  /* Return function status */
  return HAL_OK;
}
/**
  * @brief  Generate the GCM authentication TAG.
  * @param  hcryp: pointer to a CRYP_HandleTypeDef structure that contains
  *         the configuration information for CRYP module
  * @param  AuthTag: Pointer to the authentication buffer; four 32-bit words
  *         are written to it
  * @param  Timeout: Timeout duration
  * @retval HAL status: HAL_OK once the tag has been read, HAL_ERROR when the
  *         handle is not ready, when the handle is not in the process phase,
  *         or when the output FIFO does not fill within Timeout. The matching
  *         bit is OR-ed into hcryp->ErrorCode in each error case.
  */
HAL_StatusTypeDef HAL_CRYPEx_AESGCM_GenerateAuthTAG(CRYP_HandleTypeDef *hcryp, uint32_t *AuthTag, uint32_t Timeout)
{
  uint32_t tickstart = 0U;
  /* The multiplications are carried out in 64 bits: multiplying in 32 bits
     first and widening afterwards would wrap before the widening */
  uint64_t headerlength = (uint64_t)hcryp->Init.HeaderSize * 32U; /* Header length in bits */
  uint64_t inputlength = (uint64_t)hcryp->Size * 32U;             /* input length in bits */
  uint32_t lengths[4];
  uint32_t i;

  if (hcryp->State != HAL_CRYP_STATE_READY)
  {
    /* Busy error code field */
    hcryp->ErrorCode |= HAL_CRYP_ERROR_BUSY;
    return HAL_ERROR;
  }

  /* Process locked */
  __HAL_LOCK(hcryp);

  /* Change the CRYP peripheral state */
  hcryp->State = HAL_CRYP_STATE_BUSY;

  /* The final phase is only accepted directly after the process phase */
  if (hcryp->Phase != CRYPEx_PHASE_PROCESS)
  {
    /* Disable the Peripheral */
    __HAL_CRYP_DISABLE(hcryp);

    /* Sequence error code field */
    hcryp->ErrorCode |= HAL_CRYP_ERROR_AUTH_TAG_SEQUENCE;

    /* Change the CRYP peripheral state */
    hcryp->State = HAL_CRYP_STATE_READY;

    /* Process unlocked */
    __HAL_UNLOCK(hcryp);
    return HAL_ERROR;
  }

  /* Change the CRYP phase */
  hcryp->Phase = CRYPEx_PHASE_FINAL;

  /* Disable CRYP to start the final phase */
  __HAL_CRYP_DISABLE(hcryp);

  /* Select final phase */
  MODIFY_REG(hcryp->Instance->CR, CRYP_CR_GCM_CCMPH, CRYP_PHASE_FINAL);

  /* ALGODIR bit must be set to '0'. */
  hcryp->Instance->CR &= ~CRYP_CR_ALGODIR;

  /* Enable the CRYP peripheral */
  __HAL_CRYP_ENABLE(hcryp);

  /* Number of bits in the header (64 bits) followed by the number of bits in
     the payload (64 bits), each split into high word then low word.  The high
     words are zero whenever the length fits in 32 bits. */
  lengths[0] = (uint32_t)(headerlength >> 32);
  lengths[1] = (uint32_t)(headerlength);
  lengths[2] = (uint32_t)(inputlength >> 32);
  lengths[3] = (uint32_t)(inputlength);

  /* Write the four words, swapped according to the DATATYPE.  Nothing is
     written when DataType matches none of the four values below. */
  if (hcryp->Init.DataType == CRYP_DATATYPE_1B)
  {
    for (i = 0U; i < 4U; i++)
    {
      hcryp->Instance->DIN = __RBIT(lengths[i]);
    }
  }
  else if (hcryp->Init.DataType == CRYP_DATATYPE_8B)
  {
    for (i = 0U; i < 4U; i++)
    {
      hcryp->Instance->DIN = __REV(lengths[i]);
    }
  }
  else if (hcryp->Init.DataType == CRYP_DATATYPE_16B)
  {
    for (i = 0U; i < 4U; i++)
    {
      hcryp->Instance->DIN = __ROR(lengths[i], 16U);
    }
  }
  else if (hcryp->Init.DataType == CRYP_DATATYPE_32B)
  {
    for (i = 0U; i < 4U; i++)
    {
      hcryp->Instance->DIN = lengths[i];
    }
  }

  /* Wait for OFNE flag to be raised */
  tickstart = HAL_GetTick();
  while (HAL_IS_BIT_CLR(hcryp->Instance->SR, CRYP_FLAG_OFNE))
  {
    /* Check for the Timeout */
    if (Timeout != HAL_MAX_DELAY)
    {
      if ((Timeout == 0U) || ((HAL_GetTick() - tickstart) > Timeout))
      {
        /* Disable the CRYP peripheral */
        __HAL_CRYP_DISABLE(hcryp);

        /* Change state */
        hcryp->ErrorCode |= HAL_CRYP_ERROR_TIMEOUT;
        hcryp->State = HAL_CRYP_STATE_READY;

        /* Process unlocked */
        __HAL_UNLOCK(hcryp);
        return HAL_ERROR;
      }
    }
  }

  /* Read the authentication TAG in the output FIFO */
  for (i = 0U; i < 4U; i++)
  {
    AuthTag[i] = hcryp->Instance->DOUT;
  }

  /* Disable the peripheral */
  __HAL_CRYP_DISABLE(hcryp);

  /* Change the CRYP peripheral state */
  hcryp->State = HAL_CRYP_STATE_READY;

  /* Process unlocked */
  __HAL_UNLOCK(hcryp);

  /* Return function status */
  return HAL_OK;
}
/**
 * @brief Converts the elements of a Q7 vector to a Q15 vector
 *        (eight samples per unrolled iteration).
 * @param[in]  pSrc       points to the Q7 input vector
 * @param[out] pDst       points to the Q15 output vector
 * @param[in]  blockSize  number of samples to convert
 *
 * Every output is the input sample moved into the top byte of a 16-bit word:
 *   pDst[n] = (q15_t) pSrc[n] << 8
 */
void arm_q7_to_q15(
  q7_t * pSrc,
  q15_t * pDst,
  uint32_t blockSize)
{
  q7_t *pIn = pSrc;                              /* Src pointer */
  uint32_t blkCnt;                               /* loop counter */
  q31_t inFirst, inSecond;                       /* four packed q7 samples each */
  q31_t in1, in2;                                /* two widened samples each */
  q31_t out1, out2;                              /* two packed q15 results each */
  const uint32_t mask = 0xFF00FF00u;             /* keeps the upper byte of each 16-bit lane */

  /* Loop unrolling: eight outputs per iteration.
   ** The loop at the end handles the remaining 1 to 7 samples. */
  blkCnt = blockSize >> 3u;

  while (blkCnt > 0u)
  {
    /* Read both groups of four samples before any store to pDst */
    inFirst = *__SIMD32(pIn)++;
    inSecond = *__SIMD32(pIn)++;

    /* ---- first group of four samples -> pDst[0..3] ---- */

#ifdef CCS
    /* rotate by 8 and sign-extend two q7 values into 16-bit lanes */
    in1 = __SXTB16(inFirst, 8);
    /* sign-extend the remaining two q7 values */
    in2 = __SXTB16(inFirst, 0);
#else
    /* rotate by 8 and sign-extend two q7 values into 16-bit lanes */
    in1 = __SXTB16(__ROR(inFirst, 8));
    /* sign-extend the remaining two q7 values */
    in2 = __SXTB16(inFirst);
#endif

    /* Move each sample into the upper byte of its lane
     * (ex: 0x00ff00ff ==> 0xff00ff00) and clear the lower byte.  The shift
     * is done on an unsigned value because left-shifting a negative signed
     * integer is undefined behaviour. */
    in1 = (q31_t) (((uint32_t) in1 << 8u) & mask);
    in2 = (q31_t) (((uint32_t) in2 << 8u) & mask);

    /* pack two 16 bit values */
    out1 = __PKHTB(in1, in2, 16);
    out2 = __PKHBT(in2, in1, 16);

    /* store two q15_t samples at a time to destination */
#ifndef ARM_MATH_BIG_ENDIAN
    _SIMD32_OFFSET(pDst + 2) = out1;
    _SIMD32_OFFSET(pDst) = out2;
#else
    _SIMD32_OFFSET(pDst) = out1;
    _SIMD32_OFFSET(pDst + 2) = out2;
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

    /* ---- second group of four samples -> pDst[4..7] ---- */

#ifdef CCS
    in1 = __SXTB16(inSecond, 8);
    in2 = __SXTB16(inSecond, 0);
#else
    in1 = __SXTB16(__ROR(inSecond, 8));
    in2 = __SXTB16(inSecond);
#endif

    /* Both words must be masked before packing.  Without the mask on in1,
     * the sign bits shifted out of its lower lane remain in bits 16..23 and
     * __PKHTB copies them into the low byte of the last output sample
     * whenever the second sample of this group is negative. */
    in1 = (q31_t) (((uint32_t) in1 << 8u) & mask);
    in2 = (q31_t) (((uint32_t) in2 << 8u) & mask);

    /* pack two 16 bit values */
    out1 = __PKHTB(in1, in2, 16);
    out2 = __PKHBT(in2, in1, 16);

    /* store two q15_t samples at a time to destination */
#ifndef ARM_MATH_BIG_ENDIAN
    _SIMD32_OFFSET(pDst + 6) = out1;
    _SIMD32_OFFSET(pDst + 4) = out2;
#else
    _SIMD32_OFFSET(pDst + 4) = out1;
    _SIMD32_OFFSET(pDst + 6) = out2;
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

    /* increment destination pointer */
    pDst += 8u;

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* If the blockSize is not a multiple of 8, convert the remaining
   ** samples one at a time. */
  blkCnt = blockSize % 0x8u;

  while (blkCnt > 0u)
  {
    /* C = (q15_t) A << 8, shifted as unsigned to avoid the undefined left
     * shift of a negative value */
    *pDst++ = (q15_t) ((uint32_t) *pIn++ << 8);

    /* Decrement the loop counter */
    blkCnt--;
  }
}
/**
 * @brief Sum of the squares of the elements of a Q7 vector
 *        (eight samples per unrolled iteration).
 * @param[in]  pSrc       points to the input vector
 * @param[in]  blockSize  number of samples in the vector
 * @param[out] pResult    location the accumulated sum is written to
 *
 * Computes
 *   A[0]*A[0] + A[1]*A[1] + ... + A[blockSize-1]*A[blockSize-1]
 * and stores it in *pResult (18.14 format).
 */
void arm_power_q7(
  q7_t * pSrc,
  uint32_t blockSize,
  q31_t * pResult)
{
  q31_t total = 0;                               /* accumulator fed by the rotated lanes; also the final sum */
  q31_t partial = 0;                             /* accumulator fed by the unrotated lanes */
  q31_t wordLo, wordHi;                          /* four packed q7 samples each */
  q31_t lanesLoA, lanesLoB;                      /* widened samples of wordLo */
  q31_t lanesHiA, lanesHiB;                      /* widened samples of wordHi */
  q7_t sample;                                   /* single sample for the tail loop */
  uint32_t remaining;                            /* loop counter */

  /* Unrolled part: eight samples per pass, split over two accumulators */
  for (remaining = blockSize >> 3u; remaining > 0u; remaining--)
  {
    /* read two groups of four samples from the source buffer */
    wordLo = _SIMD32_OFFSET(pSrc);
    wordHi = _SIMD32_OFFSET(pSrc + 4);

    /* extend the q7_t values to q15_t values, two per word */
#ifdef CCS
    lanesLoA = __SXTB16(wordLo, 8);
    lanesLoB = __SXTB16(wordLo, 0);
    lanesHiA = __SXTB16(wordHi, 8);
    lanesHiB = __SXTB16(wordHi, 0);
#else
    lanesLoA = __SXTB16(__ROR(wordLo, 8));
    lanesLoB = __SXTB16(wordLo);
    lanesHiA = __SXTB16(__ROR(wordHi, 8));
    lanesHiB = __SXTB16(wordHi);
#endif /* #ifdef CCS */

    /* square and accumulate; each accumulator sees its two updates in the
     * same order as before (first word, then second word) */
    total = __SMLAD(lanesLoA, lanesLoA, total);
    partial = __SMLAD(lanesLoB, lanesLoB, partial);
    total = __SMLAD(lanesHiA, lanesHiA, total);
    partial = __SMLAD(lanesHiB, lanesHiB, partial);

    /* advance to the next eight samples */
    pSrc += 8u;
  }

  /* merge the two accumulators */
  total = total + partial;

  /* Tail: when blockSize is not a multiple of 8, square the remaining
   ** samples one at a time. */
  for (remaining = blockSize % 0x8u; remaining > 0u; remaining--)
  {
    sample = *pSrc++;
    total += ((q15_t) sample * sample);
  }

  /* Store the result in 18.14 format */
  *pResult = total;
}