Example #1
0
/**
 * @brief  Adds a constant offset to every element of a Q7 vector.
 *
 * Computes pDst[n] = pSrc[n] + offset for 0 <= n < blockSize.  The packed
 * path adds four samples at once with __QADD8; the scalar path narrows each
 * sum back to 8 bits with __SSAT(..., 8).
 *
 * @param[in]  pSrc       points to the input vector
 * @param[in]  offset     value added to each input sample
 * @param[out] pDst       points to the output vector
 * @param[in]  blockSize  number of samples to process
 */
void arm_offset_q7(
  q7_t * pSrc,
  q7_t offset,
  q7_t * pDst,
  uint32_t blockSize)
{
  uint32_t remaining;                            /* samples left for the scalar loop */

#ifndef ARM_MATH_CM0

  /* Cortex-M4 / Cortex-M3 build: groups of four samples are handled by one packed add. */
  uint32_t quads;                                /* number of complete 4-sample groups */
  q31_t offsetQuad;                              /* offset replicated into all four byte lanes */

  /* Replicate the offset so a single SIMD32 add covers four samples. */
  offsetQuad = __PACKq7(offset, offset, offset, offset);

  for (quads = blockSize >> 2u; quads > 0u; quads--)
  {
    /* C = A + offset, four samples per iteration; both pointers advance by one word. */
    *__SIMD32(pDst)++ = __QADD8(*__SIMD32(pSrc)++, offsetQuad);
  }

  /* Whatever is left over (0 to 3 samples) goes through the scalar loop below. */
  remaining = blockSize & 0x3u;

#else

  /* Cortex-M0 build: every sample goes through the scalar loop below. */
  remaining = blockSize;

#endif /* #ifndef ARM_MATH_CM0 */

  for (; remaining > 0u; remaining--)
  {
    /* C = A + offset.  Both q7_t operands are promoted to int before the
     * addition, so the sum cannot wrap before it is narrowed to 8 bits. */
    *pDst = (q7_t) __SSAT(*pSrc + offset, 8);
    pSrc++;
    pDst++;
  }
}
Example #2
0
/**
 * @brief  Element-wise addition of two Q7 vectors.
 *
 * Computes pDst[n] = pSrcA[n] + pSrcB[n] for 0 <= n < blockSize.  The packed
 * path adds four samples at once with __QADD8; the scalar path narrows each
 * sum back to 8 bits with __SSAT(..., 8).
 *
 * @param[in]  pSrcA      points to the first input vector
 * @param[in]  pSrcB      points to the second input vector
 * @param[out] pDst       points to the output vector
 * @param[in]  blockSize  number of samples to process
 */
void arm_add_q7(
  q7_t * pSrcA,
  q7_t * pSrcB,
  q7_t * pDst,
  uint32_t blockSize)
{
  uint32_t remaining;                            /* samples left for the scalar loop */

#ifndef ARM_MATH_CM0

  /* Cortex-M4 / Cortex-M3 build: groups of four samples are handled by one packed add. */
  uint32_t quads;                                /* number of complete 4-sample groups */

  for (quads = blockSize >> 2u; quads > 0u; quads--)
  {
    /* C = A + B, four samples per iteration; all three pointers advance by one word. */
    *__SIMD32(pDst)++ = __QADD8(*__SIMD32(pSrcA)++, *__SIMD32(pSrcB)++);
  }

  /* Whatever is left over (0 to 3 samples) goes through the scalar loop below. */
  remaining = blockSize & 0x3u;

#else

  /* Cortex-M0 build: every sample goes through the scalar loop below. */
  remaining = blockSize;

#endif /* #ifndef ARM_MATH_CM0 */

  for (; remaining > 0u; remaining--)
  {
    /* C = A + B.  Both q7_t operands are promoted to int before the
     * addition, so the sum cannot wrap before it is narrowed to 8 bits. */
    *pDst = (q7_t) __SSAT(*pSrcA + *pSrcB, 8);
    pSrcA++;
    pSrcB++;
    pDst++;
  }
}
Example #3
0
/**
\brief Test case: TC_CoreSimd_ParAddSub8
\details
- Check Parallel 8-bit addition and subtraction.

  Intrinsics exercised, grouped by prefix:
    S   Signed                 __SADD8,  __SSUB8
    SH  Signed Halving         __SHADD8, __SHSUB8
    Q   Signed Saturating      __QADD8,  __QSUB8
    U   Unsigned               __UADD8,  __USUB8
    UH  Unsigned Halving       __UHADD8, __UHSUB8
    UQ  Unsigned Saturating    __UQADD8, __UQSUB8

  Each check loads two packed operands (four 8-bit lanes per word), applies
  one intrinsic and compares the packed result with a precomputed word.
  The body is compiled only when the guard below reports DSP support.
*/
void TC_CoreSimd_ParAddSub8 (void) {
#if ((defined (__ARM_ARCH_7EM__ ) && (__ARM_ARCH_7EM__  == 1)) || \
     (defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1))     )
  /* Operands and results are volatile so every access is really performed. */
  volatile uint32_t lhs_u;
  volatile uint32_t rhs_u;
  volatile uint32_t out_u;

  volatile int32_t lhs_s;
  volatile int32_t rhs_s;
  volatile int32_t out_s;

  /* __SADD8: signed add per lane. */
  lhs_s = (int32_t)0x87858381;
  rhs_s = (int32_t)0x08060402;
  out_s = __SADD8(lhs_s, rhs_s);
  ASSERT_TRUE(out_s == (int32_t)0x8F8B8783);

  /* __SSUB8: signed subtract per lane (undoes the addition above). */
  lhs_s = (int32_t)0x8F8B8783;
  rhs_s = (int32_t)0x08060402;
  out_s = __SSUB8(lhs_s, rhs_s);
  ASSERT_TRUE(out_s == (int32_t)0x87858381);

  /* __SHADD8: signed halving add, e.g. top lane (0x07 + 0x08) / 2 -> 0x07. */
  lhs_s = 0x07050302;
  rhs_s = 0x08060402;
  out_s = __SHADD8(lhs_s, rhs_s);
  ASSERT_TRUE(out_s == 0x07050302);

  /* __SHSUB8: signed halving subtract on negative lanes. */
  lhs_s = (int32_t)0x8F8B8783;
  rhs_s = 0x08060402;
  out_s = __SHSUB8(lhs_s, rhs_s);
  ASSERT_TRUE(out_s == (int32_t)0xC3C2C1C0);

  /* __QADD8: top lane (0x80 + 0xFF) and bottom lane (0x7F + 0x02) are
     expected to clamp at 0x80 and 0x7F respectively. */
  lhs_s = (int32_t)0x8085837F;
  rhs_s = (int32_t)0xFF060402;
  out_s = __QADD8(lhs_s, rhs_s);
  ASSERT_TRUE(out_s == (int32_t)0x808B877F);

  /* __QSUB8: top lane (0x80 - 0x08) is expected to clamp at 0x80. */
  lhs_s = (int32_t)0x808B8783;
  rhs_s = (int32_t)0x08060402;
  out_s = __QSUB8(lhs_s, rhs_s);
  ASSERT_TRUE(out_s == (int32_t)0x80858381);

  /* __UADD8: unsigned add per lane. */
  lhs_u = 0x07050301;
  rhs_u = 0x08060402;
  out_u = __UADD8(lhs_u, rhs_u);
  ASSERT_TRUE(out_u == 0x0F0B0703);

  /* __USUB8: unsigned subtract per lane (undoes the addition above). */
  lhs_u = 0x0F0B0703;
  rhs_u = 0x08060402;
  out_u = __USUB8(lhs_u, rhs_u);
  ASSERT_TRUE(out_u == 0x07050301);

  /* __UHADD8: unsigned halving add. */
  lhs_u = 0x07050302;
  rhs_u = 0x08060402;
  out_u = __UHADD8(lhs_u, rhs_u);
  ASSERT_TRUE(out_u == 0x07050302);

  /* __UHSUB8: unsigned halving subtract. */
  lhs_u = 0x0F0B0703;
  rhs_u = 0x08060402;
  out_u = __UHSUB8(lhs_u, rhs_u);
  ASSERT_TRUE(out_u == 0x03020100);

  /* __UQADD8: top lane (0xFF + 0x08) is expected to clamp at 0xFF. */
  lhs_u = 0xFF050301;
  rhs_u = 0x08060402;
  out_u = __UQADD8(lhs_u, rhs_u);
  ASSERT_TRUE(out_u == 0xFF0B0703);

  /* __UQSUB8: top lane (0x08 - 0x0F) and bottom lane (0x02 - 0x08) are
     expected to clamp at 0x00. */
  lhs_u = 0x080B0702;
  rhs_u = 0x0F060408;
  out_u = __UQSUB8(lhs_u, rhs_u);
  ASSERT_TRUE(out_u == 0x00050300);
#endif
}