void arm_negate_q7(
    q7_t * pSrc,
    q7_t * pDst,
    uint32_t blockSize)
{
    uint32_t blkCnt;                               /* loop counter */

#ifndef ARM_MATH_CM0

    /* Run the below code for Cortex-M4 and Cortex-M3 */
    q7_t in1;                                      /* Input value1 */
    q7_t in2;                                      /* Input value2 */
    q7_t in3;                                      /* Input value3 */
    q7_t in4;                                      /* Input value4 */


    /*loop Unrolling */
    blkCnt = blockSize >> 2u;

    /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
     ** a second loop below computes the remaining 1 to 3 samples. */
    while(blkCnt > 0u) {
        /* C = -A */
        /* Read four inputs */
        in1 = *pSrc++;
        in2 = *pSrc++;
        in3 = *pSrc++;
        in4 = *pSrc++;

        /* Store the Negated results in the destination buffer in a single cycle by packing the results */
        *__SIMD32(pDst)++ =
            __PACKq7(__SSAT(-in1, 8), __SSAT(-in2, 8), __SSAT(-in3, 8),
                     __SSAT(-in4, 8));

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
     ** No loop unrolling is used. */
    blkCnt = blockSize % 0x4u;

#else

    /* Run the below code for Cortex-M0 */

    /* Initialize blkCnt with number of samples */
    blkCnt = blockSize;

#endif /* #ifndef ARM_MATH_CM0 */

    while(blkCnt > 0u) {
        /* C = -A */
        /* Negate and then store the results in the destination buffer. */
        *pDst++ = __SSAT(-*pSrc++, 8);

        /* Decrement the loop counter */
        blkCnt--;
    }
}
void arm_fill_q7(
  q7_t value,
  q7_t * pDst,
  uint32_t blockSize)
{
  uint32_t blkCnt;                               /* loop counter */

#ifndef ARM_MATH_CM0_FAMILY

  /* Run the below code for Cortex-M4 and Cortex-M3 */

  q31_t packedValue;                             /* value packed to 32 bits */

  /*loop Unrolling */
  blkCnt = blockSize >> 2u;

  /* Packing four 8 bit values to 32 bit value in order to use SIMD */
  packedValue = __PACKq7(value, value, value, value);

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
   ** a second loop below computes the remaining 1 to 3 samples. */
  while(blkCnt > 0u)
  {
    /* C = value */
    /* Fill the value in the destination buffer */
    *__SIMD32(pDst)++ = packedValue;

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
   ** No loop unrolling is used. */
  blkCnt = blockSize % 0x4u;

#else

  /* Run the below code for Cortex-M0 */

  /* Loop over blockSize number of values */
  blkCnt = blockSize;

#endif /* #ifndef ARM_MATH_CM0_FAMILY */

  while(blkCnt > 0u)
  {
    /* C = value */
    /* Fill the value in the destination buffer */
    *pDst++ = value;

    /* Decrement the loop counter */
    blkCnt--;
  }
}
예제 #3
0
void arm_abs_q7( 
  q7_t * pSrc, 
  q7_t * pDst, 
  uint32_t blockSize) 
{ 
  uint32_t blkCnt;                               /* loop counter */ 
  q7_t in1;                                      /* Input value1 */ 
  q7_t in2;                                      /* Input value2 */ 
  q7_t in3;                                      /* Input value3 */ 
  q7_t in4;                                      /* Input value4 */ 
 
 
  /*loop Unrolling */ 
  blkCnt = blockSize >> 2u; 
 
  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.  
   ** a second loop below computes the remaining 1 to 3 samples. */ 
  while(blkCnt > 0u) 
  { 
    /* C = |A| */ 
    /* Read 4 inputs */ 
    in1 = *pSrc++; 
    in2 = *pSrc++; 
    in3 = *pSrc++; 
    in4 = *pSrc++; 
 
    /* Store the Absolute result in the destination buffer by packing the 4 values in single cycle */ 
    *__SIMD32(pDst)++ = 
      __PACKq7(((in1 > 0) ? in1 : __SSAT(-in1, 8)), 
               ((in2 > 0) ? in2 : __SSAT(-in2, 8)), 
               ((in3 > 0) ? in3 : __SSAT(-in3, 8)), 
               ((in4 > 0) ? in4 : __SSAT(-in4, 8))); 
 
    /* Decrement the loop counter */ 
    blkCnt--; 
  } 
 
  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.  
   ** No loop unrolling is used. */ 
  blkCnt = blockSize % 0x4u; 
 
  while(blkCnt > 0u) 
  { 
    /* C = |A| */ 
    /* Read the input */ 
    in1 = *pSrc++; 
 
    /* Store the Absolute result in the destination buffer */ 
    *pDst++ = (in1 > 0) ? in1 : __SSAT(-in1, 8); 
 
    /* Decrement the loop counter */ 
    blkCnt--; 
  } 
} 
예제 #4
0
void arm_offset_q7(
  q7_t * pSrc,
  q7_t offset,
  q7_t * pDst,
  uint32_t blockSize)
{
  uint32_t blkCnt;                               /* loop counter */

#ifndef ARM_MATH_CM0

/* Run the below code for Cortex-M4 and Cortex-M3 */
  q31_t offset_packed;                           /* Offset packed to 32 bit */


  /*loop Unrolling */
  blkCnt = blockSize >> 2u;

  /* Offset is packed to 32 bit in order to use SIMD32 for addition */
  offset_packed = __PACKq7(offset, offset, offset, offset);

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
   ** a second loop below computes the remaining 1 to 3 samples. */
  while(blkCnt > 0u)
  {
    /* C = A + offset */
    /* Add offset and then store the results in the destination bufferfor 4 samples at a time. */
    *__SIMD32(pDst)++ = __QADD8(*__SIMD32(pSrc)++, offset_packed);

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
   ** No loop unrolling is used. */
  blkCnt = blockSize % 0x4u;

  while(blkCnt > 0u)
  {
    /* C = A + offset */
    /* Add offset and then store the result in the destination buffer. */
    *pDst++ = (q7_t) __SSAT(*pSrc++ + offset, 8);

    /* Decrement the loop counter */
    blkCnt--;
  }

#else

  /* Run the below code for Cortex-M0 */

  /* Initialize blkCnt with number of samples */
  blkCnt = blockSize;

  while(blkCnt > 0u)
  {
    /* C = A + offset */
    /* Add offset and then store the result in the destination buffer. */
    *pDst++ = (q7_t) __SSAT((q15_t) * pSrc++ + offset, 8);

    /* Decrement the loop counter */
    blkCnt--;
  }

#endif /* #ifndef ARM_MATH_CM0 */

}