Exemplo n.º 1
0
//---------------------------------------------------
/*
 * Render one block of `size` hi-hat samples into `buf`.
 *
 * Processing order: both modulator oscillators are rendered and merged with
 * a saturating add, the merged signal is handed to the FM oscillator call,
 * the result is run through the voice filter, the transient generator output
 * is added, the volume factors are applied, and finally the distortion stage
 * is run over `buf`.
 */
void HiHat_calcSyncBlock(int16_t* buf, const uint8_t size)
{
	// First holds the merged modulation signal, later reused for the transient.
	int16_t scratch[size];
	int16_t secondMod[size];

	// Render both modulators, each scaled by its own mod amount.
	calcNextOscSampleBlock(&hatVoice.modOsc, scratch, size, hatVoice.fmModAmount1);
	calcNextOscSampleBlock(&hatVoice.modOsc2, secondMod, size, hatVoice.fmModAmount2);

	// Merge the two modulators into a single modulation signal.
	bufferTool_addBuffersSaturating(scratch, secondMod, size);

	calcNextOscSampleFmBlock(&hatVoice.osc, scratch, buf, size, 0.5f);

	SVF_calcBlockZDF(&hatVoice.filter, hatVoice.filterType, buf, size);

	// The modulation signal is no longer needed: overwrite it with the transient.
	transient_calcBlock(&hatVoice.transGen, scratch, size);

	if (hatVoice.volumeMod)
	{
		// Velocity takes part in the gain.
		for (uint8_t i = 0; i < size; ++i)
		{
			// Add the transient to the filtered oscillator, then apply the gain.
			const int16_t mixed = __QADD16(buf[i], scratch[i]);
			buf[i] = mixed * (hatVoice.velo * hatVoice.vol * hatVoice.egValueOscVol);
		}
	}
	else
	{
		// Velocity is left out of the gain.
		for (uint8_t i = 0; i < size; ++i)
		{
			const int16_t mixed = __QADD16(buf[i], scratch[i]);
			buf[i] = mixed * (hatVoice.vol * hatVoice.egValueOscVol);
		}
	}

	calcDistBlock(&hatVoice.distortion, buf, size);
}
Exemplo n.º 2
0
/**
 * Element-wise addition of two Q15 vectors using __QADD16.
 *
 * @param pSrcA      first input vector
 * @param pSrcB      second input vector
 * @param pDst       output vector
 * @param blockSize  number of samples in each vector
 */
void arm_add_q15(
  q15_t * pSrcA,
  q15_t * pSrcB,
  q15_t * pDst,
  uint32_t blockSize)
{
  uint32_t groups = blockSize >> 2u;             /* full groups of four samples */
  uint32_t leftover = blockSize % 0x4u;          /* 0 to 3 trailing samples */

  /* Unrolled part: every pass handles four samples as two packed 32-bit
   * accesses, each carrying a pair of 16-bit values. */
  for (; groups > 0u; groups--)
  {
    /* C = A + B */
    *__SIMD32(pDst)++ = __QADD16(*__SIMD32(pSrcA)++, *__SIMD32(pSrcB)++);
    *__SIMD32(pDst)++ = __QADD16(*__SIMD32(pSrcA)++, *__SIMD32(pSrcB)++);
  }

  /* Trailing samples that did not fill a group of four, one at a time. */
  for (; leftover > 0u; leftover--)
  {
    /* C = A + B */
    *pDst++ = (q15_t) __QADD16(*pSrcA++, *pSrcB++);
  }
}
/**
 * Derive the internal coefficients of a Q15 PID instance from its Kp/Ki/Kd
 * gains and optionally clear its state buffer.
 *
 * @param S               PID instance; Kp, Ki and Kd are read, the derived
 *                        coefficients (and optionally the state) are written
 * @param resetStateFlag  non-zero clears the three-sample state buffer
 */
void arm_pid_init_q15(
  arm_pid_instance_q15 * S,
  int32_t resetStateFlag)
{

#ifdef ARM_MATH_CM0_FAMILY

  /* Cortex-M0 path: plain arithmetic followed by saturation to 16 bits */

  q31_t acc;                                     /* wide intermediate sum */

  /* A0 = Kp + Ki + Kd */
  acc = S->Kp + S->Ki + S->Kd;
  S->A0 = (q15_t) __SSAT(acc, 16);

  /* A1 = -(2 * Kd + Kp), A2 = Kd are kept as separate fields here */
  acc = -(S->Kd + S->Kd + S->Kp);
  S->A1 = (q15_t) __SSAT(acc, 16);
  S->A2 = S->Kd;

#else

  /* Cortex-M4 and Cortex-M3 path */

  /* A0 = Kp + Ki + Kd */
  S->A0 = __QADD16(__QADD16(S->Kp, S->Ki), S->Kd);

  /* -(2 * Kd + Kp) and Kd are packed together into A1; the order of the two
   * halves depends on the configured endianness */

#ifdef ARM_MATH_BIG_ENDIAN

  S->A1 = __PKHBT(S->Kd, -__QADD16(__QADD16(S->Kd, S->Kd), S->Kp), 16);

#else

  S->A1 = __PKHBT(-__QADD16(__QADD16(S->Kd, S->Kd), S->Kp), S->Kd, 16);

#endif /* #ifdef ARM_MATH_BIG_ENDIAN */

#endif /* #ifdef ARM_MATH_CM0_FAMILY */

  /* Common to both paths: clear the state on request (always 3 samples) */
  if (resetStateFlag != 0)
  {
    memset(S->state, 0, 3u * sizeof(q15_t));
  }

}
Exemplo n.º 4
0
Arquivo: Snare.c Projeto: Yulquen/LXR
//---------------------------------------------------
/*
 * Render one block of `size` snare samples into `buf`.
 *
 * Processing order: noise is rendered into `buf` and filtered, the transient
 * is added with a saturating add, the oscillator is rendered at level
 * (1 - mix), the noise path is scaled by mix and combined with the
 * oscillator, the volume factors are applied, and finally the distortion
 * stage is run over `buf`.
 */
void Snare_calcSyncBlock(int16_t* buf, const uint8_t size)
{
	// Holds the transient first and is reused for the oscillator afterwards.
	int16_t scratch[size];

	calcNoiseBlock(&snareVoice.noiseOsc, buf, size, 0.9f);
	SVF_calcBlockZDF(&snareVoice.filter, snareVoice.filterType, buf, size);

	// Transient goes on top of the filtered noise.
	transient_calcBlock(&snareVoice.transGen, scratch, size);
	bufferTool_addBuffersSaturating(buf, scratch, size);

	// Oscillator block, rendered with the complement of the mix amount.
	calcNextOscSampleBlock(&snareVoice.osc, scratch, size, (1.f - snareVoice.mix));

	if (snareVoice.volumeMod)
	{
		// Velocity takes part in the gain.
		for (uint8_t i = 0; i < size; ++i)
		{
			// Scale the noise path, add the oscillator, then apply the gain.
			const int16_t noisePart = buf[i] * snareVoice.mix;
			const int16_t mixed = __QADD16(noisePart, scratch[i]);
			buf[i] = mixed * (snareVoice.velo * snareVoice.vol * snareVoice.egValueOscVol);
		}
	}
	else
	{
		// Velocity is left out of the gain.
		for (uint8_t i = 0; i < size; ++i)
		{
			const int16_t noisePart = buf[i] * snareVoice.mix;
			const int16_t mixed = __QADD16(noisePart, scratch[i]);
			buf[i] = mixed * (snareVoice.vol * snareVoice.egValueOscVol);
		}
	}

	calcDistBlock(&snareVoice.distortion, buf, size);
}
Exemplo n.º 5
0
//---------------------------------------------------
/*
 * Render one block of `size` cymbal samples into `buf`.
 *
 * Processing order: both modulator oscillators are rendered and merged with
 * a saturating add, the merged signal is handed to the FM oscillator call,
 * the result is run through the voice filter, the transient generator output
 * is added, the volume factors are applied, and finally the distortion stage
 * is run over `buf`.
 */
void Cymbal_calcSyncBlock(int16_t* buf, const uint8_t size)
{
	// First holds the merged modulation signal, later reused for the transient.
	int16_t scratch[size];
	int16_t secondMod[size];

	// Render both modulators, each scaled by its own mod amount.
	calcNextOscSampleBlock(&cymbalVoice.modOsc, scratch, size, cymbalVoice.fmModAmount1);
	calcNextOscSampleBlock(&cymbalVoice.modOsc2, secondMod, size, cymbalVoice.fmModAmount2);

	// Merge the two modulators into a single modulation signal.
	bufferTool_addBuffersSaturating(scratch, secondMod, size);

	calcNextOscSampleFmBlock(&cymbalVoice.osc, scratch, buf, size, 1.f);
	SVF_calcBlockZDF(&cymbalVoice.filter, cymbalVoice.filterType, buf, size);

	// The modulation signal is no longer needed: overwrite it with the transient.
	transient_calcBlock(&cymbalVoice.transGen, scratch, size);

	if (cymbalVoice.volumeMod)
	{
		// Velocity takes part in the gain.
		for (uint8_t i = 0; i < size; ++i)
		{
			// Add the transient to the filtered oscillator, then apply the gain.
			const int16_t mixed = __QADD16(buf[i], scratch[i]);
			buf[i] = mixed * (cymbalVoice.velo * cymbalVoice.vol * cymbalVoice.egValueOscVol);
		}
	}
	else
	{
		// Velocity is left out of the gain.
		for (uint8_t i = 0; i < size; ++i)
		{
			const int16_t mixed = __QADD16(buf[i], scratch[i]);
			buf[i] = mixed * (cymbalVoice.vol * cymbalVoice.egValueOscVol);
		}
	}

	calcDistBlock(&cymbalVoice.distortion, buf, size);
}
Exemplo n.º 6
0
/**
 * Add a constant Q15 offset to every element of a vector, saturating to
 * 16 bits.
 *
 * @param pSrc       input vector
 * @param offset     value added to each sample
 * @param pDst       output vector
 * @param blockSize  number of samples to process
 */
void arm_offset_q15(
  q15_t * pSrc,
  q15_t offset,
  q15_t * pDst,
  uint32_t blockSize)
{
  uint32_t remaining;                            /* samples or groups left */

#ifndef ARM_MATH_CM0_FAMILY

  /* Cortex-M4 and Cortex-M3 path */

  /* The offset is duplicated into both halves of a 32-bit word so that one
   * packed add can handle two samples. */
  q31_t offsetPair;

  remaining = blockSize >> 2u;
  offsetPair = __PKHBT(offset, offset, 16);

  /* Unrolled part: four samples per pass, as two packed 32-bit accesses. */
  for (; remaining > 0u; remaining--)
  {
    /* C = A + offset */
    *__SIMD32(pDst)++ = __QADD16(*__SIMD32(pSrc)++, offsetPair);
    *__SIMD32(pDst)++ = __QADD16(*__SIMD32(pSrc)++, offsetPair);
  }

  /* Trailing 0 to 3 samples, handled one at a time. */
  for (remaining = blockSize % 0x4u; remaining > 0u; remaining--)
  {
    /* C = A + offset */
    *pDst++ = (q15_t) __QADD16(*pSrc++, offset);
  }

#else

  /* Cortex-M0 path: one sample per pass, widened add then saturate. */
  for (remaining = blockSize; remaining > 0u; remaining--)
  {
    /* C = A + offset */
    *pDst++ = (q15_t) __SSAT(((q31_t) * pSrc++ + offset), 16);
  }

#endif /* #ifndef ARM_MATH_CM0_FAMILY */

}
/**
 * Element-wise addition of two Q15 vectors, saturating to 16 bits.
 *
 * @param pSrcA      first input vector
 * @param pSrcB      second input vector
 * @param pDst       output vector
 * @param blockSize  number of samples in each vector
 */
void arm_add_q15(
    q15_t * pSrcA,
    q15_t * pSrcB,
    q15_t * pDst,
    uint32_t blockSize)
{
    uint32_t remaining;                            /* samples or groups left */

#ifndef ARM_MATH_CM0

    /* Cortex-M4 and Cortex-M3 path */

    /* Unrolled part: four samples per pass, as two packed 32-bit accesses
     * that each carry a pair of 16-bit values. */
    for (remaining = blockSize >> 2u; remaining > 0u; remaining--) {
        /* C = A + B */
        *__SIMD32(pDst)++ = __QADD16(*__SIMD32(pSrcA)++, *__SIMD32(pSrcB)++);
        *__SIMD32(pDst)++ = __QADD16(*__SIMD32(pSrcA)++, *__SIMD32(pSrcB)++);
    }

    /* Trailing 0 to 3 samples, handled one at a time. */
    for (remaining = blockSize % 0x4u; remaining > 0u; remaining--) {
        /* C = A + B */
        *pDst++ = (q15_t) __QADD16(*pSrcA++, *pSrcB++);
    }

#else

    /* Cortex-M0 path: one sample per pass, widened add then saturate. */
    for (remaining = blockSize; remaining > 0u; remaining--) {
        /* C = A + B */
        *pDst++ = (q15_t) __SSAT(((q31_t) * pSrcA++ + *pSrcB++), 16);
    }

#endif /* #ifndef ARM_MATH_CM0 */


}
/* Processes one buffer of complex 8-bit samples from `src` and writes the
 * translated, decimated result to `dst` (see the derivation below).
 *
 * Each loop iteration reads two 32-bit words from `src` and writes two 32-bit
 * words to `dst`. The packed values _q1_i0 / _q0_i1 are loaded on entry and
 * stored back on exit, so the two most recent input samples carry over from
 * one call to the next.
 *
 * Returns a buffer descriptor built from dst.p, src.count / 2 and
 * src.sampling_rate / 2.
 *
 * NOTE(review): the loop always consumes two words per iteration and only
 * compares the read pointer against src_end beforehand, so src is presumably
 * expected to hold a whole number of two-word groups, and dst to be large
 * enough for the output. Neither is checked here -- confirm with callers.
 */
buffer_c16_t TranslateByFSOver4AndDecimateBy2CIC3::execute(const buffer_c8_t& src, const buffer_c16_t& dst) {
	/* Translates incoming complex<int8_t> samples by -fs/4,
	 * decimates by two using a non-recursive third-order CIC filter.
	 */

	/* Derivation of algorithm:
	 * Original CIC filter (decimating by two):
	 * 	D_I0 = i3 * 1 + i2 * 3 + i1 * 3 + i0 * 1
	 * 	D_Q0 = q3 * 1 + q2 * 3 + q1 * 3 + q0 * 1
	 *
	 * 	D_I1 = i5 * 1 + i4 * 3 + i3 * 3 + i2 * 1
	 * 	D_Q1 = q5 * 1 + q4 * 3 + q3 * 3 + q2 * 1
	 *
	 * Translate -fs/4, phased 180 degrees, accomplished by complex multiplication
	 * of complex length-4 sequence:
	 *
	 * Substitute:
	 *	i0 = -i0, q0 = -q0
	 *	i1 = -q1, q1 =  i1
	 *	i2 =  i2, q2 =  q2
	 *	i3 =  q3, q3 = -i3
	 *	i4 = -i4, q4 = -q4
	 *	i5 = -q5, q5 =  i5
	 *
	 * Resulting taps (with decimation by 2, four samples in, two samples out):
	 *	D_I0 =  q3 * 1 +  i2 * 3 + -q1 * 3 + -i0 * 1
	 *	D_Q0 = -i3 * 1 +  q2 * 3 +  i1 * 3 + -q0 * 1
	 *
	 *	D_I1 = -q5 * 1 + -i4 * 3 +  q3 * 3 +  i2 * 1
	 *	D_Q1 =  i5 * 1 + -q4 * 3 + -i3 * 3 +  q2 * 1
	 */

	// 6 cycles per complex input sample, not including loop overhead.
	// Working copies of the carried-over samples (written back after the loop).
	uint32_t q1_i0 = _q1_i0;
	uint32_t q0_i1 = _q0_i1;
	/* 3:1 Scaled by 32 to normalize output to +/-32768-ish. */
	constexpr uint32_t scale_factor = 32;
	// Packed tap weights: 3 * scale_factor in the high half-word, 1 * scale_factor in the low one.
	const uint32_t k_3_1 = 0x00030001 * scale_factor;
	// Input and output are both accessed as 32-bit words.
	uint32_t* src_p = reinterpret_cast<uint32_t*>(&src.p[0]);
	uint32_t* const src_end = reinterpret_cast<uint32_t*>(&src.p[src.count]);
	uint32_t* dst_p = reinterpret_cast<uint32_t*>(&dst.p[0]);
	while(src_p < src_end) {
		// Two input words per iteration; the names list the packed bytes from most to least significant.
		const uint32_t q3_i3_q2_i2 = *(src_p++);			// 3
		const uint32_t q5_i5_q4_i4 = *(src_p++);

		// Sign-extend the bytes to 16 bits and regroup them into the pairs the taps need.
		const uint32_t i2_i3 = __SXTB16(q3_i3_q2_i2, 16);			// 1: (q3_i3_q2_i2 ror 16)[23:16]:(q3_i3_q2_i2 ror 16)[7:0]
		const uint32_t q3_q2 = __SXTB16(q3_i3_q2_i2,  8);			// 1: (q3_i3_q2_i2 ror  8)[23:16]:(q3_i3_q2_i2 ror  8)[7:0]
		const uint32_t i2_q3 = __PKHTB(i2_i3, q3_q2, 16);			// 1: Rn[31:16]:(Rm>>16)[15:0]
		const uint32_t i3_q2 = __PKHBT(q3_q2, i2_i3, 16);			// 1:(Rm<<16)[31:16]:Rn[15:0]

		// D_I0 = 3 * (i2 - q1) + (q3 - i0)
		const uint32_t i2_m_q1_q3_m_i0 = __QSUB16(i2_q3, q1_i0);	// 1: Rn[31:16]-Rm[31:16]:Rn[15:0]-Rm[15:0]
		const uint32_t d_i0 = __SMUAD(k_3_1, i2_m_q1_q3_m_i0);		// 1: Rm[15:0]*Rs[15:0]+Rm[31:16]*Rs[31:16]

		// D_Q0 = 3 * (q2 + i1) - (i3 + q0)
		const uint32_t i3_p_q0_q2_p_i1 = __QADD16(i3_q2, q0_i1);	// 1: Rn[31:16]+Rm[31:16]:Rn[15:0]+Rm[15:0]
		const uint32_t d_q0 = __SMUSDX(i3_p_q0_q2_p_i1, k_3_1);		// 1: Rm[15:0]*Rs[31:16]-Rm[31:16]*RsX[15:0]
		// First output sample: Q in the high half-word, I in the low half-word.
		const uint32_t d_q0_i0 = __PKHBT(d_i0, d_q0, 16);			// 1: (Rm<<16)[31:16]:Rn[15:0]

		const uint32_t i5_i4 = __SXTB16(q5_i5_q4_i4,  0);			// 1: (q5_i5_q4_i4 ror  0)[23:16]:(q5_i5_q4_i4 ror  0)[7:0]
		const uint32_t q4_q5 = __SXTB16(q5_i5_q4_i4, 24);			// 1: (q5_i5_q4_i4 ror 24)[23:16]:(q5_i5_q4_i4 ror 24)[7:0]
		const uint32_t q4_i5 = __PKHTB(q4_q5, i5_i4, 16);			// 1: Rn[31:16]:(Rm>>16)[15:0]
		const uint32_t q5_i4 = __PKHBT(i5_i4, q4_q5, 16);			// 1: (Rm<<16)[31:16]:Rn[15:0]

		// D_I1 = (i2 - q5) + 3 * (q3 - i4)
		const uint32_t i2_m_q5_q3_m_i4 = __QSUB16(i2_q3, q5_i4);	// 1: Rn[31:16]-Rm[31:16]:Rn[15:0]-Rm[15:0]
		const uint32_t d_i1 = __SMUADX(i2_m_q5_q3_m_i4, k_3_1);		// 1: Rm[15:0]*Rs[31:16]+Rm[31:16]*Rs[15:0]

		// D_Q1 = (i5 + q2) - 3 * (q4 + i3)
		const uint32_t q4_p_i3_i5_p_q2 = __QADD16(q4_i5, i3_q2);	// 1: Rn[31:16]+Rm[31:16]:Rn[15:0]+Rm[15:0]
		const uint32_t d_q1 = __SMUSD(k_3_1, q4_p_i3_i5_p_q2);		// 1: Rm[15:0]*Rs[15:0]-Rm[31:16]*Rs[31:16]
		// Second output sample, packed the same way as the first.
		const uint32_t d_q1_i1 = __PKHBT(d_i1, d_q1, 16);			// 1: (Rm<<16)[31:16]:Rn[15:0]
		*(dst_p++) = d_q0_i0;							// 3
		*(dst_p++) = d_q1_i1;

		// The last two input samples take the (q1,i0) / (q0,i1) slots for the next iteration.
		q1_i0 = q5_i4;
		q0_i1 = q4_i5;
	}
	// Persist the carried-over samples for the next call.
	_q1_i0 = q1_i0;
	_q0_i1 = q0_i1;

	return { dst.p, src.count / 2, src.sampling_rate / 2 };
}
Exemplo n.º 9
0
/**
\brief Test case: TC_CoreSimd_ParAddSub16
\details
- Check Parallel 16-bit addition and subtraction. Each intrinsic is called once
  with a fixed pair of packed operands (two 16-bit lanes per 32-bit word) and
  the packed result is compared against a precomputed constant. The intrinsics
  are exercised in this order:
  __SADD16
  __SSUB16
  __SASX
  __SSAX
  __SHADD16
  __SHSUB16
  __SHASX
  __SHSAX
  __QADD16
  __QSUB16
  __QASX
  __QSAX
  __UADD16
  __USUB16
  __UASX
  __USAX
  __UHADD16
  __UHSUB16
  __UHASX
  __UHSAX
  __UQADD16
  __UQSUB16
  __UQASX
  __UQSAX
- The whole body is compiled only when __ARM_ARCH_7EM__ or __ARM_FEATURE_DSP is
  defined as 1; otherwise the function is empty.
*/
void TC_CoreSimd_ParAddSub16 (void) {
#if ((defined (__ARM_ARCH_7EM__ ) && (__ARM_ARCH_7EM__  == 1)) || \
     (defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1))     )
  /* Operands and results are volatile -- presumably so that every intrinsic
     is really executed on values loaded at run time instead of being folded
     at compile time. */
  volatile uint32_t op1_u32, op2_u32;
  volatile uint32_t res_u32;

  volatile int32_t op1_s32, op2_s32;
  volatile int32_t res_s32;

  /* --- __SADD16 Test ---------------------------------------------- */
  /* Lane-wise add: 0x8003 + 0x0004 -> 0x8007, 0x8001 + 0x0002 -> 0x8003. */
  op1_s32 = (int32_t)0x80038001;
  op2_s32 = (int32_t)0x00040002;
  res_s32 = __SADD16(op1_s32, op2_s32);
  ASSERT_TRUE(res_s32 == (int32_t)0x80078003);

  /* --- __SSUB16 Test ---------------------------------------------- */
  /* Lane-wise subtract: 0x8007 - 0x0004 -> 0x8003, 0x8003 - 0x0002 -> 0x8001. */
  op1_s32 = (int32_t)0x80078003;
  op2_s32 = (int32_t)0x00040002;
  res_s32 = __SSUB16(op1_s32, op2_s32);
  ASSERT_TRUE(res_s32 == (int32_t)0x80038001);

  /* --- __SASX Test ---------------------------------------------- */
  /* Expected: high = op1.hi + op2.lo = 0x8009,
     low = op1.lo - op2.hi = 0x8003 - 0x0004, which wraps to 0x7FFF. */
  op1_s32 = (int32_t)0x80078003;
  op2_s32 = (int32_t)0x00040002;
  res_s32 = __SASX(op1_s32, op2_s32);
  ASSERT_TRUE(res_s32 == (int32_t)0x80097FFF);

  /* --- __SSAX Test ---------------------------------------------- */
  /* Expected: high = op1.hi - op2.lo = 0x8003 - 0x0004, which wraps to 0x7FFF,
     low = op1.lo + op2.hi = 0x8009. */
  op1_s32 = (int32_t)0x80038007;
  op2_s32 = (int32_t)0x00020004;
  res_s32 = __SSAX(op1_s32, op2_s32);
  ASSERT_TRUE(res_s32 == (int32_t)0x7FFF8009);

  /* --- __SHADD16 Test ---------------------------------------------- */
  /* Signed lane sums, halved: (0x8003 + 0x0004) >> 1 -> 0xC003,
     (0x8001 + 0x0002) >> 1 -> 0xC001. */
  op1_s32 = (int32_t)0x80038001;
  op2_s32 = (int32_t)0x00040002;
  res_s32 = __SHADD16(op1_s32, op2_s32);
  ASSERT_TRUE(res_s32 == (int32_t)0xC003C001);

  /* --- __SHSUB16 Test ---------------------------------------------- */
  /* Signed lane differences, halved: (0x8007 - 0x0004) >> 1 -> 0xC001,
     (0x8003 - 0x0002) >> 1 -> 0xC000. */
  op1_s32 = (int32_t)0x80078003;
  op2_s32 = (int32_t)0x00040002;
  res_s32 = __SHSUB16(op1_s32, op2_s32);
  ASSERT_TRUE(res_s32 == (int32_t)0xC001C000);

  /* --- __SHASX Test ---------------------------------------------- */
  /* Expected: high = (op1.hi + op2.lo) >> 1 = 0xC004,
     low = (op1.lo - op2.hi) >> 1 = 0xBFFF (halving, so no wrap). */
  op1_s32 = (int32_t)0x80078003;
  op2_s32 = (int32_t)0x00040002;
  res_s32 = __SHASX(op1_s32, op2_s32);
  ASSERT_TRUE(res_s32 == (int32_t)0xC004BFFF);

  /* --- __SHSAX Test ---------------------------------------------- */
  /* Expected: high = (op1.hi - op2.lo) >> 1 = 0xBFFF,
     low = (op1.lo + op2.hi) >> 1 = 0xC004. */
  op1_s32 = (int32_t)0x80038007;
  op2_s32 = (int32_t)0x00020004;
  res_s32 = __SHSAX(op1_s32, op2_s32);
  ASSERT_TRUE(res_s32 == (int32_t)0xBFFFC004);

  /* --- __QADD16 Test ---------------------------------------------- */
  /* Low lanes 0x8000 + 0x8002 exceed the negative range and saturate to
     0x8000; high lanes add normally to 0x8007. */
  op1_s32 = (int32_t)0x80038000;
  op2_s32 = (int32_t)0x00048002;
  res_s32 = __QADD16(op1_s32, op2_s32);
  ASSERT_TRUE(res_s32 == (int32_t)0x80078000);

  /* --- __QSUB16 Test ---------------------------------------------- */
  /* High lane 0x8003 - 0x0004 saturates to 0x8000; low lane 0x8003 - 0x0002
     stays in range (0x8001). */
  op1_s32 = (int32_t)0x80038003;
  op2_s32 = (int32_t)0x00040002;
  res_s32 = __QSUB16(op1_s32, op2_s32);
  ASSERT_TRUE(res_s32 == (int32_t)0x80008001);

  /* --- __QASX Test ---------------------------------------------- */
  /* Expected: high = op1.hi + op2.lo = 0x8009,
     low = op1.lo - op2.hi saturates to 0x8000. */
  op1_s32 = (int32_t)0x80078003;
  op2_s32 = (int32_t)0x00040002;
  res_s32 = __QASX(op1_s32, op2_s32);
  ASSERT_TRUE(res_s32 == (int32_t)0x80098000);

  /* --- __QSAX Test ---------------------------------------------- */
  /* Expected: high = op1.hi - op2.lo saturates to 0x8000,
     low = op1.lo + op2.hi = 0x8009. */
  op1_s32 = (int32_t)0x80038007;
  op2_s32 = (int32_t)0x00020004;
  res_s32 = __QSAX(op1_s32, op2_s32);
  ASSERT_TRUE(res_s32 == (int32_t)0x80008009);

  /* --- __UADD16 Test ---------------------------------------------- */
  /* Unsigned lane-wise add: 1 + 2 -> 3, 2 + 4 -> 6. */
  op1_u32 = 0x00010002;
  op2_u32 = 0x00020004;
  res_u32 = __UADD16(op1_u32, op2_u32);
  ASSERT_TRUE(res_u32 == 0x00030006);

  /* --- __USUB16 Test ---------------------------------------------- */
  /* Unsigned lane-wise subtract: 3 - 2 -> 1, 6 - 4 -> 2. */
  op1_u32 = 0x00030006;
  op2_u32 = 0x00020004;
  res_u32 = __USUB16(op1_u32, op2_u32);
  ASSERT_TRUE(res_u32 == 0x00010002);

  /* --- __UASX Test ---------------------------------------------- */
  /* Expected: high = op1.hi + op2.lo = 0x8009,
     low = op1.lo - op2.hi = 0x7FFF. */
  op1_u32 = 0x80078003;
  op2_u32 = 0x00040002;
  res_u32 = __UASX(op1_u32, op2_u32);
  ASSERT_TRUE(res_u32 == 0x80097FFF);

  /* --- __USAX Test ---------------------------------------------- */
  /* Expected: high = op1.hi - op2.lo = 0x7FFF,
     low = op1.lo + op2.hi = 0x8009. */
  op1_u32 = 0x80038007;
  op2_u32 = 0x00020004;
  res_u32 = __USAX(op1_u32, op2_u32);
  ASSERT_TRUE(res_u32 == 0x7FFF8009);

  /* --- __UHADD16 Test ---------------------------------------------- */
  /* Unsigned lane sums, halved: (1 + 2) >> 1 -> 1, (2 + 4) >> 1 -> 3. */
  op1_u32 = 0x00010002;
  op2_u32 = 0x00020004;
  res_u32 = __UHADD16(op1_u32, op2_u32);
  ASSERT_TRUE(res_u32 == 0x00010003);

  /* --- __UHSUB16 Test ---------------------------------------------- */
  /* Unsigned lane differences, halved: (3 - 2) >> 1 -> 0, (6 - 4) >> 1 -> 1. */
  op1_u32 = 0x00030006;
  op2_u32 = 0x00020004;
  res_u32 = __UHSUB16(op1_u32, op2_u32);
  ASSERT_TRUE(res_u32 == 0x00000001);

  /* --- __UHASX Test ---------------------------------------------- */
  /* Expected: high = (op1.hi + op2.lo) >> 1 = 0x8009 >> 1 = 0x4004,
     low = (op1.lo - op2.hi) >> 1 = 0x7FFF >> 1 = 0x3FFF. */
  op1_u32 = 0x80078003;
  op2_u32 = 0x00040002;
  res_u32 = __UHASX(op1_u32, op2_u32);
  ASSERT_TRUE(res_u32 == 0x40043FFF);

  /* --- __UHSAX Test ---------------------------------------------- */
  /* Expected: high = (op1.hi - op2.lo) >> 1 = 0x3FFF,
     low = (op1.lo + op2.hi) >> 1 = 0x4004. */
  op1_u32 = 0x80038007;
  op2_u32 = 0x00020004;
  res_u32 = __UHSAX(op1_u32, op2_u32);
  ASSERT_TRUE(res_u32 == 0x3FFF4004);

  /* --- __UQADD16 Test ---------------------------------------------- */
  /* High lane 0xFFFE + 0x0002 exceeds 16 bits and saturates to 0xFFFF;
     low lane 2 + 4 -> 6. */
  op1_u32 = 0xFFFE0002;
  op2_u32 = 0x00020004;
  res_u32 = __UQADD16(op1_u32, op2_u32);
  ASSERT_TRUE(res_u32 == 0xFFFF0006);

  /* --- __UQSUB16 Test ---------------------------------------------- */
  /* High lane 2 - 3 would be negative and saturates to 0; low lane 6 - 4 -> 2. */
  op1_u32 = 0x00020006;
  op2_u32 = 0x00030004;
  res_u32 = __UQSUB16(op1_u32, op2_u32);
  ASSERT_TRUE(res_u32 == 0x00000002);

  /* --- __UQASX Test ---------------------------------------------- */
  /* Expected: high = op1.hi + op2.lo = 0xFFF8 + 0x0009 saturates to 0xFFFF,
     low = op1.lo - op2.hi = 3 - 4 saturates to 0. */
  op1_u32 = 0xFFF80003;
  op2_u32 = 0x00040009;
  res_u32 = __UQASX(op1_u32, op2_u32);
  ASSERT_TRUE(res_u32 == 0xFFFF0000);

  /* --- __UQSAX Test ---------------------------------------------- */
  /* Expected: high = op1.hi - op2.lo = 3 - 4 saturates to 0,
     low = op1.lo + op2.hi = 0xFFF8 + 0x0009 saturates to 0xFFFF. */
  op1_u32 = 0x0003FFF8;
  op2_u32 = 0x00090004;
  res_u32 = __UQSAX(op1_u32, op2_u32);
  ASSERT_TRUE(res_u32 == 0x0000FFFF);
#endif
}