Esempio n. 1
0
/* Self-test for the vector store/load/constant helpers.

   Returns 0 when every byte comparison succeeds, or 1 as soon as one
   byte differs.  Only the first 8 bytes of each vector object are
   inspected.  */
int
main (void)
{
  int idx;

  /* Round trip: send a value through vector_store and vector_load and
     require the reloaded copy to match the source byte for byte.  */
  {
    __v2si src;
    __v2si staged;
    __v2si reloaded;
    char *src_bytes;
    char *reloaded_bytes;

    init_val(&src);

    /* src -> staged -> reloaded.  */
    vector_store (&staged, src);
    reloaded = vector_load (&staged);

    src_bytes = (char*)&src;
    reloaded_bytes = (char*)&reloaded;

    for (idx = 0; idx < 8; idx++)
      {
	if (src_bytes[idx] != reloaded_bytes[idx])
	  return 1;
      }
  }

  /* Constant: the vector returned by vector_const must have exactly this
     byte image (presumably the two 32-bit lanes 1 and 2 on a
     little-endian target -- confirm against vector_const).  */
  {
    static const char expected[8] = { 1, 0, 0, 0, 2, 0, 0, 0 };
    __v2si konst = vector_const ();
    char *konst_bytes = (char*)&konst;

    for (idx = 0; idx < 8; idx++)
      {
	if (konst_bytes[idx] != expected[idx])
	  return 1;
      }
  }

  return 0;
}
Esempio n. 2
0
// Run one pass of the FIR filter + decimation over a receiver's I/Q buffer.
//
//   rcb  - receiver control block. Reads rcb->mcb and, depending on the pass,
//          rcb->iq_buf or rcb->iq_buf_final as input; writes rcb->iq_buf_final
//          or rcb->iqSamples (at offset iqSamples_remaining * 2) as output.
//          The SIMD build also uses rcb->dest as scratch storage.
//   pass - 1 selects the first stage (fixed decimation by 2, I/Q swapped on
//          output); any other value selects the second stage, whose
//          decimation factor and coefficient table depend on mcb->output_rate.
//
// If the second stage is requested with an output rate other than 48000,
// 96000, 192000 or 384000 the function returns without touching any buffer;
// there is no decimation factor or coefficient table for such a rate.
void
down_loop(struct rcvr_cb* rcb, int pass) {
	int i, j, k = 0;
	int dsample, count, coeff_len;
	bool IQ_swap;
	float* buf, *out;
	float* pSrc, *orig_buf;
	const vector_type* fil_base;	// start of the selected coefficient table
	const vector_type* pFil;	// walks the table during one filter evaluation
	vector_type sum1;
#if !defined(INCLUDE_NEON) && !defined(INCLUDE_SSE2)
	vector_type sum2;	// second-channel accumulator, scalar path only
#endif
	struct main_cb* mcb = rcb->mcb;

	if(1 == pass) {
		// 1st pass filter is hb for 768000
		IQ_swap = true;
		dsample = 2;
		count = RTL_READ_COUNT;
		coeff_len = COEFF3072_H_16_LENGTH;
		buf = &(rcb->iq_buf[0]);
		out = &(rcb->iq_buf_final[COEFF1536_H_32_LENGTH * 2]);
	} else {
		IQ_swap = false;
		count = RTL_READ_COUNT / 2;
		coeff_len = mcb->length_fir;
		buf = &(rcb->iq_buf_final[0]);
		out = &(rcb->iqSamples[rcb->iqSamples_remaining * 2]);

		switch(mcb->output_rate) {
		case 48000:
			dsample = DOWNSAMPLE_192 * 2;
			break;

		case 96000:
			dsample = DOWNSAMPLE_192;
			break;

		case 192000:
			dsample = DOWNSAMPLE_192 / 2;
			break;

		case 384000:
			dsample = DOWNSAMPLE_192 / 4;
			break;

		default:
			// Unsupported rate: dsample would be indeterminate, so bail
			// out before any buffer is read or written.
			return;
		}
	}

	orig_buf = buf;

#if defined(INCLUDE_NEON) || defined(INCLUDE_SSE2)

	// The coefficient table does not change during the pass, so pick it once.
	// NOTE: Assumes coefficients are aligned to 16-byte boundary
	if(1 == pass)
		fil_base = (const vector_type*) mcb->align3072_768_H;
	else {
		switch(mcb->output_rate) {
		case 48000:
			fil_base = (const vector_type*) mcb->align1536_48_H;
			break;

		case 96000:
			fil_base = (const vector_type*) mcb->align1536_96_H;
			break;

		case 192000:
			fil_base = (const vector_type*) mcb->align1536_192_H;
			break;

		case 384000:
			fil_base = (const vector_type*) mcb->align1536_384_H;
			break;

		default:
			// Already rejected above; kept so fil_base is never used
			// uninitialized.
			return;
		}
	}

	// filter is evaluated for two I/Q samples with each iteration, thus use of 'j += 2'
	for(j = 0; j < count / 2; j += 2) {
		pSrc = buf;
		pFil = fil_base;
		sum1 = vector_zero;

		for(i = 0; i < coeff_len / 8; i++) {
			// Unrolled by four: each step consumes 16 input floats and
			// 4 coefficient vectors, accumulating into sum1.
			sum1 = vector_mac(sum1, pSrc, pFil[0]);
			sum1 = vector_mac(sum1, pSrc + 4, pFil[1]);
			sum1 = vector_mac(sum1, pSrc + 8, pFil[2]);
			sum1 = vector_mac(sum1, pSrc + 12, pFil[3]);

			pSrc += 16;
			pFil += 4;
		}

		buf += 4;

		// Only emit a sample when this position survives decimation.
		if(0 == ((j + 2) % dsample)) {
			// post-shuffle & add the filtered values and store to dest.
			vector_store(rcb->dest, sum1, sum1);

			// Since we're decimating we only need one of the sums
			out[k++] = (IQ_swap) ? rcb->dest[0] : rcb->dest[1];
			out[k++] = (IQ_swap) ? rcb->dest[1] : rcb->dest[0];
		}
	}

#else // non SIMD

	// The coefficient table does not change during the pass, so pick it once.
	if(1 == pass)
		fil_base = mcb->coeff_768;
	else {
		switch(mcb->output_rate) {
		case 48000:
			fil_base = mcb->coeff_48;
			break;

		case 96000:
			fil_base = mcb->coeff_96;
			break;

		case 192000:
			fil_base = mcb->coeff_192;
			break;

		case 384000:
			fil_base = mcb->coeff_384;
			break;

		default:
			// Already rejected above; kept so fil_base is never used
			// uninitialized.
			return;
		}
	}

	// One interleaved two-float input sample is consumed per iteration.
	for(j = 0; j < count; j += 2) {
		pSrc = buf;
		pFil = fil_base;
		sum1 = sum2 = 0.0f;

		// The same coefficient is applied to both floats of each input pair.
		for(i = 0; i < coeff_len; i++) {
			sum1 += pSrc[0] * pFil[0];
			sum2 += pSrc[1] * pFil[0];
			pSrc += 2;
			pFil++;
		}

		buf += 2;

		// Only emit a sample when this position survives decimation.
		if(0 == (((j + 2) / 2) % dsample)) {
			out[k++] = (IQ_swap) ? sum2 : sum1;
			out[k++] = (IQ_swap) ? sum1 : sum2;
		}
	}

#endif

	// Move the last coeff_len*2 length of buffer to the front for the next call
	memmove(orig_buf, &orig_buf[count - (coeff_len * 2)],
	        coeff_len * 2 * sizeof(float));
}

// Run both filter/decimation passes for one receiver, in order:
// down_loop(rcb, 1) first, then down_loop(rcb, 2).
void
downsample(struct rcvr_cb * rcb) {
	int pass;

	for(pass = 1; pass <= 2; pass++)
		down_loop(rcb, pass);
}