Пример #1
0
/*
 * Exercises the vcageq_f32 NEON intrinsic on two float32x4_t operands.
 *
 * The operands are never assigned and the result is never read; the function
 * only contains the intrinsic call itself.
 */
void test_vcageQf32 (void)
{
    float32x4_t lhs;
    float32x4_t rhs;
    uint32x4_t result;

    result = vcageq_f32 (lhs, rhs);
}
Пример #2
0
int main (int argc, char **argv) {
  int c = 0;
  int i = 0;
  int j = 0;
  uint num_loops = 0;
  bool interrupt_flag = false;
  uint number_samples = 0;
  uint decim_rate = 0;
  uint fft_size = 0;
  float threshold = 0.0;
  double gain = 0.0;
  int threshold_exceeded = 0;
  float threshold_exceeded_mag = 0.0;
  int threshold_exceeded_index = 0;
  uint32_t start_decision;
  uint32_t stop_decision;
  uint32_t start_sensing;
  uint32_t stop_sensing;
  uint32_t start_overhead;
  uint32_t stop_overhead;
  uint32_t start_dma;
  uint32_t stop_dma;
  float dma_time[30];
  float sensing_time[30];
  float decision_time[30];
  float32x4_t floats_real;
  float32x4_t floats_imag;
  float32x4_t floats_real_sqr;
  float32x4_t floats_imag_sqr;
  float32x4_t floats_add;
  float32x4_t floats_sqroot;
  float32x4_t thresholds;
  uint32x4_t compares;
  uint32_t decisions[4096];
  fftwf_complex *in1;
  fftwf_complex out[8192];  // Must be 2x max FFT size
  fftwf_plan p1;
  struct crash_plblock *usrp_intf_tx;
  struct crash_plblock *usrp_intf_rx;

  // Parse command line arguments
  while (1) {
    static struct option long_options[] = {
      /* These options don't set a flag.
         We distinguish them by their indices. */
      {"interrupt",   no_argument,       0, 'i'},
      {"loop prog",   no_argument,       0, 'l'},
      {"decim",       required_argument, 0, 'd'},
      {"fft size",    required_argument, 0, 'k'},
      {"threshold",   required_argument, 0, 't'},
      {0, 0, 0, 0}
    };
    /* getopt_long stores the option index here. */
    int option_index = 0;
    // 'n' is the short option, ':' means it requires an argument
    c = getopt_long (argc, argv, "ild:k:t:",
                     long_options, &option_index);
    /* Detect the end of the options. */
    if (c == -1) break;

    switch (c) {
      case 'i':
        interrupt_flag = true;
        break;
      case 'l':
        loop_prog = 1;
        break;
      case 'd':
        decim_rate = atoi(optarg);
        break;
      case 'k':
        fft_size = (uint)ceil(log2((double)atoi(optarg)));
        break;
      case 't':
        threshold = atof(optarg);
        break;
      case '?':
        /* getopt_long already printed an error message. */
        break;
      default:
        abort ();
    }
  }
  /* Print any remaining command line arguments (not options). */
  if (optind < argc)
  {
    printf ("Invalid options:\n");
    while (optind < argc) {
      printf ("\t%s\n", argv[optind++]);
    }
    return -1;
  }

  if (decim_rate == 0) {
    printf("INFO: Decimation rate not specified, defaulting to 1\n");
    decim_rate = 1;
  }

  if (decim_rate > 2047) {
    printf("ERROR: Decimation rate too high\n");
    return -1;
  }

  if (fft_size == 0) {
    printf("INFO: FFT size not specified, defaulting to 256\n");
    fft_size = 8;
  }

  // FFT size cannot be greater than 4096 or less than 64
  if (fft_size > 13 || fft_size < 6) {
    printf("ERROR: FFT size cannot be greater than 4096 or less than 64\n");
    return -1;
  }

  if (threshold == 0.0) {
    printf("INFO: Threshold not set, default to 1.0\n");
    threshold = 1.0;
  }

  number_samples = (uint)pow(2.0,(double)fft_size);

  // Set Ctrl-C handler
  signal(SIGINT, ctrl_c);

  // Set this process to be real time
  //struct sched_param param;
  //param.sched_priority = 99;
  //if (sched_setscheduler(0, SCHED_FIFO, & param) != 0) {
  //    perror("sched_setscheduler");
  //    exit(EXIT_FAILURE);
  //}


  usrp_intf_tx = crash_open(USRP_INTF_PLBLOCK_ID,WRITE);
  if (usrp_intf_tx == 0) {
    printf("ERROR: Failed to allocate usrp_intf_tx plblock\n");
    return -1;
  }

  usrp_intf_rx = crash_open(USRP_INTF_PLBLOCK_ID,READ);
  if (usrp_intf_rx == 0) {
    crash_close(usrp_intf_rx);
    printf("ERROR: Failed to allocate usrp_intf_rx plblock\n");
    return -1;
  }

  in1 = (fftw_complex *)(usrp_intf_rx->dma_buff);

  start_overhead = crash_read_reg(usrp_intf_tx->regs,DMA_DEBUG_CNT);
  stop_overhead = crash_read_reg(usrp_intf_tx->regs,DMA_DEBUG_CNT);
  printf("Overhead (us): %f\n",(1e6/150e6)*(stop_overhead - start_overhead));

  do {
    // Set threshold for NEON instruction
    thresholds[0] = threshold;
    thresholds[1] = threshold;
    thresholds[2] = threshold;
    thresholds[3] = threshold;

    // Setup FFTW3
    p1 = fftwf_plan_dft_1d(fft_size, in1, out, FFTW_FORWARD, FFTW_ESTIMATE);

    // Global Reset to get us to a clean slate
    crash_reset(usrp_intf_tx);

    if (interrupt_flag == true) {
      crash_set_bit(usrp_intf_tx->regs,DMA_MM2S_INTERRUPT);
    }
    // Wait for USRP DDR interface to finish calibrating (due to reset). This is necessary
    // as the next steps recalibrate the interface and are ignored if issued while it is
    // currently calibrating.
    while(!crash_get_bit(usrp_intf_tx->regs,USRP_RX_CAL_COMPLETE));
    while(!crash_get_bit(usrp_intf_tx->regs,USRP_TX_CAL_COMPLETE));

    // Set RX phase
    crash_write_reg(usrp_intf_tx->regs,USRP_RX_PHASE_INIT,RX_PHASE_CAL);
    crash_set_bit(usrp_intf_tx->regs,USRP_RX_RESET_CAL);
    //printf("RX PHASE INIT: %d\n",crash_read_reg(usrp_intf_tx->regs,USRP_RX_PHASE_INIT));
    while(!crash_get_bit(usrp_intf_tx->regs,USRP_RX_CAL_COMPLETE));

    // Set TX phase
    crash_write_reg(usrp_intf_tx->regs,USRP_TX_PHASE_INIT,TX_PHASE_CAL);
    crash_set_bit(usrp_intf_tx->regs,USRP_TX_RESET_CAL);
    //printf("TX PHASE INIT: %d\n",crash_read_reg(usrp_intf_tx->regs,USRP_TX_PHASE_INIT));
    while(!crash_get_bit(usrp_intf_tx->regs,USRP_TX_CAL_COMPLETE));

    // Set USRP TX / RX Modes
    while(crash_get_bit(usrp_intf_tx->regs,USRP_UART_BUSY));
    crash_write_reg(usrp_intf_tx->regs,USRP_USRP_MODE_CTRL,CMD_TX_MODE + TX_DAC_RAW_MODE);
    while(crash_get_bit(usrp_intf_tx->regs,USRP_UART_BUSY));
    while(crash_get_bit(usrp_intf_tx->regs,USRP_UART_BUSY));
    crash_write_reg(usrp_intf_tx->regs,USRP_USRP_MODE_CTRL,CMD_RX_MODE + RX_ADC_DSP_MODE);
    while(crash_get_bit(usrp_intf_tx->regs,USRP_UART_BUSY));

    // Setup RX path
    crash_set_bit(usrp_intf_tx->regs, USRP_RX_FIFO_BYPASS);                       // Bypass RX FIFO so stale data in the FIFO does not cause latency
    crash_write_reg(usrp_intf_tx->regs, USRP_AXIS_MASTER_TDEST, DMA_PLBLOCK_ID);  // Set tdest to spec_sense
    crash_write_reg(usrp_intf_tx->regs, USRP_RX_PACKET_SIZE, number_samples);     // Set packet size
    crash_clear_bit(usrp_intf_tx->regs, USRP_RX_FIX2FLOAT_BYPASS);                // Do not bypass fix2float
    if (decim_rate == 1) {
      crash_set_bit(usrp_intf_tx->regs, USRP_RX_CIC_BYPASS);                      // Bypass CIC Filter
      crash_set_bit(usrp_intf_tx->regs, USRP_RX_HB_BYPASS);                       // Bypass HB Filter
      crash_write_reg(usrp_intf_tx->regs, USRP_RX_GAIN, 1);                       // Set gain = 1
    } else if (decim_rate == 2) {
      crash_set_bit(usrp_intf_tx->regs, USRP_RX_CIC_BYPASS);                      // Bypass CIC Filter
      crash_clear_bit(usrp_intf_tx->regs, USRP_RX_HB_BYPASS);                     // Enable HB Filter
      crash_write_reg(usrp_intf_tx->regs, USRP_RX_GAIN, 1);                       // Set gain = 1
    // Even, use both CIC and Halfband filters
    } else if ((decim_rate % 2) == 0) {
      crash_clear_bit(usrp_intf_tx->regs, USRP_RX_CIC_BYPASS);                    // Enable CIC Filter
      crash_write_reg(usrp_intf_tx->regs, USRP_RX_CIC_DECIM, decim_rate/2);       // Set CIC decimation rate (div by 2 as we are using HB filter)
      crash_clear_bit(usrp_intf_tx->regs, USRP_RX_HB_BYPASS);                     // Enable HB Filter
      // Offset CIC bit growth. A 32-bit multiplier in the receive chain allows us
      // to scale the CIC output.
      gain = 26.0-3.0*log2(decim_rate/2);
      gain = (gain > 1.0) ? (ceil(pow(2.0,gain))) : (1.0);                        // Do not allow gain to be set to 0
      crash_write_reg(usrp_intf_tx->regs, USRP_RX_GAIN, (uint32_t)gain);          // Set gain
    // Odd, use only CIC filter
    } else {
      crash_clear_bit(usrp_intf_tx->regs, USRP_RX_CIC_BYPASS);                    // Enable CIC Filter
      crash_write_reg(usrp_intf_tx->regs, USRP_RX_CIC_DECIM, decim_rate);         // Set CIC decimation rate
      crash_set_bit(usrp_intf_tx->regs, USRP_RX_HB_BYPASS);                       // Bypass HB Filter
      //
      gain = 26.0-3.0*log2(decim_rate);
      gain = (gain > 1.0) ? (ceil(pow(2.0,gain))) : (1.0);                        // Do not allow gain to be set to 0
      crash_write_reg(usrp_intf_tx->regs, USRP_RX_GAIN, (uint32_t)gain);          // Set gain
    }

    // Setup TX path
    crash_clear_bit(usrp_intf_tx->regs, USRP_TX_FIX2FLOAT_BYPASS);                // Do not bypass fix2float
    crash_set_bit(usrp_intf_tx->regs, USRP_TX_CIC_BYPASS);                        // Bypass CIC Filter
    crash_set_bit(usrp_intf_tx->regs, USRP_TX_HB_BYPASS);                         // Bypass HB Filter
    crash_write_reg(usrp_intf_tx->regs, USRP_TX_GAIN, 1);                         // Set gain = 1

    // Create a CW signal to transmit
    float *tx_sample = (float*)(usrp_intf_tx->dma_buff);
    for (i = 0; i < 4095; i++) {
      tx_sample[2*i+1] = 0;
      tx_sample[2*i] = 0.5;
    }
    tx_sample[2*4095+1] = 0;
    tx_sample[2*4095] = 0;

    // Load waveform into TX FIFO so it can immediately trigger
    crash_write(usrp_intf_tx, USRP_INTF_PLBLOCK_ID, number_samples);

    crash_set_bit(usrp_intf_tx->regs,USRP_RX_ENABLE);                             // Enable RX

    // First, loop until threshold is exceeded
    j = 0;
    while (threshold_exceeded == 0) {
      crash_read(usrp_intf_rx, USRP_INTF_PLBLOCK_ID, number_samples);
      // Run FFT
      fftwf_execute(p1);
      for (i = 0; i < number_samples/4; i++) {
        // Calculate sqrt(I^2 + Q^2)
        floats_real[0] = out[4*i][0];
        floats_real[1] = out[4*i+1][0];
        floats_real[2] = out[4*i+2][0];
        floats_real[3] = out[4*i+3][0];
        floats_real_sqr = vmulq_f32(floats_real, floats_real);
        floats_imag[0] = out[4*i][1];
        floats_imag[1] = out[4*i+1][1];
        floats_imag[2] = out[4*i+2][1];
        floats_imag[3] = out[4*i+3][1];
        floats_imag_sqr = vmulq_f32(floats_imag, floats_imag);
        floats_add = vaddq_f32(floats_real_sqr,floats_imag_sqr);
        floats_sqroot[0] = sqrt(floats_add[0]);
        floats_sqroot[1] = sqrt(floats_add[1]);
        floats_sqroot[2] = sqrt(floats_add[2]);
        floats_sqroot[3] = sqrt(floats_add[3]);
        compares = vcageq_f32(floats_sqroot,thresholds);
        if (compares[0] == -1) {
          // Do not break loop
          threshold_exceeded = 1;
          // Save threshold data
          threshold_exceeded_mag = floats_sqroot[0];
          threshold_exceeded_index = 4*i;
          break;
        } else if (compares[1] == -1) {
          // Do not break loop
          threshold_exceeded = 1;
          // Save threshold data
          threshold_exceeded_mag = floats_sqroot[1];
          threshold_exceeded_index = 4*i+1;
          break;
        } else if (compares[2] == -1) {
          // Do not break loop
          threshold_exceeded = 1;
          // Save threshold data
          threshold_exceeded_mag = floats_sqroot[2];
          threshold_exceeded_index = 4*i+2;
          break;
        } else if (compares[3] == -1) {
          // Do not break loop
          threshold_exceeded = 1;
          // Save threshold data
          threshold_exceeded_mag = floats_sqroot[3];
          threshold_exceeded_index = 4*i+3;
          break;
        }
      }
      if (j > 10) {
        printf("TIMEOUT: Threshold never exceeded\n");
        goto cleanup;
      }
      j++;
      sleep(1);
    }

    // Second, perform specturm sensing and the spectrum decision
    while (threshold_exceeded == 1) {
      threshold_exceeded = 0;
      crash_read(usrp_intf_rx, USRP_INTF_PLBLOCK_ID, number_samples);
      // Run FFT
      fftwf_execute(p1);
      for (i = 0; i < number_samples/4; i++) {
        // Calculate sqrt(I^2 + Q^2)
        floats_real[0] = out[4*i][0];
        floats_real[1] = out[4*i+1][0];
        floats_real[2] = out[4*i+2][0];
        floats_real[3] = out[4*i+3][0];
        floats_real_sqr = vmulq_f32(floats_real, floats_real);
        floats_imag[0] = out[4*i][1];
        floats_imag[1] = out[4*i+1][1];
        floats_imag[2] = out[4*i+2][1];
        floats_imag[3] = out[4*i+3][1];
        floats_imag_sqr = vmulq_f32(floats_imag, floats_imag);
        floats_add = vaddq_f32(floats_real_sqr,floats_imag_sqr);
        floats_sqroot[0] = sqrt(floats_add[0]);
        floats_sqroot[1] = sqrt(floats_add[1]);
        floats_sqroot[2] = sqrt(floats_add[2]);
        floats_sqroot[3] = sqrt(floats_add[3]);
        compares = vcageq_f32(floats_sqroot,thresholds);
        // Was the threshold exceeded?
        if (compares[0] == -1 || compares[1] == -1 || compares[2] == -1 || compares[3] == -1) {
          // Do not break loop
          threshold_exceeded = 1;
          break;
        }
      }
      if (threshold_exceeded == 0) {
        // Enable TX
        crash_set_bit(usrp_intf_tx->regs,USRP_TX_ENABLE);
      }
    }

    // Calculate how long the DMA and the thresholding took by using a counter in the FPGA
    // running at 150 MHz.
    start_dma = crash_read_reg(usrp_intf_tx->regs,DMA_DEBUG_CNT);
    crash_read(usrp_intf_rx, USRP_INTF_PLBLOCK_ID, number_samples);
    stop_dma = crash_read_reg(usrp_intf_tx->regs,DMA_DEBUG_CNT);

    // Set a huge threshold so we have to examine every bin
    thresholds[0] = 1000000000.0;
    thresholds[1] = 1000000000.0;
    thresholds[2] = 1000000000.0;
    thresholds[3] = 1000000000.0;
    start_sensing = crash_read_reg(usrp_intf_tx->regs,DMA_DEBUG_CNT);
    fftwf_execute(p1);
    for (i = 0; i < number_samples/4; i++) {
      floats_real[0] = out[4*i][0];
      floats_real[1] = out[4*i+1][0];
      floats_real[2] = out[4*i+2][0];
      floats_real[3] = out[4*i+3][0];
      floats_real_sqr = vmulq_f32(floats_real, floats_real);
      floats_imag[0] = out[4*i][1];
      floats_imag[1] = out[4*i+1][1];
      floats_imag[2] = out[4*i+2][1];
      floats_imag[3] = out[4*i+3][1];
      floats_imag_sqr = vmulq_f32(floats_imag, floats_imag);
      floats_add = vaddq_f32(floats_real_sqr,floats_imag_sqr);
      floats_sqroot[0] = sqrt(floats_add[0]);
      floats_sqroot[1] = sqrt(floats_add[1]);
      floats_sqroot[2] = sqrt(floats_add[2]);
      floats_sqroot[3] = sqrt(floats_add[3]);
      compares = vcageq_f32(floats_sqroot,thresholds);
      decisions[4*i] = compares[0];
      decisions[4*i+1] = compares[1];
      decisions[4*i+2] = compares[2];
      decisions[4*i+3] = compares[3];
    }
    stop_sensing = crash_read_reg(usrp_intf_tx->regs,DMA_DEBUG_CNT);

    start_decision = crash_read_reg(usrp_intf_tx->regs,DMA_DEBUG_CNT);
    for (i = 0; i < number_samples; i++) {
        if (decisions[i] == -1) {
        printf("This shouldn't happen\n");
      }
    }
    stop_decision = crash_read_reg(usrp_intf_tx->regs,DMA_DEBUG_CNT);

    // Print threshold information
    printf("Threshold:\t\t\t%f\n",threshold);
    printf("Threshold Exceeded Index:\t%d\n",threshold_exceeded_index);
    printf("Threshold Exceeded Mag:\t\t%f\n",threshold_exceeded_mag);
    printf("DMA Time (us): %f\n",(1e6/150e6)*(stop_dma - start_dma));
    printf("Sensing Time (us): %f\n",(1e6/150e6)*(stop_sensing - start_sensing));
    printf("Decision Time (us): %f\n",(1e6/150e6)*(stop_decision - start_decision));

    // Keep track of times so we can report an average at the end
    if (num_loops < 30) {
      dma_time[num_loops] = (1e6/150e6)*(stop_dma - start_dma);
      sensing_time[num_loops] = (1e6/150e6)*(stop_sensing - start_sensing);
      decision_time[num_loops] = (1e6/150e6)*(stop_decision - start_decision);
    }
    num_loops++;

    if (loop_prog == 1) {
      printf("Ctrl-C to end program after this loop\n");
    }

    // Force printf to flush since. We are at a real-time priority, so it cannot unless we force it.
    fflush(stdout);
    //if (nanosleep(&ask_sleep,&act_sleep) < 0) {
    //    perror("nanosleep");
    //    exit(EXIT_FAILURE);
    //}

cleanup:
    crash_clear_bit(usrp_intf_tx->regs,USRP_RX_ENABLE);                           // Disable RX
    crash_clear_bit(usrp_intf_tx->regs,USRP_TX_ENABLE);                           // Disable TX
    threshold_exceeded = 0;
    threshold_exceeded_mag = 0.0;
    threshold_exceeded_index = 0;
    fftwf_destroy_plan(p1);
    sleep(1);
  } while (loop_prog == 1);

  float dma_time_avg = 0.0;
  float sensing_time_avg = 0.0;
  float decision_time_avg = 0.0;
  if (num_loops > 30) {
    for (i = 0; i < 30; i++) {
      dma_time_avg += dma_time[i];
      sensing_time_avg += sensing_time[i];
      decision_time_avg += decision_time[i];
    }
    dma_time_avg = dma_time_avg/30;
    sensing_time_avg = sensing_time_avg/30;
    decision_time_avg = decision_time_avg/30;
  } else {
    for (i = 0; i < num_loops; i++) {
      dma_time_avg += dma_time[i];
      sensing_time_avg += sensing_time[i];
      decision_time_avg += decision_time[i];
    }
    dma_time_avg = dma_time_avg/num_loops;
    sensing_time_avg = sensing_time_avg/num_loops;
    decision_time_avg = decision_time_avg/num_loops;
  }
  printf("Number of loops: %d\n",num_loops);
  printf("Average DMA time (us): %f\n",dma_time_avg);
  printf("Average Sensing time (us): %f\n",sensing_time_avg);
  printf("Average Decision time (us): %f\n",decision_time_avg);

  crash_close(usrp_intf_tx);
  crash_close(usrp_intf_rx);
  return 0;
}