/*
 * Exercises the vcageq_f32 NEON intrinsic (absolute compare
 * greater-than-or-equal on four float lanes).  The operands are deliberately
 * left uninitialized and the result is discarded: the function only checks
 * that the call is accepted with these vector types.
 */
void test_vcageQf32 (void)
{
  float32x4_t lhs;
  float32x4_t rhs;
  uint32x4_t result;

  result = vcageq_f32 (lhs, rhs);
}
int main (int argc, char **argv) { int c = 0; int i = 0; int j = 0; uint num_loops = 0; bool interrupt_flag = false; uint number_samples = 0; uint decim_rate = 0; uint fft_size = 0; float threshold = 0.0; double gain = 0.0; int threshold_exceeded = 0; float threshold_exceeded_mag = 0.0; int threshold_exceeded_index = 0; uint32_t start_decision; uint32_t stop_decision; uint32_t start_sensing; uint32_t stop_sensing; uint32_t start_overhead; uint32_t stop_overhead; uint32_t start_dma; uint32_t stop_dma; float dma_time[30]; float sensing_time[30]; float decision_time[30]; float32x4_t floats_real; float32x4_t floats_imag; float32x4_t floats_real_sqr; float32x4_t floats_imag_sqr; float32x4_t floats_add; float32x4_t floats_sqroot; float32x4_t thresholds; uint32x4_t compares; uint32_t decisions[4096]; fftwf_complex *in1; fftwf_complex out[8192]; // Must be 2x max FFT size fftwf_plan p1; struct crash_plblock *usrp_intf_tx; struct crash_plblock *usrp_intf_rx; // Parse command line arguments while (1) { static struct option long_options[] = { /* These options don't set a flag. We distinguish them by their indices. */ {"interrupt", no_argument, 0, 'i'}, {"loop prog", no_argument, 0, 'l'}, {"decim", required_argument, 0, 'd'}, {"fft size", required_argument, 0, 'k'}, {"threshold", required_argument, 0, 't'}, {0, 0, 0, 0} }; /* getopt_long stores the option index here. */ int option_index = 0; // 'n' is the short option, ':' means it requires an argument c = getopt_long (argc, argv, "ild:k:t:", long_options, &option_index); /* Detect the end of the options. */ if (c == -1) break; switch (c) { case 'i': interrupt_flag = true; break; case 'l': loop_prog = 1; break; case 'd': decim_rate = atoi(optarg); break; case 'k': fft_size = (uint)ceil(log2((double)atoi(optarg))); break; case 't': threshold = atof(optarg); break; case '?': /* getopt_long already printed an error message. */ break; default: abort (); } } /* Print any remaining command line arguments (not options). 
*/ if (optind < argc) { printf ("Invalid options:\n"); while (optind < argc) { printf ("\t%s\n", argv[optind++]); } return -1; } if (decim_rate == 0) { printf("INFO: Decimation rate not specified, defaulting to 1\n"); decim_rate = 1; } if (decim_rate > 2047) { printf("ERROR: Decimation rate too high\n"); return -1; } if (fft_size == 0) { printf("INFO: FFT size not specified, defaulting to 256\n"); fft_size = 8; } // FFT size cannot be greater than 4096 or less than 64 if (fft_size > 13 || fft_size < 6) { printf("ERROR: FFT size cannot be greater than 4096 or less than 64\n"); return -1; } if (threshold == 0.0) { printf("INFO: Threshold not set, default to 1.0\n"); threshold = 1.0; } number_samples = (uint)pow(2.0,(double)fft_size); // Set Ctrl-C handler signal(SIGINT, ctrl_c); // Set this process to be real time //struct sched_param param; //param.sched_priority = 99; //if (sched_setscheduler(0, SCHED_FIFO, & param) != 0) { // perror("sched_setscheduler"); // exit(EXIT_FAILURE); //} usrp_intf_tx = crash_open(USRP_INTF_PLBLOCK_ID,WRITE); if (usrp_intf_tx == 0) { printf("ERROR: Failed to allocate usrp_intf_tx plblock\n"); return -1; } usrp_intf_rx = crash_open(USRP_INTF_PLBLOCK_ID,READ); if (usrp_intf_rx == 0) { crash_close(usrp_intf_rx); printf("ERROR: Failed to allocate usrp_intf_rx plblock\n"); return -1; } in1 = (fftw_complex *)(usrp_intf_rx->dma_buff); start_overhead = crash_read_reg(usrp_intf_tx->regs,DMA_DEBUG_CNT); stop_overhead = crash_read_reg(usrp_intf_tx->regs,DMA_DEBUG_CNT); printf("Overhead (us): %f\n",(1e6/150e6)*(stop_overhead - start_overhead)); do { // Set threshold for NEON instruction thresholds[0] = threshold; thresholds[1] = threshold; thresholds[2] = threshold; thresholds[3] = threshold; // Setup FFTW3 p1 = fftwf_plan_dft_1d(fft_size, in1, out, FFTW_FORWARD, FFTW_ESTIMATE); // Global Reset to get us to a clean slate crash_reset(usrp_intf_tx); if (interrupt_flag == true) { crash_set_bit(usrp_intf_tx->regs,DMA_MM2S_INTERRUPT); } // Wait for USRP 
DDR interface to finish calibrating (due to reset). This is necessary // as the next steps recalibrate the interface and are ignored if issued while it is // currently calibrating. while(!crash_get_bit(usrp_intf_tx->regs,USRP_RX_CAL_COMPLETE)); while(!crash_get_bit(usrp_intf_tx->regs,USRP_TX_CAL_COMPLETE)); // Set RX phase crash_write_reg(usrp_intf_tx->regs,USRP_RX_PHASE_INIT,RX_PHASE_CAL); crash_set_bit(usrp_intf_tx->regs,USRP_RX_RESET_CAL); //printf("RX PHASE INIT: %d\n",crash_read_reg(usrp_intf_tx->regs,USRP_RX_PHASE_INIT)); while(!crash_get_bit(usrp_intf_tx->regs,USRP_RX_CAL_COMPLETE)); // Set TX phase crash_write_reg(usrp_intf_tx->regs,USRP_TX_PHASE_INIT,TX_PHASE_CAL); crash_set_bit(usrp_intf_tx->regs,USRP_TX_RESET_CAL); //printf("TX PHASE INIT: %d\n",crash_read_reg(usrp_intf_tx->regs,USRP_TX_PHASE_INIT)); while(!crash_get_bit(usrp_intf_tx->regs,USRP_TX_CAL_COMPLETE)); // Set USRP TX / RX Modes while(crash_get_bit(usrp_intf_tx->regs,USRP_UART_BUSY)); crash_write_reg(usrp_intf_tx->regs,USRP_USRP_MODE_CTRL,CMD_TX_MODE + TX_DAC_RAW_MODE); while(crash_get_bit(usrp_intf_tx->regs,USRP_UART_BUSY)); while(crash_get_bit(usrp_intf_tx->regs,USRP_UART_BUSY)); crash_write_reg(usrp_intf_tx->regs,USRP_USRP_MODE_CTRL,CMD_RX_MODE + RX_ADC_DSP_MODE); while(crash_get_bit(usrp_intf_tx->regs,USRP_UART_BUSY)); // Setup RX path crash_set_bit(usrp_intf_tx->regs, USRP_RX_FIFO_BYPASS); // Bypass RX FIFO so stale data in the FIFO does not cause latency crash_write_reg(usrp_intf_tx->regs, USRP_AXIS_MASTER_TDEST, DMA_PLBLOCK_ID); // Set tdest to spec_sense crash_write_reg(usrp_intf_tx->regs, USRP_RX_PACKET_SIZE, number_samples); // Set packet size crash_clear_bit(usrp_intf_tx->regs, USRP_RX_FIX2FLOAT_BYPASS); // Do not bypass fix2float if (decim_rate == 1) { crash_set_bit(usrp_intf_tx->regs, USRP_RX_CIC_BYPASS); // Bypass CIC Filter crash_set_bit(usrp_intf_tx->regs, USRP_RX_HB_BYPASS); // Bypass HB Filter crash_write_reg(usrp_intf_tx->regs, USRP_RX_GAIN, 1); // Set gain = 1 } else if 
(decim_rate == 2) { crash_set_bit(usrp_intf_tx->regs, USRP_RX_CIC_BYPASS); // Bypass CIC Filter crash_clear_bit(usrp_intf_tx->regs, USRP_RX_HB_BYPASS); // Enable HB Filter crash_write_reg(usrp_intf_tx->regs, USRP_RX_GAIN, 1); // Set gain = 1 // Even, use both CIC and Halfband filters } else if ((decim_rate % 2) == 0) { crash_clear_bit(usrp_intf_tx->regs, USRP_RX_CIC_BYPASS); // Enable CIC Filter crash_write_reg(usrp_intf_tx->regs, USRP_RX_CIC_DECIM, decim_rate/2); // Set CIC decimation rate (div by 2 as we are using HB filter) crash_clear_bit(usrp_intf_tx->regs, USRP_RX_HB_BYPASS); // Enable HB Filter // Offset CIC bit growth. A 32-bit multiplier in the receive chain allows us // to scale the CIC output. gain = 26.0-3.0*log2(decim_rate/2); gain = (gain > 1.0) ? (ceil(pow(2.0,gain))) : (1.0); // Do not allow gain to be set to 0 crash_write_reg(usrp_intf_tx->regs, USRP_RX_GAIN, (uint32_t)gain); // Set gain // Odd, use only CIC filter } else { crash_clear_bit(usrp_intf_tx->regs, USRP_RX_CIC_BYPASS); // Enable CIC Filter crash_write_reg(usrp_intf_tx->regs, USRP_RX_CIC_DECIM, decim_rate); // Set CIC decimation rate crash_set_bit(usrp_intf_tx->regs, USRP_RX_HB_BYPASS); // Bypass HB Filter // gain = 26.0-3.0*log2(decim_rate); gain = (gain > 1.0) ? 
(ceil(pow(2.0,gain))) : (1.0); // Do not allow gain to be set to 0 crash_write_reg(usrp_intf_tx->regs, USRP_RX_GAIN, (uint32_t)gain); // Set gain } // Setup TX path crash_clear_bit(usrp_intf_tx->regs, USRP_TX_FIX2FLOAT_BYPASS); // Do not bypass fix2float crash_set_bit(usrp_intf_tx->regs, USRP_TX_CIC_BYPASS); // Bypass CIC Filter crash_set_bit(usrp_intf_tx->regs, USRP_TX_HB_BYPASS); // Bypass HB Filter crash_write_reg(usrp_intf_tx->regs, USRP_TX_GAIN, 1); // Set gain = 1 // Create a CW signal to transmit float *tx_sample = (float*)(usrp_intf_tx->dma_buff); for (i = 0; i < 4095; i++) { tx_sample[2*i+1] = 0; tx_sample[2*i] = 0.5; } tx_sample[2*4095+1] = 0; tx_sample[2*4095] = 0; // Load waveform into TX FIFO so it can immediately trigger crash_write(usrp_intf_tx, USRP_INTF_PLBLOCK_ID, number_samples); crash_set_bit(usrp_intf_tx->regs,USRP_RX_ENABLE); // Enable RX // First, loop until threshold is exceeded j = 0; while (threshold_exceeded == 0) { crash_read(usrp_intf_rx, USRP_INTF_PLBLOCK_ID, number_samples); // Run FFT fftwf_execute(p1); for (i = 0; i < number_samples/4; i++) { // Calculate sqrt(I^2 + Q^2) floats_real[0] = out[4*i][0]; floats_real[1] = out[4*i+1][0]; floats_real[2] = out[4*i+2][0]; floats_real[3] = out[4*i+3][0]; floats_real_sqr = vmulq_f32(floats_real, floats_real); floats_imag[0] = out[4*i][1]; floats_imag[1] = out[4*i+1][1]; floats_imag[2] = out[4*i+2][1]; floats_imag[3] = out[4*i+3][1]; floats_imag_sqr = vmulq_f32(floats_imag, floats_imag); floats_add = vaddq_f32(floats_real_sqr,floats_imag_sqr); floats_sqroot[0] = sqrt(floats_add[0]); floats_sqroot[1] = sqrt(floats_add[1]); floats_sqroot[2] = sqrt(floats_add[2]); floats_sqroot[3] = sqrt(floats_add[3]); compares = vcageq_f32(floats_sqroot,thresholds); if (compares[0] == -1) { // Do not break loop threshold_exceeded = 1; // Save threshold data threshold_exceeded_mag = floats_sqroot[0]; threshold_exceeded_index = 4*i; break; } else if (compares[1] == -1) { // Do not break loop threshold_exceeded = 
1; // Save threshold data threshold_exceeded_mag = floats_sqroot[1]; threshold_exceeded_index = 4*i+1; break; } else if (compares[2] == -1) { // Do not break loop threshold_exceeded = 1; // Save threshold data threshold_exceeded_mag = floats_sqroot[2]; threshold_exceeded_index = 4*i+2; break; } else if (compares[3] == -1) { // Do not break loop threshold_exceeded = 1; // Save threshold data threshold_exceeded_mag = floats_sqroot[3]; threshold_exceeded_index = 4*i+3; break; } } if (j > 10) { printf("TIMEOUT: Threshold never exceeded\n"); goto cleanup; } j++; sleep(1); } // Second, perform specturm sensing and the spectrum decision while (threshold_exceeded == 1) { threshold_exceeded = 0; crash_read(usrp_intf_rx, USRP_INTF_PLBLOCK_ID, number_samples); // Run FFT fftwf_execute(p1); for (i = 0; i < number_samples/4; i++) { // Calculate sqrt(I^2 + Q^2) floats_real[0] = out[4*i][0]; floats_real[1] = out[4*i+1][0]; floats_real[2] = out[4*i+2][0]; floats_real[3] = out[4*i+3][0]; floats_real_sqr = vmulq_f32(floats_real, floats_real); floats_imag[0] = out[4*i][1]; floats_imag[1] = out[4*i+1][1]; floats_imag[2] = out[4*i+2][1]; floats_imag[3] = out[4*i+3][1]; floats_imag_sqr = vmulq_f32(floats_imag, floats_imag); floats_add = vaddq_f32(floats_real_sqr,floats_imag_sqr); floats_sqroot[0] = sqrt(floats_add[0]); floats_sqroot[1] = sqrt(floats_add[1]); floats_sqroot[2] = sqrt(floats_add[2]); floats_sqroot[3] = sqrt(floats_add[3]); compares = vcageq_f32(floats_sqroot,thresholds); // Was the threshold exceeded? if (compares[0] == -1 || compares[1] == -1 || compares[2] == -1 || compares[3] == -1) { // Do not break loop threshold_exceeded = 1; break; } } if (threshold_exceeded == 0) { // Enable TX crash_set_bit(usrp_intf_tx->regs,USRP_TX_ENABLE); } } // Calculate how long the DMA and the thresholding took by using a counter in the FPGA // running at 150 MHz. 
start_dma = crash_read_reg(usrp_intf_tx->regs,DMA_DEBUG_CNT); crash_read(usrp_intf_rx, USRP_INTF_PLBLOCK_ID, number_samples); stop_dma = crash_read_reg(usrp_intf_tx->regs,DMA_DEBUG_CNT); // Set a huge threshold so we have to examine every bin thresholds[0] = 1000000000.0; thresholds[1] = 1000000000.0; thresholds[2] = 1000000000.0; thresholds[3] = 1000000000.0; start_sensing = crash_read_reg(usrp_intf_tx->regs,DMA_DEBUG_CNT); fftwf_execute(p1); for (i = 0; i < number_samples/4; i++) { floats_real[0] = out[4*i][0]; floats_real[1] = out[4*i+1][0]; floats_real[2] = out[4*i+2][0]; floats_real[3] = out[4*i+3][0]; floats_real_sqr = vmulq_f32(floats_real, floats_real); floats_imag[0] = out[4*i][1]; floats_imag[1] = out[4*i+1][1]; floats_imag[2] = out[4*i+2][1]; floats_imag[3] = out[4*i+3][1]; floats_imag_sqr = vmulq_f32(floats_imag, floats_imag); floats_add = vaddq_f32(floats_real_sqr,floats_imag_sqr); floats_sqroot[0] = sqrt(floats_add[0]); floats_sqroot[1] = sqrt(floats_add[1]); floats_sqroot[2] = sqrt(floats_add[2]); floats_sqroot[3] = sqrt(floats_add[3]); compares = vcageq_f32(floats_sqroot,thresholds); decisions[4*i] = compares[0]; decisions[4*i+1] = compares[1]; decisions[4*i+2] = compares[2]; decisions[4*i+3] = compares[3]; } stop_sensing = crash_read_reg(usrp_intf_tx->regs,DMA_DEBUG_CNT); start_decision = crash_read_reg(usrp_intf_tx->regs,DMA_DEBUG_CNT); for (i = 0; i < number_samples; i++) { if (decisions[i] == -1) { printf("This shouldn't happen\n"); } } stop_decision = crash_read_reg(usrp_intf_tx->regs,DMA_DEBUG_CNT); // Print threshold information printf("Threshold:\t\t\t%f\n",threshold); printf("Threshold Exceeded Index:\t%d\n",threshold_exceeded_index); printf("Threshold Exceeded Mag:\t\t%f\n",threshold_exceeded_mag); printf("DMA Time (us): %f\n",(1e6/150e6)*(stop_dma - start_dma)); printf("Sensing Time (us): %f\n",(1e6/150e6)*(stop_sensing - start_sensing)); printf("Decision Time (us): %f\n",(1e6/150e6)*(stop_decision - start_decision)); // Keep track of 
times so we can report an average at the end if (num_loops < 30) { dma_time[num_loops] = (1e6/150e6)*(stop_dma - start_dma); sensing_time[num_loops] = (1e6/150e6)*(stop_sensing - start_sensing); decision_time[num_loops] = (1e6/150e6)*(stop_decision - start_decision); } num_loops++; if (loop_prog == 1) { printf("Ctrl-C to end program after this loop\n"); } // Force printf to flush since. We are at a real-time priority, so it cannot unless we force it. fflush(stdout); //if (nanosleep(&ask_sleep,&act_sleep) < 0) { // perror("nanosleep"); // exit(EXIT_FAILURE); //} cleanup: crash_clear_bit(usrp_intf_tx->regs,USRP_RX_ENABLE); // Disable RX crash_clear_bit(usrp_intf_tx->regs,USRP_TX_ENABLE); // Disable TX threshold_exceeded = 0; threshold_exceeded_mag = 0.0; threshold_exceeded_index = 0; fftwf_destroy_plan(p1); sleep(1); } while (loop_prog == 1); float dma_time_avg = 0.0; float sensing_time_avg = 0.0; float decision_time_avg = 0.0; if (num_loops > 30) { for (i = 0; i < 30; i++) { dma_time_avg += dma_time[i]; sensing_time_avg += sensing_time[i]; decision_time_avg += decision_time[i]; } dma_time_avg = dma_time_avg/30; sensing_time_avg = sensing_time_avg/30; decision_time_avg = decision_time_avg/30; } else { for (i = 0; i < num_loops; i++) { dma_time_avg += dma_time[i]; sensing_time_avg += sensing_time[i]; decision_time_avg += decision_time[i]; } dma_time_avg = dma_time_avg/num_loops; sensing_time_avg = sensing_time_avg/num_loops; decision_time_avg = decision_time_avg/num_loops; } printf("Number of loops: %d\n",num_loops); printf("Average DMA time (us): %f\n",dma_time_avg); printf("Average Sensing time (us): %f\n",sensing_time_avg); printf("Average Decision time (us): %f\n",decision_time_avg); crash_close(usrp_intf_tx); crash_close(usrp_intf_rx); return 0; }