/**
 * Price one block of simulated paths and update per-slot running statistics.
 *
 * For each of the BLOCK slots, one value is read from `in`, turned into a
 * payoff `max_0(exp(path) - strike_price)`, and folded into that slot's
 * statistics using an incremental update:
 *   - res_cnt[i]  : number of samples seen by slot i
 *   - res_sum[i]  : running mean of the payoffs of slot i
 *   - res_prod[i] : running sum of delta * (res - new_mean) for slot i
 * The statistics are `static`, so they persist (and keep accumulating)
 * across calls; they start at zero.
 *
 * The streams are taken by reference: an hls::stream passed by value would be
 * copied, so reads/writes would never reach the caller's FIFOs.
 *
 * @param in            stream of path values (BLOCK values consumed per call)
 * @param out           receives the updated running mean of each slot
 * @param out2          receives the updated res_prod value of each slot
 * @param strike_price  value subtracted from exp(path) before clamping
 *
 * NOTE(review): max_0 is defined elsewhere; a leftover note in the original
 * suggests it is equivalent to std::max(0.f, x) - confirm.
 */
void pricing(hls::stream<float> &in, hls::stream<float> &out,
             hls::stream<float> &out2, float strike_price) {
    const int BLOCK = 64;

    static ap_uint<32> res_cnt[BLOCK];
    static float res_sum[BLOCK];
    static float res_prod[BLOCK];

    for (int i = 0; i < BLOCK; ++i) {
#pragma HLS PIPELINE II=1
        float path = in.read();
        float res = max_0(hls::expf(path) - strike_price);

        // Load the current statistics of this slot.
        ap_uint<32> l_cnt = res_cnt[i];
        float l_sum = res_sum[i];
        float l_prod = res_prod[i];

        // Incremental mean / squared-deviation update.
        ap_uint<32> n_cnt = l_cnt + 1;
        float delta = res - l_sum;
        float n_sum = l_sum + delta / n_cnt;
        float n_prod = l_prod + delta * (res - n_sum);

        res_cnt[i] = n_cnt;
        res_sum[i] = n_sum;
        res_prod[i] = n_prod;

        // Emit the values just computed rather than reading the arrays back.
        out.write(n_sum);
        out2.write(n_prod);
    }
}
/**
 * Send the direction list to the output stream as packed words.
 *
 * The first word written is `numDirections`. After that, entries of the
 * `directions` array are packed DIRECTIONS_IN_BUS at a time into one word,
 * earlier entries ending up in the more significant positions (each entry is
 * shifted in by DIRECTION_SIZE bits). Slots whose index is at or beyond
 * `numDirections` are filled with zero padding. Emission stops after the
 * word that reaches `numDirections`, or after
 * MAX_NUM_DIRECTIONS / DIRECTIONS_IN_BUS words, whichever comes first.
 */
void writeDirections(hls::stream<uint32>& out, uint16 numDirections) {
#pragma HLS INLINE
    out.write(numDirections);

    uint16 groupStart = 0;  // index of the first direction of the current word

    writeDirectionsLoop:
    for (uint8 word = 0; word < MAX_NUM_DIRECTIONS / DIRECTIONS_IN_BUS; word++) {
        // Min: MIN_NUM_DIRECTIONS / DIRECTIONS_IN_BUS (8)
        // Max: MAX_NUM_DIRECTIONS / DIRECTIONS_IN_BUS (32)
#pragma HLS LOOP_TRIPCOUNT min=8 max=32
        uint32 packed = 0;

        // Compress a run of narrow direction codes into one bus word.
        writeDirectionsShiftLoop:
        for (uint8 slot = 0; slot < DIRECTIONS_IN_BUS; slot++) {
#pragma HLS UNROLL
            packed <<= DIRECTION_SIZE;
            uint8 padChar = 0;
            packed |= (word * DIRECTIONS_IN_BUS + slot < numDirections)
                          ? directions[groupStart + slot]
                          : padChar;
        }

        out.write(packed);

        groupStart += DIRECTIONS_IN_BUS;
        if (groupStart >= numDirections) {
            break;
        }
    }
}
void toplevel(hls::stream<uint32>& in, hls::stream<uint32>& out) { #pragma HLS INTERFACE ap_fifo port=in #pragma HLS INTERFACE ap_fifo port=out #pragma HLS RESOURCE variable=in core=AXI4Stream #pragma HLS RESOURCE variable=out core=AXI4Stream #pragma HLS INTERFACE ap_ctrl_none port=return #pragma HLS ARRAY_PARTITION variable=openings complete dim=1 #pragma HLS ARRAY_PARTITION variable=inGrid complete dim=1 #pragma HLS ARRAY_MAP variable=directions instance=instance1 horizontal #pragma HLS ARRAY_MAP variable=tile instance=instance1 horizontal uint8 tileCoords = in.read(); uint8 tileSize = in.read(); uint8 tileDataLen = in.read(); // Number of 32-bit bits readData(in, tileDataLen); uint8 numOpenings = findOpenings(tileSize); out.write(tileCoords); out.write(numOpenings); if (numOpenings > 0) { writeEntrance(out); findDeadEnds(tileSize); uint16 numDirections = findPath(tileSize); writeDirections(out, numDirections); } }
void antithetic( hls::stream<float> &rn_in, hls::stream<float> &rn_out_1, hls::stream<float> &rn_out_2) { #pragma HLS interface ap_fifo port=rn_in #pragma HLS resource core=AXI4Stream variable=rn_in #pragma HLS interface ap_fifo port=rn_out_1 #pragma HLS resource core=AXI4Stream variable=rn_out_1 #pragma HLS interface ap_fifo port=rn_out_2 #pragma HLS resource core=AXI4Stream variable=rn_out_2 #pragma HLS interface ap_ctrl_none port=return //for (int i = 0; i < 10 / 2; ++i) { { //while (true) { #pragma HLS PIPELINE II=2 float r1 = rn_in.read(); float r2 = rn_in.read(); rn_out_1.write(r1); rn_out_2.write(r2); rn_out_1.write(negate(r1)); rn_out_2.write(negate(r2)); } }
// ----------------------------------------------------------- void lsupdate2SW_step ( const uint32_t step, const uint32_t stepsMC, const uint32_t pathsMC, const float discount, hls::stream<float> &contin_in, hls::stream<float> &payoff_in, hls::stream<float> &cashFlow_in, hls::stream<float> &cashFlow_out, hls::stream<float> &cashFlowDisc_out, hls::stream<float> &toAccum_out ) { printf("lsupdate2SW_step\n"); /** zerosLoop:for(uint32_t path=0; path<pathsMC; ++path) { #pragma HLS PIPELINE II=1 enable_flush // --------------------------------- // write to outputs cashFlow_out.write((float) 0.0f); cashFlowDisc_out.write((float) 0.0f); } */ //stepsLoop:for(uint32_t step=0; step<=stepsMC; ++step) { pathsLoop:for(uint32_t path=0; path<pathsMC; ++path) { float continuation = contin_in.read(); float payoff = payoff_in.read(); float cashFlow = cashFlow_in.read(); float discountedCashFlow = discount * cashFlow; // --------------------------------- float newY; if( (payoff > (float) 0.0f) && (payoff >= (float) continuation) ) newY = payoff; else newY = discountedCashFlow; // --------------------------------- // write to outputs if(step < stepsMC) cashFlow_out.write(newY); if(step < stepsMC) cashFlowDisc_out.write(discount * newY); if(step == stepsMC) toAccum_out.write(newY); } } return; }
void dut( hls::stream<bit32_t> &strm_in, hls::stream<bit32_t> &strm_out ) { // Declare the input and output variables complex<float> out[4096]; complex<float> complex_In1[4096]; complex<float> complex_In2[4096]; float input_data_re = 0; //------------------------------------------------------- // Input processing //------------------------------------------------------- // Read the two input 32-bit words bit32_t input1_lo; bit32_t input2_hi; bit32_t output_r; bit32_t output_i; for(int i = 0; i < 4096 ;i++) { input1_lo = strm_in.read(); input2_hi = strm_in.read(); input_data_re = input1_lo; complex_In1[i] = complex<float>(input_data_re, 0); input_data_re = input2_hi; complex_In2[i] = complex<float>(input_data_re, 0); } // for(int m = 0; m < 4096 ;m++) // { //// input_data_re = in1[m]; // complex_In1[m] = complex<float>(80, 0); //// input_data_re = in2[m]; // complex_In2[m] = complex<float>(80, 0); // } // ------------------------------------------------------ // Call Hybrid Imaging // ------------------------------------------------------ hybrid_image(12, complex_In1, complex_In2, out ); // ------------------------------------------------------ // Output processing // ------------------------------------------------------ // Write out the computed digit value for(int i = 0; i < 4096 ;i++) { // printf("%f\n",out[i]); // output = out[i]; output_r = out[i].real(); output_i = out[i].imag(); strm_out.write(output_r); strm_out.write(output_i ); } }
/*
 * toplevel - stream front end of the tile solver.
 *
 * Sequence visible in the code below:
 *   1. init() is called, then `side` is read from `input` and `ntiles` is set
 *      to side * side.
 *   2. For every tile, four words are read from `input` into tiles[t][0..3];
 *      mapcolours() is called afterwards.
 *   3. Bit 0 of `avail` is cleared via BIT36(0) (tile 0 is fixed in
 *      position 0) and `seq` is set to 1.
 *   4. Main loop, repeated while `terminate` is false:
 *        - solve() runs when seq == 1;
 *        - if `terminate` is now set, 0 is written to `output` and the loop
 *          ends;
 *        - otherwise 1 is written to `output` and a command word is read:
 *            command 0 -> leave the loop,
 *            command 1 -> write colour(p, e) for every position p and edge e,
 *            any other -> continue without output;
 *        - backtrack() is guarded by `seq == 0`.
 *
 * `seq` is described by the original author as a "magic flag to enforce
 * sequencing"; the statement order around it is deliberate and must not be
 * rearranged.
 *
 * NOTE(review): `seq` is assigned 1 right after the command is read, so
 * unless colour() (or something else not visible here) resets it, the
 * `if (seq == 0) backtrack();` guard looks like it can never be true in plain
 * C++ semantics - confirm whether backtrack() is actually reached.
 * NOTE(review): `side`, `ntiles`, `tiles`, `avail`, `seq` and `terminate` are
 * declared outside this block.
 */
//Top-level function void toplevel(hls::stream<uint32> &input, hls::stream<uint32> &output) { #pragma HLS INTERFACE ap_fifo port=input #pragma HLS INTERFACE ap_fifo port=output #pragma HLS RESOURCE variable=input core=AXI4Stream #pragma HLS RESOURCE variable=output core=AXI4Stream #pragma HLS INTERFACE ap_ctrl_none port=return uint32 command; init(); side = input.read(); ntiles = side * side; for(u8 t = 0; t < ntiles; t++) for (u8 e = 0; e < 4; e++) tiles[t][e] = input.read(); mapcolours(); // we start off with tile 0 in position 0 avail &= ~BIT36(0); seq = 1; while (!terminate) { if (seq == 1) solve(); if (terminate) { output.write(0); break; } /* use magic flag to enforce sequencing */ seq = 0; output.write(1); if (seq == 0) command = input.read(); seq = 1; /* command 0: terminate */ if (command == 0) break; /* command 1: write output */ if (command == 1) for (u8 p = 0; p < ntiles; p++) for(u8 e = 0; e < 4; e++) output.write(colour(p, e)); /* any other command (canonically 2) will cause search * to continue without output */ if (seq == 0) backtrack(); seq = 1; } }
/**
 * Fill `rns` with exactly `size` values produced by box_muller().
 *
 * box_muller() yields two values per call; both are written in order, except
 * that the second value of the last call is dropped when `size` is odd.
 */
void fill_gaussian_rng_stream(hls::stream<calc_t> &rns, unsigned size) {
    double z0, z1;
    for (unsigned produced = 0; produced < size; produced += 2) {
        box_muller(z0, z1);
        rns.write(z0);
        // Only emit the partner value if it still fits into the request.
        if (produced + 1 < size) {
            rns.write(z1);
        }
    }
}
void gauss_transform( hls::stream<uint32_t> &uniform_rns, hls::stream<float> &gaussian_rns) { #pragma HLS interface ap_fifo port=uniform_rns #pragma HLS resource core=AXI4Stream variable=uniform_rns #pragma HLS interface ap_fifo port=gaussian_rns #pragma HLS resource core=AXI4Stream variable=gaussian_rns #pragma HLS interface ap_ctrl_none port=return float u1, u2, r, z1, z2; while (true){ //for (int i = 0; i < 100/2; ++i) { #pragma HLS PIPELINE II=2 // intervall (0:1] u1 = ((float)uniform_rns.read() + 1.f) * (float)(1.0 / 4294967296.0); // intervall (0:2PI] u2 = ((float)uniform_rns.read() + 1.f) * (float)(2 * M_PI / 4294967296.0); r = hls::sqrtf(-2 * hls::logf(u1)); z1 = r * hls::cosf(u2); z2 = r * hls::sinf(u2); gaussian_rns.write(z1); gaussian_rns.write(z2); } }
/**
 * Software FIR filter over a stream of samples.
 *
 * Reads RUN_LENGTH samples from `input_val`. Each sample is pushed into the
 * end of a TAPS-deep delay line (stored as `short`, so the sample is narrowed
 * on insertion), and the sum of delay-line entries multiplied by the matching
 * coefficients is written to `output_val`.
 *
 * The delay line is `static`, so its contents carry over between calls.
 */
void fir_sw(hls::stream<int> &input_val, hls::stream<int> &output_val)
{
    static short shift_reg[TAPS] = {0};
    const short coeff[TAPS] = {6,0,-4,-3,5,6,-6,-13,7,44,64,44,7,-13,
                               -6,6,5,-3,-4,0,6};

    for (int n = 0; n < RUN_LENGTH; n++) {
        const int sample = input_val.read();

        // Advance the delay line by one position; the oldest entry drops out.
        for (int tap = 1; tap < TAPS; tap++) {
            shift_reg[tap - 1] = shift_reg[tap];
        }
        shift_reg[TAPS - 1] = sample;

        // Multiply-accumulate over all taps.
        int acc = 0;
        for (int tap = 0; tap < TAPS; tap++) {
            acc += shift_reg[tap] * coeff[tap];
        }

        output_val.write(acc);
    }
}
/**
 * Stream wrapper around digitrec().
 *
 * Two 32-bit words are read from `strm_in` (low word first) and assembled
 * into a `digit` value: the first word fills bits 31..0, the second fills the
 * bits from 32 up to the top of the type. The classification returned by
 * digitrec() is written to `strm_out` as a single word.
 */
void dut(hls::stream<bit32_t> &strm_in, hls::stream<bit32_t> &strm_out)
{
    // ---- Input: low word, then high word ----
    bit32_t low_word = strm_in.read();
    bit32_t high_word = strm_in.read();

    // Assemble the digit bit pattern via range assignment.
    digit sample;
    sample(31, 0) = low_word;
    sample(sample.length() - 1, 32) = high_word;

    // ---- Classification ----
    bit4_t label;
    label = digitrec(sample);

    // ---- Output: full width of the result ----
    strm_out.write(label(label.length() - 1, 0));
}
// ----------------------------------------------------------- void accumSW( const uint32_t stepsMC, const uint32_t pathsMC, hls::stream<float> &inData, hls::stream<float> &outAccum ) { printf("accumSW\n"); float sums[ACCUM_ELEM]; #pragma HLS RESOURCE variable=sums core=RAM_2P_BRAM #pragma HLS DEPENDENCE variable=sums false stepsLoop:for(uint32_t step=0; step<=stepsMC; ++step) { // ------------------------------------------ resetLoop:for(uint8_t i=0; i<ACCUM_ELEM; ++i) { #pragma HLS PIPELINE II=1 enable_flush sums[i] = (float) 0.0f; } // ------------------------------------------ uint8_t index = (uint8_t) 0; pathsLoop:for(uint32_t i=0; i<pathsMC; ++i) { #pragma HLS PIPELINE II=1 enable_flush float data = inData.read(); float oldSum = sums[index]; float newSum = oldSum + data; sums[index] = newSum; index = (index<(ACCUM_ELEM-1))?++index:(uint8_t)0; } // ------------------------------------------ float totalSum = (float) 0.0f; totalLoop:for(uint8_t i=0; i<ACCUM_ELEM;++i) { #pragma HLS PIPELINE II=1 totalSum += sums[i]; } // ------------------------------------------ outAccum.write(totalSum); } return; }
void fe_wfl(hls::stream< ap_uint<32> > sampleFifo, hls::stream< ap_uint<32> > featureFifo, ap_uint<8> windowSize) { #pragma HLS INTERFACE ap_ctrl_none port=return #pragma HLS INTERFACE ap_fifo port=featureFifo #pragma HLS INTERFACE ap_fifo port=sampleFifo ap_uint<32> data; ap_int<32> wflChannel1 = 0; ap_int<32> wflChannel2 = 0; ap_int<16> sampleChannel1 = 0; ap_int<16> sampleChannel2 = 0; ap_int<16> prevSampleChannel1 = 0; ap_int<16> prevSampleChannel2 = 0; ap_uint<8> cntSamples = 0; // Wait for Samples to arrive in FIFO while( windowSize == 0 ) { } while(1) { wflChannel1 = 0; wflChannel2 = 0; // Count zero-crossing for channel 1 & 2 for(cntSamples = 0; cntSamples < windowSize; cntSamples++) { // Read data from Sample-FIFO // 2 16 bit Samples at one position in 32 bit FIFO => Process 2 channels in parallel data = sampleFifo.read(); sampleChannel1 = data(15, 0); sampleChannel2 = data(31, 16); if (cntSamples > 0) { wflChannel1 += abs2(sampleChannel1 - prevSampleChannel1); wflChannel2 += abs2(sampleChannel2 - prevSampleChannel2); } prevSampleChannel1 = sampleChannel1; prevSampleChannel2 = sampleChannel2; } // Write back features to Feature-FIFO featureFifo.write(wflChannel1); featureFifo.write(wflChannel2); } }
/**
 * Pack the six entries of `openings` into one word and write it to `out`.
 *
 * The entries are shifted in one after another with a 4-bit shift between
 * them, so openings[0] ends up in the most significant position and
 * openings[5] in the least significant one:
 *   [0] entrance row, [1] entrance column, [2] entrance side,
 *   [3] exit row,     [4] exit column,     [5] exit side.
 * NOTE(review): assumes every entry fits in 4 bits; larger values would
 * overlap the neighbouring field.
 */
void writeEntrance(hls::stream<uint32>& out) {
#pragma HLS INLINE
    uint32 packed = 0;

    // Entrance: row, column, side.
    packed |= openings[0];
    packed <<= 4;
    packed |= openings[1];
    packed <<= 4;
    packed |= openings[2];

    // Exit: row, column, side.
    packed <<= 4;
    packed |= openings[3];
    packed <<= 4;
    packed |= openings[4];
    packed <<= 4;
    packed |= openings[5];

    out.write(packed);
}
void heston_kernel_ml(const params_ml params, hls::stream<calc_t> &gaussian_rn1, hls::stream<calc_t> &gaussian_rn2, hls::stream<calc_t> &prices) { #pragma HLS interface ap_none port=params #pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" \ variable=params #pragma HLS interface ap_fifo port=gaussian_rn1 #pragma HLS resource core=AXI4Stream variable=gaussian_rn1 #pragma HLS interface ap_fifo port=gaussian_rn2 #pragma HLS resource core=AXI4Stream variable=gaussian_rn2 #pragma HLS interface ap_fifo port=prices #pragma HLS resource core=AXI4Stream variable=prices #pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" \ variable=return // write block size to stream prices.write(BLOCK_SIZE); state_t state_coarse[BLOCK_SIZE]; #pragma HLS data_pack variable=state_coarse state_t state_fine[BLOCK_SIZE]; #pragma HLS data_pack variable=state_fine w_both_t w_both[BLOCK_SIZE]; #pragma HLS data_pack variable=w_both ap_uint<6> upper_j = params.ml_constant + (params.do_multilevel ? 
1 : 0); for (uint32_t path = 0; path < params.path_cnt; path += BLOCK_SIZE) { for (uint32_t step = 0; step != params.step_cnt_coarse; ++step) { for (ap_uint<6> j = 0; j != upper_j; ++j) { for (ap_uint<10> block_i = 0; block_i != BLOCK_SIZE; ++block_i) { #pragma HLS PIPELINE II=1 bool is_fine = j != params.ml_constant; // // initialize // state_t l_state_coarse, l_state_fine; w_both_t l_w_both; if (step == 0 && j == 0) { l_state_coarse = get_init_state(params); l_state_fine = get_init_state(params); l_w_both = get_w_zero(); } else { l_state_coarse = state_coarse[block_i]; l_state_fine = state_fine[block_i]; l_w_both = w_both[block_i]; } // // calculate next step // state_t n_state_coarse, n_state_fine; w_both_t n_w_both; if (is_fine) { // step fine float w_stock = gaussian_rn1.read(); float w_vola = gaussian_rn2.read(); n_state_fine = get_next_step(params, l_state_fine, w_stock, w_vola, true); n_state_coarse = l_state_coarse; // accumulate random numbers n_w_both.w_stock = l_w_both.w_stock + w_stock; n_w_both.w_vola = l_w_both.w_vola + w_vola; } else { // step coarse n_state_coarse = get_next_step(params, l_state_coarse, l_w_both.w_stock, l_w_both.w_vola, false); n_state_fine = l_state_fine; n_w_both = get_w_zero(); } state_coarse[block_i] = n_state_coarse; state_fine[block_i] = n_state_fine; w_both[block_i] = n_w_both; // // write out // if ((step + 1 == params.step_cnt_coarse) && (j + 1 >= params.ml_constant)) { if (is_fine) { prices.write(get_log_price(n_state_fine)); } else { prices.write(get_log_price(n_state_coarse)); } } } } } } }
void pyrconstuct_top( cmpxDataIn imgIn[IMG_WIDTH], //hls::stream<t_image> &imgIn, hls::stream<t_pyr_complex> &pyrFilOut, const int nL ) { //#pragma HLS interface ap_fifo depth=1 port=ovflo #pragma HLS interface ap_fifo depth=512 port=imgIn #pragma HLS interface ap_fifo depth=1520 port=pyrFilOut //#pragma HLS data_pack variable=imgIn #pragma HLS data_pack variable=pyrFilOut #pragma HLS dataflow cmpxDataIn imgInTmp[FFT_LENGTH]; //#pragma HLS RESOURCE variable=imgInTmp core=RAM_2P_BRAM cmpxDataOut imgOutTmpFFTStream[FFT_LENGTH]; cmpxDataOut imgOutTmpBlockRam[FFT_LENGTH]; cmpxDataOut fftPyrOut[1520]; config_t fft_config; config2_t fft_config2; status_t fft_status; status2_t fft_status2; #pragma HLS data_pack variable=fft_config #pragma HLS data_pack variable=fft_config2 #pragma HLS STREAM variable=imgInTmp depth=512 dim=1 #pragma HLS STREAM variable=imgOutTmpFFTStream depth=512 dim=1 //#pragma HLS STREAM variable=fftPyrOut depth=1520 dim=1 fft_config.setDir(true); //dummy_proc_fe(imgIn, imgInTmp); for(int i=0;i<512;i++) { //imgInTmp[i] = imgIn.read(); imgInTmp[i] = imgIn[i]; } hls::fft<config1>(imgInTmp, imgOutTmpFFTStream, &fft_status, &fft_config); //imgOutTmp dummy_proc2(imgOutTmpFFTStream, imgOutTmpBlockRam); pyrbuild_top(imgOutTmpBlockRam, fftPyrOut, 512, 512); //return; #ifndef __SYNTHESIS__ { FILE * fo = fopen("fftOut.txt", "wb"); for(int i=0;i<512;i++) { fprintf(fo, "%.8f %.8f\n", imgOutTmpFFTStream[i].real().to_float(), imgOutTmpFFTStream[i].imag().to_float()); } fclose(fo); } { FILE * fo = fopen("fftOutFilter.txt", "wb"); for(int i=0;i<1520;i++) { fprintf(fo, "%.8f %.8f\n", fftPyrOut[i].real().to_float(), fftPyrOut[i].imag().to_float()); } } /* for(int i=0;i<512;i++) std::cout << "FFT Out " << i << " : " << imgOutTmp[i] << std::endl; */ #endif int fsizes[7]={ 512,512,256,128,64,32,16 }; //int l = 0; LPH: for(int l=0;l<Kset;l++){ cmpxDataOut2 ifftPyrOut2[512]; #pragma HLS STREAM variable=ifftPyrOut2 depth=512 dim=1 #pragma HLS DATAFLOW //optimized able 
cmpxDataOut2 ifftPyrOut[512]; //#pragma HLS RESOURCE variable=ifftPyrOut core=RAM_2P_BRAM //#pragma HLS ARRAY_PARTITION variable=ifftPyrOut complete dim=1 #pragma HLS STREAM variable=ifftPyrOut depth=512 dim=1 cmpxDataIn imgInTmp2[512]; #pragma HLS STREAM variable=imgInTmp2 depth=512 dim=1 int cidx = climits[l]; int nlimit = limits[l]; int fsize = fsizes[l]; int lshift = lshifts[l]; for(int i=0;i<fsize;i++) { #pragma HLS pipeline cmpxDataIn val (0,0); if (i<nlimit) { val.real() = (fftPyrOut[cidx + i ].real() >> lshift); val.imag() = (fftPyrOut[cidx + i ].imag() >> lshift); } imgInTmp2[i] = val; } fft_config2.setDir(false); //fft_config2.setSch(0x2AB); fft_config2.setNfft(llimits[l]); hls::fft<config2>(imgInTmp2, ifftPyrOut, &fft_status2, &fft_config2); dummy_proc2<cmpxDataOut2>(ifftPyrOut,ifftPyrOut2); //#ifndef __SYNTHESIS__ // if (l==0) // { // FILE * fo = fopen("ifft_in.txt", "wb"); // for(int i=0;i<512;i++) { // fprintf(fo, "%.8f %.8f\n", imgInTmp2[i].real().to_float(), imgInTmp2[i].imag().to_float()); // } // fclose(fo); // // fo = fopen("ifft_out.txt", "wb"); // for(int i=0;i<512;i++) { // fprintf(fo, "%.8f %.8f\n", ifftPyrOut[i].real().to_float(), ifftPyrOut[i].imag().to_float()); // } // fclose(fo); // } //#endif for(int i=0;i<fsize;i++) { #pragma HLS pipeline t_pyr_complex val; val.real() = ifftPyrOut2[i].real() >> pshifts[l]; val.imag() = ifftPyrOut2[i].imag() >> pshifts[l]; pyrFilOut.write(val); } }//end of ifft
/*
 * pyrconstuct_top - C/RTL co-simulation wrapper around the original design
 * function (called here as AESL_ORIG_DUT_pyrconstuct_top). The AESL_/AUTOTB_
 * naming suggests this code is tool-generated; treat it as such and avoid
 * hand edits (NOTE(review): confirm its origin before modifying).
 *
 * The wrapper first tries to open ".hls_cosim_wrapc_switch.log" and then
 * runs in one of two modes:
 *
 *   File opened successfully (post-check mode)
 *     - reads the recorded output-stream size for the current transaction;
 *     - reads the RTL output tokens for port "pyrFilOut_V", replacing any
 *       'X'/'x' characters with '0' and printing a [SIM-201] warning once per
 *       token; exits with status 1 on a malformed transaction;
 *     - splits every 34-bit token into bits 16..0 (real part) and bits
 *       33..17 (imaginary part), converts them to complex values and writes
 *       them to `pyrFilOut`;
 *     - the design function itself is NOT called in this mode.
 *
 *   File not available (test-vector dump mode)
 *     - drains `pyrFilOut`, dumps the real and imaginary parts of the 512
 *       `imgIn` elements as input test vectors, and restores `pyrFilOut`;
 *     - calls AESL_ORIG_DUT_pyrconstuct_top(imgIn, pyrFilOut, nL);
 *     - drains `pyrFilOut` again, dumps the elements added by the call as
 *       output test vectors together with their count, then pushes all
 *       elements back so the caller still sees them.
 *
 * Both modes keep a static transaction counter that is incremented per call.
 *
 * NOTE(review): in the visible code the two `new sc_lv<17>[1520]` buffers of
 * the post-check branch, and `wrapc_stream_egress_status_pyrFilOut_V` in the
 * dump branch, are never released.
 */
void pyrconstuct_top ( std::complex<ap_fixed<16, 1, (ap_q_mode) 5, (ap_o_mode)3, 0> > imgIn[512], hls::stream<std::complex<ap_fixed<17, 6, (ap_q_mode) 0, (ap_o_mode)3, 0> > >& pyrFilOut, const int nL) { fstream wrapc_switch_file_token; wrapc_switch_file_token.open(".hls_cosim_wrapc_switch.log"); int AESL_i; if (wrapc_switch_file_token.good()) { static unsigned AESL_transaction_pc = 0; string AESL_token; string AESL_num; static AESL_FILE_HANDLER aesl_fh; // define output stream variables: "pyrFilOut" std::vector<std::complex<ap_fixed<17, 6, (ap_q_mode) 0, (ap_o_mode)3, 0> > > aesl_tmp_0; int aesl_tmp_1; int aesl_tmp_2 = 0; // read output stream size: "pyrFilOut" aesl_fh.read(WRAPC_STREAM_SIZE_OUT_pyrFilOut_V, AESL_token); // [[transaction]] aesl_fh.read(WRAPC_STREAM_SIZE_OUT_pyrFilOut_V, AESL_num); // transaction number if (atoi(AESL_num.c_str()) == AESL_transaction_pc) { aesl_fh.read(WRAPC_STREAM_SIZE_OUT_pyrFilOut_V, AESL_token); // pop_size aesl_tmp_1 = atoi(AESL_token.c_str()); aesl_fh.read(WRAPC_STREAM_SIZE_OUT_pyrFilOut_V, AESL_token); // [[/transaction]] } // output port post check: "pyrFilOut_V" aesl_fh.read(AUTOTB_TVOUT_PC_pyrFilOut_V, AESL_token); // [[transaction]] if (AESL_token != "[[transaction]]") { exit(1); } aesl_fh.read(AUTOTB_TVOUT_PC_pyrFilOut_V, AESL_num); // transaction number if (atoi(AESL_num.c_str()) == AESL_transaction_pc) { aesl_fh.read(AUTOTB_TVOUT_PC_pyrFilOut_V, AESL_token); // data std::vector<sc_bv<34> > pyrFilOut_V_pc_buffer; int i = 0; while (AESL_token != "[[/transaction]]") { bool no_x = false; bool err = false; // search and replace 'X' with "0" from the 1st char of token while (!no_x) { size_t x_found = AESL_token.find('X'); if (x_found != string::npos) { if (!err) { cerr << "@W [SIM-201] RTL produces unknown value 'X' on port 'pyrFilOut_V', possible cause: There are uninitialized variables in the C design." 
<< endl; err = true; } AESL_token.replace(x_found, 1, "0"); } else { no_x = true; } } no_x = false; // search and replace 'x' with "0" from the 3rd char of token while (!no_x) { size_t x_found = AESL_token.find('x', 2); if (x_found != string::npos) { if (!err) { cerr << "@W [SIM-201] RTL produces unknown value 'X' on port 'pyrFilOut_V', possible cause: There are uninitialized variables in the C design." << endl; err = true; } AESL_token.replace(x_found, 1, "0"); } else { no_x = true; } } // push token into output port buffer if (AESL_token != "") { pyrFilOut_V_pc_buffer.push_back(AESL_token.c_str()); i++; } aesl_fh.read(AUTOTB_TVOUT_PC_pyrFilOut_V, AESL_token); // data or [[/transaction]] if (AESL_token == "[[[/runtime]]]" || aesl_fh.eof(AUTOTB_TVOUT_PC_pyrFilOut_V)) { exit(1); } } // correct the buffer size the current transaction if (i != aesl_tmp_1) { aesl_tmp_1 = i; } if (aesl_tmp_1 > 0 && aesl_tmp_0.size() < aesl_tmp_1) { int aesl_tmp_0_size = aesl_tmp_0.size(); for (int tmp_aesl_tmp_0 = 0; tmp_aesl_tmp_0 < aesl_tmp_1 - aesl_tmp_0_size; tmp_aesl_tmp_0++) { std::complex<ap_fixed<17, 6, (ap_q_mode) 0, (ap_o_mode)3, 0> > tmp; aesl_tmp_0.push_back(tmp); } } // *********************************** if (i > 0) { // RTL Name: pyrFilOut_V { // bitslice(16, 0) // { // celement: pyrFilOut.V._M_real.V(16, 0) // { sc_lv<17>* pyrFilOut_V__M_real_V_lv0_0_1519_1 = new sc_lv<17>[1520]; // } // } // bitslice(33, 17) // { // celement: pyrFilOut.V._M_imag.V(16, 0) // { sc_lv<17>* pyrFilOut_V__M_imag_V_lv0_0_1519_1 = new sc_lv<17>[1520]; // } // } // bitslice(16, 0) { int hls_map_index = 0; // celement: pyrFilOut.V._M_real.V(16, 0) { // carray: (aesl_tmp_2) => (aesl_tmp_1 - 1) @ (1) for (int i_0 = aesl_tmp_2; i_0 <= aesl_tmp_1 - 1; i_0 += 1) { if (&(aesl_tmp_0[0].real()) != NULL) // check the null address if the c port is array or others { pyrFilOut_V__M_real_V_lv0_0_1519_1[hls_map_index++].range(16, 0) = sc_bv<17>(pyrFilOut_V_pc_buffer[hls_map_index].range(16, 0)); } } } } // 
bitslice(33, 17) { int hls_map_index = 0; // celement: pyrFilOut.V._M_imag.V(16, 0) { // carray: (aesl_tmp_2) => (aesl_tmp_1 - 1) @ (1) for (int i_0 = aesl_tmp_2; i_0 <= aesl_tmp_1 - 1; i_0 += 1) { if (&(aesl_tmp_0[0].imag()) != NULL) // check the null address if the c port is array or others { pyrFilOut_V__M_imag_V_lv0_0_1519_1[hls_map_index++].range(16, 0) = sc_bv<17>(pyrFilOut_V_pc_buffer[hls_map_index].range(33, 17)); } } } } // bitslice(16, 0) { int hls_map_index = 0; // celement: pyrFilOut.V._M_real.V(16, 0) { // carray: (aesl_tmp_2) => (aesl_tmp_1 - 1) @ (1) for (int i_0 = aesl_tmp_2; i_0 <= aesl_tmp_1 - 1; i_0 += 1) { // sub : i_0 // ori_name : aesl_tmp_0[i_0].real() // sub_1st_elem : 0 // ori_name_1st_elem : aesl_tmp_0[0].real() // output_left_conversion : (aesl_tmp_0[i_0].real()).range() // output_type_conversion : (pyrFilOut_V__M_real_V_lv0_0_1519_1[hls_map_index++]).to_string(SC_BIN).c_str() if (&(aesl_tmp_0[0].real()) != NULL) // check the null address if the c port is array or others { (aesl_tmp_0[i_0].real()).range() = (pyrFilOut_V__M_real_V_lv0_0_1519_1[hls_map_index++]).to_string(SC_BIN).c_str(); } } } } // bitslice(33, 17) { int hls_map_index = 0; // celement: pyrFilOut.V._M_imag.V(16, 0) { // carray: (aesl_tmp_2) => (aesl_tmp_1 - 1) @ (1) for (int i_0 = aesl_tmp_2; i_0 <= aesl_tmp_1 - 1; i_0 += 1) { // sub : i_0 // ori_name : aesl_tmp_0[i_0].imag() // sub_1st_elem : 0 // ori_name_1st_elem : aesl_tmp_0[0].imag() // output_left_conversion : (aesl_tmp_0[i_0].imag()).range() // output_type_conversion : (pyrFilOut_V__M_imag_V_lv0_0_1519_1[hls_map_index++]).to_string(SC_BIN).c_str() if (&(aesl_tmp_0[0].imag()) != NULL) // check the null address if the c port is array or others { (aesl_tmp_0[i_0].imag()).range() = (pyrFilOut_V__M_imag_V_lv0_0_1519_1[hls_map_index++]).to_string(SC_BIN).c_str(); } } } } } } } // push back output stream: "pyrFilOut" for (int i = 0; i < aesl_tmp_1; i++) { pyrFilOut.write(aesl_tmp_0[i]); } AESL_transaction_pc++; } else { 
static unsigned AESL_transaction; static AESL_FILE_HANDLER aesl_fh; // "imgIn_M_real_V" char* tvin_imgIn_M_real_V = new char[50]; aesl_fh.touch(AUTOTB_TVIN_imgIn_M_real_V); // "imgIn_M_imag_V" char* tvin_imgIn_M_imag_V = new char[50]; aesl_fh.touch(AUTOTB_TVIN_imgIn_M_imag_V); // "pyrFilOut_V" char* tvin_pyrFilOut_V = new char[50]; aesl_fh.touch(AUTOTB_TVIN_pyrFilOut_V); char* tvout_pyrFilOut_V = new char[50]; aesl_fh.touch(AUTOTB_TVOUT_pyrFilOut_V); char* wrapc_stream_size_out_pyrFilOut_V = new char[50]; aesl_fh.touch(WRAPC_STREAM_SIZE_OUT_pyrFilOut_V); char* wrapc_stream_egress_status_pyrFilOut_V = new char[50]; aesl_fh.touch(WRAPC_STREAM_EGRESS_STATUS_pyrFilOut_V); static INTER_TCL_FILE tcl_file(INTER_TCL); int leading_zero; // dump stream tvin: "pyrFilOut" std::vector<std::complex<ap_fixed<17, 6, (ap_q_mode) 0, (ap_o_mode)3, 0> > > aesl_tmp_0; int aesl_tmp_1 = 0; while (!pyrFilOut.empty()) { aesl_tmp_0.push_back(pyrFilOut.read()); aesl_tmp_1++; } // [[transaction]] sprintf(tvin_imgIn_M_real_V, "[[transaction]] %d\n", AESL_transaction); aesl_fh.write(AUTOTB_TVIN_imgIn_M_real_V, tvin_imgIn_M_real_V); sc_bv<16>* imgIn_M_real_V_tvin_wrapc_buffer = new sc_bv<16>[512]; // RTL Name: imgIn_M_real_V { // bitslice(15, 0) { int hls_map_index = 0; // celement: imgIn._M_real.V(15, 0) { // carray: (0) => (511) @ (1) for (int i_0 = 0; i_0 <= 511; i_0 += 1) { // sub : i_0 // ori_name : imgIn[i_0].real() // sub_1st_elem : 0 // ori_name_1st_elem : imgIn[0].real() // regulate_c_name : imgIn__M_real_V // input_type_conversion : (imgIn[i_0].real()).range().to_string(SC_BIN).c_str() if (&(imgIn[0].real()) != NULL) // check the null address if the c port is array or others { sc_lv<16> imgIn__M_real_V_tmp_mem; imgIn__M_real_V_tmp_mem = (imgIn[i_0].real()).range().to_string(SC_BIN).c_str(); imgIn_M_real_V_tvin_wrapc_buffer[hls_map_index++].range(15, 0) = imgIn__M_real_V_tmp_mem.range(15, 0); } } } } } // dump tv to file for (int i = 0; i < 512; i++) { sprintf(tvin_imgIn_M_real_V, 
"%s\n", (imgIn_M_real_V_tvin_wrapc_buffer[i]).to_string(SC_HEX).c_str()); aesl_fh.write(AUTOTB_TVIN_imgIn_M_real_V, tvin_imgIn_M_real_V); } tcl_file.set_num(512, &tcl_file.imgIn_M_real_V_depth); sprintf(tvin_imgIn_M_real_V, "[[/transaction]] \n"); aesl_fh.write(AUTOTB_TVIN_imgIn_M_real_V, tvin_imgIn_M_real_V); // release memory allocation delete [] imgIn_M_real_V_tvin_wrapc_buffer; // [[transaction]] sprintf(tvin_imgIn_M_imag_V, "[[transaction]] %d\n", AESL_transaction); aesl_fh.write(AUTOTB_TVIN_imgIn_M_imag_V, tvin_imgIn_M_imag_V); sc_bv<16>* imgIn_M_imag_V_tvin_wrapc_buffer = new sc_bv<16>[512]; // RTL Name: imgIn_M_imag_V { // bitslice(15, 0) { int hls_map_index = 0; // celement: imgIn._M_imag.V(15, 0) { // carray: (0) => (511) @ (1) for (int i_0 = 0; i_0 <= 511; i_0 += 1) { // sub : i_0 // ori_name : imgIn[i_0].imag() // sub_1st_elem : 0 // ori_name_1st_elem : imgIn[0].imag() // regulate_c_name : imgIn__M_imag_V // input_type_conversion : (imgIn[i_0].imag()).range().to_string(SC_BIN).c_str() if (&(imgIn[0].imag()) != NULL) // check the null address if the c port is array or others { sc_lv<16> imgIn__M_imag_V_tmp_mem; imgIn__M_imag_V_tmp_mem = (imgIn[i_0].imag()).range().to_string(SC_BIN).c_str(); imgIn_M_imag_V_tvin_wrapc_buffer[hls_map_index++].range(15, 0) = imgIn__M_imag_V_tmp_mem.range(15, 0); } } } } } // dump tv to file for (int i = 0; i < 512; i++) { sprintf(tvin_imgIn_M_imag_V, "%s\n", (imgIn_M_imag_V_tvin_wrapc_buffer[i]).to_string(SC_HEX).c_str()); aesl_fh.write(AUTOTB_TVIN_imgIn_M_imag_V, tvin_imgIn_M_imag_V); } tcl_file.set_num(512, &tcl_file.imgIn_M_imag_V_depth); sprintf(tvin_imgIn_M_imag_V, "[[/transaction]] \n"); aesl_fh.write(AUTOTB_TVIN_imgIn_M_imag_V, tvin_imgIn_M_imag_V); // release memory allocation delete [] imgIn_M_imag_V_tvin_wrapc_buffer; // push back input stream: "pyrFilOut" for (int i = 0; i < aesl_tmp_1; i++) { pyrFilOut.write(aesl_tmp_0[i]); } // [call_c_dut] ----------> AESL_ORIG_DUT_pyrconstuct_top(imgIn, pyrFilOut, nL); // pop 
output stream: "pyrFilOut" int aesl_tmp_2 = aesl_tmp_1; aesl_tmp_1 = 0; aesl_tmp_0.clear(); while (!pyrFilOut.empty()) { aesl_tmp_0.push_back(pyrFilOut.read()); aesl_tmp_1++; } // [[transaction]] sprintf(tvout_pyrFilOut_V, "[[transaction]] %d\n", AESL_transaction); aesl_fh.write(AUTOTB_TVOUT_pyrFilOut_V, tvout_pyrFilOut_V); sc_bv<34>* pyrFilOut_V_tvout_wrapc_buffer = new sc_bv<34>[1520]; // RTL Name: pyrFilOut_V { // bitslice(16, 0) { int hls_map_index = 0; // celement: pyrFilOut.V._M_real.V(16, 0) { // carray: (aesl_tmp_2) => (aesl_tmp_1 - 1) @ (1) for (int i_0 = aesl_tmp_2; i_0 <= aesl_tmp_1 - 1; i_0 += 1) { // sub : i_0 // ori_name : aesl_tmp_0[i_0].real() // sub_1st_elem : 0 // ori_name_1st_elem : aesl_tmp_0[0].real() // regulate_c_name : pyrFilOut_V__M_real_V // input_type_conversion : (aesl_tmp_0[i_0].real()).range().to_string(SC_BIN).c_str() if (&(aesl_tmp_0[0].real()) != NULL) // check the null address if the c port is array or others { sc_lv<17> pyrFilOut_V__M_real_V_tmp_mem; pyrFilOut_V__M_real_V_tmp_mem = (aesl_tmp_0[i_0].real()).range().to_string(SC_BIN).c_str(); pyrFilOut_V_tvout_wrapc_buffer[hls_map_index++].range(16, 0) = pyrFilOut_V__M_real_V_tmp_mem.range(16, 0); } } } } // bitslice(33, 17) { int hls_map_index = 0; // celement: pyrFilOut.V._M_imag.V(16, 0) { // carray: (aesl_tmp_2) => (aesl_tmp_1 - 1) @ (1) for (int i_0 = aesl_tmp_2; i_0 <= aesl_tmp_1 - 1; i_0 += 1) { // sub : i_0 // ori_name : aesl_tmp_0[i_0].imag() // sub_1st_elem : 0 // ori_name_1st_elem : aesl_tmp_0[0].imag() // regulate_c_name : pyrFilOut_V__M_imag_V // input_type_conversion : (aesl_tmp_0[i_0].imag()).range().to_string(SC_BIN).c_str() if (&(aesl_tmp_0[0].imag()) != NULL) // check the null address if the c port is array or others { sc_lv<17> pyrFilOut_V__M_imag_V_tmp_mem; pyrFilOut_V__M_imag_V_tmp_mem = (aesl_tmp_0[i_0].imag()).range().to_string(SC_BIN).c_str(); pyrFilOut_V_tvout_wrapc_buffer[hls_map_index++].range(33, 17) = pyrFilOut_V__M_imag_V_tmp_mem.range(16, 0); } } } } } 
// dump tv to file for (int i = 0; i < aesl_tmp_1 - aesl_tmp_2; i++) { sprintf(tvout_pyrFilOut_V, "%s\n", (pyrFilOut_V_tvout_wrapc_buffer[i]).to_string(SC_HEX).c_str()); aesl_fh.write(AUTOTB_TVOUT_pyrFilOut_V, tvout_pyrFilOut_V); } tcl_file.set_num(aesl_tmp_1 - aesl_tmp_2, &tcl_file.pyrFilOut_V_depth); sprintf(tvout_pyrFilOut_V, "[[/transaction]] \n"); aesl_fh.write(AUTOTB_TVOUT_pyrFilOut_V, tvout_pyrFilOut_V); // release memory allocation delete [] pyrFilOut_V_tvout_wrapc_buffer; // dump stream size sprintf(wrapc_stream_size_out_pyrFilOut_V, "[[transaction]] %d\n", AESL_transaction); aesl_fh.write(WRAPC_STREAM_SIZE_OUT_pyrFilOut_V, wrapc_stream_size_out_pyrFilOut_V); sprintf(wrapc_stream_size_out_pyrFilOut_V, "%d\n", aesl_tmp_1 - aesl_tmp_2); aesl_fh.write(WRAPC_STREAM_SIZE_OUT_pyrFilOut_V, wrapc_stream_size_out_pyrFilOut_V); sprintf(wrapc_stream_size_out_pyrFilOut_V, "[[/transaction]] \n"); aesl_fh.write(WRAPC_STREAM_SIZE_OUT_pyrFilOut_V, wrapc_stream_size_out_pyrFilOut_V); // push back output stream: "pyrFilOut" for (int i = 0; i < aesl_tmp_1; i++) { pyrFilOut.write(aesl_tmp_0[i]); } // release memory allocation: "imgIn_M_real_V" delete [] tvin_imgIn_M_real_V; // release memory allocation: "imgIn_M_imag_V" delete [] tvin_imgIn_M_imag_V; // release memory allocation: "pyrFilOut_V" delete [] tvout_pyrFilOut_V; delete [] tvin_pyrFilOut_V; delete [] wrapc_stream_size_out_pyrFilOut_V; AESL_transaction++; tcl_file.set_num(AESL_transaction , &tcl_file.trans_num); } }
/**
 * Copy the first `length` elements of `imgIn` to the stream `out`, in index
 * order.
 *
 * NOTE(review): the element type `T` is not declared within this block; it is
 * presumably introduced by a template header outside the visible text.
 * NOTE(review): nothing here checks `length` against IMG_WIDTH; the caller
 * must keep it within the array.
 */
void dummy_proc3(T imgIn[IMG_WIDTH], hls::stream<T> &out, int length) {
#pragma HLS inline
    int idx = 0;
    while (idx < length) {
        out.write(imgIn[idx]);
        idx++;
    }
}
/**
 * Top level of the pyramid construction (identifier kept as spelled in the
 * source).
 *
 * For each of the 8 levels `l` handled by loop LPH:
 *   - l == 0: 512 samples are copied from `imgIn` into `imgInTmp`;
 *     l  > 0: `imgInTmp` is filled with the first `nlimit` entries of
 *     `fftPyrOut` starting at offset `cidx`, each right-shifted by `lshift`,
 *     and zero for the remaining positions up to 512;
 *   - fft_top is called (`direction` is true only for l == 0);
 *   - l == 0: the FFT output goes through dummy_proc2 and pyrbuild_top into
 *     `fftPyrOut`, and outside synthesis two debug text files are written;
 *     l  > 0: the first `nlimit` FFT outputs, right-shifted by `pshifts[l]`,
 *     are written to `pyrFilOut`.
 *
 * NOTE(review): `nL` is not referenced in the visible part of the body.
 * NOTE(review): the text of this function stops after the per-level output
 * loop; the closing braces of the LPH loop and of the function body are not
 * present here.
 */
void pyrconstuct_top(
        cmpxDataIn imgIn[IMG_WIDTH],
        //hls::stream<t_image> &imgIn,
        hls::stream<t_pyr_complex> &pyrFilOut,
        const int nL
        )
{
#pragma HLS interface ap_fifo depth=512 port=imgIn
#pragma HLS interface ap_fifo depth=1520 port=pyrFilOut
#pragma HLS data_pack variable=pyrFilOut

    // Per-level working buffers.
    cmpxDataIn imgInTmp[FFT_LENGTH];
    cmpxDataOut imgOutTmpFFTStream[FFT_LENGTH];
    cmpxDataOut imgOutTmpBlockRam[FFT_LENGTH];
    cmpxDataOut fftPyrOut[1520];
#pragma HLS STREAM variable=imgInTmp depth=512 dim=1
#pragma HLS STREAM variable=imgOutTmpFFTStream depth=512 dim=1
#pragma HLS STREAM variable=fftPyrOut depth=1520 dim=1

    LPH: for(int l=0;l<8;l++){
//#pragma HLS DATAFLOW
        // Per-level parameters looked up from tables defined elsewhere.
        const int cidx = climits[l];
        const int nlimit = limits[l];
        const int fsize = fsizes[l];
        const int lshift = lshifts[l];
        bool direction =false;
        bool ovflo;
        if( l ==0){
            // First level: take the samples straight from the input port.
            direction = true;
            for(int i=0;i<512;i++) {
#pragma HLS pipeline
                //cmpxDataIn val (0,0);
                //val.real() = imgIn[i].real();
                //val.imag() =imgIn[i].real()
                imgInTmp[i] = imgIn[i];
            }
        }
        else {
            // Later levels: take `nlimit` scaled entries of fftPyrOut starting
            // at cidx; positions >= nlimit keep the (0,0) initial value.
            for(int i=0;i<512;i++) {
#pragma HLS pipeline
                cmpxDataIn val (0,0);
                if (i<nlimit) {
                    val.real() = (fftPyrOut[cidx + i ].real() >> lshift);
                    val.imag() = (fftPyrOut[cidx + i ].imag() >> lshift);
                }
                imgInTmp[i] = val;
            }
        }
        // NOTE(review): imgInTmp was just filled above, but imgIn is what is
        // passed to fft_top here — confirm this is intended.
        fft_top<config2_t,IMG_WIDTH,cmpxDataIn,cmpxDataOut>(direction,imgIn,imgOutTmpFFTStream,&ovflo,nlimit);
        if(l==0){
            dummy_proc2(imgOutTmpFFTStream, imgOutTmpBlockRam);
            pyrbuild_top(imgOutTmpBlockRam, fftPyrOut, 512, 512);
#ifndef __SYNTHESIS__
            // C-simulation only: dump the FFT output as text.
            {
                FILE * fo = fopen("fftOut.txt", "wb");
                for(int i=0;i<512;i++) {
                    fprintf(fo, "%.8f %.8f\n", imgOutTmpFFTStream[i].real().to_float(), imgOutTmpFFTStream[i].imag().to_float());
                }
                fclose(fo);
            }
            // C-simulation only: dump all 1520 entries of fftPyrOut as text.
            // NOTE(review): unlike the block above, this FILE* is never
            // fclose()d.
            {
                FILE * fo = fopen("fftOutFilter.txt", "wb");
                for(int i=0;i<1520;i++) {
                    fprintf(fo, "%.8f %.8f\n", fftPyrOut[i].real().to_float(), fftPyrOut[i].imag().to_float());
                }
            }
            /* for(int i=0;i<512;i++) std::cout << "FFT Out " << i << " : " << imgOutTmp[i] << std::endl; */
#endif
        }//end of input
        else{
            // Levels > 0: emit the first nlimit FFT outputs, scaled by the
            // per-level shift pshifts[l].
            for(int i=0;i<512;i++) {
#pragma HLS pipeline
                if(i<nlimit){
                    t_pyr_complex val;
                    cmpxDataOut2 tmp = imgOutTmpFFTStream[i];
                    val.real() = tmp.real() >> pshifts[l];
                    val.imag() = tmp.imag() >> pshifts[l];
                    pyrFilOut.write(val);
                }
            }
        }
// ----------------------------------------------------------- void lsupdate2SWvector ( const uint32_t stepsMC, const uint32_t pathsMC, const float discount, hls::stream<float> &contin_in, hls::stream<float> &payoff_in, hls::stream<float> &cashFlowDisc_out, hls::stream<float> &toAccum_out ) { printf("lsupdate2SWvector\n"); float cashFlowVector[CASHFLOW_SIZE]; #pragma HLS RESOURCE variable=cashFlowVector core=RAM_2P_BRAM // ---------------------------------------------------- zerosLoop:for(uint32_t path=0; path<pathsMC; ++path) { #pragma HLS PIPELINE II=1 enable_flush cashFlowVector[path] = (float) 0.0f; } // ---------------------------------------------------- // ---------------------------------------------------- stepsLoop:for(uint32_t step=0; step<=stepsMC; ++step) { pathsLoop:for(uint32_t path=0; path<pathsMC; ++path) { #pragma HLS PIPELINE II=1 enable_flush uint32_t indexRead = path; uint32_t indexWrite = path; float continuation = contin_in.read(); float payoff = payoff_in.read(); float cashFlow = cashFlowVector[indexRead]; float discountedCashFlow = discount * cashFlow; // --------------------------------- float newY; if( (payoff > (float) 0.0f) && (payoff >= (float) continuation) ) newY = payoff; else newY = discountedCashFlow; // --------------------------------- // write to outputs cashFlowVector[indexWrite] = newY; if(step < stepsMC) cashFlowDisc_out.write(discount * newY); if(step == stepsMC) toAccum_out.write(newY); } } return; }
void receive( hls::stream<t_s_xgmii> &s_xgmii, hls::stream<t_axis> &m_axis, hls::stream<t_rx_status> &rx_status ) { #ifdef RELEASE #pragma HLS interface ap_ctrl_none port=return #pragma HLS data_pack variable=s_xgmii #endif #pragma HLS INTERFACE axis port=m_axis #pragma HLS data_pack variable=rx_status int i; t_s_xgmii cur = {0, 0}; t_s_xgmii precur = {0, 0}; t_s_xgmii last_word; ap_uint<32> crc_state = 0xffffffff; ap_uint<16> frm_cnt = 0; int data_err = 0; ap_uint<16> len_type = 0xffff; int last_user_byte_lane_before_frame_end; int user_data_end_detected = 0; int last_user_word_pos; ap_uint<3> last_user_byte_lane; MAIN: while (1) { if (user_data_end_detected) { int fcs_err = (crc_state != 0x00000000); int len_err = 0; int frm_byte_cnt = frm_cnt*8 + last_user_byte_lane_before_frame_end + 1 + 4; int under = (frm_byte_cnt < 64); int over = (frm_byte_cnt > 1500); if (is_len(len_type)) { if (len_type > 2) { len_err = (len_type != ((last_user_word_pos - 2) * 8 + 2 + last_user_byte_lane + 1)); } else { len_err = (len_type != ((last_user_word_pos - 2) * 8 + 2 + 8 - (last_user_byte_lane + 1))); } } int good = !(fcs_err | len_err | data_err | under | over); m_axis.write((t_axis){last_word.rxd, mask_up_to_bit(8, last_user_byte_lane), !good, 1}); rx_status.write((t_rx_status) {frm_cnt, good, 0, 0, under, len_err, fcs_err, data_err, 0, over}); } cur = precur; if (!s_xgmii.read_nb(precur)) return; frm_cnt = 0; crc_state = 0xffffffff; len_type = 0xffff; IDLE_AND_PREAMBLE: while (!((cur.rxc == 0x01) && (cur.rxd == 0xd5555555555555fb))) { #pragma HLS LATENCY max=0 min=0 cur = precur; if (!s_xgmii.read_nb(precur)) return; // printf("RXD 0x%016lx, RXC 0x%02x\n", cur.rxd.to_long(), cur.rxc.to_int()); } cur = precur; if (!s_xgmii.read_nb(precur)) return; // printf("RXD 0x%016lx, RXC 0x%02x\n", cur.rxd.to_long(), cur.rxc.to_int()); user_data_end_detected = 0; int frame_end_detected = 0; USER_DATA: do { #pragma HLS LATENCY max=0 min=0 ap_uint<8> crc_field_mask; // END-OF-FRAME detection 
if (cur.rxc != 0x00) { switch(cur.rxc) { case 0xe0 : last_user_byte_lane_before_frame_end = 0; crc_field_mask = 0x1e; break; case 0xc0 : last_user_byte_lane_before_frame_end = 1; crc_field_mask = 0x3c; break; case 0x80 : last_user_byte_lane_before_frame_end = 2; crc_field_mask = 0x78; break; default: crc_field_mask = 0xff; break; } frame_end_detected = 1; if (!user_data_end_detected) { last_word = cur; last_user_byte_lane = last_user_byte_lane_before_frame_end; } user_data_end_detected = 1; } else if ((precur.rxc != 0x00) && ((ap_uint<8>) ~precur.rxc <= 0x0f)) { switch(precur.rxc) { case 0xf0 : last_user_byte_lane_before_frame_end = 7; break; case 0xf8 : last_user_byte_lane_before_frame_end = 6; crc_field_mask = 0x80; break; case 0xfc : last_user_byte_lane_before_frame_end = 5; crc_field_mask = 0xc0; break; case 0xfe : last_user_byte_lane_before_frame_end = 4; crc_field_mask = 0xe0; break; case 0xff : last_user_byte_lane_before_frame_end = 3; crc_field_mask = 0xf0; frame_end_detected = 1; break; } if (!user_data_end_detected) { last_word = cur; last_user_byte_lane = last_user_byte_lane_before_frame_end; } user_data_end_detected = 1; } else { crc_field_mask = 0x00; } // END-OF-USER-DATA detection if (!user_data_end_detected) { if (frm_cnt == 1) { len_type = ((ap_uint<16>) wbyte(cur.rxd, 4) << 8) | wbyte(cur.rxd, 5); if (is_len(len_type)) { // Calculate the position of the last word within the frame // which contains valid user data (based on LENGTH/TYPE field // value. -3 is subtracted before division because first two // bytes of user data are not word aligned, and 1 is subtracted // additionally since we want to find out the last word that // still contains data, not the number of user data words. +2 // since first word aligned user data byte starts at word #2. 
last_user_word_pos = (len_type - 3) / 8 + 2; last_user_byte_lane = (len_type - 3) % 8; if (len_type <= 2) { user_data_end_detected = 1; last_word = cur; } } } else if (is_len(len_type)) { if (frm_cnt == last_user_word_pos) { last_word = cur; user_data_end_detected = 1; } } } if (!user_data_end_detected) { m_axis.write((t_axis){cur.rxd, 0xff, 0, 0}); frm_cnt++; } ap_uint<64> crc_data = 0; CRC_MASK_CALC: for (i = 0; i < 8; i++) { #pragma HLS LOOP unroll if (!wbit(cur.rxc, i)) { ap_uint<8> d = wbyte(cur.rxd, i); if (wbit(crc_field_mask,i)) { d = ~d; } crc_data = replace_byte(crc_data, d, i); } } crc32<ap_uint<64>>(crc_data, &crc_state); // printf("RXD 0x%016lx, RXC 0x%02x, CRC_DATA 0x%016lx, crc_field_mask 0x%02x, CRC_STATE 0x%08lx, FRMEND %d\n", cur.rxd.to_long(), cur.rxc.to_int(), crc_data.to_long(), crc_field_mask.to_int(), crc_state.to_int(), frame_end_detected); //// printf("RXD 0x%016lx, RXC 0x%02x, CRC_STATE 0x%08lx, FRMEND %d\n", cur.rxd.to_long(), cur.rxc.to_int(), crc_state.to_int(), frame_end_detected); cur = precur; if (!s_xgmii.read_nb(precur)) return; } while(!frame_end_detected); } }
/* * Core that apply a 3x3(Configurable) 2d Convolution, Erode, Dilate on * grayscale images * http://www.xilinx.com/support/documentation/sw_manuals/xilinx2014_1/ug902-vivado-high-level-synthesis.pdf * */ void doImgProc(hls::stream<uint_8_side_channel>& inStream, hls::stream<int_8_side_channel> & outStream, char kernel[KERNEL_DIM * KERNEL_DIM], int operation) { #pragma HLS INTERFACE axis port=inStream #pragma HLS INTERFACE axis port=outStream #pragma HLS INTERFACE s_axilite port=return bundle=CRTL_BUS #pragma HLS INTERFACE s_axilite port=operation bundle=CRTL_BUS #pragma HLS INTERFACE s_axilite port=kernel bundle=KERNEL_BUS // Defining the line buffer and setting the inter dependency to false through // pragmas hls::LineBuffer<KERNEL_DIM, IMG_WIDTH, unsigned char> lineBuff; hls::Window<KERNEL_DIM, KERNEL_DIM, short> window; // Index used to keep track of row,col int idxCol = 0; int idxRow = 0; int pixConvolved = 0; // Calculate delay to fix line-buffer offset int waitTicks = (IMG_WIDTH * (KERNEL_DIM - 1) + KERNEL_DIM) / 2;// 241; int countWait = 0; int sentPixels = 0; int_8_side_channel dataOutSideChannel; uint_8_side_channel currPixelSideChannel; // Iterate on all pixels for our 320x240 image, the HLS PIPELINE improves our // latency for (int idxPixel = 0; idxPixel < (IMG_WIDTH * IMG_HEIGHT); idxPixel++) { #pragma HLS PIPELINE // Read and cache (Block here if FIFO sender is empty) currPixelSideChannel = inStream.read(); // Get the pixel data unsigned char pixelIn = currPixelSideChannel.data; // Put data on the LineBuffer lineBuff.shift_up(idxCol); lineBuff.insert_top(pixelIn, idxCol); // Will put in val[2] of line buffer // (Check Debug) // Put data on the window and multiply with the kernel for (int idxWinRow = 0; idxWinRow < KERNEL_DIM; idxWinRow++) { for (int idxWinCol = 0; idxWinCol < KERNEL_DIM; idxWinCol++) { // idxWinCol + pixConvolved, will slide the window ... 
short val = (short)lineBuff.getval(idxWinRow, idxWinCol + pixConvolved); // Multiply kernel by the sampling window val = (short)kernel[(idxWinRow * KERNEL_DIM) + idxWinCol] * val; window.insert(val, idxWinRow, idxWinCol); } } // Avoid calculate out of the image boundaries and if we can convolve short valOutput = 0; if ((idxRow >= KERNEL_DIM - 1) && (idxCol >= KERNEL_DIM - 1)) { switch (operation) { case 0: // Convolution valOutput = sumWindow(&window); valOutput = valOutput / 8; // Avoid negative values if (valOutput < 0) valOutput = 0; break; case 1: // Erode valOutput = minWindow(&window); break; case 2: // Dilate valOutput = maxWindow(&window); break; } pixConvolved++; } // Calculate row and col index if (idxCol < IMG_WIDTH - 1) { idxCol++; } else { // New line idxCol = 0; idxRow++; pixConvolved = 0; } /* * Fix the line buffer delay, on a 320x240 image with 3x3 kernel, the delay * will be * ((240*2) + 3)/2 = 241 * So we wait for 241 ticks send the results than put more 241 zeros */ // Put data on output stream (side-channel(tlast) way...) 
/*dataOutSideChannel.data = valOutput; dataOutSideChannel.keep = currPixelSideChannel.keep; dataOutSideChannel.strb = currPixelSideChannel.strb; dataOutSideChannel.user = currPixelSideChannel.user; dataOutSideChannel.last = currPixelSideChannel.last; dataOutSideChannel.id = currPixelSideChannel.id; dataOutSideChannel.dest = currPixelSideChannel.dest; // Send to the stream (Block if the FIFO receiver is full) outStream.write(dataOutSideChannel);*/ countWait++; if (countWait > waitTicks) { dataOutSideChannel.data = valOutput; dataOutSideChannel.keep = currPixelSideChannel.keep; dataOutSideChannel.strb = currPixelSideChannel.strb; dataOutSideChannel.user = currPixelSideChannel.user; dataOutSideChannel.last = 0; dataOutSideChannel.id = currPixelSideChannel.id; dataOutSideChannel.dest = currPixelSideChannel.dest; // Send to the stream (Block if the FIFO receiver is full) outStream.write(dataOutSideChannel); sentPixels++; } } // Now send the remaining zeros (Just the (Number of delayed ticks) for (countWait = 0; countWait < waitTicks; countWait++) { dataOutSideChannel.data = 0; dataOutSideChannel.keep = currPixelSideChannel.keep; dataOutSideChannel.strb = currPixelSideChannel.strb; dataOutSideChannel.user = currPixelSideChannel.user; // Send last on the last item if (countWait < waitTicks - 1) dataOutSideChannel.last = 0; else dataOutSideChannel.last = 1; dataOutSideChannel.id = currPixelSideChannel.id; dataOutSideChannel.dest = currPixelSideChannel.dest; // Send to the stream (Block if the FIFO receiver is full) outStream.write(dataOutSideChannel); } }
/**
 * Co-simulation wrapper around AESL_ORIG_DUT_hls_cropping_strm
 * (generated-style code; statement order matters and is left untouched).
 *
 * The wrapper has two modes, chosen by whether the file
 * ".hls_cosim_wrapc_switch.log" can be opened:
 *
 *   - File present ("post-check" branch): the original function is NOT
 *     called. The number of consumed `src` elements is read from the
 *     stream-size file and that many elements are popped from `src`; the
 *     recorded `dst` values are read from the TVOUT_PC file, any 'X'/'x'
 *     digits are replaced by '0' (with a one-time warning per token), and
 *     the values are written to `dst`.
 *
 *   - File absent (else branch): `src` and `dst` are drained and `src` is
 *     refilled, the original function is called, and the consumed inputs
 *     and produced outputs are dumped as "[[transaction]]" records to the
 *     test-vector and stream-size files. `dst` is then refilled with its
 *     previous content plus the new outputs.
 *
 * NOTE(review): in the first branch `aesl_tmp_4` is only assigned inside the
 * transaction-number checks; if neither matches it is read uninitialized by
 * the final write loop.
 * NOTE(review): `dst_V_V_lv0_0_0_1` is allocated with new[] and no matching
 * delete[] is visible.
 */
void hls_cropping_strm (
hls::stream< ap_int<8> > & src,
hls::stream< ap_int<16> > & dst)
{
    fstream wrapc_switch_file_token;
    wrapc_switch_file_token.open(".hls_cosim_wrapc_switch.log");
    int AESL_i;
    if (wrapc_switch_file_token.good())
    {
        // ---- Replay recorded results instead of running the design ----
        static unsigned AESL_transaction_pc;
        string AESL_token;
        string AESL_num;
        static AESL_FILE_HANDLER aesl_fh;

        // Pop from `src` as many elements as the recorded run consumed.
        aesl_fh.read(WRAPC_STREAM_SIZE_IN_src_V_V, AESL_token); //[[transaction]]
        aesl_fh.read(WRAPC_STREAM_SIZE_IN_src_V_V, AESL_num); //transaction number
        if (atoi(AESL_num.c_str()) == AESL_transaction_pc )
        {
            aesl_fh.read(WRAPC_STREAM_SIZE_IN_src_V_V, AESL_token); //pop_size
            int aesl_tmp_1 = atoi(AESL_token.c_str());
            for (int i = 0 ; i < aesl_tmp_1 ; i++)
            {
                src.read();
            }
            aesl_fh.read(WRAPC_STREAM_SIZE_IN_src_V_V, AESL_token); //[[/transaction]]
        }

        // Recorded number of `dst` elements for this transaction.
        int aesl_tmp_4;
        int aesl_tmp_5 = 0;
        aesl_fh.read(WRAPC_STREAM_SIZE_OUT_dst_V_V, AESL_token); //[[transaction]]
        aesl_fh.read(WRAPC_STREAM_SIZE_OUT_dst_V_V, AESL_num); //transaction number
        if (atoi(AESL_num.c_str()) == AESL_transaction_pc )
        {
            aesl_fh.read(WRAPC_STREAM_SIZE_OUT_dst_V_V, AESL_token); //pop_size
            aesl_tmp_4 = atoi(AESL_token.c_str());
            aesl_fh.read(WRAPC_STREAM_SIZE_OUT_dst_V_V, AESL_token); //[[/transaction]]
        }

        // Read the recorded `dst` values.
        std::vector<ap_int<16> > aesl_tmp_3;
        aesl_fh.read(AUTOTB_TVOUT_PC_dst_V_V, AESL_token); //[[transaction]]
        if ( AESL_token != "[[transaction]]")
        {
            exit(1);
        }
        aesl_fh.read(AUTOTB_TVOUT_PC_dst_V_V, AESL_num); //transaction number
        if (atoi(AESL_num.c_str()) == AESL_transaction_pc )
        {
            aesl_fh.read(AUTOTB_TVOUT_PC_dst_V_V, AESL_token); //data
            std::vector < sc_bv<16> > dst_V_V_pc_buffer;
            int i = 0;
            while (AESL_token != "[[/transaction]]")
            {
                bool no_x = false;
                bool err = false;
                // Replace every upper-case 'X' digit by '0'; warn once.
                while (!no_x)
                {
                    size_t x_found = AESL_token.find('X');
                    if (x_found != string::npos)
                    {
                        if (!err)
                        {
                            cerr << "@W [SIM-201] RTL produces unknown value 'X' on port 'dst_V_V', possible cause: There are uninitialized variables in the C design." << endl;
                            err = true;
                        }
                        AESL_token.replace(x_found, 1, "0");
                    }
                    else
                    {
                        no_x = true;
                    }
                }
                no_x = false;
                // Same for lower-case 'x', searching from offset 2 so the
                // first two characters of the token are not touched.
                while (!no_x)
                {
                    size_t x_found = AESL_token.find('x', 2);
                    if (x_found != string::npos)
                    {
                        if (!err)
                        {
                            cerr << "@W [SIM-201] RTL produces unknown value 'X' on port 'dst_V_V', possible cause: There are uninitialized variables in the C design." << endl;
                            err = true;
                        }
                        AESL_token.replace(x_found, 1, "0");
                    }
                    else
                    {
                        no_x = true;
                    }
                }
                // Keep non-empty tokens as data words.
                if (AESL_token != "")
                {
                    dst_V_V_pc_buffer.push_back( AESL_token.c_str() );
                    i++;
                }
                aesl_fh.read(AUTOTB_TVOUT_PC_dst_V_V, AESL_token); //data or [[/transaction]]
                // Abort if the file ends before the transaction is closed.
                if (AESL_token == "[[[/runtime]]]" || aesl_fh.eof(AUTOTB_TVOUT_PC_dst_V_V))
                {
                    exit(1);
                }
            }
            // The number of data words actually read overrides the recorded size.
            if (i != aesl_tmp_4)
            {
                aesl_tmp_4 = i;
            }
            // Grow the output vector to hold aesl_tmp_4 elements.
            if (aesl_tmp_4 > 0 && aesl_tmp_3.size() < aesl_tmp_4)
            {
                int aesl_tmp_3_size = aesl_tmp_3.size();
                for (int tmp_aesl_tmp_3 = 0 ; tmp_aesl_tmp_3 < aesl_tmp_4 - aesl_tmp_3_size ; tmp_aesl_tmp_3 ++ )
                {
                    ap_int<16> tmp;
                    aesl_tmp_3.push_back(tmp);
                }
            }
            // Convert the recorded bit vectors into ap_int<16> values.
            if (i > 0)
            {
                sc_lv<16> *dst_V_V_lv0_0_0_1 = new sc_lv<16>[aesl_tmp_4 - aesl_tmp_5];
                AESL_i = 0; //subscript for rtl array
                for (int i_0 = 0; i_0 <= aesl_tmp_4 - aesl_tmp_5 - 1 ; i_0+= 1)
                {
                    if(&(aesl_tmp_3[0]) != 0)
                    {
                        dst_V_V_lv0_0_0_1[0 + AESL_i].range(15, 0) = sc_bv<16>(dst_V_V_pc_buffer[0 + AESL_i].range(15, 0));
                    }
                    AESL_i++;
                }
                AESL_i = 0; //subscript for rtl array
                for (int i_0 = 0; i_0 <= aesl_tmp_4 - aesl_tmp_5 - 1 ; i_0+= 1)
                {
                    if(&(aesl_tmp_3[0]) != 0)
                    {
                        aesl_tmp_3[i_0] = (dst_V_V_lv0_0_0_1[0 + AESL_i]).to_string(SC_BIN).c_str();
                    }
                    AESL_i++;
                }
            }
        }
        // Hand the replayed values to the caller through `dst`.
        for (int i = 0; i < aesl_tmp_4; i++)
        {
            dst.write(aesl_tmp_3[i]);
        }
        AESL_transaction_pc ++ ;
    }
    else
    {
        // ---- Run the original function and record test vectors ----
        static unsigned AESL_transaction;
        static AESL_FILE_HANDLER aesl_fh;
        // Scratch text buffers for the records written below.
        char* tvin_src_V_V = new char[50];
        char* wrapc_stream_size_in_src_V_V = new char[50];
        char* tvout_dst_V_V = new char[50];
        char* tvin_dst_V_V = new char[50];
        aesl_fh.touch(AUTOTB_TVIN_dst_V_V);
        char* wrapc_stream_size_out_dst_V_V = new char[50];
        static INTER_TCL_FILE tcl_file(INTER_TCL);
        int leading_zero;

        // Snapshot the current content of `src` (aesl_tmp_1 elements).
        std::vector<ap_int<8> > aesl_tmp_0;
        int aesl_tmp_1 = 0;
        while (!src.empty())
        {
            aesl_tmp_0.push_back(src.read());
            aesl_tmp_1 ++;
        }
        // Snapshot whatever is already in `dst` (aesl_tmp_4 elements).
        std::vector<ap_int<16> > aesl_tmp_3;
        int aesl_tmp_4 = 0;
        while (!dst.empty())
        {
            aesl_tmp_3.push_back(dst.read());
            aesl_tmp_4 ++;
        }
        // Refill `src` and run the original function.
        for (int i = 0; i < aesl_tmp_1; i++)
        {
            src.write(aesl_tmp_0[i]);
        }
        AESL_ORIG_DUT_hls_cropping_strm(src,dst);
        // aesl_tmp_2: elements left in `src`; aesl_tmp_5: size of `dst`
        // before the call. The differences below are the per-call counts.
        int aesl_tmp_2 = src.size();
        int aesl_tmp_5 = aesl_tmp_4;
        while (!dst.empty())
        {
            aesl_tmp_3.push_back(dst.read());
            aesl_tmp_4 ++;
        }

        // Dump the consumed `src` elements as one transaction.
        sprintf(tvin_src_V_V, "[[transaction]] %d\n", AESL_transaction);
        aesl_fh.write(AUTOTB_TVIN_src_V_V, tvin_src_V_V);
        sc_bv<8> *src_V_V_tvin_wrapc_buffer = new sc_bv<8>[aesl_tmp_1 - aesl_tmp_2];
        AESL_i = 0; //subscript for rtl array
        for (int i_0 = 0; i_0 <= aesl_tmp_1 - aesl_tmp_2 - 1 ; i_0+= 1)
        {
            sc_lv<8> src_V_V_tmp_mem;
            if(&(aesl_tmp_0[0]) != 0)
            {
                src_V_V_tmp_mem = (aesl_tmp_0[i_0]).to_string(2).c_str();
                src_V_V_tvin_wrapc_buffer[0 + AESL_i].range(7, 0) = src_V_V_tmp_mem.range(7, 0 ) ;
            }
            AESL_i++;
        }
        for (int i = 0; i < aesl_tmp_1 - aesl_tmp_2 ; i++)
        {
            sprintf(tvin_src_V_V, "%s\n", (src_V_V_tvin_wrapc_buffer[i]).to_string(SC_HEX).c_str());
            aesl_fh.write(AUTOTB_TVIN_src_V_V, tvin_src_V_V);
        }
        tcl_file.set_num(aesl_tmp_1 - aesl_tmp_2,&tcl_file.src_V_V_depth);
        sprintf(tvin_src_V_V, "[[/transaction]] \n");
        aesl_fh.write(AUTOTB_TVIN_src_V_V, tvin_src_V_V);
        delete [] src_V_V_tvin_wrapc_buffer;

        // Record how many `src` elements were consumed.
        sprintf(wrapc_stream_size_in_src_V_V, "[[transaction]] %d\n", AESL_transaction);
        aesl_fh.write(WRAPC_STREAM_SIZE_IN_src_V_V, wrapc_stream_size_in_src_V_V);
        sprintf(wrapc_stream_size_in_src_V_V, "%d\n", aesl_tmp_1 - aesl_tmp_2);
        aesl_fh.write(WRAPC_STREAM_SIZE_IN_src_V_V, wrapc_stream_size_in_src_V_V);
        sprintf(wrapc_stream_size_in_src_V_V, "[[/transaction]] \n");
        aesl_fh.write(WRAPC_STREAM_SIZE_IN_src_V_V, wrapc_stream_size_in_src_V_V);

        // Dump the produced `dst` elements as one transaction.
        // NOTE(review): the copy below indexes aesl_tmp_3 from 0, not from
        // aesl_tmp_5 — confirm this is right when `dst` was non-empty on entry.
        sprintf(tvout_dst_V_V, "[[transaction]] %d\n", AESL_transaction);
        aesl_fh.write(AUTOTB_TVOUT_dst_V_V, tvout_dst_V_V);
        sc_bv<16> *dst_V_V_tvout_wrapc_buffer = new sc_bv<16>[aesl_tmp_4 - aesl_tmp_5];
        AESL_i = 0; //subscript for rtl array
        for (int i_0 = 0; i_0 <= aesl_tmp_4 - aesl_tmp_5 - 1 ; i_0+= 1)
        {
            sc_lv<16> dst_V_V_tmp_mem;
            if(&(aesl_tmp_3[0]) != 0)
            {
                dst_V_V_tmp_mem = (aesl_tmp_3[i_0]).to_string(2).c_str();
                dst_V_V_tvout_wrapc_buffer[0 + AESL_i].range(15, 0) = dst_V_V_tmp_mem.range(15, 0 ) ;
            }
            AESL_i++;
        }
        for (int i = 0; i < aesl_tmp_4 - aesl_tmp_5 ; i++)
        {
            sprintf(tvout_dst_V_V, "%s\n", (dst_V_V_tvout_wrapc_buffer[i]).to_string(SC_HEX).c_str());
            aesl_fh.write(AUTOTB_TVOUT_dst_V_V, tvout_dst_V_V);
        }
        tcl_file.set_num(aesl_tmp_4 - aesl_tmp_5,&tcl_file.dst_V_V_depth);
        sprintf(tvout_dst_V_V, "[[/transaction]] \n");
        aesl_fh.write(AUTOTB_TVOUT_dst_V_V, tvout_dst_V_V);
        delete [] dst_V_V_tvout_wrapc_buffer;

        // Record how many `dst` elements were produced.
        sprintf(wrapc_stream_size_out_dst_V_V, "[[transaction]] %d\n", AESL_transaction);
        aesl_fh.write(WRAPC_STREAM_SIZE_OUT_dst_V_V, wrapc_stream_size_out_dst_V_V);
        sprintf(wrapc_stream_size_out_dst_V_V, "%d\n", aesl_tmp_4 - aesl_tmp_5);
        aesl_fh.write(WRAPC_STREAM_SIZE_OUT_dst_V_V, wrapc_stream_size_out_dst_V_V);
        sprintf(wrapc_stream_size_out_dst_V_V, "[[/transaction]] \n");
        aesl_fh.write(WRAPC_STREAM_SIZE_OUT_dst_V_V, wrapc_stream_size_out_dst_V_V);

        // Give `dst` back to the caller: old content followed by new output.
        for (int i = 0; i < aesl_tmp_4; i++)
        {
            dst.write(aesl_tmp_3[i]);
        }

        // Release the scratch buffers and advance the transaction counter.
        delete [] tvin_src_V_V;
        delete [] wrapc_stream_size_in_src_V_V;
        delete [] tvout_dst_V_V;
        delete [] tvin_dst_V_V;
        delete [] wrapc_stream_size_out_dst_V_V;
        AESL_transaction++;
        tcl_file.set_num(AESL_transaction , &tcl_file.trans_num);
    }
}
// For next interface change //TODO(brugger): remove step_size //TODO(brugger): path_cnt should be uint64_t //TODO(brugger): add correlation void heston_kernel_sl( // call option calc_t log_spot_price, calc_t reversion_rate_TIMES_step_size, calc_t long_term_avg_vola, calc_t vol_of_vol_TIMES_sqrt_step_size, calc_t double_riskless_rate, // = 2 * riskless_rate calc_t vola_0, // calc_t correlation, // calc_t time_to_maturity, // both knockout calc_t log_lower_barrier_value, calc_t log_upper_barrier_value, // simulation params uint32_t step_cnt, calc_t step_size, // = time_to_maturity / step_cnt calc_t half_step_size, // = step_size / 2 calc_t sqrt_step_size, // = sqrt(step_size) calc_t barrier_correction_factor, // = BARRIER_HIT_CORRECTION * sqrt_step_size uint32_t path_cnt, hls::stream<calc_t> &gaussian_rn1, hls::stream<calc_t> &gaussian_rn2, hls::stream<calc_t> &prices) { #pragma HLS interface ap_none port=log_spot_price #pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=log_spot_price #pragma HLS interface ap_none port=reversion_rate_TIMES_step_size #pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=reversion_rate_TIMES_step_size #pragma HLS interface ap_none port=long_term_avg_vola #pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=long_term_avg_vola #pragma HLS interface ap_none port=vol_of_vol_TIMES_sqrt_step_size #pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=vol_of_vol_TIMES_sqrt_step_size #pragma HLS interface ap_none port=double_riskless_rate #pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=double_riskless_rate #pragma HLS interface ap_none port=vola_0 #pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=vola_0 // #pragma HLS interface ap_none port=correlation // #pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=correlation // #pragma HLS interface ap_none port=time_to_maturity // #pragma 
HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=time_to_maturity #pragma HLS interface ap_none port=log_lower_barrier_value #pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=log_lower_barrier_value #pragma HLS interface ap_none port=log_upper_barrier_value #pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=log_upper_barrier_value #pragma HLS interface ap_none port=step_cnt #pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=step_cnt #pragma HLS interface ap_none port=step_size #pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=step_size #pragma HLS interface ap_none port=half_step_size #pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=half_step_size #pragma HLS interface ap_none port=sqrt_step_size #pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=sqrt_step_size #pragma HLS interface ap_none port=barrier_correction_factor #pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=barrier_correction_factor #pragma HLS interface ap_none port=path_cnt #pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=path_cnt #pragma HLS interface ap_fifo port=gaussian_rn1 #pragma HLS resource core=AXI4Stream variable=gaussian_rn1 #pragma HLS interface ap_fifo port=gaussian_rn2 #pragma HLS resource core=AXI4Stream variable=gaussian_rn2 #pragma HLS interface ap_fifo port=prices #pragma HLS resource core=AXI4Stream variable=prices #pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=return //////////////////////////////////////////////////////////////////////////////////////////////////////////// state_t states[BLOCK_SIZE]; #pragma HLS data_pack variable=states for (uint32_t block = 0; block < path_cnt; block += BLOCK_SIZE) { for (uint32_t step = 0; step != step_cnt; ++step) { // TODO(brugger): use data type with less bits for inner counter for (uint32_t i = 0; i 
!= BLOCK_SIZE; ++i) { #pragma HLS PIPELINE II=1 state_t l_state; // initialize if (step == 0) { l_state.stock = log_spot_price; l_state.vola = vola_0; l_state.barrier_hit = false; } else { l_state = states[i]; } // calcualte next step state_t n_state; calc_t max_vola = MAX((calc_t) 0., l_state.vola); calc_t sqrt_vola = hls::sqrtf(max_vola); n_state.stock = l_state.stock + (double_riskless_rate - max_vola) * half_step_size + sqrt_step_size * sqrt_vola * gaussian_rn1.read(); n_state.vola = l_state.vola + reversion_rate_TIMES_step_size * (long_term_avg_vola - max_vola) + vol_of_vol_TIMES_sqrt_step_size * sqrt_vola * (calc_t) gaussian_rn2.read(); calc_t barrier_correction = barrier_correction_factor * sqrt_vola; #pragma HLS RESOURCE variable=barrier_correction \ core=FMul_meddsp n_state.barrier_hit = l_state.barrier_hit | (n_state.stock < log_lower_barrier_value + barrier_correction) | (n_state.stock > log_upper_barrier_value - barrier_correction); states[i] = n_state; // write out if (step + 1 == step_cnt && (block + i) < path_cnt) prices.write(n_state.barrier_hit ? -std::numeric_limits<calc_t>::infinity() : n_state.stock); } } } }
void accumregio( const uint32_t stepsMC, const uint32_t pathsMC, volatile uint32_t *peekStep, volatile uint32_t *peekPath, hls::stream<float> &inData, hls::stream<float> &outAccum ) { //#pragma HLS INTERFACE s_axilite port=return bundle=CONTROL //#pragma HLS INTERFACE s_axilite port=stepsMC bundle=CONTROL //#pragma HLS INTERFACE s_axilite port=pathsMC bundle=CONTROL //#pragma HLS INTERFACE s_axilite port=peekStep bundle=CONTROL //#pragma HLS INTERFACE s_axilite port=peekPath bundle=CONTROL //#pragma HLS INTERFACE axis register port=inData //#pragma HLS INTERFACE axis register port=outAccum float sums[ACCUM_ELEM]; #pragma HLS RESOURCE variable=sums core=RAM_2P_BRAM #pragma HLS DEPENDENCE variable=sums false *peekStep = 0xFFFFFFFF; *peekPath = 0xFFFFFFFF; stepsLoop:for(uint32_t step=0; step<stepsMC; ++step) { //#pragma HLS PIPELINE enable_flush resetLoop:for(uint8_t i=0; i<ACCUM_ELEM; ++i) { #pragma HLS PIPELINE II=1 sums[i] = (float) 0.0f; } *peekStep = step; // ------------------------------------------ uint8_t index = (uint8_t) 0; pathsLoop:for(uint32_t path=0; path<pathsMC; ++path) { #pragma HLS PIPELINE II=1 float data = inData.read(); float oldSum = sums[index]; float newSum = oldSum + data; sums[index] = newSum; index = (index<(ACCUM_ELEM-1))?++index:(uint8_t)0; *peekPath = path; } // ------------------------------------------ float totalSum = (float) 0.0f; totalLoop:for(uint8_t i=0; i<ACCUM_ELEM;++i) { #pragma HLS PIPELINE II=1 totalSum += sums[i]; } // ------------------------------------------ outAccum.write(totalSum); //*peekStep = step; } return; }
void lsdatagenregio ( const uint32_t stepsMC, const uint32_t pathsMC, const float K, const uint32_t callPut, volatile uint32_t *peekStep, volatile uint32_t *peekPath, hls::stream<float> &stock, hls::stream<float> &cashFlow, hls::stream<float> &stream_x0, hls::stream<float> &stream_x1, hls::stream<float> &stream_x2, hls::stream<float> &stream_x3, hls::stream<float> &stream_x4, hls::stream<float> &stream_y, hls::stream<float> &stream_yx, hls::stream<float> &stream_yx2 ) { #pragma HLS INTERFACE axis register port=stock #pragma HLS INTERFACE axis register port=cashFlow #pragma HLS INTERFACE axis register port=stream_x0 #pragma HLS INTERFACE axis register port=stream_x1 #pragma HLS INTERFACE axis register port=stream_x2 #pragma HLS INTERFACE axis register port=stream_x3 #pragma HLS INTERFACE axis register port=stream_x4 #pragma HLS INTERFACE axis register port=stream_y #pragma HLS INTERFACE axis register port=stream_yx #pragma HLS INTERFACE axis register port=stream_yx2 #pragma HLS INTERFACE s_axilite port=peekStep bundle=CONTROL #pragma HLS INTERFACE s_axilite port=peekPath bundle=CONTROL #pragma HLS INTERFACE s_axilite port=callPut bundle=CONTROL #pragma HLS INTERFACE s_axilite port=K bundle=CONTROL #pragma HLS INTERFACE s_axilite port=pathsMC bundle=CONTROL #pragma HLS INTERFACE s_axilite port=stepsMC bundle=CONTROL #pragma HLS INTERFACE s_axilite port=return bundle=CONTROL *peekStep = 0xFFFFFFFF; *peekPath = 0xFFFFFFFF; stepsLoop:for(uint32_t step=0; step < stepsMC; ++step) { *peekStep = step; pathsLoop:for(uint32_t path=0; path<pathsMC; ++path) { //#pragma HLS PIPELINE II=1 enable_flush #pragma HLS PIPELINE II=1 float s = stock.read(); float cflow = cashFlow.read(); // --------------------------------- // in-the-money calculation float diff = (s-K); float payoff; if(callPut == 0) payoff = diff; else payoff = -diff; bool inTheMoney; if( payoff > 0.0f ) inTheMoney = true; else inTheMoney = false; // --------------------------------- // basis functions float s2 = s*s; 
float x0; float x1; float x2; float y; if(inTheMoney == true) { x0 = (float) 1.0f; x1 = (float) s; x2 = (float) s2; y = (float) cflow; } else { x0 = (float) 0.0f; x1 = (float) 0.0f; x2 = (float) 0.0f; y = (float) 0.0f; } // remaining multipliers float x3 = x1*x2; float x4 = x2*x2; float yx = y*x1; float yx2 = y*x2; // write to streams stream_x0.write(x0); stream_x1.write(x1); stream_x2.write(x2); stream_x3.write(x3); stream_x4.write(x4); stream_y.write(y); stream_yx.write(yx); stream_yx2.write(yx2); //*peekStep = step; *peekPath = path; } } return; }
void lsupdate1SW ( const uint32_t stepsMC, const uint32_t pathsMC, const float K, const uint32_t callPut, hls::stream<float> &stock, hls::stream<float> &b0_in, hls::stream<float> &b1_in, hls::stream<float> &b2_in, hls::stream<float> &contin_out, hls::stream<float> &payoff_out ) { printf("lsupdate1SW\n"); printf("lsupdateSW\n"); stepsLoop:for(uint32_t step=0; step<=stepsMC; ++step) { // --------------------------------- // continuation value float b0; float b1; float b2; if(step == 0) { b0 = (float) 0.0f; b1 = (float) 0.0f; b2 = (float) 0.0f; } else { b0 = b0_in.read(); b1 = b1_in.read(); b2 = b2_in.read(); } // ------------------------------------- pathsLoop:for(uint32_t path=0; path<pathsMC; ++path) { #pragma HLS PIPELINE II=1 enable_flush float s = stock.read(); // --------------------------------- // payoff calculation float diff = (s-K); float callPutDiff; if(callPut == 0) callPutDiff = diff; else callPutDiff = -diff; float payoff = fmaxf(callPutDiff, (float) 0.0f); // --------------------------------- // basis functions float s2 = s*s; float x0 = (float) 1.0f; float x1 = s; float x2 = s2; // --------------------------------- // continuation value float continuation = b0*x0 + b1*x1 + b2*x2; // --------------------------------- // write to output contin_out.write(continuation); payoff_out.write(payoff); } } return; }
void fe_zc(hls::stream< ap_uint<32> > sampleFifo, hls::stream< ap_uint<32> > featureFifo, ap_uint<8> windowSize, ap_uint<32> threshold) { #pragma HLS INTERFACE ap_ctrl_none port=return #pragma HLS INTERFACE ap_fifo port=featureFifo #pragma HLS INTERFACE ap_fifo port=sampleFifo ap_uint<32> data; ap_int<16> sampleChannel1 = 0; ap_int<16> sampleChannel2 = 0; ap_uint<32> zcChannel1 = 0; ap_uint<32> zcChannel2 = 0; ap_int<2> stateChannel1 = 0; ap_int<2> stateChannel2 = 0; ap_uint<8> cntSamples = 0; // Wait for Samples to arrive in FIFO while( windowSize == 0 ) { } while(1) { zcChannel1 = 0; zcChannel2 = 0; stateChannel1 = 0; stateChannel2 = 0; // Count zero-crossing for channel 1 & 2 for(cntSamples=0; cntSamples < windowSize; cntSamples++) { // Read data from Sample-FIFO // 2 16 bit Samples at one position in 32 bit FIFO => Process 2 channels in parallel data = sampleFifo.read(); sampleChannel1 = data(15, 0); if( abs2(sampleChannel1) < threshold ) { sampleChannel1 = 0; } sampleChannel2 = data(31, 16); if( abs2(sampleChannel2) < threshold ) { sampleChannel2 = 0; } // Check whether a zero-crossing occurred or not // Channel 1 if( stateChannel1 == 0 ) { if( sampleChannel1 < 0 ) { stateChannel1 = -1; } else if( sampleChannel1 == 0 ) { stateChannel1 = 0; } else { stateChannel1 = 1; } } else if( stateChannel1 < 0 ) { if( sampleChannel1 > 0 ) { zcChannel1++; if( sampleChannel1 < 0 ) { stateChannel1 = -1; } else if( sampleChannel1 == 0 ) { stateChannel1 = 0; } else { stateChannel1 = 1; } } } else if( stateChannel1 > 0 ) { if( sampleChannel1 < 0 ) { zcChannel1++; if( sampleChannel1 < 0 ) { stateChannel1 = -1; } else if( sampleChannel1 == 0 ) { stateChannel1 = 0; } else { stateChannel1 = 1; } } } // Channel 2 if( stateChannel2 == 0 ) { if( sampleChannel2 < 0 ) { stateChannel2 = -1; } else if( sampleChannel2 == 0 ) { stateChannel2 = 0; } else { stateChannel2 = 1; } } else if( stateChannel2 < 0 ) { if( sampleChannel2 > 0 ) { zcChannel2++; if( sampleChannel2 < 0 ) { stateChannel2 = 
-1; } else if( sampleChannel2 == 0 ) { stateChannel2 = 0; } else { stateChannel2 = 1; } } } else if( stateChannel2 > 0 ) { if( sampleChannel2 < 0 ) { zcChannel2++; if( sampleChannel2 < 0 ) { stateChannel2 = -1; } else if( sampleChannel2 == 0 ) { stateChannel2 = 0; } else { stateChannel2 = 1; } } } } // Write back features to Feature-FIFO featureFifo.write(zcChannel1); featureFifo.write(zcChannel2); } }
void nufft_top_pyr(hls::stream<t_input_complex> &sig, hls::stream<t_disp_scalar> &dispFilter, hls::stream<t_nufft_output_complex> &sigStreamOutH, hls::stream<t_nufft_output_complex> &sigStreamOutL0, hls::stream<t_nufft_output_complex> &sigStreamOutLA, hls::stream<t_nufft_output_complex> &sigStreamOutLP) { #pragma HLS inline off /* #ifndef NUFFTB #pragma HLS INTERFACE axis port=sig #pragma HLS INTERFACE axis port=dispFilter #pragma HLS INTERFACE axis port=sigStreamOutH #pragma HLS INTERFACE axis port=sigStreamOutL0 #pragma HLS INTERFACE axis port=sigStreamOutLA #pragma HLS INTERFACE axis port=sigStreamOutLP #endif */ #pragma HLS data_pack variable=sig #pragma HLS DATAFLOW hls::stream<t_input_complex> sigH; hls::stream<t_input_complex> sigL0; hls::stream<t_input_complex> sigLA; hls::stream<t_disp_scalar > disp0; hls::stream<t_disp_scalar > disp1; hls::stream<t_disp_scalar > disp2; #pragma HLS data_pack variable=sigH #pragma HLS data_pack variable=sigL0 #pragma HLS data_pack variable=sigLA #pragma HLS data_pack variable=sigStreamOutH #pragma HLS data_pack variable=sigStreamOutL0 #pragma HLS data_pack variable=sigStreamOutLA #pragma HLS data_pack variable=sigStreamOutLP #pragma HLS stream variable=disp0 depth=512 #pragma HLS stream variable=disp1 depth=512 #pragma HLS stream variable=disp2 depth=512 #pragma HLS stream variable=sigH depth=512 #pragma HLS stream variable=sigL0 depth=512 #pragma HLS stream variable=sigLA depth=512 //#pragma HLS stream variable=sigStreamOutH depth=512 //#pragma HLS stream variable=sigStreamOutL0 depth=512 //#pragma HLS stream variable=sigStreamOutLA depth=490 //#pragma HLS stream variable=sigStreamOutLP depth=64 hls::stream<t_input_complex> sigInMem; #pragma HLS data_pack variable=sigInMem #pragma HLS stream variable=sigInMem depth=1520 for(int coefIdx = 0;coefIdx < 1520; coefIdx++) { #pragma HLS pipeline sigInMem.write(sig.read()); } int l = 0; int i = 0; for(int coefIdx = 0;coefIdx < 1520; coefIdx++) { #pragma HLS pipeline 
t_input_complex v = sigInMem.read(); if (coefIdx >=0 && coefIdx < climits[1]) sigH.write(v); if (coefIdx >=climits[1] && coefIdx < climits[2]) sigL0.write(v); if (coefIdx >=climits[2] && coefIdx < climits[6]) sigLA.write(v); if (coefIdx >=climits[6]) sigStreamOutLP.write(v); t_disp_scalar dispVal = dispFilter.read(); if (coefIdx >=0 && coefIdx < climits[1]) disp0.write(dispVal); if (coefIdx >=climits[1] && coefIdx < climits[2]) disp1.write(dispVal); if (coefIdx >=climits[2] && coefIdx < climits[6]) disp2.write(dispVal); } part1: nufft_top<C, 512>( sigH, disp0, sigStreamOutH, 512,255); part2: nufft_top<C, 512>( sigL0, disp1, sigStreamOutL0, 512,255); part3: //const int limits[] = { 512, 512,256,128,64,32,16}; //const int Klimits[] = { 255, 255, 127, 63, 31, 15, 3}; for(int k=0;k<4;k++) { //#pragma HLS DATAFLOW const int limit = 256>>k; const int klimit = 127 >> k; int level = k; nufft_top<C, 256>(sigLA, disp2, sigStreamOutLA, limit, level, klimit); } // for(int k=2;k<6;k++) { // nufft_top<C, 256>(sigL[2], disp2, sigStreamOutLA, limits[k],k-1,Klimits[k]); // } //if (disp0.) }