void pricing(hls::stream<float> in, hls::stream<float> out, hls::stream<float> out2, float strike_price) {
	//#pragma HLS PIPELINE II=1

	const int BLOCK = 64;
	static ap_uint<32> res_cnt[BLOCK];
	static float res_sum[BLOCK];
	static float res_prod[BLOCK];

	for (int i = 0; i < BLOCK; ++i) {
		#pragma HLS PIPELINE II=1
		float path = in.read();
		float res = max_0(hls::expf(path) - strike_price);


		ap_uint<32> l_cnt = res_cnt[i];
		float l_sum = res_sum[i];
		float l_prod = res_prod[i];

		ap_uint<32> n_cnt = l_cnt + 1;
		float delta = res - l_sum;
		float n_sum = l_sum + delta / n_cnt;
		float n_prod = l_prod + delta * (res - n_sum);

		res_cnt[i] = n_cnt;
		res_sum[i] = n_sum;
		res_prod[i] = n_prod;


		out.write(res_sum[i]);
		out2.write(res_prod[i]);
	}

	//std::max(0.f, hls::expf(path) - strike_price);
}
예제 #2
0
void dut(
    hls::stream<bit32_t> &strm_in,
    hls::stream<bit32_t> &strm_out
)
{
  // -----------------------------
  // YOUR CODE GOES HERE
  // -----------------------------
  digit  in_digit;
  bit4_t out_bit4;

  // ------------------------------------------------------
  // Input processing
  // ------------------------------------------------------
  // read the two input 32-bit words (low word first)
  bit32_t input_lo = strm_in.read();
  bit32_t input_hi = strm_in.read();

  // Convert input raw bits to digit 49-bit representation via bit slicing
  in_digit(31, 0) = input_lo;
  in_digit(in_digit.length()-1, 32) = input_hi;

  // ------------------------------------------------------
  // Call digitrec
  // ------------------------------------------------------
  out_bit4 = digitrec( in_digit );

  // ------------------------------------------------------
  // Output processing
  // ------------------------------------------------------

  // Write out the recognized digit (0-9)
  strm_out.write( out_bit4(out_bit4.length()-1, 0) );
}
예제 #3
0
void fir_sw(hls::stream<int> &input_val, hls::stream<int> &output_val)
{
	int i;
	static short shift_reg[TAPS] = {0};
	const short coeff[TAPS] = {6,0,-4,-3,5,6,-6,-13,7,44,64,44,7,-13,
                                 -6,6,5,-3,-4,0,6};

	for(i=0; i < RUN_LENGTH; i++){
		int sample;
		sample = input_val.read();

		//Shift Register
		for(int j=0; j < TAPS-1; j++){
			shift_reg[j] = shift_reg[j+1];
		}
		shift_reg[TAPS-1] = sample;

		//Filter Operation
		int acc = 0;
		for(int k=0; k < TAPS; k++){
			acc += shift_reg[k] * coeff[k];
		}
		output_val.write(acc);
	}
}
예제 #4
0
/**
 * Write the directions as u32 to the output AXI stream
 */
void writeDirections(hls::stream<uint32>& out, uint16 numDirections) {
#pragma HLS INLINE

	out.write(numDirections);

	uint16 directionIx = 0;

	writeDirectionsLoop: for (uint8 i = 0; i < MAX_NUM_DIRECTIONS / DIRECTIONS_IN_BUS; i++) {
#pragma HLS LOOP_TRIPCOUNT min=8 max=32 // Min: MIN_NUM_DIRECTIONS / DIRECTIONS_IN_BUS (8) Max: MAX_NUM_DIRECTIONS / DIRECTIONS_IN_BUS (32) Actual: numDirections
		uint32 output = 0;

		// Compress from a series of u8 to a smaller series of u32 with padding
		writeDirectionsShiftLoop: for (uint8 j = 0; j < DIRECTIONS_IN_BUS; j++) {
#pragma HLS UNROLL
			output <<= DIRECTION_SIZE;

			uint8 padChar = 0;
			output |= i * DIRECTIONS_IN_BUS + j < numDirections ? directions[directionIx + j] : padChar;
		}

		out.write(output);

		directionIx += DIRECTIONS_IN_BUS;

		if (directionIx >= numDirections) {
			break;
		}
	}
}
예제 #5
0
void toplevel(hls::stream<uint32>& in, hls::stream<uint32>& out) {

#pragma HLS INTERFACE ap_fifo port=in
#pragma HLS INTERFACE ap_fifo port=out
#pragma HLS RESOURCE variable=in core=AXI4Stream
#pragma HLS RESOURCE variable=out core=AXI4Stream
#pragma HLS INTERFACE ap_ctrl_none port=return

#pragma HLS ARRAY_PARTITION variable=openings complete dim=1
#pragma HLS ARRAY_PARTITION variable=inGrid complete dim=1
#pragma HLS ARRAY_MAP variable=directions instance=instance1 horizontal
#pragma HLS ARRAY_MAP variable=tile instance=instance1 horizontal

	uint8 tileCoords = in.read();
	uint8 tileSize = in.read();
	uint8 tileDataLen = in.read(); // Number of 32-bit bits
	readData(in, tileDataLen);

	uint8 numOpenings = findOpenings(tileSize);
	out.write(tileCoords);
	out.write(numOpenings);

	if (numOpenings > 0) {
		writeEntrance(out);
		findDeadEnds(tileSize);
		uint16 numDirections = findPath(tileSize);
		writeDirections(out, numDirections);
	}
}
void antithetic(
		hls::stream<float> &rn_in,
		hls::stream<float> &rn_out_1,
		hls::stream<float> &rn_out_2)
{
	#pragma HLS interface ap_fifo port=rn_in
	#pragma HLS resource core=AXI4Stream variable=rn_in

	#pragma HLS interface ap_fifo port=rn_out_1
	#pragma HLS resource core=AXI4Stream variable=rn_out_1
	#pragma HLS interface ap_fifo port=rn_out_2
	#pragma HLS resource core=AXI4Stream variable=rn_out_2

	#pragma HLS interface ap_ctrl_none port=return

	//for (int i = 0; i < 10 / 2; ++i) {
	{
	//while (true) {
		#pragma HLS PIPELINE II=2

		float r1 = rn_in.read();
		float r2 = rn_in.read();

		rn_out_1.write(r1);
		rn_out_2.write(r2);

		rn_out_1.write(negate(r1));
		rn_out_2.write(negate(r2));
	}
}
void gauss_transform(
		hls::stream<uint32_t> &uniform_rns,
		hls::stream<float> &gaussian_rns) {
	#pragma HLS interface ap_fifo port=uniform_rns
	#pragma HLS resource core=AXI4Stream variable=uniform_rns

	#pragma HLS interface ap_fifo port=gaussian_rns
	#pragma HLS resource core=AXI4Stream variable=gaussian_rns

	#pragma HLS interface ap_ctrl_none port=return

	float u1, u2, r, z1, z2;
	while (true){
	//for (int i = 0; i < 100/2; ++i) {
		#pragma HLS PIPELINE II=2
		// intervall (0:1]
		u1 = ((float)uniform_rns.read() + 1.f) * (float)(1.0 / 4294967296.0);
		// intervall (0:2PI]
		u2 = ((float)uniform_rns.read() + 1.f) * (float)(2 * M_PI / 4294967296.0);
		r = hls::sqrtf(-2 * hls::logf(u1));
		z1 = r * hls::cosf(u2);
		z2 = r * hls::sinf(u2);
		gaussian_rns.write(z1);
		gaussian_rns.write(z2);
	}

}
void dut(
    hls::stream<bit32_t> &strm_in,
    hls::stream<bit32_t> &strm_out
)
{
  // Declare the input and output variables
  complex<float> out[4096];
  complex<float> complex_In1[4096];
  complex<float> complex_In2[4096];
  float input_data_re = 0;

  //-------------------------------------------------------
  // Input processing
  //-------------------------------------------------------
  // Read the two input 32-bit words
  bit32_t input1_lo;
  bit32_t input2_hi;
  bit32_t output_r;
  bit32_t output_i;
 
  for(int i = 0; i < 4096 ;i++) 
  {
    input1_lo = strm_in.read();
    input2_hi = strm_in.read();
    input_data_re = input1_lo;
    complex_In1[i] = complex<float>(input_data_re, 0);
    input_data_re = input2_hi;
    complex_In2[i] = complex<float>(input_data_re, 0);

  }

//  for(int m = 0; m < 4096 ;m++)
//  {
////    input_data_re = in1[m];
//    complex_In1[m] = complex<float>(80, 0);
////    input_data_re = in2[m];
//    complex_In2[m] = complex<float>(80, 0);
//  }
 
  // ------------------------------------------------------
  // Call Hybrid Imaging
  // ------------------------------------------------------
  hybrid_image(12, complex_In1, complex_In2, out );

  // ------------------------------------------------------
  // Output processing
  // ------------------------------------------------------
  // Write out the computed digit value
  
  for(int i = 0; i < 4096 ;i++)  
  { 
//    printf("%f\n",out[i]);
//    output = out[i];
    output_r = out[i].real();
    output_i = out[i].imag();
    strm_out.write(output_r);
    strm_out.write(output_i );
  }
}
void fill_gaussian_rng_stream(hls::stream<calc_t> &rns, unsigned size) {
	double z0, z1;
	for (unsigned i = 0; i < size; ++i) {
		box_muller(z0, z1);
		rns.write(z0);
		if (++i < size)
			rns.write(z1);
	}
}
// -----------------------------------------------------------
void accumSW(	const uint32_t stepsMC,
				const uint32_t pathsMC,
				hls::stream<float> &inData,
				hls::stream<float> &outAccum )
{
	printf("accumSW\n");

	float sums[ACCUM_ELEM];
	#pragma HLS RESOURCE variable=sums core=RAM_2P_BRAM
	#pragma HLS DEPENDENCE variable=sums false

	stepsLoop:for(uint32_t step=0; step<=stepsMC; ++step)
	{
		// ------------------------------------------
		resetLoop:for(uint8_t i=0; i<ACCUM_ELEM; ++i)
		{
	#pragma HLS PIPELINE II=1 enable_flush

			sums[i] = (float) 0.0f;
		}

		// ------------------------------------------
		uint8_t index = (uint8_t) 0;

		pathsLoop:for(uint32_t i=0; i<pathsMC; ++i)
		{
	#pragma HLS PIPELINE II=1 enable_flush

			float data = inData.read();
			float oldSum = sums[index];
			float newSum = oldSum + data;

			sums[index] = newSum;

			index = (index<(ACCUM_ELEM-1))?++index:(uint8_t)0;
		}

		// ------------------------------------------
		float totalSum = (float) 0.0f;

		totalLoop:for(uint8_t i=0; i<ACCUM_ELEM;++i)
		{
	#pragma HLS PIPELINE II=1
			totalSum += sums[i];
		}

		// ------------------------------------------
		outAccum.write(totalSum);
	}

	return;
}
예제 #11
0
void fe_wfl(hls::stream< ap_uint<32> > sampleFifo, hls::stream< ap_uint<32> > featureFifo, ap_uint<8> windowSize) {
#pragma HLS INTERFACE ap_ctrl_none port=return
#pragma HLS INTERFACE ap_fifo port=featureFifo
#pragma HLS INTERFACE ap_fifo port=sampleFifo

    ap_uint<32> data;

    ap_int<32> wflChannel1 = 0;
    ap_int<32> wflChannel2 = 0;

    ap_int<16> sampleChannel1 = 0;
    ap_int<16> sampleChannel2 = 0;

    ap_int<16> prevSampleChannel1 = 0;
    ap_int<16> prevSampleChannel2 = 0;   

    ap_uint<8> cntSamples = 0;
    
    // Wait for Samples to arrive in FIFO
    while( windowSize == 0 ) {

    }

    while(1) {
        wflChannel1 = 0;
        wflChannel2 = 0;

        // Count zero-crossing for channel 1 & 2
        for(cntSamples = 0; cntSamples < windowSize; cntSamples++) {
            // Read data from Sample-FIFO
            // 2 16 bit Samples at one position in 32 bit FIFO => Process 2 channels in parallel
            data = sampleFifo.read();

            sampleChannel1 = data(15, 0);
            sampleChannel2 = data(31, 16);

            if (cntSamples > 0) {
                wflChannel1 += abs2(sampleChannel1 - prevSampleChannel1);
                wflChannel2 += abs2(sampleChannel2 - prevSampleChannel2);
            }

            prevSampleChannel1 = sampleChannel1;
            prevSampleChannel2 = sampleChannel2;
        }

        // Write back features to Feature-FIFO
        featureFifo.write(wflChannel1);
        featureFifo.write(wflChannel2);
    }
}
// -----------------------------------------------------------
void lsupdate2SW	( 	const uint32_t stepsMC,
						const uint32_t pathsMC,
						const float discount,
						hls::stream<float> &contin_in,
						hls::stream<float> &payoff_in,
						hls::stream<float> &cashFlow_in,
						hls::stream<float> &cashFlow_out,
						hls::stream<float> &cashFlowDisc_out,
						hls::stream<float> &toAccum_out )
{
	printf("lsupdate2SW\n");

	zerosLoop:for(uint32_t path=0; path<pathsMC; ++path)
	{
#pragma HLS PIPELINE II=1 enable_flush
		// ---------------------------------
		// write to outputs
		cashFlow_out.write((float) 0.0f);

		cashFlowDisc_out.write((float) 0.0f);
	}


	stepsLoop:for(uint32_t step=0; step<=stepsMC; ++step)
	{
		pathsLoop:for(uint32_t path=0; path<pathsMC; ++path)
		{
	#pragma HLS PIPELINE II=1 enable_flush

			float continuation = contin_in.read();
			float payoff       = payoff_in.read();

			float cashFlow = cashFlow_in.read();

			float discountedCashFlow = discount * cashFlow;

			// ---------------------------------
			float newY;

			if( (payoff > (float) 0.0f) && (payoff >= (float) continuation) )
				newY = payoff;
			else
				newY = discountedCashFlow;

			// ---------------------------------
			// write to outputs
			if(step < stepsMC)
				cashFlow_out.write(newY);

			if(step < stepsMC)
				cashFlowDisc_out.write(discount * newY);

			if(step == stepsMC)
				toAccum_out.write(newY);
		}
	}

	return;
}
void cflowaccumregio(	const uint32_t pathsMC,
						float *totalSum,
						hls::stream<float> &inData )

{
#pragma HLS INTERFACE s_axilite port=return bundle=CONTROL
#pragma HLS INTERFACE s_axilite port=pathsMC bundle=CONTROL
#pragma HLS INTERFACE s_axilite port=totalSum bundle=CONTROL

#pragma HLS INTERFACE axis register port=inData


	////////////////////////////////////////////////////////////////////////////////////////////////////////////

	//static float sums[N];
	float sums[NELEMENTS];
	#pragma HLS RESOURCE variable=sums core=RAM_2P_BRAM
	#pragma HLS dependence variable=sums false

	resetLoop:for(uint8_t i=0; i<NELEMENTS; ++i)
	{
#pragma HLS PIPELINE II=1

		sums[i] = (float) 0.0f;
	}

	uint8_t index = (uint8_t) 0;

	pathsLoop:for(uint32_t i=0; i<pathsMC; ++i)
	{
#pragma HLS PIPELINE II=1

		float data = inData.read();
		float oldSum = sums[index];
		float newSum = oldSum + data;

		sums[index] = newSum;

		index = (index<(NELEMENTS-1))?++index:(uint8_t)0;
	}


	float total = (float) 0.0f;

	totalLoop:for(uint8_t i=0; i<NELEMENTS;++i)
	{
#pragma HLS PIPELINE II=1
		total += sums[i];
	}

	*totalSum = total;

	return;
}
예제 #14
0
//Top-level function
void toplevel(hls::stream<uint32> &input, hls::stream<uint32> &output) {
#pragma HLS INTERFACE ap_fifo port=input
#pragma HLS INTERFACE ap_fifo port=output
#pragma HLS RESOURCE variable=input core=AXI4Stream
#pragma HLS RESOURCE variable=output core=AXI4Stream
#pragma HLS INTERFACE ap_ctrl_none port=return

		uint32 command;

		init();

		side = input.read();
		ntiles = side * side;

		for(u8 t = 0; t < ntiles; t++)
			for (u8 e = 0; e < 4; e++)
				tiles[t][e] = input.read();

		mapcolours();

		// we start off with tile 0 in position 0
	    avail &= ~BIT36(0);

	    seq = 1;
	    while (!terminate) {
	    	if (seq == 1)
	    		solve();

			if (terminate) {
				output.write(0);
				break;
			}

			/* use magic flag to enforce sequencing */
			seq = 0;
			output.write(1);
			if (seq == 0)
				command = input.read();
			seq = 1;

			/* command 0: terminate */
			if (command == 0)
				break;

			/* command 1: write output */
			if (command == 1)
				for (u8 p = 0; p < ntiles; p++)
					for(u8 e = 0; e < 4; e++)
						output.write(colour(p, e));

			/* any other command (canonically 2) will cause search
			 * to continue without output */
			if (seq == 0)
				backtrack();
			seq = 1;
	    }
}
예제 #15
0
/**
 * Read data from the AXI stream and convert from uint32 to uint1
 */
void readData(hls::stream<uint32> &in, uint8 tileDataLen) {
#pragma HLS INLINE

	readLoop: for (uint8 i = 0; i < tileDataLen; i++) {
#pragma HLS LOOP_TRIPCOUNT min=5 max=18 // Min: MIN_TILE_DATA_32 (5) Max: MAX_TILE_DATA_32 (18) Actual: tileDataLen
		inGrid[i] = in.read();

		// Convert data from 18 * 32 => 576 * 1
		readShiftLoop: for (uint8 j = 0; j < BUS_WIDTH; j++) {
#pragma HLS UNROLL

			uint16 ix = i * BUS_WIDTH + (BUS_WIDTH - j - 1); // (BUS_WIDTH - j - 1) necessary to preserve direction
			tile[ix] = (inGrid[i] & (1 << j)) ? 1 : 0;
		}
	}
}
예제 #16
0
/**
 * Write out the entrance coordinates and directions
 */
void writeEntrance(hls::stream<uint32>& out) {
#pragma HLS INLINE

	uint32 output = 0;

	output |= openings[0]; // Entrance row
	output <<= 4;
	output |= openings[1]; // Entrance column
	output <<= 4;
	output |= openings[2]; // Entrance side
	output <<= 4;
	output |= openings[3]; // Exit row
	output <<= 4;
	output |= openings[4]; // Exit col
	output <<= 4;
	output |= openings[5]; // Exit side

	out.write(output);
}
void lsdatagenregio ( const uint32_t stepsMC,
						const uint32_t pathsMC,
						const float K,
						const uint32_t callPut,
						volatile uint32_t *peekStep,
						volatile uint32_t *peekPath,
						hls::stream<float> &stock,
						hls::stream<float> &cashFlow,
						hls::stream<float> &stream_x0,
						hls::stream<float> &stream_x1,
						hls::stream<float> &stream_x2,
						hls::stream<float> &stream_x3,
						hls::stream<float> &stream_x4,
						hls::stream<float> &stream_y,
						hls::stream<float> &stream_yx,
						hls::stream<float> &stream_yx2 )
{
#pragma HLS INTERFACE axis register port=stock
#pragma HLS INTERFACE axis register port=cashFlow

#pragma HLS INTERFACE axis register port=stream_x0
#pragma HLS INTERFACE axis register port=stream_x1
#pragma HLS INTERFACE axis register port=stream_x2
#pragma HLS INTERFACE axis register port=stream_x3
#pragma HLS INTERFACE axis register port=stream_x4
#pragma HLS INTERFACE axis register port=stream_y
#pragma HLS INTERFACE axis register port=stream_yx
#pragma HLS INTERFACE axis register port=stream_yx2

#pragma HLS INTERFACE s_axilite port=peekStep bundle=CONTROL
#pragma HLS INTERFACE s_axilite port=peekPath bundle=CONTROL
#pragma HLS INTERFACE s_axilite port=callPut bundle=CONTROL
#pragma HLS INTERFACE s_axilite port=K bundle=CONTROL
#pragma HLS INTERFACE s_axilite port=pathsMC bundle=CONTROL
#pragma HLS INTERFACE s_axilite port=stepsMC bundle=CONTROL
#pragma HLS INTERFACE s_axilite port=return bundle=CONTROL


	*peekStep = 0xFFFFFFFF;
	*peekPath = 0xFFFFFFFF;

	stepsLoop:for(uint32_t step=0; step < stepsMC; ++step)
	{
		*peekStep = step;

		pathsLoop:for(uint32_t path=0; path<pathsMC; ++path)
		{
//#pragma HLS PIPELINE II=1 enable_flush
#pragma HLS PIPELINE II=1

			float s = stock.read();
			float cflow = cashFlow.read();

			// ---------------------------------
			// in-the-money calculation
			float diff = (s-K);

			float payoff;

			if(callPut == 0)
				payoff = diff;
			else
				payoff = -diff;

			bool inTheMoney;

			if( payoff > 0.0f )
				inTheMoney = true;
			else
				inTheMoney = false;

			// ---------------------------------
			// basis functions
			float s2 = s*s;

			float x0;
			float x1;
			float x2;
			float y;

			if(inTheMoney == true)
			{
				x0 = (float) 1.0f;
				x1 = (float) s;
				x2 = (float) s2;
				y  = (float) cflow;
			}
			else
			{
				x0 = (float) 0.0f;
				x1 = (float) 0.0f;
				x2 = (float) 0.0f;
				y  = (float) 0.0f;
			}

			// remaining multipliers
			float x3  = x1*x2;
			float x4  = x2*x2;
			float yx  =  y*x1;
			float yx2 =  y*x2;



			// write to streams
			stream_x0.write(x0);
			stream_x1.write(x1);
			stream_x2.write(x2);
			stream_x3.write(x3);
			stream_x4.write(x4);
			stream_y.write(y);
			stream_yx.write(yx);
			stream_yx2.write(yx2);



			//*peekStep = step;
			*peekPath = path;
		}
	}
	return;
}
void pyrconstuct_top (
    std::complex<ap_fixed<16, 1, (ap_q_mode) 5, (ap_o_mode)3, 0> > imgIn[512],
    hls::stream<std::complex<ap_fixed<17, 6, (ap_q_mode) 0, (ap_o_mode)3, 0> > >& pyrFilOut,
    const int nL)
{
    fstream wrapc_switch_file_token;
    wrapc_switch_file_token.open(".hls_cosim_wrapc_switch.log");
    int AESL_i;
    if (wrapc_switch_file_token.good())
    {
        static unsigned AESL_transaction_pc = 0;
        string AESL_token;
        string AESL_num;
        static AESL_FILE_HANDLER aesl_fh;

        // define output stream variables: "pyrFilOut"
        std::vector<std::complex<ap_fixed<17, 6, (ap_q_mode) 0, (ap_o_mode)3, 0> > > aesl_tmp_0;
        int aesl_tmp_1;
        int aesl_tmp_2 = 0;

        // read output stream size: "pyrFilOut"
        aesl_fh.read(WRAPC_STREAM_SIZE_OUT_pyrFilOut_V, AESL_token); // [[transaction]]
        aesl_fh.read(WRAPC_STREAM_SIZE_OUT_pyrFilOut_V, AESL_num); // transaction number

        if (atoi(AESL_num.c_str()) == AESL_transaction_pc)
        {
            aesl_fh.read(WRAPC_STREAM_SIZE_OUT_pyrFilOut_V, AESL_token); // pop_size
            aesl_tmp_1 = atoi(AESL_token.c_str());
            aesl_fh.read(WRAPC_STREAM_SIZE_OUT_pyrFilOut_V, AESL_token); // [[/transaction]]
        }

        // output port post check: "pyrFilOut_V"
        aesl_fh.read(AUTOTB_TVOUT_PC_pyrFilOut_V, AESL_token); // [[transaction]]
        if (AESL_token != "[[transaction]]")
        {
            exit(1);
        }
        aesl_fh.read(AUTOTB_TVOUT_PC_pyrFilOut_V, AESL_num); // transaction number

        if (atoi(AESL_num.c_str()) == AESL_transaction_pc)
        {
            aesl_fh.read(AUTOTB_TVOUT_PC_pyrFilOut_V, AESL_token); // data

            std::vector<sc_bv<34> > pyrFilOut_V_pc_buffer;
            int i = 0;

            while (AESL_token != "[[/transaction]]")
            {
                bool no_x = false;
                bool err = false;

                // search and replace 'X' with "0" from the 1st char of token
                while (!no_x)
                {
                    size_t x_found = AESL_token.find('X');
                    if (x_found != string::npos)
                    {
                        if (!err)
                        {
                            cerr << "@W [SIM-201] RTL produces unknown value 'X' on port 'pyrFilOut_V', possible cause: There are uninitialized variables in the C design." << endl;
                            err = true;
                        }
                        AESL_token.replace(x_found, 1, "0");
                    }
                    else
                    {
                        no_x = true;
                    }
                }

                no_x = false;

                // search and replace 'x' with "0" from the 3rd char of token
                while (!no_x)
                {
                    size_t x_found = AESL_token.find('x', 2);

                    if (x_found != string::npos)
                    {
                        if (!err)
                        {
                            cerr << "@W [SIM-201] RTL produces unknown value 'X' on port 'pyrFilOut_V', possible cause: There are uninitialized variables in the C design." << endl;
                            err = true;
                        }
                        AESL_token.replace(x_found, 1, "0");
                    }
                    else
                    {
                        no_x = true;
                    }
                }

                // push token into output port buffer
                if (AESL_token != "")
                {
                    pyrFilOut_V_pc_buffer.push_back(AESL_token.c_str());
                    i++;
                }

                aesl_fh.read(AUTOTB_TVOUT_PC_pyrFilOut_V, AESL_token); // data or [[/transaction]]

                if (AESL_token == "[[[/runtime]]]" || aesl_fh.eof(AUTOTB_TVOUT_PC_pyrFilOut_V))
                {
                    exit(1);
                }
            }

            // correct the buffer size the current transaction
            if (i != aesl_tmp_1)
            {
                aesl_tmp_1 = i;
            }

            if (aesl_tmp_1 > 0 && aesl_tmp_0.size() < aesl_tmp_1)
            {
                int aesl_tmp_0_size = aesl_tmp_0.size();

                for (int tmp_aesl_tmp_0 = 0; tmp_aesl_tmp_0 < aesl_tmp_1 - aesl_tmp_0_size; tmp_aesl_tmp_0++)
                {
                    std::complex<ap_fixed<17, 6, (ap_q_mode) 0, (ap_o_mode)3, 0> > tmp;
                    aesl_tmp_0.push_back(tmp);
                }
            }

            // ***********************************
            if (i > 0)
            {
                // RTL Name: pyrFilOut_V
                {
                    // bitslice(16, 0)
                    // {
                    // celement: pyrFilOut.V._M_real.V(16, 0)
                    // {
                    sc_lv<17>* pyrFilOut_V__M_real_V_lv0_0_1519_1 = new sc_lv<17>[1520];
                    // }
                    // }
                    // bitslice(33, 17)
                    // {
                    // celement: pyrFilOut.V._M_imag.V(16, 0)
                    // {
                    sc_lv<17>* pyrFilOut_V__M_imag_V_lv0_0_1519_1 = new sc_lv<17>[1520];
                    // }
                    // }

                    // bitslice(16, 0)
                    {
                        int hls_map_index = 0;
                        // celement: pyrFilOut.V._M_real.V(16, 0)
                        {
                            // carray: (aesl_tmp_2) => (aesl_tmp_1 - 1) @ (1)
                            for (int i_0 = aesl_tmp_2; i_0 <= aesl_tmp_1 - 1; i_0 += 1)
                            {
                                if (&(aesl_tmp_0[0].real()) != NULL) // check the null address if the c port is array or others
                                {
                                    pyrFilOut_V__M_real_V_lv0_0_1519_1[hls_map_index++].range(16, 0) = sc_bv<17>(pyrFilOut_V_pc_buffer[hls_map_index].range(16, 0));
                                }
                            }
                        }
                    }
                    // bitslice(33, 17)
                    {
                        int hls_map_index = 0;
                        // celement: pyrFilOut.V._M_imag.V(16, 0)
                        {
                            // carray: (aesl_tmp_2) => (aesl_tmp_1 - 1) @ (1)
                            for (int i_0 = aesl_tmp_2; i_0 <= aesl_tmp_1 - 1; i_0 += 1)
                            {
                                if (&(aesl_tmp_0[0].imag()) != NULL) // check the null address if the c port is array or others
                                {
                                    pyrFilOut_V__M_imag_V_lv0_0_1519_1[hls_map_index++].range(16, 0) = sc_bv<17>(pyrFilOut_V_pc_buffer[hls_map_index].range(33, 17));
                                }
                            }
                        }
                    }

                    // bitslice(16, 0)
                    {
                        int hls_map_index = 0;
                        // celement: pyrFilOut.V._M_real.V(16, 0)
                        {
                            // carray: (aesl_tmp_2) => (aesl_tmp_1 - 1) @ (1)
                            for (int i_0 = aesl_tmp_2; i_0 <= aesl_tmp_1 - 1; i_0 += 1)
                            {
                                // sub                    : i_0
                                // ori_name               : aesl_tmp_0[i_0].real()
                                // sub_1st_elem           : 0
                                // ori_name_1st_elem      : aesl_tmp_0[0].real()
                                // output_left_conversion : (aesl_tmp_0[i_0].real()).range()
                                // output_type_conversion : (pyrFilOut_V__M_real_V_lv0_0_1519_1[hls_map_index++]).to_string(SC_BIN).c_str()
                                if (&(aesl_tmp_0[0].real()) != NULL) // check the null address if the c port is array or others
                                {
                                    (aesl_tmp_0[i_0].real()).range() = (pyrFilOut_V__M_real_V_lv0_0_1519_1[hls_map_index++]).to_string(SC_BIN).c_str();
                                }
                            }
                        }
                    }
                    // bitslice(33, 17)
                    {
                        int hls_map_index = 0;
                        // celement: pyrFilOut.V._M_imag.V(16, 0)
                        {
                            // carray: (aesl_tmp_2) => (aesl_tmp_1 - 1) @ (1)
                            for (int i_0 = aesl_tmp_2; i_0 <= aesl_tmp_1 - 1; i_0 += 1)
                            {
                                // sub                    : i_0
                                // ori_name               : aesl_tmp_0[i_0].imag()
                                // sub_1st_elem           : 0
                                // ori_name_1st_elem      : aesl_tmp_0[0].imag()
                                // output_left_conversion : (aesl_tmp_0[i_0].imag()).range()
                                // output_type_conversion : (pyrFilOut_V__M_imag_V_lv0_0_1519_1[hls_map_index++]).to_string(SC_BIN).c_str()
                                if (&(aesl_tmp_0[0].imag()) != NULL) // check the null address if the c port is array or others
                                {
                                    (aesl_tmp_0[i_0].imag()).range() = (pyrFilOut_V__M_imag_V_lv0_0_1519_1[hls_map_index++]).to_string(SC_BIN).c_str();
                                }
                            }
                        }
                    }
                }
            }
        }

        // push back output stream: "pyrFilOut"
        for (int i = 0; i < aesl_tmp_1; i++)
        {
            pyrFilOut.write(aesl_tmp_0[i]);
        }

        AESL_transaction_pc++;
    }
    else
    {
        static unsigned AESL_transaction;

        static AESL_FILE_HANDLER aesl_fh;

        // "imgIn_M_real_V"
        char* tvin_imgIn_M_real_V = new char[50];
        aesl_fh.touch(AUTOTB_TVIN_imgIn_M_real_V);

        // "imgIn_M_imag_V"
        char* tvin_imgIn_M_imag_V = new char[50];
        aesl_fh.touch(AUTOTB_TVIN_imgIn_M_imag_V);

        // "pyrFilOut_V"
        char* tvin_pyrFilOut_V = new char[50];
        aesl_fh.touch(AUTOTB_TVIN_pyrFilOut_V);
        char* tvout_pyrFilOut_V = new char[50];
        aesl_fh.touch(AUTOTB_TVOUT_pyrFilOut_V);
        char* wrapc_stream_size_out_pyrFilOut_V = new char[50];
        aesl_fh.touch(WRAPC_STREAM_SIZE_OUT_pyrFilOut_V);
        char* wrapc_stream_egress_status_pyrFilOut_V = new char[50];
        aesl_fh.touch(WRAPC_STREAM_EGRESS_STATUS_pyrFilOut_V);

        static INTER_TCL_FILE tcl_file(INTER_TCL);
        int leading_zero;

        // dump stream tvin: "pyrFilOut"
        std::vector<std::complex<ap_fixed<17, 6, (ap_q_mode) 0, (ap_o_mode)3, 0> > > aesl_tmp_0;
        int aesl_tmp_1 = 0;
        while (!pyrFilOut.empty())
        {
            aesl_tmp_0.push_back(pyrFilOut.read());
            aesl_tmp_1++;
        }

        // [[transaction]]
        sprintf(tvin_imgIn_M_real_V, "[[transaction]] %d\n", AESL_transaction);
        aesl_fh.write(AUTOTB_TVIN_imgIn_M_real_V, tvin_imgIn_M_real_V);

        sc_bv<16>* imgIn_M_real_V_tvin_wrapc_buffer = new sc_bv<16>[512];

        // RTL Name: imgIn_M_real_V
        {
            // bitslice(15, 0)
            {
                int hls_map_index = 0;
                // celement: imgIn._M_real.V(15, 0)
                {
                    // carray: (0) => (511) @ (1)
                    for (int i_0 = 0; i_0 <= 511; i_0 += 1)
                    {
                        // sub                   : i_0
                        // ori_name              : imgIn[i_0].real()
                        // sub_1st_elem          : 0
                        // ori_name_1st_elem     : imgIn[0].real()
                        // regulate_c_name       : imgIn__M_real_V
                        // input_type_conversion : (imgIn[i_0].real()).range().to_string(SC_BIN).c_str()
                        if (&(imgIn[0].real()) != NULL) // check the null address if the c port is array or others
                        {
                            sc_lv<16> imgIn__M_real_V_tmp_mem;
                            imgIn__M_real_V_tmp_mem = (imgIn[i_0].real()).range().to_string(SC_BIN).c_str();
                            imgIn_M_real_V_tvin_wrapc_buffer[hls_map_index++].range(15, 0) = imgIn__M_real_V_tmp_mem.range(15, 0);
                        }
                    }
                }
            }
        }

        // dump tv to file
        for (int i = 0; i < 512; i++)
        {
            sprintf(tvin_imgIn_M_real_V, "%s\n", (imgIn_M_real_V_tvin_wrapc_buffer[i]).to_string(SC_HEX).c_str());
            aesl_fh.write(AUTOTB_TVIN_imgIn_M_real_V, tvin_imgIn_M_real_V);
        }

        tcl_file.set_num(512, &tcl_file.imgIn_M_real_V_depth);
        sprintf(tvin_imgIn_M_real_V, "[[/transaction]] \n");
        aesl_fh.write(AUTOTB_TVIN_imgIn_M_real_V, tvin_imgIn_M_real_V);

        // release memory allocation
        delete [] imgIn_M_real_V_tvin_wrapc_buffer;

        // [[transaction]]
        sprintf(tvin_imgIn_M_imag_V, "[[transaction]] %d\n", AESL_transaction);
        aesl_fh.write(AUTOTB_TVIN_imgIn_M_imag_V, tvin_imgIn_M_imag_V);

        sc_bv<16>* imgIn_M_imag_V_tvin_wrapc_buffer = new sc_bv<16>[512];

        // RTL Name: imgIn_M_imag_V
        {
            // bitslice(15, 0)
            {
                int hls_map_index = 0;
                // celement: imgIn._M_imag.V(15, 0)
                {
                    // carray: (0) => (511) @ (1)
                    for (int i_0 = 0; i_0 <= 511; i_0 += 1)
                    {
                        // sub                   : i_0
                        // ori_name              : imgIn[i_0].imag()
                        // sub_1st_elem          : 0
                        // ori_name_1st_elem     : imgIn[0].imag()
                        // regulate_c_name       : imgIn__M_imag_V
                        // input_type_conversion : (imgIn[i_0].imag()).range().to_string(SC_BIN).c_str()
                        if (&(imgIn[0].imag()) != NULL) // check the null address if the c port is array or others
                        {
                            sc_lv<16> imgIn__M_imag_V_tmp_mem;
                            imgIn__M_imag_V_tmp_mem = (imgIn[i_0].imag()).range().to_string(SC_BIN).c_str();
                            imgIn_M_imag_V_tvin_wrapc_buffer[hls_map_index++].range(15, 0) = imgIn__M_imag_V_tmp_mem.range(15, 0);
                        }
                    }
                }
            }
        }

        // dump tv to file
        for (int i = 0; i < 512; i++)
        {
            sprintf(tvin_imgIn_M_imag_V, "%s\n", (imgIn_M_imag_V_tvin_wrapc_buffer[i]).to_string(SC_HEX).c_str());
            aesl_fh.write(AUTOTB_TVIN_imgIn_M_imag_V, tvin_imgIn_M_imag_V);
        }

        tcl_file.set_num(512, &tcl_file.imgIn_M_imag_V_depth);
        sprintf(tvin_imgIn_M_imag_V, "[[/transaction]] \n");
        aesl_fh.write(AUTOTB_TVIN_imgIn_M_imag_V, tvin_imgIn_M_imag_V);

        // release memory allocation
        delete [] imgIn_M_imag_V_tvin_wrapc_buffer;

        // push back input stream: "pyrFilOut"
        for (int i = 0; i < aesl_tmp_1; i++)
        {
            pyrFilOut.write(aesl_tmp_0[i]);
        }

// [call_c_dut] ---------->

        AESL_ORIG_DUT_pyrconstuct_top(imgIn, pyrFilOut, nL);

        // pop output stream: "pyrFilOut"
        int aesl_tmp_2 = aesl_tmp_1;
        aesl_tmp_1 = 0;
        aesl_tmp_0.clear();
        while (!pyrFilOut.empty())
        {
            aesl_tmp_0.push_back(pyrFilOut.read());
            aesl_tmp_1++;
        }

        // [[transaction]]
        sprintf(tvout_pyrFilOut_V, "[[transaction]] %d\n", AESL_transaction);
        aesl_fh.write(AUTOTB_TVOUT_pyrFilOut_V, tvout_pyrFilOut_V);

        sc_bv<34>* pyrFilOut_V_tvout_wrapc_buffer = new sc_bv<34>[1520];

        // RTL Name: pyrFilOut_V
        {
            // bitslice(16, 0)
            {
                int hls_map_index = 0;
                // celement: pyrFilOut.V._M_real.V(16, 0)
                {
                    // carray: (aesl_tmp_2) => (aesl_tmp_1 - 1) @ (1)
                    for (int i_0 = aesl_tmp_2; i_0 <= aesl_tmp_1 - 1; i_0 += 1)
                    {
                        // sub                   : i_0
                        // ori_name              : aesl_tmp_0[i_0].real()
                        // sub_1st_elem          : 0
                        // ori_name_1st_elem     : aesl_tmp_0[0].real()
                        // regulate_c_name       : pyrFilOut_V__M_real_V
                        // input_type_conversion : (aesl_tmp_0[i_0].real()).range().to_string(SC_BIN).c_str()
                        if (&(aesl_tmp_0[0].real()) != NULL) // check the null address if the c port is array or others
                        {
                            sc_lv<17> pyrFilOut_V__M_real_V_tmp_mem;
                            pyrFilOut_V__M_real_V_tmp_mem = (aesl_tmp_0[i_0].real()).range().to_string(SC_BIN).c_str();
                            pyrFilOut_V_tvout_wrapc_buffer[hls_map_index++].range(16, 0) = pyrFilOut_V__M_real_V_tmp_mem.range(16, 0);
                        }
                    }
                }
            }
            // bitslice(33, 17)
            {
                int hls_map_index = 0;
                // celement: pyrFilOut.V._M_imag.V(16, 0)
                {
                    // carray: (aesl_tmp_2) => (aesl_tmp_1 - 1) @ (1)
                    for (int i_0 = aesl_tmp_2; i_0 <= aesl_tmp_1 - 1; i_0 += 1)
                    {
                        // sub                   : i_0
                        // ori_name              : aesl_tmp_0[i_0].imag()
                        // sub_1st_elem          : 0
                        // ori_name_1st_elem     : aesl_tmp_0[0].imag()
                        // regulate_c_name       : pyrFilOut_V__M_imag_V
                        // input_type_conversion : (aesl_tmp_0[i_0].imag()).range().to_string(SC_BIN).c_str()
                        if (&(aesl_tmp_0[0].imag()) != NULL) // check the null address if the c port is array or others
                        {
                            sc_lv<17> pyrFilOut_V__M_imag_V_tmp_mem;
                            pyrFilOut_V__M_imag_V_tmp_mem = (aesl_tmp_0[i_0].imag()).range().to_string(SC_BIN).c_str();
                            pyrFilOut_V_tvout_wrapc_buffer[hls_map_index++].range(33, 17) = pyrFilOut_V__M_imag_V_tmp_mem.range(16, 0);
                        }
                    }
                }
            }
        }

        // dump tv to file
        for (int i = 0; i < aesl_tmp_1 - aesl_tmp_2; i++)
        {
            sprintf(tvout_pyrFilOut_V, "%s\n", (pyrFilOut_V_tvout_wrapc_buffer[i]).to_string(SC_HEX).c_str());
            aesl_fh.write(AUTOTB_TVOUT_pyrFilOut_V, tvout_pyrFilOut_V);
        }

        tcl_file.set_num(aesl_tmp_1 - aesl_tmp_2, &tcl_file.pyrFilOut_V_depth);
        sprintf(tvout_pyrFilOut_V, "[[/transaction]] \n");
        aesl_fh.write(AUTOTB_TVOUT_pyrFilOut_V, tvout_pyrFilOut_V);

        // release memory allocation
        delete [] pyrFilOut_V_tvout_wrapc_buffer;

        // dump stream size
        sprintf(wrapc_stream_size_out_pyrFilOut_V, "[[transaction]] %d\n", AESL_transaction);
        aesl_fh.write(WRAPC_STREAM_SIZE_OUT_pyrFilOut_V, wrapc_stream_size_out_pyrFilOut_V);
        sprintf(wrapc_stream_size_out_pyrFilOut_V, "%d\n", aesl_tmp_1 - aesl_tmp_2);
        aesl_fh.write(WRAPC_STREAM_SIZE_OUT_pyrFilOut_V, wrapc_stream_size_out_pyrFilOut_V);
        sprintf(wrapc_stream_size_out_pyrFilOut_V, "[[/transaction]] \n");
        aesl_fh.write(WRAPC_STREAM_SIZE_OUT_pyrFilOut_V, wrapc_stream_size_out_pyrFilOut_V);

        // push back output stream: "pyrFilOut"
        for (int i = 0; i < aesl_tmp_1; i++)
        {
            pyrFilOut.write(aesl_tmp_0[i]);
        }

        // release memory allocation: "imgIn_M_real_V"
        delete [] tvin_imgIn_M_real_V;
        // release memory allocation: "imgIn_M_imag_V"
        delete [] tvin_imgIn_M_imag_V;
        // release memory allocation: "pyrFilOut_V"
        delete [] tvout_pyrFilOut_V;
        delete [] tvin_pyrFilOut_V;
        delete [] wrapc_stream_size_out_pyrFilOut_V;

        AESL_transaction++;

        tcl_file.set_num(AESL_transaction , &tcl_file.trans_num);
    }
}
// For next interface change
//TODO(brugger): remove step_size
//TODO(brugger): path_cnt should be uint64_t
//TODO(brugger): add correlation
void heston_kernel_sl(
		// call option
		calc_t log_spot_price,
		calc_t reversion_rate_TIMES_step_size,
		calc_t long_term_avg_vola,
		calc_t vol_of_vol_TIMES_sqrt_step_size,
		calc_t double_riskless_rate, // = 2 * riskless_rate
		calc_t vola_0,
//		calc_t correlation,
//		calc_t time_to_maturity,
	    // both knockout
		calc_t log_lower_barrier_value,
		calc_t log_upper_barrier_value,
		// simulation params
		uint32_t step_cnt,
		calc_t step_size, // = time_to_maturity / step_cnt
		calc_t half_step_size, // = step_size / 2
		calc_t sqrt_step_size, // = sqrt(step_size)
		calc_t barrier_correction_factor, // = BARRIER_HIT_CORRECTION * sqrt_step_size
		uint32_t path_cnt,

		hls::stream<calc_t> &gaussian_rn1,
		hls::stream<calc_t> &gaussian_rn2,
		hls::stream<calc_t> &prices)
{
	#pragma HLS interface ap_none port=log_spot_price
	#pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=log_spot_price
	#pragma HLS interface ap_none port=reversion_rate_TIMES_step_size
	#pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=reversion_rate_TIMES_step_size
	#pragma HLS interface ap_none port=long_term_avg_vola
	#pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=long_term_avg_vola
	#pragma HLS interface ap_none port=vol_of_vol_TIMES_sqrt_step_size
	#pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=vol_of_vol_TIMES_sqrt_step_size
	#pragma HLS interface ap_none port=double_riskless_rate
	#pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=double_riskless_rate
	#pragma HLS interface ap_none port=vola_0
	#pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=vola_0
//	#pragma HLS interface ap_none port=correlation
//	#pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=correlation
//	#pragma HLS interface ap_none port=time_to_maturity
//	#pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=time_to_maturity
	#pragma HLS interface ap_none port=log_lower_barrier_value
	#pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=log_lower_barrier_value
	#pragma HLS interface ap_none port=log_upper_barrier_value
	#pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=log_upper_barrier_value
	#pragma HLS interface ap_none port=step_cnt
	#pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=step_cnt
	#pragma HLS interface ap_none port=step_size
	#pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=step_size
	#pragma HLS interface ap_none port=half_step_size
	#pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=half_step_size
	#pragma HLS interface ap_none port=sqrt_step_size
	#pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=sqrt_step_size
	#pragma HLS interface ap_none port=barrier_correction_factor
	#pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=barrier_correction_factor
	#pragma HLS interface ap_none port=path_cnt
	#pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=path_cnt

	#pragma HLS interface ap_fifo port=gaussian_rn1
	#pragma HLS resource core=AXI4Stream variable=gaussian_rn1
	#pragma HLS interface ap_fifo port=gaussian_rn2
	#pragma HLS resource core=AXI4Stream variable=gaussian_rn2
	#pragma HLS interface ap_fifo port=prices
	#pragma HLS resource core=AXI4Stream variable=prices

	#pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=return

	////////////////////////////////////////////////////////////////////////////////////////////////////////////

	state_t states[BLOCK_SIZE];
	#pragma HLS data_pack variable=states

	for (uint32_t block = 0; block < path_cnt; block += BLOCK_SIZE) {
		for (uint32_t step = 0; step != step_cnt; ++step) {
			// TODO(brugger): use data type with less bits for inner counter
			for (uint32_t i = 0; i != BLOCK_SIZE; ++i) {
				#pragma HLS PIPELINE II=1

				state_t l_state;
				// initialize
				if (step == 0) {
					l_state.stock = log_spot_price;
					l_state.vola = vola_0;
					l_state.barrier_hit = false;
				} else {
					l_state = states[i];
				}

				// calcualte next step
				state_t n_state;
				calc_t max_vola = MAX((calc_t) 0., l_state.vola);
				calc_t sqrt_vola = hls::sqrtf(max_vola);
				n_state.stock = l_state.stock + (double_riskless_rate - max_vola) *
						half_step_size + sqrt_step_size * sqrt_vola *
						gaussian_rn1.read();
				n_state.vola = l_state.vola + reversion_rate_TIMES_step_size *
						(long_term_avg_vola - max_vola) +
						vol_of_vol_TIMES_sqrt_step_size * sqrt_vola *
						(calc_t) gaussian_rn2.read();
				calc_t barrier_correction = barrier_correction_factor *
						sqrt_vola;
				#pragma HLS RESOURCE variable=barrier_correction \
						core=FMul_meddsp
				n_state.barrier_hit = l_state.barrier_hit |  (n_state.stock <
						log_lower_barrier_value + barrier_correction) |
						(n_state.stock > log_upper_barrier_value -
						barrier_correction);
				states[i] = n_state;

				// write out
				if (step + 1 == step_cnt && (block + i) < path_cnt)
					prices.write(n_state.barrier_hit ?
							-std::numeric_limits<calc_t>::infinity() :
							n_state.stock);

			}
		}
	}
}
void hls_cropping_strm ( hls::stream< ap_int<8> > & src,  hls::stream< ap_int<16> > & dst) {

    fstream wrapc_switch_file_token;

    wrapc_switch_file_token.open(".hls_cosim_wrapc_switch.log");

    int AESL_i;

    if (wrapc_switch_file_token.good()) {

        static unsigned AESL_transaction_pc;

        string AESL_token;

        string AESL_num;

        static AESL_FILE_HANDLER aesl_fh;

        aesl_fh.read(WRAPC_STREAM_SIZE_IN_src_V_V, AESL_token); //[[transaction]]

        aesl_fh.read(WRAPC_STREAM_SIZE_IN_src_V_V, AESL_num); //transaction number

        if (atoi(AESL_num.c_str()) == AESL_transaction_pc ) {

            aesl_fh.read(WRAPC_STREAM_SIZE_IN_src_V_V, AESL_token); //pop_size

            int aesl_tmp_1 = atoi(AESL_token.c_str());

            for (int i = 0 ; i < aesl_tmp_1  ; i++) {

                src.read();

            }

            aesl_fh.read(WRAPC_STREAM_SIZE_IN_src_V_V, AESL_token); //[[/transaction]]

        }

        int aesl_tmp_4;

        int aesl_tmp_5 = 0;

        aesl_fh.read(WRAPC_STREAM_SIZE_OUT_dst_V_V, AESL_token); //[[transaction]]

        aesl_fh.read(WRAPC_STREAM_SIZE_OUT_dst_V_V, AESL_num); //transaction number

        if (atoi(AESL_num.c_str()) == AESL_transaction_pc ) {

            aesl_fh.read(WRAPC_STREAM_SIZE_OUT_dst_V_V, AESL_token); //pop_size

            aesl_tmp_4 = atoi(AESL_token.c_str());

            aesl_fh.read(WRAPC_STREAM_SIZE_OUT_dst_V_V, AESL_token); //[[/transaction]]

        }

        std::vector<ap_int<16> > aesl_tmp_3;

        aesl_fh.read(AUTOTB_TVOUT_PC_dst_V_V, AESL_token); //[[transaction]]

        if ( AESL_token != "[[transaction]]") {

           exit(1);

        }

        aesl_fh.read(AUTOTB_TVOUT_PC_dst_V_V, AESL_num); //transaction number

        if (atoi(AESL_num.c_str()) == AESL_transaction_pc ) {

            aesl_fh.read(AUTOTB_TVOUT_PC_dst_V_V, AESL_token); //data

            std::vector < sc_bv<16> > dst_V_V_pc_buffer;

            int i = 0;

            while (AESL_token != "[[/transaction]]") {

                bool no_x = false;

                bool err = false;

                while (!no_x) {

                size_t x_found = AESL_token.find('X');

                if (x_found != string::npos) {

                    if (!err) {

                        cerr << "@W [SIM-201] RTL produces unknown value 'X' on port 'dst_V_V', possible cause: There are uninitialized variables in the C design." << endl; 

                        err = true;

                    }

                    AESL_token.replace(x_found, 1, "0");

                } else {

                    no_x = true;

                }

                }

                no_x = false;

                while (!no_x) {

                size_t x_found = AESL_token.find('x', 2);

                if (x_found != string::npos) {

                    if (!err) {

                        cerr << "@W [SIM-201] RTL produces unknown value 'X' on port 'dst_V_V', possible cause: There are uninitialized variables in the C design." << endl; 

                        err = true;

                    }

                    AESL_token.replace(x_found, 1, "0");

                } else {

                    no_x = true;

                }

                }

                if (AESL_token != "") {

                    dst_V_V_pc_buffer.push_back( AESL_token.c_str() );

                    i++;

                }

                aesl_fh.read(AUTOTB_TVOUT_PC_dst_V_V, AESL_token); //data or [[/transaction]]

                if (AESL_token == "[[[/runtime]]]" || aesl_fh.eof(AUTOTB_TVOUT_PC_dst_V_V)) {

                   exit(1);

                }

            }

            if (i != aesl_tmp_4) {

               aesl_tmp_4 = i;

            }

            if (aesl_tmp_4 > 0 && aesl_tmp_3.size() < aesl_tmp_4) {

                int aesl_tmp_3_size = aesl_tmp_3.size();

                for (int tmp_aesl_tmp_3 = 0 ; tmp_aesl_tmp_3 < aesl_tmp_4 - aesl_tmp_3_size ; tmp_aesl_tmp_3 ++ ) {

                    ap_int<16> tmp;

                    aesl_tmp_3.push_back(tmp);

                }

            }

            if (i > 0) {

                sc_lv<16> *dst_V_V_lv0_0_0_1 = new sc_lv<16>[aesl_tmp_4 - aesl_tmp_5];

                AESL_i = 0; //subscript for rtl array

                for (int i_0 = 0; i_0 <= aesl_tmp_4 - aesl_tmp_5 - 1 ; i_0+= 1) {

                    if(&(aesl_tmp_3[0]) != 0) {

                       dst_V_V_lv0_0_0_1[0 + AESL_i].range(15, 0) = sc_bv<16>(dst_V_V_pc_buffer[0 + AESL_i].range(15, 0));

                    }

                    AESL_i++;

                }

                AESL_i = 0; //subscript for rtl array

                for (int i_0 = 0; i_0 <= aesl_tmp_4 - aesl_tmp_5 - 1 ; i_0+= 1) {

                    if(&(aesl_tmp_3[0]) != 0) {

                       aesl_tmp_3[i_0] = (dst_V_V_lv0_0_0_1[0 + AESL_i]).to_string(SC_BIN).c_str();

                    }

                    AESL_i++;

                }

                }

        }

        for (int i = 0; i < aesl_tmp_4; i++) {

            dst.write(aesl_tmp_3[i]);

        }

        AESL_transaction_pc ++ ;

    } else {

        static unsigned AESL_transaction;

        static AESL_FILE_HANDLER aesl_fh;

        char* tvin_src_V_V = new char[50];

        char* wrapc_stream_size_in_src_V_V = new char[50];

        char* tvout_dst_V_V = new char[50];

        char* tvin_dst_V_V = new char[50];

        aesl_fh.touch(AUTOTB_TVIN_dst_V_V);

        char* wrapc_stream_size_out_dst_V_V = new char[50];

        static INTER_TCL_FILE tcl_file(INTER_TCL);


        int leading_zero;

        std::vector<ap_int<8> > aesl_tmp_0;

        int aesl_tmp_1 = 0;

        while (!src.empty()) {

            aesl_tmp_0.push_back(src.read());

            aesl_tmp_1 ++;

        }

        std::vector<ap_int<16> > aesl_tmp_3;

        int aesl_tmp_4 = 0;

        while (!dst.empty()) {

            aesl_tmp_3.push_back(dst.read());

            aesl_tmp_4 ++;

        }

        for (int i = 0; i < aesl_tmp_1; i++) {

            src.write(aesl_tmp_0[i]);

        }

        AESL_ORIG_DUT_hls_cropping_strm(src,dst);

        int aesl_tmp_2 = src.size();

        int aesl_tmp_5 = aesl_tmp_4;

        while (!dst.empty()) {

            aesl_tmp_3.push_back(dst.read());

            aesl_tmp_4 ++;

        }

        sprintf(tvin_src_V_V, "[[transaction]] %d\n", AESL_transaction);

        aesl_fh.write(AUTOTB_TVIN_src_V_V, tvin_src_V_V);

        sc_bv<8> *src_V_V_tvin_wrapc_buffer = new sc_bv<8>[aesl_tmp_1 - aesl_tmp_2];

        AESL_i = 0; //subscript for rtl array

        for (int i_0 = 0; i_0 <= aesl_tmp_1 - aesl_tmp_2 - 1 ; i_0+= 1) {

        sc_lv<8> src_V_V_tmp_mem; 

            if(&(aesl_tmp_0[0]) != 0) {

            src_V_V_tmp_mem = (aesl_tmp_0[i_0]).to_string(2).c_str();

               src_V_V_tvin_wrapc_buffer[0 + AESL_i].range(7, 0) = src_V_V_tmp_mem.range(7, 0 ) ;

            }

            AESL_i++;

        }

        for (int i = 0; i < aesl_tmp_1 - aesl_tmp_2 ; i++) {

            sprintf(tvin_src_V_V, "%s\n", (src_V_V_tvin_wrapc_buffer[i]).to_string(SC_HEX).c_str());

            aesl_fh.write(AUTOTB_TVIN_src_V_V, tvin_src_V_V);

        }

        tcl_file.set_num(aesl_tmp_1 - aesl_tmp_2,&tcl_file.src_V_V_depth);

        sprintf(tvin_src_V_V, "[[/transaction]] \n");

        aesl_fh.write(AUTOTB_TVIN_src_V_V, tvin_src_V_V);

        delete [] src_V_V_tvin_wrapc_buffer;

        sprintf(wrapc_stream_size_in_src_V_V, "[[transaction]] %d\n", AESL_transaction);

        aesl_fh.write(WRAPC_STREAM_SIZE_IN_src_V_V, wrapc_stream_size_in_src_V_V);

        sprintf(wrapc_stream_size_in_src_V_V, "%d\n", aesl_tmp_1 - aesl_tmp_2);

        aesl_fh.write(WRAPC_STREAM_SIZE_IN_src_V_V, wrapc_stream_size_in_src_V_V);

        sprintf(wrapc_stream_size_in_src_V_V, "[[/transaction]] \n");

        aesl_fh.write(WRAPC_STREAM_SIZE_IN_src_V_V, wrapc_stream_size_in_src_V_V);

        sprintf(tvout_dst_V_V, "[[transaction]] %d\n", AESL_transaction);

        aesl_fh.write(AUTOTB_TVOUT_dst_V_V, tvout_dst_V_V);

        sc_bv<16> *dst_V_V_tvout_wrapc_buffer = new sc_bv<16>[aesl_tmp_4 - aesl_tmp_5];

        AESL_i = 0; //subscript for rtl array

        for (int i_0 = 0; i_0 <= aesl_tmp_4 - aesl_tmp_5 - 1 ; i_0+= 1) {

        sc_lv<16> dst_V_V_tmp_mem; 

            if(&(aesl_tmp_3[0]) != 0) {

            dst_V_V_tmp_mem = (aesl_tmp_3[i_0]).to_string(2).c_str();

               dst_V_V_tvout_wrapc_buffer[0 + AESL_i].range(15, 0) = dst_V_V_tmp_mem.range(15, 0 ) ;

            }

            AESL_i++;

        }

        for (int i = 0; i < aesl_tmp_4 - aesl_tmp_5 ; i++) {

            sprintf(tvout_dst_V_V, "%s\n", (dst_V_V_tvout_wrapc_buffer[i]).to_string(SC_HEX).c_str());

            aesl_fh.write(AUTOTB_TVOUT_dst_V_V, tvout_dst_V_V);

        }

        tcl_file.set_num(aesl_tmp_4 - aesl_tmp_5,&tcl_file.dst_V_V_depth);

        sprintf(tvout_dst_V_V, "[[/transaction]] \n");

        aesl_fh.write(AUTOTB_TVOUT_dst_V_V, tvout_dst_V_V);

        delete [] dst_V_V_tvout_wrapc_buffer;

        sprintf(wrapc_stream_size_out_dst_V_V, "[[transaction]] %d\n", AESL_transaction);

        aesl_fh.write(WRAPC_STREAM_SIZE_OUT_dst_V_V, wrapc_stream_size_out_dst_V_V);

        sprintf(wrapc_stream_size_out_dst_V_V, "%d\n", aesl_tmp_4 - aesl_tmp_5);

        aesl_fh.write(WRAPC_STREAM_SIZE_OUT_dst_V_V, wrapc_stream_size_out_dst_V_V);

        sprintf(wrapc_stream_size_out_dst_V_V, "[[/transaction]] \n");

        aesl_fh.write(WRAPC_STREAM_SIZE_OUT_dst_V_V, wrapc_stream_size_out_dst_V_V);

        for (int i = 0; i < aesl_tmp_4; i++) {

            dst.write(aesl_tmp_3[i]);

        }

        delete [] tvin_src_V_V;

        delete [] wrapc_stream_size_in_src_V_V;

        delete [] tvout_dst_V_V;

        delete [] tvin_dst_V_V;

        delete [] wrapc_stream_size_out_dst_V_V;

        AESL_transaction++;

        tcl_file.set_num(AESL_transaction , &tcl_file.trans_num);

    }
}
void lsupdate1SW  ( const uint32_t stepsMC,
					const uint32_t pathsMC,
					const float K,
					const uint32_t callPut,
					hls::stream<float> &stock,
					hls::stream<float> &b0_in,
					hls::stream<float> &b1_in,
					hls::stream<float> &b2_in,
					hls::stream<float> &contin_out,
					hls::stream<float> &payoff_out )
{
	printf("lsupdate1SW\n");
	printf("lsupdateSW\n");
	stepsLoop:for(uint32_t step=0; step<=stepsMC; ++step)
	{
		// ---------------------------------
		// continuation value
		float b0;
		float b1;
		float b2;

		if(step == 0)
		{
			b0 = (float) 0.0f;
			b1 = (float) 0.0f;
			b2 = (float) 0.0f;
		}
		else
		{
			b0 = b0_in.read();
			b1 = b1_in.read();
			b2 = b2_in.read();
		}

		// -------------------------------------

		pathsLoop:for(uint32_t path=0; path<pathsMC; ++path)
		{
	#pragma HLS PIPELINE II=1 enable_flush

			float s = stock.read();

			// ---------------------------------
			// payoff calculation
			float diff = (s-K);

			float callPutDiff;

			if(callPut == 0)
				callPutDiff = diff;
			else
				callPutDiff = -diff;

			float payoff = fmaxf(callPutDiff, (float) 0.0f);

			// ---------------------------------
			// basis functions
			float s2 = s*s;

			float x0 = (float) 1.0f;
			float x1 = s;
			float x2 = s2;

			// ---------------------------------
			// continuation value

			float continuation = b0*x0 + b1*x1 + b2*x2;

			// ---------------------------------
			// write to output
			contin_out.write(continuation);
			payoff_out.write(payoff);

		}
	}

	return;
}
예제 #22
0
void pyrconstuct_top(
			  cmpxDataIn imgIn[IMG_WIDTH],
		      //hls::stream<t_image> &imgIn,
			  hls::stream<t_pyr_complex> &pyrFilOut,
		      const int nL
		      ) {
#pragma HLS interface ap_fifo depth=512 port=imgIn
#pragma HLS interface ap_fifo depth=1520 port=pyrFilOut
#pragma HLS data_pack variable=pyrFilOut

	cmpxDataIn imgInTmp[FFT_LENGTH];
	cmpxDataOut imgOutTmpFFTStream[FFT_LENGTH];
	cmpxDataOut imgOutTmpBlockRam[FFT_LENGTH];
	cmpxDataOut  fftPyrOut[1520];
#pragma HLS STREAM variable=imgInTmp depth=512 dim=1
#pragma HLS STREAM variable=imgOutTmpFFTStream depth=512 dim=1
#pragma HLS STREAM variable=fftPyrOut depth=1520 dim=1

	LPH: for(int l=0;l<8;l++){
//#pragma HLS DATAFLOW
		const int	cidx = climits[l];
		const int	nlimit = limits[l];
		const int	fsize = fsizes[l];
		const int	lshift = lshifts[l];
		bool direction =false;
		bool ovflo;
		if( l ==0){
					direction = true;
					for(int i=0;i<512;i++) {
					#pragma HLS pipeline
								//cmpxDataIn val (0,0);
									//val.real() = imgIn[i].real();
									//val.imag() =imgIn[i].real()
								imgInTmp[i] = imgIn[i];
							}
				  }
		else {
			for(int i=0;i<512;i++) {
			#pragma HLS pipeline
						cmpxDataIn val (0,0);
						if (i<nlimit) {
							val.real() = (fftPyrOut[cidx + i ].real() >> lshift);
							val.imag() = (fftPyrOut[cidx + i ].imag() >> lshift);
						}
						imgInTmp[i] = val;
					}
					}
		fft_top<config2_t,IMG_WIDTH,cmpxDataIn,cmpxDataOut>(direction,imgIn,imgOutTmpFFTStream,&ovflo,nlimit);
		if(l==0){
			dummy_proc2(imgOutTmpFFTStream, imgOutTmpBlockRam);
			pyrbuild_top(imgOutTmpBlockRam, fftPyrOut, 512, 512);

#ifndef __SYNTHESIS__
				{
					FILE * fo = fopen("fftOut.txt", "wb");
					for(int i=0;i<512;i++) {
						fprintf(fo, "%.8f %.8f\n", imgOutTmpFFTStream[i].real().to_float(), imgOutTmpFFTStream[i].imag().to_float());
					}
					fclose(fo);
				}
				{
					FILE * fo = fopen("fftOutFilter.txt", "wb");
					for(int i=0;i<1520;i++) {
						fprintf(fo, "%.8f %.8f\n", fftPyrOut[i].real().to_float(), fftPyrOut[i].imag().to_float());
					}
				}
			/*
				for(int i=0;i<512;i++)
					std::cout << "FFT Out " << i << " : " << imgOutTmp[i] << std::endl;

			*/
#endif
		}//end of input
		else{
		for(int i=0;i<512;i++) {
#pragma HLS pipeline
			if(i<nlimit){
			t_pyr_complex val;
			cmpxDataOut2 tmp = imgOutTmpFFTStream[i];
			val.real() = tmp.real() >> pshifts[l];
			val.imag() = tmp.imag() >> pshifts[l];
			pyrFilOut.write(val);
				}
			}
		}
예제 #23
0
void receive(
             hls::stream<t_s_xgmii> &s_xgmii,
             hls::stream<t_axis> &m_axis,
             hls::stream<t_rx_status> &rx_status
             )
{
#ifdef RELEASE
#pragma HLS interface ap_ctrl_none port=return
#pragma HLS data_pack variable=s_xgmii
#endif

#pragma HLS INTERFACE axis port=m_axis
#pragma HLS data_pack variable=rx_status

    int i;
    t_s_xgmii cur = {0, 0};
    t_s_xgmii precur = {0, 0};
    t_s_xgmii last_word;
    ap_uint<32> crc_state = 0xffffffff;
    ap_uint<16> frm_cnt = 0;
    int data_err = 0;
    ap_uint<16> len_type = 0xffff;
    int last_user_byte_lane_before_frame_end;
    int user_data_end_detected = 0;
    int last_user_word_pos;
    ap_uint<3> last_user_byte_lane;

 MAIN: while (1) {

        if (user_data_end_detected) {
            int fcs_err = (crc_state != 0x00000000);
            int len_err = 0;
            int frm_byte_cnt = frm_cnt*8 + last_user_byte_lane_before_frame_end + 1 + 4;
            int under = (frm_byte_cnt < 64);
            int over = (frm_byte_cnt > 1500);

            if (is_len(len_type)) {
                if (len_type > 2) {
                    len_err = (len_type != ((last_user_word_pos - 2) * 8 + 2 + last_user_byte_lane + 1));
                } else {
                    len_err = (len_type != ((last_user_word_pos - 2) * 8 + 2 + 8 - (last_user_byte_lane + 1)));
                }
            }

            int good = !(fcs_err | len_err | data_err | under | over);

            m_axis.write((t_axis){last_word.rxd, mask_up_to_bit(8, last_user_byte_lane), !good, 1});
            rx_status.write((t_rx_status) {frm_cnt, good, 0, 0, under, len_err, fcs_err, data_err, 0, over});
        }

        cur = precur;
        if (!s_xgmii.read_nb(precur)) return;

        frm_cnt = 0;
        crc_state = 0xffffffff;
        len_type = 0xffff;

    IDLE_AND_PREAMBLE: while (!((cur.rxc == 0x01) && (cur.rxd == 0xd5555555555555fb))) {
#pragma HLS LATENCY max=0 min=0
    		cur = precur;
    		if (!s_xgmii.read_nb(precur)) return;
            // printf("RXD 0x%016lx, RXC 0x%02x\n", cur.rxd.to_long(), cur.rxc.to_int());
        }

		cur = precur;
		if (!s_xgmii.read_nb(precur)) return;
        // printf("RXD 0x%016lx, RXC 0x%02x\n", cur.rxd.to_long(), cur.rxc.to_int());

        user_data_end_detected = 0;
        int frame_end_detected = 0;
    USER_DATA: do {
#pragma HLS LATENCY max=0 min=0
            ap_uint<8> crc_field_mask;
    		// END-OF-FRAME detection
            if (cur.rxc != 0x00) {
                switch(cur.rxc) {
                case 0xe0 : last_user_byte_lane_before_frame_end = 0; crc_field_mask = 0x1e; break;
                case 0xc0 : last_user_byte_lane_before_frame_end = 1; crc_field_mask = 0x3c; break;
                case 0x80 : last_user_byte_lane_before_frame_end = 2; crc_field_mask = 0x78; break;
                default: crc_field_mask = 0xff; break;
                }
                frame_end_detected = 1;
                if (!user_data_end_detected) {
                    last_word = cur;
                    last_user_byte_lane = last_user_byte_lane_before_frame_end;
                }
                user_data_end_detected = 1;
            } else if ((precur.rxc != 0x00) && ((ap_uint<8>) ~precur.rxc <= 0x0f)) {
                switch(precur.rxc) {
                case 0xf0 : last_user_byte_lane_before_frame_end = 7; break;
                case 0xf8 : last_user_byte_lane_before_frame_end = 6; crc_field_mask = 0x80; break;
                case 0xfc : last_user_byte_lane_before_frame_end = 5; crc_field_mask = 0xc0; break;
                case 0xfe : last_user_byte_lane_before_frame_end = 4; crc_field_mask = 0xe0; break;
                case 0xff : last_user_byte_lane_before_frame_end = 3; crc_field_mask = 0xf0; frame_end_detected = 1; break;
                }
                if (!user_data_end_detected) {
                    last_word = cur;
                    last_user_byte_lane = last_user_byte_lane_before_frame_end;
                }
                user_data_end_detected = 1;
            } else {
                crc_field_mask = 0x00;
            }

            // END-OF-USER-DATA detection
            if (!user_data_end_detected) {
                if (frm_cnt == 1) {
                    len_type = ((ap_uint<16>) wbyte(cur.rxd, 4) << 8) | wbyte(cur.rxd, 5);
                    if (is_len(len_type)) {
                        // Calculate the position of the last word within the frame
                        // which contains valid user data (based on LENGTH/TYPE field
                        // value. -3 is subtracted before division because first two
                        // bytes of user data are not word aligned, and 1 is subtracted
                        // additionally since we want to find out the last word that
                        // still contains data, not the number of user data words. +2
                        // since first word aligned user data byte starts at word #2.
                        last_user_word_pos = (len_type - 3) / 8 + 2;
                        last_user_byte_lane = (len_type - 3) % 8;

                        if (len_type <= 2) {
                            user_data_end_detected = 1;
                            last_word = cur;
                        }
                    }
                } else if (is_len(len_type)) {
                    if (frm_cnt == last_user_word_pos) {
                        last_word = cur;
                        user_data_end_detected = 1;
                    }
                }
            }

            if (!user_data_end_detected) {
                m_axis.write((t_axis){cur.rxd, 0xff, 0, 0});
                frm_cnt++;
            }

            ap_uint<64> crc_data = 0;
        CRC_MASK_CALC: for (i = 0; i < 8; i++) {
#pragma HLS LOOP unroll
                if (!wbit(cur.rxc, i)) {
                    ap_uint<8> d = wbyte(cur.rxd, i);
                    if (wbit(crc_field_mask,i)) {
                        d = ~d;
                    }

                    crc_data = replace_byte(crc_data, d, i);
                }
            }

            crc32<ap_uint<64>>(crc_data, &crc_state);
            // printf("RXD 0x%016lx, RXC 0x%02x, CRC_DATA 0x%016lx, crc_field_mask 0x%02x, CRC_STATE 0x%08lx, FRMEND %d\n", cur.rxd.to_long(), cur.rxc.to_int(), crc_data.to_long(), crc_field_mask.to_int(), crc_state.to_int(), frame_end_detected);
            //// printf("RXD 0x%016lx, RXC 0x%02x, CRC_STATE 0x%08lx, FRMEND %d\n", cur.rxd.to_long(), cur.rxc.to_int(), crc_state.to_int(), frame_end_detected);

            cur = precur;
            if (!s_xgmii.read_nb(precur)) return;
        } while(!frame_end_detected);

    }
}
예제 #24
0
/*
 * Core that apply a 3x3(Configurable) 2d Convolution, Erode, Dilate on
 * grayscale images
 * http://www.xilinx.com/support/documentation/sw_manuals/xilinx2014_1/ug902-vivado-high-level-synthesis.pdf
 * */
void doImgProc(hls::stream<uint_8_side_channel>& inStream,
               hls::stream<int_8_side_channel> & outStream,
               char                              kernel[KERNEL_DIM * KERNEL_DIM],
               int                               operation)
{
#pragma HLS INTERFACE axis port=inStream
#pragma HLS INTERFACE axis port=outStream
#pragma HLS INTERFACE s_axilite port=return bundle=CRTL_BUS
#pragma HLS INTERFACE s_axilite port=operation bundle=CRTL_BUS
#pragma HLS INTERFACE s_axilite port=kernel bundle=KERNEL_BUS

  // Defining the line buffer and setting the inter dependency to false through
  // pragmas
  hls::LineBuffer<KERNEL_DIM, IMG_WIDTH, unsigned char> lineBuff;
  hls::Window<KERNEL_DIM, KERNEL_DIM, short> window;

  // Index used to keep track of row,col
  int idxCol       = 0;
  int idxRow       = 0;
  int pixConvolved = 0;

  // Calculate delay to fix line-buffer offset
  int waitTicks  = (IMG_WIDTH * (KERNEL_DIM - 1) + KERNEL_DIM) / 2;// 241;
  int countWait  = 0;
  int sentPixels = 0;


  int_8_side_channel  dataOutSideChannel;
  uint_8_side_channel currPixelSideChannel;

  // Iterate on all pixels for our 320x240 image, the HLS PIPELINE improves our
  // latency
  for (int idxPixel = 0; idxPixel < (IMG_WIDTH * IMG_HEIGHT); idxPixel++) {
#pragma HLS PIPELINE

    // Read and cache (Block here if FIFO sender is empty)
    currPixelSideChannel = inStream.read();

    // Get the pixel data
    unsigned char pixelIn = currPixelSideChannel.data;

    // Put data on the LineBuffer
    lineBuff.shift_up(idxCol);
    lineBuff.insert_top(pixelIn, idxCol); // Will put in val[2] of line buffer
                                          // (Check Debug)

    // Put data on the window and multiply with the kernel
    for (int idxWinRow = 0; idxWinRow < KERNEL_DIM; idxWinRow++) {
      for (int idxWinCol = 0; idxWinCol < KERNEL_DIM; idxWinCol++) {
        // idxWinCol + pixConvolved, will slide the window ...
        short val = (short)lineBuff.getval(idxWinRow, idxWinCol + pixConvolved);

        // Multiply kernel by the sampling window
        val = (short)kernel[(idxWinRow * KERNEL_DIM) + idxWinCol] * val;
        window.insert(val, idxWinRow, idxWinCol);
      }
    }

    // Avoid calculate out of the image boundaries and if we can convolve
    short valOutput = 0;

    if ((idxRow >= KERNEL_DIM - 1) && (idxCol >= KERNEL_DIM - 1)) {
      switch (operation) {
      case 0:

        // Convolution
        valOutput = sumWindow(&window);
        valOutput = valOutput / 8;

        // Avoid negative values
        if (valOutput < 0) valOutput = 0;
        break;

      case 1:

        // Erode
        valOutput = minWindow(&window);
        break;

      case 2:

        // Dilate
        valOutput = maxWindow(&window);
        break;
      }

      pixConvolved++;
    }

    // Calculate row and col index
    if (idxCol < IMG_WIDTH - 1) {
      idxCol++;
    }
    else {
      // New line
      idxCol = 0;
      idxRow++;
      pixConvolved = 0;
    }

    /*
     * Fix the line buffer delay, on a 320x240 image with 3x3 kernel, the delay
     * will be
     * ((240*2) + 3)/2 = 241
     * So we wait for 241 ticks send the results than put more 241 zeros
     */

    // Put data on output stream (side-channel(tlast) way...)

    /*dataOutSideChannel.data = valOutput;
       dataOutSideChannel.keep = currPixelSideChannel.keep;
       dataOutSideChannel.strb = currPixelSideChannel.strb;
       dataOutSideChannel.user = currPixelSideChannel.user;
       dataOutSideChannel.last = currPixelSideChannel.last;
       dataOutSideChannel.id = currPixelSideChannel.id;
       dataOutSideChannel.dest = currPixelSideChannel.dest;

       // Send to the stream (Block if the FIFO receiver is full)
       outStream.write(dataOutSideChannel);*/
    countWait++;

    if (countWait > waitTicks)  {
      dataOutSideChannel.data = valOutput;
      dataOutSideChannel.keep = currPixelSideChannel.keep;
      dataOutSideChannel.strb = currPixelSideChannel.strb;
      dataOutSideChannel.user = currPixelSideChannel.user;
      dataOutSideChannel.last = 0;
      dataOutSideChannel.id   = currPixelSideChannel.id;
      dataOutSideChannel.dest = currPixelSideChannel.dest;

      // Send to the stream (Block if the FIFO receiver is full)
      outStream.write(dataOutSideChannel);
      sentPixels++;
    }
  }

  // Now send the remaining zeros (Just the (Number of delayed ticks)
  for (countWait = 0; countWait < waitTicks; countWait++) {
    dataOutSideChannel.data = 0;
    dataOutSideChannel.keep = currPixelSideChannel.keep;
    dataOutSideChannel.strb = currPixelSideChannel.strb;
    dataOutSideChannel.user = currPixelSideChannel.user;

    // Send last on the last item
    if (countWait < waitTicks - 1) dataOutSideChannel.last = 0;
    else dataOutSideChannel.last = 1;
    dataOutSideChannel.id   = currPixelSideChannel.id;
    dataOutSideChannel.dest = currPixelSideChannel.dest;

    // Send to the stream (Block if the FIFO receiver is full)
    outStream.write(dataOutSideChannel);
  }
}
void heston_kernel_ml(const params_ml params, hls::stream<calc_t> &gaussian_rn1,
		hls::stream<calc_t> &gaussian_rn2, hls::stream<calc_t> &prices) {
	#pragma HLS interface ap_none port=params
	#pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" \
			variable=params
	#pragma HLS interface ap_fifo port=gaussian_rn1
	#pragma HLS resource core=AXI4Stream variable=gaussian_rn1
	#pragma HLS interface ap_fifo port=gaussian_rn2
	#pragma HLS resource core=AXI4Stream variable=gaussian_rn2
	#pragma HLS interface ap_fifo port=prices
	#pragma HLS resource core=AXI4Stream variable=prices
	#pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" \
			variable=return

	// write block size to stream
	prices.write(BLOCK_SIZE);

	state_t state_coarse[BLOCK_SIZE];
	#pragma HLS data_pack variable=state_coarse
	state_t state_fine[BLOCK_SIZE];
	#pragma HLS data_pack variable=state_fine
	w_both_t w_both[BLOCK_SIZE];
	#pragma HLS data_pack variable=w_both

	ap_uint<6> upper_j = params.ml_constant + (params.do_multilevel ? 1 : 0);

	for (uint32_t path = 0; path < params.path_cnt; path += BLOCK_SIZE) {
		for (uint32_t step = 0; step != params.step_cnt_coarse; ++step) {
			for (ap_uint<6> j = 0; j != upper_j; ++j) {
				for (ap_uint<10> block_i = 0; block_i != BLOCK_SIZE;
						++block_i) {
					#pragma HLS PIPELINE II=1

					bool is_fine = j != params.ml_constant;

					//
					// initialize
					//
					state_t l_state_coarse, l_state_fine;
					w_both_t l_w_both;
					if (step == 0 && j == 0) {
						l_state_coarse = get_init_state(params);
						l_state_fine = get_init_state(params);
						l_w_both = get_w_zero();
					} else {
						l_state_coarse = state_coarse[block_i];
						l_state_fine = state_fine[block_i];
						l_w_both = w_both[block_i];
					}

					//
					// calculate next step
					//
					state_t n_state_coarse, n_state_fine;
					w_both_t n_w_both;

					if (is_fine) {
						// step fine
						float w_stock = gaussian_rn1.read();
						float w_vola = gaussian_rn2.read();
						n_state_fine = get_next_step(params, l_state_fine,
								w_stock, w_vola, true);
						n_state_coarse = l_state_coarse;
						// accumulate random numbers
						n_w_both.w_stock = l_w_both.w_stock + w_stock;
						n_w_both.w_vola = l_w_both.w_vola + w_vola;
					} else {
						// step coarse
						n_state_coarse = get_next_step(params, l_state_coarse,
								l_w_both.w_stock, l_w_both.w_vola, false);
						n_state_fine = l_state_fine;
						n_w_both = get_w_zero();
					}

					state_coarse[block_i] = n_state_coarse;
					state_fine[block_i] = n_state_fine;
					w_both[block_i] = n_w_both;

					//
					// write out
					//
					if ((step + 1 == params.step_cnt_coarse) &&
							(j + 1 >= params.ml_constant)) {
						if (is_fine) {
							prices.write(get_log_price(n_state_fine));
						} else {
							prices.write(get_log_price(n_state_coarse));
						}
					}
				}
			}
		}
	}
}
// -----------------------------------------------------------
void lsupdate2SWvector	( 	const uint32_t stepsMC,
						const uint32_t pathsMC,
						const float discount,
						hls::stream<float> &contin_in,
						hls::stream<float> &payoff_in,
						hls::stream<float> &cashFlowDisc_out,
						hls::stream<float> &toAccum_out )
{
	printf("lsupdate2SWvector\n");

	float cashFlowVector[CASHFLOW_SIZE];
#pragma HLS RESOURCE variable=cashFlowVector core=RAM_2P_BRAM

	// ----------------------------------------------------
	zerosLoop:for(uint32_t path=0; path<pathsMC; ++path)
	{
#pragma HLS PIPELINE II=1 enable_flush

		cashFlowVector[path] = (float) 0.0f;
	}

	// ----------------------------------------------------

	// ----------------------------------------------------
	stepsLoop:for(uint32_t step=0; step<=stepsMC; ++step)
	{
		pathsLoop:for(uint32_t path=0; path<pathsMC; ++path)
		{
	#pragma HLS PIPELINE II=1 enable_flush

			uint32_t indexRead = path;
			uint32_t indexWrite = path;

			float continuation = contin_in.read();
			float payoff       = payoff_in.read();

			float cashFlow = cashFlowVector[indexRead];

			float discountedCashFlow = discount * cashFlow;

			// ---------------------------------
			float newY;

			if( (payoff > (float) 0.0f) && (payoff >= (float) continuation) )
				newY = payoff;
			else
				newY = discountedCashFlow;

			// ---------------------------------
			// write to outputs
			cashFlowVector[indexWrite] = newY;

			if(step < stepsMC)
				cashFlowDisc_out.write(discount * newY);

			if(step == stepsMC)
				toAccum_out.write(newY);

		}
	}

	return;
}
예제 #27
0
void pyrconstuct_top(
			  cmpxDataIn imgIn[IMG_WIDTH],
		      //hls::stream<t_image> &imgIn,
			  hls::stream<t_pyr_complex> &pyrFilOut,
		      const int nL
		      ) {

//#pragma HLS interface ap_fifo depth=1 port=ovflo
#pragma HLS interface ap_fifo depth=512 port=imgIn
#pragma HLS interface ap_fifo depth=1520 port=pyrFilOut


//#pragma HLS data_pack variable=imgIn

#pragma HLS data_pack variable=pyrFilOut
#pragma HLS dataflow

	cmpxDataIn imgInTmp[FFT_LENGTH];
//#pragma HLS RESOURCE variable=imgInTmp core=RAM_2P_BRAM

	cmpxDataOut imgOutTmpFFTStream[FFT_LENGTH];
	cmpxDataOut imgOutTmpBlockRam[FFT_LENGTH];

	cmpxDataOut  fftPyrOut[1520];

	config_t fft_config;
	config2_t fft_config2;
	status_t fft_status;
	status2_t fft_status2;

#pragma HLS data_pack variable=fft_config
#pragma HLS data_pack variable=fft_config2

#pragma HLS STREAM variable=imgInTmp depth=512 dim=1
#pragma HLS STREAM variable=imgOutTmpFFTStream depth=512 dim=1
//#pragma HLS STREAM variable=fftPyrOut depth=1520 dim=1

	fft_config.setDir(true);
	//dummy_proc_fe(imgIn, imgInTmp);

	for(int i=0;i<512;i++) {
		//imgInTmp[i] = imgIn.read();
		imgInTmp[i] = imgIn[i];
	}

	hls::fft<config1>(imgInTmp, imgOutTmpFFTStream, &fft_status, &fft_config);
	//imgOutTmp
	dummy_proc2(imgOutTmpFFTStream, imgOutTmpBlockRam);

	pyrbuild_top(imgOutTmpBlockRam, fftPyrOut, 512, 512);

	//return;

#ifndef __SYNTHESIS__
	{
		FILE * fo = fopen("fftOut.txt", "wb");
		for(int i=0;i<512;i++) {
			fprintf(fo, "%.8f %.8f\n", imgOutTmpFFTStream[i].real().to_float(), imgOutTmpFFTStream[i].imag().to_float());
		}
		fclose(fo);
	}
	{
		FILE * fo = fopen("fftOutFilter.txt", "wb");
		for(int i=0;i<1520;i++) {
			fprintf(fo, "%.8f %.8f\n", fftPyrOut[i].real().to_float(), fftPyrOut[i].imag().to_float());
		}
	}
/*
	for(int i=0;i<512;i++)
		std::cout << "FFT Out " << i << " : " << imgOutTmp[i] << std::endl;

*/
#endif
	int fsizes[7]={ 512,512,256,128,64,32,16	};
	//int l = 0;
	LPH: for(int l=0;l<Kset;l++){

		cmpxDataOut2 ifftPyrOut2[512];
#pragma HLS STREAM variable=ifftPyrOut2 depth=512 dim=1
#pragma HLS DATAFLOW
		//optimized able
		cmpxDataOut2  ifftPyrOut[512];
//#pragma HLS RESOURCE variable=ifftPyrOut core=RAM_2P_BRAM
//#pragma HLS ARRAY_PARTITION variable=ifftPyrOut complete dim=1
	#pragma HLS STREAM variable=ifftPyrOut depth=512 dim=1
		cmpxDataIn imgInTmp2[512];
	#pragma HLS STREAM variable=imgInTmp2 depth=512 dim=1

		int cidx = climits[l];
		int nlimit = limits[l];
		int fsize = fsizes[l];
		int lshift = lshifts[l];


		for(int i=0;i<fsize;i++) {
#pragma HLS pipeline
			cmpxDataIn val (0,0);
			if (i<nlimit) {
				val.real() = (fftPyrOut[cidx + i ].real() >> lshift);
				val.imag() = (fftPyrOut[cidx + i ].imag() >> lshift);
			}
			imgInTmp2[i] = val;
		}

		fft_config2.setDir(false);
		//fft_config2.setSch(0x2AB);
		fft_config2.setNfft(llimits[l]);
		hls::fft<config2>(imgInTmp2, ifftPyrOut, &fft_status2, &fft_config2);

		dummy_proc2<cmpxDataOut2>(ifftPyrOut,ifftPyrOut2);
//#ifndef __SYNTHESIS__
//	if (l==0)
//	{
//		FILE * fo = fopen("ifft_in.txt", "wb");
//		for(int i=0;i<512;i++) {
//			fprintf(fo, "%.8f %.8f\n", imgInTmp2[i].real().to_float(), imgInTmp2[i].imag().to_float());
//		}
//		fclose(fo);
//
//		fo = fopen("ifft_out.txt", "wb");
//		for(int i=0;i<512;i++) {
//			fprintf(fo, "%.8f %.8f\n", ifftPyrOut[i].real().to_float(), ifftPyrOut[i].imag().to_float());
//		}
//		fclose(fo);
//	}
//#endif

		for(int i=0;i<fsize;i++) {
#pragma HLS pipeline
			t_pyr_complex val;
			val.real() = ifftPyrOut2[i].real() >> pshifts[l];
			val.imag() = ifftPyrOut2[i].imag() >> pshifts[l];
			pyrFilOut.write(val);
		}
	}//end of ifft
예제 #28
0
void dummy_proc3( T imgIn[IMG_WIDTH],hls::stream<T> &out,int length){
#pragma HLS inline
	for(int i=0;i<length;i++)
		out.write( imgIn[i]);
}
void accumregio(	const uint32_t stepsMC,
					const uint32_t pathsMC,
					volatile uint32_t *peekStep,
					volatile uint32_t *peekPath,
					hls::stream<float> &inData,
					hls::stream<float> &outAccum )

{

//#pragma HLS INTERFACE s_axilite port=return bundle=CONTROL
//#pragma HLS INTERFACE s_axilite port=stepsMC bundle=CONTROL
//#pragma HLS INTERFACE s_axilite port=pathsMC bundle=CONTROL
//#pragma HLS INTERFACE s_axilite port=peekStep bundle=CONTROL
//#pragma HLS INTERFACE s_axilite port=peekPath bundle=CONTROL

//#pragma HLS INTERFACE axis register port=inData
//#pragma HLS INTERFACE axis register port=outAccum


	float sums[ACCUM_ELEM];
	#pragma HLS RESOURCE variable=sums core=RAM_2P_BRAM
	#pragma HLS DEPENDENCE variable=sums false

	*peekStep = 0xFFFFFFFF;
	*peekPath = 0xFFFFFFFF;

	stepsLoop:for(uint32_t step=0; step<stepsMC; ++step)
	{
//#pragma HLS PIPELINE enable_flush

		resetLoop:for(uint8_t i=0; i<ACCUM_ELEM; ++i)
		{
	#pragma HLS PIPELINE II=1

			sums[i] = (float) 0.0f;
		}

		*peekStep = step;

		// ------------------------------------------
		uint8_t index = (uint8_t) 0;

		pathsLoop:for(uint32_t path=0; path<pathsMC; ++path)
		{
	#pragma HLS PIPELINE II=1

			float data = inData.read();
			float oldSum = sums[index];
			float newSum = oldSum + data;

			sums[index] = newSum;

			index = (index<(ACCUM_ELEM-1))?++index:(uint8_t)0;

			*peekPath = path;
		}

		// ------------------------------------------
		float totalSum = (float) 0.0f;

		totalLoop:for(uint8_t i=0; i<ACCUM_ELEM;++i)
		{
	#pragma HLS PIPELINE II=1
			totalSum += sums[i];
		}

		// ------------------------------------------
		outAccum.write(totalSum);

		//*peekStep = step;
	}

	return;
}
예제 #30
0
void fe_zc(hls::stream< ap_uint<32> > sampleFifo, hls::stream< ap_uint<32> > featureFifo, ap_uint<8> windowSize, ap_uint<32> threshold) {
#pragma HLS INTERFACE ap_ctrl_none port=return
#pragma HLS INTERFACE ap_fifo port=featureFifo
#pragma HLS INTERFACE ap_fifo port=sampleFifo

	ap_uint<32> data;

	ap_int<16> sampleChannel1 = 0;
	ap_int<16> sampleChannel2 = 0;

	ap_uint<32> zcChannel1 = 0;
	ap_uint<32> zcChannel2 = 0;

	ap_int<2> stateChannel1 = 0;
	ap_int<2> stateChannel2 = 0;

	ap_uint<8> cntSamples = 0;

	// Wait for Samples to arrive in FIFO
	while( windowSize == 0 ) {

	}

	while(1) {
		zcChannel1 = 0;
		zcChannel2 = 0;

		stateChannel1 = 0;
		stateChannel2 = 0;

		// Count zero-crossing for channel 1 & 2
		for(cntSamples=0; cntSamples < windowSize; cntSamples++) {
			// Read data from Sample-FIFO
			// 2 16 bit Samples at one position in 32 bit FIFO => Process 2 channels in parallel
			data = sampleFifo.read();

			sampleChannel1 = data(15, 0);
			if( abs2(sampleChannel1) < threshold ) {
				sampleChannel1 = 0;
			}


			sampleChannel2 = data(31, 16);
			if( abs2(sampleChannel2) < threshold ) {
				sampleChannel2 = 0;
			}

			// Check whether a zero-crossing occurred or not
			// Channel 1
			if( stateChannel1 == 0 ) {
				if( sampleChannel1 < 0 ) {
					stateChannel1 = -1;
				}
				else if( sampleChannel1 == 0 ) {
					stateChannel1 = 0;
				}
				else {
					stateChannel1 = 1;
				}
			}
			else if( stateChannel1 < 0 ) {
				if( sampleChannel1 > 0 ) {
					zcChannel1++;
					if( sampleChannel1 < 0 ) {
						stateChannel1 = -1;
					}
					else if( sampleChannel1 == 0 ) {
						stateChannel1 = 0;
					}
					else {
						stateChannel1 = 1;
					}
				}
			}
			else if( stateChannel1 > 0 ) {
				if( sampleChannel1 < 0 ) {
					zcChannel1++;
					if( sampleChannel1 < 0 ) {
						stateChannel1 = -1;
					}
					else if( sampleChannel1 == 0 ) {
						stateChannel1 = 0;
					}
					else {
						stateChannel1 = 1;
					}
				}
			}

			// Channel 2
			if( stateChannel2 == 0 ) {
				if( sampleChannel2 < 0 ) {
					stateChannel2 = -1;
				}
				else if( sampleChannel2 == 0 ) {
					stateChannel2 = 0;
				}
				else {
					stateChannel2 = 1;
				}
			}
			else if( stateChannel2 < 0 ) {
				if( sampleChannel2 > 0 ) {
					zcChannel2++;
					if( sampleChannel2 < 0 ) {
						stateChannel2 = -1;
					}
					else if( sampleChannel2 == 0 ) {
						stateChannel2 = 0;
					}
					else {
						stateChannel2 = 1;
					}
				}
			}
			else if( stateChannel2 > 0 ) {
				if( sampleChannel2 < 0 ) {
					zcChannel2++;
					if( sampleChannel2 < 0 ) {
						stateChannel2 = -1;
					}
					else if( sampleChannel2 == 0 ) {
						stateChannel2 = 0;
					}
					else {
						stateChannel2 = 1;
					}
				}
			}
		}
		// Write back features to Feature-FIFO
		featureFifo.write(zcChannel1);
		featureFifo.write(zcChannel2);
	}
}