예제 #1
0
/**
 * Top-level entry point of the tile path finder.
 *
 * Consumes a three-word header (tile coordinates, tile size, payload length)
 * followed by the payload from `in`. It always answers on `out` with the tile
 * coordinates and the number of openings found. When at least one opening
 * exists it additionally emits the entrance and the computed directions.
 *
 * @param in   stream delivering the header and tile payload
 * @param out  stream receiving the result words
 */
void toplevel(hls::stream<uint32>& in, hls::stream<uint32>& out) {

#pragma HLS INTERFACE ap_fifo port=in
#pragma HLS INTERFACE ap_fifo port=out
#pragma HLS RESOURCE variable=in core=AXI4Stream
#pragma HLS RESOURCE variable=out core=AXI4Stream
#pragma HLS INTERFACE ap_ctrl_none port=return

#pragma HLS ARRAY_PARTITION variable=openings complete dim=1
#pragma HLS ARRAY_PARTITION variable=inGrid complete dim=1
#pragma HLS ARRAY_MAP variable=directions instance=instance1 horizontal
#pragma HLS ARRAY_MAP variable=tile instance=instance1 horizontal

	// Header: three consecutive words, each narrowed to 8 bits.
	uint8 coords = in.read();
	uint8 edgeLength = in.read();
	uint8 payloadWords = in.read(); // number of 32-bit words that follow
	readData(in, payloadWords);

	// The coordinates and the opening count are reported unconditionally.
	uint8 openingCount = findOpenings(edgeLength);
	out.write(coords);
	out.write(openingCount);

	// Nothing more to report for a tile without openings.
	if (!(openingCount > 0)) {
		return;
	}

	writeEntrance(out);
	findDeadEnds(edgeLength);
	uint16 directionCount = findPath(edgeLength);
	writeDirections(out, directionCount);
}
/**
 * Box-Muller transform running as a free-running streaming block.
 *
 * Each loop iteration consumes two 32-bit uniform integers and produces two
 * floats: radius * cos(angle) followed by radius * sin(angle).
 * The loop never terminates.
 *
 * @param uniform_rns   input stream of 32-bit uniform random integers
 * @param gaussian_rns  output stream of transformed floats (two per iteration)
 */
void gauss_transform(
		hls::stream<uint32_t> &uniform_rns,
		hls::stream<float> &gaussian_rns) {
	#pragma HLS interface ap_fifo port=uniform_rns
	#pragma HLS resource core=AXI4Stream variable=uniform_rns

	#pragma HLS interface ap_fifo port=gaussian_rns
	#pragma HLS resource core=AXI4Stream variable=gaussian_rns

	#pragma HLS interface ap_ctrl_none port=return

	while (true) {
		#pragma HLS PIPELINE II=2
		// First draw, scaled into the interval (0, 1]; the +1 keeps it away from 0.
		const float unit = ((float)uniform_rns.read() + 1.f) * (float)(1.0 / 4294967296.0);
		// Second draw, scaled into the interval (0, 2*pi].
		const float angle = ((float)uniform_rns.read() + 1.f) * (float)(2 * M_PI / 4294967296.0);

		const float radius = hls::sqrtf(-2 * hls::logf(unit));
		const float cosPart = radius * hls::cosf(angle);
		const float sinPart = radius * hls::sinf(angle);

		gaussian_rns.write(cosPart);
		gaussian_rns.write(sinPart);
	}

}
// -----------------------------------------------------------
/**
 * Software model of one update step over all paths.
 *
 * For every path it reads one continuation value, one payoff and one cash
 * flow. The new value is the payoff when the payoff is positive and at least
 * the continuation value; otherwise it is the discounted cash flow.
 *
 * Output routing depends on `step`:
 *   - step <  stepsMC: new value to cashFlow_out and discount * new value to
 *                      cashFlowDisc_out
 *   - step == stepsMC: new value to toAccum_out
 *   - step >  stepsMC: nothing is written
 *
 * A zero-initialisation pass over cashFlow_out / cashFlowDisc_out used to be
 * sketched here and remains disabled.
 */
void lsupdate2SW_step	( 	const uint32_t step,
							const uint32_t stepsMC,
							const uint32_t pathsMC,
							const float discount,
							hls::stream<float> &contin_in,
							hls::stream<float> &payoff_in,
							hls::stream<float> &cashFlow_in,
							hls::stream<float> &cashFlow_out,
							hls::stream<float> &cashFlowDisc_out,
							hls::stream<float> &toAccum_out )
{
	printf("lsupdate2SW_step\n");

	pathsLoop:for(uint32_t path=0; path<pathsMC; ++path)
	{
		// Inputs are consumed in a fixed order: continuation, payoff, cash flow.
		const float continuation = contin_in.read();
		const float payoff       = payoff_in.read();
		const float cashFlow     = cashFlow_in.read();

		const float discountedCashFlow = discount * cashFlow;

		// Take the payoff only when it is positive and not below the continuation.
		const bool takePayoff = (payoff > (float) 0.0f) && (payoff >= (float) continuation);
		const float newY = takePayoff ? payoff : discountedCashFlow;

		if(step < stepsMC)
		{
			cashFlow_out.write(newY);
			cashFlowDisc_out.write(discount * newY);
		}
		else if(step == stepsMC)
		{
			toAccum_out.write(newY);
		}
	}
}
예제 #4
0
/**
 * Top-level function.
 *
 * Start-up: calls init(), reads `side` from `input`, sets ntiles = side * side,
 * reads four words per tile into tiles[t][0..3], calls mapcolours() and clears
 * bit 0 of `avail`.
 *
 * Main loop (runs while `terminate` is false):
 *   1. solve() is called when seq == 1.
 *   2. If `terminate` is now set, a 0 is written to `output` and the loop ends.
 *   3. Otherwise a 1 is written to `output` and a command word is read back:
 *        0     -> leave the loop
 *        1     -> write colour(p, e) for every tile position and edge
 *        other -> no output
 *
 * side, ntiles, tiles, avail, seq and terminate are declared outside this
 * function; their types are not visible here.
 */
void toplevel(hls::stream<uint32> &input, hls::stream<uint32> &output) {
#pragma HLS INTERFACE ap_fifo port=input
#pragma HLS INTERFACE ap_fifo port=output
#pragma HLS RESOURCE variable=input core=AXI4Stream
#pragma HLS RESOURCE variable=output core=AXI4Stream
#pragma HLS INTERFACE ap_ctrl_none port=return

		// Last command word received from the host side of the stream.
		uint32 command;

		init();

		// First word is the side length; the tile count follows from it.
		side = input.read();
		ntiles = side * side;

		// Four words per tile, one for each edge index 0..3.
		for(u8 t = 0; t < ntiles; t++)
			for (u8 e = 0; e < 4; e++)
				tiles[t][e] = input.read();

		mapcolours();

		// we start off with tile 0 in position 0
	    avail &= ~BIT36(0);

	    seq = 1;
	    while (!terminate) {
	    	if (seq == 1)
	    		solve();

			// Status 0 tells the reader that no further results will follow.
			if (terminate) {
				output.write(0);
				break;
			}

			/* use magic flag to enforce sequencing */
			// Status 1 is written before the command is read; the seq tests
			// around the read are the "magic flag" mentioned above.
			seq = 0;
			output.write(1);
			if (seq == 0)
				command = input.read();
			seq = 1;

			/* command 0: terminate */
			if (command == 0)
				break;

			/* command 1: write output */
			if (command == 1)
				for (u8 p = 0; p < ntiles; p++)
					for(u8 e = 0; e < 4; e++)
						output.write(colour(p, e));

			/* any other command (canonically 2) will cause search
			 * to continue without output */
			// NOTE(review): seq was set to 1 a few lines above, so unless
			// colour() or another agent changes seq, this condition is false
			// and backtrack() is never called from here - confirm intent.
			if (seq == 0)
				backtrack();
			seq = 1;
	    }
}
/**
 * Streaming payoff statistics over a block of 64 interleaved slots.
 *
 * For each of the BLOCK slots one value is read from `in`, turned into
 * max_0(exp(value) - strike_price), and folded into that slot's running
 * statistics, which persist across calls (static storage):
 *   - res_cnt:  number of samples seen by the slot
 *   - res_sum:  running mean of the samples (despite the name)
 *   - res_prod: running sum of delta * (sample - new mean)
 * The updated mean is written to `out` and the updated res_prod to `out2`.
 *
 * The streams are taken by reference. The Vivado HLS documentation requires
 * hls::stream arguments to be passed by reference; with by-value parameters
 * the writes would land in a copy the caller never sees.
 *
 * @param in            input stream, one float per slot
 * @param out           output stream receiving the running mean per slot
 * @param out2          output stream receiving the res_prod accumulator per slot
 * @param strike_price  value subtracted from exp(input) before clamping
 */
void pricing(hls::stream<float> &in, hls::stream<float> &out, hls::stream<float> &out2, float strike_price) {
	//#pragma HLS PIPELINE II=1

	const int BLOCK = 64;
	static ap_uint<32> res_cnt[BLOCK];
	static float res_sum[BLOCK];
	static float res_prod[BLOCK];

	for (int i = 0; i < BLOCK; ++i) {
		#pragma HLS PIPELINE II=1
		float path = in.read();
		float res = max_0(hls::expf(path) - strike_price);

		// Load the previous state of this slot.
		ap_uint<32> l_cnt = res_cnt[i];
		float l_sum = res_sum[i];
		float l_prod = res_prod[i];

		// Incremental update of the count, the mean and the deviation accumulator.
		ap_uint<32> n_cnt = l_cnt + 1;
		float delta = res - l_sum;
		float n_sum = l_sum + delta / n_cnt;
		float n_prod = l_prod + delta * (res - n_sum);

		res_cnt[i] = n_cnt;
		res_sum[i] = n_sum;
		res_prod[i] = n_prod;

		// Emit the values just stored (identical to reading the arrays back).
		out.write(n_sum);
		out2.write(n_prod);
	}
}
예제 #6
0
/**
 * Stream wrapper around digitrec().
 *
 * Reads two 32-bit words (low word first), packs them into a `digit` value,
 * runs digitrec() on it and writes the result to the output stream.
 *
 * @param strm_in   input stream carrying the two halves of the packed digit
 * @param strm_out  output stream receiving the digitrec() result
 */
void dut(
    hls::stream<bit32_t> &strm_in,
    hls::stream<bit32_t> &strm_out
)
{
  // Fetch both halves before touching the packed value.
  bit32_t word_lo = strm_in.read();
  bit32_t word_hi = strm_in.read();

  // Bits [31:0] come from the first word; everything above from the second.
  digit packed;
  packed(31, 0) = word_lo;
  packed(packed.length()-1, 32) = word_hi;

  bit4_t label;
  label = digitrec( packed );

  // Emit the full width of the result.
  strm_out.write( label(label.length()-1, 0) );
}
예제 #7
0
/**
 * Software FIR filter over a fixed-length run of samples.
 *
 * For each of RUN_LENGTH input samples the delay line is shifted by one
 * position, the new sample is stored at index TAPS-1, and the dot product of
 * the delay line with the coefficient table is written to the output stream.
 * The delay line is static, so its contents persist across calls.
 *
 * @param input_val   stream of input samples (narrowed to short on storage)
 * @param output_val  stream receiving one accumulated result per input sample
 */
void fir_sw(hls::stream<int> &input_val, hls::stream<int> &output_val)
{
	static short shift_reg[TAPS] = {0};
	const short coeff[TAPS] = {6,0,-4,-3,5,6,-6,-13,7,44,64,44,7,-13,
                                 -6,6,5,-3,-4,0,6};

	for (int n = 0; n < RUN_LENGTH; ++n) {
		const int sample = input_val.read();

		// Slide every entry one slot towards index 0, then append the new sample.
		for (int dst = 1; dst < TAPS; ++dst) {
			shift_reg[dst - 1] = shift_reg[dst];
		}
		shift_reg[TAPS-1] = sample;

		// Multiply-accumulate across all taps.
		int acc = 0;
		for (int tap = 0; tap < TAPS; ++tap) {
			acc += shift_reg[tap] * coeff[tap];
		}
		output_val.write(acc);
	}
}
/**
 * Duplicates a pair of input values together with their negated counterparts.
 *
 * One invocation reads two floats from `rn_in`. The first goes to `rn_out_1`
 * and the second to `rn_out_2`; then negate() of each is written to the same
 * respective stream. The surrounding loop is currently disabled, so exactly
 * one pair is processed per call.
 *
 * @param rn_in     input stream, two values consumed per call
 * @param rn_out_1  receives the first value, then its negation
 * @param rn_out_2  receives the second value, then its negation
 */
void antithetic(
		hls::stream<float> &rn_in,
		hls::stream<float> &rn_out_1,
		hls::stream<float> &rn_out_2)
{
	#pragma HLS interface ap_fifo port=rn_in
	#pragma HLS resource core=AXI4Stream variable=rn_in

	#pragma HLS interface ap_fifo port=rn_out_1
	#pragma HLS resource core=AXI4Stream variable=rn_out_1
	#pragma HLS interface ap_fifo port=rn_out_2
	#pragma HLS resource core=AXI4Stream variable=rn_out_2

	#pragma HLS interface ap_ctrl_none port=return

	// Plain scope in place of the disabled for / while(true) loop.
	{
		#pragma HLS PIPELINE II=2

		float first = rn_in.read();
		float second = rn_in.read();

		// Original values first ...
		rn_out_1.write(first);
		rn_out_2.write(second);

		// ... followed by the negated ones, in the same stream order.
		rn_out_1.write(negate(first));
		rn_out_2.write(negate(second));
	}
}
/**
 * Stream wrapper around hybrid_image().
 *
 * Reads 4096 pairs of 32-bit words; the first word of each pair becomes the
 * real part of an element of the first input array and the second word the
 * real part of the matching element of the second array (imaginary parts are
 * zero). After hybrid_image() has filled the result array, the real and
 * imaginary part of every result element are written out, in that order.
 *
 * @param strm_in   input stream, 2 * 4096 words
 * @param strm_out  output stream, 2 * 4096 words
 */
void dut(
    hls::stream<bit32_t> &strm_in,
    hls::stream<bit32_t> &strm_out
)
{
  const int kSamples = 4096;

  complex<float> result[kSamples];
  complex<float> first_in[kSamples];
  complex<float> second_in[kSamples];

  // Input: two words per element, converted to float real parts.
  bit32_t word_a;
  bit32_t word_b;
  float real_part = 0;
  for (int n = 0; n < kSamples; ++n)
  {
    word_a = strm_in.read();
    word_b = strm_in.read();

    real_part = word_a;
    first_in[n] = complex<float>(real_part, 0);

    real_part = word_b;
    second_in[n] = complex<float>(real_part, 0);
  }

  // The leading 12 is passed through unchanged; presumably log2(4096) - TODO confirm.
  hybrid_image(12, first_in, second_in, result );

  // Output: real part first, then imaginary part, for every element.
  bit32_t out_re;
  bit32_t out_im;
  for (int n = 0; n < kSamples; ++n)
  {
    out_re = result[n].real();
    out_im = result[n].imag();
    strm_out.write(out_re);
    strm_out.write(out_im );
  }
}
/**
 * Sums `pathsMC` floats from a stream using NELEMENTS interleaved partial sums.
 *
 * Incoming values are distributed round-robin over the `sums` array, so
 * consecutive additions hit different entries. After all values are consumed
 * the partial sums are added up and the total is stored through `totalSum`.
 *
 * @param pathsMC   number of values to read from inData
 * @param totalSum  output location for the grand total
 * @param inData    input stream of values to accumulate
 *
 * NOTE(review): the loop counters over `sums` are uint8_t, which assumes
 * NELEMENTS <= 255 - confirm against its definition.
 */
void cflowaccumregio(	const uint32_t pathsMC,
						float *totalSum,
						hls::stream<float> &inData )

{
#pragma HLS INTERFACE s_axilite port=return bundle=CONTROL
#pragma HLS INTERFACE s_axilite port=pathsMC bundle=CONTROL
#pragma HLS INTERFACE s_axilite port=totalSum bundle=CONTROL

#pragma HLS INTERFACE axis register port=inData


	////////////////////////////////////////////////////////////////////////////////////////////////////////////

	//static float sums[N];
	float sums[NELEMENTS];
	#pragma HLS RESOURCE variable=sums core=RAM_2P_BRAM
	#pragma HLS dependence variable=sums false

	// Clear all partial sums.
	resetLoop:for(uint8_t i=0; i<NELEMENTS; ++i)
	{
#pragma HLS PIPELINE II=1

		sums[i] = (float) 0.0f;
	}

	uint8_t index = (uint8_t) 0;

	pathsLoop:for(uint32_t i=0; i<pathsMC; ++i)
	{
#pragma HLS PIPELINE II=1

		float data = inData.read();
		float oldSum = sums[index];
		float newSum = oldSum + data;

		sums[index] = newSum;

		// Advance round-robin. Written as index + 1 rather than ++index so
		// that `index` is modified only once in this statement.
		index = (index<(NELEMENTS-1)) ? (uint8_t)(index + 1) : (uint8_t)0;
	}


	// Reduce the partial sums to a single total.
	float total = (float) 0.0f;

	totalLoop:for(uint8_t i=0; i<NELEMENTS;++i)
	{
#pragma HLS PIPELINE II=1
		total += sums[i];
	}

	*totalSum = total;

	return;
}
// -----------------------------------------------------------
/**
 * Software model of the per-step stream accumulator.
 *
 * For each step in 0..stepsMC (inclusive, i.e. stepsMC + 1 steps) it reads
 * `pathsMC` floats from `inData`, accumulates them round-robin into
 * ACCUM_ELEM partial sums, adds the partial sums up and writes the single
 * total to `outAccum`.
 *
 * @param stepsMC   highest step index; stepsMC + 1 totals are produced
 * @param pathsMC   number of input values consumed per step
 * @param inData    input stream of values to accumulate
 * @param outAccum  output stream receiving one total per step
 *
 * NOTE(review): the loop counters over `sums` are uint8_t, which assumes
 * ACCUM_ELEM <= 255 - confirm against its definition.
 */
void accumSW(	const uint32_t stepsMC,
				const uint32_t pathsMC,
				hls::stream<float> &inData,
				hls::stream<float> &outAccum )
{
	printf("accumSW\n");

	float sums[ACCUM_ELEM];
	#pragma HLS RESOURCE variable=sums core=RAM_2P_BRAM
	#pragma HLS DEPENDENCE variable=sums false

	stepsLoop:for(uint32_t step=0; step<=stepsMC; ++step)
	{
		// ------------------------------------------
		// Clear all partial sums for this step.
		resetLoop:for(uint8_t i=0; i<ACCUM_ELEM; ++i)
		{
	#pragma HLS PIPELINE II=1 enable_flush

			sums[i] = (float) 0.0f;
		}

		// ------------------------------------------
		uint8_t index = (uint8_t) 0;

		pathsLoop:for(uint32_t i=0; i<pathsMC; ++i)
		{
	#pragma HLS PIPELINE II=1 enable_flush

			float data = inData.read();
			float oldSum = sums[index];
			float newSum = oldSum + data;

			sums[index] = newSum;

			// Advance round-robin. Written as index + 1 rather than ++index
			// so that `index` is modified only once in this statement.
			index = (index<(ACCUM_ELEM-1)) ? (uint8_t)(index + 1) : (uint8_t)0;
		}

		// ------------------------------------------
		// Reduce the partial sums to the total of this step.
		float totalSum = (float) 0.0f;

		totalLoop:for(uint8_t i=0; i<ACCUM_ELEM;++i)
		{
	#pragma HLS PIPELINE II=1
			totalSum += sums[i];
		}

		// ------------------------------------------
		outAccum.write(totalSum);
	}

	return;
}
예제 #12
0
/**
 * Windowed sum of absolute sample-to-sample differences for two channels.
 *
 * Every 32-bit word from `sampleFifo` carries two 16-bit samples: channel 1
 * in bits [15:0] and channel 2 in bits [31:16]. For each window of
 * `windowSize` words, abs2() of the difference between consecutive samples is
 * accumulated per channel (the first sample of a window only seeds the
 * "previous" value). The two sums are then written to `featureFifo`, channel 1
 * first. The processing loop never terminates.
 *
 * The streams are taken by reference, as the Vivado HLS documentation
 * requires for hls::stream arguments.
 *
 * @param sampleFifo   input stream of packed sample pairs
 * @param featureFifo  output stream, two words per window
 * @param windowSize   number of words per window; 0 makes the call return
 *                     immediately
 */
void fe_wfl(hls::stream< ap_uint<32> > &sampleFifo, hls::stream< ap_uint<32> > &featureFifo, ap_uint<8> windowSize) {
#pragma HLS INTERFACE ap_ctrl_none port=return
#pragma HLS INTERFACE ap_fifo port=featureFifo
#pragma HLS INTERFACE ap_fifo port=sampleFifo

    ap_uint<32> data;

    ap_int<32> wflChannel1 = 0;
    ap_int<32> wflChannel2 = 0;

    ap_int<16> sampleChannel1 = 0;
    ap_int<16> sampleChannel2 = 0;

    ap_int<16> prevSampleChannel1 = 0;
    ap_int<16> prevSampleChannel2 = 0;

    ap_uint<8> cntSamples = 0;

    // windowSize is passed by value, so it cannot change during this call:
    // spinning on it would either be a no-op or never finish. Leave instead.
    // NOTE(review): with ap_ctrl_none the block is presumably restarted by the
    // hardware, which would re-sample windowSize - confirm on target.
    if (windowSize == 0) {
        return;
    }

    while(1) {
        wflChannel1 = 0;
        wflChannel2 = 0;

        // Accumulate |sample - previous sample| for channel 1 & 2
        for(cntSamples = 0; cntSamples < windowSize; cntSamples++) {
            // Read data from Sample-FIFO
            // 2 16 bit Samples at one position in 32 bit FIFO => Process 2 channels in parallel
            data = sampleFifo.read();

            sampleChannel1 = data(15, 0);
            sampleChannel2 = data(31, 16);

            // The first sample of a window has no predecessor inside the window.
            if (cntSamples > 0) {
                wflChannel1 += abs2(sampleChannel1 - prevSampleChannel1);
                wflChannel2 += abs2(sampleChannel2 - prevSampleChannel2);
            }

            prevSampleChannel1 = sampleChannel1;
            prevSampleChannel2 = sampleChannel2;
        }

        // Write back features to Feature-FIFO
        featureFifo.write(wflChannel1);
        featureFifo.write(wflChannel2);
    }
}
예제 #13
0
/**
 * Pulls `tileDataLen` words from the stream into inGrid and expands each word
 * into BUS_WIDTH single-bit entries of `tile`.
 *
 * Bit j of word i lands at tile[i * BUS_WIDTH + (BUS_WIDTH - 1 - j)], i.e. the
 * bit order inside each word is reversed on the way into `tile`.
 *
 * @param in           stream delivering the packed words
 * @param tileDataLen  number of words to read
 */
void readData(hls::stream<uint32> &in, uint8 tileDataLen) {
#pragma HLS INLINE

	readLoop: for (uint8 w = 0; w < tileDataLen; w++) {
#pragma HLS LOOP_TRIPCOUNT min=5 max=18 // Min: MIN_TILE_DATA_32 (5) Max: MAX_TILE_DATA_32 (18) Actual: tileDataLen
		inGrid[w] = in.read();

		// Unpack the word just stored, one bit per tile entry.
		readShiftLoop: for (uint8 b = 0; b < BUS_WIDTH; b++) {
#pragma HLS UNROLL

			// Mirrored position inside the word keeps the original direction.
			uint16 dst = w * BUS_WIDTH + (BUS_WIDTH - b - 1);
			if (inGrid[w] & (1 << b)) {
				tile[dst] = 1;
			} else {
				tile[dst] = 0;
			}
		}
	}
}
예제 #14
0
/**
 * Splits one frame of 1520 input samples (and the matching dispFilter values)
 * into index bands given by the `climits` table and runs nufft_top on each.
 *
 * Routing by sample index coefIdx:
 *   [0, climits[1])           -> sigH  / disp0 -> nufft_top<C, 512> -> sigStreamOutH
 *   [climits[1], climits[2])  -> sigL0 / disp1 -> nufft_top<C, 512> -> sigStreamOutL0
 *   [climits[2], climits[6])  -> sigLA / disp2 -> 4 x nufft_top<C, 256> -> sigStreamOutLA
 *   [climits[6], 1520)        -> copied straight to sigStreamOutLP
 *
 * One dispFilter value is read per sample; values for indices >= climits[6]
 * are read and discarded.
 *
 * `climits`, `C` and nufft_top are defined outside this function.
 */
void nufft_top_pyr(hls::stream<t_input_complex>  &sig,
			   hls::stream<t_disp_scalar>    &dispFilter,
			   hls::stream<t_nufft_output_complex> &sigStreamOutH,
			   hls::stream<t_nufft_output_complex> &sigStreamOutL0,
			   hls::stream<t_nufft_output_complex> &sigStreamOutLA,
			   hls::stream<t_nufft_output_complex> &sigStreamOutLP) {
#pragma HLS inline off
/*
#ifndef  NUFFTB
#pragma HLS INTERFACE axis port=sig
#pragma HLS INTERFACE axis port=dispFilter
#pragma HLS INTERFACE axis port=sigStreamOutH
#pragma HLS INTERFACE axis port=sigStreamOutL0
#pragma HLS INTERFACE axis port=sigStreamOutLA
#pragma HLS INTERFACE axis port=sigStreamOutLP
#endif
*/

#pragma HLS data_pack variable=sig

#pragma HLS DATAFLOW
	// Per-band sample streams feeding the nufft_top instances.
	hls::stream<t_input_complex>  sigH;
	hls::stream<t_input_complex>  sigL0;
	hls::stream<t_input_complex>  sigLA;

	// Per-band dispFilter streams, paired with sigH / sigL0 / sigLA.
	hls::stream<t_disp_scalar > disp0;
	hls::stream<t_disp_scalar > disp1;
	hls::stream<t_disp_scalar > disp2;



#pragma HLS data_pack variable=sigH
#pragma HLS data_pack variable=sigL0
#pragma HLS data_pack variable=sigLA
#pragma HLS data_pack variable=sigStreamOutH
#pragma HLS data_pack variable=sigStreamOutL0
#pragma HLS data_pack variable=sigStreamOutLA
#pragma HLS data_pack variable=sigStreamOutLP

#pragma HLS stream variable=disp0   depth=512
#pragma HLS stream variable=disp1   depth=512
#pragma HLS stream variable=disp2   depth=512

#pragma HLS stream variable=sigH    depth=512
#pragma HLS stream variable=sigL0   depth=512
#pragma HLS stream variable=sigLA   depth=512
//#pragma HLS stream variable=sigStreamOutH    depth=512
//#pragma HLS stream variable=sigStreamOutL0   depth=512
//#pragma HLS stream variable=sigStreamOutLA   depth=490
//#pragma HLS stream variable=sigStreamOutLP   depth=64


	// Stage 1: buffer the whole 1520-sample frame in a stream of that depth.
	hls::stream<t_input_complex>  sigInMem;
#pragma HLS data_pack variable=sigInMem
#pragma HLS stream variable=sigInMem    depth=1520
	for(int coefIdx = 0;coefIdx < 1520; coefIdx++) {
#pragma HLS pipeline
		sigInMem.write(sig.read());
	}

	// NOTE(review): l and i are not used anywhere in the visible body.
	int l = 0;
	int i = 0;
	// Stage 2: route each buffered sample and its dispFilter value to the
	// band selected by comparing the sample index against climits.
	for(int coefIdx = 0;coefIdx < 1520; coefIdx++)
	{
#pragma HLS pipeline

			t_input_complex v = sigInMem.read();
			if (coefIdx >=0 && coefIdx < climits[1]) 			sigH.write(v);
			if (coefIdx >=climits[1] && coefIdx < climits[2])	sigL0.write(v);
			if (coefIdx >=climits[2] && coefIdx < climits[6])   sigLA.write(v);
			// Samples at or beyond climits[6] bypass nufft_top entirely.
			if (coefIdx >=climits[6])                           sigStreamOutLP.write(v);


		// One dispFilter value is consumed per sample, even when no band takes it.
		t_disp_scalar dispVal = dispFilter.read();

		if (coefIdx >=0 && coefIdx < climits[1])   	        disp0.write(dispVal);
		if (coefIdx >=climits[1] && coefIdx < climits[2]) 	disp1.write(dispVal);
		if (coefIdx >=climits[2] && coefIdx < climits[6]) 	disp2.write(dispVal);
	}
	// Stage 3: one nufft_top call per band. The labels only name the calls.
part1:
	nufft_top<C, 512>( sigH, disp0,  sigStreamOutH, 512,255);
part2:
	nufft_top<C, 512>( sigL0, disp1, sigStreamOutL0, 512,255);

part3:

//const int limits[] = { 512, 512,256,128,64,32,16};
//const int Klimits[] = { 255, 255, 127, 63, 31, 15, 3};

	// Four successive calls drain sigLA / disp2 with halving sizes:
	// limit = 256, 128, 64, 32 and klimit = 127, 63, 31, 15 for k = 0..3.
	// This call form passes an extra `level` argument compared to part1/part2.
	for(int k=0;k<4;k++) {
//#pragma HLS DATAFLOW
		const int limit = 256>>k;
		const int klimit = 127 >> k;
		int level = k;
		nufft_top<C, 256>(sigLA, disp2, sigStreamOutLA, limit, level, klimit);
	}

//	for(int k=2;k<6;k++) {
//		nufft_top<C, 256>(sigL[2], disp2, sigStreamOutLA, limits[k],k-1,Klimits[k]);
//	}


	//if (disp0.)
}
/**
 * Generates the per-path terms (powers of the stock value and products with
 * the cash flow) that are streamed out for every step and path.
 *
 * For each step and path one stock value s and one cash flow are read. The
 * payoff is (s - K) when callPut == 0 and (K - s) otherwise. When the payoff
 * is positive the outputs are 1, s, s^2, s^3, s^4, cflow, cflow*s, cflow*s^2;
 * otherwise all eight outputs are 0.
 *
 * peekStep / peekPath are set to 0xFFFFFFFF at start and then track the
 * current step and the last completed path.
 *
 * @param stepsMC   number of steps
 * @param pathsMC   number of paths per step
 * @param K         value subtracted from the stock value
 * @param callPut   0 selects s - K; any other value selects K - s
 * @param peekStep  progress indicator: current step
 * @param peekPath  progress indicator: last processed path
 * @param stock     input stream of stock values
 * @param cashFlow  input stream of cash flows
 * @param stream_x0..stream_x4, stream_y, stream_yx, stream_yx2  output streams
 */
void lsdatagenregio ( const uint32_t stepsMC,
						const uint32_t pathsMC,
						const float K,
						const uint32_t callPut,
						volatile uint32_t *peekStep,
						volatile uint32_t *peekPath,
						hls::stream<float> &stock,
						hls::stream<float> &cashFlow,
						hls::stream<float> &stream_x0,
						hls::stream<float> &stream_x1,
						hls::stream<float> &stream_x2,
						hls::stream<float> &stream_x3,
						hls::stream<float> &stream_x4,
						hls::stream<float> &stream_y,
						hls::stream<float> &stream_yx,
						hls::stream<float> &stream_yx2 )
{
#pragma HLS INTERFACE axis register port=stock
#pragma HLS INTERFACE axis register port=cashFlow

#pragma HLS INTERFACE axis register port=stream_x0
#pragma HLS INTERFACE axis register port=stream_x1
#pragma HLS INTERFACE axis register port=stream_x2
#pragma HLS INTERFACE axis register port=stream_x3
#pragma HLS INTERFACE axis register port=stream_x4
#pragma HLS INTERFACE axis register port=stream_y
#pragma HLS INTERFACE axis register port=stream_yx
#pragma HLS INTERFACE axis register port=stream_yx2

#pragma HLS INTERFACE s_axilite port=peekStep bundle=CONTROL
#pragma HLS INTERFACE s_axilite port=peekPath bundle=CONTROL
#pragma HLS INTERFACE s_axilite port=callPut bundle=CONTROL
#pragma HLS INTERFACE s_axilite port=K bundle=CONTROL
#pragma HLS INTERFACE s_axilite port=pathsMC bundle=CONTROL
#pragma HLS INTERFACE s_axilite port=stepsMC bundle=CONTROL
#pragma HLS INTERFACE s_axilite port=return bundle=CONTROL

	// Both progress indicators start at all-ones until real progress is made.
	*peekStep = 0xFFFFFFFF;
	*peekPath = 0xFFFFFFFF;

	stepsLoop:for(uint32_t step=0; step < stepsMC; ++step)
	{
		*peekStep = step;

		pathsLoop:for(uint32_t path=0; path<pathsMC; ++path)
		{
#pragma HLS PIPELINE II=1

			const float price = stock.read();
			const float flow  = cashFlow.read();

			// Payoff sign depends on callPut; only a positive payoff counts.
			const float diff = (price - K);
			const float payoff = (callPut == 0) ? diff : -diff;
			const bool inTheMoney = (payoff > 0.0f);

			const float priceSq = price * price;

			// Terms are zeroed for paths whose payoff is not positive.
			const float x0 = inTheMoney ? (float) 1.0f : (float) 0.0f;
			const float x1 = inTheMoney ? price        : (float) 0.0f;
			const float x2 = inTheMoney ? priceSq      : (float) 0.0f;
			const float y  = inTheMoney ? flow         : (float) 0.0f;

			// Higher-order terms are built from the (possibly zeroed) ones.
			const float x3  = x1 * x2;
			const float x4  = x2 * x2;
			const float yx  = y * x1;
			const float yx2 = y * x2;

			stream_x0.write(x0);
			stream_x1.write(x1);
			stream_x2.write(x2);
			stream_x3.write(x3);
			stream_x4.write(x4);
			stream_y.write(y);
			stream_yx.write(yx);
			stream_yx2.write(yx2);

			*peekPath = path;
		}
	}
}
/**
 * Per-step stream accumulator with progress indicators.
 *
 * For each of `stepsMC` steps it reads `pathsMC` floats from `inData`,
 * accumulates them round-robin into ACCUM_ELEM partial sums, adds the partial
 * sums up and writes the single total to `outAccum`.
 *
 * peekStep / peekPath are set to 0xFFFFFFFF at start and then track the
 * current step and the last processed path.
 *
 * @param stepsMC   number of steps (one total is produced per step)
 * @param pathsMC   number of input values consumed per step
 * @param peekStep  progress indicator: current step
 * @param peekPath  progress indicator: last processed path
 * @param inData    input stream of values to accumulate
 * @param outAccum  output stream receiving one total per step
 *
 * NOTE(review): the loop counters over `sums` are uint8_t, which assumes
 * ACCUM_ELEM <= 255 - confirm against its definition.
 */
void accumregio(	const uint32_t stepsMC,
					const uint32_t pathsMC,
					volatile uint32_t *peekStep,
					volatile uint32_t *peekPath,
					hls::stream<float> &inData,
					hls::stream<float> &outAccum )

{

//#pragma HLS INTERFACE s_axilite port=return bundle=CONTROL
//#pragma HLS INTERFACE s_axilite port=stepsMC bundle=CONTROL
//#pragma HLS INTERFACE s_axilite port=pathsMC bundle=CONTROL
//#pragma HLS INTERFACE s_axilite port=peekStep bundle=CONTROL
//#pragma HLS INTERFACE s_axilite port=peekPath bundle=CONTROL

//#pragma HLS INTERFACE axis register port=inData
//#pragma HLS INTERFACE axis register port=outAccum


	float sums[ACCUM_ELEM];
	#pragma HLS RESOURCE variable=sums core=RAM_2P_BRAM
	#pragma HLS DEPENDENCE variable=sums false

	*peekStep = 0xFFFFFFFF;
	*peekPath = 0xFFFFFFFF;

	stepsLoop:for(uint32_t step=0; step<stepsMC; ++step)
	{
//#pragma HLS PIPELINE enable_flush

		// Clear all partial sums for this step.
		resetLoop:for(uint8_t i=0; i<ACCUM_ELEM; ++i)
		{
	#pragma HLS PIPELINE II=1

			sums[i] = (float) 0.0f;
		}

		*peekStep = step;

		// ------------------------------------------
		uint8_t index = (uint8_t) 0;

		pathsLoop:for(uint32_t path=0; path<pathsMC; ++path)
		{
	#pragma HLS PIPELINE II=1

			float data = inData.read();
			float oldSum = sums[index];
			float newSum = oldSum + data;

			sums[index] = newSum;

			// Advance round-robin. Written as index + 1 rather than ++index
			// so that `index` is modified only once in this statement.
			index = (index<(ACCUM_ELEM-1)) ? (uint8_t)(index + 1) : (uint8_t)0;

			*peekPath = path;
		}

		// ------------------------------------------
		// Reduce the partial sums to the total of this step.
		float totalSum = (float) 0.0f;

		totalLoop:for(uint8_t i=0; i<ACCUM_ELEM;++i)
		{
	#pragma HLS PIPELINE II=1
			totalSum += sums[i];
		}

		// ------------------------------------------
		outAccum.write(totalSum);

		//*peekStep = step;
	}

	return;
}
void pyrconstuct_top (
    std::complex<ap_fixed<16, 1, (ap_q_mode) 5, (ap_o_mode)3, 0> > imgIn[512],
    hls::stream<std::complex<ap_fixed<17, 6, (ap_q_mode) 0, (ap_o_mode)3, 0> > >& pyrFilOut,
    const int nL)
{
    fstream wrapc_switch_file_token;
    wrapc_switch_file_token.open(".hls_cosim_wrapc_switch.log");
    int AESL_i;
    if (wrapc_switch_file_token.good())
    {
        static unsigned AESL_transaction_pc = 0;
        string AESL_token;
        string AESL_num;
        static AESL_FILE_HANDLER aesl_fh;

        // define output stream variables: "pyrFilOut"
        std::vector<std::complex<ap_fixed<17, 6, (ap_q_mode) 0, (ap_o_mode)3, 0> > > aesl_tmp_0;
        int aesl_tmp_1;
        int aesl_tmp_2 = 0;

        // read output stream size: "pyrFilOut"
        aesl_fh.read(WRAPC_STREAM_SIZE_OUT_pyrFilOut_V, AESL_token); // [[transaction]]
        aesl_fh.read(WRAPC_STREAM_SIZE_OUT_pyrFilOut_V, AESL_num); // transaction number

        if (atoi(AESL_num.c_str()) == AESL_transaction_pc)
        {
            aesl_fh.read(WRAPC_STREAM_SIZE_OUT_pyrFilOut_V, AESL_token); // pop_size
            aesl_tmp_1 = atoi(AESL_token.c_str());
            aesl_fh.read(WRAPC_STREAM_SIZE_OUT_pyrFilOut_V, AESL_token); // [[/transaction]]
        }

        // output port post check: "pyrFilOut_V"
        aesl_fh.read(AUTOTB_TVOUT_PC_pyrFilOut_V, AESL_token); // [[transaction]]
        if (AESL_token != "[[transaction]]")
        {
            exit(1);
        }
        aesl_fh.read(AUTOTB_TVOUT_PC_pyrFilOut_V, AESL_num); // transaction number

        if (atoi(AESL_num.c_str()) == AESL_transaction_pc)
        {
            aesl_fh.read(AUTOTB_TVOUT_PC_pyrFilOut_V, AESL_token); // data

            std::vector<sc_bv<34> > pyrFilOut_V_pc_buffer;
            int i = 0;

            while (AESL_token != "[[/transaction]]")
            {
                bool no_x = false;
                bool err = false;

                // search and replace 'X' with "0" from the 1st char of token
                while (!no_x)
                {
                    size_t x_found = AESL_token.find('X');
                    if (x_found != string::npos)
                    {
                        if (!err)
                        {
                            cerr << "@W [SIM-201] RTL produces unknown value 'X' on port 'pyrFilOut_V', possible cause: There are uninitialized variables in the C design." << endl;
                            err = true;
                        }
                        AESL_token.replace(x_found, 1, "0");
                    }
                    else
                    {
                        no_x = true;
                    }
                }

                no_x = false;

                // search and replace 'x' with "0" from the 3rd char of token
                while (!no_x)
                {
                    size_t x_found = AESL_token.find('x', 2);

                    if (x_found != string::npos)
                    {
                        if (!err)
                        {
                            cerr << "@W [SIM-201] RTL produces unknown value 'X' on port 'pyrFilOut_V', possible cause: There are uninitialized variables in the C design." << endl;
                            err = true;
                        }
                        AESL_token.replace(x_found, 1, "0");
                    }
                    else
                    {
                        no_x = true;
                    }
                }

                // push token into output port buffer
                if (AESL_token != "")
                {
                    pyrFilOut_V_pc_buffer.push_back(AESL_token.c_str());
                    i++;
                }

                aesl_fh.read(AUTOTB_TVOUT_PC_pyrFilOut_V, AESL_token); // data or [[/transaction]]

                if (AESL_token == "[[[/runtime]]]" || aesl_fh.eof(AUTOTB_TVOUT_PC_pyrFilOut_V))
                {
                    exit(1);
                }
            }

            // correct the buffer size the current transaction
            if (i != aesl_tmp_1)
            {
                aesl_tmp_1 = i;
            }

            if (aesl_tmp_1 > 0 && aesl_tmp_0.size() < aesl_tmp_1)
            {
                int aesl_tmp_0_size = aesl_tmp_0.size();

                for (int tmp_aesl_tmp_0 = 0; tmp_aesl_tmp_0 < aesl_tmp_1 - aesl_tmp_0_size; tmp_aesl_tmp_0++)
                {
                    std::complex<ap_fixed<17, 6, (ap_q_mode) 0, (ap_o_mode)3, 0> > tmp;
                    aesl_tmp_0.push_back(tmp);
                }
            }

            // ***********************************
            if (i > 0)
            {
                // RTL Name: pyrFilOut_V
                {
                    // bitslice(16, 0)
                    // {
                    // celement: pyrFilOut.V._M_real.V(16, 0)
                    // {
                    sc_lv<17>* pyrFilOut_V__M_real_V_lv0_0_1519_1 = new sc_lv<17>[1520];
                    // }
                    // }
                    // bitslice(33, 17)
                    // {
                    // celement: pyrFilOut.V._M_imag.V(16, 0)
                    // {
                    sc_lv<17>* pyrFilOut_V__M_imag_V_lv0_0_1519_1 = new sc_lv<17>[1520];
                    // }
                    // }

                    // bitslice(16, 0)
                    {
                        int hls_map_index = 0;
                        // celement: pyrFilOut.V._M_real.V(16, 0)
                        {
                            // carray: (aesl_tmp_2) => (aesl_tmp_1 - 1) @ (1)
                            for (int i_0 = aesl_tmp_2; i_0 <= aesl_tmp_1 - 1; i_0 += 1)
                            {
                                if (&(aesl_tmp_0[0].real()) != NULL) // check the null address if the c port is array or others
                                {
                                    pyrFilOut_V__M_real_V_lv0_0_1519_1[hls_map_index++].range(16, 0) = sc_bv<17>(pyrFilOut_V_pc_buffer[hls_map_index].range(16, 0));
                                }
                            }
                        }
                    }
                    // bitslice(33, 17)
                    {
                        int hls_map_index = 0;
                        // celement: pyrFilOut.V._M_imag.V(16, 0)
                        {
                            // carray: (aesl_tmp_2) => (aesl_tmp_1 - 1) @ (1)
                            for (int i_0 = aesl_tmp_2; i_0 <= aesl_tmp_1 - 1; i_0 += 1)
                            {
                                if (&(aesl_tmp_0[0].imag()) != NULL) // check the null address if the c port is array or others
                                {
                                    pyrFilOut_V__M_imag_V_lv0_0_1519_1[hls_map_index++].range(16, 0) = sc_bv<17>(pyrFilOut_V_pc_buffer[hls_map_index].range(33, 17));
                                }
                            }
                        }
                    }

                    // bitslice(16, 0)
                    {
                        int hls_map_index = 0;
                        // celement: pyrFilOut.V._M_real.V(16, 0)
                        {
                            // carray: (aesl_tmp_2) => (aesl_tmp_1 - 1) @ (1)
                            for (int i_0 = aesl_tmp_2; i_0 <= aesl_tmp_1 - 1; i_0 += 1)
                            {
                                // sub                    : i_0
                                // ori_name               : aesl_tmp_0[i_0].real()
                                // sub_1st_elem           : 0
                                // ori_name_1st_elem      : aesl_tmp_0[0].real()
                                // output_left_conversion : (aesl_tmp_0[i_0].real()).range()
                                // output_type_conversion : (pyrFilOut_V__M_real_V_lv0_0_1519_1[hls_map_index++]).to_string(SC_BIN).c_str()
                                if (&(aesl_tmp_0[0].real()) != NULL) // check the null address if the c port is array or others
                                {
                                    (aesl_tmp_0[i_0].real()).range() = (pyrFilOut_V__M_real_V_lv0_0_1519_1[hls_map_index++]).to_string(SC_BIN).c_str();
                                }
                            }
                        }
                    }
                    // bitslice(33, 17)
                    {
                        int hls_map_index = 0;
                        // celement: pyrFilOut.V._M_imag.V(16, 0)
                        {
                            // carray: (aesl_tmp_2) => (aesl_tmp_1 - 1) @ (1)
                            for (int i_0 = aesl_tmp_2; i_0 <= aesl_tmp_1 - 1; i_0 += 1)
                            {
                                // sub                    : i_0
                                // ori_name               : aesl_tmp_0[i_0].imag()
                                // sub_1st_elem           : 0
                                // ori_name_1st_elem      : aesl_tmp_0[0].imag()
                                // output_left_conversion : (aesl_tmp_0[i_0].imag()).range()
                                // output_type_conversion : (pyrFilOut_V__M_imag_V_lv0_0_1519_1[hls_map_index++]).to_string(SC_BIN).c_str()
                                if (&(aesl_tmp_0[0].imag()) != NULL) // check the null address if the c port is array or others
                                {
                                    (aesl_tmp_0[i_0].imag()).range() = (pyrFilOut_V__M_imag_V_lv0_0_1519_1[hls_map_index++]).to_string(SC_BIN).c_str();
                                }
                            }
                        }
                    }
                }
            }
        }

        // push back output stream: "pyrFilOut"
        for (int i = 0; i < aesl_tmp_1; i++)
        {
            pyrFilOut.write(aesl_tmp_0[i]);
        }

        AESL_transaction_pc++;
    }
    else
    {
        static unsigned AESL_transaction;

        static AESL_FILE_HANDLER aesl_fh;

        // "imgIn_M_real_V"
        char* tvin_imgIn_M_real_V = new char[50];
        aesl_fh.touch(AUTOTB_TVIN_imgIn_M_real_V);

        // "imgIn_M_imag_V"
        char* tvin_imgIn_M_imag_V = new char[50];
        aesl_fh.touch(AUTOTB_TVIN_imgIn_M_imag_V);

        // "pyrFilOut_V"
        char* tvin_pyrFilOut_V = new char[50];
        aesl_fh.touch(AUTOTB_TVIN_pyrFilOut_V);
        char* tvout_pyrFilOut_V = new char[50];
        aesl_fh.touch(AUTOTB_TVOUT_pyrFilOut_V);
        char* wrapc_stream_size_out_pyrFilOut_V = new char[50];
        aesl_fh.touch(WRAPC_STREAM_SIZE_OUT_pyrFilOut_V);
        char* wrapc_stream_egress_status_pyrFilOut_V = new char[50];
        aesl_fh.touch(WRAPC_STREAM_EGRESS_STATUS_pyrFilOut_V);

        static INTER_TCL_FILE tcl_file(INTER_TCL);
        int leading_zero;

        // dump stream tvin: "pyrFilOut"
        std::vector<std::complex<ap_fixed<17, 6, (ap_q_mode) 0, (ap_o_mode)3, 0> > > aesl_tmp_0;
        int aesl_tmp_1 = 0;
        while (!pyrFilOut.empty())
        {
            aesl_tmp_0.push_back(pyrFilOut.read());
            aesl_tmp_1++;
        }

        // [[transaction]]
        sprintf(tvin_imgIn_M_real_V, "[[transaction]] %d\n", AESL_transaction);
        aesl_fh.write(AUTOTB_TVIN_imgIn_M_real_V, tvin_imgIn_M_real_V);

        sc_bv<16>* imgIn_M_real_V_tvin_wrapc_buffer = new sc_bv<16>[512];

        // RTL Name: imgIn_M_real_V
        {
            // bitslice(15, 0)
            {
                int hls_map_index = 0;
                // celement: imgIn._M_real.V(15, 0)
                {
                    // carray: (0) => (511) @ (1)
                    for (int i_0 = 0; i_0 <= 511; i_0 += 1)
                    {
                        // sub                   : i_0
                        // ori_name              : imgIn[i_0].real()
                        // sub_1st_elem          : 0
                        // ori_name_1st_elem     : imgIn[0].real()
                        // regulate_c_name       : imgIn__M_real_V
                        // input_type_conversion : (imgIn[i_0].real()).range().to_string(SC_BIN).c_str()
                        if (&(imgIn[0].real()) != NULL) // check the null address if the c port is array or others
                        {
                            sc_lv<16> imgIn__M_real_V_tmp_mem;
                            imgIn__M_real_V_tmp_mem = (imgIn[i_0].real()).range().to_string(SC_BIN).c_str();
                            imgIn_M_real_V_tvin_wrapc_buffer[hls_map_index++].range(15, 0) = imgIn__M_real_V_tmp_mem.range(15, 0);
                        }
                    }
                }
            }
        }

        // dump tv to file
        for (int i = 0; i < 512; i++)
        {
            sprintf(tvin_imgIn_M_real_V, "%s\n", (imgIn_M_real_V_tvin_wrapc_buffer[i]).to_string(SC_HEX).c_str());
            aesl_fh.write(AUTOTB_TVIN_imgIn_M_real_V, tvin_imgIn_M_real_V);
        }

        tcl_file.set_num(512, &tcl_file.imgIn_M_real_V_depth);
        sprintf(tvin_imgIn_M_real_V, "[[/transaction]] \n");
        aesl_fh.write(AUTOTB_TVIN_imgIn_M_real_V, tvin_imgIn_M_real_V);

        // release memory allocation
        delete [] imgIn_M_real_V_tvin_wrapc_buffer;

        // [[transaction]]
        sprintf(tvin_imgIn_M_imag_V, "[[transaction]] %d\n", AESL_transaction);
        aesl_fh.write(AUTOTB_TVIN_imgIn_M_imag_V, tvin_imgIn_M_imag_V);

        sc_bv<16>* imgIn_M_imag_V_tvin_wrapc_buffer = new sc_bv<16>[512];

        // RTL Name: imgIn_M_imag_V
        {
            // bitslice(15, 0)
            {
                int hls_map_index = 0;
                // celement: imgIn._M_imag.V(15, 0)
                {
                    // carray: (0) => (511) @ (1)
                    for (int i_0 = 0; i_0 <= 511; i_0 += 1)
                    {
                        // sub                   : i_0
                        // ori_name              : imgIn[i_0].imag()
                        // sub_1st_elem          : 0
                        // ori_name_1st_elem     : imgIn[0].imag()
                        // regulate_c_name       : imgIn__M_imag_V
                        // input_type_conversion : (imgIn[i_0].imag()).range().to_string(SC_BIN).c_str()
                        if (&(imgIn[0].imag()) != NULL) // check the null address if the c port is array or others
                        {
                            sc_lv<16> imgIn__M_imag_V_tmp_mem;
                            imgIn__M_imag_V_tmp_mem = (imgIn[i_0].imag()).range().to_string(SC_BIN).c_str();
                            imgIn_M_imag_V_tvin_wrapc_buffer[hls_map_index++].range(15, 0) = imgIn__M_imag_V_tmp_mem.range(15, 0);
                        }
                    }
                }
            }
        }

        // dump tv to file
        for (int i = 0; i < 512; i++)
        {
            sprintf(tvin_imgIn_M_imag_V, "%s\n", (imgIn_M_imag_V_tvin_wrapc_buffer[i]).to_string(SC_HEX).c_str());
            aesl_fh.write(AUTOTB_TVIN_imgIn_M_imag_V, tvin_imgIn_M_imag_V);
        }

        tcl_file.set_num(512, &tcl_file.imgIn_M_imag_V_depth);
        sprintf(tvin_imgIn_M_imag_V, "[[/transaction]] \n");
        aesl_fh.write(AUTOTB_TVIN_imgIn_M_imag_V, tvin_imgIn_M_imag_V);

        // release memory allocation
        delete [] imgIn_M_imag_V_tvin_wrapc_buffer;

        // push back input stream: "pyrFilOut"
        for (int i = 0; i < aesl_tmp_1; i++)
        {
            pyrFilOut.write(aesl_tmp_0[i]);
        }

// [call_c_dut] ---------->

        AESL_ORIG_DUT_pyrconstuct_top(imgIn, pyrFilOut, nL);

        // pop output stream: "pyrFilOut"
        int aesl_tmp_2 = aesl_tmp_1;
        aesl_tmp_1 = 0;
        aesl_tmp_0.clear();
        while (!pyrFilOut.empty())
        {
            aesl_tmp_0.push_back(pyrFilOut.read());
            aesl_tmp_1++;
        }

        // [[transaction]]
        sprintf(tvout_pyrFilOut_V, "[[transaction]] %d\n", AESL_transaction);
        aesl_fh.write(AUTOTB_TVOUT_pyrFilOut_V, tvout_pyrFilOut_V);

        sc_bv<34>* pyrFilOut_V_tvout_wrapc_buffer = new sc_bv<34>[1520];

        // RTL Name: pyrFilOut_V
        {
            // bitslice(16, 0)
            {
                int hls_map_index = 0;
                // celement: pyrFilOut.V._M_real.V(16, 0)
                {
                    // carray: (aesl_tmp_2) => (aesl_tmp_1 - 1) @ (1)
                    for (int i_0 = aesl_tmp_2; i_0 <= aesl_tmp_1 - 1; i_0 += 1)
                    {
                        // sub                   : i_0
                        // ori_name              : aesl_tmp_0[i_0].real()
                        // sub_1st_elem          : 0
                        // ori_name_1st_elem     : aesl_tmp_0[0].real()
                        // regulate_c_name       : pyrFilOut_V__M_real_V
                        // input_type_conversion : (aesl_tmp_0[i_0].real()).range().to_string(SC_BIN).c_str()
                        if (&(aesl_tmp_0[0].real()) != NULL) // check the null address if the c port is array or others
                        {
                            sc_lv<17> pyrFilOut_V__M_real_V_tmp_mem;
                            pyrFilOut_V__M_real_V_tmp_mem = (aesl_tmp_0[i_0].real()).range().to_string(SC_BIN).c_str();
                            pyrFilOut_V_tvout_wrapc_buffer[hls_map_index++].range(16, 0) = pyrFilOut_V__M_real_V_tmp_mem.range(16, 0);
                        }
                    }
                }
            }
            // bitslice(33, 17)
            {
                int hls_map_index = 0;
                // celement: pyrFilOut.V._M_imag.V(16, 0)
                {
                    // carray: (aesl_tmp_2) => (aesl_tmp_1 - 1) @ (1)
                    for (int i_0 = aesl_tmp_2; i_0 <= aesl_tmp_1 - 1; i_0 += 1)
                    {
                        // sub                   : i_0
                        // ori_name              : aesl_tmp_0[i_0].imag()
                        // sub_1st_elem          : 0
                        // ori_name_1st_elem     : aesl_tmp_0[0].imag()
                        // regulate_c_name       : pyrFilOut_V__M_imag_V
                        // input_type_conversion : (aesl_tmp_0[i_0].imag()).range().to_string(SC_BIN).c_str()
                        if (&(aesl_tmp_0[0].imag()) != NULL) // check the null address if the c port is array or others
                        {
                            sc_lv<17> pyrFilOut_V__M_imag_V_tmp_mem;
                            pyrFilOut_V__M_imag_V_tmp_mem = (aesl_tmp_0[i_0].imag()).range().to_string(SC_BIN).c_str();
                            pyrFilOut_V_tvout_wrapc_buffer[hls_map_index++].range(33, 17) = pyrFilOut_V__M_imag_V_tmp_mem.range(16, 0);
                        }
                    }
                }
            }
        }

        // dump tv to file
        for (int i = 0; i < aesl_tmp_1 - aesl_tmp_2; i++)
        {
            sprintf(tvout_pyrFilOut_V, "%s\n", (pyrFilOut_V_tvout_wrapc_buffer[i]).to_string(SC_HEX).c_str());
            aesl_fh.write(AUTOTB_TVOUT_pyrFilOut_V, tvout_pyrFilOut_V);
        }

        tcl_file.set_num(aesl_tmp_1 - aesl_tmp_2, &tcl_file.pyrFilOut_V_depth);
        sprintf(tvout_pyrFilOut_V, "[[/transaction]] \n");
        aesl_fh.write(AUTOTB_TVOUT_pyrFilOut_V, tvout_pyrFilOut_V);

        // release memory allocation
        delete [] pyrFilOut_V_tvout_wrapc_buffer;

        // dump stream size
        sprintf(wrapc_stream_size_out_pyrFilOut_V, "[[transaction]] %d\n", AESL_transaction);
        aesl_fh.write(WRAPC_STREAM_SIZE_OUT_pyrFilOut_V, wrapc_stream_size_out_pyrFilOut_V);
        sprintf(wrapc_stream_size_out_pyrFilOut_V, "%d\n", aesl_tmp_1 - aesl_tmp_2);
        aesl_fh.write(WRAPC_STREAM_SIZE_OUT_pyrFilOut_V, wrapc_stream_size_out_pyrFilOut_V);
        sprintf(wrapc_stream_size_out_pyrFilOut_V, "[[/transaction]] \n");
        aesl_fh.write(WRAPC_STREAM_SIZE_OUT_pyrFilOut_V, wrapc_stream_size_out_pyrFilOut_V);

        // push back output stream: "pyrFilOut"
        for (int i = 0; i < aesl_tmp_1; i++)
        {
            pyrFilOut.write(aesl_tmp_0[i]);
        }

        // release memory allocation: "imgIn_M_real_V"
        delete [] tvin_imgIn_M_real_V;
        // release memory allocation: "imgIn_M_imag_V"
        delete [] tvin_imgIn_M_imag_V;
        // release memory allocation: "pyrFilOut_V"
        delete [] tvout_pyrFilOut_V;
        delete [] tvin_pyrFilOut_V;
        delete [] wrapc_stream_size_out_pyrFilOut_V;

        AESL_transaction++;

        tcl_file.set_num(AESL_transaction , &tcl_file.trans_num);
    }
}
// For next interface change
//TODO(brugger): remove step_size
//TODO(brugger): path_cnt should be uint64_t
//TODO(brugger): add correlation
/**
 * Single-level Heston path kernel with a lower and an upper knock-out barrier.
 *
 * Paths are simulated in blocks of BLOCK_SIZE slots. For each block all
 * step_cnt time steps are executed, and in every step each slot consumes one
 * value from gaussian_rn1 (stock update) and one from gaussian_rn2
 * (volatility update). Random numbers are read for all BLOCK_SIZE slots, also
 * for slots whose path index is >= path_cnt; only their results are dropped.
 *
 * After the last step one value per path is written to prices: the final
 * stock state (which starts at log_spot_price), or -infinity if a barrier was
 * hit in any step.
 *
 * Several scalar arguments are expected to be precomputed by the caller, see
 * the notes next to the parameters.
 */
void heston_kernel_sl(
		// call option
		calc_t log_spot_price,
		calc_t reversion_rate_TIMES_step_size,
		calc_t long_term_avg_vola,
		calc_t vol_of_vol_TIMES_sqrt_step_size,
		calc_t double_riskless_rate, // = 2 * riskless_rate
		calc_t vola_0,
//		calc_t correlation,
//		calc_t time_to_maturity,
	    // both knockout
		calc_t log_lower_barrier_value,
		calc_t log_upper_barrier_value,
		// simulation params
		uint32_t step_cnt,
		calc_t step_size, // = time_to_maturity / step_cnt
		calc_t half_step_size, // = step_size / 2
		calc_t sqrt_step_size, // = sqrt(step_size)
		calc_t barrier_correction_factor, // = BARRIER_HIT_CORRECTION * sqrt_step_size
		uint32_t path_cnt,

		hls::stream<calc_t> &gaussian_rn1,
		hls::stream<calc_t> &gaussian_rn2,
		hls::stream<calc_t> &prices)
{
	// Scalar parameters: plain wires, grouped in the "params" AXI4-Lite bundle.
	#pragma HLS interface ap_none port=log_spot_price
	#pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=log_spot_price
	#pragma HLS interface ap_none port=reversion_rate_TIMES_step_size
	#pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=reversion_rate_TIMES_step_size
	#pragma HLS interface ap_none port=long_term_avg_vola
	#pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=long_term_avg_vola
	#pragma HLS interface ap_none port=vol_of_vol_TIMES_sqrt_step_size
	#pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=vol_of_vol_TIMES_sqrt_step_size
	#pragma HLS interface ap_none port=double_riskless_rate
	#pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=double_riskless_rate
	#pragma HLS interface ap_none port=vola_0
	#pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=vola_0
//	#pragma HLS interface ap_none port=correlation
//	#pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=correlation
//	#pragma HLS interface ap_none port=time_to_maturity
//	#pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=time_to_maturity
	#pragma HLS interface ap_none port=log_lower_barrier_value
	#pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=log_lower_barrier_value
	#pragma HLS interface ap_none port=log_upper_barrier_value
	#pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=log_upper_barrier_value
	#pragma HLS interface ap_none port=step_cnt
	#pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=step_cnt
	#pragma HLS interface ap_none port=step_size
	#pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=step_size
	#pragma HLS interface ap_none port=half_step_size
	#pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=half_step_size
	#pragma HLS interface ap_none port=sqrt_step_size
	#pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=sqrt_step_size
	#pragma HLS interface ap_none port=barrier_correction_factor
	#pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=barrier_correction_factor
	#pragma HLS interface ap_none port=path_cnt
	#pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=path_cnt

	// Streams: FIFO interfaces mapped to AXI4-Stream.
	#pragma HLS interface ap_fifo port=gaussian_rn1
	#pragma HLS resource core=AXI4Stream variable=gaussian_rn1
	#pragma HLS interface ap_fifo port=gaussian_rn2
	#pragma HLS resource core=AXI4Stream variable=gaussian_rn2
	#pragma HLS interface ap_fifo port=prices
	#pragma HLS resource core=AXI4Stream variable=prices

	// Block-level control goes into the same AXI4-Lite bundle.
	#pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" variable=return

	////////////////////////////////////////////////////////////////////////////////////////////////////////////

	// Per-slot path state, carried from one time step to the next.
	state_t states[BLOCK_SIZE];
	#pragma HLS data_pack variable=states

	for (uint32_t first_path = 0; first_path < path_cnt; first_path += BLOCK_SIZE) {
		for (uint32_t t = 0; t != step_cnt; ++t) {
			// TODO(brugger): use data type with less bits for inner counter
			for (uint32_t slot = 0; slot != BLOCK_SIZE; ++slot) {
				#pragma HLS PIPELINE II=1

				// Current state of this slot: a fresh start state on the
				// first step, otherwise what the previous step stored.
				state_t cur;
				if (t != 0) {
					cur = states[slot];
				} else {
					cur.stock = log_spot_price;
					cur.vola = vola_0;
					cur.barrier_hit = false;
				}

				// One random number per process and slot.
				const calc_t z_stock = gaussian_rn1.read();
				const calc_t z_vola = (calc_t) gaussian_rn2.read();

				// The volatility state is floored at zero wherever it feeds
				// the drift terms or the square root.
				state_t nxt;
				calc_t floored_vola = MAX((calc_t) 0., cur.vola);
				calc_t vola_root = hls::sqrtf(floored_vola);
				nxt.stock = cur.stock
						+ (double_riskless_rate - floored_vola) * half_step_size
						+ sqrt_step_size * vola_root * z_stock;
				nxt.vola = cur.vola
						+ reversion_rate_TIMES_step_size * (long_term_avg_vola - floored_vola)
						+ vol_of_vol_TIMES_sqrt_step_size * vola_root * z_vola;

				// Both barriers are moved inwards by a volatility-dependent
				// correction before the hit test.
				calc_t barrier_correction = barrier_correction_factor *
						vola_root;
				#pragma HLS RESOURCE variable=barrier_correction \
						core=FMul_meddsp
				const bool below_lower = nxt.stock <
						log_lower_barrier_value + barrier_correction;
				const bool above_upper = nxt.stock >
						log_upper_barrier_value - barrier_correction;
				// Non-short-circuit OR on purpose: the flag is sticky.
				nxt.barrier_hit = cur.barrier_hit | below_lower | above_upper;
				states[slot] = nxt;

				// Emit a result after the final step, but only for slots
				// that correspond to a requested path.
				const bool last_step = (t + 1 == step_cnt);
				if (last_step && (first_path + slot) < path_cnt) {
					const calc_t result = nxt.barrier_hit ?
							-std::numeric_limits<calc_t>::infinity() :
							nxt.stock;
					prices.write(result);
				}
			}
		}
	}
}
void heston_kernel_ml(const params_ml params, hls::stream<calc_t> &gaussian_rn1,
		hls::stream<calc_t> &gaussian_rn2, hls::stream<calc_t> &prices) {
	#pragma HLS interface ap_none port=params
	#pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" \
			variable=params
	#pragma HLS interface ap_fifo port=gaussian_rn1
	#pragma HLS resource core=AXI4Stream variable=gaussian_rn1
	#pragma HLS interface ap_fifo port=gaussian_rn2
	#pragma HLS resource core=AXI4Stream variable=gaussian_rn2
	#pragma HLS interface ap_fifo port=prices
	#pragma HLS resource core=AXI4Stream variable=prices
	#pragma HLS resource core=AXI4LiteS metadata="-bus_bundle params" \
			variable=return

	// write block size to stream
	prices.write(BLOCK_SIZE);

	state_t state_coarse[BLOCK_SIZE];
	#pragma HLS data_pack variable=state_coarse
	state_t state_fine[BLOCK_SIZE];
	#pragma HLS data_pack variable=state_fine
	w_both_t w_both[BLOCK_SIZE];
	#pragma HLS data_pack variable=w_both

	ap_uint<6> upper_j = params.ml_constant + (params.do_multilevel ? 1 : 0);

	for (uint32_t path = 0; path < params.path_cnt; path += BLOCK_SIZE) {
		for (uint32_t step = 0; step != params.step_cnt_coarse; ++step) {
			for (ap_uint<6> j = 0; j != upper_j; ++j) {
				for (ap_uint<10> block_i = 0; block_i != BLOCK_SIZE;
						++block_i) {
					#pragma HLS PIPELINE II=1

					bool is_fine = j != params.ml_constant;

					//
					// initialize
					//
					state_t l_state_coarse, l_state_fine;
					w_both_t l_w_both;
					if (step == 0 && j == 0) {
						l_state_coarse = get_init_state(params);
						l_state_fine = get_init_state(params);
						l_w_both = get_w_zero();
					} else {
						l_state_coarse = state_coarse[block_i];
						l_state_fine = state_fine[block_i];
						l_w_both = w_both[block_i];
					}

					//
					// calculate next step
					//
					state_t n_state_coarse, n_state_fine;
					w_both_t n_w_both;

					if (is_fine) {
						// step fine
						float w_stock = gaussian_rn1.read();
						float w_vola = gaussian_rn2.read();
						n_state_fine = get_next_step(params, l_state_fine,
								w_stock, w_vola, true);
						n_state_coarse = l_state_coarse;
						// accumulate random numbers
						n_w_both.w_stock = l_w_both.w_stock + w_stock;
						n_w_both.w_vola = l_w_both.w_vola + w_vola;
					} else {
						// step coarse
						n_state_coarse = get_next_step(params, l_state_coarse,
								l_w_both.w_stock, l_w_both.w_vola, false);
						n_state_fine = l_state_fine;
						n_w_both = get_w_zero();
					}

					state_coarse[block_i] = n_state_coarse;
					state_fine[block_i] = n_state_fine;
					w_both[block_i] = n_w_both;

					//
					// write out
					//
					if ((step + 1 == params.step_cnt_coarse) &&
							(j + 1 >= params.ml_constant)) {
						if (is_fine) {
							prices.write(get_log_price(n_state_fine));
						} else {
							prices.write(get_log_price(n_state_coarse));
						}
					}
				}
			}
		}
	}
}
예제 #20
0
/*
 * Core that applies a KERNEL_DIM x KERNEL_DIM (configurable) 2D convolution,
 * erode or dilate to grayscale images.
 * http://www.xilinx.com/support/documentation/sw_manuals/xilinx2014_1/ug902-vivado-high-level-synthesis.pdf
 *
 * inStream  : IMG_WIDTH * IMG_HEIGHT input pixels with AXI4-Stream side
 *             channels, read one per loop iteration.
 * outStream : result pixels. The results of the first waitTicks iterations
 *             are not sent; after all input has been consumed, waitTicks
 *             zero-valued samples are appended and only the very last of
 *             them carries last = 1.
 * kernel    : KERNEL_DIM * KERNEL_DIM coefficients in row-major order.
 * operation : 0 = convolution (window sum / 8, negative results set to 0),
 *             1 = erode (window minimum), 2 = dilate (window maximum).
 *             Any other value produces 0 for every pixel.
 * */
void doImgProc(hls::stream<uint_8_side_channel>& inStream,
               hls::stream<int_8_side_channel> & outStream,
               char                              kernel[KERNEL_DIM * KERNEL_DIM],
               int                               operation)
{
#pragma HLS INTERFACE axis port=inStream
#pragma HLS INTERFACE axis port=outStream
#pragma HLS INTERFACE s_axilite port=return bundle=CRTL_BUS
#pragma HLS INTERFACE s_axilite port=operation bundle=CRTL_BUS
#pragma HLS INTERFACE s_axilite port=kernel bundle=KERNEL_BUS

  // Line buffer holding the most recent KERNEL_DIM image rows and the
  // sampling window that receives the kernel-weighted pixels.
  hls::LineBuffer<KERNEL_DIM, IMG_WIDTH, unsigned char> lineBuff;
  hls::Window<KERNEL_DIM, KERNEL_DIM, short> window;

  // Position of the current input pixel and horizontal offset of the window
  // within the current row.
  int idxCol       = 0;
  int idxRow       = 0;
  int pixConvolved = 0;

  // Number of leading results to drop (and trailing zeros to append) in
  // order to compensate the line-buffer offset.
  int waitTicks = (IMG_WIDTH * (KERNEL_DIM - 1) + KERNEL_DIM) / 2;
  int countWait = 0;

  int_8_side_channel  dataOutSideChannel;
  uint_8_side_channel currPixelSideChannel;

  // One iteration per input pixel.
  for (int idxPixel = 0; idxPixel < (IMG_WIDTH * IMG_HEIGHT); idxPixel++) {
#pragma HLS PIPELINE

    // Read and cache (blocks while the input FIFO is empty)
    currPixelSideChannel = inStream.read();

    // Get the pixel data
    unsigned char pixelIn = currPixelSideChannel.data;

    // Shift the current column of the line buffer and insert the new pixel
    lineBuff.shift_up(idxCol);
    lineBuff.insert_top(pixelIn, idxCol);

    // Fill the window with the kernel-weighted samples. Adding pixConvolved
    // to the column index slides the window along the row.
    for (int idxWinRow = 0; idxWinRow < KERNEL_DIM; idxWinRow++) {
      for (int idxWinCol = 0; idxWinCol < KERNEL_DIM; idxWinCol++) {
        short val = (short)lineBuff.getval(idxWinRow, idxWinCol + pixConvolved);

        val = (short)kernel[(idxWinRow * KERNEL_DIM) + idxWinCol] * val;
        window.insert(val, idxWinRow, idxWinCol);
      }
    }

    // A result is only computed once enough rows and columns have been seen;
    // otherwise the output for this pixel stays 0.
    short valOutput = 0;

    if ((idxRow >= KERNEL_DIM - 1) && (idxCol >= KERNEL_DIM - 1)) {
      switch (operation) {
      case 0:

        // Convolution
        valOutput = sumWindow(&window);
        valOutput = valOutput / 8;

        // Avoid negative values
        if (valOutput < 0) valOutput = 0;
        break;

      case 1:

        // Erode
        valOutput = minWindow(&window);
        break;

      case 2:

        // Dilate
        valOutput = maxWindow(&window);
        break;
      }

      pixConvolved++;
    }

    // Advance to the next column, or wrap to the start of the next row
    if (idxCol < IMG_WIDTH - 1) {
      idxCol++;
    }
    else {
      idxCol = 0;
      idxRow++;
      pixConvolved = 0;
    }

    // Drop the first waitTicks results. Every later result is forwarded with
    // the side channels of the pixel read in this iteration; last is kept
    // low here because the frame is closed by the zero padding below.
    countWait++;

    if (countWait > waitTicks)  {
      dataOutSideChannel.data = valOutput;
      dataOutSideChannel.keep = currPixelSideChannel.keep;
      dataOutSideChannel.strb = currPixelSideChannel.strb;
      dataOutSideChannel.user = currPixelSideChannel.user;
      dataOutSideChannel.last = 0;
      dataOutSideChannel.id   = currPixelSideChannel.id;
      dataOutSideChannel.dest = currPixelSideChannel.dest;

      // Send to the stream (blocks while the output FIFO is full)
      outStream.write(dataOutSideChannel);
    }
  }

  // Append waitTicks zero samples so that the number of output samples
  // equals the number of input pixels. Side channels are taken from the last
  // pixel that was read.
  for (countWait = 0; countWait < waitTicks; countWait++) {
    dataOutSideChannel.data = 0;
    dataOutSideChannel.keep = currPixelSideChannel.keep;
    dataOutSideChannel.strb = currPixelSideChannel.strb;
    dataOutSideChannel.user = currPixelSideChannel.user;

    // Raise last on the final sample only
    if (countWait < waitTicks - 1) dataOutSideChannel.last = 0;
    else dataOutSideChannel.last = 1;
    dataOutSideChannel.id   = currPixelSideChannel.id;
    dataOutSideChannel.dest = currPixelSideChannel.dest;

    // Send to the stream (blocks while the output FIFO is full)
    outStream.write(dataOutSideChannel);
  }
}
/*
 * C/RTL co-simulation wrapper for hls_cropping_strm.
 *
 * If ".hls_cosim_wrapc_switch.log" can be opened, the wrapper runs in
 * post-check mode: it pops from src the number of elements recorded for the
 * current transaction, parses the recorded dst values from
 * AUTOTB_TVOUT_PC_dst_V_V and writes them to dst.
 *
 * Otherwise it calls AESL_ORIG_DUT_hls_cropping_strm and records, as
 * "[[transaction]] <n> ... [[/transaction]]" blocks, the src elements the
 * call consumed, the dst elements it produced and both element counts.
 *
 * Each mode keeps its own transaction counter in a function-local static.
 */
void hls_cropping_strm ( hls::stream< ap_int<8> > & src,  hls::stream< ap_int<16> > & dst) {

    fstream wrapc_switch_file_token;
    wrapc_switch_file_token.open(".hls_cosim_wrapc_switch.log");

    if (wrapc_switch_file_token.good()) {

        // ------------------------------------------------------------------
        // Post-check mode: replay recorded results instead of running the DUT
        // ------------------------------------------------------------------
        static unsigned AESL_transaction_pc;
        string AESL_token;
        string AESL_num;
        static AESL_FILE_HANDLER aesl_fh;

        // Pop from src as many elements as were consumed in this transaction.
        aesl_fh.read(WRAPC_STREAM_SIZE_IN_src_V_V, AESL_token); //[[transaction]]
        aesl_fh.read(WRAPC_STREAM_SIZE_IN_src_V_V, AESL_num); //transaction number
        if (atoi(AESL_num.c_str()) == AESL_transaction_pc ) {
            aesl_fh.read(WRAPC_STREAM_SIZE_IN_src_V_V, AESL_token); //pop_size
            int aesl_tmp_1 = atoi(AESL_token.c_str());
            for (int i = 0 ; i < aesl_tmp_1  ; i++) {
                src.read();
            }
            aesl_fh.read(WRAPC_STREAM_SIZE_IN_src_V_V, AESL_token); //[[/transaction]]
        }

        // Number of dst elements for this transaction. Starts at 0 so that it
        // has a defined value when the transaction number below does not match.
        int aesl_tmp_4 = 0;
        int aesl_tmp_5 = 0;
        aesl_fh.read(WRAPC_STREAM_SIZE_OUT_dst_V_V, AESL_token); //[[transaction]]
        aesl_fh.read(WRAPC_STREAM_SIZE_OUT_dst_V_V, AESL_num); //transaction number
        if (atoi(AESL_num.c_str()) == AESL_transaction_pc ) {
            aesl_fh.read(WRAPC_STREAM_SIZE_OUT_dst_V_V, AESL_token); //pop_size
            aesl_tmp_4 = atoi(AESL_token.c_str());
            aesl_fh.read(WRAPC_STREAM_SIZE_OUT_dst_V_V, AESL_token); //[[/transaction]]
        }

        std::vector<ap_int<16> > aesl_tmp_3;

        aesl_fh.read(AUTOTB_TVOUT_PC_dst_V_V, AESL_token); //[[transaction]]
        if ( AESL_token != "[[transaction]]") {
           exit(1);
        }

        aesl_fh.read(AUTOTB_TVOUT_PC_dst_V_V, AESL_num); //transaction number
        if (atoi(AESL_num.c_str()) == AESL_transaction_pc ) {

            aesl_fh.read(AUTOTB_TVOUT_PC_dst_V_V, AESL_token); //data

            std::vector < sc_bv<16> > dst_V_V_pc_buffer;
            int i = 0;

            while (AESL_token != "[[/transaction]]") {

                // Replace unknown digits by '0'; warn once per token.
                bool err = false;
                size_t x_found;

                // upper-case 'X' anywhere in the token
                while ((x_found = AESL_token.find('X')) != string::npos) {
                    if (!err) {
                        cerr << "@W [SIM-201] RTL produces unknown value 'X' on port 'dst_V_V', possible cause: There are uninitialized variables in the C design." << endl; 
                        err = true;
                    }
                    AESL_token.replace(x_found, 1, "0");
                }

                // lower-case 'x', searched from index 2 onwards so that the
                // first two characters of the token are never touched
                while ((x_found = AESL_token.find('x', 2)) != string::npos) {
                    if (!err) {
                        cerr << "@W [SIM-201] RTL produces unknown value 'X' on port 'dst_V_V', possible cause: There are uninitialized variables in the C design." << endl; 
                        err = true;
                    }
                    AESL_token.replace(x_found, 1, "0");
                }

                if (AESL_token != "") {
                    dst_V_V_pc_buffer.push_back( AESL_token.c_str() );
                    i++;
                }

                aesl_fh.read(AUTOTB_TVOUT_PC_dst_V_V, AESL_token); //data or [[/transaction]]
                if (AESL_token == "[[[/runtime]]]" || aesl_fh.eof(AUTOTB_TVOUT_PC_dst_V_V)) {
                   exit(1);
                }
            }

            // The number of values actually parsed overrides the recorded size.
            aesl_tmp_4 = i;

            if (aesl_tmp_4 > 0 && aesl_tmp_3.size() < static_cast<size_t>(aesl_tmp_4)) {
                aesl_tmp_3.resize(aesl_tmp_4);
            }

            if (i > 0) {
                const int dst_count = aesl_tmp_4 - aesl_tmp_5;
                sc_lv<16> *dst_V_V_lv0_0_0_1 = new sc_lv<16>[dst_count];

                for (int i_0 = 0; i_0 < dst_count; i_0 += 1) {
                    dst_V_V_lv0_0_0_1[i_0].range(15, 0) = sc_bv<16>(dst_V_V_pc_buffer[i_0].range(15, 0));
                    aesl_tmp_3[i_0] = (dst_V_V_lv0_0_0_1[i_0]).to_string(SC_BIN).c_str();
                }

                // release memory allocation
                delete [] dst_V_V_lv0_0_0_1;
            }
        }

        // Hand the replayed values to the caller. The extra size check keeps
        // the loop inside aesl_tmp_3 when only the size file matched this
        // transaction and no data was parsed.
        for (int i = 0; i < aesl_tmp_4 && i < static_cast<int>(aesl_tmp_3.size()); i++) {
            dst.write(aesl_tmp_3[i]);
        }

        AESL_transaction_pc ++ ;

    } else {

        // ------------------------------------------------------------------
        // Record mode: run the DUT and dump test vectors
        // ------------------------------------------------------------------
        static unsigned AESL_transaction;
        static AESL_FILE_HANDLER aesl_fh;

        // Line buffers for the records written below.
        char* tvin_src_V_V = new char[50];
        char* wrapc_stream_size_in_src_V_V = new char[50];
        char* tvout_dst_V_V = new char[50];
        aesl_fh.touch(AUTOTB_TVIN_dst_V_V);
        char* wrapc_stream_size_out_dst_V_V = new char[50];

        static INTER_TCL_FILE tcl_file(INTER_TCL);

        // Drain src to learn its contents and element count.
        std::vector<ap_int<8> > aesl_tmp_0;
        int aesl_tmp_1 = 0;
        while (!src.empty()) {
            aesl_tmp_0.push_back(src.read());
            aesl_tmp_1 ++;
        }

        // Drain whatever dst already holds before the call.
        std::vector<ap_int<16> > aesl_tmp_3;
        int aesl_tmp_4 = 0;
        while (!dst.empty()) {
            aesl_tmp_3.push_back(dst.read());
            aesl_tmp_4 ++;
        }

        // Restore src so that the DUT sees the original input.
        for (int i = 0; i < aesl_tmp_1; i++) {
            src.write(aesl_tmp_0[i]);
        }

        AESL_ORIG_DUT_hls_cropping_strm(src,dst);

        // Elements left in src were not consumed by the DUT.
        int aesl_tmp_2 = src.size();
        const int src_consumed = aesl_tmp_1 - aesl_tmp_2;

        // Append the elements the DUT produced behind the pre-existing ones.
        int aesl_tmp_5 = aesl_tmp_4;
        while (!dst.empty()) {
            aesl_tmp_3.push_back(dst.read());
            aesl_tmp_4 ++;
        }
        const int dst_produced = aesl_tmp_4 - aesl_tmp_5;

        // --- tvin: consumed src elements ---
        sprintf(tvin_src_V_V, "[[transaction]] %d\n", AESL_transaction);
        aesl_fh.write(AUTOTB_TVIN_src_V_V, tvin_src_V_V);

        sc_bv<8> *src_V_V_tvin_wrapc_buffer = new sc_bv<8>[src_consumed];

        for (int i_0 = 0; i_0 < src_consumed; i_0 += 1) {
            sc_lv<8> src_V_V_tmp_mem; 
            src_V_V_tmp_mem = (aesl_tmp_0[i_0]).to_string(2).c_str();
            src_V_V_tvin_wrapc_buffer[i_0].range(7, 0) = src_V_V_tmp_mem.range(7, 0 ) ;
        }

        for (int i = 0; i < src_consumed; i++) {
            sprintf(tvin_src_V_V, "%s\n", (src_V_V_tvin_wrapc_buffer[i]).to_string(SC_HEX).c_str());
            aesl_fh.write(AUTOTB_TVIN_src_V_V, tvin_src_V_V);
        }

        tcl_file.set_num(src_consumed,&tcl_file.src_V_V_depth);

        sprintf(tvin_src_V_V, "[[/transaction]] \n");
        aesl_fh.write(AUTOTB_TVIN_src_V_V, tvin_src_V_V);

        // release memory allocation
        delete [] src_V_V_tvin_wrapc_buffer;

        // --- stream size in: number of consumed src elements ---
        sprintf(wrapc_stream_size_in_src_V_V, "[[transaction]] %d\n", AESL_transaction);
        aesl_fh.write(WRAPC_STREAM_SIZE_IN_src_V_V, wrapc_stream_size_in_src_V_V);

        sprintf(wrapc_stream_size_in_src_V_V, "%d\n", src_consumed);
        aesl_fh.write(WRAPC_STREAM_SIZE_IN_src_V_V, wrapc_stream_size_in_src_V_V);

        sprintf(wrapc_stream_size_in_src_V_V, "[[/transaction]] \n");
        aesl_fh.write(WRAPC_STREAM_SIZE_IN_src_V_V, wrapc_stream_size_in_src_V_V);

        // --- tvout: dst elements ---
        sprintf(tvout_dst_V_V, "[[transaction]] %d\n", AESL_transaction);
        aesl_fh.write(AUTOTB_TVOUT_dst_V_V, tvout_dst_V_V);

        sc_bv<16> *dst_V_V_tvout_wrapc_buffer = new sc_bv<16>[dst_produced];

        // NOTE(review): the dump reads aesl_tmp_3 from index 0, not from
        // aesl_tmp_5. That is the same thing while dst is empty on entry;
        // confirm the intended behaviour for a pre-filled dst.
        for (int i_0 = 0; i_0 < dst_produced; i_0 += 1) {
            sc_lv<16> dst_V_V_tmp_mem; 
            dst_V_V_tmp_mem = (aesl_tmp_3[i_0]).to_string(2).c_str();
            dst_V_V_tvout_wrapc_buffer[i_0].range(15, 0) = dst_V_V_tmp_mem.range(15, 0 ) ;
        }

        for (int i = 0; i < dst_produced; i++) {
            sprintf(tvout_dst_V_V, "%s\n", (dst_V_V_tvout_wrapc_buffer[i]).to_string(SC_HEX).c_str());
            aesl_fh.write(AUTOTB_TVOUT_dst_V_V, tvout_dst_V_V);
        }

        tcl_file.set_num(dst_produced,&tcl_file.dst_V_V_depth);

        sprintf(tvout_dst_V_V, "[[/transaction]] \n");
        aesl_fh.write(AUTOTB_TVOUT_dst_V_V, tvout_dst_V_V);

        // release memory allocation
        delete [] dst_V_V_tvout_wrapc_buffer;

        // --- stream size out: number of produced dst elements ---
        sprintf(wrapc_stream_size_out_dst_V_V, "[[transaction]] %d\n", AESL_transaction);
        aesl_fh.write(WRAPC_STREAM_SIZE_OUT_dst_V_V, wrapc_stream_size_out_dst_V_V);

        sprintf(wrapc_stream_size_out_dst_V_V, "%d\n", dst_produced);
        aesl_fh.write(WRAPC_STREAM_SIZE_OUT_dst_V_V, wrapc_stream_size_out_dst_V_V);

        sprintf(wrapc_stream_size_out_dst_V_V, "[[/transaction]] \n");
        aesl_fh.write(WRAPC_STREAM_SIZE_OUT_dst_V_V, wrapc_stream_size_out_dst_V_V);

        // Give all dst elements (old and new) back to the caller.
        for (int i = 0; i < aesl_tmp_4; i++) {
            dst.write(aesl_tmp_3[i]);
        }

        // release memory allocation
        delete [] tvin_src_V_V;
        delete [] wrapc_stream_size_in_src_V_V;
        delete [] tvout_dst_V_V;
        delete [] wrapc_stream_size_out_dst_V_V;

        AESL_transaction++;

        tcl_file.set_num(AESL_transaction , &tcl_file.trans_num);

    }
}
// -----------------------------------------------------------
/**
 * Streaming cash-flow update that keeps one running value per path in a
 * local buffer.
 *
 * The function prints its own name, clears the first pathsMC entries of the
 * buffer, and then iterates over steps 0..stepsMC (inclusive). For every path
 * of every step it consumes one value from contin_in followed by one value
 * from payoff_in and replaces the stored value with either
 *   - the payoff, when payoff > 0 and payoff >= continuation, or
 *   - discount * (previously stored value) otherwise.
 *
 * @param stepsMC          last step index; stepsMC + 1 steps are processed
 * @param pathsMC          number of paths handled per step
 * @param discount         factor applied to the stored value and to the
 *                         values forwarded on cashFlowDisc_out
 * @param contin_in        supplies one continuation value per path per step
 * @param payoff_in        supplies one payoff value per path per step
 * @param cashFlowDisc_out receives discount * newValue for every step before
 *                         the last one
 * @param toAccum_out      receives newValue on the last step only
 *
 * NOTE(review): path indexes cashFlowVector directly and is bounded only by
 * pathsMC; nothing here checks pathsMC against CASHFLOW_SIZE.
 */
void lsupdate2SWvector	( 	const uint32_t stepsMC,
						const uint32_t pathsMC,
						const float discount,
						hls::stream<float> &contin_in,
						hls::stream<float> &payoff_in,
						hls::stream<float> &cashFlowDisc_out,
						hls::stream<float> &toAccum_out )
{
	printf("lsupdate2SWvector\n");

	// Per-path running value carried from one step to the next.
	float cashFlowVector[CASHFLOW_SIZE];
#pragma HLS RESOURCE variable=cashFlowVector core=RAM_2P_BRAM

	// Start every path from zero.
	zerosLoop:for(uint32_t path=0; path<pathsMC; ++path)
	{
#pragma HLS PIPELINE II=1 enable_flush

		cashFlowVector[path] = 0.0f;
	}

	stepsLoop:for(uint32_t step=0; step<=stepsMC; ++step)
	{
		pathsLoop:for(uint32_t path=0; path<pathsMC; ++path)
		{
	#pragma HLS PIPELINE II=1 enable_flush

			// Both inputs are consumed for every path, continuation first.
			const float continuation = contin_in.read();
			const float payoff       = payoff_in.read();

			const float carried = discount * cashFlowVector[path];

			// Take the payoff only when it is positive and not below the
			// continuation value; otherwise carry the discounted old value.
			const bool takePayoff = (payoff > 0.0f) && (payoff >= continuation);
			const float newY = takePayoff ? payoff : carried;

			cashFlowVector[path] = newY;

			// step never exceeds stepsMC inside stepsLoop, so "not the last
			// step" is exactly step < stepsMC.
			if(step == stepsMC)
				toAccum_out.write(newY);
			else
				cashFlowDisc_out.write(discount * newY);
		}
	}

	return;
}
예제 #23
0
void fe_zc(hls::stream< ap_uint<32> > sampleFifo, hls::stream< ap_uint<32> > featureFifo, ap_uint<8> windowSize, ap_uint<32> threshold) {
#pragma HLS INTERFACE ap_ctrl_none port=return
#pragma HLS INTERFACE ap_fifo port=featureFifo
#pragma HLS INTERFACE ap_fifo port=sampleFifo

	ap_uint<32> data;

	ap_int<16> sampleChannel1 = 0;
	ap_int<16> sampleChannel2 = 0;

	ap_uint<32> zcChannel1 = 0;
	ap_uint<32> zcChannel2 = 0;

	ap_int<2> stateChannel1 = 0;
	ap_int<2> stateChannel2 = 0;

	ap_uint<8> cntSamples = 0;

	// Wait for Samples to arrive in FIFO
	while( windowSize == 0 ) {

	}

	while(1) {
		zcChannel1 = 0;
		zcChannel2 = 0;

		stateChannel1 = 0;
		stateChannel2 = 0;

		// Count zero-crossing for channel 1 & 2
		for(cntSamples=0; cntSamples < windowSize; cntSamples++) {
			// Read data from Sample-FIFO
			// 2 16 bit Samples at one position in 32 bit FIFO => Process 2 channels in parallel
			data = sampleFifo.read();

			sampleChannel1 = data(15, 0);
			if( abs2(sampleChannel1) < threshold ) {
				sampleChannel1 = 0;
			}


			sampleChannel2 = data(31, 16);
			if( abs2(sampleChannel2) < threshold ) {
				sampleChannel2 = 0;
			}

			// Check whether a zero-crossing occurred or not
			// Channel 1
			if( stateChannel1 == 0 ) {
				if( sampleChannel1 < 0 ) {
					stateChannel1 = -1;
				}
				else if( sampleChannel1 == 0 ) {
					stateChannel1 = 0;
				}
				else {
					stateChannel1 = 1;
				}
			}
			else if( stateChannel1 < 0 ) {
				if( sampleChannel1 > 0 ) {
					zcChannel1++;
					if( sampleChannel1 < 0 ) {
						stateChannel1 = -1;
					}
					else if( sampleChannel1 == 0 ) {
						stateChannel1 = 0;
					}
					else {
						stateChannel1 = 1;
					}
				}
			}
			else if( stateChannel1 > 0 ) {
				if( sampleChannel1 < 0 ) {
					zcChannel1++;
					if( sampleChannel1 < 0 ) {
						stateChannel1 = -1;
					}
					else if( sampleChannel1 == 0 ) {
						stateChannel1 = 0;
					}
					else {
						stateChannel1 = 1;
					}
				}
			}

			// Channel 2
			if( stateChannel2 == 0 ) {
				if( sampleChannel2 < 0 ) {
					stateChannel2 = -1;
				}
				else if( sampleChannel2 == 0 ) {
					stateChannel2 = 0;
				}
				else {
					stateChannel2 = 1;
				}
			}
			else if( stateChannel2 < 0 ) {
				if( sampleChannel2 > 0 ) {
					zcChannel2++;
					if( sampleChannel2 < 0 ) {
						stateChannel2 = -1;
					}
					else if( sampleChannel2 == 0 ) {
						stateChannel2 = 0;
					}
					else {
						stateChannel2 = 1;
					}
				}
			}
			else if( stateChannel2 > 0 ) {
				if( sampleChannel2 < 0 ) {
					zcChannel2++;
					if( sampleChannel2 < 0 ) {
						stateChannel2 = -1;
					}
					else if( sampleChannel2 == 0 ) {
						stateChannel2 = 0;
					}
					else {
						stateChannel2 = 1;
					}
				}
			}
		}
		// Write back features to Feature-FIFO
		featureFifo.write(zcChannel1);
		featureFifo.write(zcChannel2);
	}
}
예제 #24
0
/**
 * Reads one double from the FIFO as two consecutive 32-bit words, upper word
 * first, and reinterprets the combined 64-bit pattern through my_type.
 *
 * The words are widened through unsigned types before being combined: adding
 * the low word as a signed int would sign-extend it whenever its top bit is
 * set and thereby corrupt the upper 32 bits of the result.
 */
static double dut_read_double(hls::stream<int> &fifo)
{
    #pragma HLS INLINE
    const int hi = fifo.read();
    const int lo = fifo.read();

    my_type cvt;
    cvt.myint64 = (long long)(((unsigned long long)(unsigned int) hi << 32) | (unsigned int) lo);
    return cvt.mydouble;
}

/**
 * Reads five doubles (S, X, T, r, b, in that order, two FIFO words each) from
 * in_fifo and evaluates BlackScholes() NUM_PASSES times with call/put flag
 * 'c' and those values.
 *
 * The code that would split the result into two words and push it to out_fifo
 * is commented out below, so nothing is written to out_fifo.
 *
 * @param in_fifo   source of the parameter words
 * @param out_fifo  currently unused (see the disabled writes in the loop)
 *
 * NOTE(review): both streams are taken by value, whereas the other
 * stream-based functions in this file take hls::stream by reference --
 * confirm this is intended for the tool version in use.
 */
void dut(hls::stream<int> in_fifo, hls::stream<int> out_fifo) 
{
    //#pragma HLS pipeline II=4
    //#pragma HLS dependence variable=rand_number inter false
    #pragma HLS interface ap_ctrl_none port=return
    //static int cnt;

    int i;

    my_type ret;

    //BS parameters
    char CallPutFlag = 'c';
    double result;

//======================Initialize BlackSchole parameters though FIFO=====================
    // The read order defines the wire protocol; do not reorder.
    double S = dut_read_double(in_fifo);
    double X = dut_read_double(in_fifo);
    double T = dut_read_double(in_fifo);
    double r = dut_read_double(in_fifo);
    double b = dut_read_double(in_fifo);

//======================Starting MC Iterations for BS=====================

    loop: for (i = 0; i < NUM_PASSES; i++) {
        // shadow_state();
        result = BlackScholes(CallPutFlag, S, X, T, r, b);
        ret.mydouble = result;
        //data1 = (int)(ret.myint64>>32);
        //data2 = (int)(0xffffffff&(ret.myint64));
        //out_fifo.write(data1);
        //out_fifo.write(data2);
    }

}
/**
 * Streaming stage that turns each stock value into a continuation value and
 * a payoff.
 *
 * After printing two trace lines, the function iterates over steps
 * 0..stepsMC (inclusive). At the start of every step except step 0 it reads
 * one coefficient from each of b0_in, b1_in and b2_in; on step 0 all three
 * coefficients are zero and nothing is read. For every path of the step it
 * reads one stock value s and writes
 *   - b0*1 + b1*s + b2*s*s           to contin_out, then
 *   - max(+/-(s - K), 0)             to payoff_out,
 * where the sign is + when callPut == 0 and - otherwise.
 *
 * @param stepsMC     last step index; stepsMC + 1 steps are processed
 * @param pathsMC     number of stock values consumed per step
 * @param K           value subtracted from each stock value
 * @param callPut     0 selects (s - K); any other value selects -(s - K)
 * @param stock       supplies one stock value per path per step
 * @param b0_in       supplies one constant coefficient per step after step 0
 * @param b1_in       supplies one linear coefficient per step after step 0
 * @param b2_in       supplies one quadratic coefficient per step after step 0
 * @param contin_out  receives one continuation value per path per step
 * @param payoff_out  receives one payoff per path per step
 */
void lsupdate1SW  ( const uint32_t stepsMC,
					const uint32_t pathsMC,
					const float K,
					const uint32_t callPut,
					hls::stream<float> &stock,
					hls::stream<float> &b0_in,
					hls::stream<float> &b1_in,
					hls::stream<float> &b2_in,
					hls::stream<float> &contin_out,
					hls::stream<float> &payoff_out )
{
	printf("lsupdate1SW\n");
	printf("lsupdateSW\n");
	stepsLoop:for(uint32_t step=0; step<=stepsMC; ++step)
	{
		// Coefficients default to zero; they are only fetched after step 0.
		float b0 = 0.0f;
		float b1 = 0.0f;
		float b2 = 0.0f;

		if(step != 0)
		{
			b0 = b0_in.read();
			b1 = b1_in.read();
			b2 = b2_in.read();
		}

		pathsLoop:for(uint32_t path=0; path<pathsMC; ++path)
		{
	#pragma HLS PIPELINE II=1 enable_flush

			const float s = stock.read();

			// Payoff: signed difference to K, clamped at zero.
			const float diff = s - K;
			const float signedDiff = (callPut == 0) ? diff : -diff;
			const float payoff = fmaxf(signedDiff, 0.0f);

			// Basis functions 1, s, s^2.
			const float x0 = 1.0f;
			const float s2 = s*s;

			const float continuation = b0*x0 + b1*s + b2*s2;

			// Output order matters to the consumers: continuation first.
			contin_out.write(continuation);
			payoff_out.write(payoff);
		}
	}

	return;
}