/*
 * Backward-induction pricer on a variance tree combined with an FFT-based
 * (Wiener-Hopf style, per the in-code comments) step in the log-price variable.
 *
 * Outline of what the code below does:
 *   1. tree_v(...) is called to build the variance tree; its results are read
 *      afterwards through the arrays V, f_up, f_down, pu_f and pd_f.
 *   2. A uniform log-price grid of M points is built (ba_log_prices / ba_prices).
 *   3. F_next is filled with the terminal payoff G(price, K) for every grid
 *      point j and every variance state k of the last tree level.
 *   4. For n = Nt-1 ... 0 and every state k <= n, the two successor columns
 *      (k_u, k_d) are masked below a local barrier, pushed through the FFT
 *      factor chain (or a simplified branch when V[n][k] is below treshold)
 *      and mixed with the tree weights pd_f / pu_f.
 *   5. Finally the real part of F_next[j][0] is zeroed where ba_prices[j] <= H
 *      and clamped to be non-negative.
 *
 * All arrays used here (V, f_up, f_down, pu_f, pd_f, ba_log_prices, ba_prices,
 * fftfreqs, F_next, F_prev and the f_n_* work buffers) are not declared in this
 * function; they must be defined and sized elsewhere in the file.
 *
 * Parameters:
 *   tt        time horizon; the time step is dt = tt / Nt.
 *   H         price scale of the grid (prices are H*exp(...)); results at nodes
 *             with price <= H are zeroed at the end - presumably the barrier
 *             level, TODO confirm.
 *   K         second argument of the payoff G - presumably the strike.
 *   r_premia  interest rate in percent; converted to r = log(1 + r_premia/100).
 *   v0, kappa, theta, sigma
 *             forwarded to tree_v; kappa, theta and sigma also enter the drift
 *             terms below. Presumably the usual Heston parameters - TODO confirm.
 *   rho       enters through rho/sigma shifts and rho_hat = sqrt(1 - rho^2);
 *             presumably the price/variance correlation.
 *   L         scales the log-price window [L*log(0.5) - ..., L*log(2)].
 *   M         number of log-price grid points (also the FFT length).
 *             NOTE(review): the FFT routines may restrict admissible M - confirm.
 *   Nt        number of time steps / tree levels.
 *
 * Returns: OK (no other return path exists in this function).
 */
static int compute_price(double tt, double H, double K, double r_premia, double v0, double kappa, double theta, double sigma, double rho, 
	double L, int M, int Nt )
{
	/*Variables*/
	int j, n, k;
	double r; /*continuous rate*/
	double min_log_price, max_log_price;
	double ds, dt; /*price and time discretization steps*/
	double rho_hat; /*parameter after substitution: sqrt(1 - rho^2)*/
	double q, factor, discount_factor; /*pde parameters*/
	double treshold = 1e-9; /* variance level below which the FFT step is skipped and the simplified branch is used*/

	int k_d, k_u; /*state indices on level n+1 reached from node [n][k]*/
	double sigma_local, gamma; /*wh factors parameters*/
	double beta_minus, beta_plus; /*wh-factors coefficients*/
	double local_barrier; /*a barrier depending on [n][k], to check crossing on each step*/

	/*NOTE(review): the inequality below is commonly referred to as the Feller condition - confirm naming*/
	//if (2.0 * kappa * theta < pow(sigma, 2))
	//	return 1; /*Novikov condition not satisfied, probability values could be incorrect*/
	/*Body*/
	/*convert the percentage rate into a continuously compounded one*/
	r = log(1 + r_premia / 100);

	/*building voltree*/
	tree_v(tt, v0, kappa, theta, sigma, Nt);

	/*spatial variable. Price space construction.
	The lower bound is shifted by (rho / sigma) * V[Nt][Nt]; the upper bound is not shifted.*/
	min_log_price = L*log(0.5) - (rho / sigma)* V[Nt][Nt];
	max_log_price = L*log(2);
	ds = (max_log_price - min_log_price) / double(M);

	/*uniform grid of M points starting at min_log_price; max_log_price itself is not a grid point*/
	for (j = 0; j < M; j++)
	{
		ba_log_prices[j] = min_log_price + j*ds;
		ba_prices[j] = H*exp(ba_log_prices[j] + (rho / sigma)* V[0][0]);
	}
	dt = tt / double(Nt);
	
	/*fft frequencies we'll need in every vertex of the tree (read later from fftfreqs)*/
	fftfreq(M, ds);
	rho_hat = sqrt(1.0 - pow(rho, 2.0));
	q = 1.0 / dt + r;
	/*factor = 1 / (q*dt) = 1 / (1 + r*dt)*/
	factor = pow(q*dt, -1.0);
	//discount_factor = exp(r*dt);
	/*NOTE(review): despite its name this is a rate (r - rho/sigma*kappa*theta), not a discount factor;
	it is only used in the low-variance branch below*/
	discount_factor = r - rho / sigma * kappa * theta;

	/*filling the F_next matrix with terminal (time T) conditions: payoff G on every
	grid point j for each of the Nt + 1 variance states of the last level*/
	for (j = 0; j < M; j++)
		for (k = 0; k < Nt + 1; k++)
		{
			F_next[j][k] = Complex(G(H*exp(ba_log_prices[j] + (rho / sigma)* V[Nt][k]), K), 0);
		}

	/*here the main cycle starts - the backward induction procedure*/
	for (n = Nt - 1; n >= 0; n--)
	{
		/*progress output, one line per time level*/
		printf("Processing: %d of %d\n", n, Nt-1);
		for (k = 0; k <= n; k++)
		{
			/*to calculate the binomial expectation we should use matrices from the tree method.
			After the (n,k) vertex one could either get to (n+1,k_u) or (n+1, k_d). The numbers k_u and k_d could be
			read from f_up and f_down matrices, by the rule of addition, for example:
			
			f_down[i][j] = -z;
			Rd = V[i + 1][j - z]

			f_up[i][j] = z;
			Ru = V[i + 1][j + z];
			*/
			k_u = k + f_up[n][k];
			k_d = k + f_down[n][k];
			/*barrier position in the shifted log-price variable for this node*/
			local_barrier = - (rho / sigma) * V[n][k];

			/*initial conditions of a step: copy both successor columns into work buffers*/
			for (j = 0; j < M; j++)
			{
				//f_n_plus_1_k_u[j] = F[j][n+1][k_u];
				//f_n_plus_1_k_d[j] = F[j][n+1][k_d];
				f_n_plus_1_k_u[j] = F_next[j][k_u];
				f_n_plus_1_k_d[j] = F_next[j][k_d];
			}
			/*applying indicator function: zero everything below the local barrier*/
			for (j = 0; j < M; j++)
			{
				if (ba_log_prices[j] < local_barrier)
				{
					f_n_plus_1_k_u[j].r = 0.0;
					f_n_plus_1_k_u[j].i = 0.0;
					f_n_plus_1_k_d[j].r = 0.0;
					f_n_plus_1_k_d[j].i = 0.0;
				}
			}
			if (V[n][k] >= treshold)
			{
				/*set up variance - dependent parameters for a given step*/
				sigma_local = rho_hat * sqrt(V[n][k]);
				gamma = r - 0.5 * V[n][k] - rho / sigma * kappa * (theta - V[n][k]);  /*also local*/
				/* beta_plus and beta_minus*/
				/*beta_minus = -(gamma + sqrt(gamma^2 + 2 * sigma^2 * q)) / sigma^2
					beta_plus = -(gamma - sqrt(gamma^2 + 2 * sigma^2 * q)) / sigma^2
					(sigma in these formulas is sigma_local)*/
				beta_minus = -(gamma + sqrt(pow(gamma,2) + 2 * pow(sigma_local,2) * q)) / pow(sigma_local,2);
				beta_plus = -(gamma - sqrt(pow(gamma,2) + 2 * pow(sigma_local,2) * q)) / pow(sigma_local,2);

				for (j = 0; j < M; j++)
				{
					/* factor functions
					phi_plus_array = ([beta_plus / (beta_plus - i * 2 * pi*xi) for xi in xi_space])
					phi_minus_array = ([-beta_minus / (-beta_minus + i * 2 * pi*xi) for xi in xi_space]) */
					phi_plus_array[j] = RCdiv(beta_plus, RCsub(beta_plus, RCmul((2.0 * PI * fftfreqs[j]), CI)));
					phi_minus_array[j] = RCdiv(-beta_minus, RCadd(-beta_minus, RCmul((2.0 * PI * fftfreqs[j]), CI)));
				}
				
				/*factorization calculation*/

				/*f_n_k_u = factor * fft.ifft(phi_minus_array *	fft.fft(
				indicator(original_prices_array, 0) * fft.ifft(phi_plus_array * fft.fft(f_n_plus_1_k_u))))*/
				/*split into separate real / imaginary arrays, as taken by pnl_fft2;
				note that this loop declares its own j, shadowing the outer one*/
				for (int j = 0; j < M; j++)
				{
					f_n_plus_1_k_u_re[j] = f_n_plus_1_k_u[j].r;
					f_n_plus_1_k_u_im[j] = f_n_plus_1_k_u[j].i;

				}
				pnl_fft2(f_n_plus_1_k_u_re, f_n_plus_1_k_u_im, M);
				for (j = 0; j < M; j++) {
					/*putting real and imaginary parts together again*/
					f_n_plus_1_k_u_fft_results[j] = Complex(f_n_plus_1_k_u_re[j], f_n_plus_1_k_u_im[j]);
					/*multiplying by phi_plus*/
					f_n_plus_1_k_u_fft_results[j] = Cmul(phi_plus_array[j], f_n_plus_1_k_u_fft_results[j]);
					/*extracting real and imaginary parts to use in the next transform*/
					f_n_plus_1_k_u_fft_results_re[j] = f_n_plus_1_k_u_fft_results[j].r;
					f_n_plus_1_k_u_fft_results_im[j] = f_n_plus_1_k_u_fft_results[j].i;

				}

				pnl_ifft2(f_n_plus_1_k_u_fft_results_re, f_n_plus_1_k_u_fft_results_im, M);
				/*applying indicator function, after ifft*/
				for (j = 0; j < M; j++)
				{
					if (ba_log_prices[j] < local_barrier)
					{
						f_n_plus_1_k_u_fft_results_re[j] = 0.0;
						f_n_plus_1_k_u_fft_results_im[j] = 0.0;
					}
				}

				/*performing second fft */
				pnl_fft2(f_n_plus_1_k_u_fft_results_re, f_n_plus_1_k_u_fft_results_im, M);
				
				for (j = 0; j < M; j++) {
					/*putting real and imaginary parts together again*/
					f_n_plus_1_k_u_fft_results[j] = Complex(f_n_plus_1_k_u_fft_results_re[j], f_n_plus_1_k_u_fft_results_im[j]);
					/*multiplying by phi_minus*/
					f_n_plus_1_k_u_fft_results[j] = Cmul(phi_minus_array[j], f_n_plus_1_k_u_fft_results[j]);
					/*extracting real and imaginary parts to use in the next transform*/
					f_n_plus_1_k_u_fft_results_re[j] = f_n_plus_1_k_u_fft_results[j].r;
					f_n_plus_1_k_u_fft_results_im[j] = f_n_plus_1_k_u_fft_results[j].i;
				}

				/*the very last ifft*/
				pnl_ifft2(f_n_plus_1_k_u_fft_results_re, f_n_plus_1_k_u_fft_results_im, M);
				/*multiplying by factor*/
				for (j = 0; j < M; j++) {
					f_n_k_u[j].r = factor * f_n_plus_1_k_u_fft_results_re[j];
					f_n_k_u[j].i = factor * f_n_plus_1_k_u_fft_results_im[j];
				}

				/*f_n_k_d = factor * fft.ifft(phi_minus_array * fft.fft(
				indicator(original_prices_array, 0) * fft.ifft(phi_plus_array * fft.fft(f_n_plus_1_k_d))))*/
				/*same chain as above, now for the down successor (this loop also shadows the outer j)*/
				for (int j = 0; j < M; j++)
				{
					f_n_plus_1_k_d_re[j] = f_n_plus_1_k_d[j].r;
					f_n_plus_1_k_d_im[j] = f_n_plus_1_k_d[j].i;

				}
				pnl_fft2(f_n_plus_1_k_d_re, f_n_plus_1_k_d_im, M);
				for (j = 0; j < M; j++) {
					/*putting real and imaginary parts together again*/
					f_n_plus_1_k_d_fft_results[j] = Complex(f_n_plus_1_k_d_re[j], f_n_plus_1_k_d_im[j]);
					/*multiplying by phi_plus*/
					f_n_plus_1_k_d_fft_results[j] = Cmul(phi_plus_array[j], f_n_plus_1_k_d_fft_results[j]);
					/*extracting real and imaginary parts to use in the next transform*/
					f_n_plus_1_k_d_fft_results_re[j] = f_n_plus_1_k_d_fft_results[j].r;
					f_n_plus_1_k_d_fft_results_im[j] = f_n_plus_1_k_d_fft_results[j].i;
				}
				pnl_ifft2(f_n_plus_1_k_d_fft_results_re, f_n_plus_1_k_d_fft_results_im, M);
				/*applying indicator function, after ifft*/
				for (j = 0; j < M; j++)
				{
					if (ba_log_prices[j] < local_barrier)
					{
						f_n_plus_1_k_d_fft_results_re[j] = 0.0;
						f_n_plus_1_k_d_fft_results_im[j] = 0.0;
					}
				}
				/*performing second fft */
				pnl_fft2(f_n_plus_1_k_d_fft_results_re, f_n_plus_1_k_d_fft_results_im, M);

				for (j = 0; j < M; j++) {
					/*putting real and imaginary parts together again*/
					f_n_plus_1_k_d_fft_results[j] = Complex(f_n_plus_1_k_d_fft_results_re[j], f_n_plus_1_k_d_fft_results_im[j]);
					/*multiplying by phi_minus*/
					f_n_plus_1_k_d_fft_results[j] = Cmul(phi_minus_array[j], f_n_plus_1_k_d_fft_results[j]);
					/*extracting real and imaginary parts to use in the next transform*/
					f_n_plus_1_k_d_fft_results_re[j] = f_n_plus_1_k_d_fft_results[j].r;
					f_n_plus_1_k_d_fft_results_im[j] = f_n_plus_1_k_d_fft_results[j].i;
				}
				/*the very last ifft*/
				pnl_ifft2(f_n_plus_1_k_d_fft_results_re, f_n_plus_1_k_d_fft_results_im, M);
				/*multiplying by factor*/
				for (j = 0; j < M; j++) {
					f_n_k_d[j].r = factor * f_n_plus_1_k_d_fft_results_re[j];
					f_n_k_d[j].i = factor * f_n_plus_1_k_d_fft_results_im[j];
				}
			}
			/*low-variance branch; the condition is the complement of the one above
			(apart from a NaN in V[n][k], for which neither branch runs)*/
			else if (V[n][k] < treshold)
			{
				/*applying indicator function*/
				/*NOTE(review): this masking repeats the one done before the branch, and for k_u it is
				discarded by the reload from F_next in the next loop - confirm whether that is intended*/
				for (j = 0; j < M; j++)
				{
					if (ba_log_prices[j] < local_barrier)
					{
						f_n_plus_1_k_u[j].r = 0.0;
						f_n_plus_1_k_u[j].i = 0.0;
						f_n_plus_1_k_d[j].r = 0.0;
						f_n_plus_1_k_d[j].i = 0.0;
					}
				}
				/*no FFT here: the up value is the (unmasked) successor value minus discount_factor * dt,
				and the down value is simply a copy of the up value (the k_d column is not used)*/
				for (j = 0; j < M; j++)
				{
					//f_n_plus_1_k_u[j] = F[j][n + 1][k_u];
					f_n_plus_1_k_u[j] = F_next[j][k_u];
					f_n_k_u[j] = CRsub(f_n_plus_1_k_u[j], discount_factor * dt);
					f_n_k_d[j] = f_n_k_u[j];

				}
			}
			/*
            f_n_k = pd_f[n, k] * f_n_k_d + pu_f[n, k] * f_n_k_u
			(weighted mix of the two branch results, stored as column k of F_prev)
			*/
			for (j = 0; j < M; j++)
			{
				f_n_k[j] = Cadd(RCmul(pd_f[n][k], f_n_k_d[j]), RCmul(pu_f[n][k], f_n_k_u[j]));
				F_prev[j][k] = f_n_k[j];
			}						
		}
		/*roll the level: F_prev becomes F_next for the following (earlier) time step and is cleared.
		Only states 0..Nt-1 are copied; column Nt of F_next keeps its terminal values, and it is not
		read again as long as k_u <= n + 1 - NOTE(review): confirm against f_up*/
		for (j = 0; j < M; j++)
		{
			for (int state = 0; state < Nt; state++)
			{
				F_next[j][state] = F_prev[j][state];
				F_prev[j][state] = Complex(0,0);
			}
		}
	}
	/*Preprocessing F before showing out: the result lives in column 0 of F_next.
	Its real part is zeroed at or below H and negative values are clamped to zero;
	the imaginary part is left untouched*/
	for (j = 0; j < M; j++)
	{
		if (ba_prices[j] <= H)
		{
			F_next[j][0].r = 0;
		}
		if (F_next[j][0].r < 0.)
		{
			F_next[j][0].r = 0;
		}
	}
	return OK;
}
int main() {
    omp_set_num_threads(numCores); // Set the number of threads for OpenMP parallel sections
    fftw_threads_init(); // Initialize threaded FFTs
    rfftwnd_plan dp_c2r; // Inverse FFT plan
    rfftwnd_plan dp_r2c; // Forward FFT plan
    // Create the plans using FFTW_MEASURE to get fastest transforms, do this here so
    // that it is only done once and the plans reused.
    
    std::cout << "Creating FFTW plans...\n";
    dp_c2r = rfftw3d_create_plan(N, N, N, FFTW_COMPLEX_TO_REAL, FFTW_MEASURE);
    dp_r2c = rfftw3d_create_plan(N, N, N, FFTW_REAL_TO_COMPLEX, FFTW_MEASURE);
    
    double *kvec = new double[N];
    fftfreq(kvec);
    
    std::ofstream fout;
    std::ofstream tout;
    std::ifstream fin;
    
    fout.open("GalaxyNum.dat",std::ios::out);
    fout.close();
    
    std::vector< Pk > InputPower;
    int numKModes = 0;
    
    std::cout << "Reading input power file: " << CAMBfile << "\n";
    fin.open(CAMBfile.c_str(),std::ios::in);
    while (!fin.eof()) {
        Pk Input_temp;
        fin >> Input_temp.k >> Input_temp.P;
        
        if (!fin.eof()) {
            InputPower.push_back(Input_temp);
            ++numKModes;
        }
    }
    fin.close();
    
    double *kvals = new double[numKModes];
    double *InPow = new double[numKModes];
    
    for (int i = 0; i < numKModes; ++i) {
        kvals[i] = InputPower[i].k;
        InPow[i] = InputPower[i].P;
    }
    
    gsl_spline *Power = gsl_spline_alloc(gsl_interp_cspline, numKModes);
    gsl_interp_accel *acc = gsl_interp_accel_alloc();
    
    gsl_spline_init(Power, kvals, InPow, numKModes);
    
    fftw_complex *deltak3di = new fftw_complex[N_im];
    fftw_real *deltar3di = new fftw_real[N_tot];
    
#pragma omp parallel for
    for (int i = 0; i < N_tot; ++i) {
        deltar3di[i] = 0.0;
        if (i < N_im) {
            deltak3di[i].re = 0.0;
            deltak3di[i].im = 0.0;
        }
    }
    
    std::cout << "Distributing power over volume...\n";
    Gendk(kvec, Power, acc, deltak3di); // Call function to populate the power grid
    
    std::cout << "Performing initial one-time inverse FFT...\n";
    rfftwnd_threads_one_complex_to_real(numCores,dp_c2r,deltak3di,deltar3di); // FFT
    
    std::cout << "Taking the natural log...\n";
#pragma omp parallel for
    for (int i = 0; i < N_tot; ++i) {
        deltar3di[i] = log(1.0 + deltar3di[i]);
        if (i < N_im) {
            deltak3di[i].re = 0.0;
            deltak3di[i].im = 0.0;
        }
    }
    
    std::cout << "Performing initial one-time forward FFT...\n";
    rfftwnd_threads_one_real_to_complex(numCores,dp_r2c,deltar3di,deltak3di);
        
    std::cout << "Normalizing...\n";
#pragma omp parallel for
    for (int i = 0; i < N_im; ++i) {
        deltak3di[i].re /= N_tot;
        deltak3di[i].im /= N_tot;
    }
    
    delete[] deltar3di;
    
    tout.open("Timings.dat",std::ios::out);
    std::cout << "Starting to generate mocks...\n";
    for (int mock = startNum-1; mock < numMocks; ++mock) {
        double start_time = omp_get_wtime();
        std::string lrgfile = filename(base, mock+1, ext);
        std::cout << "Generating mock " << lrgfile << "\n";
        
        fftw_complex *deltak3d = new fftw_complex[N_im];
        fftw_real *deltar3d = new fftw_real[N_tot];
        
        // Initialize power array. Do it in parallel to speed things up.        
#pragma omp parallel for
        for (int i = 0; i < N_tot; ++i) {
            deltar3d[i] = 0.0;
            if (i < N_im) {
                deltak3d[i].re = 0.0;
                deltak3d[i].im = 0.0;
            }
        }
        
        std::cout << "    Setting up for the inverse FFT...\n";
        Sampdk(kvec, deltak3di, deltak3d);
        
        if (powOut) {
            std::cout << "    Outputting raw power array...\n";
            std::string powerfile = filename(powbase, mock+1, extbin);
            fout.open(powerfile.c_str(),std::ios::out|std::ios::binary);
            fout.write((char *) deltak3d, N_im*sizeof(fftw_complex));
            fout.close();
        }
        
        std::cout << "    Performing second inverse FFT...\n";
        rfftwnd_threads_one_complex_to_real(numCores,dp_c2r,deltak3d,deltar3d);
        
        if (matOut) {
            std::cout << "    Outputting matter field array...\n";
            std::string matterfile = filename(matbase, mock+1, extbin);
            fout.open(matterfile.c_str(),std::ios::out|std::ios::binary);
            fout.write((char *) deltar3d, N_tot*sizeof(fftw_real));
            fout.close();
        }
        
        double mean = 0.0;
        double variance = 0.0;
        double dr_max = 0.0;
        double dr_min = 0.0;
        
        for (int i = 0; i < N_tot; ++i) {
            mean += deltar3d[i]/N_tot;
            if (deltar3d[i] > dr_max) dr_max = deltar3d[i];
            if (deltar3d[i] < dr_min) dr_min = deltar3d[i];
        }
        std::cout << "    Max  = " << dr_max << "\n";
        std::cout << "    Min  = " << dr_min << "\n";
        std::cout << "    Mean = " << mean << "\n";
        
        std::cout << "    Calculating variance...\n";
        for (int i = 0; i < N_tot; ++i) {
            deltar3d[i] -= mean;
            variance += (deltar3d[i])*(deltar3d[i])/(N_tot-1);
        }
        
        std::cout << "    Poisson sampling...\n";
        Gendr(lrgfile, variance, deltar3d);
        
        delete[] deltak3d;
        delete[] deltar3d;
        
        double totaltime = omp_get_wtime()-start_time;
        std::cout << "    Time to generate mock: " << totaltime << " seconds\n";
        tout << lrgfile << " " << totaltime << "\n";
    }
    tout.close();
    
    delete[] kvec;
    delete[] deltak3di;
    delete[] kvals;
    delete[] InPow;
    
    rfftwnd_destroy_plan(dp_r2c);
    rfftwnd_destroy_plan(dp_c2r);
    
    gsl_spline_free(Power);
    gsl_interp_accel_free(acc);
    
    return 0;
}