Beispiel #1
0
/** \brief Feature estimation entry point: runs the three processing stages of the estimator
  * one after another, in a fixed order.
  * \param output output cloud argument of the feature interface; this definition does not read or
  * write it directly (NOTE(review): presumably the stages publish their results through class
  * members - confirm against the stage implementations).
  */
template <typename PointInT, typename PointNT, typename PointOutT>
void
pcl::GSS3DEstimation<PointInT, PointNT, PointOutT>::computeFeature (PointCloudOut &output)
{
  // Stage 1: build the geometric scale space.
  this->calculateGeometricScaleSpace ();

  // Stage 2: compute the derivatives (runs after the scale space stage).
  this->computeDerivatives ();

  // Stage 3: extract the edges (runs last).
  this->extractEdges ();
}
Beispiel #2
0
void GridCell::computeCorrector (long stepNum)
{
	// Values of neighboring cells. us = "u stencil", vs = "v stencil" etc.
	stencil us, vs, hs, hts;
	// Call the giveStencil function with corrector setting 'c'.
	giveStencil(us, vs, hs, hts, 'c');

	// Struct to store the corrector derivatives.
	oneSidedDifferences correctorStep;

	// Compute corrector in a cyclic manner, alternating the directions of
	// the x and y differences between forward and backward in a cycle of four.
	// The directions are always exactly the opposite as in the predictor computation.
	// This allows for second order accuracy in time and space.
	switch (stepNum % 4)
	{
	case 0:
		correctorStep = computeDerivatives(us, vs, hs, hts, 'b', 'b');
		break;
	case 1:
		correctorStep = computeDerivatives(us, vs, hs, hts, 'b', 'f');
		break;
	case 2:
		correctorStep = computeDerivatives(us, vs, hs, hts, 'f', 'f');
		break;
	case 3:
		correctorStep = computeDerivatives(us, vs, hs, hts, 'f', 'b');
		break;
	}

	double duCorrector = correctorStep.du;
	double dvCorrector = correctorStep.dv;
	double dhCorrector = correctorStep.dh;

	// Compute the new values.
	u = u + 0.5 * (duCorrector + duPredictor) * dt;
	v = v + 0.5 * (dvCorrector + dvPredictor) * dt;
	h = h + 0.5 * (dhCorrector + dhPredictor) * dt;
	if (u < 0) // Clamp slightly such that u is never negative.
	{
		u = 0;
	}
	htot = h + hsurf;
}
Beispiel #3
0
// PUBLIC TIME MARCHING METHODS ***********************************
/**
 * Predictor half of the predictor-corrector time step for this cell.
 *
 * Fetches the neighbor stencils in predictor mode ('p'), evaluates one-sided
 * differences whose x/y directions depend on stepNum % 4, stores the resulting
 * derivatives in duPredictor/dvPredictor/dhPredictor and writes the predicted
 * state (uPredictor, vPredictor, hPredictor, htotPredictor).
 *
 * @param stepNum index of the current time step; only stepNum % 4 is used.
 */
void GridCell::computePredictor (long stepNum)
{
	// Neighboring cell values: u, v, h and total-height stencils.
	stencil us, vs, hs, hts;
	giveStencil(us, vs, hs, hts, 'p');

	// Difference direction ('f' = forward, 'b' = backward) for each phase of the
	// four-step cycle:
	//   phase 0: forward in x and y      phase 1: forward in x, backward in y
	//   phase 2: backward in x and y     phase 3: backward in x, forward in y
	static const char xDirection[4] = { 'f', 'f', 'b', 'b' };
	static const char yDirection[4] = { 'f', 'b', 'b', 'f' };

	// Holds the predictor derivatives. It is only assigned for phases 0..3;
	// a negative remainder leaves it exactly as it was constructed.
	oneSidedDifferences predictorStep;
	const long phase = stepNum % 4;
	if (phase >= 0)
	{
		predictorStep = computeDerivatives(us, vs, hs, hts, xDirection[phase], yDirection[phase]);
	}

	// Keep the derivatives around; the corrector averages them with its own.
	duPredictor = predictorStep.du;
	dvPredictor = predictorStep.dv;
	dhPredictor = predictorStep.dh;

	// Explicit step of size dt from the current state.
	uPredictor = u + duPredictor * dt;
	vPredictor = v + dvPredictor * dt;
	hPredictor = h + dhPredictor * dt;
	htotPredictor = hsurf + hPredictor;
}
Beispiel #4
0
/**
 * Builds the fields from a Hamiltonian and an extrinsic curvature.
 *
 * Copies the bare mass, singular angular part, singular power and remainder
 * out of the Hamiltonian, configures the basis with the Hamiltonian's ranks
 * and maximum radius, and then calls computeDerivatives().
 *
 * @param ham Hamiltonian that supplies the data and the basis configuration.
 * @param kij extrinsic curvature, stored in the member k.
 */
Fields::Fields(Hamiltonian &ham, ExtrinsicCurvature kij) : k(kij) {
  // Get data from the Hamiltonian.
  mass = ham.getBareMass();
  sigma = ham.getSingularAngularPart();
  regPower = ham.getSingularPower();
  u = ham.getRemainder();

  // The array returned by getRanks() has to be released with delete[] by this
  // constructor. Copy the two entries that are needed and free the array right
  // away, so that it cannot leak if any of the calls below throws.
  int* ranks = ham.getRanks();
  int rank0 = ranks[0];
  int rank1 = ranks[1];
  delete[] ranks;

  // Set up the basis.
  basis.setRanks(rank0, rank1);
  basis.setMaximumRadius(ham.getMaximumRadius());
  computeDerivatives();
}
Beispiel #5
0
/** \brief Estimate the transformation that registers the input cloud to the target by iterating
  * Newton steps on the NDT score, with the step length chosen by computeStepLengthMT ()
  * [Magnusson 2009].
  *
  * The loop body is entered at least once and at most \a max_iterations_ Newton iterations are
  * run. The loop also stops once, from the second iteration on, the accepted step length is
  * smaller than \a transformation_epsilon_. Both stopping reasons set \a converged_ to true.
  * A zero-length or NaN Newton step returns early; \a converged_ is then true only when the step
  * was not NaN. In every case \a trans_probability_ is set to the last score divided by the
  * number of input points.
  *
  * \param[in,out] output cloud that is transformed by \a guess (when \a guess is not the identity)
  * and is then passed to the derivative and step length routines as the working cloud
  * \param[in] guess initial transformation estimate
  */
template<typename PointSource, typename PointTarget> void
pcl::NormalDistributionsTransform<PointSource, PointTarget>::computeTransformation (PointCloudSource &output, const Eigen::Matrix4f &guess)
{
  nr_iterations_ = 0;
  converged_ = false;

  double gauss_c1, gauss_c2, gauss_d3;

  // Initializes the gaussian fitting parameters (eq. 6.8) [Magnusson 2009]
  gauss_c1 = 10 * (1 - outlier_ratio_);
  gauss_c2 = outlier_ratio_ / pow (resolution_, 3);
  gauss_d3 = -log (gauss_c2);
  gauss_d1_ = -log ( gauss_c1 + gauss_c2 ) - gauss_d3;
  gauss_d2_ = -2 * log ((-log ( gauss_c1 * exp ( -0.5 ) + gauss_c2 ) - gauss_d3) / gauss_d1_);

  if (guess != Eigen::Matrix4f::Identity ())
  {
    // Initialise final transformation to the guessed one
    final_transformation_ = guess;
    // Apply guessed transformation prior to search for neighbours
    transformPointCloud (output, output, guess);
  }

  // Initialize Point Gradient and Hessian
  point_gradient_.setZero ();
  point_gradient_.block<3, 3>(0, 0).setIdentity ();
  point_hessian_.setZero ();

  Eigen::Transform<float, 3, Eigen::Affine, Eigen::ColMajor> eig_transformation;
  eig_transformation.matrix () = final_transformation_;

  // Convert initial guess matrix to 6 element transformation vector
  // (translation x, y, z followed by the X-Y-Z Euler angles)
  Eigen::Matrix<double, 6, 1> p, delta_p, score_gradient;
  Eigen::Vector3f init_translation = eig_transformation.translation ();
  Eigen::Vector3f init_rotation = eig_transformation.rotation ().eulerAngles (0, 1, 2);
  p << init_translation (0), init_translation (1), init_translation (2),
  init_rotation (0), init_rotation (1), init_rotation (2);

  Eigen::Matrix<double, 6, 6> hessian;

  double score = 0;
  double delta_p_norm;

  // Calculate derivatives of initial transform vector, subsequent derivative calculations are done in the step length determination.
  score = computeDerivatives (score_gradient, hessian, output, p);

  while (!converged_)
  {
    // Store previous transformation
    previous_transformation_ = transformation_;

    // Solve for descent direction using newton method, line 23 in Algorithm 2 [Magnusson 2009]
    Eigen::JacobiSVD<Eigen::Matrix<double, 6, 6> > sv (hessian, Eigen::ComputeFullU | Eigen::ComputeFullV);
    // Negative for maximization as opposed to minimization
    delta_p = sv.solve (-score_gradient);

    // Calculate step length with guaranteed sufficient decrease [More, Thuente 1994]
    delta_p_norm = delta_p.norm ();

    // A value that differs from itself is NaN. Without a usable step there is nothing left to
    // iterate on: report the current score and flag convergence only if the step was not NaN.
    if (delta_p_norm == 0 || delta_p_norm != delta_p_norm)
    {
      trans_probability_ = score / static_cast<double> (input_->points.size ());
      converged_ = delta_p_norm == delta_p_norm;
      return;
    }

    delta_p.normalize ();
    delta_p_norm = computeStepLengthMT (p, delta_p, delta_p_norm, step_size_, transformation_epsilon_ / 2, score, score_gradient, hessian, output);
    delta_p *= delta_p_norm;


    // Incremental transformation applied by this iteration
    transformation_ = (Eigen::Translation<float, 3> (static_cast<float> (delta_p (0)), static_cast<float> (delta_p (1)), static_cast<float> (delta_p (2))) *
                       Eigen::AngleAxis<float> (static_cast<float> (delta_p (3)), Eigen::Vector3f::UnitX ()) *
                       Eigen::AngleAxis<float> (static_cast<float> (delta_p (4)), Eigen::Vector3f::UnitY ()) *
                       Eigen::AngleAxis<float> (static_cast<float> (delta_p (5)), Eigen::Vector3f::UnitZ ())).matrix ();


    p = p + delta_p;

    // Update Visualizer (untested)
    if (update_visualizer_ != 0)
      update_visualizer_ (output, std::vector<int>(), *target_, std::vector<int>() );

    // nr_iterations_ still counts the iterations completed before this one, so the iteration that
    // just ran is number nr_iterations_ + 1. Stopping when that number reaches max_iterations_
    // limits the loop to max_iterations_ passes; testing "nr_iterations_ > max_iterations_" here
    // let the loop run two passes more than requested.
    // The step length test is skipped on the very first iteration (nr_iterations_ == 0).
    if (nr_iterations_ + 1 >= max_iterations_ ||
        (nr_iterations_ && (std::fabs (delta_p_norm) < transformation_epsilon_)))
    {
      converged_ = true;
    }

    nr_iterations_++;

  }

  // Store transformation probability.  The relative differences within each scan registration are accurate
  // but the normalization constants need to be modified for it to be globally accurate
  trans_probability_ = score / static_cast<double> (input_->points.size ());
}
Beispiel #6
0
/** \brief Select a step length along \a step_dir with the More-Thuente line search
  * [More, Thuente 1994] and evaluate the score function at the selected point.
  *
  * The search minimizes phi(a) = -score (x + a * step_dir). The trial step is always clamped to
  * [\a step_min, \a step_max]. The iterative part of the search is skipped when
  * \a step_max <= \a step_min, in which case the clamped initial step is returned after a single
  * evaluation. As a side effect \a final_transformation_ is set to the transformation described
  * by the last evaluated point.
  *
  * \param[in] x transformation vector the search starts from
  * \param[in,out] step_dir search direction; negated in place when phi'(0) is positive
  * \param[in] step_init initial step length suggestion
  * \param[in] step_max upper bound of the step length
  * \param[in] step_min lower bound of the step length
  * \param[in,out] score on entry the score at \a x, on exit the score at the last evaluated point
  * \param[in,out] score_gradient on entry the gradient at \a x, on exit the gradient at the last
  * evaluated point
  * \param[out] hessian Hessian at the last evaluated point
  * \param[out] trans_cloud input cloud transformed by \a final_transformation_
  * \return the selected step length, or 0 (without evaluating anything) when phi'(0) is exactly 0
  */
template<typename PointSource, typename PointTarget> double
pcl::NormalDistributionsTransform<PointSource, PointTarget>::computeStepLengthMT (const Eigen::Matrix<double, 6, 1> &x, Eigen::Matrix<double, 6, 1> &step_dir, double step_init, double step_max,
                                                                                  double step_min, double &score, Eigen::Matrix<double, 6, 1> &score_gradient, Eigen::Matrix<double, 6, 6> &hessian,
                                                                                  PointCloudSource &trans_cloud)
{
  // Set the value of phi(0), Equation 1.3 [More, Thuente 1994]
  double phi_0 = -score;
  // Set the value of phi'(0), Equation 1.3 [More, Thuente 1994]
  double d_phi_0 = -(score_gradient.dot (step_dir));

  Eigen::Matrix<double, 6, 1>  x_t;

  if (d_phi_0 >= 0)
  {
    // Not a descent direction
    if (d_phi_0 == 0)
      return 0;
    else
    {
      // Reverse step direction and calculate optimal step.
      d_phi_0 *= -1;
      step_dir *= -1;

    }
  }

  // The Search Algorithm for T(mu) [More, Thuente 1994]

  int max_step_iterations = 10;
  int step_iterations = 0;

  // Sufficient decrease constant, Equation 1.1 [More, Thuente 1994]
  double mu = 1.e-4;
  // Curvature condition constant, Equation 1.2 [More, Thuente 1994]
  double nu = 0.9;

  // Initial endpoints of Interval I,
  double a_l = 0, a_u = 0;

  // Auxiliary function psi is used until I is determined to be a closed interval, Equation 2.1 [More, Thuente 1994]
  double f_l = auxilaryFunction_PsiMT (a_l, phi_0, phi_0, d_phi_0, mu);
  double g_l = auxilaryFunction_dPsiMT (d_phi_0, d_phi_0, mu);

  double f_u = auxilaryFunction_PsiMT (a_u, phi_0, phi_0, d_phi_0, mu);
  double g_u = auxilaryFunction_dPsiMT (d_phi_0, d_phi_0, mu);

  // Check used to allow More-Thuente step length calculation to be skipped by making step_min == step_max.
  // The interval only counts as converged up front when it is empty or degenerate. Testing for
  // "> 0" here marked every proper interval as converged, so the search loop below never ran.
  bool interval_converged = (step_max - step_min) <= 0, open_interval = true;

  double a_t = step_init;
  a_t = std::min (a_t, step_max);
  a_t = std::max (a_t, step_min);

  x_t = x + step_dir * a_t;

  final_transformation_ = (Eigen::Translation<float, 3>(static_cast<float> (x_t (0)), static_cast<float> (x_t (1)), static_cast<float> (x_t (2))) *
                           Eigen::AngleAxis<float> (static_cast<float> (x_t (3)), Eigen::Vector3f::UnitX ()) *
                           Eigen::AngleAxis<float> (static_cast<float> (x_t (4)), Eigen::Vector3f::UnitY ()) *
                           Eigen::AngleAxis<float> (static_cast<float> (x_t (5)), Eigen::Vector3f::UnitZ ())).matrix ();

  // New transformed point cloud
  transformPointCloud (*input_, trans_cloud, final_transformation_);

  // Updates score, gradient and hessian.  Hessian calculation is unnecessary but testing showed that most step calculations use the
  // initial step suggestion and recalculating the reusable portions of the hessian would entail more computation time.
  score = computeDerivatives (score_gradient, hessian, trans_cloud, x_t, true);

  // Calculate phi(alpha_t)
  double phi_t = -score;
  // Calculate phi'(alpha_t)
  double d_phi_t = -(score_gradient.dot (step_dir));

  // Calculate psi(alpha_t)
  double psi_t = auxilaryFunction_PsiMT (a_t, phi_t, phi_0, d_phi_0, mu);
  // Calculate psi'(alpha_t)
  double d_psi_t = auxilaryFunction_dPsiMT (d_phi_t, d_phi_0, mu);

  // Iterate until max number of iterations, interval convergence or a value satisfies the sufficient decrease, Equation 1.1, and curvature condition, Equation 1.2 [More, Thuente 1994]
  while (!interval_converged && step_iterations < max_step_iterations && !(psi_t <= 0 /*Sufficient Decrease*/ && d_phi_t <= -nu * d_phi_0 /*Curvature Condition*/))
  {
    // Use auxiliary function if interval I is not closed
    if (open_interval)
    {
      a_t = trialValueSelectionMT (a_l, f_l, g_l,
                                   a_u, f_u, g_u,
                                   a_t, psi_t, d_psi_t);
    }
    else
    {
      a_t = trialValueSelectionMT (a_l, f_l, g_l,
                                   a_u, f_u, g_u,
                                   a_t, phi_t, d_phi_t);
    }

    a_t = std::min (a_t, step_max);
    a_t = std::max (a_t, step_min);

    x_t = x + step_dir * a_t;

    final_transformation_ = (Eigen::Translation<float, 3> (static_cast<float> (x_t (0)), static_cast<float> (x_t (1)), static_cast<float> (x_t (2))) *
                             Eigen::AngleAxis<float> (static_cast<float> (x_t (3)), Eigen::Vector3f::UnitX ()) *
                             Eigen::AngleAxis<float> (static_cast<float> (x_t (4)), Eigen::Vector3f::UnitY ()) *
                             Eigen::AngleAxis<float> (static_cast<float> (x_t (5)), Eigen::Vector3f::UnitZ ())).matrix ();

    // New transformed point cloud
    // Done on final cloud to prevent wasted computation
    transformPointCloud (*input_, trans_cloud, final_transformation_);

    // Updates score, gradient. Values stored to prevent wasted computation.
    // NOTE(review): the trailing 'false' presumably disables the Hessian computation, which is
    // why computeHessian () is called once after the loop - confirm against computeDerivatives ().
    score = computeDerivatives (score_gradient, hessian, trans_cloud, x_t, false);

    // Calculate phi(alpha_t+)
    phi_t = -score;
    // Calculate phi'(alpha_t+)
    d_phi_t = -(score_gradient.dot (step_dir));

    // Calculate psi(alpha_t+)
    psi_t = auxilaryFunction_PsiMT (a_t, phi_t, phi_0, d_phi_0, mu);
    // Calculate psi'(alpha_t+)
    d_psi_t = auxilaryFunction_dPsiMT (d_phi_t, d_phi_0, mu);

    // Check if I is now a closed interval
    if (open_interval && (psi_t <= 0 && d_psi_t >= 0))
    {
      open_interval = false;

      // Converts f_l and g_l from psi to phi
      f_l = f_l + phi_0 - mu * d_phi_0 * a_l;
      g_l = g_l + mu * d_phi_0;

      // Converts f_u and g_u from psi to phi
      f_u = f_u + phi_0 - mu * d_phi_0 * a_u;
      g_u = g_u + mu * d_phi_0;
    }

    if (open_interval)
    {
      // Update interval end points using Updating Algorithm [More, Thuente 1994]
      interval_converged = updateIntervalMT (a_l, f_l, g_l,
                                             a_u, f_u, g_u,
                                             a_t, psi_t, d_psi_t);
    }
    else
    {
      // Update interval end points using Modified Updating Algorithm [More, Thuente 1994]
      interval_converged = updateIntervalMT (a_l, f_l, g_l,
                                             a_u, f_u, g_u,
                                             a_t, phi_t, d_phi_t);
    }

    step_iterations++;
  }

  // If inner loop was run then hessian needs to be calculated.
  // Hessian is unnecessary for step length determination but gradients are required
  // so derivative and transform data is stored for the next iteration.
  if (step_iterations)
    computeHessian (hessian, trans_cloud, x_t);

  return (a_t);
}
int main (int args, char* argv[])
{


double length = LENGTH;                                                                                //Axial length of vessel to simulate (in micrometers).
double diameter= DIAMETER;

double pi = 3.1415;                                                                 //Steady state diameter of the simulated vessel (in micrometers).
double hx_smc = HX_SMC;
double hy_smc = HY_SMC;
double hx_ec  = HX_EC;
double hy_ec  = HY_EC;

double new_length 	= (rint (length/hx_ec) ) * hx_ec;


	if  (( (int)(new_length) % (int) (hx_smc) )!=0)
		hx_smc = 	new_length / (rint (new_length/hx_smc)) ;

double new_circ= (rint (diameter *  pi /hy_smc) ) * hy_smc;

	if  (( (int)(new_circ) % (int) (hy_ec) )!=0)
		hy_ec = 	new_circ / (rint (new_circ/hy_ec)) ;


grid.neq_smc	=	26;
grid.neq_ec		=	4;

grid.nodes_smc	=	(int)	(length/hx_smc);
grid.layers_smc	=	(int)	(new_circ/hy_smc);

grid.nodes_ec	=	(int)	(length/hx_ec);
grid.layers_ec	=	(int)	(new_circ/hy_ec);

grid.m		=	grid.nodes_smc*grid.layers_smc;
grid.n		=	grid.nodes_ec *grid.layers_ec ;



MPI_Init(&args, &argv);

int 	tasks,
	myRank;

MPI_Status	status1,stat1,stat2,stat3,stat4;
MPI_Request	req1,req2,req3,req4;

MPI_Comm_size(MPI_COMM_WORLD,&tasks);
MPI_Comm_rank(MPI_COMM_WORLD,&myRank);

FILE *errPT;
char filename[10];int n;
int kB=1024,	MB=	1024*1024;
	sprintf(filename,"err.%d.txt",myRank);
	errPT	=	fopen(filename,"w+");

int n_smc,n_ec;

if (myRank ==0)
{
	printf("LENGTH = %2.5f\t\tDIAMETER(requested)= %2.5f\t\tDIAMETER(corrected)= %2.5f\t\tCIRCUMFERENCE = %2.5f\n\n",length,diameter,(new_circ/pi),new_circ);
	printf("SMC GRID INFO:\nNodes =	%d\nLayers = %d\nHX = %f,HY = %f\nTotal cells = %d\n",grid.nodes_smc,grid.layers_smc,hx_smc,hy_smc,grid.m);
	printf("EC GRID INFO:\nNodes =	%d\nLayers = %d\nHX = %f,HY = %f\nTotal cells = %d\n",grid.nodes_ec,grid.layers_ec,hx_ec,hy_ec,grid.n);


	//EC bit
	int r 	=	grid.nodes_ec%tasks;

	if (r ==0)
		{
		n_ec	=	grid.nodes_ec/tasks;
		grid.n_ec= n_ec;
		for (int i=1;i<tasks;i++)
		MPI_Send(&n_ec,1,MPI_INT,i,000,MPI_COMM_WORLD);


		}
	else{
	     	n_ec	=	(grid.nodes_ec - r )/tasks;
		grid.n_ec= n_ec;
		for (int i=1;i<tasks-1;i++)
		MPI_Send(&n_ec,1,MPI_INT,i,000,MPI_COMM_WORLD);
		n_ec=n_ec+r;
	        MPI_Send(&n_ec,1,MPI_INT,(tasks-1),000,MPI_COMM_WORLD);

		}

	}

	else
	MPI_Recv(&grid.n_ec,1,MPI_INT,MASTER,000,MPI_COMM_WORLD,&status1);

	grid.n_smc	=	13*grid.n_ec;


	//Each should perform this bit locally

	celltype1*	smc_base;
	celltype2*	 ec_base;



	smc_base	=	(celltype1*) malloc((grid.layers_smc *grid.n_smc) * sizeof(celltype1));
	if(smc_base==NULL){
		fprintf(errPT,"Allocation failed for smc_base\n");
		MPI_Abort(MPI_COMM_WORLD,100);
		}
	 ec_base	=	(celltype2*) malloc((grid.layers_ec *grid.n_ec)   * sizeof(celltype2));
	 if(ec_base==NULL){
		fprintf(errPT,"Allocation failed for ec_base\n");
		MPI_Abort(MPI_COMM_WORLD,100);
		}

	smc   = (celltype1**) malloc(grid.layers_smc * sizeof(celltype1*));
	if(smc==NULL){
		fprintf(errPT,"Allocation failed for smc row dimension\n");
		MPI_Abort(MPI_COMM_WORLD,100);
		}

	for (int i=0; i<grid.layers_smc; i++){
		smc[i]=  (celltype1*) malloc(grid.n_smc * sizeof(celltype1));
		if(smc[i]==NULL){
		fprintf(errPT,"Allocation failed for smc row # %d dimension\n",i);
		MPI_Abort(MPI_COMM_WORLD,100);
		}
		smc[i]=  smc_base+(i*grid.n_smc);
		}
	ec    = (celltype2**) malloc(grid.layers_ec * sizeof(celltype2*));
	if(smc==NULL){
		fprintf(errPT,"Allocation failed for smc row dimension\n");
		MPI_Abort(MPI_COMM_WORLD,100);
		}
	for (int i=0; i<grid.layers_ec; i++){
		 ec[i]=  (celltype2*) malloc(grid.n_ec * sizeof(celltype2));
		 if(ec[i]==NULL){
			fprintf(errPT,"Allocation failed for ec row # %d dimension\n",i);
			MPI_Abort(MPI_COMM_WORLD,100);
			}
		 ec[i]=  ec_base+(i*grid.n_ec);
		 }


	nn.sbuf_left    =	(double*)malloc( 3*(grid.layers_smc+grid.layers_ec) * sizeof(double));
	if(nn.sbuf_left==NULL){
		fprintf(errPT,"Allocation failed for nn.sbuf_left dimension\n");
		MPI_Abort(MPI_COMM_WORLD,100);
		}

	nn.sbuf_right   =	(double*)malloc( 3*(grid.layers_smc+grid.layers_ec) * sizeof(double));
	if(nn.sbuf_right==NULL){
		fprintf(errPT,"Allocation failed for nn.sbuf_right dimension\n");
		MPI_Abort(MPI_COMM_WORLD,100);
		}

	nn.rbuf_left    =	(double*)malloc( 3*(grid.layers_smc+grid.layers_ec) * sizeof(double));
	if(nn.rbuf_left==NULL){
		fprintf(errPT,"Allocation failed for nn.rbuf_left dimension\n");
		MPI_Abort(MPI_COMM_WORLD,100);
		}

	nn.rbuf_right   =	(double*)malloc( 3*(grid.layers_smc+grid.layers_ec) * sizeof(double));
	if(nn.rbuf_right==NULL){
		fprintf(errPT,"Allocation failed for nn.rbuf_right dimension\n");
		MPI_Abort(MPI_COMM_WORLD,100);
		}



	nn.left_smc_c	=	(double*)malloc( grid.layers_smc * sizeof(double));
	if(nn.left_smc_c==NULL){
		fprintf(errPT,"Allocation failed for nn.left_smc_c dimension\n");
		MPI_Abort(MPI_COMM_WORLD,100);
		}
	nn.right_smc_c 	=	(double*)malloc( grid.layers_smc * sizeof(double));
	if(nn.right_smc_c==NULL){
		fprintf(errPT,"Allocation failed for nn.right_smc_c dimension\n");
		MPI_Abort(MPI_COMM_WORLD,100);
		}


	nn.left_ec_c	=	(double*)malloc( grid.layers_ec * sizeof(double));
	if(nn.left_ec_c==NULL){
		fprintf(errPT,"Allocation failed for nn.left_ec_c dimension\n");
		MPI_Abort(MPI_COMM_WORLD,100);
		}
	nn.right_ec_c 	=	(double*)malloc( grid.layers_ec * sizeof(double));
	if(nn.right_ec_c==NULL){
		fprintf(errPT,"Allocation failed for nn.right_ec_c dimension\n");
		MPI_Abort(MPI_COMM_WORLD,100);
		}


	nn.left_smc_v	=	(double*)malloc( grid.layers_smc * sizeof(double));
	if(nn.left_smc_v==NULL){
		fprintf(errPT,"Allocation failed for nn.left_smc_v dimension\n");
		MPI_Abort(MPI_COMM_WORLD,100);
		}
	nn.right_smc_v 	=	(double*)malloc( grid.layers_smc * sizeof(double));
	if(nn.right_smc_v==NULL){
		fprintf(errPT,"Allocation failed for nn.right_smc_v dimension\n");
		MPI_Abort(MPI_COMM_WORLD,100);
		}


	nn.left_ec_v	=	(double*)malloc( grid.layers_ec * sizeof(double));
	if(nn.left_ec_v==NULL){
		fprintf(errPT,"Allocation failed for nn.left_ec_v dimension\n");
		MPI_Abort(MPI_COMM_WORLD,100);
		}
	nn.right_ec_v 	=	(double*)malloc( grid.layers_ec * sizeof(double));
	if(nn.right_ec_v==NULL){
		fprintf(errPT,"Allocation failed for nn.right_ec_v dimension\n");
		MPI_Abort(MPI_COMM_WORLD,100);
		}


	nn.left_smc_I	=	(double*)malloc( grid.layers_smc * sizeof(double));
	if(nn.left_smc_I==NULL){
		fprintf(errPT,"Allocation failed for nn.left_smc_I dimension\n");
		MPI_Abort(MPI_COMM_WORLD,100);
		}
	nn.right_smc_I 	=	(double*)malloc( grid.layers_smc * sizeof(double));
	if(nn.right_smc_I==NULL){
		fprintf(errPT,"Allocation failed for nn.right_smc_I dimension\n");
		MPI_Abort(MPI_COMM_WORLD,100);
		}


	nn.left_ec_I	=	(double*)malloc( grid.layers_ec * sizeof(double));
	if(nn.left_ec_I==NULL){
		fprintf(errPT,"Allocation failed for nn.left_ec_I dimension\n");
		MPI_Abort(MPI_COMM_WORLD,100);
		}

	nn.right_ec_I 	=	(double*)malloc( grid.layers_ec * sizeof(double));
	if(nn.right_ec_I==NULL){
		fprintf(errPT,"Allocation failed for nn.right_ec_I dimension\n");
		MPI_Abort(MPI_COMM_WORLD,100);
		}



	//Lodaing the information of rows and cols of each cell and allocating memory for various flux equations

	for(int i=0; i<grid.layers_smc; i++)
	{
		for (int j=0;j<grid.n_smc;j++)
		{
			smc[i][j].row	=	i;
			smc[i][j].col 	=	j;
	/*		smc[i][j].A	=    (double*)	malloc(12*sizeof(double));
			if(smc[i][j].A==NULL){
				fprintf(errPT,"Allocation failed for smc[i][j].A dimension\n");
				MPI_Abort(MPI_COMM_WORLD,100);
				}
			smc[i][j].B	=    (double*)	malloc(3*sizeof(double));
			if(smc[i][j].B==NULL){
				fprintf(errPT,"Allocation failed for smc[i][j].B dimension\n");
				MPI_Abort(MPI_COMM_WORLD,100);
				}
			smc[i][j].C	=    (double*)	malloc(3*sizeof(double));
			if(smc[i][j].C==NULL){
				fprintf(errPT,"Allocation failed for smc[i][j].C dimension\n");
				MPI_Abort(MPI_COMM_WORLD,100);
				}*/
		}
	}

	for(int i=0; i<grid.layers_ec; i++)
	{
		for (int j=0;j<grid.n_ec;j++)
		{
			ec[i][j].row	=	i;
			ec[i][j].col   =	j;
	/*		ec[i][j].A	=    (double*)	malloc(12*sizeof(double));
			if(ec[i][j].A==NULL){
				fprintf(errPT,"Allocation failed for ec[i][j].A dimension\n");
				MPI_Abort(MPI_COMM_WORLD,100);
				}
			ec[i][j].B	=    (double*)	malloc(3*sizeof(double));
			if(ec[i][j].B==NULL){
				fprintf(errPT,"Allocation failed for ec[i][j].B dimension\n");
				MPI_Abort(MPI_COMM_WORLD,100);
				}
			ec[i][j].C	=    (double*)	malloc(3*sizeof(double));
			if(ec[i][j].C==NULL){
				fprintf(errPT,"Allocation failed for ec[i][j].C dimension\n");
				MPI_Abort(MPI_COMM_WORLD,100);
				}*/
		}
	}


	int total	=	(grid.neq_smc * grid.n_smc * grid.layers_smc) + (grid.neq_ec * grid.n_ec * grid.layers_ec);



	//Initialize and use the solver here


	RKSUITE		rksuite;
	//Time variables
	double  tfinal  = TFINAL;
	double 	tnow	= 0.0;
	double 	interval= INTERVAL;

	//File written every 1 second
	int file_write_per_unit_time=(int)(1/interval);


	//Error control variables

	double 	TOL	= 1e-7;

	double  thres[total];
	for (int i= 0; i<total; i++)
	thres[i]	=	1e-7;

	//Variables holding new and old values
	double* y = (double*) malloc (total * sizeof(double));


	/***** INITIALIZATION SECTION ******/
int offset	=	0;
	Initialize_tsoukias_smc(offset,y);
	offset	=	grid.n_smc*grid.layers_smc * grid.neq_smc;
	Initialize_koeingsberger_ec(offset, y);



	int i,j,k;
	for(i=0;i< grid.layers_smc; i++)
	 	{
		for (j=0;j<grid.n_smc;j++)
			{
			if (i>0)
			k=(i*grid.n_smc*grid.neq_smc);
			else if (i==0)
			k=0;
				smc[i][j].p     =   &y[k+(j*grid.neq_smc)+0];
			}
	 	}

	offset	=	grid.n_smc*grid.layers_smc * grid.neq_smc;

	for(i=0;i< grid.layers_ec; i++)
	 	{
		for (j=0;j<grid.n_ec;j++)
			{
			if (i>0)
			k= offset+i*grid.n_ec*grid.neq_ec ;
			else if (i==0)
			k=offset+0;
				ec[i][j].q     =   &y[k+(j*grid.neq_ec)+0];
			}
	 	}

	for(i=0;i< grid.layers_smc; i++)
	 	{
		for (j=0;j<grid.n_smc;j++)
			{
			smc[i][j].NE	=	0.0;
			smc[i][j].NO	=	0.0;
			smc[i][j].I_stim=	0.0;
			}
	 	}

/***************************************************************/
	double* yp= (double*) malloc (total * sizeof(double));

	//Solver method
	int method	=	2;		//RK(4,5)

	//Statistics
	int totf,stpcst,acptstp;
  	double waste,hnext;

	//Error Flag
	int cflag = 0, uflag =0;
	//Error extent monitor, to select best constant step of tolerances
	double* ymax = (double*) malloc(total * sizeof(double));


	int state = couplingParms(CASE);

	if (myRank==0)

		printf("\n Step Size=%2.10lf\nUnit time in file write=%lf\n",interval,(double)file_write_per_unit_time*interval );


	//Output file management

	FILE
			*time,*Task_info,
			*ci,*cj,
			*si,*sj,
			*vi,*vj,
			*Ii,*Ij,
			*cpv_i,*cpc_i,*cpi_i,
			*cpv_j,*cpc_j,*cpi_j;




	n=sprintf(filename,"t%d.txt",myRank);
			time	=	fopen(filename,"w+");

	n=sprintf(filename,"Task_info%d.txt",myRank);
			Task_info=	fopen(filename,"w+");

	n=sprintf(filename,"smc_c%d.txt",myRank);
			ci	=	fopen(filename,"w+");

	n=sprintf(filename,"ec_c%d.txt",myRank);
			cj	=	fopen(filename,"w+");

	n=sprintf(filename,"ec_s%d.txt",myRank);
			sj	=	fopen(filename,"w+");

	n=sprintf(filename,"smc_v%d.txt",myRank);
			vi	=	fopen(filename,"w+");

	n=sprintf(filename,"ec_v%d.txt",myRank);
			vj	=	fopen(filename,"w+");

	n=sprintf(filename,"smc_I%d.txt",myRank);
			Ii	=	fopen(filename,"w+");

	n=sprintf(filename,"ec_I%d.txt",myRank);
			Ij	=	fopen(filename,"w+");

	n=sprintf(filename,"cpv_i%d.txt",myRank);
			cpv_i	=	fopen(filename,"w+");

	n=sprintf(filename,"cpc_i%d.txt",myRank);
			cpc_i	=	fopen(filename,"w+");

	n=sprintf(filename,"cpi_i%d.txt",myRank);
			cpi_i	=	fopen(filename,"w+");

	n=sprintf(filename,"cpc_j%d.txt",myRank);
			cpc_j	=	fopen(filename,"w+");

	n=sprintf(filename,"cpv_j%d.txt",myRank);
			cpv_j	=	fopen(filename,"w+");

	n=sprintf(filename,"cpi_j%d.txt",myRank);
			cpi_j	=	fopen(filename,"w+");



	int 	count		= 3*(grid.layers_smc+grid.layers_ec),
		buf_offset	= 3*grid.layers_smc;

	int iteration=0;
	double Per_MPI_time_t1,Per_MPI_time_t2,
	     Per_step_compute_time_t1,Per_step_compute_time_t2;
	double t1   =   MPI_Wtime();

/********************************************************************/
	for(int i=0; i<grid.layers_smc;i++)
		update_send_buffers_smc(myRank,tasks,i);

	for(int i=0; i<grid.layers_ec;i++)
		update_send_buffers_ec(myRank,tasks,i);

	my_boundary(myRank);

	update_async(myRank,tasks);

	retrive_recv_buffers_smc(myRank,tasks);
	retrive_recv_buffers_ec(myRank,tasks);

/************************************************************************/
	 /*********************SOLVER SECTION*************************/
/************************************************************************/
	tend	=	interval;
		rksuite.setup(total, tnow, y, tend, TOL, thres, method, "CT", false, 1e-5, false );

			while (tnow <=10.00) {
		        // the ct() function does not guarantee to advance all the
		        // way to the stop time.  Keep stepping until it does.
		        do {
		            computeDerivatives( tnow, y, yp );
			        rksuite.ct(computeDerivatives, tnow, y, yp, cflag );
			        if (cflag >= 5) {
				        printf("RKSUITE error %d\n", cflag );
				        MPI_Abort(MPI_COMM_WORLD,1000);
				        return false;
				        break;
			        }
		        } while (tnow < tend);

	    iteration++;

/************************************************/
/*		      Communication Block			    */
/************************************************/

	        update_async(myRank,tasks);
	        MPI_Barrier(MPI_COMM_WORLD);

	        if (iteration==5){
	        		fprintf(Task_info,"COUPLING COEFFICIENTS\n\n");


	        		fprintf(Task_info,"g_hm_smc=\t%6.2lf\ng_hm_ec=\t%6.2lf\np_hm_smc=\t%6.2lf\np_hm_ec=\t%6.2lf\npIP_hm_smc=\t%6.2lf\npIP_hm_ec=\t%6.2lf\ng_ht_smc=\t%6.2lf\tg_ht_ec=\t%6.2lf\np_ht_smc=\t%6.2lf\tp_ht_ec=\t%6.2lf\npIP_ht_smc=\t%6.2lf\tpIP_ht_ec=\t%6.2lf\n",
	        		cpl_cef.g_hm_smc,cpl_cef.g_hm_ec,cpl_cef.p_hm_smc,cpl_cef.p_hm_ec,cpl_cef.pIP_hm_smc,cpl_cef.pIP_hm_ec,cpl_cef.g_ht_smc,cpl_cef.g_ht_ec,cpl_cef.p_ht_smc,cpl_cef.p_ht_ec,
	        		cpl_cef.pIP_ht_smc,cpl_cef.pIP_ht_ec);


	        		fprintf(Task_info,"\nSpatial Gradient info:\nMinimum JPLC\t=\t=%lf\nMaximum JPLC\t=\t=%lf\nGradient\t=\t=%lf\n"
	        					,grid.min_jplc,grid.max_jplc,grid.gradient);

	        		fprintf(Task_info,"\nTotal Tasks=%d\tMyRank=%d\n\n",tasks,myRank);
	        		fprintf(Task_info,"LENGTH = %2.5f\nREQUESTED DIAMETER = %2.5f\nCORRECTED CIRCUMFERENCE = %2.5f\nTotal layers=%d\nTotal nodes=%d\n\n",length,diameter,new_circ,grid.layers_smc,grid.nodes_smc);
	        		fprintf(Task_info,"SMC GRID INFO:\nNodes =	%d\nLayers = %d\nHX = %lf,HY = %lf\nTotal cells = %d\n\n",grid.n_smc,grid.layers_smc,hx_smc,hy_smc,grid.m);
	        		fprintf(Task_info,"EC GRID INFO:\nNodes =	%d\nLayers = %d\nHX = %lf,HY = %lf\nTotal cells = %d\n\n",grid.n_ec,grid.layers_ec,hx_ec,hy_ec,grid.n);
	        		fprintf(Task_info,"JPLC (at t<100 seconds) in axial direction per EC\n");
	        	for (int p=0; p<grid.n_ec; p++)
	        		fprintf(Task_info,"[%d]\t%lf\n",p,ec[0][p].JPLC);

	        		}


	        if ((iteration==1)||(iteration % file_write_per_unit_time==0))
			{
				fprintf(time,"%lf\n",tnow);

				for (int row=0; row<grid.layers_smc;row++)
				{
					for (int col=0; col<grid.n_smc; col++)
					{
						fprintf(ci,"%2.10lf",smc[row][col].p[smc_Ca_i]);
						fprintf(vi,"%2.10lf",smc[row][col].p[smc_V_m]);
						fprintf(Ii,"%2.10lf",smc[row][col].p[smc_IP3]);


						if (col<(grid.n_smc-1)){
							fprintf(ci,"\t");
							fprintf(vi,"\t");
							fprintf(Ii,"\t");
							}
						}

						if (row<(grid.layers_smc-1)){
							fprintf(ci,"\t");
							fprintf(vi,"\t");
							fprintf(Ii,"\t");
							}

						else if (row==(grid.layers_smc-1)){
							fprintf(ci,"\n");
							fprintf(vi,"\n");
							fprintf(Ii,"\n");
							}
					}

		     	for (int row=0; row<grid.layers_ec;row++)
				{
					for (int col=0; col<grid.n_ec; col++)
					{
						fprintf(cj,"%2.10lf\t",ec[row][col].q[ec_Ca]);
						fprintf(sj,"%2.10lf\t",ec[row][col].q[ec_SR]);
						fprintf(vj,"%2.10lf\t",ec[row][col].q[ec_Vm]);
						fprintf(Ij,"%2.10lf\t",ec[row][col].q[ec_IP3]);
						}
						if (row<(grid.layers_ec-1)){
							fprintf(cj,"\t");
							fprintf(sj,"\t");
							fprintf(vj,"\t");
							fprintf(Ij,"\t");
							}
						else if (row==(grid.layers_ec-1)){
							fprintf(cj,"\n");
							fprintf(sj,"\n");
							fprintf(vj,"\n");
							fprintf(Ij,"\n");
							}
			   		}
		     		fflush(time);fflush(Task_info);fflush(errPT);
		     		//fflush(MPI_stat);
		     		fflush(ci);fflush(cj);
		     		fflush(sj);
		     		fflush(vi);fflush(vj);
		     		fflush(Ii);fflush(Ij);

		     		fflush(cpv_i);
		     		fflush(cpc_i);fflush(cpi_i);

		     		fflush(cpv_j);fflush(cpc_j);
		     		fflush(cpi_j);
				}
	        tend += interval;
	        rksuite.reset(tend);
	       } //end while()
/****////////*******////////****************
/************************************************************************/
    /*********************AFTER 10.00 seconds*************************/
/************************************************************************/
	tend	=	tnow + interval;
		rksuite.setup(total, tnow, y, tend, TOL, thres, method, "CT", false, 1e-5, false );

			while (tnow <=100.00) {
				// the ct() function does not guarantee to advance all the
				// way to the stop time.  Keep stepping until it does.
				do {
					computeDerivatives( tnow, y, yp );
					rksuite.ct(computeDerivatives, tnow, y, yp, cflag );
					if (cflag >= 5) {
						printf("RKSUITE error %d\n", cflag );
						MPI_Abort(MPI_COMM_WORLD,1000);
						return false;
						break;
					}
				} while (tnow < tend);

		iteration++;

/************************************************/
/*		      Communication Block			    */
/************************************************/

			update_async(myRank,tasks);
			MPI_Barrier(MPI_COMM_WORLD);

			if (iteration==5e3){

					fprintf(Task_info,"JPLC (at 10<t<100 seconds) in axial direction per EC\n");
				for (int p=0; p<grid.n_ec; p++)
					fprintf(Task_info,"[%d]\t%lf\n",p,ec[0][p].JPLC);

					}


			if (iteration % file_write_per_unit_time==0)
			{
				fprintf(time,"%lf\n",tnow);

				for (int row=0; row<grid.layers_smc;row++)
				{
					for (int col=0; col<grid.n_smc; col++)
					{
						fprintf(ci,"%2.10lf",smc[row][col].p[smc_Ca_i]);
						fprintf(vi,"%2.10lf",smc[row][col].p[smc_V_m]);
						fprintf(Ii,"%2.10lf",smc[row][col].p[smc_IP3]);


						if (col<(grid.n_smc-1)){
							fprintf(ci,"\t");
							fprintf(vi,"\t");
							fprintf(Ii,"\t");
							}
						}

						if (row<(grid.layers_smc-1)){
							fprintf(ci,"\t");
							fprintf(vi,"\t");
							fprintf(Ii,"\t");
							}

						else if (row==(grid.layers_smc-1)){
							fprintf(ci,"\n");
							fprintf(vi,"\n");
							fprintf(Ii,"\n");
							}
					}

				for (int row=0; row<grid.layers_ec;row++)
				{
					for (int col=0; col<grid.n_ec; col++)
					{
						fprintf(cj,"%2.10lf\t",ec[row][col].q[ec_Ca]);
						fprintf(sj,"%2.10lf\t",ec[row][col].q[ec_SR]);
						fprintf(vj,"%2.10lf\t",ec[row][col].q[ec_Vm]);
						fprintf(Ij,"%2.10lf\t",ec[row][col].q[ec_IP3]);
						}
						if (row<(grid.layers_ec-1)){
							fprintf(cj,"\t");
							fprintf(sj,"\t");
							fprintf(vj,"\t");
							fprintf(Ij,"\t");
							}
						else if (row==(grid.layers_ec-1)){
							fprintf(cj,"\n");
							fprintf(sj,"\n");
							fprintf(vj,"\n");
							fprintf(Ij,"\n");
							}
					}
					fflush(time);fflush(Task_info);fflush(errPT);
					//fflush(MPI_stat);
					fflush(ci);fflush(cj);
					fflush(sj);
					fflush(vi);fflush(vj);
					fflush(Ii);fflush(Ij);

					fflush(cpv_i);
					fflush(cpc_i);fflush(cpi_i);

					fflush(cpv_j);fflush(cpc_j);
					fflush(cpi_j);
				}
			tend += interval;
			rksuite.reset(tend);
		   } //end while()
/****////////*******////////****************

/************************************************************************/
/***			          After 100 seconds							 ****/
/************************************************************************/
	tend	=	tnow+interval;
		rksuite.setup(total, tnow, y, tend, TOL, thres, method, "CT", false, 1e-6, false );

			while (tnow <=tfinal) {
				// the ct() function does not guarantee to advance all the
				// way to the stop time.  Keep stepping until it does.
				do {
					computeDerivatives( tnow, y, yp );
					rksuite.ct(computeDerivatives, tnow, y, yp, cflag );
					if (cflag >= 5) {
						printf("RKSUITE error %d\n", cflag );
						MPI_Abort(MPI_COMM_WORLD,1000);
						return false;
						break;
					}
				} while (tnow < tend);

		iteration++;

/************************************************/
/*		      Communication Block			    */
/************************************************/
			update_async(myRank,tasks);
			MPI_Barrier(MPI_COMM_WORLD);

			if (iteration==1e5)
			{
				fprintf(Task_info,"JPLC (at t>100 seconds) in axial direction per EC\n");
								for (int p=0; p<grid.n_ec; p++)
									fprintf(Task_info,"[%d]\t%lf\n",p,ec[0][p].JPLC);
			}

			if (iteration % file_write_per_unit_time==0)
			{
				fprintf(time,"%lf\n",tnow);

				for (int row=0; row<grid.layers_smc;row++)
				{
					for (int col=0; col<grid.n_smc; col++)
					{
						fprintf(ci,"%2.10lf",smc[row][col].p[smc_Ca_i]);
						fprintf(vi,"%2.10lf",smc[row][col].p[smc_V_m]);
						fprintf(Ii,"%2.10lf",smc[row][col].p[smc_IP3]);


						if (col<(grid.n_smc-1)){
							fprintf(ci,"\t");
							fprintf(vi,"\t");
							fprintf(Ii,"\t");
							}
						}

						if (row<(grid.layers_smc-1)){
							fprintf(ci,"\t");
							fprintf(vi,"\t");
							fprintf(Ii,"\t");
							}

						else if (row==(grid.layers_smc-1)){
							fprintf(ci,"\n");
							fprintf(vi,"\n");
							fprintf(Ii,"\n");
							}
					}

				for (int row=0; row<grid.layers_ec;row++)
				{
					for (int col=0; col<grid.n_ec; col++)
					{
						fprintf(cj,"%2.10lf\t",ec[row][col].q[ec_Ca]);
						fprintf(sj,"%2.10lf\t",ec[row][col].q[ec_SR]);
						fprintf(vj,"%2.10lf\t",ec[row][col].q[ec_Vm]);
						fprintf(Ij,"%2.10lf\t",ec[row][col].q[ec_IP3]);
						}
						if (row<(grid.layers_ec-1)){
							fprintf(cj,"\t");
							fprintf(sj,"\t");
							fprintf(vj,"\t");
							fprintf(Ij,"\t");
							}
						else if (row==(grid.layers_ec-1)){
							fprintf(cj,"\n");
							fprintf(sj,"\n");
							fprintf(vj,"\n");
							fprintf(Ij,"\n");
							}
					}
					fflush(time);fflush(Task_info);fflush(errPT);
					//fflush(MPI_stat);
					fflush(ci);fflush(cj);
					fflush(sj);
					fflush(vi);fflush(vj);
					fflush(Ii);fflush(Ij);

					fflush(cpv_i);
					fflush(cpc_i);fflush(cpi_i);

					fflush(cpv_j);fflush(cpc_j);
					fflush(cpi_j);
				}
			tend += interval;
			rksuite.reset(tend);
		   } //end while()
/****////////*******////////****************

/************************************************************************/

			double t2	=	MPI_Wtime();
	//printf("[%d] : Elapsed time	=	%lf\nMPI size=%d\ttfinal=%lf\tStep size=%lf\tcount=%d\n",myRank,(t2-t1),tasks,tfinal,interval,cnt2);
	fprintf(Task_info,"Elapsed time	=	%lf\ntfinal=%lf\nStep size=%lf\n",(t2-t1),tasks,tfinal,interval);
	MPI_Barrier(MPI_COMM_WORLD);
	if (myRank==0)
		printf("\nEND OF PROGRAM\n");

	free(smc);free(smc_base);
	free(ec);free(ec_base);
	free(nn.sbuf_left);
	free(nn.sbuf_right);
	free(nn.rbuf_left);
	free(nn.rbuf_right);
	free(nn.left_smc_c);
	free(nn.right_smc_c);
	free(nn.left_ec_c);
	free(nn.right_ec_c);
	free(nn.left_smc_v);
	free(nn.right_smc_v);
	free(nn.left_ec_v);
	free(nn.right_ec_v);
	free(nn.left_smc_I);
	free(nn.right_smc_I);
	free(nn.left_ec_I);
	free(nn.right_ec_I);
	free(y);
	free(yp);
	free(ymax);

	fclose(time);fclose(Task_info);
	fclose(errPT);

	fclose(ci);fclose(cj);
	fclose(sj);
	fclose(vi);fclose(vj);
	fclose(Ii);fclose(Ij);

	fclose(cpv_i);
	fclose(cpc_i);fclose(cpi_i);

	fclose(cpv_j);fclose(cpc_j);
	fclose(cpi_j);



	MPI_Finalize();
}
/**
 * Performs one training step on the batch delimited by startIndex and endIndex.
 *
 * Calls computeDerivatives() for the batch and then kernelUpdateParams() with
 * the model's params_d, derivatives_d and adagradWts_d members and the
 * configured learning rate.
 *
 * @param startIndex  Forwarded unchanged to computeDerivatives().
 * @param endIndex    Forwarded unchanged to computeDerivatives().
 *
 * NOTE(review): computeDerivatives() returns a value that this method never
 * used; presumably it is the batch cost -- confirm whether it should be
 * reported to the caller or logged.
 */
void
SentimentTraining::trainBatch(int startIndex, int endIndex) {
    // The result is discarded on purpose: only the call's side effects are
    // relied on here (presumably it fills mModel.derivatives_d -- TODO confirm).
    static_cast<void>(computeDerivatives(startIndex, endIndex));
    kernelUpdateParams(mModel.params_d, mModel.derivatives_d, mModel.adagradWts_d, mOptions.learningRate);
}
/**
 * calcCLG_OF
 *
 * Main CLG-optical flow (CLG-OF) computation function.
 *
 * The two input images are copied into row/column matrices, the temporal
 * difference and the spatial derivatives are computed, the J tensor is built
 * (and smoothed when rho > 0), and the flow is then relaxed iteratively. The
 * resulting flow is ADDED to uOut/vOut, so the caller must initialise both
 * output buffers (e.g. to zero) before calling.
 *
 * Parameters:
 *
 * image1          Pointer to the first image (the "previous" time frame),
 *                 read as a flat array indexed [col + row*nCols].
 * image2          Pointer to the second image (the "current" time frame),
 *                 same layout as image1.
 * uOut            Pointer to the horizontal component of the CLG-OF solution
 *                 (accumulated into, same layout as the images).
 * vOut            Pointer to the vertical component of the CLG-OF solution
 *                 (accumulated into, same layout as the images).
 * nRows           Number of image rows (same for the CLG-OF vector field).
 * nCols           Number of image columns (same for the CLG-OF vector field).
 * iterations      Maximum number of relaxation iterations. When 0, the value
 *                 nRows*nCols/8 (truncated to int) is used instead.
 * alpha           Global smoothing coefficient of the CLG-OF.
 * rho             Local spatio-temporal smoothing coefficient of the CLG-OF.
 *                 The J tensor is smoothed only when rho > 0.
 * wFactor         SOR relaxation factor, between 0 and 2. Only used when
 *                 coupledMode is not 1.
 * verbose         Display/hide messages to the stdout.
 * coupledMode     Iteration type. 1->Pointwise-Coupled Gauss-Seidel,
 *                 any other value->SOR.
 *
 * Returns:
 *
 * Always 1.
 *
 */
int calcCLG_OF(double* image1,
               double* image2,
               double* uOut,
               double* vOut,
               int nRows,
               int nCols,
               int iterations,
               double alpha,
               double rho,
               double wFactor,
               int verbose,
               int coupledMode) {

    if (verbose) {
        printf("calc_clg\n");
        printf("  setting up variables\n");
    }

    int i=0, j=0;

    // Copy the flat input images into 2-D (row, column) matrices.
    double **prevFrame, **currFrame;
    prevFrame = pMatrix(nRows, nCols);
    currFrame = pMatrix(nRows, nCols);

    for (i=0; i<nRows; i++) {
        for (j=0; j<nCols; j++) {
            prevFrame[i][j] = image1[j + i*nCols];
            currFrame[i][j] = image2[j + i*nCols];
        }
    }

    if (verbose)
        printf("  allocating memory for arrays\n");

    double **u, **v;
    double **dfdx;
    double **dfdy;
    double **dfdt;

    u = pMatrix(nRows, nCols);
    v = pMatrix(nRows, nCols);

    // Spatial and temporal derivative matrices.
    dfdx = pMatrix(nRows, nCols);
    dfdy = pMatrix(nRows, nCols);
    dfdt = pMatrix(nRows, nCols);

    // Start from a zero flow field; the temporal derivative is the plain
    // frame difference.
    for (i=0; i<nRows; i++) {
        for (j=0; j<nCols; j++) {
            u[i][j] = 0.0;
            v[i][j] = 0.0;
            dfdt[i][j] = currFrame[i][j] - prevFrame[i][j];
        }
    }

    if (verbose)
        printf("  allocating memory for derivatives matrices\n");

    double **J[JROWS][JCOLS];

    // Because of symmetry, only the upper part is allocated.
    int k, l;
    for (k=0; k<JROWS; k++)
        for (l=k; l<JCOLS; l++)
            J[k][l] = pMatrix(nRows, nCols);

    // Spatial derivatives obtention.
    computeDerivatives(prevFrame, currFrame, dfdx, dfdy, nRows, nCols, verbose);
    // Compute J tensor.
    computeJTensor(dfdx, dfdy, dfdt, J, nRows, nCols);

    if (verbose)
        printf("  local spatio temporal smoothing\n");

    if (rho > 0) {

        // Smooth every allocated (upper-triangular) component of J.
        for (k=0; k<JROWS; k++)
            for (l=k; l<JCOLS; l++)
                matrixSmooth(J[k][l], nRows, nCols, rho);

    }

    // A request for 0 iterations selects a size-dependent default.
    if (iterations == 0)
        iterations = (int) (nRows * nCols / 8.0);

    if (verbose)
        printf("  performing %i relax iterations\n", iterations);

    int count = 0;
    double error = 1000000;
    double convergenceError = 0.0;

    // Relax until the iteration budget is used up or the value returned by
    // the last relaxation step, scaled by 1.01, is no longer below the value
    // of the step before it.
    for (count=0; count<iterations && convergenceError*1.01 < error; count++) {

        // From the second pass on, remember the previous step's result so
        // that the loop condition compares consecutive steps.
        if (count > 0)
            error = convergenceError;

        if (coupledMode == 1) {

            if (verbose && count % 50 == 0 && count > 0)
                printf("  iteration %d/%d (P-C Gauss-Seidel), error=%f\n",
                       count, iterations, error);

            convergenceError = relaxPointwiseCoupledGaussSeidel(u, v, J,
                                                                nRows, nCols,
                                                                alpha);
        } else {
            if (verbose && count % 50 == 0 && count > 0)
                printf("  iteration %d/%d (SOR), error=%f\n",
                       count, iterations, error);

            convergenceError = relaxSOR(u, v, J, nRows, nCols, alpha, wFactor);
        }
    }

    // Show debug information.
    // NOTE(review): `error` holds the result of the step before the last one
    // (or its initial value if at most one step ran), not the final
    // convergenceError -- confirm which one is meant to be reported.
    if (verbose)
        printf("  filling output after %d iterations, error=%f\n", count, error);

    // Accumulate the computed flow into the output variables.
    for (i=0; i<nRows; i++) {
        for (j=0; j<nCols; j++) {
            uOut[j + i*nCols] += u[i][j];
            vOut[j + i*nCols] += v[i][j];
        }
    }

    // Free memory.
    if (verbose)
        printf("  freeing memory\n");

    freePmatrix(u, nRows);
    freePmatrix(v, nRows);

    for (k=0; k<JROWS; k++)
        for (l=k; l<JCOLS; l++)
            freePmatrix(J[k][l], nRows);

    freePmatrix(prevFrame, nRows);
    freePmatrix(currFrame, nRows);
    freePmatrix(dfdx,  nRows);
    freePmatrix(dfdy,  nRows);
    freePmatrix(dfdt,  nRows);

    if (verbose)
        printf("calc_clg: done\n");
    return 1;
}