/*
 * MEX worker: computes a shift-map over an M1 x N1 grid by repeated
 * alpha-expansion graph cuts and returns the resulting per-site shifts.
 *
 * Inputs (prhs), exactly 10:
 *   prhs[0], prhs[1]  C1, C2         double matrices whose O member must be 3
 *                                    and whose M x N must equal KPSize1 / KPSize2
 *   prhs[2], prhs[3]  KP1, KP2       float matrices with kpLength*M*N elements
 *   prhs[4], prhs[5]  KPSize1/2      two-element [M N] sizes
 *   prhs[6], prhs[7]  Irange, Jrange two-element [min max] shift ranges
 *   prhs[8], prhs[9]  ShiftI, ShiftJ int M1 x N1 matrices that are added to
 *                                    the shifts found here
 *
 * Outputs (plhs), exactly 2:
 *   plhs[0], plhs[1]  int M1 x N1 matrices: found shift + input shift, for the
 *                     i and j directions respectively.
 *
 * Side effects: assigns M1, N1, M2, N2 and min/max_shift_i/j, which are
 * declared outside this function, and prints progress through mexPrintf.
 * Every failed ASSERT aborts the call (ASSERT is defined elsewhere).
 *
 * NOTE(review): shift2lab / lab2shift, dataFn / smoothFn, cutoff and kpLength
 * are defined elsewhere; they presumably read the variables assigned here,
 * so the assignment order below is kept - confirm before reordering.
 */
void mexFunctionReal(	int		nlhs, 		/* number of expected outputs */
		 			mxArray	*plhs[],	/* mxArray output pointer array */
			 		int		nrhs, 		/* number of inputs */
				 	const mxArray	*prhs[]		/* mxArray input pointer array */)
{
	//
	// Get input
	//
	ASSERT( nlhs == 2 && nrhs == 10);
	matrix<double> mxC1      = prhs[0];
	matrix<double> mxC2      = prhs[1];
	matrix<float>  mxKP1     = prhs[2];
	matrix<float>  mxKP2     = prhs[3];
	matrix<double> mxKPSize1 = prhs[4];
	matrix<double> mxKPSize2 = prhs[5];
	matrix<double> mxIrange  = prhs[6];
	matrix<double> mxJrange  = prhs[7];
	matrix<int>    mxShiftI  = prhs[8];
	matrix<int>    mxShiftJ  = prhs[9];

	ASSERT( mxC1.O == 3 );
	ASSERT( mxC2.O == 3 );
	ASSERT( mxKPSize1.numel() == 2 );
	ASSERT( mxKPSize2.numel() == 2 );
	ASSERT(  mxIrange.numel() == 2 );
	ASSERT(  mxJrange.numel() == 2 );

	// Grid sizes come from the KPSize inputs, not from the matrices themselves.
	M1 = mxKPSize1[0];
	N1 = mxKPSize1[1];
	M2 = mxKPSize2[0];
	N2 = mxKPSize2[1];
	ASSERT( mxC1.M == M1 && mxC1.N == N1); // ugly hack (original note: "fulhack")
	ASSERT( mxC2.M == M2 && mxC2.N == N2);
	ASSERT( mxShiftI.M == M1 && mxShiftI.N == N1);
	ASSERT( mxShiftJ.M == M1 && mxShiftJ.N == N1);
	ASSERT( mxKP1.numel() == kpLength*M1*N1);
	ASSERT( mxKP2.numel() == kpLength*M2*N2);

	//
	// Range of shifts to consider
	//
	ASSERT(mxIrange[0]>=-std::max(M1,M2)+1 && mxIrange[1]<=std::max(M1,M2)-1);
	ASSERT(mxJrange[0]>=-std::max(N1,N2)+1 && mxJrange[1]<=std::max(N1,N2)-1);
	// A reversed range would make the label count below zero, negative or
	// meaningless, so reject it before building the graph.
	ASSERT( mxIrange[0] <= mxIrange[1] );
	ASSERT( mxJrange[0] <= mxJrange[1] );
	max_shift_i = mxIrange[1];
	min_shift_i = mxIrange[0];
	max_shift_j = mxJrange[1];
	min_shift_j = mxJrange[0];
	// Compute number of labels: one per (di,dj) pair in the two ranges
	const int num_labels = (max_shift_i-min_shift_i+1)*(max_shift_j-min_shift_j+1);

	mexPrintf("Shift-map %d x %d  with %d labels\n",M1,N1,num_labels);
	mexPrintf("I-range %d ... %d \n",min_shift_i,max_shift_i);
	mexPrintf("J-range %d ... %d \n",min_shift_j,max_shift_j);

	//
	// Create graph
	//
	GCoptimizationGridGraph gc(M1,N1,num_labels);

	// Set up the data handed to the data-cost and smoothness-cost callbacks
	ForDataFn toFn;
	toFn.c1		= mxC1;
	toFn.c2		= mxC2;
	toFn.kp1 	= mxKP1;
	toFn.kp2 	= mxKP2;
	toFn.shiftI = mxShiftI;
	toFn.shiftJ = mxShiftJ;

	gc.setDataCost(&dataFn,&toFn);
	gc.setSmoothCost(&smoothFn, &toFn);

	// Initialize shift-map to 0
	mexPrintf("Starting shift-map: di=0, dj=0\n");
	const int zero_label = shift2lab(0,0);
	// Labels run over 0 .. num_labels-1 (see the expansion loop below); make
	// sure the starting label is one of them before writing it to every site.
	ASSERT( zero_label >= 0 && zero_label < num_labels );
	const int num_sites = M1*N1;
	for (int site = 0; site < num_sites; ++site) {
		gc.setLabel(site,zero_label);
	}

	// Print energy information
	double energy = gc.compute_energy();
	double dataEnergy = gc.giveDataEnergy();
	double smoothEnergy = gc.giveSmoothEnergy();
	mexPrintf("Start energy : %16.0f\n",energy);
	mexPrintf("Start data   : %16.0f\n",dataEnergy);
	mexPrintf("Start smooth : %16.0f\n",smoothEnergy);
	mexPrintf("Stopping when old_energy*%f < new_energy.\n",cutoff);
	flush_output();

	// Seed old_energy so that cutoff*old_energy > energy holds on entry and
	// the first sweep always runs.
	double old_energy = energy/cutoff + 1;
	const int max_iterations = 100;	// hard cap on the number of sweeps
	int iter = 1;
	startTime();
	while ( cutoff*old_energy > energy  && iter <= max_iterations)
	{
		old_energy = energy;
		// One sweep: a single alpha-expansion move for every label
		for (int lab = 0;  lab < num_labels;  ++lab )
		{
			gc.alpha_expansion(lab);
		}
		energy = gc.compute_energy();
		dataEnergy = gc.giveDataEnergy();
		smoothEnergy = gc.giveSmoothEnergy();
		double time_taken = endTime(); // Measure the time taken since last call to endTime
		mexPrintf("Iteration %3d:   T:%16.0f   D:%16.0f   S:%16.0f  time: %.2f sec\n",iter,energy,dataEnergy,smoothEnergy,time_taken);
		iter++;
		endTime(); // Don't measure the time taken by the output
	}

	// Convert explicitly: the value goes through a varargs "%f" slot, which
	// requires a double whatever type compute_energy() returns.
	mexPrintf("Final energy : %16.0f\n",static_cast<double>(gc.compute_energy()));

	//
	// Create output
	//
	// The outputs are handed to plhs before being filled, exactly as in the
	// original code. NOTE(review): this relies on the matrix<> to mxArray*
	// conversion sharing storage - confirm in the matrix class.
	matrix<int> shiftI_out(M1,N1);
	matrix<int> shiftJ_out(M1,N1);
	plhs[0] = shiftI_out;
	plhs[1] = shiftJ_out;

	for ( int ind = 0; ind < num_sites; ind++ ) {
		int di,dj;
		const int lab = gc.whatLabel(ind);
		lab2shift(lab,di,dj);
		// Result = shift found here + the shift supplied by the caller
		shiftI_out[ind] = di + mxShiftI[ind];
		shiftJ_out[ind] = dj + mxShiftJ[ind];
	}
}