void invcondemonsforces(int nlhs,
                 mxArray *plhs[],
                 int nrhs,
                 const mxArray *prhs[])
{
   typedef float PixelType;
   typedef itk::Image< PixelType, Dimension >           ImageType;
   
   typedef float                                        VectorComponentType;
   typedef itk::Vector<VectorComponentType, Dimension>  VectorPixelType;
   typedef itk::Image<VectorPixelType, Dimension>       DeformationFieldType;

   typedef itk::ESMInvConDemonsRegistrationFunction
      <ImageType,ImageType,DeformationFieldType>        DemonsRegistrationFunctionType;

   
   //boost::timer timer;


   // Allocate images and deformation field
   typename ImageType::Pointer fixedimage
      = ImageType::New();
   typename ImageType::Pointer movingimage
      = ImageType::New();
   typename ImageType::Pointer fw_weightimage
      = ImageType::New();  
   typename DeformationFieldType::Pointer field
      = DeformationFieldType::New();
   
   typename ImageType::Pointer jacobianimage
      = ImageType::New();  
      
      
   typename DeformationFieldType::Pointer inv_field
      = DeformationFieldType::New();
      
   typename DeformationFieldType::Pointer update
      = DeformationFieldType::New();
   
   typename DeformationFieldType::SpacingType spacing;
   spacing.Fill( 1.0 );
   
   typename DeformationFieldType::PointType origin;
   origin.Fill( 0.0 );
   
   typename DeformationFieldType::RegionType     region;
   typename DeformationFieldType::SizeType       size;
   typename DeformationFieldType::IndexType      start;

   unsigned int numPix(1u);
   const MatlabPixelType * fixinptr =  static_cast<const MatlabPixelType *>(mxGetData(prhs[0]));
   const MatlabPixelType * movinptr =  static_cast<const MatlabPixelType *>(mxGetData(prhs[1]));
   const MatlabPixelType * fieldinptrs[Dimension];
   const MatlabPixelType * inv_fieldinptrs[Dimension];
   
   
   mwSize matlabdims[Dimension];
   for (unsigned int d=0; d<Dimension; d++)
   {
      matlabdims[d]= mxGetDimensions(prhs[0])[d];
      size[d] = matlabdims[d];
      start[d] = 0;
      numPix *= size[d];

      fieldinptrs[d] = static_cast<const MatlabPixelType *>(mxGetData(prhs[2+d]));
      inv_fieldinptrs[d] = static_cast<const MatlabPixelType *>(mxGetData(prhs[2+Dimension+d]));
   }
   
   const MatlabPixelType * jacobianptr = static_cast<const MatlabPixelType *> (mxGetData(prhs[2*Dimension + 2]));
   const MatlabPixelType * fw_weightptr = static_cast<const MatlabPixelType *> (mxGetData(prhs[2*Dimension + 3]));
   
   const double RegWeight = static_cast<double>( mxGetPr(prhs[2*Dimension+4])[0] );
   
   const unsigned int UseJacFlag = 1;
   
   //std::cout << "RegWeight : " << RegWeight << std::endl;
   
   region.SetSize( size );
   region.SetIndex( start );

   fixedimage->SetOrigin( origin );
   fixedimage->SetSpacing( spacing );
   fixedimage->SetRegions( region );
   fixedimage->Allocate();

   movingimage->SetOrigin( origin );
   movingimage->SetSpacing( spacing );
   movingimage->SetRegions( region );
   movingimage->Allocate();
   
   fw_weightimage->SetOrigin( origin );
   fw_weightimage->SetSpacing( spacing );
   fw_weightimage->SetRegions( region );
   fw_weightimage->Allocate();
   
   
   field->SetOrigin( origin );
   field->SetSpacing( spacing );
   field->SetRegions( region );
   field->Allocate();
   
   inv_field->SetOrigin( origin );
   inv_field->SetSpacing( spacing );
   inv_field->SetRegions( region );
   inv_field->Allocate();

   update->SetOrigin( origin );
   update->SetSpacing( spacing );
   update->SetRegions( region );
   update->Allocate();
   if (UseJacFlag > 0)
   {
        jacobianimage->SetOrigin( origin );
        jacobianimage->SetSpacing( spacing );
        jacobianimage->SetRegions( region );
        jacobianimage->Allocate();
   }
   

   //mexPrintf("done Allocate(); %f sec\n", timer.elapsed());
   //timer.restart();

   
   PixelType * fixptr = fixedimage->GetBufferPointer();
   const PixelType * const fixbuff_end = fixptr + numPix;
   PixelType * movptr = movingimage->GetBufferPointer();
   PixelType * fwweightptr = NULL;
   PixelType * jacptr = NULL;
   if (UseJacFlag > 0)
   {
       jacptr = jacobianimage->GetBufferPointer();
   }
   
   
   
   fwweightptr = fw_weightimage->GetBufferPointer();
   
   VectorPixelType * fieldptr = field->GetBufferPointer();
   VectorPixelType * inv_fieldptr = inv_field->GetBufferPointer();
   
   while ( fixptr != fixbuff_end )
   {
      *fixptr++ = *fixinptr++;
      *movptr++ = *movinptr++;
      *fwweightptr++ = *fw_weightptr++; 
      
      for (unsigned int d=0; d<Dimension; d++)
      {
         (*fieldptr)[d] = *(fieldinptrs[d])++;
      }
      if (UseJacFlag > 0)
      {
        *jacptr++ = *jacobianptr++; 
      }
      
      ++fieldptr;
      
      for (unsigned int d=0; d<Dimension; d++)
      {
         (*inv_fieldptr)[d] = *(inv_fieldinptrs[d])++;
      }
      ++inv_fieldptr;
   }

   // Create demons function
   typename DemonsRegistrationFunctionType::Pointer drfp
      = DemonsRegistrationFunctionType::New();

   //mexPrintf("step size: %f\n",mxGetPr(prhs[2*Dimension+2])[0]);
   //drfp->SetMaximumUpdateStepLength( mxGetPr(prhs[2*Dimension+2])[0] );

   typename DemonsRegistrationFunctionType::GradientType gtype = DemonsRegistrationFunctionType::Symmetric;
   drfp->SetUseGradientType( gtype );

   drfp->SetDeformationField( field );
   drfp->SetInvDeformationField( inv_field);
   drfp->SetFixedImage( fixedimage );
   drfp->SetMovingImage( movingimage );
   
   drfp->SetRegWeight(RegWeight);
   
   drfp->SetUseFwWeight(true);
   drfp->SetFwWeightImage(fw_weightimage);
   
   if (UseJacFlag > 0)
   {
       drfp->SetUseJacobian(true);
       drfp->SetJacobianDetImage(jacobianimage);
   }
   else
   {
       drfp->SetUseJacobian(false);
   }
   
   drfp->InitializeIteration();

   //mexPrintf("done demons function init %f sec\n", timer.elapsed());
   //timer.restart();

   const itk::Size<Dimension> radius = drfp->GetRadius();

   // Break the input into a series of regions.  The first region is free
   // of boundary conditions, the rest with boundary conditions.  We operate
   // on the output region because input has been copied to output.
   typedef itk::NeighborhoodAlgorithm::ImageBoundaryFacesCalculator
      <DeformationFieldType> FaceCalculatorType;
   typedef typename FaceCalculatorType::FaceListType FaceListType;
   typedef typename DemonsRegistrationFunctionType::NeighborhoodType
      NeighborhoodIteratorType;
   typedef itk::ImageRegionIterator<DeformationFieldType> UpdateIteratorType;

   FaceCalculatorType faceCalculator;
   
   FaceListType faceList = faceCalculator(field, region, radius);
   typename FaceListType::iterator fIt = faceList.begin();
   
   // Ask the function object for a pointer to a data structure it
   // will use to manage any global values it needs.  We'll pass this
   // back to the function object at each calculation and then
   // again so that the function object can use it to determine a
   // time step for this iteration.
   void * globalData = drfp->GetGlobalDataPointer();
   
   // Process the non-boundary region.
   NeighborhoodIteratorType nD(radius, field, *fIt);
   UpdateIteratorType       nU(update,  *fIt);
   nD.GoToBegin();
   while( !nD.IsAtEnd() )
   {
      nU.Value() = drfp->ComputeUpdate(nD, globalData);
      ++nD;
      ++nU;
   }

   // Process each of the boundary faces.
   
   NeighborhoodIteratorType bD;
   UpdateIteratorType   bU;
   for (++fIt; fIt != faceList.end(); ++fIt)
   {
      bD = NeighborhoodIteratorType(radius, field, *fIt);
      bU = UpdateIteratorType(update, *fIt);
      
      bD.GoToBegin();
      bU.GoToBegin();
      while ( !bD.IsAtEnd() )
      {
         bU.Value() = drfp->ComputeUpdate(bD, globalData);
         ++bD;
         ++bU;
      }
   }

   // Ask the finite difference function to compute the time step for
   // this iteration.  We give it the global data pointer to use, then
   // ask it to free the global data memory.
   //timeStep = df->ComputeGlobalTimeStep(globalData);
   drfp->ReleaseGlobalDataPointer(globalData);


   //mexPrintf("done actual computations %f sec\n", timer.elapsed());
   //timer.restart();


   // Allocate outputs
   const mxClassID classID = mxGetClassID(prhs[0]);
   MatlabPixelType * outptrs[Dimension];
   for (unsigned int d=0; d<Dimension; d++)
   {
      plhs[d] = mxCreateNumericArray(
         Dimension, matlabdims, classID, mxREAL);

      outptrs[d] = static_cast<MatlabPixelType *>(mxGetData(plhs[d]));
   }

   
   //mexPrintf("done allocate outputs %f sec\n", timer.elapsed());
   //timer.restart();
   

   // put result into outputs
   const VectorPixelType * upptr = update->GetBufferPointer();
   const VectorPixelType * const upbuff_end = upptr + numPix;
   
   while ( upptr != upbuff_end )
   {
      for (unsigned int d=0; d<Dimension; d++)
      {
         *(outptrs[d])++ = (*upptr)[d];
      }
      ++upptr;
   }

   //mexPrintf("done outputs copy %f sec\n", timer.elapsed());
}
void readwarpfile(int nlhs,
               mxArray *plhs[],
               int nrhs,
               const mxArray *prhs[])
{
   typedef int PixelType;
   typedef itk::Image< PixelType, Dimension >           ImageType;
 //  mexPrintf("the dimentsion is ", Dimension);   
 //  typedef                                           PixelType;
   typedef itk::Vector<PixelType, Dimension>  VectorPixelType;
   typedef itk::Image<VectorPixelType, Dimension>       DeformationFieldType;

   typedef itk::WarpImageFilter
      <ImageType, ImageType, DeformationFieldType>      WarperType;
   mexPrintf("input number is: %d", nrhs);
   mexPrintf("output parameternumber is: %d", nlhs);
 // mexPrintf("the dimentsion is :%d", Dimension); 
   //boost::timer timer;
   typename DeformationFieldType::SizeType       size;
   typename DeformationFieldType::IndexType      start;
   unsigned int numPix(1u);
   char * inputDfFilename; 
   mwSize matlabdims[Dimension];

// here, we should read image parameter
   for (unsigned int d=0; d<Dimension; d++)
   {
      matlabdims[d]= mxGetDimensions(prhs[1])[d];
      size[d] = matlabdims[d];
      start[d] = 0;
      numPix *= size[d];

//    inptrs[d] = static_cast<const MatlabPixelType *>(mxGetData(prhs[d]));
   }
   inputDfFilename = mxArrayToString(prhs[0]);
   mexPrintf("output file name is %s", inputDfFilename);
   typename itk::ImageFileReader<DeformationFieldType>::Pointer df_reader = itk::ImageFileReader<DeformationFieldType>::New();
   df_reader->SetFileName(inputDfFilename);
   df_reader->Update();

   // Allocate images and deformation field
   typename ImageType::Pointer image
      = ImageType::New();
   typename DeformationFieldType::Pointer field = DeformationFieldType::New();
//  typename DeformationFieldType::Pointer field = 0; 
   typename DeformationFieldType::SpacingType spacing;
   spacing.Fill( 1.0 );
   
   typename DeformationFieldType::PointType origin;
   origin.Fill( 0.0 );
   
   typename DeformationFieldType::RegionType     region;
   field = df_reader->GetOutput();


   region = field->GetLargestPossibleRegion();

// initialize output point
   const mxClassID classID = mxGetClassID(prhs[1]);
   MatlabPixelType * fieldoutptrs[Dimension];
   for (unsigned int d=0; d<Dimension; d++)
   {
      plhs[d] = mxCreateNumericArray(
         Dimension, matlabdims, classID, mxREAL);

      fieldoutptrs[d] = static_cast<MatlabPixelType *>(mxGetData(plhs[d]));
   }


// read field
   VectorPixelType * fieldptr = field->GetBufferPointer();
   const VectorPixelType * const buff_end = fieldptr + numPix;
 

     while ( fieldptr != buff_end )
   {
      for (unsigned int d=0; d<Dimension; d++)
      {
         *(fieldoutptrs[d])++ = (*fieldptr)[d];
      }
      ++fieldptr;
   }

}