int main(int argc, char *argv[])
{
    LibUtilities::SessionReaderSharedPtr vSession = LibUtilities::SessionReader::CreateInstance(argc, argv);

    LibUtilities::CommSharedPtr vComm = vSession->GetComm();
    
    MultiRegions::ContField3DHomogeneous1DSharedPtr Exp_u, Exp_v, Exp_w;
    
    StdRegions::ConstFactorMap factors;
    FlagList flags;

    if( (argc != 2) && (argc != 3))
    {
        fprintf(stderr,"Usage: Deriv3DHomo2D meshfile [SysSolnType]   \n");
        exit(1);
    }

    //----------------------------------------------
    // Read in mesh from input file
    SpatialDomains::MeshGraphSharedPtr graph2D = MemoryManager<SpatialDomains::MeshGraph2D>::AllocateSharedPtr(vSession);
    //----------------------------------------------

    //----------------------------------------------
    // Define Expansion
    int nzpoints;
    NekDouble lz;
    int FFT;

    vSession->LoadParameter("HomModesZ", nzpoints);
	vSession->LoadParameter("LZ",        lz);
	vSession->LoadParameter("USEFFT",    FFT);
	
	bool useFFT = false;
	bool deal = false;
	if(FFT==1){useFFT = true;}
			
	
	const LibUtilities::PointsKey PkeyZ(nzpoints,LibUtilities::eFourierSingleModeSpaced);
	const LibUtilities::BasisKey  BkeyZ(LibUtilities::eFourierHalfModeRe,nzpoints,PkeyZ);
		
	Exp_u = MemoryManager<MultiRegions::ContField3DHomogeneous1D>::AllocateSharedPtr(vSession,BkeyZ,lz,useFFT,deal,graph2D,vSession->GetVariable(0));
	Exp_v = MemoryManager<MultiRegions::ContField3DHomogeneous1D>::AllocateSharedPtr(vSession,BkeyZ,lz,useFFT,deal,graph2D,vSession->GetVariable(1));
	Exp_w = MemoryManager<MultiRegions::ContField3DHomogeneous1D>::AllocateSharedPtr(vSession,BkeyZ,lz,useFFT,deal,graph2D,vSession->GetVariable(2));
		

    //----------------------------------------------
    // Print summary of solution details
        flags.set(eUseGlobal, false);
		
    const SpatialDomains::ExpansionMap &expansions = graph2D->GetExpansions();
	
    LibUtilities::BasisKey bkey0 = expansions.begin()->second->m_basisKeyVector[0];
    
	cout << "Calculating Derivatives (Homogeneous in z-plane):"  << endl;
	cout << "         Lz              : " << lz << endl;
    cout << "         N.modes         : " << bkey0.GetNumModes() << endl;
	cout << "         N.Z h**o modes  : " << BkeyZ.GetNumModes() << endl;
    cout << endl;
    //----------------------------------------------

    //----------------------------------------------
    // Set up coordinates of mesh for Forcing function evaluation
	int nq  = Exp_u->GetTotPoints();
	
	Array<OneD,NekDouble>  xc0,xc1,xc2;
    
    xc0 = Array<OneD,NekDouble>(nq,0.0);
    xc1 = Array<OneD,NekDouble>(nq,0.0);
    xc2 = Array<OneD,NekDouble>(nq,0.0);

    Exp_u->GetCoords(xc0,xc1,xc2);
    //----------------------------------------------
    Array<OneD,NekDouble>  dudx,dvdy,dwdz;
	Array<OneD,NekDouble>  dump;
	dump = Array<OneD,NekDouble>(nq,0.0);
	dudx = Array<OneD,NekDouble>(nq,0.0);
	dvdy = Array<OneD,NekDouble>(nq,0.0);
	dwdz = Array<OneD,NekDouble>(nq,0.0);
    //----------------------------------------------
    // Define initial fields
    LibUtilities::EquationSharedPtr ffunc_u = vSession->GetFunction("InitialCondition", 0);
	LibUtilities::EquationSharedPtr ffunc_v = vSession->GetFunction("InitialCondition", 1);
	LibUtilities::EquationSharedPtr ffunc_w = vSession->GetFunction("InitialCondition", 2);
	
	LibUtilities::EquationSharedPtr exac_u = vSession->GetFunction("ExactSolution", 0);
	LibUtilities::EquationSharedPtr exac_v = vSession->GetFunction("ExactSolution", 1);
	LibUtilities::EquationSharedPtr exac_w = vSession->GetFunction("ExactSolution", 2);
    	

    ffunc_u->Evaluate(xc0,xc1,xc2,Exp_u->UpdatePhys());
    ffunc_v->Evaluate(xc0,xc1,xc2,Exp_v->UpdatePhys());
    ffunc_w->Evaluate(xc0,xc1,xc2,Exp_w->UpdatePhys());

    exac_u->Evaluate(xc0,xc1,xc2,dudx);
    exac_v->Evaluate(xc0,xc1,xc2,dvdy);
    exac_w->Evaluate(xc0,xc1,xc2,dwdz);

    //----------------------------------------------
	
    //Taking derivative and printing the error
    cout << "Deriv u" << endl;
    Exp_u->PhysDeriv(Exp_u->GetPhys(),Exp_u->UpdatePhys(),dump,dump);
    cout << "Deriv u done" << endl;
    
    
    cout << "L infinity error:  " << Exp_u->Linf(Exp_u->GetPhys(), dudx) << endl;
    cout << "L 2 error  :       " << Exp_u->L2  (Exp_u->GetPhys(), dudx) << endl;	
    
    Exp_v->PhysDeriv(Exp_v->GetPhys(),dump,Exp_v->UpdatePhys(),dump);
    
    cout << "L infinity error:  " << Exp_v->Linf(Exp_v->GetPhys(), dvdy) << endl;
    cout << "L 2 error  :       " << Exp_v->L2  (Exp_v->GetPhys(), dvdy) << endl;
    
    Exp_w->PhysDeriv(Exp_w->GetPhys(),dump,dump,Exp_w->UpdatePhys());
    
    cout << "L infinity error:  " << Exp_w->Linf(Exp_w->GetPhys(), dwdz) << endl;
    cout << "L 2 error  :       " << Exp_w->L2  (Exp_w->GetPhys(), dwdz) << endl;
    
    return 0;
}
int main(int argc, char *argv[])
{
    MultiRegions::ContField3DSharedPtr Exp,Fce,Sol;
    int     i, nq,  coordim;
    Array<OneD,NekDouble>  fce,sol;
    Array<OneD,NekDouble>  xc0,xc1,xc2;
    NekDouble  lambda;
    vector<string> vFilenames;

    if(argc != 6)
    {
        fprintf(stderr,"Usage: TimingCGHelmSolve3D Type MeshSize NumModes OptimisationLevel OperatorToTest\n");
        fprintf(stderr,"    where: - Type is one of the following:\n");
        fprintf(stderr,"                  1: Regular  Hexahedrons \n");
        fprintf(stderr,"                  2: Deformed Hexahedrons (may not be supported) \n");
        fprintf(stderr,"                  3: Regular  Tetrahedrons \n");
        fprintf(stderr,"    where: - MeshSize is 1/h \n");
        fprintf(stderr,"    where: - NumModes is the number of 1D modes of the expansion \n");
        fprintf(stderr,"    where: - OptimisationLevel is one of the following:\n");
        fprintf(stderr,"                  0: Use elemental sum-factorisation evaluation \n");
        fprintf(stderr,"                  2: Use elemental matrix evaluation using blockmatrices \n");
        fprintf(stderr,"                  3: Use global matrix evaluation \n");
        fprintf(stderr,"                  4: Use optimal evaluation (this option requires optimisation-files being set-up) \n");
        fprintf(stderr,"    where: - OperatorToTest is one of the following:\n");
        fprintf(stderr,"                  0: BwdTrans \n");
        fprintf(stderr,"                  1: Inner Product \n");
        fprintf(stderr,"                  2: Mass Matrix \n");
        exit(1);
    }

    boost::filesystem::path basePath(BASE_PATH);

     int Type        = atoi(argv[1]);
     int MeshSize    = atoi(argv[2]);
     int NumModes    = atoi(argv[3]);
     int optLevel    = atoi(argv[4]);
     int opToTest    = atoi(argv[5]);

     //----------------------------------------------
     // Retrieve the necessary input files
     stringstream MeshFileName;
     stringstream MeshFileDirectory;
     stringstream BCfileName;
     stringstream ExpansionsFileName;
     stringstream GlobOptFileName;

     switch(Type)
    {
    case 1:
        {
            MeshFileDirectory << "RegularHexMeshes";
            MeshFileName << "UnitCube_RegularHexMesh_h_1_" << MeshSize << ".xml";
        }
        break;
    case 2:
        {
            MeshFileDirectory << "DeformedHexMeshes";
            MeshFileName << "UnitCube_DeformedHexMesh_h_1_" << MeshSize << ".xml";
        }
        break;
    case 3:
        {
            MeshFileDirectory << "RegularTetMeshes";
            MeshFileName << "UnitCube_RegularTetMesh_h_1_" << MeshSize << ".xml";
        }
        break;
    default:
        {
            cerr << "Type should be equal to one of the following values: "<< endl;
            cerr << "  1: Regular Hexahedrons" << endl;
            cerr << "  2: Deformed Hexahedrons" << endl;
            cerr << "  3: Regular Tetrahedrons" << endl;
            exit(1);
        }
    }

    BCfileName << "UnitCube_DirichletBoundaryConditions.xml";
    ExpansionsFileName << "NektarExpansionsNummodes" << NumModes << ".xml";

    switch(optLevel)
    {
    case 0:
        {
            GlobOptFileName << "NoGlobalMat.xml";
        }
        break;
    case 2:
        {
            GlobOptFileName << "DoBlockMat.xml";
        }
        break;
    case 3:
        {
            GlobOptFileName << "DoGlobalMat.xml";
        }
        break;
    case 4:
        {
            ASSERTL0(false,"Optimisation level not set up");
        }
        break;
    default:
        {
            ASSERTL0(false,"Unrecognised optimisation level");
        }
    }


    boost::filesystem::path MeshFilePath = basePath /
        boost::filesystem::path("InputFiles") /
        boost::filesystem::path("Geometry") /
        boost::filesystem::path(MeshFileDirectory.str()) /
        boost::filesystem::path(MeshFileName.str());
    vFilenames.push_back(PortablePath(MeshFilePath));

    boost::filesystem::path BCfilePath = basePath /
        boost::filesystem::path("InputFiles") /
        boost::filesystem::path("Conditions") /
        boost::filesystem::path(BCfileName.str());
    vFilenames.push_back(PortablePath(BCfilePath));

    boost::filesystem::path ExpansionsFilePath = basePath /
        boost::filesystem::path("InputFiles") /
        boost::filesystem::path("Expansions") /
        boost::filesystem::path(ExpansionsFileName.str());
    vFilenames.push_back(PortablePath(ExpansionsFilePath));

    boost::filesystem::path GlobOptFilePath = basePath /
        boost::filesystem::path("InputFiles") /
        boost::filesystem::path("Optimisation") /
        boost::filesystem::path(GlobOptFileName.str());
    vFilenames.push_back(PortablePath(GlobOptFilePath));

    //----------------------------------------------

    StdRegions::MatrixType type;

    switch (opToTest)
    {
        case 0:
            type = StdRegions::eBwdTrans;
            break;
        case 1:
            type = StdRegions::eIProductWRTBase;
            break;
        case 2:
            type = StdRegions::eMass;
            break;
        case 3:
            type = StdRegions::eHelmholtz;
            break;
        default:
            cout << "Operator " << opToTest << " not defined." << endl;
    }

    LibUtilities::SessionReaderSharedPtr vSession
            = LibUtilities::SessionReader::CreateInstance(argc, argv, vFilenames);

    //----------------------------------------------
    // Read in mesh from input file
    SpatialDomains::MeshGraphSharedPtr graph3D = MemoryManager<SpatialDomains::MeshGraph3D>::AllocateSharedPtr(vSession);
    //----------------------------------------------

    //----------------------------------------------
    // Print summary of solution details
    lambda = vSession->GetParameter("Lambda");
    //----------------------------------------------

    //----------------------------------------------
    // Define Expansion
    Exp = MemoryManager<MultiRegions::ContField3D>
                    ::AllocateSharedPtr(vSession, graph3D, vSession->GetVariable(0));
    //----------------------------------------------
    int NumElements = Exp->GetExpSize();

    //----------------------------------------------
    // Set up coordinates of mesh for Forcing function evaluation
    coordim = Exp->GetCoordim(0);
    nq      = Exp->GetTotPoints();

    xc0 = Array<OneD,NekDouble>(nq,0.0);
    xc1 = Array<OneD,NekDouble>(nq,0.0);
    xc2 = Array<OneD,NekDouble>(nq,0.0);

    switch(coordim)
    {
    case 1:
        Exp->GetCoords(xc0);
        break;
    case 2:
        Exp->GetCoords(xc0,xc1);
        break;
    case 3:
        Exp->GetCoords(xc0,xc1,xc2);
        break;
    }
    //----------------------------------------------

    //----------------------------------------------
    // Define forcing function for first variable defined in file
    fce = Array<OneD,NekDouble>(nq);
    LibUtilities::EquationSharedPtr ffunc = vSession->GetFunction("Forcing",0);
    for(i = 0; i < nq; ++i)
    {
        fce[i] = ffunc->Evaluate(xc0[i],xc1[i],xc2[i]);
    }
    //----------------------------------------------

    //----------------------------------------------
    // Setup expansion containing the  forcing function
    Fce = MemoryManager<MultiRegions::ContField3D>::AllocateSharedPtr(*Exp);
    Fce->SetPhys(fce);
    //----------------------------------------------

    //----------------------------------------------
    // See if there is an exact solution, if so
    // evaluate and plot errors
    LibUtilities::EquationSharedPtr ex_sol = vSession->GetFunction("ExactSolution",0);
    //----------------------------------------------
    // evaluate exact solution
    sol = Array<OneD,NekDouble>(nq);
    for(i = 0; i < nq; ++i)
    {
        sol[i] = ex_sol->Evaluate(xc0[i],xc1[i],xc2[i]);
    }
    Sol = MemoryManager<MultiRegions::ContField3D>::AllocateSharedPtr(*Exp);
    Sol->SetPhys(sol);
    Sol->SetPhysState(true);
    //----------------------------------------------

    NekDouble L2Error;
    NekDouble LinfError;
    if (type == StdRegions::eHelmholtz)
    {
        FlagList flags;
        flags.set(eUseGlobal, true);
        StdRegions::ConstFactorMap factors;
        factors[StdRegions::eFactorLambda] = lambda;

        //----------------------------------------------
        // Helmholtz solution taking physical forcing
        Exp->HelmSolve(Fce->GetPhys(), Exp->UpdateCoeffs(),flags,factors);
        // GeneralMatrixOp does not impose boundary conditions.
        //  MultiRegions::GlobalMatrixKey key(type, lambda, Exp->GetLocalToGlobalMap());
        //  Exp->GeneralMatrixOp (key, Fce->GetPhys(),Exp->UpdateContCoeffs(), true);
        //----------------------------------------------

        //----------------------------------------------
        // Backward Transform Solution to get solved values at
        Exp->BwdTrans(Exp->GetCoeffs(), Exp->UpdatePhys(), 
                      MultiRegions::eGlobal);
        //----------------------------------------------
        L2Error    = Exp->L2  (Sol->GetPhys());
        LinfError  = Exp->Linf(Sol->GetPhys());
    }
    else
    {
        Exp->FwdTrans(Sol->GetPhys(), Exp->UpdateCoeffs(),
                      MultiRegions::eGlobal);

        //----------------------------------------------
        // Backward Transform Solution to get solved values at
        Exp->BwdTrans(Exp->GetCoeffs(), Exp->UpdatePhys(),
                      MultiRegions::eGlobal);
        //----------------------------------------------
        L2Error    = Exp->L2  (Sol->GetPhys());
        LinfError  = Exp->Linf(Sol->GetPhys());
    }

    //--------------------------------------------
    // alternative error calculation
/*    const LibUtilities::PointsKey PkeyT1(30,LibUtilities::eGaussLobattoLegendre);
    const LibUtilities::PointsKey PkeyT2(30,LibUtilities::eGaussRadauMAlpha1Beta0);
    const LibUtilities::PointsKey PkeyQ1(30,LibUtilities::eGaussLobattoLegendre);
    const LibUtilities::PointsKey PkeyQ2(30,LibUtilities::eGaussLobattoLegendre);
    const LibUtilities::BasisKey  BkeyT1(LibUtilities::eModified_A,NumModes,PkeyT1);
    const LibUtilities::BasisKey  BkeyT2(LibUtilities::eModified_B,NumModes,PkeyT2);
    const LibUtilities::BasisKey  BkeyQ1(LibUtilities::eModified_A,NumModes,PkeyQ1);
    const LibUtilities::BasisKey  BkeyQ2(LibUtilities::eModified_A,NumModes,PkeyQ2);


    MultiRegions::ExpList3DSharedPtr ErrorExp =
        MemoryManager<MultiRegions::ExpList3D>::AllocateSharedPtr(BkeyT1,BkeyT2,BkeyT3,BkeyQ1,BkeyQ2,BkeyQ3,graph3D);

    int ErrorCoordim = ErrorExp->GetCoordim(0);
    int ErrorNq      = ErrorExp->GetTotPoints();

    Array<OneD,NekDouble> ErrorXc0(ErrorNq,0.0);
    Array<OneD,NekDouble> ErrorXc1(ErrorNq,0.0);
    Array<OneD,NekDouble> ErrorXc2(ErrorNq,0.0);

    switch(ErrorCoordim)
    {
    case 1:
        ErrorExp->GetCoords(ErrorXc0);
        break;
    case 2:
        ErrorExp->GetCoords(ErrorXc0,ErrorXc1);
        break;
    case 3:
        ErrorExp->GetCoords(ErrorXc0,ErrorXc1,ErrorXc2);
        break;
    }


    // evaluate exact solution
    Array<OneD,NekDouble> ErrorSol(ErrorNq);
    for(i = 0; i < ErrorNq; ++i)
    {
        ErrorSol[i] = ex_sol->Evaluate(ErrorXc0[i],ErrorXc1[i],ErrorXc2[i]);
    }

    // calcualte spectral/hp approximation on the quad points of this new
    // expansion basis
    Exp->GlobalToLocal(Exp->GetContCoeffs(),ErrorExp->UpdateCoeffs());
    ErrorExp->BwdTrans_IterPerExp(ErrorExp->GetCoeffs(),ErrorExp->UpdatePhys());

    NekDouble L2ErrorBis    = ErrorExp->L2  (ErrorSol);
    NekDouble LinfErrorBis  = ErrorExp->Linf(ErrorSol);
*/
    //--------------------------------------------
#if 0
    cout << "L infinity error: " << LinfErrorBis << endl;
    cout << "L 2 error:        " << L2ErrorBis   << endl;
#endif
    //----------------------------------------------
    NekDouble exeTime;
    int NumCalls;

    exeTime = TimeMatrixOp(type, Exp, Fce, NumCalls, lambda);

    int nLocCoeffs     = Exp->GetLocalToGlobalMap()->GetNumLocalCoeffs();
    int nGlobCoeffs    = Exp->GetLocalToGlobalMap()->GetNumGlobalCoeffs();
    int nLocBndCoeffs  = Exp->GetLocalToGlobalMap()->GetNumLocalBndCoeffs();
    int nGlobBndCoeffs = Exp->GetLocalToGlobalMap()->GetNumGlobalBndCoeffs();
    int nLocDirCoeffs  = Exp->GetLocalToGlobalMap()->GetNumLocalDirBndCoeffs();
    int nGlobDirCoeffs = Exp->GetLocalToGlobalMap()->GetNumGlobalDirBndCoeffs();
//    MultiRegions::GlobalMatrixKey key(StdRegions::eHelmholtz,lambda,Exp->GetLocalToGlobalMap());
//    int nnz            = Exp->GetGlobalMatrixNnz(key);

    ostream &outfile = cout;
    outfile.precision(0);
    outfile << setw(10) << Type << " ";
    outfile << setw(10) << NumElements << " ";
    outfile << setw(10) << NumModes << " ";
    outfile << setw(10) << NumCalls << " ";
    outfile << setw(10) << fixed << noshowpoint << exeTime << " ";
    outfile << setw(10) << fixed << noshowpoint << ((NekDouble) (exeTime/((NekDouble)NumCalls))) << " ";
    outfile.precision(7);
    outfile << setw(15) << scientific << noshowpoint << L2Error << " ";
    outfile << setw(15) << scientific << noshowpoint << "- "; // << L2ErrorBis << " ";
    outfile << setw(15) << scientific << noshowpoint << LinfError << " ";
    outfile << setw(15) << scientific << noshowpoint << "- ";// << LinfErrorBis << " ";
    outfile << setw(10) << nLocCoeffs  << " ";
    outfile << setw(10) << nGlobCoeffs << " ";
    outfile << setw(10) << nLocBndCoeffs  << " ";
    outfile << setw(10) << nGlobBndCoeffs << " ";
    outfile << setw(10) << nLocDirCoeffs  << " ";
    outfile << setw(10) << nGlobDirCoeffs << " ";
    outfile << setw(10) << "- "; // << nnz << " ";
    outfile << setw(10) << optLevel << " ";
    outfile << endl;
    //----------------------------------------------

    return 0;
}