void runProgram()
{
    /*
     * Read the obstacle geometry.
     */

    pcout << std::endl << "Reading STL data for the obstacle geometry." << std::endl;
    Array<T,3> center(param.cx, param.cy, param.cz);
    Array<T,3> centerLB(param.cxLB, param.cyLB, param.czLB);
    // The triangle-set defines the surface of the geometry.
    TriangleSet<T> triangleSet(param.geometry_fname, DBL);

    // Place the obstacle in the correct place in the simulation domain.
    // Here the "geometric center" of the obstacle is computed manually,
    // by computing first its bounding cuboid. In cases that the STL
    // file with the geometry of the obstacle contains its center as
    // the point, say (0, 0, 0), then the following variable
    // "obstacleCenter" must be set to (0, 0, 0) manually.
    Cuboid<T> bCuboid = triangleSet.getBoundingCuboid();
    Array<T,3> obstacleCenter = 0.5 * (bCuboid.lowerLeftCorner + bCuboid.upperRightCorner);
    triangleSet.translate(-obstacleCenter);
    triangleSet.scale(1.0/param.dx); // In lattice units from now on...
    triangleSet.translate(centerLB);
    triangleSet.writeBinarySTL(outputDir+"obstacle_LB.stl");

    // The DEFscaledMesh, and the triangle-boundary are more sophisticated data
    // structures used internally by Palabos to treat the boundary.
    plint xDirection = 0;
    plint borderWidth = 1;      // Because Guo acts in a one-cell layer.
                                // Requirement: margin>=borderWidth.
    plint margin = 1;           // Extra margin of allocated cells around the obstacle, for the case of moving walls.
    plint blockSize = 0;        // Size of blocks in the sparse/parallel representation.
                                // Zero means: don't use sparse representation.
    DEFscaledMesh<T> defMesh(triangleSet, 0, xDirection, margin, Dot3D(0, 0, 0));
    TriangleBoundary3D<T> boundary(defMesh);
    //boundary.getMesh().inflate();

    pcout << "tau = " << 1.0/param.omega << std::endl;
    pcout << "dx = " << param.dx << std::endl;
    pcout << "dt = " << param.dt << std::endl;
    pcout << "Number of iterations in an integral time scale: " << (plint) (1.0/param.dt) << std::endl;

    /*
     * Voxelize the domain.
     */

    // Voxelize the domain means: decide which lattice nodes are inside the obstacle and which are outside.
    pcout << std::endl << "Voxelizing the domain." << std::endl;
    plint extendedEnvelopeWidth = 2;   // Extrapolated off-lattice BCs.
    const int flowType = voxelFlag::outside;
    VoxelizedDomain3D<T> voxelizedDomain (
            boundary, flowType, param.boundingBox(), borderWidth, extendedEnvelopeWidth, blockSize );
    pcout << getMultiBlockInfo(voxelizedDomain.getVoxelMatrix()) << std::endl;

    /*
     * Generate the lattice, the density and momentum blocks.
     */

    pcout << "Generating the lattice, the rhoBar and j fields." << std::endl;
    MultiBlockLattice3D<T,DESCRIPTOR> *lattice = new MultiBlockLattice3D<T,DESCRIPTOR>(voxelizedDomain.getVoxelMatrix());
    if (param.useSmago) {
        defineDynamics(*lattice, lattice->getBoundingBox(),
                new SmagorinskyBGKdynamics<T,DESCRIPTOR>(param.omega, param.cSmago));
        pcout << "Using Smagorinsky BGK dynamics." << std::endl;
    } else {
        defineDynamics(*lattice, lattice->getBoundingBox(),
                new BGKdynamics<T,DESCRIPTOR>(param.omega));
        pcout << "Using BGK dynamics." << std::endl;
    }
    bool velIsJ = false;
    defineDynamics(*lattice, voxelizedDomain.getVoxelMatrix(), lattice->getBoundingBox(),
            new NoDynamics<T,DESCRIPTOR>(), voxelFlag::inside);
    lattice->toggleInternalStatistics(false);

    MultiBlockManagement3D sparseBlockManagement(lattice->getMultiBlockManagement());

    // The rhoBar and j fields are used at both the collision and at the implementation of the
    // outflow boundary condition.
    plint envelopeWidth = 1;
    MultiScalarField3D<T> *rhoBar = new MultiScalarField3D<T> (
            MultiBlockManagement3D (
                sparseBlockManagement.getSparseBlockStructure(),
                sparseBlockManagement.getThreadAttribution().clone(),
                envelopeWidth ),
            defaultMultiBlockPolicy3D().getBlockCommunicator(),
            defaultMultiBlockPolicy3D().getCombinedStatistics(),
            defaultMultiBlockPolicy3D().getMultiScalarAccess<T>() );
    rhoBar->toggleInternalStatistics(false);

    MultiTensorField3D<T,3> *j = new MultiTensorField3D<T,3> (
            MultiBlockManagement3D (
                sparseBlockManagement.getSparseBlockStructure(),
                sparseBlockManagement.getThreadAttribution().clone(),
                envelopeWidth ),
            defaultMultiBlockPolicy3D().getBlockCommunicator(),
            defaultMultiBlockPolicy3D().getCombinedStatistics(),
            defaultMultiBlockPolicy3D().getMultiTensorAccess<T,3>() );
    j->toggleInternalStatistics(false);

    std::vector<MultiBlock3D*> lattice_rho_bar_j_arg;
    lattice_rho_bar_j_arg.push_back(lattice);
    lattice_rho_bar_j_arg.push_back(rhoBar);
    lattice_rho_bar_j_arg.push_back(j);
    integrateProcessingFunctional(
            new ExternalRhoJcollideAndStream3D<T,DESCRIPTOR>(),
            lattice->getBoundingBox(), lattice_rho_bar_j_arg, 0);
    integrateProcessingFunctional(
            new BoxRhoBarJfunctional3D<T,DESCRIPTOR>(),
            lattice->getBoundingBox(), lattice_rho_bar_j_arg, 3); // rhoBar and j are computed at level 3 because
                                                                  // the boundary conditions are on levels 1 and 2.

    /*
     * Generate the off-lattice boundary condition on the obstacle and the outer-domain boundary conditions.
     */

    pcout << "Generating boundary conditions." << std::endl;

    OffLatticeBoundaryCondition3D<T,DESCRIPTOR,Velocity> *boundaryCondition;
    
    BoundaryProfiles3D<T,Velocity> profiles;
    bool useAllDirections=true;
    OffLatticeModel3D<T,Velocity>* offLatticeModel=0;
    if (param.freeSlipWall) {
        profiles.setWallProfile(new FreeSlipProfile3D<T>);
    }
    else {
        profiles.setWallProfile(new NoSlipProfile3D<T>);
    }
    offLatticeModel =
         new GuoOffLatticeModel3D<T,DESCRIPTOR> (
            new TriangleFlowShape3D<T,Array<T,3> >(voxelizedDomain.getBoundary(), profiles),
            flowType, useAllDirections );
    offLatticeModel->setVelIsJ(velIsJ);
    boundaryCondition = new OffLatticeBoundaryCondition3D<T,DESCRIPTOR,Velocity>(
            offLatticeModel, voxelizedDomain, *lattice);
    
    boundaryCondition->insert();

    // The boundary condition algorithm or the outer domain.
    OnLatticeBoundaryCondition3D<T,DESCRIPTOR>* outerBoundaryCondition
        = createLocalBoundaryCondition3D<T,DESCRIPTOR>();
    outerDomainBoundaries(lattice, rhoBar, j, outerBoundaryCondition);

    /*
     * Implement the outlet sponge zone.
     */

    if (param.numOutletSpongeCells > 0) {
        T bulkValue;
        Array<plint,6> numSpongeCells;

        if (param.outletSpongeZoneType == 0) {
            pcout << "Generating an outlet viscosity sponge zone." << std::endl;
            bulkValue = param.omega;
        } else if (param.outletSpongeZoneType == 1) {
            pcout << "Generating an outlet Smagorinsky sponge zone." << std::endl;
            bulkValue = param.cSmago;
        } else {
            pcout << "Error: unknown type of sponge zone." << std::endl;
            exit(-1);
        }

        // Number of sponge zone lattice nodes at all the outer domain boundaries.
        // So: 0 means the boundary at x = 0
        //     1 means the boundary at x = nx-1
        //     2 means the boundary at y = 0
        //     and so on...
        numSpongeCells[0] = 0;
        numSpongeCells[1] = param.numOutletSpongeCells;
        numSpongeCells[2] = 0;
        numSpongeCells[3] = 0;
        numSpongeCells[4] = 0;
        numSpongeCells[5] = 0;

        std::vector<MultiBlock3D*> args;
        args.push_back(lattice);

        if (param.outletSpongeZoneType == 0) {
            applyProcessingFunctional(new ViscositySpongeZone<T,DESCRIPTOR>(
                        param.nx, param.ny, param.nz, bulkValue, numSpongeCells),
                    lattice->getBoundingBox(), args);
        } else {
            applyProcessingFunctional(new SmagorinskySpongeZone<T,DESCRIPTOR>(
                        param.nx, param.ny, param.nz, bulkValue, param.targetSpongeCSmago, numSpongeCells),
                    lattice->getBoundingBox(), args);
        }
    }

    /*
     * Setting the initial conditions.
     */

    // Initial condition: Constant pressure and velocity-at-infinity everywhere.
    Array<T,3> uBoundary(param.getInletVelocity(0), 0.0, 0.0);
    initializeAtEquilibrium(*lattice, lattice->getBoundingBox(), 1.0, uBoundary);
    applyProcessingFunctional(
            new BoxRhoBarJfunctional3D<T,DESCRIPTOR>(),
            lattice->getBoundingBox(), lattice_rho_bar_j_arg); // Compute rhoBar and j before VirtualOutlet is executed.
    lattice->executeInternalProcessors(1); // Execute all processors except the ones at level 0.
    lattice->executeInternalProcessors(2);
    lattice->executeInternalProcessors(3);

    /*
     * Particles (streamlines).
     */

    // This part of the code that relates to particles, is purely for visualization
    // purposes. Particles are used to compute streamlines essentially.

    // Definition of a particle field.
    MultiParticleField3D<ParticleFieldT>* particles = 0;

    if (param.useParticles) {
        particles = new MultiParticleField3D<ParticleFieldT> (
            lattice->getMultiBlockManagement(),
            defaultMultiBlockPolicy3D().getCombinedStatistics() );

        std::vector<MultiBlock3D*> particleArg;
        particleArg.push_back(particles);

        std::vector<MultiBlock3D*> particleFluidArg;
        particleFluidArg.push_back(particles);
        particleFluidArg.push_back(lattice);

        // Functional that advances the particles to their new position at each predefined time step.
        integrateProcessingFunctional (
                new AdvanceParticlesEveryWhereFunctional3D<T,DESCRIPTOR>(param.cutOffSpeedSqr),
                lattice->getBoundingBox(), particleArg, 0);
        // Functional that assigns the particle velocity according to the particle's position in the fluid.
        integrateProcessingFunctional (
                new FluidToParticleCoupling3D<T,DESCRIPTOR>((T) param.particleTimeFactor),
                lattice->getBoundingBox(), particleFluidArg, 1 );

        // Definition of a domain from which particles will be injected in the flow field.
        Box3D injectionDomain(0, 0, centerLB[1]-0.25*param.ny, centerLB[1]+0.25*param.ny,
                centerLB[2]-0.25*param.nz, centerLB[2]+0.25*param.nz);

        // Definition of simple mass-less particles.
        Particle3D<T,DESCRIPTOR>* particleTemplate=0;
        particleTemplate = new PointParticle3D<T,DESCRIPTOR>(0, Array<T,3>(0.,0.,0.), Array<T,3>(0.,0.,0.));

        // Functional which injects particles with predefined probability from the specified injection domain.
        std::vector<MultiBlock3D*> particleInjectionArg;
        particleInjectionArg.push_back(particles);

        integrateProcessingFunctional (
                new InjectRandomParticlesFunctional3D<T,DESCRIPTOR>(particleTemplate, param.particleProbabilityPerCell),
                injectionDomain, particleInjectionArg, 0 );

        // Definition of an absorbtion domain for the particles.
        Box3D absorbtionDomain(param.outlet);

        // Functional which absorbs the particles which reach the specified absorbtion domain.
        integrateProcessingFunctional (
                new AbsorbParticlesFunctional3D<T,DESCRIPTOR>, absorbtionDomain, particleArg, 0 );

        particles->executeInternalProcessors();
    }

    /*
     * Starting the simulation.
     */

    plb_ofstream energyFile((outputDir+"average_energy.dat").c_str());

    pcout << std::endl;
    pcout << "Starting simulation." << std::endl;
    for (plint i = 0; i < param.maxIter; ++i) {
        if (i <= param.initialIter) {
            Array<T,3> uBoundary(param.getInletVelocity(i), 0.0, 0.0);
            setBoundaryVelocity(*lattice, param.inlet, uBoundary);
        }

        if (i % param.statIter == 0) {
             pcout << "At iteration " << i << ", t = " << i*param.dt << std::endl;
             Array<T,3> force(boundaryCondition->getForceOnObject());
             T factor = util::sqr(util::sqr(param.dx)) / util::sqr(param.dt);
             pcout << "Force on object over fluid density: F[x] = " << force[0]*factor << ", F[y] = "
                   << force[1]*factor << ", F[z] = " << force[2]*factor << std::endl;
             T avEnergy = boundaryCondition->computeAverageEnergy() * util::sqr(param.dx) / util::sqr(param.dt);
             pcout << "Average kinetic energy over fluid density: E = " << avEnergy << std::endl;
             energyFile << i*param.dt << "  " << avEnergy << std::endl;
             pcout << std::endl;
        }

        if (i % param.vtkIter == 0) {
            pcout << "Writing VTK at time t = " << i*param.dt << endl;
            writeVTK(*boundaryCondition, i);
            if (param.useParticles) {
                writeParticleVtk<T,DESCRIPTOR> (
                        *particles, createFileName(outputDir+"particles_", i, PADDING) + ".vtk",
                        param.maxNumParticlesToWrite );
            }
        }

        if (i % param.imageIter == 0) {
            pcout << "Writing PPM image at time t = " << i*param.dt << endl;
            writePPM(*boundaryCondition, i);
        }

        lattice->executeInternalProcessors();
        lattice->incrementTime();
        if (param.useParticles && i % param.particleTimeFactor == 0) {
            particles->executeInternalProcessors();
        }
    }
    
    energyFile.close();
    delete outerBoundaryCondition;
    delete boundaryCondition;
    if (param.useParticles) {
        delete particles;
    }
    delete j;
    delete rhoBar;
    delete lattice;
} 
// Instantiate the boundary conditions for the outer domain.
void outerDomainBoundaries(MultiBlockLattice3D<T,DESCRIPTOR> *lattice,
                           MultiScalarField3D<T> *rhoBar,
                           MultiTensorField3D<T,3> *j,
                           OnLatticeBoundaryCondition3D<T,DESCRIPTOR> *bc)
{
    Array<T,3> uBoundary(param.getInletVelocity(0), 0.0, 0.0);

    if (param.lateralFreeSlip) {
        pcout << "Free-slip lateral boundaries." << std::endl;

        lattice->periodicity().toggleAll(false);
        rhoBar->periodicity().toggleAll(false);
        j->periodicity().toggleAll(false);

        bc->setVelocityConditionOnBlockBoundaries(*lattice, param.inlet, boundary::dirichlet);
        setBoundaryVelocity(*lattice, param.inlet, uBoundary);

        bc->setVelocityConditionOnBlockBoundaries(*lattice, param.lateral1, boundary::freeslip);
        bc->setVelocityConditionOnBlockBoundaries(*lattice, param.lateral2, boundary::freeslip);
        bc->setVelocityConditionOnBlockBoundaries(*lattice, param.lateral3, boundary::freeslip);
        bc->setVelocityConditionOnBlockBoundaries(*lattice, param.lateral4, boundary::freeslip);

        // The VirtualOutlet is a sophisticated outflow boundary condition.
        Box3D globalDomain(lattice->getBoundingBox());
        std::vector<MultiBlock3D*> bcargs;
        bcargs.push_back(lattice);
        bcargs.push_back(rhoBar);
        bcargs.push_back(j);
        T outsideDensity = 1.0;
        int bcType = 1;
        integrateProcessingFunctional(new VirtualOutlet<T,DESCRIPTOR>(outsideDensity, globalDomain, bcType),
                param.outlet, bcargs, 2);
    } else {
        pcout << "Periodic lateral boundaries." << std::endl;

        lattice->periodicity().toggleAll(true);
        rhoBar->periodicity().toggleAll(true);
        j->periodicity().toggleAll(true);

        lattice->periodicity().toggle(0, false);
        rhoBar->periodicity().toggle(0, false);
        j->periodicity().toggle(0, false);

        bc->addVelocityBoundary0N(param.inlet, *lattice);
        setBoundaryVelocity(*lattice, param.inlet, uBoundary);

        // The VirtualOutlet is a sophisticated outflow boundary condition.
        // The "globalDomain" argument for the boundary condition must be
        // bigger than the actual bounding box of the simulation for
        // the directions which are periodic.
        Box3D globalDomain(lattice->getBoundingBox());
        globalDomain.y0 -= 2; // y-periodicity
        globalDomain.y1 += 2;
        globalDomain.z0 -= 2; // z-periodicity
        globalDomain.z1 += 2;
        std::vector<MultiBlock3D*> bcargs;
        bcargs.push_back(lattice);
        bcargs.push_back(rhoBar);
        bcargs.push_back(j);
        T outsideDensity = 1.0;
        int bcType = 1;
        integrateProcessingFunctional(new VirtualOutlet<T,DESCRIPTOR>(outsideDensity, globalDomain, bcType),
                param.outlet, bcargs, 2);
    }
}