/// Assemble the element-local residual vectors of the coupled
/// displacement (ux, uy) / pressure (p) problem for one element.
///
/// The local matrices Kuu, Cup, Kpp, Mpp and Cpu are integrated numerically
/// and then combined with the two given states into
///   r_u = Kuu*u1 - Cup*p1 - Fu
///   r_p = 1/dt*Cpu*u1 + (1/dt*Mpp + theta*Kpp)*p1 - Fp
/// with theta fixed to 1.0 inside this function.
///
/// \param timestep   only the time step size (dt) is read from it
/// \param e          element to assemble; provides dimension, node counts,
///                   ID and group ID (used as material index)
/// \param vec_order  order per variable, indexed as [ux, uy, p]; must have
///                   exactly three entries and ux/uy must share the same
///                   order (both checked with assert)
/// \param vec_x0     local vectors [ux, uy, p] that enter the right-hand side
///                   terms Fu/Fp (presumably the previous time step state --
///                   confirm with the caller)
/// \param vec_x1     local vectors [ux, uy, p] the residual is evaluated at
/// \param vec_r      output; entries 0..dim-1 receive the displacement
///                   residual per component, entry 2 the pressure residual.
///                   The function indexes it directly, it does not resize it.
///
/// NOTE(review): u0/u1 are built from two components only and the pressure
/// slot is hard-coded to index 2, so this looks like it assumes dim == 2 --
/// confirm before using it with 3D elements.
void FemPoroelasticResidualLocalAssembler::assembleComponents
    (   const NumLib::TimeStep &timestep,
        const MeshLib::IElement &e, 
        const std::vector<size_t> &vec_order, 
        const std::vector<LocalVectorType> &vec_x0,
        const std::vector<LocalVectorType> &vec_x1,
        std::vector<LocalVectorType> &vec_r
        )
{
    assert(vec_order.size()==3);

    //TODO how do you know 1st is ux and 3rd is p? who decides it?
    // Fixed positions of the variables inside vec_order / vec_x0 / vec_x1 / vec_r.
    const size_t id_ux = 0;
    const size_t id_uy = 1;
    const size_t id_p = 2;

    // Displacement components must use one common order; pressure may differ.
    const size_t u_order = vec_order[id_ux];
    assert(u_order==vec_order[id_uy]);
    const size_t p_order = vec_order[id_p];
    const LocalVectorType &ux0 = vec_x0[id_ux];
    const LocalVectorType &uy0 = vec_x0[id_uy];
    const LocalVectorType &ux1 = vec_x1[id_ux];
    const LocalVectorType &uy1 = vec_x1[id_uy];
    // combine ux and uy
    // Layout is component-wise: all ux entries first, followed by all uy entries.
    LocalVectorType u0(ux0.rows()*2);
    LocalVectorType u1(ux0.rows()*2);
    for (int i=0; i<ux0.rows(); i++) {
        u0(i) = ux0(i);
        u0(i+ux0.rows()) = uy0(i);
        u1(i) = ux1(i);
        u1(i+ux0.rows()) = uy1(i);
    }
    const LocalVectorType &p0 = vec_x0[id_p];
    const LocalVectorType &p1 = vec_x1[id_p];
    // ------------------------------------------------------------------------
    // Element
    // ------------------------------------------------------------------------
    const size_t dim = e.getDimension();
    const size_t n_strain_components = getNumberOfStrainComponents(dim);
    const size_t nnodes_u = e.getNumberOfNodes(u_order);
    const size_t nnodes_p = e.getNumberOfNodes(p_order);
    // Position handle passed to every material property evaluation below.
    const NumLib::TXPosition e_pos(NumLib::TXPosition::Element, e.getID());

    // ------------------------------------------------------------------------
    // Transient
    // ------------------------------------------------------------------------
    const double dt = timestep.getTimeStepSize();
    // theta is hard-coded; with 1.0 every (1-theta) / (theta-1) term below is zero.
    const double theta = 1.0;

    // ------------------------------------------------------------------------
    // Material (assuming element constant)
    // ------------------------------------------------------------------------
    // Porous medium and solid are selected by the element's group ID,
    // the fluid is always entry 0 of the fluid list.
    size_t mat_id = e.getGroupID();
    size_t fluid_id = 0; //TODO
    Ogs6FemData* femData = Ogs6FemData::getInstance();
    MaterialLib::PorousMedia* pm = femData->list_pm[mat_id];
    MaterialLib::Solid *solidphase = femData->list_solid[mat_id];
    MaterialLib::Fluid *fluidphase = femData->list_fluid[fluid_id];

    // solid
    // Density is optional (stays 0 if not set); the other solid properties
    // are dereferenced without a null check.
    double rho_s = .0;
    if (solidphase->density!=NULL)
        solidphase->density->eval(e_pos, rho_s);
    LocalMatrixType De = LocalMatrixType::Zero(n_strain_components, n_strain_components);
    MathLib::LocalMatrix nv(1,1);
    MathLib::LocalMatrix E(1,1);
    solidphase->poisson_ratio->eval(e_pos, nv);
    solidphase->Youngs_modulus->eval(e_pos, E);
    // Lambda and G feed the constitutive matrix De; K is returned by the
    // helper but not used afterwards in this function.
    double Lambda, G, K;
    MaterialLib::calculateLameConstant(nv(0,0), E(0,0), Lambda, G, K);
    MaterialLib::setElasticConsitutiveTensor(dim, Lambda, G, De);

    // fluid
    double mu = .0;
    fluidphase->dynamic_viscosity->eval(e_pos, mu);
    // rho_f is evaluated but not used afterwards in this function.
    double rho_f = .0;
    fluidphase->density->eval(e_pos, rho_f);

    // media
    double k;
    pm->permeability->eval(e_pos, k);
    // n (porosity) is evaluated but not used afterwards in this function.
    double n = .0;
    pm->porosity->eval(e_pos, n);
    double s = .0;
    pm->storage->eval(e_pos, s);
    // Scalar coefficient of the Kpp term: permeability divided by viscosity.
    double k_mu;
    k_mu = k / mu;


    // ------------------------------------------------------------------------
    // Body force
    // ------------------------------------------------------------------------
    // Gravity is switched off by the hard-coded flag, so body_force stays zero
    // and no body-force contribution is added to Fu.
    LocalVectorType body_force = LocalVectorType::Zero(dim);
    bool hasGravity = false;
    if (hasGravity) {
        body_force[dim-1] = rho_s * 9.81;
    }

    // ------------------------------------------------------------------------
    // Local component assembly
    // ------------------------------------------------------------------------
    LocalMatrixType Kuu = LocalMatrixType::Zero(nnodes_u*dim, nnodes_u*dim);
    LocalMatrixType Cup = LocalMatrixType::Zero(nnodes_u*dim, nnodes_p);
    LocalMatrixType Kpp = LocalMatrixType::Zero(nnodes_p, nnodes_p);
    LocalMatrixType Mpp = LocalMatrixType::Zero(nnodes_p, nnodes_p);
    LocalMatrixType Cpu = LocalMatrixType::Zero(nnodes_p, nnodes_u*dim);
    LocalVectorType Fu = LocalVectorType::Zero(nnodes_u*dim);
    LocalVectorType Fp = LocalVectorType::Zero(nnodes_p);

    // temp matrix
    // B and Nuvw are refilled at every sampling point; m comes from get_m(dim).
    LocalMatrixType B = LocalMatrixType::Zero(n_strain_components, nnodes_u*dim);
    LocalMatrixType Nuvw = LocalMatrixType::Zero(dim, nnodes_u*dim);
    const LocalMatrixType m = get_m(dim);

    //
    // First integration loop: uses the sampling points of the displacement
    // element and accumulates Kuu, Cup (and Fu if gravity were enabled).
    FemLib::IFiniteElement* fe_u = _feObjects.getFeObject(e, u_order);
    FemLib::IFiniteElement* fe_p = _feObjects.getFeObject(e, p_order);
    FemLib::IFemNumericalIntegration *q_u = fe_u->getIntegrationMethod();
    double gp_x[3], real_x[3];
    for (size_t j=0; j<q_u->getNumberOfSamplingPoints(); j++) {
        q_u->getSamplingPoint(j, gp_x);
        // Both elements are evaluated at the same sampling point so that the
        // mixed term Cup can combine B (from fe_u) with Np (from fe_p).
        fe_u->computeBasisFunctions(gp_x);
        fe_p->computeBasisFunctions(gp_x);
        // real_x is filled here but not read afterwards in this function.
        fe_u->getRealCoordinates(real_x);
        double fac_u = fe_u->getDetJ() * q_u->getWeight(j);

        //--- local component ----
        // set N,B
        LocalMatrixType &Nu = *fe_u->getBasisFunction();
        LocalMatrixType &dNu = *fe_u->getGradBasisFunction();
        setNu_Matrix_byComponent(dim, nnodes_u, Nu, Nuvw);
        setB_Matrix_byComponent(dim, nnodes_u, dNu, B);
        LocalMatrixType &Np = *fe_p->getBasisFunction();

        // K_uu += B^T * D * B
        Kuu.noalias() += fac_u * B.transpose() * De * B;

        // C_up += B^T * m * Np
        Cup.noalias() += fac_u * B.transpose() * m * Np;

        // Fu += N^T * b
        if (hasGravity) {
            Fu.noalias() += fac_u * Nuvw.transpose() * body_force;
        }
    }
    // Contribution of the x0 state to the displacement right-hand side;
    // both factors are zero for theta = 1.
    Fu.noalias() += (theta - 1) * Kuu * u0 + (1-theta)* Cup * p0;

    // Second integration loop: uses the sampling points of the pressure
    // element and accumulates Mpp, Kpp and Cpu.
    FemLib::IFemNumericalIntegration *q_p = fe_p->getIntegrationMethod();
    for (size_t j=0; j<q_p->getNumberOfSamplingPoints(); j++) {
        q_p->getSamplingPoint(j, gp_x);
        fe_u->computeBasisFunctions(gp_x);
        fe_p->computeBasisFunctions(gp_x);
        // real_x is filled here but not read afterwards in this function.
        fe_p->getRealCoordinates(real_x);
        double fac = fe_p->getDetJ() * q_p->getWeight(j);

        //--- local component ----
        // set N,B
        LocalMatrixType &dNu = *fe_u->getGradBasisFunction();
        setB_Matrix_byComponent(dim, nnodes_u, dNu, B);
        LocalMatrixType &Np = *fe_p->getBasisFunction();
        LocalMatrixType &dNp = *fe_p->getGradBasisFunction();

        // M_pp += Np^T * S * Np
        Mpp.noalias() += fac * Np.transpose() * s * Np;

        // K_pp += dNp^T * K * dNp
        Kpp.noalias() += fac * dNp.transpose() * k_mu * dNp;

        // C_pu += Np^T * m^T * B
        Cpu.noalias() += fac * Np.transpose() * m.transpose() * B;
    }

    // Backward euler
    // Pressure right-hand side built from the x0 state; the Kpp part is zero
    // for theta = 1.
    Fp = (1.0/dt * Mpp - (1-theta)*Kpp)* p0 + 1.0/dt * Cpu * u0;

    // r = K*u - RHS
    LocalVectorType r_u = Kuu * u1 - Cup * p1 - Fu;
    LocalVectorType r_p = 1.0/dt * Cpu * u1 + (1.0/dt * Mpp + theta * Kpp) * p1 - Fp;
    // Debug output for the first element, kept disabled.
//    if (e.getID()==0) {
//        std::cout << "u1=" << std::endl << u1 << std::endl;
//        std::cout << "p1=" << std::endl << p1 << std::endl;
//        std::cout << "Fp=" << std::endl << Fp << std::endl;
//        std::cout << "r_p=" << std::endl << r_p << std::endl;
//    }

    //
    // Split the stacked displacement residual back into one vector per
    // component (same component-wise layout as u0/u1), then store the
    // pressure residual in its fixed slot.
    for (size_t i=0; i<dim; i++) {
        vec_r[i] = r_u.segment(i*nnodes_u, nnodes_u);
    }
    vec_r[id_p] = r_p;
}
// Private methods
// Determine whether a slave node and a master segment are in contact and, if so, set up the vectors N and T
int ZeroLengthInterface2D::contactDetect(int s, int m1, int m2, int stage)
{
  //+--------------+-----------------+----------------+----------------+---------------+
  // NOTES: some methods to get displacements from nodes
  //+--------------+-----------------+----------------+----------------+---------------+
  // getDisp() :         get commit(k-1) disp, will be commit(k) after commit
  // getTrialDisp():     get Trial(k) disp
  // getIncrDisp():      get Trial(k)-Commit(k-1), will be 0 after commit
  // getIncrDeltaDisp(): get Trial(k)-Trial(k-1),  will be 0 after commit
  //+--------------+-----------------+----------------+----------------+--------------
  ////////////////////////////// for transient gap ///////////////////////////////////
  // DEFINE:
  // gap = (U_master-U_slave) / dot(ContactNormal),
  // defines overlapped normal distance, always keep positive (+) when contacted
  ///*
  // get current position and after trial displacement for (slave, master1, master2) nodes
  int i;
  const Vector &xs = nodePointers[s]->getCrds();
  const Vector &uxs = nodePointers[s]->getTrialDisp();
  const Vector &x1 = nodePointers[m1]->getCrds();
  const Vector &ux1= nodePointers[m1]->getTrialDisp();
  const Vector &x2 = nodePointers[m2]->getCrds();
  const Vector &ux2= nodePointers[m2]->getTrialDisp();
  
  Vector trial_slave(2), trial_master1(2), trial_master2(2);
  for (i = 0; i < 2; i++) {
    trial_slave(i) = xs(i) + uxs(i);
    trial_master1(i) = x1(i) + ux1(i);
    trial_master2(i) = x2(i) + ux2(i);
    //opserr << "trial_slave: " << trial_slave(i) << "\n";
    //opserr << "trial_master1: " << trial_master1(i) << "\n";
    //opserr << "trial_master2: " << trial_master2(i) << "\n";
  }
  
  // calculate normal gap for contact
  Vector diff(2);
  Vector ContactTangent(2);
  for (i = 0; i < 2; i++) {
    diff(i) = trial_master2(i) - trial_master1(i);
    //opserr << "diff: " << diff(i) << "\n";
  }
  double L  = diff.Norm();
  // tangent vector
  for (i = 0; i < 2; i++) ContactTangent(i) = (1/L) * (trial_master2(i) - trial_master1(i));
  // normal vector
  ContactNormal(0) = - ContactTangent(1);
  ContactNormal(1) = ContactTangent(0);
  
  normal_gap(s) = 0;
  double alpha = 0;
  double alpha_bar = 0;
  for (i = 0; i < 2; i++) {
    alpha += (1/L) * (trial_slave(i) - trial_master1(i)) * ContactTangent(i);
    normal_gap(s) += (trial_slave(i) - trial_master1(i)) * ContactNormal(i);
    diff(i) = x2(i) - x1(i);
  }
  
  double gapgap = normal_gap(s);
  
  double L_bar = diff.Norm();
  for (i = 0; i < 2; i++) alpha_bar += (1/L_bar) * (xs(i) - x1(i)) * ContactTangent(i);
  shear_gap(s) = (alpha - alpha_bar) * L_bar;
  /*
    /////////////////////////////// for transient gap ///////////////////////////////
    // we have another way to define the gap, can replace previous code block if want
    ////////////////////////////// for dynamic gap //////////////////////////////////
    const Vector   // get current trial incremental position
    &U_slave = nodePointers[0]->getCrds() + nodePointers[0]->getIncrDisp();
    const Vector
    &U_master= nodePointers[1]->getCrds() + nodePointers[1]->getIncrDisp();
    gap=0;
    int i;
    for (i=0; i<2; i++){
    gap += (U_master(i)-U_slave(i))* ContactNormal(i);
    }
    gap+=gap_n;
    ///////////////// for dynamic gap //////////////////////
    */
  // stage = 0 means searching slave nodes against master segments
  // stage = 1 means searching master nodes against slave segments
  if ((stage == 0  && normal_gap(s) >= 0 && alpha > 0 && alpha < 1) ||
      (stage == 1  && normal_gap(s) >= 0 && alpha >= 0 && alpha <= 1)) { // in contact
    N(0) = ContactNormal(0);
    N(1) = ContactNormal(1);
    N(2) = -(1 - alpha) * N(0);
    N(3) = -(1 - alpha) * N(1);
    N(4) = -(alpha) * N(0);
    N(5) = -(alpha) * N(1);
    
    T(0) = ContactTangent(0);
    T(1) = ContactTangent(1);
    T(2) = -(1-alpha) * T(0);
    T(3) = -(1-alpha) * T(1);
    T(4) = -(alpha) * T(0);
    T(5) = -(alpha) * T(1);
    
    return 1;
  } else {
    return 0; // Not in contact
  }
}