//---------------------------------------------------------
DMat& NDG2D::ConformingHrefine2D(IMat& edgerefineflag, const DMat& Qin)
//---------------------------------------------------------
{
#if (0)
  OutputNodes(false); // volume nodes
//OutputNodes(true);  // face nodes
#endif


  // function newQ = ConformingHrefine2D(edgerefineflag, Q)
  // Purpose: apply edge splits as requested by edgerefineflag

  IVec v1("v1"), v2("v2"), v3("v3"), tvi;
  DVec x1("x1"), x2("x2"), x3("x3"), y1("y1"), y2("y2"), y3("y3");
  DVec a1("a1"), a2("a2"), a3("a3");

  // count vertices
  assert (VX.size() == Nv);

  // find vertex triplets for elements to be refined
  v1 = EToV(All,1);  v2 = EToV(All,2);  v3 = EToV(All,3);
  x1 = VX(v1);       x2 = VX(v2);       x3 = VX(v3);
  y1 = VY(v1);       y2 = VY(v2);       y3 = VY(v3);

  // find angles at each element vertex (in radians)
  VertexAngles(x1,x2,x3,y1,y2,y3, a1,a2,a3);

  // absolute value of angle size
  a1.set_abs(); a2.set_abs(); a3.set_abs();

  int k=0,k1=0,f1=0,k2=0,f2=0, e1=0,e2=0,e3=0, b1=0,b2=0,b3=0, ref=0;
  IVec m1,m2,m3; DVec mx1, my1, mx2, my2, mx3, my3;

  // create new vertices at edge centers of marked elements 
  // (use unique numbering derived from unique edge number))
  m1 = max(IVec(Nv*(v1-1)+v2+1), IVec(Nv*(v2-1)+v1+1)); mx1=0.5*(x1+x2); my1=0.5*(y1+y2);
  m2 = max(IVec(Nv*(v2-1)+v3+1), IVec(Nv*(v3-1)+v2+1)); mx2=0.5*(x2+x3); my2=0.5*(y2+y3);
  m3 = max(IVec(Nv*(v1-1)+v3+1), IVec(Nv*(v3-1)+v1+1)); mx3=0.5*(x3+x1); my3=0.5*(y3+y1);

  // ensure that both elements sharing an edge are split
  for (k1=1; k1<=K; ++k1) {
    for (f1=1; f1<=Nfaces; ++f1) {
      if (edgerefineflag(k1,f1)) {
        k2 = EToE(k1,f1); 
        f2 = EToF(k1,f1);
        edgerefineflag(k2,f2) = 1;
      }
    }
  }

  // store old data
  IMat oldEToV = EToV;  DVec oldVX = VX, oldVY = VY; 

  // count the number of elements in the refined mesh
  int newK = countrefinefaces(edgerefineflag);
  EToV.resize(newK, Nfaces, true, 0);
  IMat newBCType(newK,3, "newBCType");
  
  //   kold = [];
  IVec kold(newK, "kold");  Index1D KI,KIo;

  int sk=1, skstart=0, skend=0;

  for (k=1; k<=K; ++k)
  {
    skstart = sk;

    e1 = edgerefineflag(k,1); b1 = BCType(k,1);
    e2 = edgerefineflag(k,2); b2 = BCType(k,2);
    e3 = edgerefineflag(k,3); b3 = BCType(k,3);
    ref = e1 + 2*e2 + 4*e3;
    
    switch (ref) {

    case 0: 
      EToV(sk, All) = IVec(v1(k),v2(k),v3(k));    newBCType(sk,All) = IVec(b1, b2, b3); ++sk;
      break;

    case 1:
      EToV(sk, All) = IVec(v1(k),m1(k),v3(k));    newBCType(sk,All) = IVec(b1,  0, b3); ++sk;
      EToV(sk, All) = IVec(m1(k),v2(k),v3(k));    newBCType(sk,All) = IVec(b1, b2,  0); ++sk;
      break;

    case 2:
      EToV(sk, All) = IVec(v2(k),m2(k),v1(k));    newBCType(sk,All) = IVec(b2,  0, b1); ++sk;
      EToV(sk, All) = IVec(m2(k),v3(k),v1(k));    newBCType(sk,All) = IVec(b2, b3,  0); ++sk;
      break;

    case 4:
      EToV(sk, All) = IVec(v3(k),m3(k),v2(k));    newBCType(sk,All) = IVec(b3,  0, b2); ++sk;
      EToV(sk, All) = IVec(m3(k),v1(k),v2(k));    newBCType(sk,All) = IVec(b3, b1,  0); ++sk;
      break;

    case 3:
      EToV(sk, All) = IVec(m1(k),v2(k),m2(k));    newBCType(sk,All) = IVec(b1, b2,  0); ++sk;
      if (a1(k) > a3(k)) { // split largest angle
        EToV(sk, All) = IVec(v1(k),m1(k),m2(k));  newBCType(sk,All) = IVec(b1,  0,  0); ++sk;
        EToV(sk, All) = IVec(v1(k),m2(k),v3(k));  newBCType(sk,All) = IVec( 0, b2, b3); ++sk;
      } else {
        EToV(sk, All) = IVec(v3(k),m1(k),m2(k));  newBCType(sk,All) = IVec( 0,  0, b2); ++sk;
        EToV(sk, All) = IVec(v3(k),v1(k),m1(k));  newBCType(sk,All) = IVec(b3, b1,  0); ++sk;
      }
      break;

    case 5:
      EToV(sk, All) = IVec(v1(k),m1(k),m3(k));    newBCType(sk,All) = IVec(b1,  0, b3); ++sk;
      if (a2(k) > a3(k)) { 
        // split largest angle
        EToV(sk, All) = IVec(v2(k),m3(k),m1(k));  newBCType(sk,All) = IVec( 0,  0, b1); ++sk;
        EToV(sk, All) = IVec(v2(k),v3(k),m3(k));  newBCType(sk,All) = IVec(b2, b3,  0); ++sk;
      } else {
        EToV(sk, All) = IVec(v3(k),m3(k),m1(k));  newBCType(sk,All) = IVec(b3,  0,  0); ++sk;
        EToV(sk, All) = IVec(v3(k),m1(k),v2(k));  newBCType(sk,All) = IVec( 0, b1, b2); ++sk;
      }
      break;

    case 6:
      EToV(sk, All) = IVec(v3(k),m3(k),m2(k));    newBCType(sk,All) = IVec(b3,  0, b2); ++sk;
      if (a1(k) > a2(k)) { 
        // split largest angle
        EToV(sk, All) = IVec(v1(k),m2(k),m3(k));  newBCType(sk,All) = IVec( 0, 0, b3); ++sk;
        EToV(sk, All) = IVec(v1(k),v2(k),m2(k));  newBCType(sk,All) = IVec(b1, b2,  0); ++sk;
      } else {
        EToV(sk, All) = IVec(v2(k),m2(k),m3(k));  newBCType(sk,All) = IVec(b2,  0,  0); ++sk;
        EToV(sk, All) = IVec(v2(k),m3(k),v1(k));  newBCType(sk,All) = IVec( 0 , b3, b1); ++sk;
      }
      break;

    default:
      // split all 
      EToV(sk, All) = IVec(m1(k),m2(k),m3(k)); newBCType(sk, All) = IVec( 0, 0,  0); ++sk;
      EToV(sk, All) = IVec(v1(k),m1(k),m3(k)); newBCType(sk, All) = IVec(b1, 0, b3); ++sk;
      EToV(sk, All) = IVec(v2(k),m2(k),m1(k)); newBCType(sk, All) = IVec(b2, 0, b1); ++sk;
      EToV(sk, All) = IVec(v3(k),m3(k),m2(k)); newBCType(sk, All) = IVec(b3, 0, b2); ++sk;
      break;
    }
    
    skend = sk;

    // kold = [kold; k*ones(skend-skstart, 1)];

    // element k is to be refined into (1:4) child elements.
    // store parent element numbers in array "kold" to help 
    // with accessing parent vertex data during refinement.

    KI.reset(skstart, skend-1); // ids of child elements
    kold(KI) = k;               // mark as children of element k
  }

  // Finished with edgerefineflag.  Delete if OBJ_temp
  if (edgerefineflag.get_mode() == OBJ_temp) { 
    delete (&edgerefineflag); 
  }


  // renumber new nodes contiguously
  // ids = unique([v1;v2;v3;m1;m2;m3]);
  bool unique=true; IVec IDS, ids;
  IDS = concat( concat(v1,v2,v3), concat(m1,m2,m3) );
  ids = sort(IDS, unique);
  Nv = ids.size();

  int max_id = EToV.max_val();
  umMSG(1, "max id in EToV is %d\n", max_id);

  //         M     N   nnz vals triplet
  CSi newids(max_id,1, Nv,  1,    1  );
  //  newids = sparse(max(max(EToV)),1);

  int i=0, j=1;
  for (i=1; i<=Nv; ++i) {
  //     newids(ids)= (1:Nv);
    newids.set1(ids(i),j, i);   // load 1-based triplets
  }          // row   col x
  newids.compress();            // convert to csc form


  // Matlab -----------------------------------------------
  // v1 = newids(v1); v2 = newids(v2); v3 = newids(v3);
  // m1 = newids(m1); m2 = newids(m2); m3 = newids(m3);
  //-------------------------------------------------------

  int KVi=v1.size(), KMi=m1.size();
  // read from copies, overwrite originals 
  
  // 1. reload ids for new vertices
  tvi = v1;  for (i=1;i<=KVi;++i) {v1(i) = newids(tvi(i), 1);}
  tvi = v2;  for (i=1;i<=KVi;++i) {v2(i) = newids(tvi(i), 1);}
  tvi = v3;  for (i=1;i<=KVi;++i) {v3(i) = newids(tvi(i), 1);}

  // 2. load ids for new (midpoint) vertices
  tvi = m1;  for (i=1;i<=KMi;++i) {m1(i) = newids(tvi(i), 1);}
  tvi = m2;  for (i=1;i<=KMi;++i) {m2(i) = newids(tvi(i), 1);}
  tvi = m3;  for (i=1;i<=KMi;++i) {m3(i) = newids(tvi(i), 1);}

  VX.resize(Nv); VY.resize(Nv);
  VX(v1) =  x1; VX(v2) =  x2; VX(v3) =  x3;
  VY(v1) =  y1; VY(v2) =  y2; VY(v3) =  y3;
  VX(m1) = mx1; VX(m2) = mx2; VX(m3) = mx3;
  VY(m1) = my1; VY(m2) = my2; VY(m3) = my3;


  if (newK != (sk-1)) {
    umERROR("NDG2D::ConformingHrefine2D", "Inconsistent element count: expect %d, but sk = %d", newK, (sk-1));
  } else {
    K = newK; // sk-1;
  }

  // dumpIMat(EToV, "EToV (before)");

  // EToV = newids(EToV);
  for (j=1; j<=3; ++j) {
    for (k=1; k<=K; ++k) {
      EToV(k,j) = newids(EToV(k,j), 1);
    }
  }

#if (0)
  dumpIMat(EToV, "EToV (after)");
  // umERROR("Checking ids", "Nigel, check EToV");
#endif


  BCType = newBCType;

  Nv = VX.size();
  // xold = x; yold = y;

  StartUp2D();


#if (1)
  OutputNodes(false); // volume nodes
//OutputNodes(true);  // face nodes
//umERROR("Exiting early", "Check adapted {volume,face} nodes");
#endif


  // allocate return object
  int Nfields = Qin.num_cols();
  DMat* tmpQ = new DMat(Np*K, Nfields, "newQ", OBJ_temp);
  DMat& newQ = *tmpQ;  // use a reference for syntax

  // quick return, if no interpolation is required
  if (Qin.size()<1) {
    return newQ;
  }

  
  DVec rOUT(Np),sOUT(Np),xout,yout,xy1(2),xy2(2),xy3(2),tmp(2),rhs;
  int ko=0,kv1=0,kv2=0,kv3=0,n=0;  DMat A(2,2), interp;
  DMat oldQ = const_cast<DMat&>(Qin);

  for (k=1; k<=K; ++k)
  {
    ko = kold(k); xout = x(All,k); yout = y(All,k);
    kv1=oldEToV(ko,1); kv2=oldEToV(ko,2); kv3=oldEToV(ko,3);
    xy1.set(oldVX(kv1), oldVY(kv1));
    xy2.set(oldVX(kv2), oldVY(kv2));
    xy3.set(oldVX(kv3), oldVY(kv3));
    A.set_col(1, xy2-xy1); A.set_col(2, xy3-xy1);
    
    for (i=1; i<=Np; ++i) {
      tmp.set(xout(i), yout(i));
      rhs = 2.0*tmp - xy2 - xy3;
      tmp = A|rhs;
      rOUT(i) = tmp(1);
      sOUT(i) = tmp(2);
    }

    KI.reset (Np*(k -1)+1, Np*k );  // nodes in new element k
    KIo.reset(Np*(ko-1)+1, Np*ko);  // nodes in old element ko

    interp = Vandermonde2D(N, rOUT, sOUT)*invV;

    for (n=1; n<=Nfields; ++n) 
    {
    //newQ(:,k,n)= interp*  Q(:,ko,n);
      //DVec tm1 = interp*oldQ(KIo,n);
      //dumpDVec(tm1, "tm1");
      newQ(KI,n) = interp*oldQ(KIo,n);
    }
  }
    
  return newQ;
}
// GAUSS-LOBATTO QUADRATURE ALONG EDGE
void SetEdgeDataGL_Unst(const mesh& Mesh, 
			int NumQuadPoints, 
			int NumBasisOrder, 
			edge_data_Unst* EdgeData)
{
  // Quick error check
  if (NumQuadPoints<2 || NumQuadPoints>6 || NumBasisOrder<1 || NumBasisOrder>5)
    {
      printf(" \n");
      printf(" Error in SetEdgeData_Unst.cpp \n");
      printf("   NumQuadPoints must be 2,3,4,5, or 6.\n");
      printf("   NumBasisOrder must be 1,2,3,4, or 5.\n");
      printf("     NumQuadPoints = %i\n",NumQuadPoints);
      printf("     NumBasisOrder = %i\n",NumBasisOrder);
      printf("\n");
      exit(1);
    }

  // ---------------------------------
  // Set quadrature weights and points
  // ---------------------------------
  switch( NumQuadPoints )
    {
    case 2:
      EdgeData->GL_wgts1d->set(1,  1.0 );
      EdgeData->GL_wgts1d->set(2,  1.0 );
      
      EdgeData->GL_xpts1d->set(1,  1.0 );
      EdgeData->GL_xpts1d->set(2, -1.0 );
      break;
      
    case 3:
      EdgeData->GL_wgts1d->set(1,  onethird );
      EdgeData->GL_wgts1d->set(2,  4.0*onethird );
      EdgeData->GL_wgts1d->set(3,  onethird );
      
      EdgeData->GL_xpts1d->set(1,  1.0 );
      EdgeData->GL_xpts1d->set(2,  0.0 );
      EdgeData->GL_xpts1d->set(3, -1.0 );
      break;
      
    case 4:
      EdgeData->GL_wgts1d->set(1,  0.5*onethird );
      EdgeData->GL_wgts1d->set(2,  2.5*onethird );
      EdgeData->GL_wgts1d->set(3,  2.5*onethird );
      EdgeData->GL_wgts1d->set(4,  0.5*onethird );
      
      EdgeData->GL_xpts1d->set(1,  1.0  );
      EdgeData->GL_xpts1d->set(2,  osq5 );
      EdgeData->GL_xpts1d->set(3, -osq5 );
      EdgeData->GL_xpts1d->set(4, -1.0  );
      break;
      
    case 5:
      EdgeData->GL_wgts1d->set(1,  0.1  );
      EdgeData->GL_wgts1d->set(2,  49.0/90.0 );
      EdgeData->GL_wgts1d->set(3,  32.0/45.0 );
      EdgeData->GL_wgts1d->set(4,  49.0/90.0 );
      EdgeData->GL_wgts1d->set(5,  0.1 );
      
      EdgeData->GL_xpts1d->set(1,  1.0      );
      EdgeData->GL_xpts1d->set(2,  sq3*osq7 );
      EdgeData->GL_xpts1d->set(3,  0.0      );
      EdgeData->GL_xpts1d->set(4, -sq3*osq7 );
      EdgeData->GL_xpts1d->set(5, -1.0      );        
      break;
      
    case 6:      
      EdgeData->GL_wgts1d->set(1,  0.2*onethird  );
      EdgeData->GL_wgts1d->set(2,  (1.4 - 0.1*sq7)*onethird );
      EdgeData->GL_wgts1d->set(3,  (1.4 + 0.1*sq7)*onethird );
      EdgeData->GL_wgts1d->set(4,  (1.4 + 0.1*sq7)*onethird );
      EdgeData->GL_wgts1d->set(5,  (1.4 - 0.1*sq7)*onethird );
      EdgeData->GL_wgts1d->set(6,  0.2*onethird );
      
      EdgeData->GL_xpts1d->set(1,  1.0                           );
      EdgeData->GL_xpts1d->set(2,  (1/21.0)*sqrt(147.0+42.0*sq7) );
      EdgeData->GL_xpts1d->set(3,  (1/21.0)*sqrt(147.0-42.0*sq7) );
      EdgeData->GL_xpts1d->set(4, -(1/21.0)*sqrt(147.0-42.0*sq7) );
      EdgeData->GL_xpts1d->set(5, -(1/21.0)*sqrt(147.0+42.0*sq7) );
      EdgeData->GL_xpts1d->set(6, -1.0                           );
      break;
    }

  // ---------------------------------
  // Legendre basis functions on the 
  // left and right of each edge
  // ---------------------------------
  const int NumEdges = Mesh.get_NumEdges();
  const int NumBasisComps = (NumBasisOrder*(NumBasisOrder+1))/2;
  dTensor1 xp1(3);
  dTensor1 yp1(3);
  dTensor1 xp2(3);
  dTensor1 yp2(3);
  dTensor1 xy1(2);
  dTensor1 xy2(2);
  dTensor1 mu1(NumBasisComps);
  dTensor1 mu2(NumBasisComps);

  for (int i=1; i<=NumEdges; i++)
    {   
      // Get edge information
      const double x1 = Mesh.get_edge(i,1);
      const double y1 = Mesh.get_edge(i,2);
      const double x2 = Mesh.get_edge(i,3);
      const double y2 = Mesh.get_edge(i,4);
      
      const int e1 = Mesh.get_eelem(i,1);
      const int e2 = Mesh.get_eelem(i,2);

      // Get element information about
      // the two elements that meet at
      // the current edge
      const double Area1 = Mesh.get_area_prim(e1);
      const double Area2 = Mesh.get_area_prim(e2);

      for (int k=1; k<=3; k++)
	{
	  xp1.set(k, Mesh.get_node(Mesh.get_tnode(e1,k),1) );
	  yp1.set(k, Mesh.get_node(Mesh.get_tnode(e1,k),2) );

	  xp2.set(k, Mesh.get_node(Mesh.get_tnode(e2,k),1) );
	  yp2.set(k, Mesh.get_node(Mesh.get_tnode(e2,k),2) );
	}

      const double xc1 = (xp1.get(1) + xp1.get(2) + xp1.get(3))/3.0;
      const double yc1 = (yp1.get(1) + yp1.get(2) + yp1.get(3))/3.0;
      const double xc2 = (xp2.get(1) + xp2.get(2) + xp2.get(3))/3.0;
      const double yc2 = (yp2.get(1) + yp2.get(2) + yp2.get(3))/3.0;

      // quadrature points on the edge
      for (int m=1; m<=NumQuadPoints; m++)
	{
	  // Take integration point s (in [-1,1])
	  // and map to physical domain
	  const double s = EdgeData->GL_xpts1d->get(m);
	  const double x = x1 + 0.5*(s+1.0)*(x2-x1);
	  const double y = y1 + 0.5*(s+1.0)*(y2-y1);

	  // Take physical point (x,y)
	  // and map into the coordinates
	  // of the two triangles that are
	  // adjacent to the current edge
	  xy1.set(1, ((yp1.get(3)-yp1.get(1))*(x-xc1) 
		    + (xp1.get(1)-xp1.get(3))*(y-yc1))/(2.0*Area1) );
	  xy1.set(2, ((yp1.get(1)-yp1.get(2))*(x-xc1) 
		    + (xp1.get(2)-xp1.get(1))*(y-yc1))/(2.0*Area1) );
	  
	  xy2.set(1, ((yp2.get(3)-yp2.get(1))*(x-xc2) 
		    + (xp2.get(1)-xp2.get(3))*(y-yc2))/(2.0*Area2) );
	  xy2.set(2, ((yp2.get(1)-yp2.get(2))*(x-xc2) 
		    + (xp2.get(2)-xp2.get(1))*(y-yc2))/(2.0*Area2) );

	  // Evaluate monomials at locations xy1
	  double xi = xy1.get(1);
	  double xi2 = xi*xi;
	  double xi3 = xi*xi2;
	  double xi4 = xi*xi3;

	  double eta = xy1.get(2);
	  double eta2 = eta*eta;
	  double eta3 = eta*eta2;
	  double eta4 = eta*eta3;

	  switch( NumBasisOrder )
	    {
	    case 5:  // fifth order		    		    
	      mu1.set(15, eta4     );
	      mu1.set(14, xi4      );
	      mu1.set(13, xi2*eta2 );
	      mu1.set(12, eta3*xi  );
	      mu1.set(11, xi3*eta  );
	      
	    case 4:  // fourth order
	      mu1.set(10, eta3     );
	      mu1.set(9,  xi3      );
	      mu1.set(8,  xi*eta2  );
	      mu1.set(7,  eta*xi2  );
	      
	    case 3:  // third order
	      mu1.set(6,  eta2     );
	      mu1.set(5,  xi2      );
	      mu1.set(4,  xi*eta   );		    
	      
	    case 2:  // second order		    
	      mu1.set(3, eta       );
	      mu1.set(2, xi        );
	      
	    case 1:  // first order
	      mu1.set(1, 1.0       );
	      
	      break;		    
	    }
	  
	  // Evaluate monomials at locations xy2
	  xi = xy2.get(1);
	  xi2 = xi*xi;
	  xi3 = xi*xi2;
	  xi4 = xi*xi3;
	  
	  eta = xy2.get(2);
	  eta2 = eta*eta;
	  eta3 = eta*eta2;
	  eta4 = eta*eta3;

	  switch( NumBasisOrder )
	    {
	    case 5:  // fifth order		    		    
	      mu2.set(15, eta4     );
	      mu2.set(14, xi4      );
	      mu2.set(13, xi2*eta2 );
	      mu2.set(12, eta3*xi  );
	      mu2.set(11, xi3*eta  );
	      
	    case 4:  // fourth order
	      mu2.set(10, eta3     );
	      mu2.set(9,  xi3      );
	      mu2.set(8,  xi*eta2  );
	      mu2.set(7,  eta*xi2  );
	      
	    case 3:  // third order
	      mu2.set(6,  eta2     );
	      mu2.set(5,  xi2      );
	      mu2.set(4,  xi*eta   );		    
	      
	    case 2:  // second order		    
	      mu2.set(3, eta       );
	      mu2.set(2, xi        );
	      
	    case 1:  // first order
	      mu2.set(1, 1.0       );
	      
	      break;		    
	    }
	  
	  // Finally, convert monomials to Legendre Polys
	  // on the two adjacent triangle
	  for (int k=1; k<=NumBasisComps; k++)
	    {
	      double tmp1 = 0.0;
	      double tmp2 = 0.0;
	      for (int j=1; j<=k; j++)
		{  
		  tmp1 = tmp1 + Mmat[k-1][j-1]*mu1.get(j);
		  tmp2 = tmp2 + Mmat[k-1][j-1]*mu2.get(j);
		}
	      
	      EdgeData->GL_phi_left->set(i,m,k,  tmp1 );
	      EdgeData->GL_phi_right->set(i,m,k, tmp2 );
	    }
	}
    }
  
}