コード例 #1
0
ファイル: math_emulate.c プロジェクト: UnitedMarsupials/kame
/*
 * math_emulate -- emulate one x87 floating-point instruction in software
 * on behalf of the current process.
 *
 * info: trap frame of the faulting context.  tf_eip is read to locate the
 *       instruction and is advanced past the two opcode bytes; tf_cs is
 *       compared with the user code selector; tf_eax receives the status
 *       word for "fstsw ax".
 *
 * Returns 0 after an instruction has been handled.  Unsupported or unknown
 * encodings are handed to math_abort(info, SIGILL) / math_abort(info, SIGFPE).
 * NOTE(review): math_abort is defined outside this view.  Several case
 * labels follow it without a break or return, which suggests it leaves the
 * function itself -- confirm against its definition.
 *
 * Decoding proceeds through a cascade of switches on the 11-bit opcode:
 *   1. exact opcode values (no operand / fixed operand forms),
 *   2. code >> 3           -- forms whose low three bits select ST(i),
 *   3. (code >> 3) & 0xe7  -- memory stores, loads and state save/restore,
 *   4. code >> 9 loads a memory operand into tmp, after which
 *      (code >> 3) & 0x27 selects the arithmetic applied to tmp and ST(0).
 */
static int
math_emulate(struct trapframe * info)
{
	unsigned short code;
	temp_real tmp;
	char * address;
	u_long oldeip;

	/* ever used fp? */
	/*
	 * First use by this process: flag the pcb and load the same
	 * control/status/tag values that the fninit case below installs.
	 */
	if ((((struct pcb *)curproc->p_addr)->pcb_flags & FP_SOFTFP) == 0) {
		((struct pcb *)curproc->p_addr)->pcb_flags |= FP_SOFTFP;
		I387.cwd = 0x037f;
		I387.swd = 0x0000;
		I387.twd = 0x0000;
	}

	/*
	 * Bit 15 of the status word is set when any of the low six bits is
	 * set in both the control word and the status word, cleared otherwise.
	 */
	if (I387.cwd & I387.swd & 0x3f)
		I387.swd |= 0x8000;
	else
		I387.swd &= 0x7fff;
	oldeip = info->tf_eip;
/* 0x001f means user code space */
	if ((u_short)info->tf_cs != 0x001F) {
		printf("math_emulate: %04x:%08lx\n", (u_short)info->tf_cs,
			oldeip);
		panic("?Math emulation needed in kernel?");
	}
	/*
	 * Fetch the 16-bit opcode word at the faulting eip and keep its low
	 * 11 bits.  NOTE(review): bswapw is defined elsewhere; presumably it
	 * swaps the two bytes of code in place -- confirm.
	 */
	code = get_fs_word((unsigned short *) oldeip);
	bswapw(code);
	code &= 0x7ff;
	/*
	 * Record the instruction pointer, and store cs in the low half and
	 * the opcode in the high half of the fcs field.
	 */
	I387.fip = oldeip;
	*(unsigned short *) &I387.fcs = (u_short) info->tf_cs;
	*(1+(unsigned short *) &I387.fcs) = code;
	/* step over the two opcode bytes that were just fetched */
	info->tf_eip += 2;
	/* stage 1: opcodes matched by their exact value */
	switch (code) {
		case 0x1d0: /* fnop */
			return(0);
		case 0x1d1: case 0x1d2: case 0x1d3:  /* fst to 32-bit mem */
		case 0x1d4: case 0x1d5: case 0x1d6: case 0x1d7:
			math_abort(info,SIGILL);
		case 0x1e0: /* fchs */
			/* flip bit 15 of the exponent field (the sign) */
			ST(0).exponent ^= 0x8000;
			return(0);
		case 0x1e1: /* fabs */
			/* clear bit 15 of the exponent field (the sign) */
			ST(0).exponent &= 0x7fff;
			return(0);
		case 0x1e2: case 0x1e3:
			math_abort(info,SIGILL);
		case 0x1e4: /* ftst */
			ftst(PST(0));
			return(0);
		case 0x1e5: /* fxam */
			printf("fxam not implemented\n");
			math_abort(info,SIGILL);
		case 0x1e6: case 0x1e7: /* fldenv */
			math_abort(info,SIGILL);
		case 0x1e8: /* fld1 */
			fpush();
			ST(0) = CONST1;
			return(0);
		case 0x1e9: /* fld2t */
			fpush();
			ST(0) = CONSTL2T;
			return(0);
		case 0x1ea: /* fld2e */
			fpush();
			ST(0) = CONSTL2E;
			return(0);
		case 0x1eb: /* fldpi */
			fpush();
			ST(0) = CONSTPI;
			return(0);
		case 0x1ec: /* fldlg2 */
			fpush();
			ST(0) = CONSTLG2;
			return(0);
		case 0x1ed: /* fldln2 */
			fpush();
			ST(0) = CONSTLN2;
			return(0);
		case 0x1ee: /* fldz */
			fpush();
			ST(0) = CONSTZ;
			return(0);
		case 0x1ef:
			math_abort(info,SIGILL);
		/* recognised but unimplemented: report to the user, then abort */
		case 0x1f0: /* f2xm1 */
		case 0x1f1: /* fyl2x */
		case 0x1f2: /* fptan */
		case 0x1f3: /* fpatan */
		case 0x1f4: /* fxtract */
		case 0x1f5: /* fprem1 */
		case 0x1f6: /* fdecstp */
		case 0x1f7: /* fincstp */
		case 0x1f8: /* fprem */
		case 0x1f9: /* fyl2xp1 */
		case 0x1fa: /* fsqrt */
		case 0x1fb: /* fsincos */
		case 0x1fe: /* fsin */
		case 0x1ff: /* fcos */
			uprintf(
			 "math_emulate: instruction %04x not implemented\n",
			  code + 0xd800);
			math_abort(info,SIGILL);
		case 0x1fc: /* frndint */
			frndint(PST(0),&tmp);
			real_to_real(&tmp,&ST(0));
			return(0);
		case 0x1fd: /* fscale */
			/* incomplete and totally inadequate -wfj */
			Fscale(PST(0), PST(1), &tmp);
			real_to_real(&tmp,&ST(0));
			return(0);			/* 19 Sep 92*/
		case 0x2e9: /* ????? */
/* if this should be a fucomp ST(0),ST(1) , it must be a 0x3e9  ATS */
			fucom(PST(1),PST(0));
			fpop(); fpop();
			return(0);
		case 0x3d0: case 0x3d1: /* fist ?? */
			/* accepted and ignored */
			return(0);
		case 0x3e2: /* fclex */
			I387.swd &= 0x7f00;
			return(0);
		case 0x3e3: /* fninit */
			I387.cwd = 0x037f;
			I387.swd = 0x0000;
			I387.twd = 0x0000;
			return(0);
		case 0x3e4:
			/* accepted and ignored */
			return(0);
		case 0x6d9: /* fcompp */
			fcom(PST(1),PST(0));
			fpop(); fpop();
			return(0);
		case 0x7e0: /* fstsw ax */
			/* only the low 16 bits of the saved eax are overwritten */
			*(short *) &info->tf_eax = I387.swd;
			return(0);
	}
	/* stage 2: forms where the low three opcode bits select ST(i) */
	switch (code >> 3) {
		case 0x18: /* fadd */
			fadd(PST(0),PST(code & 7),&tmp);
			real_to_real(&tmp,&ST(0));
			return(0);
		case 0x19: /* fmul */
			fmul(PST(0),PST(code & 7),&tmp);
			real_to_real(&tmp,&ST(0));
			return(0);
		case 0x1a: /* fcom */
			fcom(PST(code & 7),PST(0));
			return(0);
		case 0x1b: /* fcomp */
			fcom(PST(code & 7),PST(0));
			fpop();
			return(0);
		case 0x1c: /* fsubr */
			/* negate a copy of ST(i), then add it to ST(0) */
			real_to_real(&ST(code & 7),&tmp);
			tmp.exponent ^= 0x8000;
			fadd(PST(0),&tmp,&tmp);
			real_to_real(&tmp,&ST(0));
			return(0);
		case 0x1d: /* fsub */
			/* negates ST(0) in place before adding ST(i) */
			ST(0).exponent ^= 0x8000;
			fadd(PST(0),PST(code & 7),&tmp);
			real_to_real(&tmp,&ST(0));
			return(0);
		case 0x1e: /* fdivr */
			fdiv(PST(0),PST(code & 7),&tmp);
			real_to_real(&tmp,&ST(0));
			return(0);
		case 0x1f: /* fdiv */
			fdiv(PST(code & 7),PST(0),&tmp);
			real_to_real(&tmp,&ST(0));
			return(0);
		case 0x38: /* fld */
			fpush();
			ST(0) = ST((code & 7)+1);  /* why plus 1 ????? ATS */
			return(0);
		case 0x39: /* fxch */
			fxchg(&ST(0),&ST(code & 7));
			return(0);
		case 0x3b: /*  ??? ??? wrong ???? ATS */
			ST(code & 7) = ST(0);
			fpop();
			return(0);
		/* 0x98-0x9f: same operations, result written to ST(i) */
		case 0x98: /* fadd */
			fadd(PST(0),PST(code & 7),&tmp);
			real_to_real(&tmp,&ST(code & 7));
			return(0);
		case 0x99: /* fmul */
			fmul(PST(0),PST(code & 7),&tmp);
			real_to_real(&tmp,&ST(code & 7));
			return(0);
		case 0x9a: /* ???? , my manual don't list a direction bit
for fcom , ??? ATS */
			fcom(PST(code & 7),PST(0));
			return(0);
		case 0x9b: /* same as above , ATS */
			fcom(PST(code & 7),PST(0));
			fpop();
			return(0);
		case 0x9c: /* fsubr */
			ST(code & 7).exponent ^= 0x8000;
			fadd(PST(0),PST(code & 7),&tmp);
			real_to_real(&tmp,&ST(code & 7));
			return(0);
		case 0x9d: /* fsub */
			real_to_real(&ST(0),&tmp);
			tmp.exponent ^= 0x8000;
			fadd(PST(code & 7),&tmp,&tmp);
			real_to_real(&tmp,&ST(code & 7));
			return(0);
		case 0x9e: /* fdivr */
			fdiv(PST(0),PST(code & 7),&tmp);
			real_to_real(&tmp,&ST(code & 7));
			return(0);
		case 0x9f: /* fdiv */
			fdiv(PST(code & 7),PST(0),&tmp);
			real_to_real(&tmp,&ST(code & 7));
			return(0);
		case 0xb8: /* ffree */
			printf("ffree not implemented\n");
			math_abort(info,SIGILL);
		case 0xb9: /* fstp ???? where is the pop ? ATS */
			fxchg(&ST(0),&ST(code & 7));
			return(0);
		case 0xba: /* fst */
			ST(code & 7) = ST(0);
			return(0);
		case 0xbb: /* ????? encoding of fstp to mem ? ATS */
			ST(code & 7) = ST(0);
			fpop();
			return(0);
		case 0xbc: /* fucom */
			fucom(PST(code & 7),PST(0));
			return(0);
		case 0xbd: /* fucomp */
			fucom(PST(code & 7),PST(0));
			fpop();
			return(0);
		/* 0xd8-0xdf: result written to ST(i), then the stack is popped */
		case 0xd8: /* faddp */
			fadd(PST(code & 7),PST(0),&tmp);
			real_to_real(&tmp,&ST(code & 7));
			fpop();
			return(0);
		case 0xd9: /* fmulp */
			fmul(PST(code & 7),PST(0),&tmp);
			real_to_real(&tmp,&ST(code & 7));
			fpop();
			return(0);
		case 0xda: /* ??? encoding of ficom with 16 bit mem ? ATS */
			fcom(PST(code & 7),PST(0));
			fpop();
			return(0);
		case 0xdc: /* fsubrp */
			ST(code & 7).exponent ^= 0x8000;
			fadd(PST(0),PST(code & 7),&tmp);
			real_to_real(&tmp,&ST(code & 7));
			fpop();
			return(0);
		case 0xdd: /* fsubp */
			real_to_real(&ST(0),&tmp);
			tmp.exponent ^= 0x8000;
			fadd(PST(code & 7),&tmp,&tmp);
			real_to_real(&tmp,&ST(code & 7));
			fpop();
			return(0);
		case 0xde: /* fdivrp */
			fdiv(PST(0),PST(code & 7),&tmp);
			real_to_real(&tmp,&ST(code & 7));
			fpop();
			return(0);
		case 0xdf: /* fdivp */
			fdiv(PST(code & 7),PST(0),&tmp);
			real_to_real(&tmp,&ST(code & 7));
			fpop();
			return(0);
		case 0xf8: /* fild 16-bit mem ???? ATS */
			/*
			 * NOTE(review): the message mentions ffree although the
			 * case is labelled fild -- looks copied from case 0xb8.
			 * If math_abort leaves the function, the fpop/return
			 * after it are never reached -- confirm.
			 */
			printf("ffree not implemented\n");
			math_abort(info,SIGILL);
			fpop();
			return(0);
		case 0xf9: /*  ????? ATS */
			fxchg(&ST(0),&ST(code & 7));
			return(0);
		case 0xfa: /* fist 16-bit mem ? ATS */
		case 0xfb: /* fistp 16-bit mem ? ATS */
			ST(code & 7) = ST(0);
			fpop();
			return(0);
	}
	/*
	 * stage 3: memory-operand stores, extended loads and state
	 * save/restore.  The put_*, get_* and ea helpers receive info and code.
	 */
	switch ((code>>3) & 0xe7) {
		case 0x22:
			put_short_real(PST(0),info,code);
			return(0);
		case 0x23:
			put_short_real(PST(0),info,code);
			fpop();
			return(0);
		case 0x24:
			/*
			 * Copy 7 longs from user memory over the start of I387;
			 * code is reused as the loop counter from here on.
			 */
			address = ea(info,code);
			for (code = 0 ; code < 7 ; code++) {
				((long *) & I387)[code] =
				   get_fs_long((unsigned long *) address);
				address += 4;
			}
			return(0);
		case 0x25:
			/* load the control word from user memory */
			address = ea(info,code);
			*(unsigned short *) &I387.cwd =
				get_fs_word((unsigned short *) address);
			return(0);
		case 0x26:
			/* write the first 7 longs of I387 to user memory */
			address = ea(info,code);
			/*verify_area(address,28);*/
			for (code = 0 ; code < 7 ; code++) {
				put_fs_long( ((long *) & I387)[code],
					(unsigned long *) address);
				address += 4;
			}
			return(0);
		case 0x27:
			/* store the control word to user memory */
			address = ea(info,code);
			/*verify_area(address,2);*/
			put_fs_word(I387.cwd,(short *) address);
			return(0);
		case 0x62:
			put_long_int(PST(0),info,code);
			return(0);
		case 0x63:
			put_long_int(PST(0),info,code);
			fpop();
			return(0);
		case 0x65:
			fpush();
			get_temp_real(&tmp,info,code);
			real_to_real(&tmp,&ST(0));
			return(0);
		case 0x67:
			put_temp_real(PST(0),info,code);
			fpop();
			return(0);
		case 0xa2:
			put_long_real(PST(0),info,code);
			return(0);
		case 0xa3:
			put_long_real(PST(0),info,code);
			fpop();
			return(0);
		case 0xa4:
			/* copy 27 longs (108 bytes) from user memory over I387 */
			address = ea(info,code);
			for (code = 0 ; code < 27 ; code++) {
				((long *) & I387)[code] =
				   get_fs_long((unsigned long *) address);
				address += 4;
			}
			return(0);
		case 0xa6:
			/*
			 * Write 27 longs of I387 to user memory, then load the
			 * same control/status/tag values as the fninit case.
			 */
			address = ea(info,code);
			/*verify_area(address,108);*/
			for (code = 0 ; code < 27 ; code++) {
				put_fs_long( ((long *) & I387)[code],
					(unsigned long *) address);
				address += 4;
			}
			I387.cwd = 0x037f;
			I387.swd = 0x0000;
			I387.twd = 0x0000;
			return(0);
		case 0xa7:
			/* store the status word to user memory */
			address = ea(info,code);
			/*verify_area(address,2);*/
			put_fs_word(I387.swd,(short *) address);
			return(0);
		case 0xe2:
			put_short_int(PST(0),info,code);
			return(0);
		case 0xe3:
			put_short_int(PST(0),info,code);
			fpop();
			return(0);
		case 0xe4:
			fpush();
			get_BCD(&tmp,info,code);
			real_to_real(&tmp,&ST(0));
			return(0);
		case 0xe5:
			fpush();
			get_longlong_int(&tmp,info,code);
			real_to_real(&tmp,&ST(0));
			return(0);
		case 0xe6:
			put_BCD(PST(0),info,code);
			fpop();
			return(0);
		case 0xe7:
			put_longlong_int(PST(0),info,code);
			fpop();
			return(0);
	}
	/*
	 * stage 4a: fetch the memory operand into tmp; the operand format is
	 * chosen by code >> 9.  Values other than 0, 1, 2 and 4 leave tmp
	 * untouched.
	 */
	switch (code >> 9) {
		case 0:
			get_short_real(&tmp,info,code);
			break;
		case 1:
			get_long_int(&tmp,info,code);
			break;
		case 2:
			get_long_real(&tmp,info,code);
			break;
		case 4:
			get_short_int(&tmp,info,code);
	}
	/* stage 4b: combine tmp with ST(0); the result replaces ST(0) */
	switch ((code>>3) & 0x27) {
		case 0:
			fadd(&tmp,PST(0),&tmp);
			real_to_real(&tmp,&ST(0));
			return(0);
		case 1:
			fmul(&tmp,PST(0),&tmp);
			real_to_real(&tmp,&ST(0));
			return(0);
		case 2:
			fcom(&tmp,PST(0));
			return(0);
		case 3:
			fcom(&tmp,PST(0));
			fpop();
			return(0);
		case 4:
			/* negate the memory operand, then add */
			tmp.exponent ^= 0x8000;
			fadd(&tmp,PST(0),&tmp);
			real_to_real(&tmp,&ST(0));
			return(0);
		case 5:
			/* negate ST(0) in place, then add */
			ST(0).exponent ^= 0x8000;
			fadd(&tmp,PST(0),&tmp);
			real_to_real(&tmp,&ST(0));
			return(0);
		case 6:
			fdiv(PST(0),&tmp,&tmp);
			real_to_real(&tmp,&ST(0));
			return(0);
		case 7:
			fdiv(&tmp,PST(0),&tmp);
			real_to_real(&tmp,&ST(0));
			return(0);
	}
	/* remaining load form: push the fetched operand onto the stack */
	if ((code & 0x138) == 0x100) {
			fpush();
			real_to_real(&tmp,&ST(0));
			return(0);
	}
	/* nothing matched: log the location and opcode, then abort */
	printf("Unknown math-insns: %04x:%08x %04x\n",(u_short)info->tf_cs,
		info->tf_eip,code);
	math_abort(info,SIGFPE);
}
コード例 #2
0
//---------------------------------------------------------
DVec& NDG2D::PoissonIPDGbc2D
(DVec& ubc, //[in]
 DVec& qbc  //[in]
)
//---------------------------------------------------------
{
  // Purpose: assemble the boundary-condition contribution to the right
  //          hand side of the 2D interior-penalty DG Poisson problem.
  //
  // ubc : boundary data applied on faces whose BCType is BC_Dirichlet,
  //       indexed by face-node id (k1-1)*Nfp*Nfaces + (f1-1)*Nfp + 1..Nfp
  // qbc : boundary data applied on faces whose BCType is BC_Neuman,
  //       indexed the same way
  //
  // Returns a reference to a vector of length Np*K that is allocated with
  // "new" and tagged OBJ_temp.
  // NOTE(review): ownership of that allocation is presumably handled by
  // the library's temporary-object mechanism -- confirm before changing
  // how the result is consumed.  The "+=" accumulation below also assumes
  // the freshly constructed vector starts out zeroed -- confirm.

  // initialize parameters
  DVec faceR("faceR"), faceS("faceS");
  DMat V1D("V1D"), Dx("Dx"),Dy("Dy"), Dn1("Dn1"), mmE_Fm1("mmE(:,Fm1)");
  IVec Fm("Fm"), Fm1("Fm1"), fidM("fidM");
  double lnx=0.0,lny=0.0,lsJ=0.0,hinv=0.0,gtau=0.0;
  int i=0,k1=0,f1=0,id=0;
  IVec i1_Nfp = Range(1,Nfp);
  double N1N1 = double((N+1)*(N+1));

  // build local face matrices
  // (sized 4 so the 1-based face indices 1..3 are valid; slot 0 is unused)
  DMat massEdge[4]; // = zeros(Np,Np,Nfaces);
  for (i=1; i<=Nfaces; ++i) {
    massEdge[i].resize(Np,Np);
  }

  // face mass matrix 1
  Fm = Fmask(All,1); faceR = r(Fm);
  V1D = Vandermonde1D(N, faceR);
  massEdge[1](Fm,Fm) = inv(V1D*trans(V1D));

  // face mass matrix 2
  Fm = Fmask(All,2); faceR = r(Fm);
  V1D = Vandermonde1D(N, faceR);
  massEdge[2](Fm,Fm) = inv(V1D*trans(V1D));

  // face mass matrix 3 (parameterised by s instead of r)
  Fm = Fmask(All,3); faceS = s(Fm);
  V1D = Vandermonde1D(N, faceS);
  massEdge[3](Fm,Fm) = inv(V1D*trans(V1D));

  // build DG right hand side
  DVec* pBC = new DVec(Np*K, "bc", OBJ_temp);
  DVec& bc = (*pBC);  // reference, for syntax
  ////////////////////////////////////////////////////////////////

  umMSG(1, "\n ==> {OP} assembly [bc]: ");
  for (k1=1; k1<=K; ++k1)
  {
    if (! (k1%100)) { umMSG(1, "%d, ",k1); }

    // visit every face of element k1; only faces with a non-zero
    // BCType contribute
    for (f1=1; f1<=Nfaces; ++f1)
    {
      if (BCType(k1,f1))
      {
        ////////////////////////added by Kevin ///////////////////////////////
        Fm1 = Fmask(All,f1);
        // ids of the Nfp nodes on face f1 of element k1
        fidM  = (k1-1)*Nfp*Nfaces + (f1-1)*Nfp + i1_Nfp;
        // first node of the face: geometric factors are sampled there
        id = 1+(f1-1)*Nfp + (k1-1)*Nfp*Nfaces;

        lnx = nx(id); lny = ny(id);
        lsJ = sJ(id); hinv = Fscale(id);

        // physical-space derivative operators and the normal derivative
        Dx = rx(1,k1)*Dr + sx(1,k1)*Ds;
        Dy = ry(1,k1)*Dr + sy(1,k1)*Ds;
        Dn1 = lnx*Dx + lny*Dy;

        // columns Fm1 of the face mass matrix, scaled by the surface Jacobian
        // (MATLAB reference: mmE = lsJ*massEdge(:,:,f1))
        mmE_Fm1 = massEdge[f1](All,Fm1);  mmE_Fm1 *= lsJ;

        gtau = 10*N1N1*hinv; // set penalty scaling

        // rows Np*(k1-1)+1 .. Np*k1 of bc belong to element k1
        switch (BCType(k1,f1)) {
          case BC_Dirichlet:
            bc(Np*(k1-1)+Range(1,Np)) += (gtau*mmE_Fm1 - trans(Dn1)*mmE_Fm1)*ubc(fidM);
            break;
          case BC_Neuman:
            bc(Np*(k1-1)+Range(1,Np)) += mmE_Fm1*qbc(fidM);
            break;
          default:
            // any other boundary type adds nothing to bc
            std::cout<<"warning: boundary condition is incorrect"<<std::endl;
            break;
        }
      }
    }
  }
  return bc;
}
コード例 #3
0
//---------------------------------------------------------
void NDG3D::PoissonIPDG3D(CSd& spOP, CSd& spMM)
//---------------------------------------------------------
{
  // function [OP,MM] = PoissonIPDG3D()
  //
  // Purpose: Set up the discrete Poisson matrix directly
  //          using LDG. The operator is set up in the weak form
  // NOTE(review): the text above says LDG while the routine is named
  // IPDG -- confirm which wording is intended.
  //
  // spOP [out]: receives the assembled operator, loaded from (i,j,x)
  //             triplets gathered over all K elements and their faces.
  //             See the review note at the spOP.load section below.
  // spMM [out]: receives the block mass matrix, one J(1,k1)*MassMatrix
  //             block per element.
  //
  // Side effect: the member MassMatrix is overwritten.


  DVec faceR("faceR"), faceS("faceS"), faceT("faceT");
  DMat V2D;  IVec Fm("Fm");  IVec i1_Nfp = Range(1,Nfp);
  double opti1=0.0, opti2=0.0; int i=0; 

  umLOG(1, "\n ==> {OP,MM} assembly: ");
  opti1 = timer.read(); // time assembly

  // build local face matrices
  // (sized 5 so the 1-based face indices 1..4 are valid; slot 0 is unused)
  DMat massEdge[5]; // = zeros(Np,Np,Nfaces);
  for (i=1; i<=Nfaces; ++i) {
    massEdge[i].resize(Np,Np);
  }

  // face mass matrix 1
  Fm = Fmask(All,1); faceR=r(Fm); faceS=s(Fm); 
  V2D = Vandermonde2D(N, faceR, faceS);
  massEdge[1](Fm,Fm) = inv(V2D*trans(V2D));

  // face mass matrix 2
  Fm = Fmask(All,2); faceR = r(Fm); faceT = t(Fm);
  V2D = Vandermonde2D(N, faceR, faceT);
  massEdge[2](Fm,Fm) = inv(V2D*trans(V2D));

  // face mass matrix 3
  Fm = Fmask(All,3); faceS = s(Fm); faceT = t(Fm);
  V2D = Vandermonde2D(N, faceS, faceT); 
  massEdge[3](Fm,Fm) = inv(V2D*trans(V2D));

  // face mass matrix 4
  Fm = Fmask(All,4); faceS = s(Fm); faceT = t(Fm);
  V2D = Vandermonde2D(N, faceS, faceT); 
  massEdge[4](Fm,Fm) = inv(V2D*trans(V2D));

  // build local volume mass matrix
  MassMatrix = trans(invV)*invV;

  DMat Dx("Dx"),Dy("Dy"),Dz("Dz"), Dx2("Dx2"),Dy2("Dy2"),Dz2("Dz2");
  DMat Dn1("Dn1"),Dn2("Dn2"), mmE("mmE"), OP11("OP11"), OP12("OP12");
  DMat mmE_All_Fm1, mmE_Fm1_Fm1, Dn2_Fm2_All;
  IMat rows1,cols1,rows2,cols2;  int k1=0,f1=0,k2=0,f2=0,id=0;
  Index1D entries, entriesMM, idsM;  IVec fidM,vidM,Fm1,vidP,Fm2;
  double lnx=0.0,lny=0.0,lnz=0.0,lsJ=0.0,hinv=0.0,gtau=0.0;
  double N1N1 = double((N+1)*(N+1)); int NpNp = Np*Np;

  // build DG derivative matrices
  // (buffer sizes: one Np*Np block per element plus one per face)
  int max_OP = (K*Np*Np*(1+Nfaces));
  int max_MM = (K*Np*Np);

  // "OP" triplets (i,j,x), extracted to {Ai,Aj,Ax}
  IVec OPi(max_OP), OPj(max_OP), Ai,Aj; DVec OPx(max_OP), Ax;
  // "MM" triplets (i,j,x)
  IVec MMi(max_MM), MMj(max_MM); DVec MMx(max_MM);
  IVec OnesNp = Ones(Np);

  // global node numbering
  // entries / entriesMM are sliding windows of NpNp slots into the
  // triplet buffers; each is advanced by NpNp after a block is stored
  entries.reset(1,NpNp); entriesMM.reset(1,NpNp);

  OP12.resize(Np,Np);

  for (k1=1; k1<=K; ++k1)
  {
    if (! (k1%250)) { umLOG(1, "%d, ",k1); }

    // global row/column indices of the diagonal block of element k1
    rows1 = outer( Range((k1-1)*Np+1,k1*Np), OnesNp );
    cols1 = trans(rows1);

    // Build local operators  
    Dx = rx(1,k1)*Dr + sx(1,k1)*Ds + tx(1,k1)*Dt;   
    Dy = ry(1,k1)*Dr + sy(1,k1)*Ds + ty(1,k1)*Dt;
    Dz = rz(1,k1)*Dr + sz(1,k1)*Ds + tz(1,k1)*Dt;

    // volume (stiffness) part of the diagonal block
    OP11 = J(1,k1)*(trans(Dx)*MassMatrix*Dx + 
                    trans(Dy)*MassMatrix*Dy + 
                    trans(Dz)*MassMatrix*Dz);

    // Build element-to-element parts of operator
    for (f1=1; f1<=Nfaces; ++f1) {
      // neighbour element and the neighbour's matching face
      k2 = EToE(k1,f1); f2 = EToF(k1,f1); 

      rows2 = outer( Range((k2-1)*Np+1, k2*Np), OnesNp );
      cols2 = trans(rows2);

      // face-node ids, then element-local node numbers (1..Np) on the
      // interior (Fm1) and exterior (Fm2) side of the face
      fidM  = (k1-1)*Nfp*Nfaces + (f1-1)*Nfp + i1_Nfp;
      vidM = vmapM(fidM); Fm1 = mod(vidM-1,Np)+1;
      vidP = vmapP(fidM); Fm2 = mod(vidP-1,Np)+1;

      // geometric factors are sampled at the first node of the face
      id = 1+(f1-1)*Nfp + (k1-1)*Nfp*Nfaces;
      lnx = nx(id);  lny = ny(id);  lnz = nz(id); lsJ = sJ(id); 
      // larger of the face scales seen from the two sides
      hinv = std::max(Fscale(id), Fscale(1+(f2-1)*Nfp, k2));    

      Dx2 = rx(1,k2)*Dr + sx(1,k2)*Ds + tx(1,k2)*Dt;   
      Dy2 = ry(1,k2)*Dr + sy(1,k2)*Ds + ty(1,k2)*Dt;
      Dz2 = rz(1,k2)*Dr + sz(1,k2)*Ds + tz(1,k2)*Dt;
      
      // normal derivatives on both sides, both using the normal of k1
      Dn1 = lnx*Dx  + lny*Dy  + lnz*Dz;
      Dn2 = lnx*Dx2 + lny*Dy2 + lnz*Dz2;

      mmE = lsJ*massEdge[f1];

      gtau = 2.0 * N1N1 * hinv; // set penalty scaling

      // the element is its own neighbour across this face
      // (presumably a boundary face -- confirm against EToE's convention)
      if (EToE(k1,f1)==k1) {
        OP11 += ( gtau*mmE - mmE*Dn1 - trans(Dn1)*mmE ); // ok
      }
      else 
      {
        // interior face variational terms
        OP11 += 0.5*( gtau*mmE - mmE*Dn1 - trans(Dn1)*mmE );

        // extract mapped regions:
        mmE_All_Fm1 = mmE(All,Fm1);
        mmE_Fm1_Fm1 = mmE(Fm1,Fm1);
        Dn2_Fm2_All = Dn2(Fm2,All);

        // off-diagonal block coupling element k1 (rows) to k2 (columns)
        OP12 = 0.0;   // reset to zero
        OP12(All,Fm2)  = -0.5*(       gtau*mmE_All_Fm1 );
        OP12(Fm1,All) -=  0.5*(            mmE_Fm1_Fm1*Dn2_Fm2_All );
      //OP12(All,Fm2) -=  0.5*(-trans(Dn1)*mmE_All_Fm1 );
        OP12(All,Fm2) +=  0.5*( trans(Dn1)*mmE_All_Fm1 );

        // load this set of triplets
#if (1)
        OPi(entries)=rows1; OPj(entries)=cols2, OPx(entries)=OP12;
        entries += (NpNp);
#else
        // NOTE(review): this branch is compiled out; it uses sk, start,
        // eid, rid and cid, none of which is declared in this function.
        //###########################################################
        // load only the lower triangle (after droptol test?)
        sk=0; start=entries(1);
        for (int i=1; i<=NpNp; ++i) {
          eid = start+i;
          id=entries(eid); rid=rows1(i); cid=cols2(i);
          if (rows1(rid) >= cid) {          // take lower triangle
            if ( fabs(OP12(id)) > 1e-15) {  // drop small entries
              ++sk; OPi(id)=rid; OPj(id)=cid, OPx(id)=OP12(id);
            }
          }
        }
        entries += sk;
        //###########################################################
#endif
      }
    }

    // store the finished diagonal block and the element's mass block
    OPi(entries  )=rows1; OPj(entries  )=cols1, OPx(entries  )=OP11;
    MMi(entriesMM)=rows1; MMj(entriesMM)=cols1; MMx(entriesMM)=J(1,k1)*MassMatrix;
    entries += (NpNp); entriesMM += (NpNp);
  }
  umLOG(1, "\n ==> {OP,MM} to sparse\n");

  // entries was advanced once past the last stored block; rewind it so
  // that it spans slot 1 through the last slot actually written
  entries.reset(1, entries.hi()-Np*Np);

  // Extract triplets from the large buffers. Note: this 
  // requires copying each array, and since these arrays 
  // can be HUGE(!), we force immediate deallocation:

  Ai=OPi(entries);  OPi.Free();
  Aj=OPj(entries);  OPj.Free();
  Ax=OPx(entries);  OPx.Free();
  umLOG(1, " ==> triplets ready (OP) nnz = %10d\n", entries.hi());

  // adjust triplet indices for 0-based sparse operators
  Ai -= 1; Aj -= 1; MMi -= 1; MMj -= 1;  int npk=Np*K;

  // NOTE(review): when neither NDG_USE_CHOLMOD nor NDG_New_CHOLINC is
  // defined, every spOP.load call below is commented out, so this
  // function never loads spOP in that configuration -- confirm intended.
#if defined(NDG_USE_CHOLMOD) || defined(NDG_New_CHOLINC)
  // load only the lower triangle tril(OP)        free args?
  spOP.load(npk,npk, Ai,Aj,Ax, sp_LT, false,1e-15, true);  // {LT, false} -> TriL
#else
  // select {upper,lower,both} triangles
//spOP.load(npk,npk, Ai,Aj,Ax, sp_LT, true,1e-15,true);   // LT -> enforce symmetry
//spOP.load(npk,npk, Ai,Aj,Ax, sp_All,true,1e-15,true);   // All-> includes "noise"
//spOP.load(npk,npk, Ai,Aj,Ax, sp_UT, false,1e-15,true);  // UT -> triu(OP) only
#endif

  Ai.Free();  Aj.Free();  Ax.Free();

  umLOG(1, " ==> triplets ready (MM) nnz = %10d\n", entriesMM.hi());

  //-------------------------------------------------------
  // The mass matrix operator will NOT be factorised, 
  // Load ALL elements (both upper and lower triangles):
  //-------------------------------------------------------
  spMM.load(npk,npk, MMi,MMj,MMx, sp_All,false,1.00e-15,true);
  MMi.Free(); MMj.Free(); MMx.Free();

  opti2 = timer.read(); // time assembly
  umLOG(1, " ==> {OP,MM} converted to csc.  (%g secs)\n", opti2-opti1);
}
コード例 #4
0
void NDG2D::PoissonIPDGbc2D(
  CSd& spOP //[out] sparse operator 
  )
{
  // Purpose: assemble the sparse operator that maps boundary data to the
  //          boundary-condition part of the right hand side of the 2D
  //          interior-penalty DG Poisson problem.
  //
  // spOP [out]: loaded as an (Np*K) x (Nfp*Nfaces*K) matrix.  Each face
  //             with a non-zero BCType contributes one Np x Nfp block:
  //               BC_Dirichlet : gtau*mmE - trans(Dn1)*mmE
  //               BC_Neuman    : mmE
  //             Any other type prints a warning and contributes a zero
  //             block.
  //
  // The operator is not symmetric and is only used to create reference
  // right hand sides (see the note above the final load).

  // upper bound on the number of triplets; sizes the buffers below
  int max_OP = (K*Np*Np*(1+Nfaces));

  //initialize parameters
  DVec faceR("faceR"), faceS("faceS");
  IVec Fm("Fm"), Fm1("Fm1"), fidM("fidM");
  DMat V1D("V1D"); int i=0;

  // build local face matrices
  // (sized 4 so the 1-based face indices 1..3 are valid; slot 0 is unused)
  DMat massEdge[4]; // = zeros(Np,Np,Nfaces);
  for (i=1; i<=Nfaces; ++i) {
    massEdge[i].resize(Np,Np);
  }

  // face mass matrix 1
  Fm = Fmask(All,1); faceR = r(Fm);
  V1D = Vandermonde1D(N, faceR);
  massEdge[1](Fm,Fm) = inv(V1D*trans(V1D));

  // face mass matrix 2
  Fm = Fmask(All,2); faceR = r(Fm);
  V1D = Vandermonde1D(N, faceR);
  massEdge[2](Fm,Fm) = inv(V1D*trans(V1D));

  // face mass matrix 3 (parameterised by s instead of r)
  Fm = Fmask(All,3); faceS = s(Fm);
  V1D = Vandermonde1D(N, faceS);
  massEdge[3](Fm,Fm) = inv(V1D*trans(V1D));

  //continue initialize parameters
  DMat Dx("Dx"),Dy("Dy"), Dn1("Dn1"), mmE_Fm1("mmE(:,Fm1)");
  double lnx=0.0,lny=0.0,lsJ=0.0,hinv=0.0,gtau=0.0;
  int k1=0,f1=0,id=0;
  IVec i1_Nfp = Range(1,Nfp);
  double N1N1 = double((N+1)*(N+1));

  // "OP" triplets (i,j,x), extracted to {Ai,Aj,Ax}
  IVec OPi(max_OP),OPj(max_OP), Ai,Aj; DVec OPx(max_OP), Ax;
  IMat rows1, cols1;  Index1D entries; DMat OP11(Np,Nfp, 0.0);

  // entries is a sliding window of Np*Nfp slots into the triplet buffers;
  // cols1 holds the global column (face-node) indices of the current face
  entries.reset(1,Np*Nfp);
  cols1 = outer(Ones(Np), Range(1,Nfp));

  umMSG(1, "\n ==> {OP} assembly [bc]: ");
  for (k1=1; k1<=K; ++k1)
  {
    if (! (k1%100)) { umMSG(1, "%d, ",k1); }
    // global row indices of the nodes of element k1
    rows1 = outer(Range((k1-1)*Np+1,k1*Np), Ones(Nfp));

    // visit every face of element k1; only faces with a non-zero
    // BCType contribute a block
    for (f1=1; f1<=Nfaces; ++f1)
    {
      if (BCType(k1,f1))
      {
        ////////////////////////added by Kevin ///////////////////////////////
        Fm1 = Fmask(All,f1);
        fidM  = (k1-1)*Nfp*Nfaces + (f1-1)*Nfp + i1_Nfp;
        // first node of the face: geometric factors are sampled there
        id = 1+(f1-1)*Nfp + (k1-1)*Nfp*Nfaces;

        lnx = nx(id); lny = ny(id);
        lsJ = sJ(id); hinv = Fscale(id);

        // physical-space derivative operators and the normal derivative
        Dx = rx(1,k1)*Dr + sx(1,k1)*Ds;
        Dy = ry(1,k1)*Dr + sy(1,k1)*Ds;
        Dn1 = lnx*Dx + lny*Dy;

        // columns Fm1 of the face mass matrix, scaled by the surface Jacobian
        // (MATLAB reference: mmE = lsJ*massEdge(:,:,f1))
        mmE_Fm1 = massEdge[f1](All,Fm1);  mmE_Fm1 *= lsJ;

        gtau = 10*N1N1*hinv; // set penalty scaling

        switch (BCType(k1,f1)) {
          case BC_Dirichlet:
            OP11 = gtau*mmE_Fm1 - trans(Dn1)*mmE_Fm1;
            break;
          case BC_Neuman:
            OP11 = mmE_Fm1;
            break;
          default:
            std::cout<<"warning: boundary condition is incorrect"<<std::endl;
            // OP11 still holds the block of the previously processed
            // face; clear it so an unsupported boundary type does not
            // inject another face's coefficients into the operator.
            OP11 = 0.0;
            break;
        }

        OPi(entries)=rows1; OPj(entries)=cols1; OPx(entries)=OP11;
        entries += (Np*Nfp);
      }
      // advance to the next face's column range, whether or not this
      // face contributed a block
      cols1 += Nfp;
    }
  }

  umMSG(1, "\n ==> {OPbc} to sparse\n");
  // entries was advanced once past the last stored block; rewind it so
  // that it spans slot 1 through the last slot actually written
  entries.reset(1, entries.hi()-(Np*Nfp));

  // extract triplets from large buffers
  Ai=OPi(entries); Aj=OPj(entries); Ax=OPx(entries);

  // These arrays can be HUGE, so force deallocation
  OPi.Free(); OPj.Free(); OPx.Free();

  // return 0-based sparse result
  Ai -= 1; Aj -= 1;

  //-------------------------------------------------------
  // This operator is not symmetric, and will NOT be 
  // factorised, only used to create reference RHS's:
  //
  //    refrhsbcPR = spOP1 * bcPR;
  //    refrhsbcUx = spOP2 * bcUx;
  //    refrhsbcUy = spOP2 * bcUy;
  //
  // Load ALL elements (both upper and lower triangles):
  //-------------------------------------------------------
  spOP.load(Np*K, Nfp*Nfaces*K, Ai,Aj,Ax, sp_All,false, 1e-15,true);

  Ai.Free();  Aj.Free();  Ax.Free();
  umMSG(1, " ==> {OPbc} ready.\n");

#if (1)
  // check on original estimates for nnx
  umMSG(1, " ==> max_OP: %12d\n", max_OP);
  umMSG(1, " ==> nnz_OP: %12d\n", entries.hi());
#endif
}