static int math_emulate(struct trapframe * info) { unsigned short code; temp_real tmp; char * address; u_long oldeip; /* ever used fp? */ if ((((struct pcb *)curproc->p_addr)->pcb_flags & FP_SOFTFP) == 0) { ((struct pcb *)curproc->p_addr)->pcb_flags |= FP_SOFTFP; I387.cwd = 0x037f; I387.swd = 0x0000; I387.twd = 0x0000; } if (I387.cwd & I387.swd & 0x3f) I387.swd |= 0x8000; else I387.swd &= 0x7fff; oldeip = info->tf_eip; /* 0x001f means user code space */ if ((u_short)info->tf_cs != 0x001F) { printf("math_emulate: %04x:%08lx\n", (u_short)info->tf_cs, oldeip); panic("?Math emulation needed in kernel?"); } code = get_fs_word((unsigned short *) oldeip); bswapw(code); code &= 0x7ff; I387.fip = oldeip; *(unsigned short *) &I387.fcs = (u_short) info->tf_cs; *(1+(unsigned short *) &I387.fcs) = code; info->tf_eip += 2; switch (code) { case 0x1d0: /* fnop */ return(0); case 0x1d1: case 0x1d2: case 0x1d3: /* fst to 32-bit mem */ case 0x1d4: case 0x1d5: case 0x1d6: case 0x1d7: math_abort(info,SIGILL); case 0x1e0: /* fchs */ ST(0).exponent ^= 0x8000; return(0); case 0x1e1: /* fabs */ ST(0).exponent &= 0x7fff; return(0); case 0x1e2: case 0x1e3: math_abort(info,SIGILL); case 0x1e4: /* ftst */ ftst(PST(0)); return(0); case 0x1e5: /* fxam */ printf("fxam not implemented\n"); math_abort(info,SIGILL); case 0x1e6: case 0x1e7: /* fldenv */ math_abort(info,SIGILL); case 0x1e8: /* fld1 */ fpush(); ST(0) = CONST1; return(0); case 0x1e9: /* fld2t */ fpush(); ST(0) = CONSTL2T; return(0); case 0x1ea: /* fld2e */ fpush(); ST(0) = CONSTL2E; return(0); case 0x1eb: /* fldpi */ fpush(); ST(0) = CONSTPI; return(0); case 0x1ec: /* fldlg2 */ fpush(); ST(0) = CONSTLG2; return(0); case 0x1ed: /* fldln2 */ fpush(); ST(0) = CONSTLN2; return(0); case 0x1ee: /* fldz */ fpush(); ST(0) = CONSTZ; return(0); case 0x1ef: math_abort(info,SIGILL); case 0x1f0: /* f2xm1 */ case 0x1f1: /* fyl2x */ case 0x1f2: /* fptan */ case 0x1f3: /* fpatan */ case 0x1f4: /* fxtract */ case 0x1f5: /* fprem1 */ case 0x1f6: /* fdecstp */ case 0x1f7: /* fincstp */ case 0x1f8: /* fprem */ case 0x1f9: /* fyl2xp1 */ case 0x1fa: /* fsqrt */ case 0x1fb: /* fsincos */ case 0x1fe: /* fsin */ case 0x1ff: /* fcos */ uprintf( "math_emulate: instruction %04x not implemented\n", code + 0xd800); math_abort(info,SIGILL); case 0x1fc: /* frndint */ frndint(PST(0),&tmp); real_to_real(&tmp,&ST(0)); return(0); case 0x1fd: /* fscale */ /* incomplete and totally inadequate -wfj */ Fscale(PST(0), PST(1), &tmp); real_to_real(&tmp,&ST(0)); return(0); /* 19 Sep 92*/ case 0x2e9: /* ????? */ /* if this should be a fucomp ST(0),ST(1) , it must be a 0x3e9 ATS */ fucom(PST(1),PST(0)); fpop(); fpop(); return(0); case 0x3d0: case 0x3d1: /* fist ?? */ return(0); case 0x3e2: /* fclex */ I387.swd &= 0x7f00; return(0); case 0x3e3: /* fninit */ I387.cwd = 0x037f; I387.swd = 0x0000; I387.twd = 0x0000; return(0); case 0x3e4: return(0); case 0x6d9: /* fcompp */ fcom(PST(1),PST(0)); fpop(); fpop(); return(0); case 0x7e0: /* fstsw ax */ *(short *) &info->tf_eax = I387.swd; return(0); } switch (code >> 3) { case 0x18: /* fadd */ fadd(PST(0),PST(code & 7),&tmp); real_to_real(&tmp,&ST(0)); return(0); case 0x19: /* fmul */ fmul(PST(0),PST(code & 7),&tmp); real_to_real(&tmp,&ST(0)); return(0); case 0x1a: /* fcom */ fcom(PST(code & 7),PST(0)); return(0); case 0x1b: /* fcomp */ fcom(PST(code & 7),PST(0)); fpop(); return(0); case 0x1c: /* fsubr */ real_to_real(&ST(code & 7),&tmp); tmp.exponent ^= 0x8000; fadd(PST(0),&tmp,&tmp); real_to_real(&tmp,&ST(0)); return(0); case 0x1d: /* fsub */ ST(0).exponent ^= 0x8000; fadd(PST(0),PST(code & 7),&tmp); real_to_real(&tmp,&ST(0)); return(0); case 0x1e: /* fdivr */ fdiv(PST(0),PST(code & 7),&tmp); real_to_real(&tmp,&ST(0)); return(0); case 0x1f: /* fdiv */ fdiv(PST(code & 7),PST(0),&tmp); real_to_real(&tmp,&ST(0)); return(0); case 0x38: /* fld */ fpush(); ST(0) = ST((code & 7)+1); /* why plus 1 ????? ATS */ return(0); case 0x39: /* fxch */ fxchg(&ST(0),&ST(code & 7)); return(0); case 0x3b: /* ??? ??? wrong ???? ATS */ ST(code & 7) = ST(0); fpop(); return(0); case 0x98: /* fadd */ fadd(PST(0),PST(code & 7),&tmp); real_to_real(&tmp,&ST(code & 7)); return(0); case 0x99: /* fmul */ fmul(PST(0),PST(code & 7),&tmp); real_to_real(&tmp,&ST(code & 7)); return(0); case 0x9a: /* ???? , my manual don't list a direction bit for fcom , ??? ATS */ fcom(PST(code & 7),PST(0)); return(0); case 0x9b: /* same as above , ATS */ fcom(PST(code & 7),PST(0)); fpop(); return(0); case 0x9c: /* fsubr */ ST(code & 7).exponent ^= 0x8000; fadd(PST(0),PST(code & 7),&tmp); real_to_real(&tmp,&ST(code & 7)); return(0); case 0x9d: /* fsub */ real_to_real(&ST(0),&tmp); tmp.exponent ^= 0x8000; fadd(PST(code & 7),&tmp,&tmp); real_to_real(&tmp,&ST(code & 7)); return(0); case 0x9e: /* fdivr */ fdiv(PST(0),PST(code & 7),&tmp); real_to_real(&tmp,&ST(code & 7)); return(0); case 0x9f: /* fdiv */ fdiv(PST(code & 7),PST(0),&tmp); real_to_real(&tmp,&ST(code & 7)); return(0); case 0xb8: /* ffree */ printf("ffree not implemented\n"); math_abort(info,SIGILL); case 0xb9: /* fstp ???? where is the pop ? ATS */ fxchg(&ST(0),&ST(code & 7)); return(0); case 0xba: /* fst */ ST(code & 7) = ST(0); return(0); case 0xbb: /* ????? encoding of fstp to mem ? ATS */ ST(code & 7) = ST(0); fpop(); return(0); case 0xbc: /* fucom */ fucom(PST(code & 7),PST(0)); return(0); case 0xbd: /* fucomp */ fucom(PST(code & 7),PST(0)); fpop(); return(0); case 0xd8: /* faddp */ fadd(PST(code & 7),PST(0),&tmp); real_to_real(&tmp,&ST(code & 7)); fpop(); return(0); case 0xd9: /* fmulp */ fmul(PST(code & 7),PST(0),&tmp); real_to_real(&tmp,&ST(code & 7)); fpop(); return(0); case 0xda: /* ??? encoding of ficom with 16 bit mem ? ATS */ fcom(PST(code & 7),PST(0)); fpop(); return(0); case 0xdc: /* fsubrp */ ST(code & 7).exponent ^= 0x8000; fadd(PST(0),PST(code & 7),&tmp); real_to_real(&tmp,&ST(code & 7)); fpop(); return(0); case 0xdd: /* fsubp */ real_to_real(&ST(0),&tmp); tmp.exponent ^= 0x8000; fadd(PST(code & 7),&tmp,&tmp); real_to_real(&tmp,&ST(code & 7)); fpop(); return(0); case 0xde: /* fdivrp */ fdiv(PST(0),PST(code & 7),&tmp); real_to_real(&tmp,&ST(code & 7)); fpop(); return(0); case 0xdf: /* fdivp */ fdiv(PST(code & 7),PST(0),&tmp); real_to_real(&tmp,&ST(code & 7)); fpop(); return(0); case 0xf8: /* fild 16-bit mem ???? ATS */ printf("ffree not implemented\n"); math_abort(info,SIGILL); fpop(); return(0); case 0xf9: /* ????? ATS */ fxchg(&ST(0),&ST(code & 7)); return(0); case 0xfa: /* fist 16-bit mem ? ATS */ case 0xfb: /* fistp 16-bit mem ? ATS */ ST(code & 7) = ST(0); fpop(); return(0); } switch ((code>>3) & 0xe7) { case 0x22: put_short_real(PST(0),info,code); return(0); case 0x23: put_short_real(PST(0),info,code); fpop(); return(0); case 0x24: address = ea(info,code); for (code = 0 ; code < 7 ; code++) { ((long *) & I387)[code] = get_fs_long((unsigned long *) address); address += 4; } return(0); case 0x25: address = ea(info,code); *(unsigned short *) &I387.cwd = get_fs_word((unsigned short *) address); return(0); case 0x26: address = ea(info,code); /*verify_area(address,28);*/ for (code = 0 ; code < 7 ; code++) { put_fs_long( ((long *) & I387)[code], (unsigned long *) address); address += 4; } return(0); case 0x27: address = ea(info,code); /*verify_area(address,2);*/ put_fs_word(I387.cwd,(short *) address); return(0); case 0x62: put_long_int(PST(0),info,code); return(0); case 0x63: put_long_int(PST(0),info,code); fpop(); return(0); case 0x65: fpush(); get_temp_real(&tmp,info,code); real_to_real(&tmp,&ST(0)); return(0); case 0x67: put_temp_real(PST(0),info,code); fpop(); return(0); case 0xa2: put_long_real(PST(0),info,code); return(0); case 0xa3: put_long_real(PST(0),info,code); fpop(); return(0); case 0xa4: address = ea(info,code); for (code = 0 ; code < 27 ; code++) { ((long *) & I387)[code] = get_fs_long((unsigned long *) address); address += 4; } return(0); case 0xa6: address = ea(info,code); /*verify_area(address,108);*/ for (code = 0 ; code < 27 ; code++) { put_fs_long( ((long *) & I387)[code], (unsigned long *) address); address += 4; } I387.cwd = 0x037f; I387.swd = 0x0000; I387.twd = 0x0000; return(0); case 0xa7: address = ea(info,code); /*verify_area(address,2);*/ put_fs_word(I387.swd,(short *) address); return(0); case 0xe2: put_short_int(PST(0),info,code); return(0); case 0xe3: put_short_int(PST(0),info,code); fpop(); return(0); case 0xe4: fpush(); get_BCD(&tmp,info,code); real_to_real(&tmp,&ST(0)); return(0); case 0xe5: fpush(); get_longlong_int(&tmp,info,code); real_to_real(&tmp,&ST(0)); return(0); case 0xe6: put_BCD(PST(0),info,code); fpop(); return(0); case 0xe7: put_longlong_int(PST(0),info,code); fpop(); return(0); } switch (code >> 9) { case 0: get_short_real(&tmp,info,code); break; case 1: get_long_int(&tmp,info,code); break; case 2: get_long_real(&tmp,info,code); break; case 4: get_short_int(&tmp,info,code); } switch ((code>>3) & 0x27) { case 0: fadd(&tmp,PST(0),&tmp); real_to_real(&tmp,&ST(0)); return(0); case 1: fmul(&tmp,PST(0),&tmp); real_to_real(&tmp,&ST(0)); return(0); case 2: fcom(&tmp,PST(0)); return(0); case 3: fcom(&tmp,PST(0)); fpop(); return(0); case 4: tmp.exponent ^= 0x8000; fadd(&tmp,PST(0),&tmp); real_to_real(&tmp,&ST(0)); return(0); case 5: ST(0).exponent ^= 0x8000; fadd(&tmp,PST(0),&tmp); real_to_real(&tmp,&ST(0)); return(0); case 6: fdiv(PST(0),&tmp,&tmp); real_to_real(&tmp,&ST(0)); return(0); case 7: fdiv(&tmp,PST(0),&tmp); real_to_real(&tmp,&ST(0)); return(0); } if ((code & 0x138) == 0x100) { fpush(); real_to_real(&tmp,&ST(0)); return(0); } printf("Unknown math-insns: %04x:%08x %04x\n",(u_short)info->tf_cs, info->tf_eip,code); math_abort(info,SIGFPE); }
//--------------------------------------------------------- DVec& NDG2D::PoissonIPDGbc2D (DVec& ubc, //[in] DVec& qbc //[in] ) //--------------------------------------------------------- { // function [OP] = PoissonIPDGbc2D() // Purpose: Set up the discrete Poisson matrix directly // using LDG. The operator is set up in the weak form // build DG derivative matrices int max_OP = (K*Np*Np*(1+Nfaces)); // initialize parameters DVec faceR("faceR"), faceS("faceS"); DMat V1D("V1D"), Dx("Dx"),Dy("Dy"), Dn1("Dn1"), mmE_Fm1("mmE(:,Fm1)"); IVec Fm("Fm"), Fm1("Fm1"), fidM("fidM"); double lnx=0.0,lny=0.0,lsJ=0.0,hinv=0.0,gtau=0.0; int i=0,k1=0,f1=0,id=0; IVec i1_Nfp = Range(1,Nfp); double N1N1 = double((N+1)*(N+1)); // build local face matrices DMat massEdge[4]; // = zeros(Np,Np,Nfaces); for (i=1; i<=Nfaces; ++i) { massEdge[i].resize(Np,Np); } // face mass matrix 1 Fm = Fmask(All,1); faceR = r(Fm); V1D = Vandermonde1D(N, faceR); massEdge[1](Fm,Fm) = inv(V1D*trans(V1D)); // face mass matrix 2 Fm = Fmask(All,2); faceR = r(Fm); V1D = Vandermonde1D(N, faceR); massEdge[2](Fm,Fm) = inv(V1D*trans(V1D)); // face mass matrix 3 Fm = Fmask(All,3); faceS = s(Fm); V1D = Vandermonde1D(N, faceS); massEdge[3](Fm,Fm) = inv(V1D*trans(V1D)); // build DG right hand side DVec* pBC = new DVec(Np*K, "bc", OBJ_temp); DVec& bc = (*pBC); // reference, for syntax //////////////////////////////////////////////////////////////// umMSG(1, "\n ==> {OP} assembly [bc]: "); for (k1=1; k1<=K; ++k1) { if (! (k1%100)) { umMSG(1, "%d, ",k1); } // rows1 = outer(Range((k1-1)*Np+1,k1*Np), Ones(NGauss)); // Build element-to-element parts of operator for (f1=1; f1<=Nfaces; ++f1) { if (BCType(k1,f1)) { ////////////////////////added by Kevin /////////////////////////////// Fm1 = Fmask(All,f1); fidM = (k1-1)*Nfp*Nfaces + (f1-1)*Nfp + i1_Nfp; id = 1+(f1-1)*Nfp + (k1-1)*Nfp*Nfaces; lnx = nx(id); lny = ny(id); lsJ = sJ(id); hinv = Fscale(id); Dx = rx(1,k1)*Dr + sx(1,k1)*Ds; Dy = ry(1,k1)*Dr + sy(1,k1)*Ds; Dn1 = lnx*Dx + lny*Dy; //mmE = lsJ*massEdge(:,:,f1); //bc(All,k1) += (gtau*mmE(All,Fm1) - Dn1'*mmE(All,Fm1))*ubc(fidM); mmE_Fm1 = massEdge[f1](All,Fm1); mmE_Fm1 *= lsJ; gtau = 10*N1N1*hinv; // set penalty scaling //bc(All,k1) += (gtau*mmE_Fm1 - trans(Dn1)*mmE_Fm1) * ubc(fidM); switch(BCType(k1,f1)){ case BC_Dirichlet: bc(Np*(k1-1)+Range(1,Np)) += (gtau*mmE_Fm1 - trans(Dn1)*mmE_Fm1)*ubc(fidM); break; case BC_Neuman: bc(Np*(k1-1)+Range(1,Np)) += mmE_Fm1*qbc(fidM); break; default: std::cout<<"warning: boundary condition is incorrect"<<std::endl; } } } } return bc; }
//--------------------------------------------------------- void NDG3D::PoissonIPDG3D(CSd& spOP, CSd& spMM) //--------------------------------------------------------- { // function [OP,MM] = PoissonIPDG3D() // // Purpose: Set up the discrete Poisson matrix directly // using LDG. The operator is set up in the weak form DVec faceR("faceR"), faceS("faceS"), faceT("faceT"); DMat V2D; IVec Fm("Fm"); IVec i1_Nfp = Range(1,Nfp); double opti1=0.0, opti2=0.0; int i=0; umLOG(1, "\n ==> {OP,MM} assembly: "); opti1 = timer.read(); // time assembly // build local face matrices DMat massEdge[5]; // = zeros(Np,Np,Nfaces); for (i=1; i<=Nfaces; ++i) { massEdge[i].resize(Np,Np); } // face mass matrix 1 Fm = Fmask(All,1); faceR=r(Fm); faceS=s(Fm); V2D = Vandermonde2D(N, faceR, faceS); massEdge[1](Fm,Fm) = inv(V2D*trans(V2D)); // face mass matrix 2 Fm = Fmask(All,2); faceR = r(Fm); faceT = t(Fm); V2D = Vandermonde2D(N, faceR, faceT); massEdge[2](Fm,Fm) = inv(V2D*trans(V2D)); // face mass matrix 3 Fm = Fmask(All,3); faceS = s(Fm); faceT = t(Fm); V2D = Vandermonde2D(N, faceS, faceT); massEdge[3](Fm,Fm) = inv(V2D*trans(V2D)); // face mass matrix 4 Fm = Fmask(All,4); faceS = s(Fm); faceT = t(Fm); V2D = Vandermonde2D(N, faceS, faceT); massEdge[4](Fm,Fm) = inv(V2D*trans(V2D)); // build local volume mass matrix MassMatrix = trans(invV)*invV; DMat Dx("Dx"),Dy("Dy"),Dz("Dz"), Dx2("Dx2"),Dy2("Dy2"),Dz2("Dz2"); DMat Dn1("Dn1"),Dn2("Dn2"), mmE("mmE"), OP11("OP11"), OP12("OP12"); DMat mmE_All_Fm1, mmE_Fm1_Fm1, Dn2_Fm2_All; IMat rows1,cols1,rows2,cols2; int k1=0,f1=0,k2=0,f2=0,id=0; Index1D entries, entriesMM, idsM; IVec fidM,vidM,Fm1,vidP,Fm2; double lnx=0.0,lny=0.0,lnz=0.0,lsJ=0.0,hinv=0.0,gtau=0.0; double N1N1 = double((N+1)*(N+1)); int NpNp = Np*Np; // build DG derivative matrices int max_OP = (K*Np*Np*(1+Nfaces)); int max_MM = (K*Np*Np); // "OP" triplets (i,j,x), extracted to {Ai,Aj,Ax} IVec OPi(max_OP), OPj(max_OP), Ai,Aj; DVec OPx(max_OP), Ax; // "MM" triplets (i,j,x) IVec MMi(max_MM), MMj(max_MM); DVec MMx(max_MM); IVec OnesNp = Ones(Np); // global node numbering entries.reset(1,NpNp); entriesMM.reset(1,NpNp); OP12.resize(Np,Np); for (k1=1; k1<=K; ++k1) { if (! (k1%250)) { umLOG(1, "%d, ",k1); } rows1 = outer( Range((k1-1)*Np+1,k1*Np), OnesNp ); cols1 = trans(rows1); // Build local operators Dx = rx(1,k1)*Dr + sx(1,k1)*Ds + tx(1,k1)*Dt; Dy = ry(1,k1)*Dr + sy(1,k1)*Ds + ty(1,k1)*Dt; Dz = rz(1,k1)*Dr + sz(1,k1)*Ds + tz(1,k1)*Dt; OP11 = J(1,k1)*(trans(Dx)*MassMatrix*Dx + trans(Dy)*MassMatrix*Dy + trans(Dz)*MassMatrix*Dz); // Build element-to-element parts of operator for (f1=1; f1<=Nfaces; ++f1) { k2 = EToE(k1,f1); f2 = EToF(k1,f1); rows2 = outer( Range((k2-1)*Np+1, k2*Np), OnesNp ); cols2 = trans(rows2); fidM = (k1-1)*Nfp*Nfaces + (f1-1)*Nfp + i1_Nfp; vidM = vmapM(fidM); Fm1 = mod(vidM-1,Np)+1; vidP = vmapP(fidM); Fm2 = mod(vidP-1,Np)+1; id = 1+(f1-1)*Nfp + (k1-1)*Nfp*Nfaces; lnx = nx(id); lny = ny(id); lnz = nz(id); lsJ = sJ(id); hinv = std::max(Fscale(id), Fscale(1+(f2-1)*Nfp, k2)); Dx2 = rx(1,k2)*Dr + sx(1,k2)*Ds + tx(1,k2)*Dt; Dy2 = ry(1,k2)*Dr + sy(1,k2)*Ds + ty(1,k2)*Dt; Dz2 = rz(1,k2)*Dr + sz(1,k2)*Ds + tz(1,k2)*Dt; Dn1 = lnx*Dx + lny*Dy + lnz*Dz; Dn2 = lnx*Dx2 + lny*Dy2 + lnz*Dz2; mmE = lsJ*massEdge[f1]; gtau = 2.0 * N1N1 * hinv; // set penalty scaling if (EToE(k1,f1)==k1) { OP11 += ( gtau*mmE - mmE*Dn1 - trans(Dn1)*mmE ); // ok } else { // interior face variational terms OP11 += 0.5*( gtau*mmE - mmE*Dn1 - trans(Dn1)*mmE ); // extract mapped regions: mmE_All_Fm1 = mmE(All,Fm1); mmE_Fm1_Fm1 = mmE(Fm1,Fm1); Dn2_Fm2_All = Dn2(Fm2,All); OP12 = 0.0; // reset to zero OP12(All,Fm2) = -0.5*( gtau*mmE_All_Fm1 ); OP12(Fm1,All) -= 0.5*( mmE_Fm1_Fm1*Dn2_Fm2_All ); //OP12(All,Fm2) -= 0.5*(-trans(Dn1)*mmE_All_Fm1 ); OP12(All,Fm2) += 0.5*( trans(Dn1)*mmE_All_Fm1 ); // load this set of triplets #if (1) OPi(entries)=rows1; OPj(entries)=cols2, OPx(entries)=OP12; entries += (NpNp); #else //########################################################### // load only the lower triangle (after droptol test?) sk=0; start=entries(1); for (int i=1; i<=NpNp; ++i) { eid = start+i; id=entries(eid); rid=rows1(i); cid=cols2(i); if (rows1(rid) >= cid) { // take lower triangle if ( fabs(OP12(id)) > 1e-15) { // drop small entries ++sk; OPi(id)=rid; OPj(id)=cid, OPx(id)=OP12(id); } } } entries += sk; //########################################################### #endif } } OPi(entries )=rows1; OPj(entries )=cols1, OPx(entries )=OP11; MMi(entriesMM)=rows1; MMj(entriesMM)=cols1; MMx(entriesMM)=J(1,k1)*MassMatrix; entries += (NpNp); entriesMM += (NpNp); } umLOG(1, "\n ==> {OP,MM} to sparse\n"); entries.reset(1, entries.hi()-Np*Np); // Extract triplets from the large buffers. Note: this // requires copying each array, and since these arrays // can be HUGE(!), we force immediate deallocation: Ai=OPi(entries); OPi.Free(); Aj=OPj(entries); OPj.Free(); Ax=OPx(entries); OPx.Free(); umLOG(1, " ==> triplets ready (OP) nnz = %10d\n", entries.hi()); // adjust triplet indices for 0-based sparse operators Ai -= 1; Aj -= 1; MMi -= 1; MMj -= 1; int npk=Np*K; #if defined(NDG_USE_CHOLMOD) || defined(NDG_New_CHOLINC) // load only the lower triangle tril(OP) free args? spOP.load(npk,npk, Ai,Aj,Ax, sp_LT, false,1e-15, true); // {LT, false} -> TriL #else // select {upper,lower,both} triangles //spOP.load(npk,npk, Ai,Aj,Ax, sp_LT, true,1e-15,true); // LT -> enforce symmetry //spOP.load(npk,npk, Ai,Aj,Ax, sp_All,true,1e-15,true); // All-> includes "noise" //spOP.load(npk,npk, Ai,Aj,Ax, sp_UT, false,1e-15,true); // UT -> triu(OP) only #endif Ai.Free(); Aj.Free(); Ax.Free(); umLOG(1, " ==> triplets ready (MM) nnz = %10d\n", entriesMM.hi()); //------------------------------------------------------- // The mass matrix operator will NOT be factorised, // Load ALL elements (both upper and lower triangles): //------------------------------------------------------- spMM.load(npk,npk, MMi,MMj,MMx, sp_All,false,1.00e-15,true); MMi.Free(); MMj.Free(); MMx.Free(); opti2 = timer.read(); // time assembly umLOG(1, " ==> {OP,MM} converted to csc. (%g secs)\n", opti2-opti1); }
void NDG2D::PoissonIPDGbc2D( CSd& spOP //[out] sparse operator ) { // function [OP] = PoissonIPDGbc2D() // Purpose: Set up the discrete Poisson matrix directly // using LDG. The operator is set up in the weak form // build DG derivative matrices int max_OP = (K*Np*Np*(1+Nfaces)); //initialize parameters DVec faceR("faceR"), faceS("faceS"); IVec Fm("Fm"), Fm1("Fm1"), fidM("fidM"); DMat V1D("V1D"); int i=0; // build local face matrices DMat massEdge[4]; // = zeros(Np,Np,Nfaces); for (i=1; i<=Nfaces; ++i) { massEdge[i].resize(Np,Np); } // face mass matrix 1 Fm = Fmask(All,1); faceR = r(Fm); V1D = Vandermonde1D(N, faceR); massEdge[1](Fm,Fm) = inv(V1D*trans(V1D)); // face mass matrix 2 Fm = Fmask(All,2); faceR = r(Fm); V1D = Vandermonde1D(N, faceR); massEdge[2](Fm,Fm) = inv(V1D*trans(V1D)); // face mass matrix 3 Fm = Fmask(All,3); faceS = s(Fm); V1D = Vandermonde1D(N, faceS); massEdge[3](Fm,Fm) = inv(V1D*trans(V1D)); //continue initialize parameters DMat Dx("Dx"),Dy("Dy"), Dn1("Dn1"), mmE_Fm1("mmE(:,Fm1)"); double lnx=0.0,lny=0.0,lsJ=0.0,hinv=0.0,gtau=0.0; int k1=0,f1=0,id=0; IVec i1_Nfp = Range(1,Nfp); double N1N1 = double((N+1)*(N+1)); // "OP" triplets (i,j,x), extracted to {Ai,Aj,Ax} IVec OPi(max_OP),OPj(max_OP), Ai,Aj; DVec OPx(max_OP), Ax; IMat rows1, cols1; Index1D entries; DMat OP11(Np,Nfp, 0.0); // global node numbering entries.reset(1,Np*Nfp); cols1 = outer(Ones(Np), Range(1,Nfp)); umMSG(1, "\n ==> {OP} assembly [bc]: "); for (k1=1; k1<=K; ++k1) { if (! (k1%100)) { umMSG(1, "%d, ",k1); } rows1 = outer(Range((k1-1)*Np+1,k1*Np), Ones(Nfp)); // Build element-to-element parts of operator for (f1=1; f1<=Nfaces; ++f1) { if (BCType(k1,f1)) { ////////////////////////added by Kevin /////////////////////////////// Fm1 = Fmask(All,f1); fidM = (k1-1)*Nfp*Nfaces + (f1-1)*Nfp + i1_Nfp; id = 1+(f1-1)*Nfp + (k1-1)*Nfp*Nfaces; lnx = nx(id); lny = ny(id); lsJ = sJ(id); hinv = Fscale(id); Dx = rx(1,k1)*Dr + sx(1,k1)*Ds; Dy = ry(1,k1)*Dr + sy(1,k1)*Ds; Dn1 = lnx*Dx + lny*Dy; //mmE = lsJ*massEdge(:,:,f1); //bc(All,k1) += (gtau*mmE(All,Fm1) - Dn1'*mmE(All,Fm1))*ubc(fidM); mmE_Fm1 = massEdge[f1](All,Fm1); mmE_Fm1 *= lsJ; gtau = 10*N1N1*hinv; // set penalty scaling //bc(All,k1) += (gtau*mmE_Fm1 - trans(Dn1)*mmE_Fm1) * ubc(fidM); switch(BCType(k1,f1)){ case BC_Dirichlet: OP11 = gtau*mmE_Fm1 - trans(Dn1)*mmE_Fm1; break; case BC_Neuman: OP11 = mmE_Fm1; break; default: std::cout<<"warning: boundary condition is incorrect"<<std::endl; } OPi(entries)=rows1; OPj(entries)=cols1; OPx(entries)=OP11; entries += (Np*Nfp); } cols1 += Nfp; } } umMSG(1, "\n ==> {OPbc} to sparse\n"); entries.reset(1, entries.hi()-(Np*Nfp)); // extract triplets from large buffers Ai=OPi(entries); Aj=OPj(entries); Ax=OPx(entries); // These arrays can be HUGE, so force deallocation OPi.Free(); OPj.Free(); OPx.Free(); // return 0-based sparse result Ai -= 1; Aj -= 1; //------------------------------------------------------- // This operator is not symmetric, and will NOT be // factorised, only used to create reference RHS's: // // refrhsbcPR = spOP1 * bcPR; // refrhsbcUx = spOP2 * bcUx; // refrhsbcUy = spOP2 * bcUy; // // Load ALL elements (both upper and lower triangles): //------------------------------------------------------- spOP.load(Np*K, Nfp*Nfaces*K, Ai,Aj,Ax, sp_All,false, 1e-15,true); Ai.Free(); Aj.Free(); Ax.Free(); umMSG(1, " ==> {OPbc} ready.\n"); #if (1) // check on original estimates for nnx umMSG(1, " ==> max_OP: %12d\n", max_OP); umMSG(1, " ==> nnz_OP: %12d\n", entries.hi()); #endif }