inline void TrmmLLNA ( UnitOrNonUnit diag, T alpha, const DistMatrix<T>& L, DistMatrix<T>& X ) { #ifndef RELEASE PushCallStack("internal::TrmmLLNA"); if( L.Grid() != X.Grid() ) throw std::logic_error ("L and X must be distributed over the same grid"); if( L.Height() != L.Width() || L.Width() != X.Height() ) { std::ostringstream msg; msg << "Nonconformal TrmmLLNA: \n" << " L ~ " << L.Height() << " x " << L.Width() << "\n" << " X ~ " << X.Height() << " x " << X.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = L.Grid(); DistMatrix<T> XL(g), XR(g), X0(g), X1(g), X2(g); DistMatrix<T,VR, STAR> X1_VR_STAR(g); DistMatrix<T,STAR,MR > X1Trans_STAR_MR(g); DistMatrix<T,MC, STAR> Z1_MC_STAR(g); X1_VR_STAR.AlignWith( L ); X1Trans_STAR_MR.AlignWith( L ); Z1_MC_STAR.AlignWith( L ); PartitionRight( X, XL, XR, 0 ); while( XL.Width() < X.Width() ) { RepartitionRight ( XL, /**/ XR, X0, /**/ X1, X2 ); Zeros( X1.Height(), X1.Width(), Z1_MC_STAR ); //--------------------------------------------------------------------// X1_VR_STAR = X1; X1Trans_STAR_MR.TransposeFrom( X1_VR_STAR ); LocalTrmmAccumulateLLN ( TRANSPOSE, diag, alpha, L, X1Trans_STAR_MR, Z1_MC_STAR ); X1.SumScatterFrom( Z1_MC_STAR ); //--------------------------------------------------------------------// SlidePartitionRight ( XL, /**/ XR, X0, X1, /**/ X2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TrmmLLTA ( Orientation orientation, UnitOrNonUnit diag, T alpha, const DistMatrix<T>& L, DistMatrix<T>& X ) { #ifndef RELEASE PushCallStack("internal::TrmmLLTA"); if( L.Grid() != X.Grid() ) throw std::logic_error ("L and X must be distributed over the same grid"); if( orientation == NORMAL ) throw std::logic_error ("TrmmLLTA expects a (Conjugate)Transpose option"); if( L.Height() != L.Width() || L.Height() != X.Height() ) { std::ostringstream msg; msg << "Nonconformal TrmmLLTA: \n" << " L ~ " << L.Height() << " x " << L.Width() << "\n" << " X ~ " << X.Height() << " x " << X.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<T> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<T> XL(g), XR(g), X0(g), X1(g), X2(g); DistMatrix<T,MC,STAR> X1_MC_STAR(g); DistMatrix<T,MR,STAR> Z1_MR_STAR(g); DistMatrix<T,MR,MC > Z1_MR_MC(g); X1_MC_STAR.AlignWith( L ); Z1_MR_STAR.AlignWith( L ); PartitionRight( X, XL, XR, 0 ); while( XL.Width() < X.Width() ) { RepartitionRight ( XL, /**/ XR, X0, /**/ X1, X2 ); Zeros( X1.Height(), X1.Width(), Z1_MR_STAR ); //--------------------------------------------------------------------// X1_MC_STAR = X1; LocalTrmmAccumulateLLT ( orientation, diag, alpha, L, X1_MC_STAR, Z1_MR_STAR ); Z1_MR_MC.SumScatterFrom( Z1_MR_STAR ); X1 = Z1_MR_MC; //--------------------------------------------------------------------// SlidePartitionRight ( XL, /**/ XR, X0, X1, /**/ X2 ); } #ifndef RELEASE PopCallStack(); #endif }
static void analop_esil(RAnal *a, RAnalOp *op, ut64 addr, const ut8 *buf, const char *buf_asm) { r_strbuf_init (&op->esil); r_strbuf_set (&op->esil, ""); switch (buf[0]) { // Irregulars sorted by lower nibble case 0x00: /* nop */ emit(","); break; case 0x10: /* jbc */ k(BIT_R "&,?{,%2$d,1,<<,255,^,%1$d,&=[1],%3$hhd,3,+,pc,+=,}"); break; case 0x20: /* jb */ k(BIT_R "&,?{,%3$hhd,3,+,pc,+=,}"); break; case 0x30: /* jnb */ k(BIT_R "&,!,?{,%3$hhd,3,+,pc,+=,}"); break; case 0x40: /* jc */ emitf("C,!,?{,%hhd,2,+,pc,+=,}", buf[1]); break; case 0x50: /* jnc */ emitf("C,""?{,%hhd,2,+,pc,+=,}", buf[1]); break; case 0x60: /* jz */ emitf("A,!,?{,%hhd,2,+,pc,+=,}", buf[1]); break; case 0x70: /* jnz */ emitf("A,""?{,%hhd,2,+,pc,+=,}", buf[1]); break; case 0x80: /* sjmp */ j(ESX_L1 JMP("2")); break; case 0x90: /* mov */ emitf("%d,dptr,=", (buf[1]<<8) + buf[2]); break; case 0xA0: /* orl */ k(BIT_R "C,|="); break; case 0xB0: /* anl */ k(BIT_R "C,&="); break; case 0xC0: /* push */ h(XR(IB1) PUSH1); break; case 0xD0: /* pop */ h(POP1 XW(IB1)); break; case 0xE0: /* movx */ /* TODO */ break; case 0xF0: /* movx */ /* TODO */ break; case 0x11: case 0x31: case 0x51: case 0x71: case 0x91: case 0xB1: case 0xD1: case 0xF1: emit(CALL("2")); // fall through case 0x01: case 0x21: case 0x41: case 0x61: case 0x81: case 0xA1: case 0xC1: case 0xE1: emitf("0x%x,pc,=", (addr & 0xF800) | ((((unsigned short)buf[0])<<3) & 0x0700) | buf[1]); break; case 0x02: /* ljmp */ emitf( "%d,pc,=", (unsigned int)((buf[1]<<8)+buf[2])); break; case 0x12: /* lcall */ emitf(CALL("3")",%d,pc,=", (unsigned int)((buf[1]<<8)+buf[2])); break; case 0x22: /* ret */ emitf(POP2 "pc,="); break; case 0x32: /* reti */ /* TODO */ break; case 0x72: /* orl */ /* TODO */ break; case 0x82: /* anl */ /* TODO */ break; case 0x92: /* mov */ /* TODO */ break; case 0xA2: /* mov */ /* TODO */ break; case 0xB2: /* cpl */ k("%2$d,1,<<,%1$d,^=[1]"); break; case 0xC2: /* clr */ /* TODO */ break; case 0x03: /* rr */ emit("1,A,0x101,*,>>,A,="); break; case 0x13: /* rrc */ /* TODO */ break; case 0x23: /* rl */ emit("7,A,0x101,*,>>,A,="); break; case 0x33: /* rlc */ /* TODO */ break; case 0x73: /* jmp */ emit("dptr,A,+,pc,="); break; case 0x83: /* movc */ emit("A,dptr,+,[1],A,="); break; case 0x93: /* movc */ emit("A,pc,+,[1],A,="); break; case 0xA3: /* inc */ h(XI(IB1, "++")); break; case 0xB3: /* cpl */ emit("1," XI(C, "^")); break; case 0xC3: /* clr */ emit("0,C,="); break; // Regulars sorted by upper nibble OP_GROUP_UNARY_4(0x00, "++") OP_GROUP_UNARY_4(0x10, "--") OP_GROUP_INPLACE_LHS_4(0x20, A, "+") case 0x34: h (XR(L1) "C,+," XI(A, "+")) break; case 0x35: h (XR(IB1) "C,+," XI(A, "+")) break; case 0x36: case 0x37: j (XR(R0I) "C,+," XI(A, "+")) break; case 0x38: case 0x39: case 0x3A: case 0x3B: case 0x3C: case 0x3D: case 0x3E: case 0x3F: h (XR(R0) "C,+," XI(A, "+")) break; OP_GROUP_INPLACE_LHS_4(0x40, A, "|") OP_GROUP_INPLACE_LHS_4(0x50, A, "&") OP_GROUP_INPLACE_LHS_4(0x60, A, "^") case 0x74: h (XR(L1) XW(A)) break; case 0x75: h (XR(L2) XW(IB1)) break; case 0x76: case 0x77: j (XR(L1) XW(R0I)) break; case 0x78: case 0x79: case 0x7A: case 0x7B: case 0x7C: case 0x7D: case 0x7E: case 0x7F: h (XR(L1) XW(R0)) break; case 0x84: /* div */ emit("B,!,OV,=,0,A,B,A,/=,A,B,*,-,-,B,=,0,C,="); break; case 0x85: /* mov */ h(IRAM_BASE ",%2$d,+,[1]," IRAM_BASE ",%2$d,+,=[1]"); break; case 0x86: case 0x87: j (XR(R0I) XW(IB1)) break; case 0x88: case 0x89: case 0x8A: case 0x8B: case 0x8C: case 0x8D: case 0x8E: case 0x8F: h (XR(R0) XW(IB1)) break; OP_GROUP_INPLACE_LHS_4(0x90, A, ".") case 0xA4: /* mul */ emit("8,A,B,*,DUP,>>,DUP,!,!,OV,=,B,=,A,=,0,C,="); break; case 0xA5: /* ??? */ emit("0,TRAP"); break; case 0xA6: case 0xA7: j (XR(IB1) XW(R0I)) break; case 0xA8: case 0xA9: case 0xAA: case 0xAB: case 0xAC: case 0xAD: case 0xAE: case 0xAF: h (XR(IB1) XW(R0)) break; case 0xB4: h (XR(L1) XR(A) "!=,?{,%3$hhd,2,+pc,+=,}") break; case 0xB5: h (XR(IB1) XR(A) "!=,?{,%3$hhd,2,+pc,+=,}") break; case 0xB6: case 0xB7: j (XR(L1) XR(R0I) "!=,?{,%3$hhd,2,+pc,+=,}") break; case 0xB8: case 0xB9: case 0xBA: case 0xBB: case 0xBC: case 0xBD: case 0xBE: case 0xBF: h (XR(L1) XR(R0) "!=,?{,%3$hhd,2,+pc,+=,}") break; case 0xC4: /* swap */ emit("4,A,0x101,*,>>,A,="); break; case 0xC5: /* xch */ /* TODO */ break; case 0xC6: case 0xC7: /* xch */ /* TODO */ break; case 0xC8: case 0xC9: case 0xCA: case 0xCB: case 0xCC: case 0xCD: case 0xCE: case 0xCF: /* xch */ h (XR(A) XR(R0) XW(A) "," XW(R0)); break; case 0xD2: /* setb */ /* TODO */ break; case 0xD3: /* setb */ /* TODO */ break; case 0xD4: /* da */ emit("A,--="); break; case 0xD5: /* djnz */ h(XI(R0I, "--") "," XR(R0I) CJMP(L2, "2")); break; case 0xD6: /* xchd */ /* TODO */ break; case 0xD7: /* xchd */ /* TODO */ break; case 0xD8: case 0xD9: case 0xDA: case 0xDB: case 0xDC: case 0xDD: case 0xDE: case 0xDF: /* djnz */ h(XI(R0, "--") "," XR(R0) CJMP(L1, "2")); break; case 0xE2: case 0xE3: /* movx */ j(XRAM_BASE "r%0$d,+,[1]," XW(A)); break; case 0xE4: /* clr */ emit("0,A,="); break; case 0xE5: /* mov */ h (XR(IB1) XW(A)) break; case 0xE6: case 0xE7: /* mov */ j (XR(R0I) XW(A)) break; case 0xE8: case 0xE9: case 0xEA: case 0xEB: case 0xEC: case 0xED: case 0xEE: case 0xEF: /* mov */ h (XR(R0) XW(A)) break; case 0xF2: case 0xF3: /* movx */ j(XR(A) XRAM_BASE "r%0$d,+,=[1]"); case 0xF4: /* cpl */ h ("255" XI(A, "^")) break; case 0xF5: /* mov */ h (XR(A) XW(IB1)) break; case 0xF6: case 0xF7: /* mov */ j (XR(A) XW(R0I)) break; case 0xF8: case 0xF9: case 0xFA: case 0xFB: case 0xFC: case 0xFD: case 0xFE: case 0xFF: /* mov */ h (XR(A) XW(R0)) break; default: break; } }
static void analop_esil(RAnal *a, RAnalOp *op, ut64 addr, const ut8 *buf) { r_strbuf_init (&op->esil); r_strbuf_set (&op->esil, ""); switch (buf[0]) { // Irregulars sorted by lower nibble case 0x00: /* nop */ emit (","); break; case 0x10: /* jbc bit, offset */ k (BIT_R "?{," BIT_MASK XI(BIT, "&") JMP ",}"); break; case 0x20: /* jb bit, offset */ k (BIT_R CJMP); break; case 0x30: /* jnb bit, offset */ k (BIT_R "!," CJMP); break; case 0x40: /* jc offset */ h ("c,1,&," CJMP); break; case 0x50: /* jnc offset */ h ("c,1,&,!," CJMP ); break; case 0x60: /* jz offset */ h ("a,0,==," CJMP); break; case 0x70: /* jnz offset */ h ("a,0,==,!," CJMP); break; case 0x11: case 0x31: case 0x51: case 0x71: case 0x91: case 0xB1: case 0xD1: case 0xF1: /* acall addr11 */ case 0x12: /* lcall addr16 */ j (CALL); /* fall through */ case 0x01: case 0x21: case 0x41: case 0x61: case 0x81: case 0xA1: case 0xC1: case 0xE1: /* ajmp addr11 */ case 0x02: /* ljmp addr16 */ case 0x80: /* sjmp offset */ j (JMP); break; case 0x22: /* ret */ case 0x32: /* reti */ emitf (POP2 "pc,="); break; case 0x03: /* rr a */ emit ("1,a,0x101,*,>>,a,=," FLAG_P); break; case 0x04: /* inc a */ h (XI(A, "++") FLAG_P); break; case 0x05: /* inc direct */ h (XI(IB1, "++")); break; case 0x06: case 0x07: /* inc @Ri */ j (XI(RI, "++")); break; case 0x08: case 0x09: case 0x0A: case 0x0B: case 0x0C: case 0x0D: case 0x0E: case 0x0F: /* dec @Rn */ h (XI(RN, "++")); break; case 0x13: /* rrc a */ emit ("7,c,<<,1,a,&,c,=,0x7f,1,a,>>,&,+,a,=," FLAG_P); break; case 0x14: /* dec a */ h (XI(A, "--") FLAG_P); break; case 0x15: /* dec direct */ h (XI(IB1, "--")); break; case 0x16: case 0x17: /* dec @Ri */ j (XI(RI, "--")); break; case 0x18: case 0x19: case 0x1A: case 0x1B: case 0x1C: case 0x1D: case 0x1E: case 0x1F: /* dec @Rn */ h (XI(RN, "--")); break; case 0x23: /* rl a */ h ("7,a,0x101,*,>>,a,=," FLAG_P); break; TEMPLATE_ALU (0x20, "+", FLAG_C FLAG_AC FLAG_OV FLAG_P) /* 0x24..0x2f add a,.. */ case 0x33: /* rlc a */ h ("c,1,&,a,a,+=,$c7,c,=,a,+=," FLAG_P); break; TEMPLATE_ALU_C (0x30, "+", FLAG_C FLAG_AC FLAG_OV FLAG_P) /* 0x34..0x2f addc a,.. */ case 0x42: /* orl direct, a */ h (XR(A) XI(IB1, "|")); break; case 0x43: /* orl direct, imm */ h (XR(L2) XI(IB1, "|")); break; TEMPLATE_ALU (0x40, "|", FLAG_P) /* 0x44..0x4f orl a,.. */ case 0x52: /* anl direct, a */ h (XR(A) XI(IB1, "&")); break; case 0x53: /* anl direct, imm */ h (XR(L2) XI(IB1, "&")); break; TEMPLATE_ALU (0x50, "&", FLAG_P) /* 0x54..0x5f anl a,.. */ case 0x62: /* xrl direct, a */ h (XR(A) XI(IB1, "^")); break; case 0x63: /* xrl direct, imm */ h (XR(L2) XI(IB1, "^")); break; TEMPLATE_ALU (0x60, "^", FLAG_P) /* 0x64..0x6f xrl a,.. */ case 0x72: /* orl C, bit */ k (BIT_R XI(C, "|")); break; case 0x73: /* jmp @a+dptr */ emit ("dptr,a,+,pc,="); break; case 0x74: /* mov a, imm */ h (XR(L1) XW(A) FLAG_P); break; case 0x75: /* mov direct, imm */ h (XR(L2) XW(IB1)); break; case 0x76: case 0x77: /* mov @Ri, imm */ j (XR(L1) XW(RI)); break; case 0x78: case 0x79: case 0x7A: case 0x7B: case 0x7C: case 0x7D: case 0x7E: case 0x7F: /* mov Rn, imm */ h (XR(L1) XW(RN)); break; case 0x82: /* anl C, bit */ k (BIT_R XI(C, "&")); break; case 0x83: /* movc a, @a+pc */ emit ("a,pc,--,+,[1]," XW(A) FLAG_P); break; case 0x84: /* div ab */ emit ("b,!,OV,=,0,a,b,a,/=,a,b,*,-,-,b,=,0,c,="); break; case 0x85: /* mov direct, direct */ h (XR(IB1) XW(IB2)); break; case 0x86: case 0x87: /* mov direct, @Ri */ j (XR(RI) XW(IB1)); break; case 0x88: case 0x89: case 0x8A: case 0x8B: case 0x8C: case 0x8D: case 0x8E: case 0x8F: /* mov direct, Rn */ h (XR(RN) XW(IB1)); break; case 0x90: /* mov dptr, imm */ h (XR(L16) XW(DP)); break; case 0x92: /* mov bit, C */ k (BIT_C BIT_MASK XR(BIT) "&,|," XW(BIT)); break; case 0x93: /* movc a, @a+dptr */ h ("a,dptr,+,[1]," XW(A) FLAG_P); break; TEMPLATE_ALU_C (0x90, "-", FLAG_B FLAG_AB FLAG_OB FLAG_P) /* 0x94..0x9f subb a,.. */ case 0xA0: /* orl C, /bit */ k (BIT_R "!," XI(C, "|")); break; case 0xA2: /* mov C, bit */ k (BIT_R XW(C)); break; case 0xA3: /* inc dptr */ h (XI(DP, "++")); break; case 0xA4: /* mul ab */ emit ("8,a,b,*,NUM,>>,NUM,!,!,ov,=,b,=,a,=,0,c,="); break; case 0xA5: /* "reserved" */ emit ("0,trap"); break; case 0xA6: case 0xA7: /* mov @Ri, direct */ j (XR(IB1) XW(RI)); break; case 0xA8: case 0xA9: case 0xAA: case 0xAB: case 0xAC: case 0xAD: case 0xAE: case 0xAF: /* mov Rn, direct */ h (XR(IB1) XW(RN)); break; case 0xB0: /* anl C, /bit */ k (BIT_R "!," XI(C, "&")); break; case 0xB2: /* cpl bit */ k (BIT_SET XI(BIT, "^")); break; case 0xB3: /* cpl C */ h ("1," XI(C, "^")); break; case 0xB4: /* cjne a, imm, offset */ h (XR(L1) XR(A) "-," CJMP); break; case 0xB5: /* cjne a, direct, offset */ h (XR(IB1) XR(A) "-," CJMP); break; case 0xB6: case 0xB7: /* cjne @ri, imm, offset */ j (XR(L1) XR(RI) "-," CJMP); break; case 0xB8: case 0xB9: case 0xBA: case 0xBB: case 0xBC: case 0xBD: case 0xBE: case 0xBF: /* cjne Rn, imm, offset */ h (XR(L1) XR(RN) "-," CJMP); break; case 0xC0: /* push direct */ h (XR(IB1) PUSH1); break; case 0xC2: /* clr bit */ k (BIT_MASK XI(BIT, "&")); break; case 0xC3: /* clr C */ h ("0," XW(C)); break; case 0xC4: /* swap a */ h ("0xff,4,a,0x101,*,>>,&," XW(A) FLAG_P); break; case 0xC5: /* xch a, direct */ h (XR(A) "0,+," XR(IB1) XW(A) XW(IB1) FLAG_P); break; case 0xC6: case 0xC7: /* xch a, @Ri */ j (XR(A) "0,+," XR(RI) XW(A) XW(RI) FLAG_P); break; case 0xC8: case 0xC9: case 0xCA: case 0xCB: case 0xCC: case 0xCD: case 0xCE: case 0xCF: /* xch a, Rn */ h (XR(A) "0,+," XR(RN) XW(A) XW(RN) FLAG_P); break; case 0xD0: /* pop direct */ h (POP1 XW(IB1)); break; case 0xD2: /* setb bit */ k (BIT_SET XI(BIT, "|")); break; case 0xD3: /* setb C */ h ("1," XW(C)); break; case 0xD4: /* da a */ // BCD adjust after add: // if (lower nibble > 9) or (AC == 1) add 6 // if (higher nibble > 9) or (C == 1) add 0x60 // carry |= carry caused by this operation emit ("a,0x0f,&,9,<,ac,|,?{,6,a,+=,$c7,c,|=,},a,0xf0,&,0x90,<,c,|,?{,0x60,a,+=,$c7,c,|=,}," FLAG_P); break; case 0xD5: /* djnz direct, offset */ h (XI(IB1, "--") XR(IB1) "0,==,!," CJMP); break; case 0xD6: case 0xD7: /* xchd a, @Ri*/ j (XR(A) "0xf0,&," XR(RI) "0x0f,&,|," XR(RI) "0xf0,&," XR(A) "0x0f,&,|," XW(RI) XW(A) FLAG_P); break; case 0xD8: case 0xD9: case 0xDA: case 0xDB: case 0xDC: case 0xDD: case 0xDE: case 0xDF: /* djnz Rn, offset */ h (XI(RN, "--") XR(RN) "0,==,!," CJMP); break; case 0xE0: /* movx a, @dptr */ h (XR(DPX) XW(A) FLAG_P); break; case 0xE2: case 0xE3: /* movx a, @Ri */ j (XR(R0X) XW(A) FLAG_P); break; case 0xE4: /* clr a */ emit ("0," XW(A) FLAG_P); break; case 0xE5: /* mov a, direct */ h (XR(IB1) XW(A) FLAG_P); break; case 0xE6: case 0xE7: /* mov a, @Ri */ j (XR(RI) XW(A) FLAG_P); break; case 0xE8: case 0xE9: case 0xEA: case 0xEB: case 0xEC: case 0xED: case 0xEE: case 0xEF: /* mov a, Rn */ h (XR(RN) XW(A) FLAG_P); break; case 0xF0: /* movx @dptr, a */ h (XR(A) XW(DPX)); break; case 0xF2: case 0xF3: /* movx @Ri, a */ j (XR(A) XW(R0X)); break; case 0xF4: /* cpl a */ h ("255," XI(A, "^") FLAG_P); break; case 0xF5: /* mov direct, a */ h (XR(A) XW(IB1)); break; case 0xF6: case 0xF7: /* mov @Ri, a */ j (XR(A) XW(RI)); break; case 0xF8: case 0xF9: case 0xFA: case 0xFB: case 0xFC: case 0xFD: case 0xFE: case 0xFF: /* mov Rn, a */ h (XR(A) XW(RN)); break; default: break; } }
inline void TrsmRLT ( Orientation orientation, UnitOrNonUnit diag, F alpha, const DistMatrix<F>& L, DistMatrix<F>& X, bool checkIfSingular ) { #ifndef RELEASE CallStackEntry entry("internal::TrsmRLT"); if( orientation == NORMAL ) LogicError("TrsmRLT expects a (Conjugate)Transpose option"); #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<F> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<F> XL(g), XR(g), X0(g), X1(g), X2(g); // Temporary distributions DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,VR, STAR> L21_VR_STAR(g); DistMatrix<F,STAR,MR > L21AdjOrTrans_STAR_MR(g); DistMatrix<F,VC, STAR> X1_VC_STAR(g); DistMatrix<F,STAR,MC > X1Trans_STAR_MC(g); // Start the algorithm Scale( alpha, X ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); PartitionRight( X, XL, XR, 0 ); while( XR.Width() > 0 ) { LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); RepartitionRight ( XL, /**/ XR, X0, /**/ X1, X2 ); X1_VC_STAR.AlignWith( X2 ); X1Trans_STAR_MC.AlignWith( X2 ); L21_VR_STAR.AlignWith( X2 ); L21AdjOrTrans_STAR_MR.AlignWith( X2 ); //--------------------------------------------------------------------// L11_STAR_STAR = L11; X1_VC_STAR = X1; LocalTrsm ( RIGHT, LOWER, orientation, diag, F(1), L11_STAR_STAR, X1_VC_STAR, checkIfSingular ); X1Trans_STAR_MC.TransposeFrom( X1_VC_STAR ); X1.TransposeFrom( X1Trans_STAR_MC ); L21_VR_STAR = L21; if( orientation == ADJOINT ) L21AdjOrTrans_STAR_MR.AdjointFrom( L21_VR_STAR ); else L21AdjOrTrans_STAR_MR.TransposeFrom( L21_VR_STAR ); // X2[MC,MR] -= X1[MC,*] (L21[MR,*])^(T/H) // = X1^T[* ,MC] (L21^(T/H))[*,MR] LocalGemm ( TRANSPOSE, NORMAL, F(-1), X1Trans_STAR_MC, L21AdjOrTrans_STAR_MR, F(1), X2 ); //--------------------------------------------------------------------// SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); SlidePartitionRight ( XL, /**/ XR, X0, X1, /**/ X2 ); } }
inline void TrsmRUN ( UnitOrNonUnit diag, F alpha, const DistMatrix<F>& U, DistMatrix<F>& X, bool checkIfSingular ) { #ifndef RELEASE PushCallStack("internal::TrsmRUN"); #endif const Grid& g = U.Grid(); // Matrix views DistMatrix<F> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); DistMatrix<F> XL(g), XR(g), X0(g), X1(g), X2(g); // Temporary distributions DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,STAR,MR > U12_STAR_MR(g); DistMatrix<F,VC, STAR> X1_VC_STAR(g); DistMatrix<F,STAR,MC > X1Trans_STAR_MC(g); // Start the algorithm Scale( alpha, X ); LockedPartitionDownDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); PartitionRight( X, XL, XR, 0 ); while( XR.Width() > 0 ) { LockedRepartitionDownDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); RepartitionRight ( XL, /**/ XR, X0, /**/ X1, X2 ); X1_VC_STAR.AlignWith( X2 ); X1Trans_STAR_MC.AlignWith( X2 ); U12_STAR_MR.AlignWith( X2 ); //--------------------------------------------------------------------// U11_STAR_STAR = U11; X1_VC_STAR = X1; LocalTrsm ( RIGHT, UPPER, NORMAL, diag, F(1), U11_STAR_STAR, X1_VC_STAR, checkIfSingular ); X1Trans_STAR_MC.TransposeFrom( X1_VC_STAR ); X1.TransposeFrom( X1Trans_STAR_MC ); U12_STAR_MR = U12; // X2[MC,MR] -= X1[MC,* ] U12[* ,MR] // = X1^T[* ,MC] U12[* ,MR] LocalGemm ( TRANSPOSE, NORMAL, F(-1), X1Trans_STAR_MC, U12_STAR_MR, F(1), X2 ); //--------------------------------------------------------------------// X1_VC_STAR.FreeAlignments(); X1Trans_STAR_MC.FreeAlignments(); U12_STAR_MR.FreeAlignments(); SlideLockedPartitionDownDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); SlidePartitionRight ( XL, /**/ XR, X0, X1, /**/ X2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TrmmRLNCOld ( UnitOrNonUnit diag, T alpha, const DistMatrix<T>& L, DistMatrix<T>& X ) { #ifndef RELEASE PushCallStack("internal::TrmmRLNCOld"); if( L.Grid() != X.Grid() ) throw std::logic_error ("L and X must be distributed over the same grid"); if( L.Height() != L.Width() || X.Width() != L.Height() ) { std::ostringstream msg; msg << "Nonconformal TrmmRLNC: \n" << " L ~ " << L.Height() << " x " << L.Width() << "\n" << " X ~ " << X.Height() << " x " << X.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<T> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<T> XL(g), XR(g), X0(g), X1(g), X2(g); // Temporary distributions DistMatrix<T,STAR,STAR> L11_STAR_STAR(g); DistMatrix<T,MR, STAR> L21_MR_STAR(g); DistMatrix<T,VC, STAR> X1_VC_STAR(g); DistMatrix<T,MC, STAR> D1_MC_STAR(g); // Start the algorithm Scale( alpha, X ); LockedPartitionDownDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); PartitionRight( X, XL, XR, 0 ); while( XR.Width() > 0 ) { LockedRepartitionDownDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); RepartitionRight ( XL, /**/ XR, X0, /**/ X1, X2 ); L21_MR_STAR.AlignWith( X2 ); D1_MC_STAR.AlignWith( X1 ); Zeros( X1.Height(), X1.Width(), D1_MC_STAR ); //--------------------------------------------------------------------// X1_VC_STAR = X1; L11_STAR_STAR = L11; LocalTrmm ( RIGHT, LOWER, NORMAL, diag, T(1), L11_STAR_STAR, X1_VC_STAR ); X1 = X1_VC_STAR; L21_MR_STAR = L21; LocalGemm( NORMAL, NORMAL, T(1), X2, L21_MR_STAR, T(0), D1_MC_STAR ); X1.SumScatterUpdate( T(1), D1_MC_STAR ); //--------------------------------------------------------------------// L21_MR_STAR.FreeAlignments(); D1_MC_STAR.FreeAlignments(); SlideLockedPartitionDownDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); SlidePartitionRight ( XL, /**/ XR, X0, X1, /**/ X2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TrmmRUNC ( UnitOrNonUnit diag, T alpha, const DistMatrix<T>& U, DistMatrix<T>& X ) { #ifndef RELEASE CallStackEntry entry("internal::TrmmRUNC"); if( U.Grid() != X.Grid() ) throw std::logic_error ("U and X must be distributed over the same grid"); if( U.Height() != U.Width() || X.Width() != U.Height() ) { std::ostringstream msg; msg << "Nonconformal TrmmRUNC: \n" << " U ~ " << U.Height() << " x " << U.Width() << "\n" << " X ~ " << X.Height() << " x " << X.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = U.Grid(); // Matrix views DistMatrix<T> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); DistMatrix<T> XL(g), XR(g), X0(g), X1(g), X2(g); // Temporary distributions DistMatrix<T,MR, STAR> U12Trans_MR_STAR(g); DistMatrix<T,STAR,STAR> U11_STAR_STAR(g); DistMatrix<T,VC, STAR> X1_VC_STAR(g); DistMatrix<T,MC, STAR> X1_MC_STAR(g); // Start the algorithm Scale( alpha, X ); LockedPartitionUpDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); PartitionLeft( X, XL, XR, 0 ); while( XL.Width() > 0 ) { LockedRepartitionUpDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); RepartitionLeft ( XL, /**/ XR, X0, X1, /**/ X2 ); X1_MC_STAR.AlignWith( X2 ); U12Trans_MR_STAR.AlignWith( X2 ); X1_VC_STAR.AlignWith( X1 ); //--------------------------------------------------------------------// X1_MC_STAR = X1; U12Trans_MR_STAR.TransposeFrom( U12 ); LocalGemm ( NORMAL, TRANSPOSE, T(1), X1_MC_STAR, U12Trans_MR_STAR, T(1), X2 ); U11_STAR_STAR = U11; X1_VC_STAR = X1_MC_STAR; LocalTrmm ( RIGHT, UPPER, NORMAL, diag, T(1), U11_STAR_STAR, X1_VC_STAR ); X1 = X1_VC_STAR; //--------------------------------------------------------------------// X1_MC_STAR.FreeAlignments(); U12Trans_MR_STAR.FreeAlignments(); X1_VC_STAR.FreeAlignments(); SlideLockedPartitionUpDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); SlidePartitionLeft ( XL, /**/ XR, X0, /**/ X1, X2 ); } }
inline void TrsmRUT ( Orientation orientation, UnitOrNonUnit diag, F alpha, const DistMatrix<F>& U, DistMatrix<F>& X, bool checkIfSingular ) { #ifndef RELEASE PushCallStack("internal::TrsmRUT"); if( orientation == NORMAL ) throw std::logic_error("TrsmRUT expects a (Conjugate)Transpose option"); #endif const Grid& g = U.Grid(); // Matrix views DistMatrix<F> UTL(g), UTR(g), U00(g), U01(g), U02(g), UBL(g), UBR(g), U10(g), U11(g), U12(g), U20(g), U21(g), U22(g); DistMatrix<F> XL(g), XR(g), X0(g), X1(g), X2(g); // Temporary distributions DistMatrix<F,VR, STAR> U01_VR_STAR(g); DistMatrix<F,STAR,MR > U01AdjOrTrans_STAR_MR(g); DistMatrix<F,STAR,STAR> U11_STAR_STAR(g); DistMatrix<F,VC, STAR> X1_VC_STAR(g); DistMatrix<F,STAR,MC > X1Trans_STAR_MC(g); // Start the algorithm Scale( alpha, X ); LockedPartitionUpDiagonal ( U, UTL, UTR, UBL, UBR, 0 ); PartitionLeft( X, XL, XR, 0 ); while( XL.Width() > 0 ) { LockedRepartitionUpDiagonal ( UTL, /**/ UTR, U00, U01, /**/ U02, /**/ U10, U11, /**/ U12, /*************/ /******************/ UBL, /**/ UBR, U20, U21, /**/ U22 ); RepartitionLeft ( XL, /**/ XR, X0, X1, /**/ X2 ); X1_VC_STAR.AlignWith( X0 ); X1Trans_STAR_MC.AlignWith( X0 ); U01_VR_STAR.AlignWith( X0 ); U01AdjOrTrans_STAR_MR.AlignWith( X0 ); //--------------------------------------------------------------------// U11_STAR_STAR = U11; X1_VC_STAR = X1; LocalTrsm ( RIGHT, UPPER, orientation, diag, F(1), U11_STAR_STAR, X1_VC_STAR, checkIfSingular ); X1Trans_STAR_MC.TransposeFrom( X1_VC_STAR ); X1.TransposeFrom( X1Trans_STAR_MC ); U01_VR_STAR = U01; if( orientation == ADJOINT ) U01AdjOrTrans_STAR_MR.AdjointFrom( U01_VR_STAR ); else U01AdjOrTrans_STAR_MR.TransposeFrom( U01_VR_STAR ); // X0[MC,MR] -= X1[MC,* ] (U01[MR,* ])^(T/H) // = X1^T[* ,MC] (U01^(T/H))[* ,MR] LocalGemm ( TRANSPOSE, NORMAL, F(-1), X1Trans_STAR_MC, U01AdjOrTrans_STAR_MR, F(1), X0 ); //--------------------------------------------------------------------// X1_VC_STAR.FreeAlignments(); X1Trans_STAR_MC.FreeAlignments(); U01_VR_STAR.FreeAlignments(); U01AdjOrTrans_STAR_MR.FreeAlignments(); SlideLockedPartitionUpDiagonal ( UTL, /**/ UTR, U00, /**/ U01, U02, /*************/ /******************/ /**/ U10, /**/ U11, U12, UBL, /**/ UBR, U20, /**/ U21, U22 ); SlidePartitionLeft ( XL, /**/ XR, X0, /**/ X1, X2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void TrsmRLN ( UnitOrNonUnit diag, F alpha, const DistMatrix<F>& L, DistMatrix<F>& X, bool checkIfSingular ) { #ifndef RELEASE PushCallStack("internal::TrsmRLN"); #endif const Grid& g = L.Grid(); // Matrix views DistMatrix<F> LTL(g), LTR(g), L00(g), L01(g), L02(g), LBL(g), LBR(g), L10(g), L11(g), L12(g), L20(g), L21(g), L22(g); DistMatrix<F> XL(g), XR(g), X0(g), X1(g), X2(g); // Temporary distributions DistMatrix<F,MR, STAR> L10Trans_MR_STAR(g); DistMatrix<F,STAR,STAR> L11_STAR_STAR(g); DistMatrix<F,STAR,MC > X1Trans_STAR_MC(g); DistMatrix<F,VC, STAR> X1_VC_STAR(g); // Start the algorithm Scale( alpha, X ); LockedPartitionUpDiagonal ( L, LTL, LTR, LBL, LBR, 0 ); PartitionLeft( X, XL, XR, 0 ); while( XL.Width() > 0 ) { LockedRepartitionUpDiagonal ( LTL, /**/ LTR, L00, L01, /**/ L02, /**/ L10, L11, /**/ L12, /*************/ /******************/ LBL, /**/ LBR, L20, L21, /**/ L22 ); RepartitionLeft ( XL, /**/ XR, X0, X1, /**/ X2 ); X1Trans_STAR_MC.AlignWith( X0 ); L10Trans_MR_STAR.AlignWith( X0 ); //--------------------------------------------------------------------// L11_STAR_STAR = L11; X1_VC_STAR = X1; LocalTrsm ( RIGHT, LOWER, NORMAL, diag, F(1), L11_STAR_STAR, X1_VC_STAR, checkIfSingular ); // X0[MC,MR] -= X1[MC,* ] L10[*,MR] // = X1^T[* ,MC] L10^T[MR,* ] X1Trans_STAR_MC.TransposeFrom( X1_VC_STAR ); X1.TransposeFrom( X1Trans_STAR_MC ); L10Trans_MR_STAR.TransposeFrom( L10 ); LocalGemm ( TRANSPOSE, TRANSPOSE, F(-1), X1Trans_STAR_MC, L10Trans_MR_STAR, F(1), X0 ); //--------------------------------------------------------------------// X1Trans_STAR_MC.FreeAlignments(); L10Trans_MR_STAR.FreeAlignments(); SlideLockedPartitionUpDiagonal ( LTL, /**/ LTR, L00, /**/ L01, L02, /*************/ /******************/ /**/ L10, /**/ L11, L12, LBL, /**/ LBR, L20, /**/ L21, L22 ); SlidePartitionLeft ( XL, /**/ XR, X0, /**/ X1, X2 ); } #ifndef RELEASE PopCallStack(); #endif }
static void compute_coords(GeometricGraph &G, short TreeColor1, short TreeColor2, tbrin RootBrin, svector<tbrin> &Father0, svector<tbrin> &Father1, svector<tbrin> &Father2, svector<short> &ecolor, svector<int> &x, svector<int> &y, svector<Tpoint> &Ebend) { svector<int> marked(1,G.ne(),0); marked.SetName("marked"); svector<int> Descendants(1,G.nv(),1); Descendants.SetName("Descendants"); tbrin b; tvertex u, f0, f1, f2; tvertex v0, v1, v2; // roots of T0,T1,T2 svector<int> Lmax1(1,G.nv(),0); svector<int> Lmax2(1,G.nv(),0); svector<int> XR(1,G.nv(),0); svector<int> XL(1,G.nv(),-1); tbrin RootBrin1, RootBrin2; RootBrin2 = RootBrin; RootBrin1 = -RootBrin; b = RootBrin1; v0 = G.vin[RootBrin2]; v1 = G.vin[-RootBrin2]; v2 = G.vin[-G.acir[-RootBrin2]]; y[v0] = 0; x[v0] = 0; x[v2] = 0; // Computes ordinates while (1) {do {b = G.cir[b]; if(b == RootBrin1) break; }while (ecolor[b.GetEdge()] != TreeColor1 && !marked[b.GetEdge()]); if (b == RootBrin1) break; if (!marked[b.GetEdge()]) // Montee dans l'arbre {marked[b.GetEdge()] = 1; b = -b; } else // Descente {u = G.vin[b]; f0 =G.vin[Father0[u]]; f1 =G.vin[Father1[u]]; f2 =G.vin[Father2[u]]; if (f0 != 0) y[u] = Max(y[u],y[f0]+1); y[u] = Max(y[u],Min(Lmax1[u],Lmax2[u])+1); if (f2 != 0) y[f2] = Max(y[f2], y[u]); if (f1 != 0) y[f1] = Max(y[f1], y[u]); if (f2 != 0) y[f2] = Max(y[f2], Lmax1[u]+1); if (f1 != 0) y[f1] = Max(y[f1], Lmax2[u]+1); if (f1 != 0) Lmax1[f1] = Max(Lmax1[f1], y[u]); if (f2 != 0) Lmax2[f2] = Max(Lmax2[f2], y[u]); b = -b; } } y[v1] = Max(y[v1], Lmax2[v2]+1); // Computes Bends and abscissa Fill(marked, 0); int current_x=1; b = RootBrin2; while (1) {do {b = G.cir[b]; if(b == RootBrin2)break; }while (ecolor[b.GetEdge()] != TreeColor2 && !marked[b.GetEdge()]); if (b == RootBrin2) break; if (!marked[b.GetEdge()]) // Montee dans l'arbre {marked[b.GetEdge()] = 1; b = -b; } else // Descente {u = G.vin[b]; f0 =G.vin[Father0[u]]; f1 =G.vin[Father1[u]]; f2 =G.vin[Father2[u]]; if (Descendants[u] == 1) {x[u] = current_x; XL[u] = current_x; XR[u] = current_x; current_x++; } else {if (y[u] == Lmax2[u]) x[u] = XR[u]; else x[u] = XL[u]; } if (f0 != 0) {XR[f0]=XR[u]; if (XL[f0] == -1) XL[f0] = XL[u]; Ebend[b.GetEdge()] = Tpoint(x[u],y[f0]+1); } if (f1 != 0 && x[u] != XR[u]) Ebend[Father1[u].GetEdge()] = Tpoint(XR[u],y[u]); if (f2 != 0 && x[u] != XL[u]) Ebend[Father2[u].GetEdge()] = Tpoint(XL[u],y[u]); Descendants[f0] += Descendants[u]; b = -b; } } x[v1] = current_x; Ebend[RootBrin2.GetEdge()] = Tpoint(x[v1], y[v0]+1); }