void Bezier::recalc(QPointF p) { // http://www.flong.com/texts/code/shapers_bez/ // http://www.lemoda.net/maths/bezier-length/index.html, // arbitrary but reasonable t-values for interior control points double t0 = 0.3; double t1 = 0.7; if (m_drag_cp0) { double x = (p.x() - m_endpoint0.x() * B0(t0) - m_cp1.x() * B2(t0) - m_endpoint1.x() * B3(t0)) / B1(t0); double y = (p.y() - m_endpoint0.y() * B0(t0) - m_cp1.y() * B2(t0) - m_endpoint1.y() * B3(t0)) / B1(t0); m_cp0 = QPointF(x, y); } else { double x = (p.x() - m_endpoint0.x() * B0(t1) - m_cp0.x() * B1(t1) - m_endpoint1.x() * B3(t1)) / B2(t1); double y = (p.y() - m_endpoint0.y() * B0(t1) - m_cp0.y() * B1(t1) - m_endpoint1.y() * B3(t1)) / B2(t1); m_cp1 = QPointF(x, y); } /* DebugDialog::debug(QString("ix:%1 p0x:%2,p0y:%3 p1x:%4,p1y:%5 px:%6,py:%7") .arg(m_drag_cp0) .arg(m_endpoint0.x()) .arg(m_endpoint0.y()) .arg(m_endpoint1.x()) .arg(m_endpoint1.y()) .arg(p.x()) .arg(p.y()) ); */ m_isEmpty = false; }
static int reallyroutespline (Pedge_t *edges, int edgen, Ppoint_t *inps, int inpn, Ppoint_t ev0, Ppoint_t ev1) { Ppoint_t p1, p2, cp1, cp2, p; Pvector_t v1, v2, splitv, splitv1, splitv2; double maxd, d, t; int maxi, i, spliti; static tna_t *tnas; static int tnan; if (tnan < inpn) { if (!tnas) { if (!(tnas = (tna_t *)malloc (sizeof (tna_t) * inpn))) return -1; } else { if (!(tnas = (tna_t *)realloc (tnas, sizeof (tna_t) * inpn))) return -1; } tnan = inpn; } tnas[0].t = 0; for (i = 1; i < inpn; i++) tnas[i].t = tnas[i - 1].t + dist (inps[i], inps[i - 1]); for (i = 1; i < inpn; i++) tnas[i].t /= tnas[inpn - 1].t; for (i = 0; i < inpn; i++) { tnas[i].a[0] = scale (ev0, B1 (tnas[i].t)); tnas[i].a[1] = scale (ev1, B2 (tnas[i].t)); } if (mkspline (inps, inpn, tnas, ev0, ev1, &p1, &v1, &p2, &v2) == -1) return -1; if (splinefits (edges, edgen, p1, v1, p2, v2, (inpn == 2 ? 1 : 0))) return 0; cp1 = add (p1, scale (v1, 1 / 3.0)); cp2 = sub (p2, scale (v2, 1 / 3.0)); for (maxd = -1, maxi = -1, i = 1; i < inpn - 1; i++) { t = tnas[i].t; p.x = B0 (t) * p1.x + B1 (t) * cp1.x + B2 (t) * cp2.x + B3 (t) * p2.x; p.y = B0 (t) * p1.y + B1 (t) * cp1.y + B2 (t) * cp2.y + B3 (t) * p2.y; if ((d = dist (p, inps[i])) > maxd) maxd = d, maxi = i; } spliti = maxi; splitv1 = normv (sub (inps[spliti], inps[spliti - 1])); splitv2 = normv (sub (inps[spliti + 1], inps[spliti])); splitv = normv (add (splitv1, splitv2)); reallyroutespline (edges, edgen, inps, spliti + 1, ev0, splitv); reallyroutespline (edges, edgen, &inps[spliti], inpn - spliti, splitv, ev1); return 0; }
// Shortest cubic spline through 4 on-curve points(chord approximation) void Bezier::FitSpline(Vec3 p[]) { // use chord length for shortest(best) cubic spline approximation float c3 = (p[1] - p[0]).Magnitude(); float c2 = (p[2] - p[1]).Magnitude(); float c1 = (p[3] - p[2]).Magnitude(); // cases where p[1] is close to p[2] might lead to instabilities(need some heuristic) if (50 * c2 < c1 + c3) { p[1] = p[0] + (p[1] - p[0]) * 0.98; p[2] = p[3] + (p[2] - p[3]) * 0.98; c3 = c3 * 0.98; c2 = (p[2] - p[1]).Magnitude(); c1 = c1 * 0.98; } float t1 = c1 / (c1 + c2 + c3); float t2 = (c1 + c2) / (c1 + c2 + c3); // Solve M * x = y float m00 = B1(t1); float m01 = B2(t1); float m10 = B1(t2); float m11 = B2(t2); float detM = m00 * m11 - m01 * m10; if (fabs(detM) > 1E-3) { // y = p - p0 * B0(t) - p3 * B3(t) Vec3 y1 = p[1] - p[0] * B0(t1) - p[3] * B3(t1); Vec3 y2 = p[2] - p[0] * B0(t2) - p[3] * B3(t2); // Minv float s = 1 / detM; float n00 = s * m11; float n01 = -s * m01; float n10 = -s * m10; float n11 = s * m00; // x = Minv * y Vec3 x1 = y1 * n00 + y2 * n01; Vec3 x2 = y1 * n10 + y2 * n11; p[1] = x1; p[2] = x2; } }
complex LoopToolsWrapper::PV_B0(const double mu2, const double p2, const double m02, const double m12) const { setmudim(mu2); std::complex<double> B0val = B0(p2, m02, m12); return complex( B0val.real(), B0val.imag(), false ); }
int huddraw_help() { fg_color = B0(COLOR_BLACK); bg_color = B3(COLOR_WHITE); int i,l; for(i=0; HUD_HELP[i].title; i++) { if (!strcmp(hud_help_page, HUD_HELP[i].title)) { huddraw_help_text(HUD_HELP[i].text); return 1; } } char text[4096]; sprintf(text, "No help available for topic \n" "%s\n" "\n" "Use #hud help to see the list of\n" "available commands and topics \n" "\n" "Use #hud help <topic> for help \n" "on specific topic \n", hud_help_page); huddraw_help_text(text); return 1; }
void huddraw_info_nav(int r) { char text[256]; fg_color = B0(COLOR_BLACK); bg_color = B3(COLOR_SAND_YELLOW); draw_rect(0,r,128,35,1); fg_color = B2(COLOR_SAND_YELLOW); draw_rect(35,r+2,12,14,0); draw_rect(76,r+2,12,14,0); // coordinates fg_color = B3(COLOR_REDSTONE_RED); bg_color = COLOR_TRANSPARENT; int32_t x = (int32_t)floor(gs.own.x); int32_t z = (int32_t)floor(gs.own.z); int32_t x_= (gs.world==&gs.nether) ? x*8 : x/8; int32_t z_= (gs.world==&gs.nether) ? z*8 : z/8; char * n_= (gs.world==&gs.nether) ? "Overworld" : "Nether"; draw_text(3, r+ 3, "X"); draw_text(3, r+10, "Z"); draw_text(3, r+19, "Y"); draw_text(3, r+26, "DIR"); sprintf(text, "%9d", x); draw_text(11,r+3,text); sprintf(text, "%9d", x_); draw_text(53,r+3,text); sprintf(text, "%9d", z); draw_text(11,r+10,text); sprintf(text, "%9d", z_); draw_text(53,r+10,text); sprintf(text, "%9d", (int32_t)floor(gs.own.y)); draw_text(11,r+19,text); char * dir = "UNKNOWN"; switch(player_direction()) { case DIR_NORTH : dir = "NORTH"; break; case DIR_SOUTH : dir = "SOUTH"; break; case DIR_EAST : dir = "EAST"; break; case DIR_WEST : dir = "WEST"; break; } sprintf(text, "%9s", dir); draw_text(11,r+26,text); int pos = (42-(strlen(n_)*4-1))/2+49; draw_text(pos, r+26, n_); // compass huddraw_compass(108,r+17,B0(COLOR_BLACK), B3(COLOR_REDSTONE_RED)); }
/* * The IXP425 expansion bus only allows 16-bit wide acceses * when attached to a 16-bit wide device (such as the 28F128J3A), * so we can't use a memcpy_fromio as it does byte acceses. */ static void ixp425_copy_from(struct map_info *map, void *to, unsigned long from, ssize_t len) { int i; u8 *dest = (u8*)to; u16 *src = (u16 *)(map->map_priv_1 + from); u16 data; for(i = 0; i < (len / 2); i++) { data = src[i]; dest[i * 2] = B0(data); dest[i * 2 + 1] = B1(data); } if(len & 1) dest[len - 1] = B0(src[i]); }
void copy_from_flash(unsigned long from, void *to,ssize_t len) { int i; u8 *dest = (u8*)to; u16 data; unsigned long remap = (unsigned long)ioremap(FLASH_BASE_ADDR,FLASH_SIZE); u16 *src = (u16 *)(remap + from); for(i = 0; i < (len / 2);i++){ data = src[i]; dest[i * 2] = B0(data); dest[i * 2 + 1] = B1(data); } if(len & 1) dest[len - 1] = B0(src[i]); iounmap(remap); }
void huddraw_info_health(int r) { char text[256]; fg_color = B0(COLOR_BLACK); bg_color = B3(COLOR_GRASS_GREEN); draw_rect(0,r,128,12,1); bg_color = COLOR_TRANSPARENT; fg_color = B3(COLOR_REDSTONE_RED); draw_blit(fonts, FONTS_ICON_HEART, 8, 8, 2, r+2); fg_color = B0(COLOR_BLACK); sprintf(text, "%.1f", gs.own.health); draw_text(12, r+3, text); fg_color = B2(COLOR_ORANGE); draw_blit(fonts, FONTS_ICON_FOOD, 8, 8, 33, r+2); fg_color = B0(COLOR_BLACK); sprintf(text, "%d (%.1f)", gs.own.food, gs.own.saturation); draw_text(43, r+3, text); }
secure_vector<byte> CCM_Mode::format_b0(size_t sz) { secure_vector<byte> B0(BS); const byte b_flags = (m_ad_buf.size() ? 64 : 0) + (((tag_size()/2)-1) << 3) + (L()-1); B0[0] = b_flags; copy_mem(&B0[1], m_nonce.data(), m_nonce.size()); encode_length(sz, &B0[m_nonce.size()+1]); return B0; }
secure_vector<uint8_t> CCM_Mode::format_b0(size_t sz) { secure_vector<uint8_t> B0(CCM_BS); const uint8_t b_flags = static_cast<uint8_t>((m_ad_buf.size() ? 64 : 0) + (((tag_size()/2)-1) << 3) + (L()-1)); B0[0] = b_flags; copy_mem(&B0[1], m_nonce.data(), m_nonce.size()); encode_length(sz, &B0[m_nonce.size()+1]); return B0; }
void CAST5decrypt(const PGPUInt8 *in, PGPUInt8 *out, const PGPUInt32 *xkey) { PGPUInt32 l, r, t; r = (PGPUInt32) in[0]<<24 | (PGPUInt32) in[1]<<16 | (PGPUInt32) in[2]<<8 | in[3]; l = (PGPUInt32) in[4]<<24 | (PGPUInt32) in[5]<<16 | (PGPUInt32) in[6]<<8 | in[7]; t = F1(l, xkey, 15); r ^= G1(t); t = F3(r, xkey, 14); l ^= G3(t); t = F2(l, xkey, 13); r ^= G2(t); t = F1(r, xkey, 12); l ^= G1(t); // Start here if only doing 12 rounds t = F3(l, xkey, 11); r ^= G3(t); t = F2(r, xkey, 10); l ^= G2(t); t = F1(l, xkey, 9); r ^= G1(t); t = F3(r, xkey, 8); l ^= G3(t); t = F2(l, xkey, 7); r ^= G2(t); t = F1(r, xkey, 6); l ^= G1(t); t = F3(l, xkey, 5); r ^= G3(t); t = F2(r, xkey, 4); l ^= G2(t); t = F1(l, xkey, 3); r ^= G1(t); t = F3(r, xkey, 2); l ^= G3(t); t = F2(l, xkey, 1); r ^= G2(t); t = F1(r, xkey, 0); l ^= G1(t); out[0] = (PGPUInt8) B0(l); out[1] = (PGPUInt8) B1(l); out[2] = (PGPUInt8) B2(l); out[3] = (PGPUInt8) B3(l); out[4] = (PGPUInt8) B0(r); out[5] = (PGPUInt8) B1(r); out[6] = (PGPUInt8) B2(r); out[7] = (PGPUInt8) B3(r); }
/* * Encrypt the 8 bytes at *in into the 8 bytes at *out using the expanded * key schedule from *xkey. */ static void CAST5encrypt(PGPByte const *in, PGPByte *out, PGPUInt32 const *xkey) { PGPUInt32 l, r, t; l = (PGPUInt32) in[0]<<24 | (PGPUInt32)in[1]<<16 | (PGPUInt32)in[2]<<8 | in[3]; r = (PGPUInt32) in[4]<<24 | (PGPUInt32)in[5]<<16 | (PGPUInt32)in[6]<<8 | in[7]; t = F1(r, xkey, 0); l ^= G1(t); t = F2(l, xkey, 1); r ^= G2(t); t = F3(r, xkey, 2); l ^= G3(t); t = F1(l, xkey, 3); r ^= G1(t); t = F2(r, xkey, 4); l ^= G2(t); t = F3(l, xkey, 5); r ^= G3(t); t = F1(r, xkey, 6); l ^= G1(t); t = F2(l, xkey, 7); r ^= G2(t); t = F3(r, xkey, 8); l ^= G3(t); t = F1(l, xkey, 9); r ^= G1(t); t = F2(r, xkey, 10); l ^= G2(t); t = F3(l, xkey, 11); r ^= G3(t); /* Stop here if only doing 12 rounds */ t = F1(r, xkey, 12); l ^= G1(t); t = F2(l, xkey, 13); r ^= G2(t); t = F3(r, xkey, 14); l ^= G3(t); t = F1(l, xkey, 15); r ^= G1(t); out[0] = B0(r); out[1] = B1(r); out[2] = B2(r); out[3] = B3(r); out[4] = B0(l); out[5] = B1(l); out[6] = B2(l); out[7] = B3(l); }
int huddraw_map() { int shading[16] = { 3, 3, 3, 3, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2 }; if (!(hud_inv & HUDINVMASK_TUNNEL)) return 0; bg_color = B0(COLOR_BLACK); draw_clear(); int32_t x = (int32_t)floor(gs.own.x); int32_t y = (int32_t)floor(gs.own.y); int32_t z = (int32_t)floor(gs.own.z); extent_t ex = { { x-80, y-12, z-80 }, { x+80, y+3, z+80 } }; cuboid_t cb = export_cuboid_extent(ex); int r,c,i,j; int32_t off = cb.boff + 16*cb.sa.x + 16; for(r=0; r<128; r++) { for(c=0; c<128; c++) { int poff = off+r*cb.sa.x+c; for(j=0; j<16; j++) { if ( cb.data[j][poff].bid ) { int8_t color = BLOCK_COLORMAP[cb.data[j][poff].bid][cb.data[j][poff].meta]; hud_image[r*128+c] = color*4 + shading[j]; } } } } for(i=0; i<256; i++) lh_free(cb.data[i]); hud_image[64*128+64] = 126; char text[256]; bg_color = B3(COLOR_WHITE); fg_color = B3(COLOR_REDSTONE_RED); sprintf(text, "%d,%d", x, z); draw_text(2, 2, text); sprintf(text, "Y:%d", y); draw_text(2, 9, text); bg_color = COLOR_TRANSPARENT; huddraw_compass(111, 16, B3(COLOR_GOLD_YELLOW), B3(COLOR_WHITE)); return 1; }
void huddraw_info_inv_item(int id, float damage, int r) { int col = 4+31*(id%4); int row = r+2+10*(id/4); int fcol = FONTS_ICON_EQ_C+8*(id%4); int frow = FONTS_ICON_EQ_R+8*(id/4); bg_color = COLOR_TRANSPARENT; fg_color = (damage<0) ? B0(COLOR_CLAY_GRAY) : B3(COLOR_DIAMOND_BLUE); draw_blit(fonts, fcol, frow, 8, 8, col, row); fg_color = B2(COLOR_CLAY_GRAY); draw_rect(col+10, row+1, 16, 6, 1); if (damage >= 0) { fg_color = B3(COLOR_LAPIS_BLUE); if (damage < 1.0) fg_color = B3(COLOR_EMERALD_GREEN); if (damage < 0.5) fg_color = B3(COLOR_GOLD_YELLOW); if (damage < 0.25) fg_color = B3(COLOR_ORANGE); if (damage < 0.1) fg_color = B3(COLOR_REDSTONE_RED); draw_rect(col+10, row+1, 16*damage, 6, 0); } }
int test() { int Error = 0; int A0 = static_cast<int>(glm::log2(16.f)); glm::ivec1 B0(glm::log2(glm::vec1(16.f))); glm::ivec2 C0(glm::log2(glm::vec2(16.f))); glm::ivec3 D0(glm::log2(glm::vec3(16.f))); glm::ivec4 E0(glm::log2(glm::vec4(16.f))); int A1 = glm::log2(int(16)); glm::ivec1 B1 = glm::log2(glm::ivec1(16)); glm::ivec2 C1 = glm::log2(glm::ivec2(16)); glm::ivec3 D1 = glm::log2(glm::ivec3(16)); glm::ivec4 E1 = glm::log2(glm::ivec4(16)); Error += A0 == A1 ? 0 : 1; Error += glm::all(glm::equal(B0, B1)) ? 0 : 1; Error += glm::all(glm::equal(C0, C1)) ? 0 : 1; Error += glm::all(glm::equal(D0, D1)) ? 0 : 1; Error += glm::all(glm::equal(E0, E1)) ? 0 : 1; glm::uint64 A2 = glm::log2(glm::uint64(16)); glm::u64vec1 B2 = glm::log2(glm::u64vec1(16)); glm::u64vec2 C2 = glm::log2(glm::u64vec2(16)); glm::u64vec3 D2 = glm::log2(glm::u64vec3(16)); glm::u64vec4 E2 = glm::log2(glm::u64vec4(16)); Error += A2 == glm::uint64(4) ? 0 : 1; Error += glm::all(glm::equal(B2, glm::u64vec1(4))) ? 0 : 1; Error += glm::all(glm::equal(C2, glm::u64vec2(4))) ? 0 : 1; Error += glm::all(glm::equal(D2, glm::u64vec3(4))) ? 0 : 1; Error += glm::all(glm::equal(E2, glm::u64vec4(4))) ? 0 : 1; return Error; }
void huddraw_info_inv(int r) { bg_color = B3(COLOR_PINK); fg_color = B0(COLOR_BLACK); draw_rect(0,r,128,22,1); int present[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; int i,j; for(j=0; hii[j].pos>=0; j++) { hudinvitem *h = hii+j; for(i=h->ssid; i<=h->lsid; i++) { slot_t *s = gs.inv.slots+i; if (s->item == h->iid) { float damage = ((float)h->dur-(float)s->damage)/(float)h->dur; huddraw_info_inv_item(h->pos, damage, r); present[h->pos] = 1; } } } for(i=0; i<8; i++) if (!present[i]) huddraw_info_inv_item(i, -1, r); }
int test() { int Error = 0; int A0(glm::log2(10.f)); glm::ivec1 B0(glm::log2(glm::vec1(10.f))); glm::ivec2 C0(glm::log2(glm::vec2(10.f))); glm::ivec3 D0(glm::log2(glm::vec3(10.f))); glm::ivec4 E0(glm::log2(glm::vec4(10.f))); int A1 = glm::log2(int(10.f)); glm::ivec1 B1 = glm::log2(glm::ivec1(10.f)); glm::ivec2 C1 = glm::log2(glm::ivec2(10.f)); glm::ivec3 D1 = glm::log2(glm::ivec3(10.f)); glm::ivec4 E1 = glm::log2(glm::ivec4(10.f)); Error += A0 == A1 ? 0 : 1; Error += glm::all(glm::equal(B0, B1)) ? 0 : 1; Error += glm::all(glm::equal(C0, C1)) ? 0 : 1; Error += glm::all(glm::equal(D0, D1)) ? 0 : 1; Error += glm::all(glm::equal(E0, E1)) ? 0 : 1; return Error; }
/* * GenerateBezier : * Use least-squares method to find Bezier control points for region. * */ static BezierCurve GenerateBezier(Point2 *d, int first, int last, double *uPrime, Vector2 tHat1, Vector2 tHat2) { int i; Vector2 A[MAXPOINTS][2]; /* Precomputed rhs for eqn */ int nPts; /* Number of pts in sub-curve */ double C[2][2]; /* Matrix C */ double X[2]; /* Matrix X */ double det_C0_C1, /* Determinants of matrices */ det_C0_X, det_X_C1; double alpha_l, /* Alpha values, left and right */ alpha_r; Vector2 tmp; /* Utility variable */ BezierCurve bezCurve; /* RETURN bezier curve ctl pts */ bezCurve = (Point2 *)malloc(4 * sizeof(Point2)); nPts = last - first + 1; /* Compute the A's */ for (i = 0; i < nPts; i++) { Vector2 v1, v2; v1 = tHat1; v2 = tHat2; V2Scale(&v1, B1(uPrime[i])); V2Scale(&v2, B2(uPrime[i])); A[i][0] = v1; A[i][1] = v2; } /* Create the C and X matrices */ C[0][0] = 0.0; C[0][1] = 0.0; C[1][0] = 0.0; C[1][1] = 0.0; X[0] = 0.0; X[1] = 0.0; for (i = 0; i < nPts; i++) { C[0][0] += V2Dot(&A[i][0], &A[i][0]); C[0][1] += V2Dot(&A[i][0], &A[i][1]); /* C[1][0] += V2Dot(&A[i][0], &A[i][1]);*/ C[1][0] = C[0][1]; C[1][1] += V2Dot(&A[i][1], &A[i][1]); tmp = V2SubII(d[first + i], V2AddII( V2ScaleIII(d[first], B0(uPrime[i])), V2AddII( V2ScaleIII(d[first], B1(uPrime[i])), V2AddII( V2ScaleIII(d[last], B2(uPrime[i])), V2ScaleIII(d[last], B3(uPrime[i])))))); X[0] += V2Dot(&A[i][0], &tmp); X[1] += V2Dot(&A[i][1], &tmp); } /* Compute the determinants of C and X */ det_C0_C1 = C[0][0] * C[1][1] - C[1][0] * C[0][1]; det_C0_X = C[0][0] * X[1] - C[1][0] * X[0]; det_X_C1 = X[0] * C[1][1] - X[1] * C[0][1]; /* Finally, derive alpha values */ alpha_l = (det_C0_C1 < ZERO_TOLERANCE) ? 0.0 : det_X_C1 / det_C0_C1; alpha_r = (det_C0_C1 < ZERO_TOLERANCE) ? 0.0 : det_C0_X / det_C0_C1; /* If alpha negative, use the Wu/Barsky heuristic (see text) */ /* (if alpha is 0, you get coincident control points that lead to * divide by zero in any subsequent NewtonRaphsonRootFind() call. */ double segLength = V2DistanceBetween2Points(&d[last], &d[first]); double epsilon = 1.0e-6 * segLength; if (alpha_l < epsilon || alpha_r < epsilon) { /* fall back on standard (probably inaccurate) formula, and subdivide further if needed. */ double dist = segLength / 3.0; bezCurve[0] = d[first]; bezCurve[3] = d[last]; V2Add(&bezCurve[0], V2Scale(&tHat1, dist), &bezCurve[1]); V2Add(&bezCurve[3], V2Scale(&tHat2, dist), &bezCurve[2]); return (bezCurve); } /* First and last control points of the Bezier curve are */ /* positioned exactly at the first and last data points */ /* Control points 1 and 2 are positioned an alpha distance out */ /* on the tangent vectors, left and right, respectively */ bezCurve[0] = d[first]; bezCurve[3] = d[last]; V2Add(&bezCurve[0], V2Scale(&tHat1, alpha_l), &bezCurve[1]); V2Add(&bezCurve[3], V2Scale(&tHat2, alpha_r), &bezCurve[2]); return (bezCurve); }
void Trr2kNNNT ( UpperOrLower uplo, Orientation orientationOfD, T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, const DistMatrix<T>& C, const DistMatrix<T>& D, T beta, DistMatrix<T>& E ) { #ifndef RELEASE PushCallStack("internal::Trr2kNNNT"); if( E.Height() != E.Width() || A.Width() != C.Width() || A.Height() != E.Height() || C.Height() != E.Height() || B.Width() != E.Width() || D.Height() != E.Width() || A.Width() != B.Height() || C.Width() != D.Width() ) throw std::logic_error("Nonconformal Trr2kNNNT"); #endif const Grid& g = E.Grid(); DistMatrix<T> AL(g), AR(g), A0(g), A1(g), A2(g); DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g); DistMatrix<T> CL(g), CR(g), C0(g), C1(g), C2(g); DistMatrix<T> DL(g), DR(g), D0(g), D1(g), D2(g); DistMatrix<T,MC, STAR> A1_MC_STAR(g); DistMatrix<T,MR, STAR> B1Trans_MR_STAR(g); DistMatrix<T,MC, STAR> C1_MC_STAR(g); DistMatrix<T,VR, STAR> D1_VR_STAR(g); DistMatrix<T,STAR,MR > D1AdjOrTrans_STAR_MR(g); A1_MC_STAR.AlignWith( E ); B1Trans_MR_STAR.AlignWith( E ); C1_MC_STAR.AlignWith( E ); D1_VR_STAR.AlignWith( E ); D1AdjOrTrans_STAR_MR.AlignWith( E ); LockedPartitionRight( A, AL, AR, 0 ); LockedPartitionDown ( B, BT, BB, 0 ); LockedPartitionRight( C, CL, CR, 0 ); LockedPartitionRight( D, DL, DR, 0 ); while( AL.Width() < A.Width() ) { LockedRepartitionRight ( AL, /**/ AR, A0, /**/ A1, A2 ); LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); LockedRepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 ); LockedRepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 ); //--------------------------------------------------------------------// A1_MC_STAR = A1; C1_MC_STAR = C1; B1Trans_MR_STAR.TransposeFrom( B1 ); D1_VR_STAR = D1; if( orientationOfD == ADJOINT ) D1AdjOrTrans_STAR_MR.AdjointFrom( D1_VR_STAR ); else D1AdjOrTrans_STAR_MR.TransposeFrom( D1_VR_STAR ); LocalTrr2k ( uplo, TRANSPOSE, alpha, A1_MC_STAR, B1Trans_MR_STAR, C1_MC_STAR, D1AdjOrTrans_STAR_MR, beta, E ); //--------------------------------------------------------------------// SlideLockedPartitionRight ( DL, /**/ DR, D0, D1, /**/ D2 ); SlideLockedPartitionRight ( CL, /**/ CR, C0, C1, /**/ C2 ); SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); SlideLockedPartitionRight ( AL, /**/ AR, A0, A1, /**/ A2 ); } #ifndef RELEASE PopCallStack(); #endif }
/* * GenerateBezier : * Use least-squares method to find Bezier control points for region. * */ QPointF* GenerateBezier(const QList<QPointF> &points, int first, int last, qreal *uPrime, FitVector tHat1, FitVector tHat2) { int i; int nPts; /* Number of pts in sub-curve */ qreal C[2][2]; /* Matrix C */ qreal X[2]; /* Matrix X */ qreal det_C0_C1, /* Determinants of matrices */ det_C0_X, det_X_C1; qreal alpha_l, /* Alpha values, left and right */ alpha_r; FitVector tmp; /* Utility variable */ QPointF *curve; curve = new QPointF[4]; nPts = last - first + 1; /* Precomputed rhs for eqn */ // FitVector A[nPts][2] QVector< QVector<FitVector> > A(nPts, QVector<FitVector>(2)); /* Compute the A's */ for (i = 0; i < nPts; ++i) { FitVector v1, v2; v1 = tHat1; v2 = tHat2; v1.scale(B1(uPrime[i])); v2.scale(B2(uPrime[i])); A[i][0] = v1; A[i][1] = v2; } /* Create the C and X matrices */ C[0][0] = 0.0; C[0][1] = 0.0; C[1][0] = 0.0; C[1][1] = 0.0; X[0] = 0.0; X[1] = 0.0; for (i = 0; i < nPts; ++i) { C[0][0] += (A[i][0]).dot(A[i][0]); C[0][1] += A[i][0].dot(A[i][1]); /* C[1][0] += V2Dot(&A[i][0], &A[i][1]);*/ C[1][0] = C[0][1]; C[1][1] += A[i][1].dot(A[i][1]); FitVector vfirstp1(points.at(first + i)); FitVector vfirst(points.at(first)); FitVector vlast(points.at(last)); tmp = VectorSub(vfirstp1, VectorAdd( VectorScale(vfirst, B0(uPrime[i])), VectorAdd( VectorScale(vfirst, B1(uPrime[i])), VectorAdd( VectorScale(vlast, B2(uPrime[i])), VectorScale(vlast, B3(uPrime[i])))))); X[0] += A[i][0].dot(tmp); X[1] += A[i][1].dot(tmp); } /* Compute the determinants of C and X */ det_C0_C1 = C[0][0] * C[1][1] - C[1][0] * C[0][1]; det_C0_X = C[0][0] * X[1] - C[0][1] * X[0]; det_X_C1 = X[0] * C[1][1] - X[1] * C[0][1]; /* Finally, derive alpha values */ if (qFuzzyCompare(det_C0_C1, qreal(0.0))) { det_C0_C1 = (C[0][0] * C[1][1]) * 10e-12; if (qFuzzyCompare(det_C0_C1, qreal(0.0))) { det_C0_C1 = Zero; } } alpha_l = det_X_C1 / det_C0_C1; alpha_r = det_C0_X / det_C0_C1; /* If alpha negative, use the Wu/Barsky heuristic (see text) */ /* (if alpha is 0, you get coincident control points that lead to * divide by zero in any subsequent NewtonRaphsonRootFind() call. */ if (alpha_l < 1.0e-6 || alpha_r < 1.0e-6) { qreal dist = distance(points.at(last), points.at(first)) / 3.0; curve[0] = points.at(first); curve[3] = points.at(last); tHat1.scale(dist); tHat2.scale(dist); curve[1] = tHat1 + curve[0]; curve[2] = tHat2 + curve[3]; return curve; } /* First and last control points of the Bezier curve are */ /* positioned exactly at the first and last data points */ /* Control points 1 and 2 are positioned an alpha distance out */ /* on the tangent vectors, left and right, respectively */ curve[0] = points.at(first); curve[3] = points.at(last); tHat1.scale(alpha_l); tHat2.scale(alpha_r); curve[1] = tHat1 + curve[0]; curve[2] = tHat2 + curve[3]; return (curve); }
inline void GemmTTA ( Orientation orientationOfA, Orientation orientationOfB, T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::GemmTTA"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( orientationOfA == NORMAL || orientationOfB == NORMAL ) throw std::logic_error ("GemmTTA expects A and B to be (Conjugate)Transposed"); if( A.Width() != C.Height() || B.Height() != C.Width() || A.Height() != B.Width() ) { std::ostringstream msg; msg << "Nonconformal GemmTTA: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g); DistMatrix<T> CL(g), CR(g), C0(g), C1(g), C2(g); // Temporary distributions DistMatrix<T,STAR,MC > B1_STAR_MC(g); DistMatrix<T,MR, STAR> D1_MR_STAR(g); DistMatrix<T,MR, MC > D1_MR_MC(g); DistMatrix<T> D1(g); B1_STAR_MC.AlignWith( A ); D1_MR_STAR.AlignWith( A ); // Start the algorithm Scale( beta, C ); LockedPartitionDown ( B, BT, BB, 0 ); PartitionRight( C, CL, CR, 0 ); while( BB.Height() > 0 ) { LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); RepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 ); D1.AlignWith( C1 ); Zeros( C1.Height(), C1.Width(), D1_MR_STAR ); //--------------------------------------------------------------------// B1_STAR_MC = B1; // B1[*,MC] <- B1[MC,MR] // D1[MR,*] := alpha (A[MC,MR])^T (B1[*,MC])^T // = alpha (A^T)[MR,MC] (B1^T)[MC,*] LocalGemm ( orientationOfA, orientationOfB, alpha, A, B1_STAR_MC, T(0), D1_MR_STAR ); // C1[MC,MR] += scattered & transposed D1[MR,*] summed over grid cols D1_MR_MC.SumScatterFrom( D1_MR_STAR ); D1 = D1_MR_MC; Axpy( T(1), D1, C1 ); //--------------------------------------------------------------------// D1.FreeAlignments(); SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); SlidePartitionRight ( CL, /**/ CR, C0, C1, /**/ C2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void GemmNNDot ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::GemmNNDot"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( A.Height() != C.Height() || B.Width() != C.Width() || A.Width() != B.Height() ) { std::ostringstream msg; msg << "Nonconformal GemmNNDot: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); if( A.Height() > B.Width() ) { // Matrix views DistMatrix<T> AT(g), AB(g), A0(g), A1(g), A2(g); DistMatrix<T> BL(g), B0(g), BR(g), B1(g), B2(g); DistMatrix<T> CT(g), C0(g), C1L(g), C1R(g), CB(g), C1(g), C10(g), C11(g), C12(g), C2(g); // Temporary distributions DistMatrix<T,STAR,VC> A1_STAR_VC(g); DistMatrix<T,VC,STAR> B1_VC_STAR(g); DistMatrix<T,STAR,STAR> C11_STAR_STAR(g); // Star the algorithm Scale( beta, C ); LockedPartitionDown ( A, AT, AB, 0 ); PartitionDown ( C, CT, CB, 0 ); while( AB.Height() > 0 ) { LockedRepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); RepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); A1_STAR_VC = A1; B1_VC_STAR.AlignWith( A1_STAR_VC ); LockedPartitionRight( B, BL, BR, 0 ); PartitionRight( C1, C1L, C1R, 0 ); while( BR.Width() > 0 ) { LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); RepartitionRight ( C1L, /**/ C1R, C10, /**/ C11, C12 ); Zeros( C11.Height(), C11.Width(), C11_STAR_STAR ); //------------------------------------------------------------// B1_VC_STAR = B1; LocalGemm ( NORMAL, NORMAL, alpha, A1_STAR_VC, B1_VC_STAR, T(0), C11_STAR_STAR ); C11.SumScatterUpdate( T(1), C11_STAR_STAR ); //------------------------------------------------------------// SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); SlidePartitionRight ( C1L, /**/ C1R, C10, C11, /**/ C12 ); } B1_VC_STAR.FreeAlignments(); SlideLockedPartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); SlidePartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); } } else { // Matrix views DistMatrix<T> AT(g), AB(g), A0(g), A1(g), A2(g); DistMatrix<T> BL(g), B0(g), BR(g), B1(g), B2(g); DistMatrix<T> CL(g), CR(g), C1T(g), C01(g), C0(g), C1(g), C2(g), C1B(g), C11(g), C21(g); // Temporary distributions DistMatrix<T,STAR,VR> A1_STAR_VR(g); DistMatrix<T,VR,STAR> B1_VR_STAR(g); DistMatrix<T,STAR,STAR> C11_STAR_STAR(g); // Star the algorithm Scale( beta, C ); LockedPartitionRight( B, BL, BR, 0 ); PartitionRight( C, CL, CR, 0 ); while( BR.Width() > 0 ) { LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); RepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 ); B1_VR_STAR = B1; A1_STAR_VR.AlignWith( B1_VR_STAR ); LockedPartitionDown ( A, AT, AB, 0 ); PartitionDown ( C1, C1T, C1B, 0 ); while( AB.Height() > 0 ) { LockedRepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); RepartitionDown ( C1T, C01, /***/ /***/ C11, C1B, C21 ); Zeros( C11.Height(), C11.Width(), C11_STAR_STAR ); //------------------------------------------------------------// A1_STAR_VR = A1; LocalGemm ( NORMAL, NORMAL, alpha, A1_STAR_VR, B1_VR_STAR, T(0), C11_STAR_STAR ); C11.SumScatterUpdate( T(1), C11_STAR_STAR ); //------------------------------------------------------------// SlideLockedPartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); SlidePartitionDown ( C1T, C01, C11, /***/ /***/ C1B, C21 ); } A1_STAR_VR.FreeAlignments(); SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); SlidePartitionRight ( CL, /**/ CR, C0, C1, /**/ C2 ); } } #ifndef RELEASE PopCallStack(); #endif }
inline void GemmNNC ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::GemmNNC"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( A.Height() != C.Height() || B.Width() != C.Width() || A.Width() != B.Height() ) { std::ostringstream msg; msg << "Nonconformal GemmNNC: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> AL(g), AR(g), A0(g), A1(g), A2(g); DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g); // Temporary distributions DistMatrix<T,MC,STAR> A1_MC_STAR(g); DistMatrix<T,MR,STAR> B1Trans_MR_STAR(g); A1_MC_STAR.AlignWith( C ); B1Trans_MR_STAR.AlignWith( C ); // Start the algorithm Scale( beta, C ); LockedPartitionRight( A, AL, AR, 0 ); LockedPartitionDown ( B, BT, BB, 0 ); while( AR.Width() > 0 ) { LockedRepartitionRight( AL, /**/ AR, A0, /**/ A1, A2 ); LockedRepartitionDown( BT, B0, /**/ /**/ B1, BB, B2 ); //--------------------------------------------------------------------// A1_MC_STAR = A1; B1Trans_MR_STAR.TransposeFrom( B1 ); // C[MC,MR] += alpha A1[MC,*] (B1^T[MR,*])^T // = alpha A1[MC,*] B1[*,MR] LocalGemm ( NORMAL, TRANSPOSE, alpha, A1_MC_STAR, B1Trans_MR_STAR, T(1), C ); //--------------------------------------------------------------------// SlideLockedPartitionRight( AL, /**/ AR, A0, A1, /**/ A2 ); SlideLockedPartitionDown( BT, B0, B1, /**/ /**/ BB, B2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void SymmLLA ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::SymmLLA"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); #endif const Grid& g = A.Grid(); DistMatrix<T> BL(g), BR(g), B0(g), B1(g), B2(g); DistMatrix<T> CL(g), CR(g), C0(g), C1(g), C2(g); DistMatrix<T,MC,STAR> B1_MC_STAR(g); DistMatrix<T,VR,STAR> B1_VR_STAR(g); DistMatrix<T,STAR,MR> B1Trans_STAR_MR(g); DistMatrix<T> Z1(g); DistMatrix<T,MC,STAR> Z1_MC_STAR(g); DistMatrix<T,MR,STAR> Z1_MR_STAR(g); DistMatrix<T,MR,MC > Z1_MR_MC(g); B1_MC_STAR.AlignWith( A ); B1_VR_STAR.AlignWith( A ); B1Trans_STAR_MR.AlignWith( A ); Z1_MC_STAR.AlignWith( A ); Z1_MR_STAR.AlignWith( A ); Scale( beta, C ); LockedPartitionRight ( B, BL, BR, 0 ); PartitionRight ( C, CL, CR, 0 ); while( CL.Width() < C.Width() ) { LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); RepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 ); Z1.AlignWith( C1 ); Zeros( C1.Height(), C1.Width(), Z1_MC_STAR ); Zeros( C1.Height(), C1.Width(), Z1_MR_STAR ); //--------------------------------------------------------------------// B1_MC_STAR = B1; B1_VR_STAR = B1_MC_STAR; B1Trans_STAR_MR.TransposeFrom( B1_VR_STAR ); LocalSymmetricAccumulateLL ( TRANSPOSE, alpha, A, B1_MC_STAR, B1Trans_STAR_MR, Z1_MC_STAR, Z1_MR_STAR ); Z1_MR_MC.SumScatterFrom( Z1_MR_STAR ); Z1 = Z1_MR_MC; Z1.SumScatterUpdate( T(1), Z1_MC_STAR ); Axpy( T(1), Z1, C1 ); //--------------------------------------------------------------------// Z1.FreeAlignments(); SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); SlidePartitionRight ( CL, /**/ CR, C0, C1, /**/ C2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void Her2kLN ( T alpha, const DistMatrix<T,MC,MR>& A, const DistMatrix<T,MC,MR>& B, T beta, DistMatrix<T,MC,MR>& C ) { #ifndef RELEASE PushCallStack("internal::Her2kLN"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( A.Height() != C.Height() || A.Height() != C.Width() || B.Height() != C.Height() || B.Height() != C.Width() || A.Width() != B.Width() ) { std::ostringstream msg; msg << "Nonconformal Her2kLN:\n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T,MC,MR> AL(g), AR(g), A0(g), A1(g), A2(g); DistMatrix<T,MC,MR> BL(g), BR(g), B0(g), B1(g), B2(g); // Temporary distributions DistMatrix<T,MC, STAR> A1_MC_STAR(g); DistMatrix<T,MC, STAR> B1_MC_STAR(g); DistMatrix<T,VR, STAR> A1_VR_STAR(g); DistMatrix<T,VR, STAR> B1_VR_STAR(g); DistMatrix<T,STAR,MR > A1Adj_STAR_MR(g); DistMatrix<T,STAR,MR > B1Adj_STAR_MR(g); A1_MC_STAR.AlignWith( C ); B1_MC_STAR.AlignWith( C ); A1_VR_STAR.AlignWith( C ); B1_VR_STAR.AlignWith( C ); A1Adj_STAR_MR.AlignWith( C ); B1Adj_STAR_MR.AlignWith( C ); // Start the algorithm ScaleTrapezoid( beta, LEFT, LOWER, 0, C ); LockedPartitionRight( A, AL, AR, 0 ); LockedPartitionRight( B, BL, BR, 0 ); while( AR.Width() > 0 ) { LockedRepartitionRight ( AL, /**/ AR, A0, /**/ A1, A2 ); LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); //--------------------------------------------------------------------// A1_VR_STAR = A1_MC_STAR = A1; A1Adj_STAR_MR.AdjointFrom( A1_VR_STAR ); B1_VR_STAR = B1_MC_STAR = B1; B1Adj_STAR_MR.AdjointFrom( B1_VR_STAR ); LocalTrr2k ( LOWER, alpha, A1_MC_STAR, B1Adj_STAR_MR, B1_MC_STAR, A1Adj_STAR_MR, T(1), C ); //--------------------------------------------------------------------// SlideLockedPartitionRight ( AL, /**/ AR, A0, A1, /**/ A2 ); SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void Syr2kUT ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C, bool conjugate=false ) { #ifndef RELEASE CallStackEntry entry("internal::Syr2kUT"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( A.Width() != C.Height() || A.Width() != C.Width() || B.Width() != C.Height() || B.Width() != C.Width() || A.Height() != B.Height() ) { std::ostringstream msg; msg << "Nonconformal Syr2kUT:\n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); const Orientation orientation = ( conjugate ? ADJOINT : TRANSPOSE ); // Matrix views DistMatrix<T> AT(g), A0(g), AB(g), A1(g), A2(g); DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g); // Temporary distributions DistMatrix<T,MR, STAR> A1Trans_MR_STAR(g); DistMatrix<T,MR, STAR> B1Trans_MR_STAR(g); DistMatrix<T,STAR,VR > A1_STAR_VR(g); DistMatrix<T,STAR,VR > B1_STAR_VR(g); DistMatrix<T,STAR,MC > A1_STAR_MC(g); DistMatrix<T,STAR,MC > B1_STAR_MC(g); A1Trans_MR_STAR.AlignWith( C ); B1Trans_MR_STAR.AlignWith( C ); A1_STAR_MC.AlignWith( C ); B1_STAR_MC.AlignWith( C ); // Start the algorithm ScaleTrapezoid( beta, LEFT, UPPER, 0, C ); LockedPartitionDown ( A, AT, AB, 0 ); LockedPartitionDown ( B, BT, BB, 0 ); while( AB.Height() > 0 ) { LockedRepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); //--------------------------------------------------------------------// A1Trans_MR_STAR.TransposeFrom( A1 ); A1_STAR_VR.TransposeFrom( A1Trans_MR_STAR ); A1_STAR_MC = A1_STAR_VR; B1Trans_MR_STAR.TransposeFrom( B1 ); B1_STAR_VR.TransposeFrom( B1Trans_MR_STAR ); B1_STAR_MC = B1_STAR_VR; LocalTrr2k ( UPPER, orientation, TRANSPOSE, orientation, TRANSPOSE, alpha, A1_STAR_MC, B1Trans_MR_STAR, B1_STAR_MC, A1Trans_MR_STAR, T(1), C ); //--------------------------------------------------------------------// SlideLockedPartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); } }
inline void GemmTTC ( Orientation orientationOfA, Orientation orientationOfB, T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::GemmTTC"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( orientationOfA == NORMAL || orientationOfB == NORMAL ) throw std::logic_error ("GemmTTC expects A and B to be (Conjugate)Transposed"); if( A.Width() != C.Height() || B.Height() != C.Width() || A.Height() != B.Width() ) { std::ostringstream msg; msg << "Nonconformal GemmTTC: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> AT(g), A0(g), AB(g), A1(g), A2(g); DistMatrix<T> BL(g), BR(g), B0(g), B1(g), B2(g); // Temporary distributions DistMatrix<T,STAR,MC > A1_STAR_MC(g); DistMatrix<T,VR, STAR> B1_VR_STAR(g); DistMatrix<T,STAR,MR > B1AdjOrTrans_STAR_MR(g); A1_STAR_MC.AlignWith( C ); B1_VR_STAR.AlignWith( C ); B1AdjOrTrans_STAR_MR.AlignWith( C ); // Start the algorithm Scale( beta, C ); LockedPartitionDown ( A, AT, AB, 0 ); LockedPartitionRight( B, BL, BR, 0 ); while( AB.Height() > 0 ) { LockedRepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); //--------------------------------------------------------------------// A1_STAR_MC = A1; B1_VR_STAR = B1; if( orientationOfB == ADJOINT ) B1AdjOrTrans_STAR_MR.AdjointFrom( B1_VR_STAR ); else B1AdjOrTrans_STAR_MR.TransposeFrom( B1_VR_STAR ); // C[MC,MR] += alpha (A1[*,MC])^[T/H] (B1[MR,*])^[T/H] // = alpha (A1^[T/H])[MC,*] (B1^[T/H])[*,MR] LocalGemm ( orientationOfA, NORMAL, alpha, A1_STAR_MC, B1AdjOrTrans_STAR_MR, T(1), C ); //--------------------------------------------------------------------// SlideLockedPartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void SymmLLC ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::SymmLLC"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> ATL(g), ATR(g), A00(g), A01(g), A02(g), AColPan(g), ABL(g), ABR(g), A10(g), A11(g), A12(g), ARowPan(g), A20(g), A21(g), A22(g); DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g); DistMatrix<T> CT(g), C0(g), CAbove(g), CB(g), C1(g), CBelow(g), C2(g); // Temporary distributions DistMatrix<T,MC, STAR> AColPan_MC_STAR(g); DistMatrix<T,STAR,MC > ARowPan_STAR_MC(g); DistMatrix<T,MR, STAR> B1Trans_MR_STAR(g); B1Trans_MR_STAR.AlignWith( C ); // Start the algorithm Scale( beta, C ); LockedPartitionDownDiagonal ( A, ATL, ATR, ABL, ABR, 0 ); LockedPartitionDown ( B, BT, BB, 0 ); PartitionDown ( C, CT, CB, 0 ); while( CB.Height() > 0 ) { LockedRepartitionDownDiagonal ( ATL, /**/ ATR, A00, /**/ A01, A02, /*************/ /******************/ /**/ A10, /**/ A11, A12, ABL, /**/ ABR, A20, /**/ A21, A22 ); LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); RepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); LockedView1x2( ARowPan, A10, A11 ); LockedView2x1 ( AColPan, A11, A21 ); View2x1 ( CAbove, C0, C1 ); View2x1 ( CBelow, C1, C2 ); AColPan_MC_STAR.AlignWith( CBelow ); ARowPan_STAR_MC.AlignWith( CAbove ); //--------------------------------------------------------------------// AColPan_MC_STAR = AColPan; ARowPan_STAR_MC = ARowPan; MakeTrapezoidal( LEFT, LOWER, 0, AColPan_MC_STAR ); MakeTrapezoidal( RIGHT, LOWER, -1, ARowPan_STAR_MC ); B1Trans_MR_STAR.TransposeFrom( B1 ); LocalGemm ( NORMAL, TRANSPOSE, alpha, AColPan_MC_STAR, B1Trans_MR_STAR, T(1), CBelow ); LocalGemm ( TRANSPOSE, TRANSPOSE, alpha, ARowPan_STAR_MC, B1Trans_MR_STAR, T(1), CAbove ); //--------------------------------------------------------------------// AColPan_MC_STAR.FreeAlignments(); ARowPan_STAR_MC.FreeAlignments(); SlideLockedPartitionDownDiagonal ( ATL, /**/ ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /*************/ /******************/ ABL, /**/ ABR, A20, A21, /**/ A22 ); SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); SlidePartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); } #ifndef RELEASE PopCallStack(); #endif }
inline void GemmNNA ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::GemmNNA"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( A.Height() != C.Height() || B.Width() != C.Width() || A.Width() != B.Height() ) { std::ostringstream msg; msg << "Nonconformal GemmNNA: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> BL(g), BR(g), B0(g), B1(g), B2(g); DistMatrix<T> CL(g), CR(g), C0(g), C1(g), C2(g); // Temporary distributions DistMatrix<T,VR,STAR> B1_VR_STAR(g); DistMatrix<T,STAR,MR> B1Trans_STAR_MR(g); DistMatrix<T,MC,STAR> D1_MC_STAR(g); B1_VR_STAR.AlignWith( A ); B1Trans_STAR_MR.AlignWith( A ); D1_MC_STAR.AlignWith( A ); // Start the algorithm Scale( beta, C ); LockedPartitionRight( B, BL, BR, 0 ); PartitionRight( C, CL, CR, 0 ); while( BR.Width() > 0 ) { LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); RepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 ); Zeros( C1.Height(), C1.Width(), D1_MC_STAR ); //--------------------------------------------------------------------// B1_VR_STAR = B1; B1Trans_STAR_MR.TransposeFrom( B1_VR_STAR ); // D1[MC,*] := alpha A[MC,MR] B1[MR,*] LocalGemm ( NORMAL, TRANSPOSE, alpha, A, B1Trans_STAR_MR, T(0), D1_MC_STAR ); // C1[MC,MR] += scattered result of D1[MC,*] summed over grid rows C1.SumScatterUpdate( T(1), D1_MC_STAR ); //--------------------------------------------------------------------// SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); SlidePartitionRight ( CL, /**/ CR, C0, C1, /**/ C2 ); } #ifndef RELEASE PopCallStack(); #endif }