/*
 * Factor the n x n matrix stored in A in place, two rows at a time, writing
 * one dRecip() result per row into d.
 *
 *   A      - matrix storage; consecutive rows are nskip1 elements apart.
 *            For every row, the entries left of the diagonal are overwritten
 *            with their value multiplied by the matching d entry.
 *   d      - receives dRecip(pivot) for each row (presumably the reciprocal
 *            of the pivot -- confirm against the dRecip definition).
 *   n      - number of rows to process; n < 1 returns immediately.
 *   nskip1 - row stride of A, in elements.
 *
 * For each pair of rows (row, row+1) the routine:
 *   1. calls dSolveL1_2 on the `row` columns left of the diagonal block,
 *   2. scales those columns by d and accumulates the three sums needed for
 *      the 2 x 2 diagonal block,
 *   3. factors the 2 x 2 block and stores d[row], d[row+1] and the
 *      sub-diagonal entry A(row+1,row).
 * If n is odd, the final single row is handled the same way via dSolveL1_1.
 */
void dFactorLDLT (dReal *A, dReal *d, int n, int nskip1)
{
  enum { STRIDE = 6 };            /* columns handled per pass of the blocked loop */
  int row, remaining, k;
  dReal *rowPtr, *diagPtr;
  dReal upper, lower, scale, scaledUpper, scaledLower;
  dReal prod11, prod21, prod22;
  dReal acc11, acc21, acc22;
  dReal cross, raw21, scaled21;

  if (n < 1) return;

  for (row = 0; row <= n-2; row += 2) {
    /* solve for the 2 x row block that starts at A(row,0) */
    dSolveL1_2 (A, A + row*nskip1, row, nskip1);

    acc11 = 0;
    acc21 = 0;
    acc22 = 0;
    rowPtr = A + row*nskip1;
    diagPtr = d;

    /* blocked part: full groups of STRIDE columns */
    for (remaining = row; remaining >= STRIDE; remaining -= STRIDE) {
      for (k = 0; k < STRIDE; k++) {
        upper = rowPtr[k];
        lower = rowPtr[k+nskip1];
        scale = diagPtr[k];
        scaledUpper = upper*scale;
        scaledLower = lower*scale;
        rowPtr[k] = scaledUpper;
        rowPtr[k+nskip1] = scaledLower;
        /* products are kept as separate statements so that each one is
           rounded to dReal before it is added, exactly as before */
        prod11 = upper*scaledUpper;
        prod21 = lower*scaledUpper;
        prod22 = lower*scaledLower;
        acc11 += prod11;
        acc21 += prod21;
        acc22 += prod22;
      }
      rowPtr += STRIDE;
      diagPtr += STRIDE;
    }

    /* remaining (fewer than STRIDE) columns, one at a time */
    for (; remaining > 0; remaining--) {
      upper = rowPtr[0];
      lower = rowPtr[nskip1];
      scale = diagPtr[0];
      scaledUpper = upper*scale;
      scaledLower = lower*scale;
      rowPtr[0] = scaledUpper;
      rowPtr[nskip1] = scaledLower;
      prod11 = upper*scaledUpper;
      prod21 = lower*scaledUpper;
      prod22 = lower*scaledLower;
      acc11 += prod11;
      acc21 += prod21;
      acc22 += prod22;
      rowPtr++;
      diagPtr++;
    }

    /* rowPtr has advanced `row` columns and now addresses A(row,row) */
    acc11 = rowPtr[0] - acc11;
    acc21 = rowPtr[nskip1] - acc21;
    acc22 = rowPtr[1+nskip1] - acc22;
    diagPtr = d + row;

    /* first row of the 2 x 2 block */
    diagPtr[0] = dRecip(acc11);

    /* second row of the 2 x 2 block */
    cross = 0;
    raw21 = acc21;
    scaled21 = raw21 * diagPtr[0];
    acc21 = scaled21;
    cross += raw21*scaled21;
    diagPtr[1] = dRecip(acc22 - cross);

    /* store the sub-diagonal entry A(row+1,row) */
    rowPtr[nskip1] = acc21;
  }

  /* at most one row is left over once the pairs are done */
  if (n - row == 1) {
    dSolveL1_1 (A, A + row*nskip1, row, nskip1);

    acc11 = 0;
    rowPtr = A + row*nskip1;
    diagPtr = d;

    for (remaining = row; remaining >= STRIDE; remaining -= STRIDE) {
      for (k = 0; k < STRIDE; k++) {
        upper = rowPtr[k];
        scale = diagPtr[k];
        scaledUpper = upper*scale;
        rowPtr[k] = scaledUpper;
        prod11 = upper*scaledUpper;
        acc11 += prod11;
      }
      rowPtr += STRIDE;
      diagPtr += STRIDE;
    }

    for (; remaining > 0; remaining--) {
      upper = rowPtr[0];
      scale = diagPtr[0];
      scaledUpper = upper*scale;
      rowPtr[0] = scaledUpper;
      prod11 = upper*scaledUpper;
      acc11 += prod11;
      rowPtr++;
      diagPtr++;
    }

    /* 1 x 1 diagonal block at A(row,row) */
    acc11 = rowPtr[0] - acc11;
    diagPtr = d + row;
    diagPtr[0] = dRecip(acc11);
  }
  else if (n - row != 0) {
    *((char*)0)=0;  /* this should never happen! */
  }
}
/*
 * Variant of the blocked in-place factorization whose 6-wide scaling loop is
 * annotated with a `#pragma kaapi loop` reduction.
 *
 *   A      - matrix storage; consecutive rows are nskip1 elements apart.
 *            For every row, the entries left of the diagonal are overwritten
 *            with their value multiplied by the matching d entry.
 *   d      - receives dRecip(pivot) for each row (presumably the reciprocal
 *            of the pivot -- confirm against the dRecip definition).
 *   n      - number of rows to process; n < 1 returns immediately.
 *   nskip1 - row stride of A, in elements.
 *
 * Rows are handled in pairs: dSolveL1_2 is called on the columns left of the
 * diagonal block, those columns are scaled by d while Z11/Z21/Z22 accumulate
 * the products, and the 2 x 2 diagonal block is then factored. A trailing
 * single row (odd n) goes through dSolveL1_1 and a serial loop.
 *
 * NOTE(review): the scratch variables p1, p2, dd, q1, q2, m11, m21, m22 are
 * declared at function scope and written inside the annotated loop. Whether
 * the kaapi compiler privatizes them per task is not visible here -- confirm.
 */
void _dFactorLDLT (dReal *A, dReal *d, int n, int nskip1)
{
  int i,j;
  dReal sum,*ell,*dee,dd,p1,p2,q1,q2,Z11,m11,Z21,m21,Z22,m22;

  if (n < 1) return;

  for (i=0; i<=n-2; i += 2) {
    /* solve L*(D*l)=a, l is scaled elements in 2 x i block at A(i,0) */
    dSolveL1_2 (A,A+i*nskip1,i,nskip1);

    /* scale the elements in a 2 x i block at A(i,0), and also */
    /* compute Z = the outer product matrix that we'll need. */
    Z11 = 0;
    Z21 = 0;
    Z22 = 0;
    ell = A+i*nskip1;
    dee = d;

    /* The directive must start its own line. reduce_sum is presumably a
       reduction operator declared elsewhere in the project -- confirm. */
#pragma kaapi loop \
  reduction(reduce_sum:Z11, reduce_sum:Z21, reduce_sum:Z22)
    for (j=i-6; j >= 0; j -= 6, ell += 6, dee += 6) {
      p1 = ell[0]; p2 = ell[nskip1]; dd = dee[0];
      q1 = p1*dd; q2 = p2*dd;
      ell[0] = q1; ell[nskip1] = q2;
      m11 = p1*q1; m21 = p2*q1; m22 = p2*q2;
      Z11 += m11; Z21 += m21; Z22 += m22;

      p1 = ell[1]; p2 = ell[1+nskip1]; dd = dee[1];
      q1 = p1*dd; q2 = p2*dd;
      ell[1] = q1; ell[1+nskip1] = q2;
      m11 = p1*q1; m21 = p2*q1; m22 = p2*q2;
      Z11 += m11; Z21 += m21; Z22 += m22;

      p1 = ell[2]; p2 = ell[2+nskip1]; dd = dee[2];
      q1 = p1*dd; q2 = p2*dd;
      ell[2] = q1; ell[2+nskip1] = q2;
      m11 = p1*q1; m21 = p2*q1; m22 = p2*q2;
      Z11 += m11; Z21 += m21; Z22 += m22;

      p1 = ell[3]; p2 = ell[3+nskip1]; dd = dee[3];
      q1 = p1*dd; q2 = p2*dd;
      ell[3] = q1; ell[3+nskip1] = q2;
      m11 = p1*q1; m21 = p2*q1; m22 = p2*q2;
      Z11 += m11; Z21 += m21; Z22 += m22;

      p1 = ell[4]; p2 = ell[4+nskip1]; dd = dee[4];
      q1 = p1*dd; q2 = p2*dd;
      ell[4] = q1; ell[4+nskip1] = q2;
      m11 = p1*q1; m21 = p2*q1; m22 = p2*q2;
      Z11 += m11; Z21 += m21; Z22 += m22;

      p1 = ell[5]; p2 = ell[5+nskip1]; dd = dee[5];
      q1 = p1*dd; q2 = p2*dd;
      ell[5] = q1; ell[5+nskip1] = q2;
      m11 = p1*q1; m21 = p2*q1; m22 = p2*q2;
      Z11 += m11; Z21 += m21; Z22 += m22;
    }

    /* xkaapi does not yet update affine variables */
    /* Re-derive j, ell and dee from the iteration count. niter is
       ceil((i-5)/6), the number of passes the loop above makes, so in a
       build that ignores the pragma these assignments reproduce the values
       the loop already left behind. */
    if ((i - 6) >= 0) {
      static const int step = 6;
      const int range_size = (i - 6 + 1) - 0;
      int niter = range_size / step;
      if (range_size % step) niter += 1;
      j = (i - 6) - niter * 6;
      ell = (A + i * nskip1) + niter * 6;
      dee = d + niter * 6;
    }
    else {
      /* loop body never ran; only j needs its initial value */
      j = i - 6;
    }

    /* compute left-over iterations */
    j += 6;
    for (; j > 0; j--) {
      p1 = ell[0]; p2 = ell[nskip1]; dd = dee[0];
      q1 = p1*dd; q2 = p2*dd;
      ell[0] = q1; ell[nskip1] = q2;
      m11 = p1*q1; m21 = p2*q1; m22 = p2*q2;
      Z11 += m11; Z21 += m21; Z22 += m22;
      ell++;
      dee++;
    }

    /* solve for diagonal 2 x 2 block at A(i,i) */
    Z11 = ell[0] - Z11;
    Z21 = ell[nskip1] - Z21;
    Z22 = ell[1+nskip1] - Z22;
    dee = d + i;

    /* factorize 2 x 2 block Z,dee */
    /* factorize row 1 */
    dee[0] = dRecip(Z11);
    /* factorize row 2 */
    sum = 0;
    q1 = Z21;
    q2 = q1 * dee[0];
    Z21 = q2;
    sum += q1*q2;
    dee[1] = dRecip(Z22 - sum);
    /* done factorizing 2 x 2 block */
    ell[nskip1] = Z21;
  }

  /* compute the (less than 2) rows at the bottom */
  switch (n-i) {
    case 0:
      break;

    case 1:
      dSolveL1_1 (A,A+i*nskip1,i,nskip1);

      /* scale the elements in a 1 x i block at A(i,0), and also */
      /* compute Z = the outer product matrix that we'll need. */
      Z11 = 0;
      ell = A+i*nskip1;
      dee = d;
      for (j=i-6; j >= 0; j -= 6) {
        p1 = ell[0]; dd = dee[0]; q1 = p1*dd; ell[0] = q1; m11 = p1*q1; Z11 += m11;
        p1 = ell[1]; dd = dee[1]; q1 = p1*dd; ell[1] = q1; m11 = p1*q1; Z11 += m11;
        p1 = ell[2]; dd = dee[2]; q1 = p1*dd; ell[2] = q1; m11 = p1*q1; Z11 += m11;
        p1 = ell[3]; dd = dee[3]; q1 = p1*dd; ell[3] = q1; m11 = p1*q1; Z11 += m11;
        p1 = ell[4]; dd = dee[4]; q1 = p1*dd; ell[4] = q1; m11 = p1*q1; Z11 += m11;
        p1 = ell[5]; dd = dee[5]; q1 = p1*dd; ell[5] = q1; m11 = p1*q1; Z11 += m11;
        ell += 6;
        dee += 6;
      }

      /* compute left-over iterations */
      j += 6;
      for (; j > 0; j--) {
        p1 = ell[0]; dd = dee[0]; q1 = p1*dd; ell[0] = q1; m11 = p1*q1; Z11 += m11;
        ell++;
        dee++;
      }

      /* solve for diagonal 1 x 1 block at A(i,i) */
      Z11 = ell[0] - Z11;
      dee = d + i;

      /* factorize 1 x 1 block Z,dee */
      /* factorize row 1 */
      dee[0] = dRecip(Z11);
      /* done factorizing 1 x 1 block */
      break;

    default:
      *((char*)0)=0;  /* this should never happen! */
  }
}