/* ************************************************************ PROCEDURE elqxq - Compute (I+c*c'/beta) * [X1, X2 * (I+c*c'/beta)]; only tril is computed, and only tril(X2) is used. INPUT beta - Householder scalar coefficient c - length m-n1 elementary Householder vector m - length of columns in X. m >= n1. n1 - number of X1-columns; the order of Householder reflection will be m-n1. UPDATED x - (m x m)-n1. On output, Xnew = (I+c*c'/beta)* [X1, X2*(I+c*c'/beta)] only tril is computed. We treat x as a (m-n1)*m matrix, but we need to add m (instead of m-n1) to get to the next column. WORK y - length m working vector, for storing c'*[X1, X2]. ************************************************************ */ void elqxq(double *x, const double beta, const double *c, const int n1, const int m, double *y) { int j,n2; double *xj, *y2, *x2; double alpha; n2 = m - n1; /* order of x2 */ y2 = y + n1; /* ------------------------------------------------------------ Compute y1 = c'*X1 ------------------------------------------------------------ */ for(j = 0, xj = x; j < n1; j++, xj += m) y[j] = realdot(c, xj, n2); x2 = xj; /* ------------------------------------------------------------ Compute y2 = c'*tril(X2); y2 += tril(X2,-1)*c, SO THAT y2 = c'*X2SYM. ------------------------------------------------------------ */ for(j = 0; j < n2; j++, xj += m+1) y2[j] = realdot(c+j, xj, n2-j); for(j = 1, xj = x2+1; j < n2; j++, xj += m+1) addscalarmul(y2+j, c[j-1], xj, n2-j); /* ------------------------------------------------------------ Below-diag block: let X1 += c*y1' / beta ------------------------------------------------------------ */ for(j = 0, xj = x; j < n1; j++, xj += m) addscalarmul(xj, y[j] / beta, c, n2); /* ------------------------------------------------------------ Lower-Right block: X2 += c*((y2'*c/beta) * c' + y2')/beta + y2*c'/beta ------------------------------------------------------------ */ alpha = realdot(y2, c, n2) / beta; for(j = 0, xj = x2; j < n2; j++, xj += m+1){ addscalarmul(xj, (alpha * c[j] + y2[j])/beta, c+j, n2-j); addscalarmul(xj, c[j] / beta, y2+j, n2-j); } }
/* ************************************************************ PROCEDURE qlmul : LORENTZ SCALE z = D(x)y (full version) z=D(x)y = [x'*y / sqrt(2); mu * x(2:n) + rdetx * y(2:n)], where mu = (z(1)+rdetx*y1) / (x(1)+ sqrt(2) * rdetx) INPUT x,y - full n x 1 rdetx - sqrt(det(x)) n - order of x,y,z. OUTPUT z - full n x 1. Let z := D(x)y. ************************************************************ */ void qlmul(double *z,const double *x,const double *y, const double rdetx,const int n) { double mu; mu = qscale(z, x,y,rdetx,n); addscalarmul(z,mu,x,n); }
/* ************************************************************ PROCEDURE fwsolve -- Solve ynew from L*y = yold, where L is lower-triangular. INPUT L - sparse lower triangular matrix xsuper - starting column in L for each (dense) supernode. nsuper - number of super nodes UPDATED y - full vector, on input y = rhs, on output y = L\rhs. WORK fwork - length max(collen[i] - superlen[i]) <= m-1, where collen[i] := L.jc[xsuper[i]+1]-L.jc[xsuper[i]] and superlen[i] := xsuper[i+1]-xsuper[i]. ************************************************************ */ void fwsolve(double *y, const int *Ljc, const int *Lir, const double *Lpr, const int *xsuper, const int nsuper, double *fwork) { int jsup,i,j,inz,jnnz; double yi,yj; /* ------------------------------------------------------------ For each supernode jsup: ------------------------------------------------------------ */ j = xsuper[0]; /* 1st col of current snode (j=0)*/ inz = Ljc[0]; /* 1st nonzero in L (inz = 0) */ for(jsup = 1; jsup <= nsuper; jsup++){ /* ------------------------------------------------------------ The first equation, 1*y=b(j), yields y(j) = b(j). ------------------------------------------------------------ */ mxAssert(inz == Ljc[j],""); yj = y[j++]; ++inz; /* jump over diagonal entry */ if(j >= xsuper[jsup]) /* ------------------------------------------------------------ If supernode is singleton, then simply set y(j+1:m)-=yj*L(j+1:m,j) ------------------------------------------------------------ */ for(; inz < Ljc[j]; inz++) y[Lir[inz]] -= yj * Lpr[inz]; else{ /* ------------------------------------------------------------ Supernode contains multiple subnodes: Remember (i,yi) = 1st subnode, then perform dense forward solve within current supernode. ------------------------------------------------------------ */ i = j; yi = yj; do{ subscalarmul(y+j, yj, Lpr+inz, xsuper[jsup] - j); inz = Ljc[j]; yj = y[j++]; ++inz; /* jump over diagonal entry */ } while(j < xsuper[jsup]); jnnz = Ljc[j] - inz; /* ------------------------------------------------------------ jnnz = number of later entries that are influenced by this supernode. Compute the update in the array fwork(jnnz) ------------------------------------------------------------ */ if(jnnz > 0){ scalarmul(fwork, yj, Lpr+inz,jnnz); while(i < j){ addscalarmul(fwork,yi,Lpr+Ljc[i]-jnnz,jnnz); yi = y[i++]; } /* ------------------------------------------------------------ Update y with fwork at the specified sparse locations ------------------------------------------------------------ */ for(i = 0; i < jnnz; i++) y[Lir[inz++]] -= fwork[i]; } } } }
/* ************************************************************ PROCEDURE qrfac - QR factorization for nxn matrix. INPUT n - order of matrix to be factored UPDATED u - Full nxn. On input, u is matrix to be factored. On output, triu(u) = uppertriangular factor; tril(u,-1) = undefined. OUTPUT beta - length n vector. kth Householder reflection is Qk = I-qk*qk' / beta[k], where qk = q(k:n-1,k). q - n x (n-1) matrix; each column is a Householder reflection. ************************************************************ */ void qrfac(double *beta, double *q, double *u, const mwIndex n) { mwIndex i,k, kcol, nmink, icol; double dk, betak, qkui, qkk; for(k = 0, kcol = 0; k < n-1; k++, kcol += n+1){ /* ------------------------------------------------------------ kth Householder reflection: dk = sign(xkk) * ||xk(k:n)||, qk(k+1:n) = x(k+1:n); qkk = xkk+dk, betak = dk*qkk, ukk = -dk. ------------------------------------------------------------ */ qkk = u[kcol]; dk = SIGN(qkk) * sqrt(realssqr(u+kcol,n-k)); memcpy(q + kcol+1, u+kcol+1, (n-k-1) * sizeof(double)); qkk += dk; betak = dk * qkk; q[kcol] = qkk; if(betak == 0.0) /* If xk is all-0 then set beta = 1. */ betak = 1.0; beta[k] = betak; u[kcol] = -dk; /* ------------------------------------------------------------ Reflect columns k+1:n-1, i.e. xi -= (qk'*xi / betak) * qk, where xi = x(k:n-1, i). ------------------------------------------------------------ */ nmink = n-k; betak = -betak; for(i = k + 1, icol = kcol + n; i < n; i++, icol += n){ qkui = realdot(q+kcol, u+icol, nmink); addscalarmul(u+icol, qkui/betak, q+kcol, nmink); } } }
/* ************************************************************ PROCEDURE prpielqxq - Compute (I+c*c'/beta) * [X1, X2 * (I+c*c'/beta)]; only tril is computed, and only tril(X2) is used. INPUT beta - Householder scalar coefficient (real length m) c,cpi - length m-n1 elementary Householder vector m - length of columns in X. m >= n1. n1 - number of X1-columns; the order of Householder reflection will be m-n1. UPDATED x,xpi - 2*((m x m)-n1). On output, Xnew = (I+c*c'/beta)* [X1, X2*(I+c*c'/beta)] only tril is computed. We treat x as a (m-n1)*m matrix, but we need to add m (instead of m-n1) to get to the next column. WORK y - length 2*m working vector, for storing c'*[X1, X2]. ************************************************************ */ void prpielqxq(double *x, double *xpi, const double beta, const double *c, const double *cpi, const int n1, const int m, double *y) { int j,n2; double *xj,*xjpi, *y2, *x2, *x2pi, *ypi, *y2pi; double alpha; n2 = m - n1; /* order of x2 */ /* ------------------------------------------------------------ Partition y into y(n1), y2(n2); ypi(n1), ypi(n2). ------------------------------------------------------------ */ ypi = y + m; y2 = y + n1; y2pi = ypi + n1; /* ------------------------------------------------------------ Compute y1 = c'*X1. NOTE: y1 is 1 x n1 row-vector. ------------------------------------------------------------ */ for(j = 0, xj = x, xjpi = xpi; j < n1; j++, xj += m, xjpi += m){ y[j] = realdot(c, xj, n2) + realdot(cpi, xjpi, n2); ypi[j] = realdot(c, xjpi, n2) - realdot(cpi, xj, n2); } x2 = xj; x2pi = xjpi; /* ------------------------------------------------------------ Compute y2 = c'*tril(X2) ------------------------------------------------------------ */ for(j = 0; j < n2; j++, xj += m+1, xjpi += m+1){ /* y2(j) = c(j:n2)'*x(j:n2,j) */ y2[j] = realdot(c+j, xj, n2-j) + realdot(cpi+j, xjpi, n2-j); y2pi[j] = realdot(c+j, xjpi, n2-j) - realdot(cpi+j, xj, n2-j); } /* ------------------------------------------------------------ Let y2 += (tril(X2,-1)*c)' = c'*triu(X2,1), SO THAT y2 = c'*X2, with X2 symmetric. NOTE: y2 = 1 x n2 row-vector. ------------------------------------------------------------ */ for(j = 1, xj = x2+1, xjpi = x2pi+1; j < n2; j++, xj += m+1, xjpi += m+1){ /* y2(j+1:n2) += conj(x(j+1:n2,j) * c(j)) */ addscalarmul(y2+j, c[j-1], xj, n2-j); /* RE */ addscalarmul(y2+j, -cpi[j-1], xjpi, n2-j); addscalarmul(y2pi+j, -cpi[j-1], xj, n2-j); /* -IM, i.e. conj */ addscalarmul(y2pi+j, -c[j-1], xjpi, n2-j); } /* ------------------------------------------------------------ Below-diag block: let X1 += c*y1 / beta, where y1 = c'*X1. NOTE: y1 is 1 x n1 row-vector. This completes X1_new = (I+c*c'/beta) * X1_old. ------------------------------------------------------------ */ for(j = 0, xj = x, xjpi = xpi; j < n1; j++, xj += m, xjpi += m){ /* x(:,j) += c * y1(j) / beta */ addscalarmul(xj, y[j] / beta, c, n2); /* RE */ addscalarmul(xj, -ypi[j] / beta, cpi, n2); addscalarmul(xjpi, y[j] / beta, cpi, n2); /* IM */ addscalarmul(xjpi, ypi[j] / beta, c, n2); } /* ------------------------------------------------------------ Lower-Right block: X2 += c*((y2*c/beta) * c' + y2)/beta + y2'*c'/beta where y2 = c'*X2. This completes X2new = (I+c*c'/beta) * X2 * (I+c*c'/beta) NOTE: since X2 = X2', we have y2*c = c'*X2*c is real. ------------------------------------------------------------ */ /* alpha = y2 * c / beta, which is real.*/ alpha = (realdot(y2, c, n2) - realdot(y2pi, cpi, n2)) / beta; for(j = 0, xj = x2, xjpi = x2pi; j < n2; j++, xj += m+1, xjpi += m+1){ /* x2(j:n2,j) += c(j:n2) * (alpha*conj(c(j)) + y2(j))/beta */ addscalarmul(xj, (alpha * c[j] + y2[j])/beta, c+j, n2-j); addscalarmul(xj, (alpha * cpi[j] - y2pi[j])/beta, cpi+j, n2-j); addscalarmul(xjpi, (alpha * c[j] + y2[j])/beta, cpi+j, n2-j); addscalarmul(xjpi, (y2pi[j] - alpha * cpi[j])/beta, c+j, n2-j); /* x2(j:n2,j) += conj(c(j)*y2(j:n2)) / beta */ addscalarmul(xj, c[j] / beta, y2+j, n2-j); /* RE */ addscalarmul(xj, -cpi[j] / beta, y2pi+j, n2-j); addscalarmul(xjpi, -c[j] / beta, y2pi+j, n2-j); /* -IM, i.e. conj */ addscalarmul(xjpi, -cpi[j] / beta, y2+j, n2-j); } }