/* ************************************************************ PROCEDURE elqxq - Compute (I+c*c'/beta) * [X1, X2 * (I+c*c'/beta)]; only tril is computed, and only tril(X2) is used. INPUT beta - Householder scalar coefficient c - length m-n1 elementary Householder vector m - length of columns in X. m >= n1. n1 - number of X1-columns; the order of Householder reflection will be m-n1. UPDATED x - (m x m)-n1. On output, Xnew = (I+c*c'/beta)* [X1, X2*(I+c*c'/beta)] only tril is computed. We treat x as a (m-n1)*m matrix, but we need to add m (instead of m-n1) to get to the next column. WORK y - length m working vector, for storing c'*[X1, X2]. ************************************************************ */ void elqxq(double *x, const double beta, const double *c, const int n1, const int m, double *y) { int j,n2; double *xj, *y2, *x2; double alpha; n2 = m - n1; /* order of x2 */ y2 = y + n1; /* ------------------------------------------------------------ Compute y1 = c'*X1 ------------------------------------------------------------ */ for(j = 0, xj = x; j < n1; j++, xj += m) y[j] = realdot(c, xj, n2); x2 = xj; /* ------------------------------------------------------------ Compute y2 = c'*tril(X2); y2 += tril(X2,-1)*c, SO THAT y2 = c'*X2SYM. ------------------------------------------------------------ */ for(j = 0; j < n2; j++, xj += m+1) y2[j] = realdot(c+j, xj, n2-j); for(j = 1, xj = x2+1; j < n2; j++, xj += m+1) addscalarmul(y2+j, c[j-1], xj, n2-j); /* ------------------------------------------------------------ Below-diag block: let X1 += c*y1' / beta ------------------------------------------------------------ */ for(j = 0, xj = x; j < n1; j++, xj += m) addscalarmul(xj, y[j] / beta, c, n2); /* ------------------------------------------------------------ Lower-Right block: X2 += c*((y2'*c/beta) * c' + y2')/beta + y2*c'/beta ------------------------------------------------------------ */ alpha = realdot(y2, c, n2) / beta; for(j = 0, xj = x2; j < n2; j++, xj += m+1){ addscalarmul(xj, (alpha * c[j] + y2[j])/beta, c+j, n2-j); addscalarmul(xj, c[j] / beta, y2+j, n2-j); } }
/* ************************************************************ PROCEDURE bwsolve -- Solve y from L'*y = b, where L is lower-triangular. INPUT Ljc, Lir, Lpr - sparse lower triangular matrix xsuper - starting column in L for each (dense) supernode. nsuper - number of super nodes UPDATED y - full xsuper[nsuper]-vector, yOUTPUT = L' \ yINPUT. WORKING ARRAY fwork - length max(collen[i] - superlen[i]) <= m-1, where collen[i] := L.jc[xsuper[i]+1]-L.jc[xsuper[i]] and superlen[i] := xsuper[i+1]-xsuper[i]. ************************************************************ */ void bwsolve(double *y, const mwIndex *Ljc, const mwIndex *Lir, const double *Lpr, const mwIndex *xsuper, const mwIndex nsuper, double *fwork) { mwIndex jsup,i,j,inz,k,jnnz; double yj; /* ------------------------------------------------------------ For each supernode jsup: ------------------------------------------------------------ */ j = xsuper[nsuper]; /* column after current snode (j=m)*/ for(jsup = nsuper; jsup > 0; jsup--){ i = j; mxAssert(j == xsuper[jsup],""); inz = Ljc[--j]; inz++; /* jump over diagonal entry */ if(j <= xsuper[jsup-1]){ /* ------------------------------------------------------------ If supernode is singleton j, then simply y[j] -= L(j+1:m,j)'*y(j+1:m) ------------------------------------------------------------ */ if(inz < Ljc[i]){ yj = Lpr[inz] * y[Lir[inz]]; for(++inz; inz < Ljc[i]; inz++) yj += Lpr[inz] * y[Lir[inz]]; y[j] -= yj; } } else{ /* ------------------------------------------------------------ For a "real" supernode: Let fwork = sparse(y(i:m)), then let y[j] -= L(i:m,j)'*fwork for all j in supernode ------------------------------------------------------------ */ for(jnnz = 0; inz < Ljc[i]; inz++) fwork[jnnz++] = y[Lir[inz]]; if(jnnz > 0) while(i > xsuper[jsup-1]){ yj = realdot(Lpr+Ljc[i]-jnnz, fwork, jnnz); mxAssert(i>0,""); y[--i] -= yj; } k = 1; do{ /* ------------------------------------------------------------ It remains to do a dense bwsolve on nodes j-1:-1:xsuper[jsup] The equation L(:,j)'*yNEW = yOLD(j), yields y(j) -= L(j+(1:k),j)'*y(j+(1:k)), k=1:i-xsuper[jsup]-1. ------------------------------------------------------------ */ mxAssert(j>0,""); --j; y[j] -= realdot(Lpr+Ljc[j]+1, y+j+1, k++); } while(j > xsuper[jsup-1]); } } }
/* ************************************************************ PROCEDURE cholnopiv - U'*U factorization for nxn matrix, without pivoting. INPUT n - order of matrix to be factored UPDATED u - Full nxn. Input: Matrix to be factored. Output: Cholesky factor, X=triu(U)'*triu(U). NOTE: tril(U,-1) is not affected by this function. RETURNS: 0 = "success", 1 = "X is NOT positive definite" ************************************************************ */ mwIndex cholnopiv(double *u,const mwIndex n) { mwIndex i,j; double uij,ujj; double *ui,*uj; if(n < 1) return 0; /* ------------------------------------------------------------ Solve the columns of U, for j=0:n-1 ------------------------------------------------------------ */ for(uj = u, j = 0; j < n; j++, uj+=n){ /* ------------------------------------------------------------ Solve "uij" from the identity uii * uij = xij - u(1:i-1,i)'*u(1:i-1,j) ------------------------------------------------------------ */ for(ui = u, i = 0, ujj = 0.0; i < j; i++, ui+=n){ uij = (uj[i] = (uj[i] - realdot(ui,uj,i)) / ui[i]); ujj += SQR(uij); } ujj = uj[j] - ujj; /* ------------------------------------------------------------ By now, "ujj" should contain the final u(j,j)^2. Check whether it is positive. If not, then X was not p.d., thus fail. ------------------------------------------------------------ */ if(ujj <= 0.0) return 1; /* X is not positive definite */ uj[j] = sqrt(ujj); } return 0; /* success */ }
/* ************************************************************ PROCEDURE selbwsolve -- Solve ynew from L'*y = yold, where L is lower-triangular and y is SPARSE. INPUT Ljc, Lir, Lpr - sparse lower triangular matrix xsuper - length nsuper+1, start of each (dense) supernode. nsuper - number of super nodes snode - length m array, mapping each node to the supernode containing it. yir - length ynnz array, listing all possible nonzeros entries in y. ynnz - number of nonzeros in y (from symbbwslv). UPDATED y - full vector, on input y = rhs, on output y = L'\rhs. only the yir(0:ynnz-1) entries are used and defined. ************************************************************ */ void selbwsolve(double *y, const mwIndex *Ljc, const mwIndex *Lir, const double *Lpr, const mwIndex *xsuper, const mwIndex nsuper, const mwIndex *snode, const mwIndex *yir, const mwIndex ynnz) { mwIndex jsup,j,inz,jnz,nk, k; double yj; if(ynnz <= 0) return; /* ------------------------------------------------------------ Backward solve on each nonzero supernode snode[yir[jnz]] (=jsup-1). ------------------------------------------------------------ */ jnz = ynnz; /* point just beyond last nonzero (super)node in y */ while(jnz > 0){ j = yir[--jnz]; /* j is last subnode to be used */ jsup = snode[j]; nk = j - xsuper[jsup]; /* nk+1 = length supernode jsup in y */ jnz -= nk; /* point just beyond prev. nonzero supernode */ for(k = 0; k <= nk; k++, j--){ /* ------------------------------------------------------------ The equation L(:,j)'*yNEW = yOLD(j), yields y(j) -= L(j+1:m,j)'*y. ------------------------------------------------------------ */ inz = Ljc[j]; inz++; /* jump over diagonal entry */ yj = realdot(Lpr+inz, y+j+1, k); /* super-nodal part */ for(inz += k; inz < Ljc[j+1]; inz++) yj += Lpr[inz] * y[Lir[inz]]; /* sparse part */ y[j] -= yj; } } }
/********************************************************** * B dense * A = nxp, B = nxp * P(i,j) = <ith column of A, jth column of B> **********************************************************/ void prod1(int m, int n, int p, double *A, mwIndex *irA, mwIndex *jcA, int isspA, double *B, double *P, mwIndex *irP, mwIndex *jcP, int *list1, int *list2, int len) { int j, k, r, t, rn, jn, jold, kstart, kend, idx, count; double tmp; jold = -1; count = 0; for (t=0; t<len; ++t) { r = list1[t]; j = list2[t]; if (j != jold) { jn = j*n; jold = j; } if (!isspA) { rn = r*n; tmp = realdot(A,rn,B,jn,n); } else { tmp = 0; kstart = jcA[r]; kend = jcA[r+1]; for (k=kstart; k<kend; ++k) { idx = irA[k]; tmp += A[k]*B[idx+jn]; } } P[count] = tmp; irP[count] = r; jcP[j+1]++; count++; } for (k=0; k<p; k++) { jcP[k+1] += jcP[k]; } return; }
/************************************************************* PROCEDURE fwsolve -- Solve y from U'*y = x. **************************************************************/ void fwsolve(double *y,const double *u,const double *x,const int n) { int k; /*----------------------------------------------- u(1:k-1,k)'*y(1:k-1) + u(k,k)*y(k) = x(k). -----------------------------------------------*/ for (k = 0; k < n; k++, u += n) y[k] = (x[k] - realdot(y,u,k)) / u[k]; }
/* ************************************************************ PROCEDURE cholpivot - U'*U factorization for nxn matrix, with column pivotting. Yields U with 1 >= \|U(i,i+1:n)\| forall i. INPUT n - order of matrix to be factored x - Full nxn. Matrix to be factored. OUTPUT u - Full nxn, Cholesky factor: X=U'*U with U(:,perm) upper triangular. perm - Column permutation for maximal stability. WORK d - length n vector; the positive diagonal diag(U(:,perm)).^2, has decreasing order. RETURNS: 0 = "success", 1 = "X is NOT positive definite" ************************************************************ */ int cholpivot(double *u,int *perm, const double *x, const int n, double *d) { int i,j,k,imax, icol; double *uk, *rowuk; double dk, uki; const double *xk; /* ------------------------------------------------------------ Initialize: d = diag(x), perm = 0:n-1. ------------------------------------------------------------ */ for(xk = x, k = 0; k < n; xk += n, k++) d[k] = xk[k]; for(j = 0; j < n; j++) perm[j] = j; /* ------------------------------------------------------------ Pivot in step k=0:n-1 on imax: ------------------------------------------------------------ */ for(k = 0; k < n; k++){ /* ------------------------------------------------------------ Let [imax,dk] = max(d(k:m)) ------------------------------------------------------------ */ dk = d[k]; imax = k; for(i = k + 1; i < n; i++) if(d[i] > dk){ imax = i; dk = d[i]; } /* ------------------------------------------------------------ k-th pivot is j=perm[imax]. ------------------------------------------------------------ */ d[imax] = d[k]; j = perm[imax]; /* original node number */ uk = u + j * n; rowuk = u + k; perm[imax] = perm[k]; perm[k] = j; /* ------------------------------------------------------------ Let uk[k] = (dk := sqrt(dk)) ------------------------------------------------------------ */ if(dk <= 0.0) return 1; /* Matrix is not positive definite */ dk = sqrt(dk); uk[k] = dk; /* ------------------------------------------------------------ Let u(k,:) = (x(k,:)-uk'*u(0:k-1,:)) / dk, then d(k+1:n) -= u(k,:).^2. ------------------------------------------------------------ */ for(i = k + 1; i < n; i++){ icol = perm[i] * n; uki = (x[icol + j] - realdot(uk, u+icol, k)) / dk; rowuk[icol] = uki; d[i] -= SQR(uki); /* d(:) -= u(k,:).^2 */ } } return 0; /* success */ }
/************************************************************* PROCEDURE lbsolve -- Solve y from U'*y = x. INPUT u - n x n full matrix with only upper-triangular entries x,n - length n vector. OUTPUT y - length n vector, y = U'\x. **************************************************************/ void lbsolve(double *y,const double *u,const double *x,const int n) { int k; /*------------------------------------------------------------ The first equation, u(:,1)'*y=x(1), yields y(1) = x(1)/u(1,1). For k=2:n, we solve u(1:k-1,k)'*y(1:k-1) + u(k,k)*y(k) = x(k). -------------------------------------------------------------*/ for (k = 0; k < n; k++, u += n) y[k] = (x[k] - realdot(y,u,k)) / u[k]; }
/* ************************************************************ PROCEDURE prpicholnopiv - Computes triu matrix U s.t. U'*U = X. U, X in SeDuMi's complex format: X = [RE X, IM X]. No pivoting. INPUT: x - full 2*(n x n), should be Hermitian. n - order of u, x. UPDATED: u,upi - Both full nxn. Input: Matrix to be factored (real, imaginary). Output: Cholesky factor, X=U'*U with U=triu(u) + i*triu(upi,1). NOTE: tril(u,-1) and tril(upi) are not affected by this function. RETURNS: 0 = "success", 1 = "X is NOT positive definite" ************************************************************ */ mwIndex prpicholnopiv(double *u, double *upi, const mwIndex n) { mwIndex i,j; double uii,uij,ujj; double *ui,*uipi, *uj, *ujpi; if(n < 1) return 0; /* ------------------------------------------------------------ Solve the columns of U, for j=0:n-1 ------------------------------------------------------------ */ for(uj=u, ujpi=upi, j = 0; j < n; j++, uj+=n, ujpi+=n){ ujj = 0.0; for(ui=u, uipi=upi, i = 0; i < j; i++, ui+=n, uipi+=n){ /* ------------------------------------------------------------ Solve "uij" from the identity uii * uij = xij - u(1:i-1,i)'*u(1:i-1,j) ------------------------------------------------------------ */ uii = ui[i]; uij = uj[i]; /* real part */ uij -= realdot(ui,uj,i) + realdot(uipi,ujpi,i); uj[i] = (uij /= uii); ujj += SQR(uij); uij = ujpi[i]; /* imaginary part */ uij += realdot(uipi,uj,i) - realdot(ui,ujpi,i); ujpi[i] = (uij /= uii); ujj += SQR(uij); } ujj = uj[j] - ujj; /* ------------------------------------------------------------ By now, "ujj" should contain the final u(j,j)^2. Check whether it is positive. If not, then X was not p.d., thus fail. ------------------------------------------------------------ */ if(ujj <= 0.0) return 1; /* X is not positive definite */ uj[j] = sqrt(ujj); } return 0; }
/* ************************************************************ PROCEDURE: triudotprod - Computes trace(X*Y), where X and Y are supposed to be symmetric; only the triu's are used. INPUT x - full n x n, only triu(x) used y - full n x n, only triu(x) used n - order RETURNS the inner product between X and Y, trace(X*Y). ************************************************************ */ double triudotprod(const double *x, const double *y, const mwIndex n) { mwIndex i, j; double z; if(n <= 0) return 0.0; z = x[0] * y[0]; for(j = 1; j < n; j++){ x += n; y += n; /* point to x(:,j) and y(:,j) */ z += 2 * realdot(x,y,j) + x[j] * y[j]; } return z; }
/* striudotprod: z = tr(X*Y), where diag(X)=0. only triu(X,1) and triu(Y,1) are used. For the imaginary part into the real inner product. */ double striudotprod(const double *x, const double *y, const mwIndex n) { mwIndex i, j; double z; if(n <= 1) return 0.0; z = 0.0; for(j = 1; j < n; j++){ x += n; y += n; /* point to x(:,j) and y(:,j) */ z += realdot(x,y,j); } return 2 * z; }
/* ************************************************************ PROCEDURE qscale : LORENTZ SCALE D(x)y = z + mu * x (full version) mu = (y1+alpha)/sqrt(2), z = rdetx * [alpha; y(2:n)], where alpha = (x(2:n)'*y(2:n)) / (x(1)+ sqrt(2) * rdetx) INPUT x,y - full n x 1 rdetx - sqrt(det(x)) n - order of x,y,z. OUTPUT z - full n x 1. Let z := rdetx * [alpha; y(2:n)]. RETURNS mu = (y1+alpha)/sqrt(2). ************************************************************ */ double qscale(double *z,const double *x,const double *y, const double rdetx,const int n) { double alpha, mu; /* ------------------------------------------------------------ alpha = (x(2:n)'*y(2:n)) / (x(1)+ sqrt(2) * rdetx) ------------------------------------------------------------ */ alpha = realdot(x+1,y+1,n-1) / (x[0] + M_SQRT2 * rdetx); /* ------------------------------------------------------------ z = rdetx * [alpha; y(2:n)]. ------------------------------------------------------------ */ z[0] = rdetx * alpha; scalarmul(z+1,rdetx,y+1,n-1); /* ------------------------------------------------------------ RETURN mu = (y1+alpha)/sqrt(2). ------------------------------------------------------------ */ return (y[0] + alpha) / M_SQRT2; }
/* ------------------------------------------------------------ PROCEDURE bwipr1 - I(dentity) P(lus) R(ank)1 forward solve. INPUT: p - length m floats beta - length n floats m, n - order of p and beta, resp. (n <= m) UPDATED: y - Length m. On input, contains the rhs. On output, the solution to L(p,beta)'*yNEW = yOLD. This updates only y(0:n-1). ------------------------------------------------------------ */ void bwipr1(double *y, const double *p, const double *beta, const mwIndex m, const mwIndex n) { mwIndex i; double yi,t; if(n < 1) /* If L = I, y remains the same */ return; /* ------------------------------------------------------------ Let t = p(n:m-1)' * y ------------------------------------------------------------ */ t = realdot(p+n, y+n, m-n); /* ------------------------------------------------------------ Solve yi for i = n-1:-1:0, from (eye(m)+triu(beta*p',1)) * yNEW = yOLD, i.e. yNEW(i) + betai*t = yOLD(i), with t := p(i+1:n-1)'*y. ------------------------------------------------------------ */ for(i = n; i > 0; i--){ yi = (y[i-1] -= t * beta[i-1]); t += p[i-1] * yi; } }
/* ************************************************************ PROCEDURE qrfac - QR factorization for nxn matrix. INPUT n - order of matrix to be factored UPDATED u - Full nxn. On input, u is matrix to be factored. On output, triu(u) = uppertriangular factor; tril(u,-1) = undefined. OUTPUT beta - length n vector. kth Householder reflection is Qk = I-qk*qk' / beta[k], where qk = q(k:n-1,k). q - n x (n-1) matrix; each column is a Householder reflection. ************************************************************ */ void qrfac(double *beta, double *q, double *u, const mwIndex n) { mwIndex i,k, kcol, nmink, icol; double dk, betak, qkui, qkk; for(k = 0, kcol = 0; k < n-1; k++, kcol += n+1){ /* ------------------------------------------------------------ kth Householder reflection: dk = sign(xkk) * ||xk(k:n)||, qk(k+1:n) = x(k+1:n); qkk = xkk+dk, betak = dk*qkk, ukk = -dk. ------------------------------------------------------------ */ qkk = u[kcol]; dk = SIGN(qkk) * sqrt(realssqr(u+kcol,n-k)); memcpy(q + kcol+1, u+kcol+1, (n-k-1) * sizeof(double)); qkk += dk; betak = dk * qkk; q[kcol] = qkk; if(betak == 0.0) /* If xk is all-0 then set beta = 1. */ betak = 1.0; beta[k] = betak; u[kcol] = -dk; /* ------------------------------------------------------------ Reflect columns k+1:n-1, i.e. xi -= (qk'*xi / betak) * qk, where xi = x(k:n-1, i). ------------------------------------------------------------ */ nmink = n-k; betak = -betak; for(i = k + 1, icol = kcol + n; i < n; i++, icol += n){ qkui = realdot(q+kcol, u+icol, nmink); addscalarmul(u+icol, qkui/betak, q+kcol, nmink); } } }
/********************************************************** * B sparse. * A = nxm, B = nxp * P(i,j) = <ith column of A, jth column of B> **********************************************************/ void prod2(int m, int n, int p, double *A, mwIndex *irA, mwIndex *jcA, int isspA, double *B, mwIndex *irB, mwIndex *jcB, int isspB, double *P, mwIndex *irP, mwIndex *jcP, double *Btmp, int *list1, int *list2, int len) { int j, k, r, t, rn, jn, jold, kstart, kend, idx, count; double tmp; jold = -1; count = 0; for (t=0; t<len; ++t) { r = list1[t]; j = list2[t]; if (j != jold) { jn = j*n; /***** copy j-th column of sparse B to Btmp *****/ for (k=0; k<n; ++k) { Btmp[k] = 0; } kstart = jcB[j]; kend = jcB[j+1]; for (k=kstart; k<kend; ++k) { idx = irB[k]; Btmp[idx] = B[k]; } jold = j; } if (!isspA) { rn = r*n; tmp = realdot(A,rn,Btmp,0,n); } else { tmp = 0; kstart = jcA[r]; kend = jcA[r+1]; for (k=kstart; k<kend; ++k) { idx = irA[k]; tmp += A[k]*Btmp[idx]; } } P[count] = tmp; irP[count] = r; jcP[j+1]++; count++; } for (k=0; k<p; k++) { jcP[k+1] += jcP[k]; } return; }
/* ************************************************************ PROCEDURE prpielqxq - Compute (I+c*c'/beta) * [X1, X2 * (I+c*c'/beta)]; only tril is computed, and only tril(X2) is used. INPUT beta - Householder scalar coefficient (real length m) c,cpi - length m-n1 elementary Householder vector m - length of columns in X. m >= n1. n1 - number of X1-columns; the order of Householder reflection will be m-n1. UPDATED x,xpi - 2*((m x m)-n1). On output, Xnew = (I+c*c'/beta)* [X1, X2*(I+c*c'/beta)] only tril is computed. We treat x as a (m-n1)*m matrix, but we need to add m (instead of m-n1) to get to the next column. WORK y - length 2*m working vector, for storing c'*[X1, X2]. ************************************************************ */ void prpielqxq(double *x, double *xpi, const double beta, const double *c, const double *cpi, const int n1, const int m, double *y) { int j,n2; double *xj,*xjpi, *y2, *x2, *x2pi, *ypi, *y2pi; double alpha; n2 = m - n1; /* order of x2 */ /* ------------------------------------------------------------ Partition y into y(n1), y2(n2); ypi(n1), ypi(n2). ------------------------------------------------------------ */ ypi = y + m; y2 = y + n1; y2pi = ypi + n1; /* ------------------------------------------------------------ Compute y1 = c'*X1. NOTE: y1 is 1 x n1 row-vector. ------------------------------------------------------------ */ for(j = 0, xj = x, xjpi = xpi; j < n1; j++, xj += m, xjpi += m){ y[j] = realdot(c, xj, n2) + realdot(cpi, xjpi, n2); ypi[j] = realdot(c, xjpi, n2) - realdot(cpi, xj, n2); } x2 = xj; x2pi = xjpi; /* ------------------------------------------------------------ Compute y2 = c'*tril(X2) ------------------------------------------------------------ */ for(j = 0; j < n2; j++, xj += m+1, xjpi += m+1){ /* y2(j) = c(j:n2)'*x(j:n2,j) */ y2[j] = realdot(c+j, xj, n2-j) + realdot(cpi+j, xjpi, n2-j); y2pi[j] = realdot(c+j, xjpi, n2-j) - realdot(cpi+j, xj, n2-j); } /* ------------------------------------------------------------ Let y2 += (tril(X2,-1)*c)' = c'*triu(X2,1), SO THAT y2 = c'*X2, with X2 symmetric. NOTE: y2 = 1 x n2 row-vector. ------------------------------------------------------------ */ for(j = 1, xj = x2+1, xjpi = x2pi+1; j < n2; j++, xj += m+1, xjpi += m+1){ /* y2(j+1:n2) += conj(x(j+1:n2,j) * c(j)) */ addscalarmul(y2+j, c[j-1], xj, n2-j); /* RE */ addscalarmul(y2+j, -cpi[j-1], xjpi, n2-j); addscalarmul(y2pi+j, -cpi[j-1], xj, n2-j); /* -IM, i.e. conj */ addscalarmul(y2pi+j, -c[j-1], xjpi, n2-j); } /* ------------------------------------------------------------ Below-diag block: let X1 += c*y1 / beta, where y1 = c'*X1. NOTE: y1 is 1 x n1 row-vector. This completes X1_new = (I+c*c'/beta) * X1_old. ------------------------------------------------------------ */ for(j = 0, xj = x, xjpi = xpi; j < n1; j++, xj += m, xjpi += m){ /* x(:,j) += c * y1(j) / beta */ addscalarmul(xj, y[j] / beta, c, n2); /* RE */ addscalarmul(xj, -ypi[j] / beta, cpi, n2); addscalarmul(xjpi, y[j] / beta, cpi, n2); /* IM */ addscalarmul(xjpi, ypi[j] / beta, c, n2); } /* ------------------------------------------------------------ Lower-Right block: X2 += c*((y2*c/beta) * c' + y2)/beta + y2'*c'/beta where y2 = c'*X2. This completes X2new = (I+c*c'/beta) * X2 * (I+c*c'/beta) NOTE: since X2 = X2', we have y2*c = c'*X2*c is real. ------------------------------------------------------------ */ /* alpha = y2 * c / beta, which is real.*/ alpha = (realdot(y2, c, n2) - realdot(y2pi, cpi, n2)) / beta; for(j = 0, xj = x2, xjpi = x2pi; j < n2; j++, xj += m+1, xjpi += m+1){ /* x2(j:n2,j) += c(j:n2) * (alpha*conj(c(j)) + y2(j))/beta */ addscalarmul(xj, (alpha * c[j] + y2[j])/beta, c+j, n2-j); addscalarmul(xj, (alpha * cpi[j] - y2pi[j])/beta, cpi+j, n2-j); addscalarmul(xjpi, (alpha * c[j] + y2[j])/beta, cpi+j, n2-j); addscalarmul(xjpi, (y2pi[j] - alpha * cpi[j])/beta, c+j, n2-j); /* x2(j:n2,j) += conj(c(j)*y2(j:n2)) / beta */ addscalarmul(xj, c[j] / beta, y2+j, n2-j); /* RE */ addscalarmul(xj, -cpi[j] / beta, y2pi+j, n2-j); addscalarmul(xjpi, -c[j] / beta, y2pi+j, n2-j); /* -IM, i.e. conj */ addscalarmul(xjpi, -cpi[j] / beta, y2+j, n2-j); } }
/* ************************************************************ PROCEDURE prpiqrfac - QR factorization for nxn matrix. INPUT n - order of matrix to be factored UPDATED u - Full nxn. On input, u is matrix to be factored. On output, triu(u) = uppertriangular factor; tril(u,-1) = undefined. OUTPUT beta - length n vector. kth Householder reflection is Qk = I-qk*qk' / beta[k], where qk = q(k:n-1,k). q,qpi - n x n matrix; each of the first n-1 columns is a Householder reflection. The n-th column gives Qn = diag(q(:,n)), which is NOT Hermitian (viz. diag complex rotations). We have u_IN = Q_1*Q_2*.. *Q_n * triu(u_OUT), u_OUT = Q_n' * Q_{n-1}* .. * Q_2*Q_1*u_IN. ************************************************************ */ void prpiqrfac(double *beta, double *q, double *qpi, double *u, double *upi, const mwIndex n) { mwIndex i,j,k, kcol, nmink, icol; double betak, qkui,qkuiim, absxkk, normxk, xkk,xkkim; double *ui,*uipi, *qk, *qkpi; for(k = 0, kcol = 0; k < n-1; k++, kcol += n+1){ qk = q+kcol; qkpi = qpi + kcol; /* ------------------------------------------------------------ kth Householder reflection: Set absxkk = |xkk| and normxk = norm(xk(k:n)), then ukk = -sign(xkk) * normxk, qkk = xkk - ukk, qk(k+1:n) = xk(k+1:n). Remark: sign(xkk) := xkk/|xkk|, a complex number. ------------------------------------------------------------ */ xkk = u[kcol]; xkkim = upi[kcol]; absxkk = SQR(xkk) + SQR(xkkim); normxk = absxkk + realssqr(u+kcol+1,n-k-1) + realssqr(upi+kcol+1,n-k-1); memcpy(qk+1, u+kcol+1, (n-k-1) * sizeof(double)); /* real */ memcpy(qkpi+1, upi+kcol+1, (n-k-1) * sizeof(double)); /* imag */ absxkk = sqrt(absxkk); normxk = sqrt(normxk); if(absxkk > 0.0){ u[kcol] = -(xkk / absxkk) * normxk; /* ukk = -sign(xkk) * normxk */ upi[kcol] = -(xkkim / absxkk) * normxk; } else u[kcol] = -normxk; /* sign(0) := 1 */ qk[0] = xkk - u[kcol]; /* qkk = xkk - ukk */ qkpi[0] = xkkim - upi[kcol]; /* ------------------------------------------------------------ betak = normxk * (normxk + absxkk) EXCEPTION: if xk is all-0 then set beta = 1 (to avoid division by 0). ------------------------------------------------------------ */ if(normxk == 0.0) betak = 1.0; else betak = normxk * (normxk + absxkk); beta[k] = betak; /* ------------------------------------------------------------ Reflect columns k+1:n-1, i.e. xi -= (qk'*xi / betak) * qk, where xi = x(k:n-1, i). ------------------------------------------------------------ */ nmink = n-k; for(i = k + 1, icol = kcol + n; i < n; i++, icol += n){ ui = u + icol; uipi = upi+icol; qkui = realdot(qk, ui, nmink) + realdot(qkpi, uipi, nmink); qkuiim = realdot(qk, uipi, nmink) - realdot(qkpi, ui, nmink); qkui /= betak; qkuiim /= betak; /* for all j, we have x(j,i) -= (qkui + i * qkuiim) * (qk[j] + i*qkpi[j]) */ for(j = 0; j < nmink; j++){ ui[j] -= qkui * qk[j] - qkuiim * qkpi[j]; uipi[j] -= qkui * qkpi[j] + qkuiim * qk[j]; } } } /* k = 0:n-2 */ /* ------------------------------------------------------------ The Q*R decomposition is now done, but IM diag(u) may be nonzero. Therefore, we multiply each row with conj(sign(u_ii)) = conj(u_ii)/|u_ii|. Let q(1:n,n) = sign(diag(u)) ------------------------------------------------------------ */ mxAssert(n>=0,""); if(n > 0){ kcol = n * (n-1); /* sign column q(:,n) */ qk = q + kcol; qkpi = qpi + kcol; /* Let icol point to (i,i) entry */ for(i = 0, icol = 0; i < n; i++, icol += n+1){ xkk = u[icol]; /* get u(i,i) */ xkkim = upi[icol]; absxkk = sqrt(SQR(xkk) + SQR(xkkim)); qk[i] = xkk / absxkk; /* q(i) = sign(u(i,i)) */ qkpi[i] = xkkim / absxkk; u[icol] = absxkk; /* new u(i,i) = |uOLD(i,i)| */ upi[icol] = 0.0; } /* ------------------------------------------------------------ Let Unew = Q_n'*Uold, i.e. u(i,k) = conj(qn(i)) * uOLD(i,k), i < k ------------------------------------------------------------ */ for(k = 1, kcol = n; k < n; k++, kcol += n) isconjhadamul(u+kcol, upi+kcol, qk,qkpi,k); } }
void mexFunction( int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[] ) { double *Q, *R, *Rout, *indextmp, *normrout, *nreout; mwIndex subs[2]; mwSize nsubs=2; mwIndex *index; mwSize n, k1, k, j, nre, method; double alpha, normr, normrold; /* CHECK FOR PROPER NUMBER OF ARGUMENTS */ if (nrhs < 3) { mexErrMsgTxt("mexreorth: requires at least 2 input arguments."); } if (nlhs > 4) { mexErrMsgTxt("mexreorth: requires at most 4 output argument."); } /* CHECK THE DIMENSIONS */ n = mxGetM(prhs[0]); k1 = mxGetN(prhs[0]); if (mxIsSparse(prhs[0])) { mexErrMsgTxt("mexreorth: sparse Q not allowed."); } Q = mxGetPr(prhs[0]); if (mxIsSparse(prhs[1])) { mexErrMsgTxt("mexreorth: sparse R not allowed."); } if (mxGetM(prhs[1])!=n || mxGetN(prhs[1])!=1) { mexErrMsgTxt("mexreorth: R is not compatible."); } R = mxGetPr(prhs[1]); if (nrhs < 3) { normr = sqrt(realdot(R,R,n)); } else { normr = *mxGetPr(prhs[2]); } if (nrhs < 4) { k = k1; index = mxCalloc(k,sizeof(mwSize)); for (j=0; j<k; j++) { index[j]=j; } } else { indextmp = mxGetPr(prhs[3]); if (indextmp == NULL) { mexErrMsgTxt("mexreorth: index is empty"); } k = MAX(mxGetM(prhs[3]),mxGetN(prhs[3])); index = mxCalloc(k,sizeof(mwSize)); for (j=0; j<k; j++) { index[j] = (mwSize) (indextmp[j]-1); } } if (nrhs< 5) { alpha = 0.5; } else { alpha = *mxGetPr(prhs[4]); } if (nrhs==5) { method = (mwSize)*mxGetPr(prhs[5]); } else { method = 0; } /***** create return argument *****/ plhs[0] = mxCreateDoubleMatrix(n,1,mxREAL); Rout = mxGetPr(plhs[0]); plhs[1] = mxCreateDoubleMatrix(1,1,mxREAL); normrout = mxGetPr(plhs[1]); plhs[2] = mxCreateDoubleMatrix(1,1,mxREAL); nreout = mxGetPr(plhs[2]); memcpy(mxGetPr(plhs[0]),mxGetPr(prhs[1]),(n)*sizeof(double)); /***** main *****/ normrold = 0; nre = 0; while ((normr<alpha*normrold) || (nre==0)) { mgs(n,Q,Rout,index,k); normrold = normr; normr = sqrt(realdot(Rout,Rout,n)); nre = nre+1; if (nre > 4) { /** printf("R is in span(Q)"); **/ for (j=0; j<n; j++) { Rout[j] = 0; } normr = 0; break; } } normrout[0] = normr; nreout[0] = nre; return; }