/** * Matrix exponential - based on the _corrected_ code for Octave's expm function. * * @param x real square matrix to exponentiate * * @return matrix exponential of x */ SEXP dgeMatrix_exp(SEXP x) { const double one = 1.0, zero = 0.0; const int i1 = 1; int *Dims = INTEGER(GET_SLOT(x, Matrix_DimSym)); const int n = Dims[1], nsqr = n * n, np1 = n + 1; SEXP val = PROTECT(duplicate(x)); int i, ilo, ilos, ihi, ihis, j, sqpow; int *pivot = Calloc(n, int); double *dpp = Calloc(nsqr, double), /* denominator power Pade' */ *npp = Calloc(nsqr, double), /* numerator power Pade' */ *perm = Calloc(n, double), *scale = Calloc(n, double), *v = REAL(GET_SLOT(val, Matrix_xSym)), *work = Calloc(nsqr, double), inf_norm, m1_j/*= (-1)^j */, trshift; R_CheckStack(); if (n < 1 || Dims[0] != n) error(_("Matrix exponential requires square, non-null matrix")); if(n == 1) { v[0] = exp(v[0]); UNPROTECT(1); return val; } /* Preconditioning 1. Shift diagonal by average diagonal if positive. */ trshift = 0; /* determine average diagonal element */ for (i = 0; i < n; i++) trshift += v[i * np1]; trshift /= n; if (trshift > 0.) { /* shift diagonal by -trshift */ for (i = 0; i < n; i++) v[i * np1] -= trshift; } /* Preconditioning 2. Balancing with dgebal. */ F77_CALL(dgebal)("P", &n, v, &n, &ilo, &ihi, perm, &j); if (j) error(_("dgeMatrix_exp: LAPACK routine dgebal returned %d"), j); F77_CALL(dgebal)("S", &n, v, &n, &ilos, &ihis, scale, &j); if (j) error(_("dgeMatrix_exp: LAPACK routine dgebal returned %d"), j); /* Preconditioning 3. Scaling according to infinity norm */ inf_norm = F77_CALL(dlange)("I", &n, &n, v, &n, work); sqpow = (inf_norm > 0) ? (int) (1 + log(inf_norm)/log(2.)) : 0; if (sqpow < 0) sqpow = 0; if (sqpow > 0) { double scale_factor = 1.0; for (i = 0; i < sqpow; i++) scale_factor *= 2.; for (i = 0; i < nsqr; i++) v[i] /= scale_factor; } /* Pade' approximation. Powers v^8, v^7, ..., v^1 */ AZERO(npp, nsqr); AZERO(dpp, nsqr); m1_j = -1; for (j = 7; j >=0; j--) { double mult = padec[j]; /* npp = m * npp + padec[j] *m */ F77_CALL(dgemm)("N", "N", &n, &n, &n, &one, v, &n, npp, &n, &zero, work, &n); for (i = 0; i < nsqr; i++) npp[i] = work[i] + mult * v[i]; /* dpp = m * dpp + (m1_j * padec[j]) * m */ mult *= m1_j; F77_CALL(dgemm)("N", "N", &n, &n, &n, &one, v, &n, dpp, &n, &zero, work, &n); for (i = 0; i < nsqr; i++) dpp[i] = work[i] + mult * v[i]; m1_j *= -1; } /* Zero power */ for (i = 0; i < nsqr; i++) dpp[i] *= -1.; for (j = 0; j < n; j++) { npp[j * np1] += 1.; dpp[j * np1] += 1.; } /* Pade' approximation is solve(dpp, npp) */ F77_CALL(dgetrf)(&n, &n, dpp, &n, pivot, &j); if (j) error(_("dgeMatrix_exp: dgetrf returned error code %d"), j); F77_CALL(dgetrs)("N", &n, &n, dpp, &n, pivot, npp, &n, &j); if (j) error(_("dgeMatrix_exp: dgetrs returned error code %d"), j); Memcpy(v, npp, nsqr); /* Now undo all of the preconditioning */ /* Preconditioning 3: square the result for every power of 2 */ while (sqpow--) { F77_CALL(dgemm)("N", "N", &n, &n, &n, &one, v, &n, v, &n, &zero, work, &n); Memcpy(v, work, nsqr); } /* Preconditioning 2: apply inverse scaling */ for (j = 0; j < n; j++) for (i = 0; i < n; i++) v[i + j * n] *= scale[i]/scale[j]; /* 2 b) Inverse permutation (if not the identity permutation) */ if (ilo != 1 || ihi != n) { /* Martin Maechler's code */ #define SWAP_ROW(I,J) F77_CALL(dswap)(&n, &v[(I)], &n, &v[(J)], &n) #define SWAP_COL(I,J) F77_CALL(dswap)(&n, &v[(I)*n], &i1, &v[(J)*n], &i1) #define RE_PERMUTE(I) \ int p_I = (int) (perm[I]) - 1; \ SWAP_COL(I, p_I); \ SWAP_ROW(I, p_I) /* reversion of "leading permutations" : in reverse order */ for (i = (ilo - 1) - 1; i >= 0; i--) { RE_PERMUTE(i); } /* reversion of "trailing permutations" : applied in forward order */ for (i = (ihi + 1) - 1; i < n; i++) { RE_PERMUTE(i); } } /* Preconditioning 1: Trace normalization */ if (trshift > 0.) { double mult = exp(trshift); for (i = 0; i < nsqr; i++) v[i] *= mult; } /* Clean up */ Free(work); Free(scale); Free(perm); Free(npp); Free(dpp); Free(pivot); UNPROTECT(1); return val; }
/* Matrix exponential exp(x), where x is an (n x n) matrix. Result z * is an (n x n) matrix. Mostly lifted from the core of fonction * expm() of package Matrix, which is itself based on the function of * the same name in Octave. */ void expm(double *x, int n, double *z, precond_type precond_kind) { if (n == 1) z[0] = exp(x[0]); /* scalar exponential */ else { /* Constants */ const double one = 1.0, zero = 0.0; const int i1 = 1, nsqr = n * n, np1 = n + 1; /* Variables */ int i, j, is_uppertri = TRUE;; int ilo, ihi, iloscal, ihiscal, info, sqrpowscal; double infnorm, trshift, m1pj = -1; /* Arrays */ int *pivot = (int *) R_alloc(n, sizeof(int)); /* pivot vector */ double *scale; /* scale array */ double *perm = (double *) R_alloc(n, sizeof(double));/* permutation/sc array */ double *work = (double *) R_alloc(nsqr, sizeof(double)); /* workspace array */ double *npp = (double *) R_alloc(nsqr, sizeof(double)); /* num. power Pade */ double *dpp = (double *) R_alloc(nsqr, sizeof(double)); /* denom. power Pade */ Memcpy(z, x, nsqr); /* Check if matrix x is upper triangular; stop checking as * soon as a non-zero value is found below the diagonal. */ for (i = 0; i < n - 1 && is_uppertri; i++) for (j = i + 1; j < n; j++) if (!(is_uppertri = x[i * n + j] == 0.0)) break; /* Step 1 of preconditioning: shift diagonal by average diagonal. */ trshift = 0.0; for (i = 0; i < n; i++) trshift += x[i * np1]; trshift /= n; /* average diagonal element */ if (trshift > 0.0) for (i = 0; i < n; i++) z[i * np1] -= trshift; /* Step 2 of preconditioning: balancing with dgebal. */ if(precond_kind == Ward_2 || precond_kind == Ward_buggy_octave) { if (is_uppertri) { /* no need to permute if x is upper triangular */ ilo = 1; ihi = n; } else { F77_CALL(dgebal)("P", &n, z, &n, &ilo, &ihi, perm, &info); if (info) error(_("LAPACK routine dgebal returned info code %d when permuting"), info); } scale = (double *) R_alloc(n, sizeof(double)); F77_CALL(dgebal)("S", &n, z, &n, &iloscal, &ihiscal, scale, &info); if (info) error(_("LAPACK routine dgebal returned info code %d when scaling"), info); } else if(precond_kind == Ward_1) { F77_CALL(dgebal)("B", &n, z, &n, &ilo, &ihi, perm, &info); if (info) error(_("LAPACK' dgebal(\"B\",.) returned info code %d"), info); } else { error(_("invalid 'precond_kind: %d"), precond_kind); } /* Step 3 of preconditioning: Scaling according to infinity * norm (a priori always needed). */ infnorm = F77_CALL(dlange)("I", &n, &n, z, &n, work); sqrpowscal = (infnorm > 0) ? imax2((int) 1 + log(infnorm)/M_LN2, 0) : 0; if (sqrpowscal > 0) { double scalefactor = R_pow_di(2, sqrpowscal); for (i = 0; i < nsqr; i++) z[i] /= scalefactor; } /* Pade approximation (p = q = 8): compute x^8, x^7, x^6, * ..., x^1 */ for (i = 0; i < nsqr; i++) { npp[i] = 0.0; dpp[i] = 0.0; } for (j = 7; j >= 0; j--) { /* npp = z * npp + padec88[j] * z */ F77_CALL(dgemm) ("N", "N", &n, &n, &n, &one, z, &n, npp, &n, &zero, work, &n); /* npp <- work + padec88[j] * z */ for (i = 0; i < nsqr; i++) npp[i] = work[i] + padec88[j] * z[i]; /* dpp = z * dpp + (-1)^j * padec88[j] * z */ F77_CALL(dgemm) ("N", "N", &n, &n, &n, &one, z, &n, dpp, &n, &zero, work, &n); for (i = 0; i < nsqr; i++) dpp[i] = work[i] + m1pj * padec88[j] * z[i]; m1pj *= -1; /* (-1)^j */ } /* power 0 */ for (i = 0; i < nsqr; i++) dpp[i] *= -1.0; for (j = 0; j < n; j++) { npp[j * np1] += 1.0; dpp[j * np1] += 1.0; } /* Pade approximation is (dpp)^-1 * npp. */ F77_CALL(dgetrf) (&n, &n, dpp, &n, pivot, &info); if (info) error(_("LAPACK routine dgetrf returned info code %d"), info); F77_CALL(dgetrs) ("N", &n, &n, dpp, &n, pivot, npp, &n, &info); if (info) error(_("LAPACK routine dgetrs returned info code %d"), info); Memcpy(z, npp, nsqr); /* Now undo all of the preconditioning */ /* Preconditioning 3: square the result for every power of 2 */ while (sqrpowscal--) { F77_CALL(dgemm)("N", "N", &n, &n, &n, &one, z, &n, z, &n, &zero, work, &n); Memcpy(z, work, nsqr); } /* Preconditioning 2: Inversion of 'dgebal()' : * ------------------ Note that dgebak() seems *not* applicable */ /* Step 2 a) apply inverse scaling */ if(precond_kind == Ward_2 || precond_kind == Ward_buggy_octave) { for (j = 0; j < n; j++) for (i = 0; i < n; i++) z[i + j * n] *= scale[i]/scale[j]; } else if(precond_kind == Ward_1) { /* here, perm[ilo:ihi] contains scale[] */ for (j = 0; j < n; j++) { double sj = ((ilo-1 <= j && j < ihi)? perm[j] : 1.); for (i = 0; i < ilo-1; i++) z[i + j * n] /= sj; for (i = ilo-1; i < ihi; i++) z[i + j * n] *= perm[i]/sj; for (i = ihi+1; i < n; i++) z[i + j * n] /= sj; } } /* 2 b) Inverse permutation (if not the identity permutation) */ if (ilo != 1 || ihi != n) { if(precond_kind == Ward_buggy_octave) { /* inverse permutation vector */ int *invP = (int *) R_alloc(n, sizeof(int)); /* balancing permutation vector */ for (i = 0; i < n; i++) invP[i] = i; /* identity permutation */ /* leading permutations applied in forward order */ for (i = 0; i < (ilo - 1); i++) { int p_i = (int) (perm[i]) - 1; int tmp = invP[i]; invP[i] = invP[p_i]; invP[p_i] = tmp; } /* trailing permutations applied in reverse order */ for (i = n - 1; i >= ihi; i--) { int p_i = (int) (perm[i]) - 1; int tmp = invP[i]; invP[i] = invP[p_i]; invP[p_i] = tmp; } /* construct inverse balancing permutation vector */ Memcpy(pivot, invP, n); for (i = 0; i < n; i++) invP[pivot[i]] = i; /* apply inverse permutation */ Memcpy(work, z, nsqr); for (j = 0; j < n; j++) for (i = 0; i < n; i++) z[i + j * n] = work[invP[i] + invP[j] * n]; } else if(precond_kind == Ward_2 || precond_kind == Ward_1) { /* ---- new code by Martin Maechler ----- */ #define SWAP_ROW(I,J) F77_CALL(dswap)(&n, &z[(I)], &n, &z[(J)], &n) #define SWAP_COL(I,J) F77_CALL(dswap)(&n, &z[(I)*n], &i1, &z[(J)*n], &i1) #define RE_PERMUTE(I) \ int p_I = (int) (perm[I]) - 1; \ SWAP_COL(I, p_I); \ SWAP_ROW(I, p_I) /* reversion of "leading permutations" : in reverse order */ for (i = (ilo - 1) - 1; i >= 0; i--) { RE_PERMUTE(i); } /* reversion of "trailing permutations" : applied in forward order */ for (i = (ihi + 1) - 1; i < n; i++) { RE_PERMUTE(i); } } /* else if(precond_kind == Ward_1) { */ /* } */ } /* Preconditioning 1: Trace normalization */ if (trshift > 0) { double mult = exp(trshift); for (i = 0; i < nsqr; i++) z[i] *= mult; } } }