void expm(int row, double *A) { int i; int m_vals[] = {3, 5, 7, 9, 13}; double theta[] = {0.01495585217958292, 0.2539398330063230, 0.9504178996162932, 2.097847961257068, 5.371920351148152}; int lentheta = 5; double normA = onenorm(row, row, A); if (normA <= theta[4]) { for (i = 0; i < lentheta; i++) { if (normA <= theta[i]) { padeapprox(m_vals[i], row, A); break; } } } else { int s; double t = frexp(normA / (theta[4]), &s); s = s - (t == 0.5); t = pow(2, -s); int row2 = row * row; /* int i1 = 1;*/ // dscal_(&row2, &t, A, &i1); dscal_3l(row2, t, A); padeapprox(m_vals[4], row, A); double *temp = malloc(row * row * sizeof(double)); for (int ii = 0; ii < row * row; ii++) temp[ii] = 0.0; // char ta = 'n'; double alpha = 1; double beta = 0; for (i = 0; i < s; i++) { // dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, A, // &row, &beta, temp, &row); dgemm_nn_3l(row, row, row, A, row, A, row, temp, row); dmcopy(row, row, temp, row, A, row); } free(temp); } }
/************************************************ Mass-spring system: nx/2 masses connected each other with springs (in a row), and the first and the last one to walls. nu (<=nx) controls act on the first nu masses. The system is sampled with sampling time Ts. ************************************************/ void mass_spring_system(double Ts, int nx, int nu, double *A, double *B, double *b, double *x0) { int nx2 = nx * nx; int info = 0; int pp = nx / 2; // number of masses /************************************************ * build the continuous time system ************************************************/ double *T; d_zeros(&T, pp, pp); int ii; for (ii = 0; ii < pp; ii++) T[ii * (pp + 1)] = -2; for (ii = 0; ii < pp - 1; ii++) T[ii * (pp + 1) + 1] = 1; for (ii = 1; ii < pp; ii++) T[ii * (pp + 1) - 1] = 1; double *Z; d_zeros(&Z, pp, pp); double *I; d_zeros(&I, pp, pp); for (ii = 0; ii < pp; ii++) I[ii * (pp + 1)] = 1.0; // = eye(pp); double *Ac; d_zeros(&Ac, nx, nx); dmcopy(pp, pp, Z, pp, Ac, nx); dmcopy(pp, pp, T, pp, Ac + pp, nx); dmcopy(pp, pp, I, pp, Ac + pp * nx, nx); dmcopy(pp, pp, Z, pp, Ac + pp * (nx + 1), nx); free(T); free(Z); free(I); d_zeros(&I, nu, nu); for (ii = 0; ii < nu; ii++) I[ii * (nu + 1)] = 1.0; // I = eye(nu); double *Bc; d_zeros(&Bc, nx, nu); dmcopy(nu, nu, I, nu, Bc + pp, nx); free(I); /************************************************ * compute the discrete time system ************************************************/ double *bb; d_zeros(&bb, nx, 1); dmcopy(nx, 1, bb, nx, b, nx); dmcopy(nx, nx, Ac, nx, A, nx); dscal_3l(nx2, Ts, A); expm(nx, A); d_zeros(&T, nx, nx); d_zeros(&I, nx, nx); for (ii = 0; ii < nx; ii++) I[ii * (nx + 1)] = 1.0; // I = eye(nx); dmcopy(nx, nx, A, nx, T, nx); daxpy_3l(nx2, -1.0, I, T); dgemm_nn_3l(nx, nu, nx, T, nx, Bc, nx, B, nx); int *ipiv = (int *)malloc(nx * sizeof(int)); dgesv_3l(nx, nu, Ac, nx, ipiv, B, nx, &info); free(ipiv); free(Ac); free(Bc); free(bb); /************************************************ * initial state ************************************************/ if (nx == 4) { x0[0] = 5; x0[1] = 10; x0[2] = 15; x0[3] = 20; } else { int jj; for (jj = 0; jj < nx; jj++) x0[jj] = 1; } }
/* computes the Pade approximation of degree m of the matrix A */ void padeapprox(int m, int row, double *A) { int row2 = row * row; /* int i1 = 1;*/ /* double d0 = 0;*/ /* double d1 = 1;*/ /* double dm1 = -1;*/ double *U = malloc(row * row * sizeof(double)); for (int ii = 0; ii < row * row; ii++) U[ii] = 0.0; double *V = malloc(row * row * sizeof(double)); for (int ii = 0; ii < row * row; ii++) V[ii] = 0.0; if (m == 3) { double c[] = {120, 60, 12, 1}; double *A0 = malloc(row * row * sizeof(double)); for (int ii = 0; ii < row * row; ii++) A0[ii] = 0.0; for (int ii = 0; ii < row; ii++) A0[ii * (row + 1)] = 1.0; double *A2 = malloc(row * row * sizeof(double)); for (int ii = 0; ii < row * row; ii++) A2[ii] = 0.0; // char ta = 'n'; double alpha = 1; double beta = 0; // dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, A, &row, // &beta, A2, &row); dgemm_nn_3l(row, row, row, A, row, A, row, A2, row); double *temp = malloc(row * row * sizeof(double)); // dscal_(&row2, &d0, temp, &i1); dscal_3l(row2, 0, temp); // daxpy_(&row2, &c[3], A2, &i1, temp, &i1); daxpy_3l(row2, c[3], A2, temp); // daxpy_(&row2, &c[1], A0, &i1, temp, &i1); daxpy_3l(row2, c[1], A0, temp); // dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, temp, // &row, &beta, U, &row); dgemm_nn_3l(row, row, row, A, row, temp, row, U, row); // dscal_(&row2, &d0, V, &i1); dscal_3l(row2, 0, V); // daxpy_(&row2, &c[2], A2, &i1, V, &i1); daxpy_3l(row2, c[2], A2, V); // daxpy_(&row2, &c[0], A0, &i1, V, &i1); daxpy_3l(row2, c[0], A0, V); free(A0); free(A2); free(temp); } else if (m == 5) { double c[] = {30240, 15120, 3360, 420, 30, 1}; double *A0 = malloc(row * row * sizeof(double)); for (int ii = 0; ii < row * row; ii++) A0[ii] = 0.0; for (int ii = 0; ii < row; ii++) A0[ii * (row + 1)] = 1.0; double *A2 = malloc(row * row * sizeof(double)); for (int ii = 0; ii < row * row; ii++) A2[ii] = 0.0; double *A4 = malloc(row * row * sizeof(double)); for (int ii = 0; ii < row * row; ii++) A4[ii] = 0.0; // char ta = 'n'; double alpha = 1; double beta = 0; // dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, A, &row, // &beta, A2, &row); dgemm_nn_3l(row, row, row, A, row, A, row, A2, row); // dgemm_(&ta, &ta, &row, &row, &row, &alpha, A2, &row, A2, &row, // &beta, A4, &row); dgemm_nn_3l(row, row, row, A2, row, A2, row, A4, row); dmcopy(row, row, A4, row, V, row); double *temp = malloc(row * row * sizeof(double)); for (int ii = 0; ii < row * row; ii++) temp[ii] = 0.0; dmcopy(row, row, A4, row, temp, row); // daxpy_(&row2, &c[3], A2, &i1, temp, &i1); daxpy_3l(row2, c[3], A2, temp); // daxpy_(&row2, &c[1], A0, &i1, temp, &i1); daxpy_3l(row2, c[1], A0, temp); // dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, temp, // &row, &beta, U, &row); dgemm_nn_3l(row, row, row, A, row, temp, row, U, row); // dscal_(&row2, &c[4], V, &i1); dscal_3l(row2, c[4], V); // daxpy_(&row2, &c[2], A2, &i1, V, &i1); daxpy_3l(row2, c[2], A2, V); // daxpy_(&row2, &c[0], A0, &i1, V, &i1); daxpy_3l(row2, c[0], A0, V); free(A0); free(A2); free(A4); free(temp); } else if (m == 7) { double c[] = {17297280, 8648640, 1995840, 277200, 25200, 1512, 56, 1}; double *A0 = malloc(row * row * sizeof(double)); for (int ii = 0; ii < row * row; ii++) A0[ii] = 0.0; for (int ii = 0; ii < row; ii++) A0[ii * (row + 1)] = 1.0; double *A2 = malloc(row * row * sizeof(double)); for (int ii = 0; ii < row * row; ii++) A2[ii] = 0.0; double *A4 = malloc(row * row * sizeof(double)); for (int ii = 0; ii < row * row; ii++) A4[ii] = 0.0; double *A6 = malloc(row * row * sizeof(double)); for (int ii = 0; ii < row * row; ii++) A6[ii] = 0.0; // char ta = 'n'; double alpha = 1; double beta = 1; // dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, A, &row, // &beta, A2, &row); dgemm_nn_3l(row, row, row, A, row, A, row, A2, row); // dgemm_(&ta, &ta, &row, &row, &row, &alpha, A2, &row, A2, &row, // &beta, A4, &row); dgemm_nn_3l(row, row, row, A2, row, A2, row, A4, row); // dgemm_(&ta, &ta, &row, &row, &row, &alpha, A4, &row, A2, &row, // &beta, A6, &row); dgemm_nn_3l(row, row, row, A4, row, A2, row, A6, row); double *temp = malloc(row * row * sizeof(double)); for (int ii = 0; ii < row * row; ii++) temp[ii] = 0.0; // dscal_(&row2, &d0, temp, &i1); dscal_3l(row2, 0, temp); // daxpy_(&row2, &c[3], A2, &i1, temp, &i1); daxpy_3l(row2, c[3], A2, temp); // daxpy_(&row2, &c[1], A0, &i1, temp, &i1); daxpy_3l(row2, c[1], A0, temp); // daxpy_(&row2, &c[5], A4, &i1, temp, &i1); daxpy_3l(row2, c[5], A4, temp); // daxpy_(&row2, &c[7], A6, &i1, temp, &i1); daxpy_3l(row2, c[7], A6, temp); // dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, temp, // &row, &beta, U, &row); dgemm_nn_3l(row, row, row, A, row, temp, row, U, row); // dscal_(&row2, &d0, V, &i1); dscal_3l(row2, 0, V); // daxpy_(&row2, &c[2], A2, &i1, V, &i1); daxpy_3l(row2, c[2], A2, V); // daxpy_(&row2, &c[0], A0, &i1, V, &i1); daxpy_3l(row2, c[0], A0, V); // daxpy_(&row2, &c[4], A4, &i1, V, &i1); daxpy_3l(row2, c[4], A4, V); // daxpy_(&row2, &c[6], A6, &i1, V, &i1); daxpy_3l(row2, c[6], A6, V); free(A0); free(A2); free(A4); free(A6); free(temp); } else if (m == 9) { double c[] = {17643225600, 8821612800, 2075673600, 302702400, 30270240, 2162160, 110880, 3960, 90, 1}; double *A0 = malloc(row * row * sizeof(double)); for (int ii = 0; ii < row * row; ii++) A0[ii] = 0.0; for (int ii = 0; ii < row; ii++) A0[ii * (row + 1)] = 1.0; double *A2 = malloc(row * row * sizeof(double)); for (int ii = 0; ii < row * row; ii++) A2[ii] = 0.0; double *A4 = malloc(row * row * sizeof(double)); for (int ii = 0; ii < row * row; ii++) A4[ii] = 0.0; double *A6 = malloc(row * row * sizeof(double)); for (int ii = 0; ii < row * row; ii++) A6[ii] = 0.0; double *A8 = malloc(row * row * sizeof(double)); for (int ii = 0; ii < row * row; ii++) A8[ii] = 0.0; // char ta = 'n'; double alpha = 1; double beta = 0; // dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, A, &row, // &beta, A2, &row); dgemm_nn_3l(row, row, row, A, row, A, row, A2, row); // dgemm_(&ta, &ta, &row, &row, &row, &alpha, A2, &row, A2, &row, // &beta, A4, &row); dgemm_nn_3l(row, row, row, A2, row, A2, row, A4, row); // dgemm_(&ta, &ta, &row, &row, &row, &alpha, A4, &row, A2, &row, // &beta, A6, &row); dgemm_nn_3l(row, row, row, A4, row, A2, row, A6, row); // dgemm_(&ta, &ta, &row, &row, &row, &alpha, A6, &row, A2, &row, // &beta, A8, &row); dgemm_nn_3l(row, row, row, A6, row, A2, row, A8, row); dmcopy(row, row, A8, row, V, row); double *temp = malloc(row * row * sizeof(double)); for (int ii = 0; ii < row * row; ii++) temp[ii] = 0.0; dmcopy(row, row, A8, row, temp, row); // daxpy_(&row2, &c[3], A2, &i1, temp, &i1); daxpy_3l(row2, c[3], A2, temp); // daxpy_(&row2, &c[1], A0, &i1, temp, &i1); daxpy_3l(row2, c[1], A0, temp); // daxpy_(&row2, &c[5], A4, &i1, temp, &i1); daxpy_3l(row2, c[5], A4, temp); // daxpy_(&row2, &c[7], A6, &i1, temp, &i1); daxpy_3l(row2, c[7], A6, temp); // dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, temp, // &row, &beta, U, &row); dgemm_nn_3l(row, row, row, A, row, temp, row, U, row); // dscal_(&row2, &c[8], V, &i1); dscal_3l(row2, c[8], V); // daxpy_(&row2, &c[2], A2, &i1, V, &i1); daxpy_3l(row2, c[2], A2, V); // daxpy_(&row2, &c[0], A0, &i1, V, &i1); daxpy_3l(row2, c[0], A0, V); // daxpy_(&row2, &c[4], A4, &i1, V, &i1); daxpy_3l(row2, c[4], A4, V); // daxpy_(&row2, &c[6], A6, &i1, V, &i1); daxpy_3l(row2, c[6], A6, V); free(A0); free(A2); free(A4); free(A6); free(A8); free(temp); } else if (m == 13) { // tested double c[] = {64764752532480000, 32382376266240000, 7771770303897600, 1187353796428800, 129060195264000, 10559470521600, 670442572800, 33522128640, 1323241920, 40840800, 960960, 16380, 182, 1}; double *A0 = malloc(row * row * sizeof(double)); for (int ii = 0; ii < row * row; ii++) A0[ii] = 0.0; for (int ii = 0; ii < row; ii++) A0[ii * (row + 1)] = 1.0; double *A2 = malloc(row * row * sizeof(double)); for (int ii = 0; ii < row * row; ii++) A2[ii] = 0.0; double *A4 = malloc(row * row * sizeof(double)); for (int ii = 0; ii < row * row; ii++) A4[ii] = 0.0; double *A6 = malloc(row * row * sizeof(double)); for (int ii = 0; ii < row * row; ii++) A6[ii] = 0.0; // char ta = 'n'; double alpha = 1; double beta = 0; // dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, A, &row, // &beta, A2, &row); dgemm_nn_3l(row, row, row, A, row, A, row, A2, row); // dgemm_(&ta, &ta, &row, &row, &row, &alpha, A2, &row, A2, &row, // &beta, A4, &row); dgemm_nn_3l(row, row, row, A2, row, A2, row, A4, row); // dgemm_(&ta, &ta, &row, &row, &row, &alpha, A4, &row, A2, &row, // &beta, A6, &row); dgemm_nn_3l(row, row, row, A4, row, A2, row, A6, row); dmcopy(row, row, A2, row, U, row); double *temp = malloc(row * row * sizeof(double)); for (int ii = 0; ii < row * row; ii++) temp[ii] = 0.0; // dscal_(&row2, &c[9], U, &i1); dscal_3l(row2, c[9], U); // daxpy_(&row2, &c[11], A4, &i1, U, &i1); daxpy_3l(row2, c[11], A4, U); // daxpy_(&row2, &c[13], A6, &i1, U, &i1); daxpy_3l(row2, c[13], A6, U); // dgemm_(&ta, &ta, &row, &row, &row, &alpha, A6, &row, U, &row, // &beta, temp, &row); dgemm_nn_3l(row, row, row, A6, row, U, row, temp, row); // daxpy_(&row2, &c[7], A6, &i1, temp, &i1); daxpy_3l(row2, c[7], A6, temp); // daxpy_(&row2, &c[5], A4, &i1, temp, &i1); daxpy_3l(row2, c[5], A4, temp); // daxpy_(&row2, &c[3], A2, &i1, temp, &i1); daxpy_3l(row2, c[3], A2, temp); // daxpy_(&row2, &c[1], A0, &i1, temp, &i1); daxpy_3l(row2, c[1], A0, temp); // dgemm_(&ta, &ta, &row, &row, &row, &alpha, A, &row, temp, // &row, &beta, U, &row); dgemm_nn_3l(row, row, row, A, row, temp, row, U, row); dmcopy(row, row, A2, row, temp, row); // dscal_(&row2, &c[8], V, &i1); dscal_3l(row2, c[8], V); // daxpy_(&row2, &c[12], A6, &i1, temp, &i1); daxpy_3l(row2, c[12], A6, temp); // daxpy_(&row2, &c[10], A4, &i1, temp, &i1); daxpy_3l(row2, c[10], A4, temp); // dgemm_(&ta, &ta, &row, &row, &row, &alpha, A6, &row, temp, // &row, &beta, V, &row); dgemm_nn_3l(row, row, row, A6, row, temp, row, V, row); // daxpy_(&row2, &c[6], A6, &i1, V, &i1); daxpy_3l(row2, c[6], A6, V); // daxpy_(&row2, &c[4], A4, &i1, V, &i1); daxpy_3l(row2, c[4], A4, V); // daxpy_(&row2, &c[2], A2, &i1, V, &i1); daxpy_3l(row2, c[2], A2, V); // daxpy_(&row2, &c[0], A0, &i1, V, &i1); daxpy_3l(row2, c[0], A0, V); free(A0); free(A2); free(A4); free(A6); free(temp); } else { printf("%s\n", "Wrong Pade approximatin degree"); exit(1); } double *D = malloc(row * row * sizeof(double)); for (int ii = 0; ii < row * row; ii++) D[ii] = 0.0; // dcopy_(&row2, V, &i1, A, &i1); dmcopy(row, row, V, row, A, row); // daxpy_(&row2, &d1, U, &i1, A, &i1); daxpy_3l(row2, 1.0, U, A); // dcopy_(&row2, V, &i1, D, &i1); dmcopy(row, row, V, row, D, row); // daxpy_(&row2, &dm1, U, &i1, D, &i1); daxpy_3l(row2, -1.0, U, D); int *ipiv = (int *) calloc(row, sizeof(int)); int info = 0; // dgesv_(&row, &row, D, &row, ipiv, A, &row, &info); dgesv_3l(row, row, D, row, ipiv, A, row, &info); free(ipiv); free(D); free(U); free(V); }