/*
 * phi - solve A * X = e^A - I for X and store X in E.
 *
 * A: n-by-n input matrix (n*n contiguous floats).
 * E: n-by-n output buffer; holds e^A, then e^A - I, and finally X.
 * n: matrix order; nothing is done when n <= 0.
 *
 * A is passed to culaSgesv as the coefficient matrix.
 * NOTE(review): culaSgesv presumably follows LAPACK sgesv and overwrites A
 * with its LU factors - confirm, and pass a copy if A is needed afterwards.
 *
 * Terminates the process if the pivot array cannot be allocated.
 */
void phi(float *A, float *E, int n)
{
    culaStatus status;
    int *piv;
    int i;

    if (n <= 0) {
        return;
    }

    /* E = e^A */
    padeExp(A, E, n);

    /*
     * E = e^A - I.  Subtracting the identity only touches the diagonal, so
     * no identity matrix or matrix product is needed.
     */
    for (i = 0; i < n; i++) {
        E[(size_t)i * n + i] -= 1.0f;
    }

    /* Find X such that A * X = E; the solution is written over E. */
    piv = (int *)malloc((size_t)n * sizeof(int));
    if (piv == NULL) {
        fprintf(stderr, "phi: out of memory\n");
        exit(EXIT_FAILURE);
    }
    memset(piv, 0, (size_t)n * sizeof(int));

    status = culaSgesv(n, n, A, n, piv, E, n);
    checkCulaStatus(status);

    free(piv);
}
// a is the matrix you have, e is the one you'll fill in void padeExp(float *A, float *E, int n) { culaStatus status; float s; float *Q; float *I; float *A2; float *P; int i, j, f, *piv, scaled = 0; float c[POL + 1]; c[0] = 1; for (i = 0; i < POL; i++) { c[i + 1] = c[i]*((double)(POL - i)/((i + 1)*(2*POL - i))); } // scale here s = infinity_norm(A, n); if (s > 0.5) { scaled = 1; f = (int) (log(s)/log(2)); s = MAX(0,f + 2); status = culaSgemm('n', 'n', n, n, n, 0, A, n, A, n, pow(2, -s), A, n); checkCulaStatus(status); } // set up identity I = (float*)malloc(n*n*sizeof(float)); memset(I, 0, n*n*sizeof(float)); for (i = 0; i < n; i++) { I[i*n + i] = 1; } Q = (float*)malloc(n*n*sizeof(float)); P = (float*)malloc(n*n*sizeof(float)); memcpy(Q, I, n*n*sizeof(float)); memcpy(P, I, n*n*sizeof(float)); // allocate space for A2; no need to initialize memory A2 = (float*)malloc(n*n*sizeof(float)); status = culaSgemm('n', 'n', n, n, n, 1, A, n, A, n, 0, A2, n); checkCulaStatus(status); status = culaSgemm('n', 'n', n, n, n, 0, Q, n, Q, n, c[POL], Q, n); checkCulaStatus(status); status = culaSgemm('n', 'n', n, n, n, 0, P, n, P, n, c[POL - 1], P, n); checkCulaStatus(status); int odd = 1; for (i = POL - 2; i >= 0; i--) { if (odd == 1) { // Q = Q*A2 + c[k]*I; status = culaSgemm('n', 'n', n, n, n, 1, Q, n, A2, n, 0, Q, n); checkCulaStatus(status); status = culaSgemm('n', 'n', n, n, n, c[i], I, n, I, n, 1, Q, n); checkCulaStatus(status); } else { // P = P*A2 + c[k]*I status = culaSgemm('n', 'n', n, n, n, 1, P, n, A2, n, 0, P, n); checkCulaStatus(status); status = culaSgemm('n', 'n', n, n, n, c[i], I, n, I, n, 1, P, n); checkCulaStatus(status); } odd = 1-odd; } if (odd == 1) { // Q = Q*A status = culaSgemm('n', 'n', n, n, n, 1, Q, n, A, n, 0, Q, n); checkCulaStatus(status); } else { // P = P*A status = culaSgemm('n', 'n', n, n, n, 1, P, n, A, n, 0, P, n); checkCulaStatus(status); } // Q = Q - P status = culaSgemm('n', 'n', n, n, n, -1, P, n, I, n, 1, Q, n); checkCulaStatus(status); // 
Find X s.t. QX = P piv = (int*)malloc(n*sizeof(int)); memset(piv, 0, n*sizeof(int)); status = culaSgesv(n, n, Q, n, piv, P, n); checkCulaStatus(status); // now P = X memcpy(E, I, n*n*sizeof(float)); if (odd == 0) status = culaSgemm('n', 'n', n, n, n, 2, I, n, P, n, 1, E, n); else status = culaSgemm('n', 'n', n, n, n, -2, I, n, P, n, -1, E, n); checkCulaStatus(status); for(i = 0; i < s; i++) { status = culaSgemm('n', 'n', n, n, n, 1, E, n, E, n, 0, E, n); checkCulaStatus(status); } if (scaled == 1) { status = culaSgemm('n', 'n', n, n, n, 0, A, n, A, n, 1./pow(2, -s), A, n); checkCulaStatus(status); } free(I); free(A2); free(P); free(Q); free(piv); return; }
void culaFloatExample() { #ifdef NDEBUG int N = 8192; #else int N = 1024; #endif int NRHS = 1; int i; culaStatus status; culaFloat* A = NULL; culaFloat* B = NULL; culaFloat* X = NULL; culaInt* IPIV = NULL; culaFloat one = 1.0f; culaFloat thresh = 1e-6f; culaFloat diff; printf("-------------------\n"); printf(" SGESV\n"); printf("-------------------\n"); printf("Allocating Matrices\n"); A = (culaFloat*)malloc(N*N*sizeof(culaFloat)); B = (culaFloat*)malloc(N*sizeof(culaFloat)); X = (culaFloat*)malloc(N*sizeof(culaFloat)); IPIV = (culaInt*)malloc(N*sizeof(culaInt)); if(!A || !B || !IPIV) exit(EXIT_FAILURE); printf("Initializing CULA\n"); status = culaInitialize(); checkStatus(status); // Set A to the identity matrix memset(A, 0, N*N*sizeof(culaFloat)); for(i = 0; i < N; ++i) A[i*N+i] = one; // Set B to a random matrix (see note at top) for(i = 0; i < N; ++i) B[i] = (culaFloat)rand(); memcpy(X, B, N*sizeof(culaFloat)); memset(IPIV, 0, N*sizeof(culaInt)); printf("Calling culaSgesv\n"); status = culaSgesv(N, NRHS, A, N, IPIV, X, N); checkStatus(status); printf("Verifying Result\n"); for(i = 0; i < N; ++i) { diff = X[i] - B[i]; if(diff < 0.0f) diff = -diff; if(diff > thresh) printf("Result check failed: i=%d X[i]=%f B[i]=%f", i, X[i], B[i]); } printf("Shutting down CULA\n\n"); culaShutdown(); free(A); free(B); free(IPIV); }