Esempio n. 1
0
/*
 * Solves A*X = e^A - I for X and leaves the solution in E.
 *
 * A : n-by-n input matrix stored with leading dimension n. It is passed to
 *     culaSgesv as the coefficient matrix.
 *     NOTE(review): LAPACK-style gesv routines overwrite the coefficient
 *     matrix with its LU factors, so A is presumably modified on return --
 *     confirm against the CULA documentation and the callers' expectations.
 * E : n-by-n output buffer (leading dimension n); receives X.
 * n : matrix order.
 *
 * CULA failures are reported through checkCulaStatus. An allocation failure
 * terminates the process with EXIT_FAILURE.
 */
void phi(float *A, float *E, int n) {

    culaStatus status;
    int i, *piv;

    // E = e^A
    padeExp(A, E, n);

    // E = e^A - I. Only the diagonal differs, so update it directly rather
    // than building an identity matrix and running a full matrix product.
    for (i = 0; i < n; i++) {
        E[(size_t)i*(size_t)n + (size_t)i] -= 1;
    }

    // Find X s.t. AX = E. The pivot array is filled in by the solver, so it
    // needs no initialisation; at least one element is requested so that a
    // NULL result always means "out of memory".
    piv = (int*)malloc((n > 0 ? (size_t)n : 1)*sizeof(int));
    if (piv == NULL) {
        exit(EXIT_FAILURE);
    }

    status = culaSgesv(n, n, A, n, piv, E, n);
    checkCulaStatus(status);
    // now E = X

    // cleanup
    free(piv);

}
Esempio n. 2
0
/* Number of elements in an n-by-n matrix; 0 when n is not positive. */
static size_t pade_count(int n) {
    return (n > 0) ? (size_t)n*(size_t)n : 0;
}

/*
 * Allocates room for `count` floats (at least one, so a NULL result always
 * means failure). Terminates the process with EXIT_FAILURE when the request
 * overflows or cannot be satisfied.
 */
static float *pade_alloc(size_t count) {
    float *m;

    if (count == 0) {
        count = 1;
    }
    if (count > ((size_t)-1)/sizeof(float)) {
        exit(EXIT_FAILURE);
    }
    m = (float*)malloc(count*sizeof(float));
    if (m == NULL) {
        exit(EXIT_FAILURE);
    }
    return m;
}

/* Sets the n-by-n matrix M (leading dimension n) to d times the identity. */
static void pade_set_scaled_identity(float *M, int n, float d) {
    int i;

    memset(M, 0, pade_count(n)*sizeof(float));
    for (i = 0; i < n; i++) {
        M[(size_t)i*(size_t)n + (size_t)i] = d;
    }
}

/*
 * Computes the matrix exponential E = e^A with a Pade approximant of order
 * POL combined with scaling and squaring.
 *
 * A : n-by-n input matrix, leading dimension n. It is only read: the scaled
 *     matrix is built in a private copy, so A is left untouched.
 * E : n-by-n output buffer, leading dimension n; receives e^A.
 * n : matrix order.
 *
 * CULA failures are reported through checkCulaStatus. An allocation failure
 * terminates the process with EXIT_FAILURE.
 */
void padeExp(float *A, float *E, int n) {
    culaStatus status;
    float c[POL + 1];
    float *As, *A2, *P, *Q, *T, *tmp;
    float **dst;
    float norm, scale, sign;
    int *piv;
    int i, d, f, odd;
    int squarings = 0;
    size_t k;
    const size_t count = pade_count(n);

    // Pade coefficients
    c[0] = 1;
    for (i = 0; i < POL; i++) {
        c[i + 1] = c[i]*((double)(POL - i)/((i + 1)*(2*POL - i)));
    }

    // Scaling: choose the number of squarings from the infinity norm. The
    // count lives in its own integer that stays 0 when no scaling is needed,
    // so a small fractional norm can never trigger a spurious squaring.
    norm = infinity_norm(A, n);
    if (norm > 0.5) {
        f = (int)(log(norm)/log(2));
        squarings = MAX(0, f + 2);
    }
    scale = (float)pow(2, -squarings);

    // Work on a scaled copy so the caller's matrix is never modified.
    As = pade_alloc(count);
    for (k = 0; k < count; k++) {
        As[k] = A[k]*scale;
    }

    A2 = pade_alloc(count);
    P = pade_alloc(count);
    Q = pade_alloc(count);
    // T receives every matrix product so that the output of culaSgemm never
    // aliases one of its inputs.
    T = pade_alloc(count);

    // A2 = As*As
    status = culaSgemm('n', 'n', n, n, n, 1, As, n, As, n, 0, A2, n);
    checkCulaStatus(status);

    pade_set_scaled_identity(Q, n, c[POL]);
    pade_set_scaled_identity(P, n, c[POL - 1]);

    // Horner evaluation in A2, alternating between Q and P:
    //   X = X*A2 + c[i]*I
    odd = 1;
    for (i = POL - 2; i >= 0; i--) {
        dst = (odd == 1) ? &Q : &P;

        status = culaSgemm('n', 'n', n, n, n, 1, *dst, n, A2, n, 0, T, n);
        checkCulaStatus(status);

        // The product becomes the new Q/P; the old buffer is the next scratch.
        tmp = *dst;
        *dst = T;
        T = tmp;

        for (d = 0; d < n; d++) {
            (*dst)[(size_t)d*(size_t)n + (size_t)d] += c[i];
        }

        odd = 1 - odd;
    }

    // The polynomial whose turn is next gets the remaining factor of As.
    dst = (odd == 1) ? &Q : &P;
    status = culaSgemm('n', 'n', n, n, n, 1, *dst, n, As, n, 0, T, n);
    checkCulaStatus(status);
    tmp = *dst;
    *dst = T;
    T = tmp;

    // Q = Q - P
    for (k = 0; k < count; k++) {
        Q[k] -= P[k];
    }

    // Find X s.t. QX = P. The pivot array is written by the solver.
    piv = (int*)malloc((n > 0 ? (size_t)n : 1)*sizeof(int));
    if (piv == NULL) {
        exit(EXIT_FAILURE);
    }

    status = culaSgesv(n, n, Q, n, piv, P, n);
    checkCulaStatus(status);
    // now P = X

    // E = I + 2X when odd == 0, otherwise E = -(I + 2X)
    sign = (odd == 0) ? 1.0f : -1.0f;
    for (k = 0; k < count; k++) {
        E[k] = sign*2*P[k];
    }
    for (d = 0; d < n; d++) {
        E[(size_t)d*(size_t)n + (size_t)d] += sign;
    }

    // Undo the scaling: square the result once per halving of A.
    for (i = 0; i < squarings; i++) {
        status = culaSgemm('n', 'n', n, n, n, 1, E, n, E, n, 0, T, n);
        checkCulaStatus(status);
        memcpy(E, T, count*sizeof(float));
    }

    free(As);
    free(A2);
    free(P);
    free(Q);
    free(T);
    free(piv);
    return;
}
Esempio n. 3
0
/*
 * Single-precision culaSgesv demonstration.
 *
 * Builds an N-by-N identity system A*X = B with a random right-hand side,
 * solves it with culaSgesv, and prints a message for every entry of X that
 * differs from B by more than the threshold (with A = I the solution should
 * equal B). N is 8192 when NDEBUG is defined and 1024 otherwise.
 *
 * Exits with EXIT_FAILURE when any host allocation fails. CULA errors are
 * passed to checkStatus.
 */
void culaFloatExample()
{
#ifdef NDEBUG
    int N = 8192;
#else
    int N = 1024;
#endif
    int NRHS = 1;
    int i;

    culaStatus status;
    
    culaFloat* A = NULL;
    culaFloat* B = NULL;
    culaFloat* X = NULL;
    culaInt* IPIV = NULL;

    culaFloat one = 1.0f;
    culaFloat thresh = 1e-6f;
    culaFloat diff;

    printf("-------------------\n");
    printf("       SGESV\n");
    printf("-------------------\n");

    printf("Allocating Matrices\n");
    // Sizes are computed in size_t so N*N cannot overflow int arithmetic.
    A = (culaFloat*)malloc((size_t)N*N*sizeof(culaFloat));
    B = (culaFloat*)malloc((size_t)N*sizeof(culaFloat));
    X = (culaFloat*)malloc((size_t)N*sizeof(culaFloat));
    IPIV = (culaInt*)malloc((size_t)N*sizeof(culaInt));
    // X is written by memcpy below, so it must be checked as well.
    if(!A || !B || !X || !IPIV)
        exit(EXIT_FAILURE);

    printf("Initializing CULA\n");
    status = culaInitialize();
    checkStatus(status);

    // Set A to the identity matrix
    memset(A, 0, (size_t)N*N*sizeof(culaFloat));
    for(i = 0; i < N; ++i)
        A[(size_t)i*N+i] = one;
    
    // Fill B with random values; X starts as a copy of B and is passed to
    // the solver as the right-hand side buffer
    for(i = 0; i < N; ++i)
        B[i] = (culaFloat)rand();
    memcpy(X, B, (size_t)N*sizeof(culaFloat));

    memset(IPIV, 0, (size_t)N*sizeof(culaInt));

    printf("Calling culaSgesv\n");
    status = culaSgesv(N, NRHS, A, N, IPIV, X, N);
    checkStatus(status);

    printf("Verifying Result\n");
    for(i = 0; i < N; ++i)
    {
        diff = X[i] - B[i];
        if(diff < 0.0f)
            diff = -diff;
        // Newline added so consecutive failures do not run together.
        if(diff > thresh)
            printf("Result check failed:  i=%d  X[i]=%f  B[i]=%f\n", i, X[i], B[i]);
    }
    
    printf("Shutting down CULA\n\n");
    culaShutdown();

    free(A);
    free(B);
    free(X);
    free(IPIV);
}