/*
 * Build a 2x2 matrix, fill its four entries and pass it to sq_init_matrix(),
 * all between a matrix_begin()/matrix_end() pair.
 *
 * NOTE(review): the previous version assigned all four values to element
 * (0, 0), leaving the other three entries unset. The values are now written
 * to the four distinct index pairs in statement order. Confirm whether 3.0
 * and 2.0 belong at (0, 1) and (1, 0) as written here, or the other way round.
 */
void init_mat(void)
{
    matrix_begin();
    matrix_t* A = matrix_matrix(2, 2);

    ELEMENT(A, 0, 0) = 1.0;
    ELEMENT(A, 0, 1) = 3.0;
    ELEMENT(A, 1, 0) = 2.0;
    ELEMENT(A, 1, 1) = 4.0;

    sq_init_matrix(A);
    matrix_end();
}
void gmres(double *A, double *D, double *x, double *b, int N, int max_restart, int max_iter, double tol) { int i, j, k, l, m, N2; double resid, *normb, *beta, *temp_nrm, temp, *r, *q, *Aq, *qA, *Dq, *w, *cs, *sn, *s, *y, *Q, *H, *res; normb = (double *) malloc(1*sizeof(double)); beta = (double *) malloc(1*sizeof(double)); temp_nrm = (double *) malloc(1*sizeof(double)); Q = (double *) malloc(N*N*(max_iter+1)*sizeof(double)); H = (double *) malloc((N+1)*max_iter*sizeof(double)); r = (double *) malloc(N*N*sizeof(double)); q = (double *) malloc(N*N*sizeof(double)); Aq = (double *) malloc(N*N*sizeof(double)); qA = (double *) malloc(N*N*sizeof(double)); Dq = (double *) malloc(N*N*sizeof(double)); w = (double *) malloc(N*N*sizeof(double)); cs = (double *) malloc((max_iter+1)*sizeof(double)); sn = (double *) malloc((max_iter+1)*sizeof(double)); s = (double *) malloc((max_iter+1)*sizeof(double)); y = (double *) malloc((max_iter+1)*sizeof(double)); res = (double *) malloc(max_iter*sizeof(double)); N2 = N*N; norm(b, normb, N2); for (k=0; k<N2; k++) r[k] = b[k]; norm(r, beta, N2); if ((resid = *beta / *normb) <= tol) { tol = resid; max_iter = 0; } for (m=0; m<max_restart; m++) { for (i=0; i<N2; i++) Q[i] = r[i] / *beta; for (i=0; i<max_iter; i++) s[i+1] = 0.0; s[0] = *beta; for (i = 0; i<max_iter; i++) { q_subQ(q, Q, N2, i); matrix_matrix(A, q, Aq, N); matrix_matrix(q, A, qA, N); matrix_matrix(D, q, Dq, N); for (k=0; k<N2; k++) w[k] = Aq[k] + qA[k] + Dq[k]; for (k=0; k<=i; k++) { q_subQ(q, Q, N2, k); H[max_iter*k+i] = inner_product(q, w, N2); w_shift(w, q, H[max_iter*k+i], N2); } /* for (k=0; k<=i; k++) { H[max_iter*k+i] = 0.0; for (j=0; j<N2; j++) H[max_iter*k+i] += Q[N2*k+j]*w[j]; } for (k=0; k<=i; k++) { for (j=0; j<N2; j++) w[j] = w[j] - H[max_iter*k+i]*Q[N2*k+j]; } */ norm(w, temp_nrm, N2); H[max_iter*(i+1)+i] = *temp_nrm; subQ_v(Q, w, N2, i+1, H[max_iter*(i+1)+i]); for (k = 0; k < i; k++) { //ApplyPlaneRotation(H(k,i), H(k+1,i), cs(k), sn(k)) temp = cs[k]*H[max_iter*k+i] 
+ sn[k]*H[max_iter*(k+1)+i]; H[max_iter*(k+1)+i] = -1.0*sn[k]*H[max_iter*k+i] + cs[k]*H[max_iter*(k+1)+i]; H[max_iter*k+i] = temp; } GeneratePlaneRotation(H[max_iter*i+i], H[max_iter*(i+1)+i], cs, sn, i); //ApplyPlaneRotation(H(i,i), H(i+1,i), cs(i), sn(i)) H[max_iter*i+i] = cs[i]*H[max_iter*i+i] + sn[i]*H[max_iter*(i+1)+i]; H[max_iter*(i+1)+i] = 0.0; //ApplyPlaneRotation(s(i), s(i+1), cs(i), sn(i)); temp = cs[i]*s[i]; s[i+1] = -1.0*sn[i]*s[i]; s[i] = temp; resid = fabs(s[i+1] / *beta); res[i] = resid; if (resid < tol) { // backsolve(H, s, y, N, max_iter, i); for (k=0; k<max_iter+1; k++) y[k] = s[k]; cblas_dtrsv(CblasRowMajor, CblasUpper, CblasNoTrans, CblasNonUnit, i, H, max_iter, y, 1); for(j=0; j<N; j++) { for (l=0; l<N; l++) { for(k=0; k<=i; k++) { x[N*j+l] += Q[N2*k+N*j+l]*y[k]; } } } break; } }//end inside for if (resid < tol) { printf(" resid = %e \n", resid); printf(" Converges at %d cycle %d step. \n", m, i+1); break; } // Caution : i = i + 1. i = i - 1; backsolve(H, s, y, N, max_iter, i); for(j=0; j<N; j++) { for (l=0; l<N; l++) { for(k=0; k<=i; k++) { x[N*j+l] += Q[N2*k+N*j+l]*y[k]; } } } matrix_matrix(A, x, Aq, N); matrix_matrix(x, A, qA, N); matrix_matrix(D, x, Dq, N); for (j=0; j<N2; j++) r[j] = b[j] - (Aq[j] + qA[j] + Dq[j]); norm(r, beta, N2); s[i+1] = *beta; resid = s[i+1] / *normb; if ( resid < tol) { printf(" resid = %e \n", resid); printf(" Converges at %d cycle %d step. \n", m, i); break; } }//end outside for }