double *cgsolve(int k) { int i, first_i, last_i; int n = k * k; int maxiters = 1000 > 5*k ? 1000 : k; // partition data if (n % size) { first_i = (n / size + 1) * rank; last_i = (rank != size-1 ? first_i+n/size+1 : n); } else { first_i = n / size * rank; last_i = n / size * (rank + 1); } double *b_vec = (double *)malloc(n * sizeof(double)); double *r_vec = (double *)malloc(n * sizeof(double)); double *d_vec = (double *)malloc(n * sizeof(double)); double *A_vec = (double *)malloc(n * sizeof(double)); double *x_vec = (double *)malloc(n * sizeof(double)); for (i=0; i<n; i++) { double tmp = cs240_getB(i, n); b_vec[i] = tmp; r_vec[i] = tmp; d_vec[i] = tmp; x_vec[i] = 0; } double normb = sqrt(ddot(b_vec+first_i, b_vec+first_i, last_i-first_i)); double rtr = ddot(r_vec+first_i, r_vec+first_i, last_i-first_i); double relres = 1; i = 0; while (relres > 1e-6 && i++ < maxiters) { /*while (i++ < 1) {*/ matvec(A_vec, d_vec, k); double alpha = rtr / ddot(d_vec+first_i, A_vec+first_i, last_i-first_i); daxpy(x_vec, d_vec, 1, alpha, n); daxpy(r_vec, A_vec, 1, -1*alpha, n); double rtrold = rtr; rtr = ddot(r_vec+first_i, r_vec+first_i, last_i-first_i); double beta = rtr / rtrold; daxpy(d_vec, r_vec, beta, 1, n); relres = sqrt(rtr) / normb; } return x_vec; }