/*
 * Conjugate-gradient iteration driven by a caller-supplied operator.
 *
 *   A          callback invoked as A(buffer, p, ctx); it is expected to leave
 *              the operator applied to p in buffer
 *   b          right-hand side on entry; it is copied into the residual,
 *              filled with 0.0 (the starting guess) and then accumulates the
 *              solution, so the caller's right-hand side is overwritten
 *   tolerance  the loop runs while sqrt(r.r) is above this value
 *   ctx        handed to A unchanged
 *
 * At most b->as_vec->len iterations are performed and the iteration count is
 * printed to stdout.  If p.Ap evaluates to exactly zero (for instance when the
 * right-hand side is all zeros) the iteration stops before dividing by it, so
 * the solution is not overwritten with NaN.
 */
void cg(eval_t A, Matrix b, double tolerance, void* ctx)
{
  Matrix r = createMatrix(b->rows, b->cols);
  Matrix p = createMatrix(b->rows, b->cols);
  Matrix buffer = createMatrix(b->rows, b->cols);
  double dotp = 1000;
  double rdr = dotp;  /* placeholder above any sensible tolerance so the loop is entered */

  copyVector(r->as_vec, b->as_vec);
  fillVector(b->as_vec, 0.0);

  int i = 0;
  while (i < b->as_vec->len && rdr > tolerance) {
    ++i;
    if (i == 1) {
      /* first search direction is the initial residual */
      copyVector(p->as_vec, r->as_vec);
      dotp = innerproduct(r->as_vec, r->as_vec);
    } else {
      /* p = r + beta * p */
      double dotp2 = innerproduct(r->as_vec, r->as_vec);
      double beta = dotp2 / dotp;
      dotp = dotp2;
      scaleVector(p->as_vec, beta);
      axpy(p->as_vec, r->as_vec, 1.0);
    }

    A(buffer, p, ctx);

    double pAp = innerproduct(p->as_vec, buffer->as_vec);
    if (pAp == 0.0) {
      /* no step length can be formed; dividing would give inf/NaN */
      break;
    }
    double alpha = dotp / pAp;

    axpy(b->as_vec, p->as_vec, alpha);        /* solution += alpha * p      */
    axpy(r->as_vec, buffer->as_vec, -alpha);  /* residual -= alpha * (A p)  */
    rdr = sqrt(innerproduct(r->as_vec, r->as_vec));
  }
  printf("%i iterations\n", i);

  freeMatrix(r);
  freeMatrix(p);
  freeMatrix(buffer);
}
/*
 * Conjugate-gradient solver for matrix * solution = rhs (serial version).
 *
 *   size          number of unknowns
 *   matrix        operator data, passed straight to multiply()
 *   rhs           right-hand side, length size
 *   solution      initial guess on entry, updated in place
 *   maxiteration  upper bound on the number of iterations
 *   tolerance     stop once sqrt(r.r / r0.r0) drops below this value
 *
 * Progress is printed every 20 iterations.  The final message reports the
 * number of iterations actually completed.
 *
 * A zero initial residual (the guess already satisfies the system) returns
 * without touching solution; previously that case divided 0 by 0 and filled
 * solution with NaN.  The final message is also well defined when
 * maxiteration <= 0.
 */
void cgsolver(int size, double *matrix, double *rhs, double *solution, int maxiteration, double tolerance)
{
    int ii, jj;
    int done = 0;                 /* iterations completed so far */
    double alpha, beta, temp1, temp2, res0tol;
    double *res, *p, *Ax, *Ap;

    res = (double*) malloc(size*sizeof(double));
    p   = (double*) malloc(size*sizeof(double));
    Ax  = (double*) malloc(size*sizeof(double));
    Ap  = (double*) malloc(size*sizeof(double));

    /* initial residual r0 = rhs - A*x0, first search direction p0 = r0 */
    multiply(size, matrix, solution, Ax);
    for (ii = 0; ii < size; ii++) {
        res[ii] = rhs[ii] - Ax[ii];
        p[ii] = res[ii];
    }
    res0tol = innerproduct(res, res, size);
    temp2 = res0tol;              /* keeps the final report defined if the loop never runs */

    printf("[CG] Conjugate gradient is started.\n");

    if (res0tol != 0.0) {
        for (ii = 0; ii < maxiteration; ii++) {
            if ((ii%20 == 0) && (ii != 0))
                printf("[CG] mse %e with a tolerance criteria of %e at %5d iterations.\n", sqrt(temp2/res0tol), tolerance, ii);

            temp1 = innerproduct(res, res, size);
            multiply(size, matrix, p, Ap);
            temp2 = innerproduct(Ap, p, size);
            alpha = temp1/temp2;

            for (jj = 0; jj < size; jj++) {
                solution[jj] = solution[jj] + alpha*p[jj];
                res[jj] = res[jj] - alpha*Ap[jj];
            }

            temp2 = innerproduct(res, res, size);
            done = ii + 1;
            if (sqrt(temp2/res0tol) < tolerance)
                break;

            beta = temp2/temp1;
            for (jj = 0; jj < size; jj++)
                p[jj] = res[jj] + beta*p[jj];
        }
    }

    printf("[CG] Finished with total iteration = %d, mse = %e.\n", done,
           (res0tol != 0.0) ? sqrt(temp2/res0tol) : 0.0);

    free(res);
    free(p);
    free(Ax);
    free(Ap);
}
/*************************************************************************** //Calculate the dihedral between plane p1-p2-p3 and p2-p3-p4 //p1[x, y, z], p2[x, y, z], p3[x, y, z], p4[x, y, z] ***************************************************************************/ double Dihedral(double *p1, double *p2, double *p3, double *p4) { double vector1[3]; subtract(p1, p2, vector1); double vector2[3]; subtract(p2, p3, vector2); double vector3[3]; subtract(p3, p4, vector3); double v1[3]; crossproduct(vector2, vector1, v1); double v2[3]; crossproduct(vector3, vector2, v2); norm (v1); norm (v2); double dihedral = innerproduct(v1, v2); if (dihedral>1 && dihedral<1+EXTRA) { dihedral=1; } else if(dihedral<-1 && dihedral>-1-EXTRA) { dihedral=-1; } else if(dihedral>1+EXTRA || dihedral<-1-EXTRA) { cout<<"Error, double Dihedral()\n"; exit(-1); } double v5[3]; crossproduct(v2, v1, v5); double direction = innerproduct(v5, vector2); if (direction>0) { return (acos(dihedral)/PI)*180; } else { return -(acos(dihedral)/PI)*180; } }
int main(int argc, char** argv) { if (argc < 3) { printf("need two parameters, the matrix size and the number of vectors\n"); return 1; } int N=atoi(argv[1]); int K=atoi(argv[2]); Matrix A = createMatrix(N,N); // identity matrix for (int i=0;i<N;++i) A->data[i][i] = 1.0; Matrix v = createMatrix(N,K); // fill with column number for (int i=0;i<K;++i) for (int j=0;j<N;++j) v->data[i][j] = i; Matrix v2 = createMatrix(N,K); double time = WallTime(); MxM(A, v, v2, 1.0, 0.0); double sum = innerproduct(v->as_vec, v2->as_vec); printf("sum: %f\n", sum); printf("elapsed: %f\n", WallTime()-time); freeMatrix(v2); freeMatrix(v); freeMatrix(A); return 0; }
// perform an innerproduct double myinnerproduct(Vector u, Vector v) { double result = innerproduct(u, v); #ifdef HAVE_MPI double r2=result; MPI_Allreduce(&r2, &result, 1, MPI_DOUBLE, MPI_SUM, *u->comm); #endif return result; }
/*
 * Accumulates, over the K vectors v[0..K-1], the inner product of
 * MxV(., A, v[i], N) with v[i].  The product is written into a scratch
 * array of N doubles on the stack that is reused for every vector.
 */
double dosum(double** A, double** v, int K, int N)
{
  double scratch[N];
  double total = 0;
  int idx = 0;

  while (idx < K) {
    MxV(scratch, A, v[idx], N);
    total += innerproduct(scratch, v[idx], N);
    ++idx;
  }
  return total;
}
//calculate rotate angle between two vector(vector1: p1,p2 vector2: p3,p4), (-180,180) double MyRotateAngle(const vector<double> &p1, const vector<double> &p2, const vector<double> &p3, const vector<double> &p4) { double vector1[3]; Mysubtract(p1, p2, vector1); double vector2[3]; Mysubtract(p3, p4, vector2); norm(vector1);norm(vector2); double angle = innerproduct(vector1, vector2); return (acos(angle)/PI)*180; }
/*
 * tridiagonalize: in-place reduction of the n-by-n matrix a using house().
 * The diagonal values are written to d[0..n-1] and the off-diagonal values
 * to e[0..n-2]; e[n-1] is never written here.
 *
 * Only entries a[i][j] with j >= i are read or updated by the first loop
 * (the sums use a[j][i] for j < i and a[i][j] for j >= i) -- presumably a is
 * symmetric and only its upper triangle is significant on entry; confirm
 * with the callers.
 *
 * First loop, k = 0 .. n-3:
 *   - d[k] receives a[k][k];
 *   - house() is applied to the n-k-1 elements of row k starting at column
 *     k+1 (it modifies them in place) and its return value goes to e[k];
 *   - when e[k] is exactly 0 the remaining update is skipped for this k;
 *   - otherwise d[k+1..n-1] is reused as scratch: it is filled with the sums
 *     s, corrected by t * v[i], and a[i][j] (j >= i > k) is reduced by
 *     p * d[j] + q * v[j].
 * After the loop the last entries d[n-2], e[n-2], d[n-1] are read directly
 * from a (guarded for n < 2 and n < 1).
 *
 * Second loop, k = n-1 .. 0: for k < n-2 every row w = a[i] with i > k has
 * t * v subtracted over columns k+1..n-1, where t is the inner product of
 * the trailing parts of v and w; row k is then overwritten with zeros except
 * for a 1 at column k.  NOTE(review): this looks like the accumulation of
 * the transformation matrix into a -- confirm before relying on it.
 */
void tridiagonalize(int n, matrix a, vector d, vector e) { int i, j, k; double s, t, p, q; vector v, w; for (k = 0; k < n - 2; k++) { v = a[k]; d[k] = v[k]; e[k] = house(n - k - 1, &v[k + 1]); if (e[k] == 0) continue; for (i = k + 1; i < n; i++) { s = 0; for (j = k + 1; j < i; j++) s += a[j][i] * v[j]; for (j = i; j < n; j++) s += a[i][j] * v[j]; d[i] = s; } t = innerproduct(n-k-1, &v[k+1], &d[k+1]) / 2; for (i = n - 1; i > k; i--) { p = v[i]; q = d[i] - t * p; d[i] = q; for (j = i; j < n; j++) a[i][j] -= p * d[j] + q * v[j]; } } if (n >= 2) { d[n - 2] = a[n - 2][n - 2]; e[n - 2] = a[n - 2][n - 1]; } if (n >= 1) d[n - 1] = a[n - 1][n - 1]; for (k = n - 1; k >= 0; k--) { v = a[k]; if (k < n - 2) { for (i = k + 1; i < n; i++) { w = a[i]; t = innerproduct(n-k-1, &v[k+1], &w[k+1]); for (j = k + 1; j < n; j++) w[j] -= t * v[j]; } } for (i = 0; i < n; i++) v[i] = 0; v[k] = 1; } }
/*
 * Householder transformation.
 *
 * Takes the magnitude of x[0..n-1] (square root of its inner product with
 * itself).  For a non-zero magnitude it is given the sign of x[0], added to
 * x[0], and the whole vector is then scaled by 1 / sqrt(x[0] * magnitude).
 * Returns the negated (signed) magnitude; x is modified in place.
 */
double house(int n, vector x)
{
    double magnitude = sqrt(innerproduct(n, x, x));  /* sqrt of inner product = length */

    if (magnitude == 0)
        return -magnitude;  /* nothing to reflect; x is left untouched */

    if (x[0] < 0)
        magnitude = -magnitude;
    x[0] += magnitude;

    double scale = 1 / sqrt(x[0] * magnitude);
    int idx = 0;
    while (idx < n) {
        x[idx] *= scale;
        idx++;
    }
    return -magnitude;
}
/*
 * Accumulates, over the K vectors v[0..K-1], the inner product of
 * MxV(., A, v[i], N) with v[i].  Each vector gets its own scratch row
 * obtained from createMatrix(K, N).
 *
 * NOTE(review): the scratch matrix is never released in this function; the
 * matching deallocation routine is not visible here -- confirm and add it.
 */
double dosum(double** A, double** v, int K, int N)
{
  double** work = createMatrix(K, N);
  double total = 0;

  for (int col = 0; col < K; ++col) {
    double* row = work[col];
    MxV(row, A, v[col], N);
    total += innerproduct(row, v[col], N);
  }
  return total;
}
/*
 * OpenMP variant: accumulates, over the K vectors v[0..K-1], the inner
 * product of MxV(., A, v[i], N) with v[i].  Iterations are distributed with
 * a static schedule, each writes to its own scratch row, and the partial
 * sums are combined through a + reduction.
 *
 * NOTE(review): the scratch matrix from createMatrix(K, N) is never released
 * in this function; the matching deallocation routine is not visible here.
 */
double dosum(double** A, double** v, int K, int N)
{
  double** work = createMatrix(K, N);
  double total = 0;

#pragma omp parallel for schedule(static) reduction(+:total)
  for (int col = 0; col < K; ++col) {
    MxV(work[col], A, v[col], N);
    total += innerproduct(work[col], v[col], N);
  }

  return total;
}
/*
 * Sum of the entries of vec, computed as the inner product of vec with a
 * vector of ones of the same length (so innerproduct() does the work).
 *
 * The inner product is evaluated once; it used to be recomputed vec->len
 * times with the same operands.  For an empty vector nothing is evaluated
 * and 0 is returned, as before.  The temporary vector of ones is now
 * released instead of being leaked.
 *
 * NOTE(review): vec itself is also released here, exactly as in the previous
 * version, so the caller must not use it afterwards.  That looks like it may
 * have been meant to free only the temporary -- confirm against the callers
 * before changing it.
 */
double doSum(Vector vec)
{
  double sum = 0;

  Vector oneVec = createPointerVector(vec->len);
  for (int i = 0; i < vec->len; ++i) {
    oneVec->data[i] = 1.0;
  }

  if (vec->len > 0) {
    sum = innerproduct(vec, oneVec);
  }

  freeVector(oneVec);
  freeVector(vec);
  return sum;
}
/*
 * GS: iterative two-colour sweep over the grid stored in u, with the inner
 * index range shared between OpenMP threads.  (The name suggests
 * Gauss-Seidel; the colouring scheme itself lives in splitVector, which is
 * not visible here.)
 *
 *   u          copied into b and then filled with 0.0 before iterating, so
 *              on entry it presumably holds the right-hand side (assuming
 *              copyVector takes the destination first); it holds the last
 *              iterate on return
 *   tolerance  iteration continues while sqrt(e.e) > tolerance, where e is
 *              the previous u minus the new u
 *   maxit      the counter is incremented before it is compared, so the
 *              loop body runs at most maxit-1 times
 *
 * One pass of the while loop: e <- u, u <- b, then for colour 0 and 1 and
 * for every i in 1 .. u->cols-2 each thread walks the index range given by
 * displ/sizes at slot get_thread()*2+color.  For each index cnt it adds the
 * four neighbouring values of v to u->data[i][cnt], divides by 4.0 and
 * stores the result back into v.
 *
 * NOTE(review): the parallel region is opened inside the loop over i, i.e.
 * once per (colour, i) pair.
 * NOTE(review): v comes from cloneMatrix(u) before u is zeroed and is never
 * reset; whether cloneMatrix copies the contents is not visible here.
 *
 * The iteration counter and the final difference norm are printed; all
 * temporaries (b, e, v, sizes, displ) are released before returning.
 */
void GS(Matrix u, double tolerance, int maxit) { int it=0; Matrix b = cloneMatrix(u); Matrix e = cloneMatrix(u); Matrix v = cloneMatrix(u); int* sizes, *displ; splitVector(u->rows-2, 2*max_threads(), &sizes, &displ); copyVector(b->as_vec, u->as_vec); fillVector(u->as_vec, 0.0); double max = tolerance+1; while (max > tolerance && ++it < maxit) { copyVector(e->as_vec, u->as_vec); copyVector(u->as_vec, b->as_vec); for (int color=0;color<2;++color) { for (int i=1;i<u->cols-1;++i) { #pragma omp parallel { int cnt=displ[get_thread()*2+color]+1; for (int j=0;j<sizes[get_thread()*2+color];++j, ++cnt) { u->data[i][cnt] += v->data[i][cnt-1]; u->data[i][cnt] += v->data[i][cnt+1]; u->data[i][cnt] += v->data[i-1][cnt]; u->data[i][cnt] += v->data[i+1][cnt]; u->data[i][cnt] /= 4.0; v->data[i][cnt] = u->data[i][cnt]; } } } } axpy(e->as_vec, u->as_vec, -1.0); max = sqrt(innerproduct(e->as_vec, e->as_vec)); } printf("number of iterations %i %f\n", it, max); freeMatrix(b); freeMatrix(e); freeMatrix(v); free(sizes); free(displ); }
//calculate dihedral between two plane(plane1: p1,p2,p3 plane2: p4,p5,p6), (-180,180) double MyDihedral(const vector<double> &p1, const vector<double> &p2, const vector<double> &p3, const vector<double> &p4, const vector<double> &p5, const vector<double> &p6) { double vector1[3]; Mysubtract(p1, p2, vector1); double vector2[3]; Mysubtract(p2, p3, vector2); double v1[3]; crossproduct(vector1, vector2, v1); double vector3[3]; Mysubtract(p4, p5, vector3); double vector4[3]; Mysubtract(p5, p6, vector4); double v2[3]; crossproduct(vector3, vector4, v2); norm(v1);norm(v2); double dihedral=innerproduct(v1,v2); return (acos(dihedral)/PI)*180; }
/*
 * Conjugate-gradient solver for matrix * solution = rhs (MPI version).
 *
 *   size          number of unknowns held by this process
 *   myrank        rank of this process; only rank 0 prints
 *   ncpus         number of processes
 *   matrix        local operator data, passed straight to multiply()
 *   rhs           local part of the right-hand side
 *   solution      local part of the initial guess, updated in place
 *   maxiteration  upper bound on the number of iterations
 *   tolerance     stop once sqrt(r.r / r0.r0) drops below this value
 *
 * The per-process sizes are gathered into load[], sindex[] holds their
 * running offsets, and xtl/xtr are scratch buffers sized for the largest
 * local block; rank[] holds the left neighbour, this rank and the right
 * neighbour (with wrap-around).  All of these are handed to multiply().
 *
 * A zero initial residual returns without touching solution; previously
 * that case divided 0 by 0 and filled solution with NaN.  The final message
 * is well defined when maxiteration <= 0 and reports the number of
 * iterations actually completed.
 *
 * NOTE(review): skipping the loop on a zero residual assumes innerproduct()
 * yields the same value on every rank -- the existing convergence break
 * already relies on that.
 */
void cgsolver(int size, int myrank, int ncpus, double *matrix, double *rhs, double *solution, int maxiteration, double tolerance)
{
    int ii, jj, rank[3];
    int done = 0;                 /* iterations completed so far */
    int *load, *sindex, maxsize;
    double alpha, beta, temp1, temp2, res0tol;
    double *res, *p, *Ax, *Ap, *xtl, *xtr;

    load   = (int*) malloc(ncpus*sizeof(int));
    sindex = (int*) malloc(ncpus*sizeof(int));

    MPI_Allgather(&size, 1, MPI_INT, load, 1, MPI_INT, MPI_COMM_WORLD);

    res = (double*) malloc(size*sizeof(double));
    p   = (double*) malloc(size*sizeof(double));
    Ax  = (double*) malloc(size*sizeof(double));
    Ap  = (double*) malloc(size*sizeof(double));

    rank[0] = (myrank-1+ncpus)%ncpus;
    rank[1] = myrank;
    rank[2] = (myrank+1)%ncpus;

    /* largest local block and the starting offset of every rank */
    maxsize = 0;
    sindex[0] = 0;
    for (ii = 0; ii < ncpus; ii++) {
        if (maxsize < load[ii])
            maxsize = load[ii];
        if (ii != 0)
            sindex[ii] = sindex[ii-1] + load[ii-1];
    }

    xtl = (double*) malloc(maxsize*sizeof(double));
    xtr = (double*) malloc(maxsize*sizeof(double));

    /* initial residual r0 = rhs - A*x0, first search direction p0 = r0 */
    multiply(rank, load, sindex, ncpus, matrix, solution, Ax, xtl, xtr);
    for (ii = 0; ii < size; ii++) {
        res[ii] = rhs[ii] - Ax[ii];
        p[ii] = res[ii];
    }
    res0tol = innerproduct(res, res, size);
    temp2 = res0tol;              /* keeps the final report defined if the loop never runs */

    if (myrank == 0)
        printf("[CG] Conjugate gradient is started.\n");

    if (res0tol != 0.0) {
        for (ii = 0; ii < maxiteration; ii++) {
            if ((myrank == 0) && (ii%20 == 0) && (ii != 0))
                printf("mse = %e with criteria of %e at %5d iterations.\n", sqrt(temp2/res0tol), tolerance, ii);

            temp1 = innerproduct(res, res, size);
            multiply(rank, load, sindex, ncpus, matrix, p, Ap, xtl, xtr);
            temp2 = innerproduct(Ap, p, size);
            alpha = temp1/temp2;

            for (jj = 0; jj < size; jj++) {
                solution[jj] = solution[jj] + alpha*p[jj];
                res[jj] = res[jj] - alpha*Ap[jj];
            }

            temp2 = innerproduct(res, res, size);
            done = ii + 1;
            if (sqrt(temp2/res0tol) < tolerance)
                break;

            beta = temp2/temp1;
            for (jj = 0; jj < size; jj++)
                p[jj] = res[jj] + beta*p[jj];
        }
    }

    if (myrank == 0)
        printf("[CG] Finished with total iteration = %d, mse = %e.\n", done,
               (res0tol != 0.0) ? sqrt(temp2/res0tol) : 0.0);

    free(xtl);
    free(xtr);
    free(res);
    free(p);
    free(Ax);
    free(Ap);
    free(sindex);
    free(load);
}