int main(int argc, char** argv) { int N, K, i, j; Matrix A,v; double time, sum; if (argc < 3) { printf("need two parameters, the matrix size and the number of vectors\n"); return 1; } N=atoi(argv[1]); K=atoi(argv[2]); A = createMatrix(N,N); // identity matrix for (i=0;i<N;++i) A->data[i][i] = 1.0; v = createMatrix(N,K); // fill with column number for (i=0;i<K;++i) for (j=0;j<N;++j) v->data[i][j] = i; time = WallTime(); sum = dosum(A,v); printf("sum: %f\n", sum); printf("elapsed: %f\n", WallTime()-time); freeMatrix(v); freeMatrix(A); return 0; }
int main(int argc, char** argv) { if (argc < 3) { printf("need two parameters, the matrix size and the number of vectors\n"); return 1; } int N=atoi(argv[1]); int K=atoi(argv[2]); Matrix A = createMatrix(N,N); // identity matrix for (int i=0;i<N;++i) A->data[i][i] = 1.0; Matrix v = createMatrix(N,K); // fill with column number for (int i=0;i<K;++i) for (int j=0;j<N;++j) v->data[i][j] = i; Matrix v2 = createMatrix(N,K); double time = WallTime(); MxM(A, v, v2, 1.0, 0.0); double sum = innerproduct(v->as_vec, v2->as_vec); printf("sum: %f\n", sum); printf("elapsed: %f\n", WallTime()-time); freeMatrix(v2); freeMatrix(v); freeMatrix(A); return 0; }
/*
 * For n = 2^4 .. 2^14, prints the difference between doSum(genVector(n)) and
 * pi^2/6, plus the wall time taken by each evaluation.
 */
void printDiff()
{
  const double limit = (M_PI*M_PI)/6;

  for (int exponent = 4; exponent < 15; ++exponent) {
    const double n = pow(2, exponent);
    const double start = WallTime();
    const double partial = doSum(genVector(n));

    printf("Diff (n=%f) = %f,", n, partial-limit);
    printf(" Elapsed: %fs\n", WallTime()-start);
  }
}
/*
 * Matrix-matrix multiplication benchmark.
 *
 * Usage: <prog> n flag
 *   n    - all three matrix dimensions (m = k = n)
 *   flag - forwarded unchanged to mxm() as its fourth argument
 *
 * Runs mxm() five times and prints the total time, the time divided by
 * (2*m*k*n) per run, and the derived rate r = 1e-6 / dt1.
 */
int main(int argc, char **argv )
{
  int size, rank;

  init_app(argc, argv, &rank, &size);

  if (argc < 3) {
    if (rank == 0) {
      printf("need atleast 2 arguments - n & flag\n");
    }
    close_app();
    return 1;
  }

  long m, n, k;
  k = m = n = atoi(argv[1]);

  Matrix a = createMatrix(m, k);
  Matrix b = createMatrix(k, n);
  Matrix c = createMatrix(m, n);

  // a: every entry holds its second index + 1
  for (long row = 0; row < m; row++) {
    for (long col = 0; col < k; col++) {
      a->data[col][row] = row+1.0;
    }
  }

  // b: every entry holds its first index + 1
  for (long row = 0; row < k; row++) {
    for (long col = 0; col < n; col++) {
      b->data[col][row] = col+1.0;
    }
  }

  const long loop = 5;
  const double t1 = WallTime();
  for (long iter = 0; iter < loop; iter++) {
    mxm (a,b,c,atoi(argv[2]));
  }
  const double t2 = WallTime();

  const double dt = t2 - t1;
  double dt1 = dt/(m*2*k*n);
  dt1 = dt1/loop;
  const double r = 1.e-6/dt1;

  printf (" matrix-matrix : (m,k,n)= (%ld,%ld,%ld) dt= %lf (s) dt1= %le r= %lf\n" ,m, k, n, dt, dt1, r);

  freeMatrix(a);
  freeMatrix(b);
  freeMatrix(c);
  close_app();
  return 0;
}
/*
 * Runs the transform / transpose / solve pipeline below on column-distributed
 * matrices and prints the runtime on rank 0.
 *
 * problemSize - the matrices get problemSize-1 columns; the step size is
 *               1/problemSize and the transform buffer has 4*problemSize
 *               entries
 * comm        - communicator handed to createColumnMatrixMPI
 */
void parallelPoisson(int problemSize,MPI_Comm comm)
{
  const int unknowns = problemSize-1;   // number of columns / unknowns
  const int bufferLen = 4*problemSize;
  const double h = 1.0/problemSize;

  Vector diagonal = createVector(unknowns);
  Vector fstBuffer = createVector(bufferLen);
  ColumnMatrix local_b = createColumnMatrixMPI(unknowns,&comm);
  ColumnMatrix local_bt = createColumnMatrixMPI(unknowns,&comm);
  ColumnMatrix sendBuffer = createColumnMatrixMPI(unknowns,&comm);
  ColumnMatrix recvBuffer = createColumnMatrixMPI(unknowns,&comm);

  const double t0 = WallTime();

  diagonalEigenvalues(diagonal);
  initRightHandSide(local_b,h);

  // forward pass: transform, transpose, inverse transform
  fastSineTransform(local_b,fstBuffer);
  mpiColumnMatrixTranspose(local_bt, recvBuffer, local_b, sendBuffer);
  fastSineTransformInv(local_bt,fstBuffer);

  systemSolver(local_bt,diagonal);

  // backward pass: same three steps with the roles of b and bt swapped
  fastSineTransform(local_bt,fstBuffer);
  mpiColumnMatrixTranspose(local_b, recvBuffer,local_bt,sendBuffer);
  fastSineTransformInv(local_b,fstBuffer);

  findAndPrintUmax(local_b);

  const double t1 = WallTime();
  if (local_b->commRank == 0) {
    printf("Runtime: %fs\n",t1-t0);
  }

  // NOTE(review): diagonal was obtained from createVector but is released
  // with freeVectorMPI, while fstBuffer uses freeVector - confirm intended.
  freeVectorMPI(diagonal);
  freeColumnMatrixMPI(local_b);
  freeColumnMatrixMPI(local_bt);
  freeVector(fstBuffer);
  freeColumnMatrixMPI(sendBuffer);
  freeColumnMatrixMPI(recvBuffer);
}
int main(int argc, char** argv) { int rank, size; init_app(argc, argv, &rank, &size); if (argc < 3) { printf("need two parameters, the matrix size and the number of vectors\n"); close_app(); return 1; } int N=atoi(argv[1]); int K=atoi(argv[2]); Matrix A = createMatrix(N,N); // identity matrix for (int i=0;i<N;++i) A->data[i][i] = 1.0; int *displ, *cols; splitVector(K, size, &cols, &displ); Matrix v = createMatrix(N,cols[rank]); // fill with column number for (int i=0;i<cols[rank];++i) for (int j=0;j<N;++j) v->data[i][j] = i+displ[rank]; double time = WallTime(); double sum = dosum(A,v); if (rank == 0) { printf("sum: %f\n", sum); printf("elapsed: %f\n", WallTime()-time); } char s[128]; sprintf(s,"vec-%i.asc", rank); saveVectorSerial(s, v->as_vec); sprintf(s,"mat-%i.asc", rank); saveMatrixSerial(s, v); sprintf(s,"vec.asc"); saveVectorMPI(s, v->as_vec); freeMatrix(v); freeMatrix(A); free(displ); free(cols); close_app(); return 0; }
int main(int argc, char** argv) { int rank, size; init_app(argc, argv, &rank, &size); if (argc < 2) { printf("usage: %s <N> [L]\n",argv[0]); close_app(); return 1; } /* the total number of grid points in each spatial direction is (N+1) */ /* the total number of degrees-of-freedom in each spatial direction is (N-1) */ int N = atoi(argv[1]); int M = N-1; double L=1.0; if (argc > 2) L = atof(argv[2]); double h = L/N; poisson_info_t ctx; ctx.A = createPoisson1D(M); Vector grid = createVector(M); for (int i=0;i<M;++i) grid->data[i] = (i+1)*h; Matrix u = createMatrix(M, M); evalMesh(u->as_vec, grid, grid, poisson_source); scaleVector(u->as_vec, h*h); double time = WallTime(); cg(evaluate, u, 1.e-6, &ctx); evalMesh2(u->as_vec, grid, grid, exact_solution, -1.0); double max = maxNorm(u->as_vec); if (rank == 0) { printf("elapsed: %f\n", WallTime()-time); printf("max: %f\n", max); } freeMatrix(u); freeVector(grid); freeMatrix(ctx.A); close_app(); return 0; }
int main(int argc, char** argv) { int size, rank; #ifdef HAVE_MPI MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &size); MPI_Comm_rank(MPI_COMM_WORLD, &rank); #endif if (!(size & (size-1))==0) { printf("Number of processes must be power of two"); #ifdef HAVE_MPI MPI_Finalize(); #endif return 1; } double time = WallTime(); double Sn=(M_PI*M_PI)/6; double sum=0; for (int i = 4; i <15 ; ++i) { int n= pow(2, i); int *startIndex, *len; splitVector(n, size, &len, &startIndex); Vector vec = genVector(startIndex[rank],startIndex[rank]+len[rank]); sum = doSum(vec); #ifdef HAVE_MPI double s2=sum; MPI_Reduce(&s2, &sum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); #endif if (rank == 0) { printf("Diff (n=%d) = %f,",n, sum-Sn); printf(" Elapsed: %fs\n", WallTime()-time); } } #ifdef HAVE_MPI MPI_Finalize(); #endif return 0; }
int main(int argc, char** argv) { int i, j, N, K; Matrix A, v; double time, sum; int rank, size; int *displ, *cols; init_app(argc, argv, &rank, &size); if (argc < 3) { printf("need two parameters, the matrix size and the number of vectors\n"); close_app(); return 1; } N=atoi(argv[1]); K=atoi(argv[2]); A = createMatrix(N,N); // identity matrix for (i=0;i<N;++i) A->data[i][i] = 1.0; splitVector(K, size, &cols, &displ); v = createMatrix(N,cols[rank]); // fill with column number for (i=0;i<cols[rank];++i) for (j=0;j<N;++j) v->data[i][j] = i+displ[rank]; time = WallTime(); sum = dosum(A,v); if (rank == 0) { printf("sum: %f\n", sum); printf("elapsed: %f\n", WallTime()-time); } freeMatrix(v); freeMatrix(A); free(displ); free(cols); close_app(); return 0; }
int main(int argc, char **argv) { double pi, mypi, h, sum, x, piref, error; double t1, t2, dt; int n, rank, size, i; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &size); MPI_Comm_rank(MPI_COMM_WORLD, &rank); if (argc < 2) { if (rank == 0) printf("need at least one parameter, the number of intervals\n"); MPI_Finalize(); return 1; } n = atoi(argv[1]); if (n <= 0) { if (rank == 0) printf("Error, %i intervals make no sense, bailing\n", n); MPI_Finalize(); return 2; } t1 = WallTime(); mypi = integrate(0.0, 1.0, n, myf); MPI_Reduce (&mypi, &pi, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); t2 = WallTime(); dt = t2 - t1; if (rank == 0) { piref = 4.0 * atan(1.0); error = fabs(pi-piref); printf ("pi=%e error=%e dt=%e \n", pi, error, dt); } MPI_Finalize(); return 0; }
int main(int argc, char** argv) { int rank, size; init_app(argc, argv, &rank, &size); if (argc < 3) { printf("need two parameters, the matrix size and the number of vectors\n"); close_app(); return 1; } int N=atoi(argv[1]); int K=atoi(argv[2]); int *displ, *cols; splitVector(K, size, &cols, &displ); Matrix A = createMatrixMPI(N, -1, N, N, &WorldComm); // identity matrix for (int i=0;i<A->cols;++i) A->data[i][i] = 1.0; Matrix v = createMatrixMPI(-1, K, N, K, &WorldComm); // fill with column number for (int i=0;i<v->rows;++i) for (int j=0;j<v->cols;++j) v->data[j][i] = j; double time = WallTime(); double sum = dosum(A,v); if (rank == 0) { printf("sum: %f\n", sum); printf("elapsed: %f\n", WallTime()-time); } freeMatrix(v); freeMatrix(A); close_app(); return 0; }
/*
 * Dot-product benchmark.
 *
 * Usage: <prog> times N
 *   times - number of dot(u, u, N) calls to time
 *   N     - vector length
 *
 * Fills u with pseudo-random values in [0, 1], accumulates `times` dot
 * products and prints the elapsed wall time together with the accumulated
 * sum.
 *
 * Exit codes: 0 success, 1 missing arguments, 2 allocation failure.
 */
int main(int argc, char** argv)
{
  int times, N, i;
  double* u;

  // Both argv[1] and argv[2] are read below, so two arguments are required.
  // The old `argc < 2` test let a single argument reach atoi(argv[2]).
  if (argc < 3) {
    printf("need two parameter, the number of times to loop and the vector length\n");
    return 1;
  }

  times = atoi(argv[1]);
  N = atoi(argv[2]);

  u = (double*)malloc(N*sizeof(double));
  // malloc(0) may legitimately return NULL, so only treat NULL as an error
  // when a non-empty vector was requested.
  if (u == NULL && N > 0) {
    printf("failed to allocate a vector of length %i\n", N);
    return 2;
  }

  srand(WallTime());
  for (i=0;i<N;++i)
    u[i] = (double)rand() / RAND_MAX;

  printf("Performing %i loops\n", times);
  printf("Vector length: %i\n", N);

  double now = WallTime();
  double b=0;
  for (i=0;i<times;++i)
    b += dot(u, u, N);
  printf("Used %f seconds, sum %f\n", WallTime()-now, b);

  free(u);
  return 0;
}
int main(int argc, char** argv) { int i, j, N, flag; Matrix A=NULL, Q=NULL; Vector b, grid, e, lambda=NULL; double time, sum, h, tol=1e-6; int rank, size; int mpi_top_coords; int mpi_top_sizes; init_app(argc, argv, &rank, &size); if (argc < 3) { printf("need two parameters, N and flag [and tolerance]\n"); printf(" - N is the problem size (in each direction\n"); printf(" - flag = 1 -> Matrix-free Gauss-Jacobi iterations\n"); printf(" - flag = 2 -> Matrix-free red-black Gauss-Seidel iterations\n"); printf(" - flag = 3 -> Matrix-free CG iterations\n"); printf(" - flag = 4 -> Matrix-free additive schwarz preconditioned+Cholesky CG iterations\n"); printf(" - flag = 5 -> Matrix-free additive schwarz preconditioned+CG CG iterations\n"); return 1; } N=atoi(argv[1]); flag=atoi(argv[2]); if (argc > 3) tol=atof(argv[3]); if (N < 0) { if (rank == 0) printf("invalid problem size given\n"); close_app(); return 2; } if (flag < 0 || flag > 5) { if (rank == 0) printf("invalid flag given\n"); close_app(); return 3; } if (flag == 2 && (N-1)%2 != 0 && ((N-1)/size) % 2 != 0) { if (rank == 0) printf("need an even size (per process) for red-black iterations\n"); close_app(); return 4; } // setup topology mpi_top_coords = 0; mpi_top_sizes = 0; MPI_Dims_create(size, 1, &mpi_top_sizes); int periodic = 0; MPI_Comm comm; MPI_Cart_create(MPI_COMM_WORLD, 1, &mpi_top_sizes, &periodic, 0, &comm); MPI_Cart_coords(comm, rank, 1, &mpi_top_coords); b = createVectorMPI(N+1, &comm, 1, 1); e = createVectorMPI(N+1, &comm, 1, 1); grid = equidistantMesh(0.0, 1.0, N); h = 1.0/N; evalMeshDispl(b, grid, source); scaleVector(b, pow(h, 2)); evalMeshDispl(e, grid, exact); axpy(b, e, alpha); b->data[0] = b->data[b->len-1] = 0.0; if (flag == 4) { int size = b->len; if (b->comm_rank == 0) size--; if (b->comm_rank == b->comm_size-1) size--; A1D = createMatrix(size, size); A1Dfactored = 0; diag(A1D, -1, -1.0); diag(A1D, 0, 2.0+alpha); diag(A1D, 1, -1.0); } int its=-1; char method[128]; time = WallTime(); if (flag == 
1) { its=GaussJacobiPoisson1D(b, tol, 1000000); sprintf(method,"Gauss-Jacobi"); } if (flag == 2) { its=GaussSeidelPoisson1Drb(b, tol, 1000000); sprintf(method,"Gauss-Seidel"); } if (flag == 3) { its=cgMatrixFree(Poisson1D, b, tol); sprintf(method,"CG"); } if (flag == 4 || flag == 5) { its=pcgMatrixFree(Poisson1D, Poisson1DPre, b, tol); sprintf(method,"PCG"); } if (rank == 0) { printf("%s used %i iterations\n", method, its); printf("elapsed: %f\n", WallTime()-time); } evalMeshDispl(e, grid, exact); axpy(b,e,-1.0); b->data[0] = b->data[b->len-1] = 0.0; h = maxNorm(b); if (rank == 0) printf("max error: %e\n", h); if (A) freeMatrix(A); if (Q) freeMatrix(Q); freeVector(grid); freeVector(b); freeVector(e); if (lambda) freeVector(lambda); if (A1D) freeMatrix(A1D); MPI_Comm_free(&comm); close_app(); return 0; }
int main(int argc, char** argv) { int rank, size; init_app(argc, argv, &rank, &size); if (argc < 2) { printf("usage: %s <N> [L]\n",argv[0]); close_app(); return 1; } /* the total number of grid points in each spatial direction is (N+1) */ /* the total number of degrees-of-freedom in each spatial direction is (N-1) */ int N = atoi(argv[1]); int M = N-1; double L=1; if (argc > 2) L = atof(argv[2]); double h = L/N; Vector lambda = createEigenValues(M); Vector grid = createVector(M); for (int i=0;i<M;++i) grid->data[i] = (i+1)*h; Matrix u = createMatrix(M, M); Matrix ut = createMatrix(M, M); evalMesh(u->as_vec, grid, grid, poisson_source); scaleVector(u->as_vec, h*h); int NN = 4*N; Vector z = createVector(NN); double time = WallTime(); for (int j=0; j < M; j++) fst(u->data[j], &N, z->data, &NN); transposeMatrix(ut, u); for (int i=0; i < M; i++) fstinv(ut->data[i], &N, z->data, &NN); for (int j=0; j < M; j++) for (int i=0; i < M; i++) ut->data[j][i] /= lambda->data[i]+lambda->data[j]; for (int i=0; i < M; i++) fst(ut->data[i], &N, z->data, &NN); transposeMatrix(u, ut); for (int j=0; j < M; j++) fstinv(u->data[j], &N, z->data, &NN); evalMesh2(u->as_vec, grid, grid, exact_solution, -1.0); double max = maxNorm(u->as_vec); if (rank == 0) { printf("elapsed: %f\n", WallTime()-time); printf("max: %f\n", max); } freeMatrix(u); freeMatrix(ut); freeVector(grid); freeVector(z); freeVector(lambda); close_app(); return 0; }
//============================================================================= Epetra_Time::Epetra_Time(const Epetra_Comm& Comm) : StartTime_(0.0), Comm_(&Comm) { StartTime_ = WallTime(); }
//============================================================================= double Epetra_Time::ElapsedTime(void) const { return(WallTime()-StartTime_); }
//============================================================================= void Epetra_Time::ResetStartTime(void) { StartTime_ = WallTime(); return; }
int main(int argc, char** argv){ // first input is the task number (5 = sumSlow) // second input is the maximal n int Task; if(argc > 1) { if ((atoi(argv[1])>0) && (atoi(argv[1])<6)) Task = atoi(argv[1]); else Task=4; } else Task = 4; int iterations; if(argc > 2) { if (atoi(argv[2])>0) iterations = atoi(argv[2]); else iterations=13; } else iterations = 13; //printf("%d\n",iterations); int rank = 0; int i; // Generic loop variable. double startTime; // Storing the start time while measuring. double S = (M_PI*M_PI)/6; // The limit of the series. MPI_Init(&argc,&argv); // Setting up the size of the partial sums to generate. This should be altered to read something from the command line. //Sint iterations = 13; // Number of different summing lengths. int N[iterations]; // Vector with the summetion lengths. double* Sn = (double*)malloc(iterations*sizeof(double)); double* SnSlow = (double*)malloc(iterations*sizeof(double)); // Vectors of the partial sums. basicSetup(iterations, N, Sn, SnSlow); if (Task==1) { printf("running the non-parallelized programm (Task1)\n"); printf("n \terror \t\ttime\n"); //for(i=0; i<iterations; ++i) i=iterations-1; { startTime= WallTime(); Sn[i] = sum(N[i]); printf("%d \t%e \t%e\n",N[i], S-Sn[i],startTime- WallTime()); } } if (Task==2) { printf("running the openMP-parallelized programm (Task2)\n"); printf("n \terror \t\ttime\n"); //for(i=0; i<iterations; ++i) i=iterations-1; { startTime= WallTime(); Sn[i] = sumShared(N[i]); printf("%d \t%e \t%e\n",N[i], S-Sn[i],startTime- WallTime()); } } if (Task==3) { MPI_Comm_rank(MPI_COMM_WORLD,&rank); if(rank==0) { printf("running the MPI-parallelized programm (Task3)\n"); printf("n \terror \t\ttime\n"); } //for(i=0; i<iterations; ++i) i=iterations-1; { if(rank==0) startTime= WallTime(); Sn[i] = sumDist(N[i],&rank); if(rank==0) printf("%d \t%e \t%e\n",N[i], S-Sn[i],startTime- WallTime()); } } if (Task==4) { MPI_Comm_rank(MPI_COMM_WORLD,&rank); if(rank==0) { printf("running the openMP- and MPI-parallelized 
programm (Task4)\n"); printf("n \terror \t\ttime\n"); } //for(i=0; i<iterations; ++i) i=iterations-1; { if(rank==0) startTime= WallTime(); Sn[i] = sumHybrid(N[i],&rank); if(rank==0) printf("%d \t%e \t%e\n",N[i], S-Sn[i],startTime- WallTime()); } } if (Task==5) { printf("running the non-parallelized programm with better summation order(Task1)\n"); printf("n \terror \t\ttime\n"); //for(i=0; i<iterations; ++i) i=iterations-1; { startTime= WallTime(); Sn[i] = sumSlow(N[i]); printf("%d \t%e \t%e\n",N[i], S-Sn[i],startTime- WallTime()); } } free(Sn); free(SnSlow); MPI_Finalize(); return 0; }
// Micro-benchmarks ParallelJob and the SharedLoop / SharedLoop2 classes.
//
// Each section repeats its workload until more than maxtime has elapsed
// according to WallTime(), then appends (label, time/steps*1e9) to the
// result list. If WallTime() reports seconds - TODO confirm - the stored
// value is nanoseconds per step.
//
// Returns the list of (label, value) tuples in the order measured.
list<tuple<string,double>> TaskManager :: Timing ()
{
  /*
    list<tuple<string,double>>timings;
    double time = RunTiming ( [&] () { ParallelJob ( [] (TaskInfo ti) { ; } , TasksPerThread(1) ); });
    timings.push_back (make_tuple("parallel job with 1 task per thread", time*1e9));
    time = RunTiming ( [&] () { ParallelJob ( [] (TaskInfo ti) { ; } , TasksPerThread(10) ); });
    timings.push_back (make_tuple("parallel job with 10 tasks per thread", time*1e9));
    time = RunTiming ( [&] () { ParallelJob ( [] (TaskInfo ti) { ; } , TasksPerThread(100) ); });
    timings.push_back (make_tuple("parallel job with 100 tasks per thread", time*1e9));
    return timings;
  */

  // this is the old function moved from the py-interface:
  list<tuple<string,double>>timings;
  double starttime, time;
  double maxtime = 0.5;   // lower bound on the measured duration of each section
  size_t steps;

  // empty ParallelJob, TasksPerThread(1); one step = one ParallelJob call
  starttime = WallTime();
  steps = 0;
  do
    {
      for (size_t i = 0; i < 1000; i++)
        ParallelJob ( [] (TaskInfo ti) { ; }, TasksPerThread(1));
      steps += 1000;
      time = WallTime()-starttime;
    }
  while (time < maxtime);
  timings.push_back(make_tuple("ParallelJob 1 task/thread", time/steps*1e9));

  // empty ParallelJob, TasksPerThread(100)
  starttime = WallTime();
  steps = 0;
  do
    {
      for (size_t i = 0; i < 1000; i++)
        ParallelJob ( [] (TaskInfo ti) { ; }, TasksPerThread(100));
      steps += 1000;
      time = WallTime()-starttime;
    }
  while (time < maxtime);
  timings.push_back(make_tuple("ParallelJob 100 task/thread", time/steps*1e9));

  // construction (and destruction) of a SharedLoop2; one step per object
  starttime = WallTime();
  steps = 0;
  do
    {
      for (int k = 0; k < 10000; k++)
        {
          SharedLoop2 sl(1000);
          steps += 1;
        }
      time = WallTime()-starttime;
    }
  while (time < maxtime);
  timings.push_back(make_tuple("SharedLoop init", time/steps*1e9));

  // one SharedLoop(5), constructed afresh for every ParallelJob
  starttime = WallTime();
  steps = 0;
  do
    {
      for (int k = 0; k < 1000; k++)
        {
          SharedLoop sl(5);
          ParallelJob ( [&sl] (TaskInfo ti)
                        {
                          for (auto i : sl)
                            (void)i; // silence warning
                        } );
        }
      steps += 1000;
      time = WallTime()-starttime;
    }
  while (time < maxtime);
  timings.push_back(make_tuple("short SharedLoop", time/steps*1e9));

  // five SharedLoop(5) objects traversed one after another inside one job
  starttime = WallTime();
  steps = 0;
  do
    {
      for (int k = 0; k < 1000; k++)
        {
          SharedLoop sl1(5), sl2(5), sl3(5), sl4(5), sl5(5);
          ParallelJob ( [&sl1, &sl2, &sl3, &sl4, &sl5] (TaskInfo ti)
                        {
                          for (auto i : sl1)
                            (void)i; // silence warning
                          for (auto i : sl2)
                            (void)i; // silence warning
                          for (auto i : sl3)
                            (void)i; // silence warning
                          for (auto i : sl4)
                            (void)i; // silence warning
                          for (auto i : sl5)
                            (void)i; // silence warning
                        } );
        }
      steps += 1000;
      time = WallTime()-starttime;
    }
  while (time < maxtime);
  timings.push_back(make_tuple("5 short SharedLoops", time/steps*1e9));

  // one SharedLoop2 reused via Reset(5); note its construction falls after
  // starttime is taken
  starttime = WallTime();
  steps = 0;
  SharedLoop2 sl2(5);
  do
    {
      for (int k = 0; k < 1000; k++)
        {
          sl2.Reset(5);
          ParallelJob ( [&sl2] (TaskInfo ti)
                        {
                          for (auto i : sl2)
                            (void)i; // silence warning
                        } );
        }
      steps += 1000;
      time = WallTime()-starttime;
    }
  while (time < maxtime);
  timings.push_back(make_tuple("short SharedLoop2", time/steps*1e9));

  {
    // five reused SharedLoop2 objects; the local sl2 shadows the outer one
    starttime = WallTime();
    steps = 0;
    SharedLoop2 sl1(5), sl2(5), sl3(5), sl4(5), sl5(5);
    do
      {
        for (int k = 0; k < 1000; k++)
          {
            sl1.Reset(5);
            sl2.Reset(5);
            sl3.Reset(5);
            sl4.Reset(5);
            sl5.Reset(5);
            ParallelJob ( [&sl1,&sl2,&sl3,&sl4,&sl5] (TaskInfo ti)
                          {
                            for (auto i : sl1)
                              (void)i; // silence warning
                            for (auto i : sl2)
                              (void)i; // silence warning
                            for (auto i : sl3)
                              (void)i; // silence warning
                            for (auto i : sl4)
                              (void)i; // silence warning
                            for (auto i : sl5)
                              (void)i; // silence warning
                          } );
          }
        steps += 1000;
        time = WallTime()-starttime;
      }
    while (time < maxtime);
    timings.push_back(make_tuple("5 short SharedLoop2", time/steps*1e9));
  }

  starttime = WallTime();
  steps = 0;
  {
    // SharedLoop2 of length 1000: steps advances by 1000 per ParallelJob,
    // so the reported value is per loop iteration rather than per job
    SharedLoop2 sl(1000);
    do
      {
        for (int k = 0; k < 1000; k++)
          {
            sl.Reset(1000);
            ParallelJob ( [&sl] (TaskInfo ti)
                          {
                            for (auto i : sl)
                              (void)i; // silence warning
                          } );
            steps += 1000;
          }
        time = WallTime()-starttime;
      }
    while (time < maxtime);
    timings.push_back(make_tuple("SharedLoop2 1000, time per iteration", time/steps*1e9));
  }

  {
    // SharedLoop2 of length 1000000: one ParallelJob per elapsed-time check,
    // steps again counted per loop iteration
    starttime = WallTime();
    steps = 0;
    SharedLoop2 sl(1000000);
    do
      {
        sl.Reset(1000000);
        ParallelJob ( [&sl] (TaskInfo ti)
                      {
                        for (auto i : sl)
                          (void)i; // silence warning
                      } );
        steps += 1000000;
        time = WallTime()-starttime;
      }
    while (time < maxtime);
    timings.push_back(make_tuple("SharedLoop2 1000000, time per iteration", time/steps*1e9));
  }

  return timings;
}
int main(int argc, char** argv) { int i, j, N, flag; Matrix A=NULL, Q=NULL; Vector b, grid, e, lambda=NULL; double time, sum, h, tol=1e-4; if (argc < 3) { printf("need two parameters, N and flag [and tolerance]\n"); printf(" - N is the problem size (in each direction\n"); printf(" - flag = 1 -> Dense LU\n"); printf(" - flag = 2 -> Dense Cholesky\n"); printf(" - flag = 3 -> Full Gauss-Jacobi iterations\n"); printf(" - flag = 4 -> Full Gauss-Jacobi iterations using BLAS\n"); printf(" - flag = 5 -> Full Gauss-Seidel iterations\n"); printf(" - flag = 6 -> Full Gauss-Seidel iterations using BLAS\n"); printf(" - flag = 7 -> Full CG iterations\n"); printf(" - flag = 8 -> Matrix-less Gauss-Jacobi iterations\n"); printf(" - flag = 9 -> Matrix-less Gauss-Seidel iterations\n"); printf(" - flag = 10 -> Matrix-less Red-Black Gauss-Seidel iterations\n"); printf(" - flag = 11 -> Diagonalization\n"); printf(" - flag = 12 -> Diagonalization - FST\n"); printf(" - flag = 13 -> Matrix-less CG iterations\n"); return 1; } N=atoi(argv[1]); flag=atoi(argv[2]); if (argc > 3) tol = atof(argv[3]); if (N < 0) { printf("invalid problem size given\n"); return 2; } if (flag < 0 || flag > 13) { printf("invalid flag given\n"); return 3; } if (flag == 10 && (N-1)%2 != 0) { printf("need an even size for red-black iterations\n"); return 4; } if (flag == 12 && (N & (N-1)) != 0) { printf("need a power-of-two for fst-based diagonalization\n"); return 5; } h = 1.0/N; grid = equidistantMesh(0.0, 1.0, N); b = createVector(N-1); e = createVector(N-1); evalMeshInternal(b, grid, source); evalMeshInternal(e, grid, exact); scaleVector(b, pow(h, 2)); axpy(b, e, alpha); if (flag < 8) { A = createMatrix(N-1,N-1); diag(A, -1, -1.0); diag(A, 0, 2.0+alpha); diag(A, 1, -1.0); } if (flag >= 11 && flag < 13) lambda = generateEigenValuesP1D(N-1); if (flag == 11) Q = generateEigenMatrixP1D(N-1); time = WallTime(); if (flag == 1) { int* ipiv=NULL; lusolve(A, b, &ipiv); free(ipiv); } else if (flag == 2) llsolve(A,b,0); else 
if (flag == 3) printf("Gauss-Jacobi used %i iterations\n", GaussJacobi(A, b, tol, 10000000)); else if (flag == 4) printf("Gauss-Jacobi used %i iterations\n", GaussJacobiBlas(A, b, tol, 10000000)); else if (flag == 5) printf("Gauss-Seidel used %i iterations\n", GaussSeidel(A, b, tol, 10000000)); else if (flag == 6) printf("Gauss-Seidel used %i iterations\n", GaussSeidelBlas(A, b, tol, 10000000)); else if (flag == 7) printf("CG used %i iterations\n", cg(A, b, 1e-8)); else if (flag == 8) printf("Gauss-Jacobi used %i iterations\n", GaussJacobiPoisson1D(b, tol, 10000000)); else if (flag == 9) printf("Gauss-Jacobi used %i iterations\n", GaussSeidelPoisson1D(b, tol, 10000000)); else if (flag == 10) printf("Gauss-Jacobi used %i iterations\n", GaussSeidelPoisson1Drb(b, tol, 10000000)); else if (flag == 11) DiagonalizationPoisson1D(b,lambda,Q); else if (flag == 12) DiagonalizationPoisson1Dfst(b,lambda); else if (flag == 13) printf("CG used %i iterations\n", cgMatrixFree(Poisson1D, b, tol)); printf("elapsed: %f\n", WallTime()-time); evalMeshInternal(e, grid, exact); axpy(b,e,-1.0); printf("max error: %e\n", maxNorm(b)); if (A) freeMatrix(A); if (Q) freeMatrix(Q); freeVector(grid); freeVector(b); freeVector(e); if (lambda) freeVector(lambda); return 0; }
int main(int argc, char** argv) { int rank, size; init_app(argc, argv, &rank, &size); if (argc < 2) { printf("usage: %s <N> [L]\n",argv[0]); close_app(); return 1; } /* the total number of grid points in each spatial direction is (N+1) */ /* the total number of degrees-of-freedom in each spatial direction is (N-1) */ int N = atoi(argv[1]); int M = N-1; double L=1.0; if (argc > 2) L = atof(argv[2]); double h = L/N; Vector grid = createVector(M); for (int i=0;i<M;++i) grid->data[i] = (i+1)*h; int coords[2] = {0}; int sizes[2] = {1}; #ifdef HAVE_MPI sizes[0] = sizes[1] = 0; MPI_Dims_create(size,2,sizes); int periodic[2]; periodic[0] = periodic[1] = 0; MPI_Comm comm; MPI_Cart_create(MPI_COMM_WORLD,2,sizes,periodic,0,&comm); MPI_Cart_coords(comm,rank,2,coords); #endif int* len[2]; int* displ[2]; splitVector(M, sizes[0], &len[0], &displ[0]); splitVector(M, sizes[1], &len[1], &displ[1]); #ifdef HAVE_MPI Matrix u = createMatrixMPI(len[0][coords[0]]+2, len[1][coords[1]]+2, M, M, &comm); #else Matrix u = createMatrix(M+2, M+2); #endif evalMeshDispl(u, grid, grid, poisson_source, displ[0][coords[0]], displ[1][coords[1]]); scaleVector(u->as_vec, h*h); double time = WallTime(); GS(u, 1e-6, 5000); evalMesh2Displ(u, grid, grid, exact_solution, -1.0, displ[0][coords[0]], displ[1][coords[1]]); double max = maxNorm(u->as_vec); if (rank == 0) { printf("elapsed: %f\n", WallTime()-time); printf("max: %f\n", max); } freeMatrix(u); freeVector(grid); for (int i=0;i<2;++i) { free(len[i]); free(displ[i]); } close_app(); return 0; }