/*
 * Compute eigenvalues/eigenvectors of the operator `Jacobi` with the
 * Jacobi-Davidson solver jdher_su3vect and write the results to disk.
 *
 * nr_of_eigenvalues  number of eigenpairs requested (read through the pointer)
 * max_iterations     iteration limit handed to jdher_su3vect
 * precision          requested precision; values below 1.e-14 are raised to 1.e-14
 * maxmin             non-zero: compute maximal eigenvalues (JD_MAXIMAL),
 *                    zero: compute minimal eigenvalues (JD_MINIMAL)
 * tslice             forwarded to jdher_su3vect and used in the output file names
 * nstore             used in the output file names
 *
 * Output files:
 *   eigenvalues.<tslice>.<nstore>           one eigenvalue per line (process 0 only)
 *   eigenvector.<index>.<tslice>.<nstore>   raw eigenvector data
 *
 * Returns eigenvls_su3v[0] when built with HAVE_LAPACK. Without LAPACK an
 * error message is printed and 0.0 is returned.
 *
 * NOTE(review): the global buffers eigenvectors_su3v, eigenvls_su3v and
 * inv_eigenvls_su3v are allocated on the first call only, sized by that call's
 * *nr_of_eigenvalues. A later call asking for more eigenvalues would overrun
 * them -- confirm that callers never do this.
 */
double eigenvalues_Jacobi(int * nr_of_eigenvalues, const int max_iterations,
                          const double precision, const int maxmin, int tslice,
                          const int nstore) {
  /* Initialised so that the value returned without LAPACK is well defined. */
  double returnvalue = 0.0;
  static int allocated = 0;

#ifdef HAVE_LAPACK
  int verbosity = 1, converged = 0, blocksize = 1, blockwise = 0;
  int solver_it_max = 256, j_max, j_min;
  double decay_min = 1.7, decay_max = 1.5, prec,
    threshold_min = 1.e-3, threshold_max = 5.e-2;
  volatile int v0dim = 0;
  matrix_mult_su3vect f;
  int N = SPACEVOLUME, N2 = (SPACEVOLUME + SPACERAND);
  int returncode = 0;
  su3_vector *s;
  double sqnorm;
  char filename[200];
  char eigvl_filename[200];
  /* The first (time) extent is fixed to 1 instead of T*g_nproc_t. */
  int dims[] = {1, LX*g_nproc_x, LY*g_nproc_y, LZ*g_nproc_z};
  FILE *efp;
#ifdef MPI
  double atime, etime;
  MPI_File fp;
  MPI_Offset siteSize = 3*2*sizeof(double);
  LemonRecordHeader *header;
  LemonWriter *writer;
#else
  FILE *fp;
  int siteSize = 3*2*sizeof(double);
#endif

  f = &Jacobi;
  evlength_su3v = N2;

  if(g_proc_id == g_stdio_proc && g_debug_level > 0) {
    printf("Number of %s eigenvalues to compute = %d\n",
           maxmin ? "maximal" : "minimal", (*nr_of_eigenvalues));
    printf("Using Jacobi-Davidson method! \n");
  }

  /* j_max / j_min are handed to jdher_su3vect; small requests use fixed values. */
  if((*nr_of_eigenvalues) < 8) {
    j_max = 15;
    j_min = 8;
  }
  else {
    j_max = 2*(*nr_of_eigenvalues);
    j_min = (*nr_of_eigenvalues);
  }

  /* Never ask the solver for more than 1.e-14. */
  if(precision < 1.e-14) {
    prec = 1.e-14;
  }
  else {
    prec = precision;
  }

  if(allocated == 0) {
    eigenvectors_su3v = calloc((size_t)N2*(*nr_of_eigenvalues), sizeof(su3_vector));
    eigenvls_su3v = (double*)malloc((*nr_of_eigenvalues)*sizeof(double));
    inv_eigenvls_su3v = (double*)malloc((*nr_of_eigenvalues)*sizeof(double));
    if(eigenvectors_su3v == NULL || eigenvls_su3v == NULL || inv_eigenvls_su3v == NULL) {
      /* The solver below would dereference these buffers; stop with a message instead. */
      fprintf(stderr, "eigenvalues_Jacobi: could not allocate memory for %d eigenpairs\n",
              (*nr_of_eigenvalues));
#ifdef MPI
      MPI_Abort(MPI_COMM_WORLD, 1);
#endif
      exit(EXIT_FAILURE);
    }
    allocated = 1;
  }

#ifdef MPI
  atime = MPI_Wtime();
#endif

  /* (re-) compute the requested eigenvalues */
  converged = 0;
  if(maxmin)
    jdher_su3vect(N*sizeof(su3_vector)/sizeof(complex), N2*sizeof(su3_vector)/sizeof(complex),
                  50., prec,
                  (*nr_of_eigenvalues), j_max, j_min,
                  max_iterations, blocksize, blockwise, v0dim, (complex*) eigenvectors_su3v,
                  CG, solver_it_max,
                  threshold_max, decay_max, verbosity,
                  &converged, (complex*) eigenvectors_su3v, eigenvls_su3v,
                  &returncode, JD_MAXIMAL, 1, tslice, f);
  else
    jdher_su3vect(N*sizeof(su3_vector)/sizeof(complex), N2*sizeof(su3_vector)/sizeof(complex),
                  0., prec,
                  (*nr_of_eigenvalues), j_max, j_min,
                  max_iterations, blocksize, blockwise, v0dim, (complex*) eigenvectors_su3v,
                  CG, solver_it_max,
                  threshold_min, decay_min, verbosity,
                  &converged, (complex*) eigenvectors_su3v, eigenvls_su3v,
                  &returncode, JD_MINIMAL, 1, tslice, f);

#ifdef MPI
  etime = MPI_Wtime();
  if(g_proc_id == 0) {
    printf("Eigenvalues computed in %e sec. (MPI_Wtime)\n", etime-atime);
  }
#endif

  /* Printout eigenvalues. */
  if(g_proc_id == 0) {
    snprintf(eigvl_filename, sizeof(eigvl_filename), "eigenvalues.%.3d.%.4d", tslice, nstore);
    efp = fopen(eigvl_filename, "w");
    if(efp == NULL) {
      fprintf(stderr, "eigenvalues_Jacobi: could not open %s for writing\n", eigvl_filename);
    }
    else {
      for(v0dim = 0; v0dim < (*nr_of_eigenvalues); v0dim++) {
        fprintf(efp, "%e\n", eigenvls_su3v[v0dim]);
      }
      fclose(efp);
    }
  }

  /* Printout eigenvectors. */
  for(v0dim = 0; v0dim < (*nr_of_eigenvalues); v0dim++) {
    snprintf(filename, sizeof(filename), "eigenvector.%.3d.%.3d.%.4d", v0dim, tslice, nstore);
    /* Each eigenvector occupies N2 consecutive su3_vector entries. */
    s = (su3_vector*)&eigenvectors_su3v[v0dim*N2];
#ifdef MPI
# ifdef HAVE_LIBLEMON
    /* SEGNO: this should write 8*2*3*SPACEVOLUME data per file, but it writes
       8*2*4n*SPACEVOLUME (n=4-1 for ev 0-3) */
    MPI_File_open(g_cart_grid, filename, MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &fp);
    writer = lemonCreateWriter(&fp, g_cart_grid);
    header = lemonCreateHeader(1 /* MB */, 1 /* ME */, "lattice-su3_vector-data",
                               SPACEVOLUME*3*sizeof(complex));
    lemonWriteRecordHeader(header, writer);
    lemonDestroyHeader(header);
    lemonWriteLatticeParallel(writer, s, siteSize, dims);
    lemonWriterCloseRecord(writer);
    lemonDestroyWriter(writer);
    MPI_File_close(&fp);
# else
    if(g_proc_id == 0) {
      printf("Cannot write eigenvectors: you need LEMON for writing eigenvectors with MPI\n");
    }
# endif
#else
    fp = fopen(filename, "wb");
    if(fp == NULL) {
      fprintf(stderr, "eigenvalues_Jacobi: could not open %s for writing\n", filename);
    }
    else {
      if(fwrite(s, siteSize, SPACEVOLUME, fp) != (size_t)SPACEVOLUME) {
        fprintf(stderr, "eigenvalues_Jacobi: short write to %s\n", filename);
      }
      fclose(fp);
    }
#endif // MPI
    sqnorm = square_norm_su3vect(s, SPACEVOLUME, 1);
    if(g_proc_id == 0) {
      printf("wrote eigenvector | |^2 = %e \n", sqnorm);
    }
  }

  returnvalue = eigenvls_su3v[0];
#else
  fprintf(stderr, "lapack not available, so JD method for EV computation not available \n");
#endif // LAPACK
  return(returnvalue);
}
int main(int argc, char **argv) { MPI_File fp; LemonWriter *w; LemonReader *r; LemonRecordHeader *h; double *data; double tick, tock; double *timesRead; double *timesWrite; double stdRead = 0.0; double stdWrite = 0.0; int mpisize; int rank; char const *type; int ldsize; unsigned long long int fsize; int *hashMatch, *hashMatchAll; double const rscale = 1.0 / RAND_MAX; int ME_flag=1, MB_flag=1, status=0; int latDist[] = {0, 0, 0, 0}; int periods[] = {1, 1, 1, 1}; int locSizes[4]; int latSizes[4]; int localVol = 1; int latVol = localVol; MPI_Comm cartesian; int i, j; md5_state_t state; md5_byte_t before[16]; md5_byte_t after[16]; int L; int iters; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &mpisize); MPI_Comm_rank(MPI_COMM_WORLD, &rank); if (argc != 3) { usage(rank, argv); MPI_Finalize(); return 1; } L = atoi(argv[1]); if (L <= 0) usage(rank, argv); iters = atoi(argv[2]); if (iters <= 0) usage(rank, argv); timesWrite = (double*)calloc(iters, sizeof(double)); if (timesWrite == (double*)NULL) { fprintf(stderr, "ERROR: Could not allocate memory.\n"); return 1; } timesRead = (double*)calloc(iters, sizeof(double)); if (timesRead == (double*)NULL) { fprintf(stderr, "ERROR: Could not allocate memory.\n"); return 1; } hashMatch = (int*)calloc(iters, sizeof(int)); if (hashMatch == (int*)NULL) { fprintf(stderr, "ERROR: Could not allocate memory.\n"); return 1; } hashMatchAll = (int*)calloc(iters, sizeof(int)); if (hashMatchAll == (int*)NULL) { fprintf(stderr, "ERROR: Could not allocate memory.\n"); return 1; } /* Construct a Cartesian topology, adjust lattice sizes where needed */ MPI_Dims_create(mpisize, 4, latDist); for (i = 0; i < 4; ++i) { int div = (i == 3 ? (2 * L) : L) / latDist[i]; locSizes[i] = div ? 
div : 1; localVol *= locSizes[i]; latSizes[i] = locSizes[i] * latDist[i]; } latVol = mpisize * localVol; ldsize = localVol * 72 * sizeof(double); fsize = (unsigned long long int)latVol * 72 * sizeof(double); MPI_Cart_create(MPI_COMM_WORLD, 4, latDist, periods, 1, &cartesian); MPI_Comm_rank(cartesian, &rank); if (rank == 0) { fprintf(stdout, "Benchmark on a block of data %s in size,\n", humanForm(fsize)); fprintf(stdout, "representing a %u x %u x %u x %u lattice", latSizes[0], latSizes[1], latSizes[2], latSizes[3]); if (mpisize == 1) fprintf(stdout, ".\n\n"); else { fprintf(stdout, ",\ndistributed over %u MPI processes\n", mpisize); fprintf(stdout, "for a local %u x %u x %u x %u lattice.\n\n", locSizes[0], locSizes[1], locSizes[2], locSizes[3]); } } /* Allocate a block of memory for dummy data to write */ data = (double*)malloc(ldsize); if (data == (double*)NULL) { fprintf(stderr, "ERROR: Could not allocate memory.\n"); return 1; } srand(time(NULL) + rank); /* Start of test */ for (i = 0; i < iters; ++i) { if (rank == 0) fprintf(stdout, "Measurement %d of %d.\n", i + 1, iters); /* Create a block of dummy data to write out Fill with some random numbers to make sure we don't get coincidental matches here */ for (j = 0; j < (localVol * 72); ++j) data[j] = rscale * (double)rand(); /* Calculate a hash of the data, to check integrity against */ md5_init(&state); md5_append(&state, (md5_byte_t const *)data, ldsize); md5_finish(&state, before); /* Note that the following is the only (?) 
way to truncate the file with MPI */ MPI_File_open(cartesian, "benchmark.test", MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &fp); MPI_File_set_size(fp, 0); w = lemonCreateWriter(&fp, cartesian); h = lemonCreateHeader(MB_flag, ME_flag, "benchmark", latVol); status = lemonWriteRecordHeader(h, w); lemonDestroyHeader(h); MPI_Barrier(cartesian); tick = MPI_Wtime(); lemonWriteLatticeParallel(w, data, 72 * sizeof(double), latSizes); tock = MPI_Wtime(); MPI_Barrier(cartesian); timesWrite[i] = tock - tick; if (rank == 0) fprintf(stdout, "Time spent writing was %4.2g s.\n", timesWrite[i]); lemonWriterCloseRecord(w); lemonDestroyWriter(w); MPI_File_close(&fp); /* Clear data to avoid an utterly failed read giving md5 hash matches from the old data */ memset(data, 0, ldsize); /* Start of reading test */ MPI_File_open(cartesian, "benchmark.test", MPI_MODE_RDONLY | MPI_MODE_DELETE_ON_CLOSE, MPI_INFO_NULL, &fp); r = lemonCreateReader(&fp, cartesian); if (lemonReaderNextRecord(r)) fprintf(stderr, "Node %d reports: next record failed.\n", rank); type = lemonReaderType(r); if (strncmp(type, "benchmark", 13)) fprintf(stderr, "Node %d reports: wrong type read.\n", rank); MPI_Barrier(cartesian); tick = MPI_Wtime(); lemonReadLatticeParallel(r, data, 72 * sizeof(double), latSizes); tock = MPI_Wtime(); timesRead[i] = tock - tick; MPI_Barrier(cartesian); if (rank == 0) fprintf(stdout, "Time spent reading was %4.2g s.\n", timesRead[i]); lemonDestroyReader(r); MPI_File_close(&fp); md5_init(&state); md5_append(&state, (md5_byte_t const *)data, ldsize); md5_finish(&state, after); hashMatch[i] = strncmp((char const *)before, (char const *)after, 16) != 0 ? 
1 : 0; MPI_Reduce(hashMatch + i, hashMatchAll + i, 1, MPI_INT, MPI_SUM, 0, cartesian); if (rank == 0) { if (hashMatchAll[i] == 0) fprintf(stdout, "All nodes report that MD5 hash matches.\n\n"); else fprintf(stdout, "WARNING: MD5 hash failure detected!\n\n"); } } /* Aggregate the data */ hashMatch[0] = 0; stdWrite = timesWrite[0] * timesWrite[0]; stdRead = timesRead[0] * timesRead[0]; for (i = 1; i < iters; ++i) { hashMatchAll[0] += hashMatchAll[i]; timesWrite[0] += timesWrite[i]; stdWrite += timesWrite[i] * timesWrite[i]; timesRead[0] += timesRead[i]; stdRead += timesRead[i] * timesRead[i]; } stdWrite /= iters; stdRead /= iters; timesWrite[0] /= iters; timesRead[0] /= iters; stdWrite -= timesWrite[0] * timesWrite[0]; stdRead -= timesRead[0] * timesRead[0]; if (rank == 0) { fprintf(stdout, "Average time spent writing was %4.2e s, ", timesWrite[0]); fprintf(stdout, "with a standard deviation of %4.2e s.\n", sqrt(stdWrite)); fprintf(stdout, "Average time spent reading was %4.2e s, ", timesRead[0]); fprintf(stdout, "with a standard deviation of %4.2e s.\n\n", sqrt(stdRead)); stdWrite *= (double)fsize / (timesWrite[0] * timesWrite[0]); stdRead *= (double)fsize / (timesRead[0] * timesRead[0]); fprintf(stdout, "Average writing speed was %s/s\n", humanForm((unsigned long long int)(fsize / timesWrite[0]))); fprintf(stdout, "Average reading speed was %s/s\n", humanForm((unsigned long long int)(fsize / timesRead[0]))); if (hashMatchAll[0] == 0) fprintf(stdout, "All data hashed correctly.\n"); else fprintf(stdout, "WARNING: %d hash mismatches detected!.\n", hashMatchAll[0]); } MPI_Finalize(); free(data); free(timesWrite); free(timesRead); free(hashMatch); free(hashMatchAll); return(0); }