示例#1
0
/**
 * Compute eigenvalues/eigenvectors of the Jacobi operator on one time slice
 * with the Jacobi-Davidson solver (jdher_su3vect) and dump them to disk.
 *
 * Files written (names are built from tslice and nstore):
 *   - "eigenvalues.<tslice>.<nstore>"          : one eigenvalue per line,
 *                                                written by process 0 only;
 *   - "eigenvector.<i>.<tslice>.<nstore>"      : one file per eigenvector.
 *     With MPI this requires LEMON; without LEMON only a message is printed.
 *
 * @param nr_of_eigenvalues  pointer to the number of eigenpairs requested;
 *                           only read here, never written.
 * @param max_iterations     forwarded unchanged to jdher_su3vect.
 * @param precision          requested precision; values below 1.e-14 are
 *                           replaced by 1.e-14 before calling the solver.
 * @param maxmin             non-zero selects JD_MAXIMAL, zero JD_MINIMAL.
 * @param tslice             time slice, forwarded to the solver and used in
 *                           the output file names.
 * @param nstore             configuration counter used in the file names.
 *
 * @return eigenvls_su3v[0] after the solve, or 0.0 when the code was built
 *         without HAVE_LAPACK (the solver is then unavailable and a message
 *         is printed to stderr).
 *
 * The global result buffers (eigenvectors_su3v, eigenvls_su3v,
 * inv_eigenvls_su3v) are allocated on the first call only.
 * NOTE(review): they are sized from the *first* call's *nr_of_eigenvalues;
 * a later call asking for more eigenpairs would overrun them -- confirm that
 * callers never do this.
 * NOTE(review): the solver's `converged` and `returncode` outputs are not
 * inspected here.
 */
double eigenvalues_Jacobi(int * nr_of_eigenvalues, const int max_iterations, 
			  const double precision, const int maxmin,int tslice, 
			  const int nstore) {
  /* Initialised so that the function returns a defined value even when the
     whole LAPACK-dependent body below is compiled out. */
  double returnvalue = 0.0;

#ifdef HAVE_LAPACK
  static int allocated = 0;

  int verbosity = 1, converged = 0, blocksize = 1 , blockwise=0;
  int solver_it_max = 256, j_max, j_min;
  double decay_min = 1.7, decay_max = 1.5, prec, threshold_min = 1.e-3, threshold_max = 5.e-2;
  int v0dim = 0;   /* handed to the solver as 0 */
  int ev;          /* loop index over eigenpairs */
  matrix_mult_su3vect f;
  int N=SPACEVOLUME, N2=(SPACEVOLUME + SPACERAND);

  int returncode=0;
  su3_vector *s;
  double sqnorm;

  char filename[200];
  char eigvl_filename[200];
  /* Only the three spatial extents are distributed; the time extent is 1
     because a single time slice is written. */
  int dims[]={1, LX*g_nproc_x, LY*g_nproc_y, LZ*g_nproc_z};
  FILE *efp;

#ifdef MPI
  double atime, etime;
  MPI_File fp;
  MPI_Offset siteSize=3*2*sizeof(double);
  LemonRecordHeader *header;
  LemonWriter *writer;
#else
  FILE *fp;
  int siteSize=3*2*sizeof(double);
#endif

  f = &Jacobi;
  evlength_su3v = N2;

  if(g_proc_id == g_stdio_proc && g_debug_level >0) 
    {
      printf("Number of %s eigenvalues to compute = %d\n",
	     maxmin ? "maximal" : "minimal",(*nr_of_eigenvalues));
      printf("Using Jacobi-Davidson method! \n");
    }

  /* Search-space bounds handed to the solver. */
  if((*nr_of_eigenvalues) < 8){
    j_max = 15;
    j_min = 8;
  }
  else{
    j_max = 2*(*nr_of_eigenvalues);
    j_min = (*nr_of_eigenvalues);
  }

  /* Never ask the solver for more than 1.e-14. */
  if(precision < 1.e-14){
    prec = 1.e-14;
  }
  else{
    prec = precision;
  }

  if(allocated == 0) 
    {
      const size_t nev = (size_t)(*nr_of_eigenvalues);

      /* Do the element-count product in size_t to avoid int overflow. */
      eigenvectors_su3v = calloc((size_t)N2*nev, sizeof(su3_vector));
      eigenvls_su3v = (double*)malloc(nev*sizeof(double));
      inv_eigenvls_su3v = (double*)malloc(nev*sizeof(double));
      if(eigenvectors_su3v == NULL || eigenvls_su3v == NULL || inv_eigenvls_su3v == NULL) {
	fprintf(stderr, "eigenvalues_Jacobi: could not allocate memory for %d eigenpairs\n",
		(*nr_of_eigenvalues));
	/* Returning on one rank only would leave the others blocked inside
	   the solver, so bring the whole job down instead. */
#ifdef MPI
	MPI_Abort(MPI_COMM_WORLD, 1);
#endif
	exit(1);
      }
      allocated = 1;
    }

#ifdef MPI
  atime = MPI_Wtime();
#endif

  /* (re-) compute the requested eigenvalues */
  converged = 0;

  if(maxmin)
    jdher_su3vect(N*sizeof(su3_vector)/sizeof(complex), N2*sizeof(su3_vector)/sizeof(complex),
		  50., prec, 
		  (*nr_of_eigenvalues), j_max, j_min, 
		  max_iterations, blocksize, blockwise, v0dim, (complex*) eigenvectors_su3v,
		  CG, solver_it_max,
		  threshold_max, decay_max, verbosity,
		  &converged, (complex*) eigenvectors_su3v, eigenvls_su3v,
		  &returncode, JD_MAXIMAL, 1,tslice,
		  f);
  else
    jdher_su3vect(N*sizeof(su3_vector)/sizeof(complex), N2*sizeof(su3_vector)/sizeof(complex),
		  0., prec, 
		  (*nr_of_eigenvalues), j_max, j_min, 
		  max_iterations, blocksize, blockwise, v0dim, (complex*) eigenvectors_su3v,
		  CG, solver_it_max,
		  threshold_min, decay_min, verbosity,
		  &converged, (complex*) eigenvectors_su3v, eigenvls_su3v,
		  &returncode, JD_MINIMAL, 1,tslice,
		  f);

#ifdef MPI
  etime = MPI_Wtime();
  if(g_proc_id == 0) {
    printf("Eigenvalues computed in %e sec. (MPI_Wtime)\n", etime-atime);
    }
#endif

  /* Printout eigenvalues.  */
  if(g_proc_id == 0) {
    snprintf(eigvl_filename, sizeof(eigvl_filename), "eigenvalues.%.3d.%.4d", tslice, nstore);
    efp=fopen(eigvl_filename,"w");
    if(efp == NULL) {
      fprintf(stderr, "Could not open %s for writing eigenvalues\n", eigvl_filename);
    }
    else {
      for(ev = 0; ev < (*nr_of_eigenvalues); ev++) {
	fprintf(efp,"%e\n",eigenvls_su3v[ev]);
      }
      fclose(efp);
    }
  }

  /* Printout eigenvectors.  */
  for(ev = 0; ev < (*nr_of_eigenvalues); ev++) {
    snprintf(filename, sizeof(filename), "eigenvector.%.3d.%.3d.%.4d", ev, tslice, nstore);
    /* Eigenvector number ev starts at offset ev*N2 in the global buffer. */
    s=(su3_vector*)&eigenvectors_su3v[ev*N2];
#ifdef MPI 
# ifdef HAVE_LIBLEMON
    // MARK (translated from the original Italian note): this should write
    // 8*2*3*SPACEVOLUME data per file, but it writes 8*2*4n*SPACEVOLUME
    // (n=4-1 for ev 0-3)

    MPI_File_open(g_cart_grid, filename, MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &fp);
    writer = lemonCreateWriter(&fp, g_cart_grid);
    header = lemonCreateHeader(1 /* MB */, 1 /* ME */, "lattice-su3_vector-data",SPACEVOLUME*3*sizeof(complex));
    lemonWriteRecordHeader(header, writer);
    lemonDestroyHeader(header);
    lemonWriteLatticeParallel(writer, s, siteSize, dims);
    lemonWriterCloseRecord(writer);
    lemonDestroyWriter(writer);
    MPI_File_close(&fp);
# else
  if(g_proc_id == 0) {
    printf("Cannot write eigenvectors: you need LEMON for writing eigenvectors with MPI\n");
    }
# endif
#else
    fp=fopen(filename,"wb");
    if(fp == NULL) {
      fprintf(stderr, "Could not open %s for writing eigenvector\n", filename);
    }
    else {
      if(fwrite(s,siteSize,SPACEVOLUME,fp) != (size_t)SPACEVOLUME) {
	fprintf(stderr, "Short write while writing eigenvector to %s\n", filename);
      }
      fclose(fp);
    }
#endif // MPI
    sqnorm=square_norm_su3vect(s,SPACEVOLUME,1);
    if(g_proc_id == 0) {
      printf("wrote eigenvector | |^2 = %e \n",sqnorm);
    }
  }

  returnvalue=eigenvls_su3v[0];
#else
  fprintf(stderr, "lapack not available, so JD method for EV computation not available \n");
#endif // LAPACK
  return(returnvalue);
}
示例#2
0
/**
 * Parallel I/O benchmark driver for the LEMON library.
 *
 * Usage: <program> L iters
 *   L      lattice extent; direction 3 uses 2*L, the other three use L
 *          (each is divided by the process-grid extent, minimum 1 per rank)
 *   iters  number of write/read measurements to perform
 *
 * Each measurement:
 *   1. fills the local buffer (72 doubles per site) with random numbers and
 *      computes its MD5 digest;
 *   2. writes the buffer with lemonWriteLatticeParallel into
 *      "benchmark.test" and times the call;
 *   3. zeroes the buffer, reads the file back with lemonReadLatticeParallel
 *      (timed), and compares the MD5 digest of what was read with the one
 *      taken before writing. The file is opened with
 *      MPI_MODE_DELETE_ON_CLOSE for the read phase.
 * Afterwards rank 0 prints mean, standard deviation and throughput.
 *
 * @return 0 on completion, 1 on bad arguments or allocation failure.
 */
int main(int argc, char **argv)
{
  MPI_File fp;

  LemonWriter *w;
  LemonReader *r;
  LemonRecordHeader *h;

  double *data = NULL;
  double tick, tock;
  double *timesRead = NULL;
  double *timesWrite = NULL;
  double stdRead = 0.0;
  double stdWrite = 0.0;
  int mpisize;
  int rank;
  char const *type;
  int ldsize;
  unsigned long long int fsize;
  int *hashMatch = NULL, *hashMatchAll = NULL;
  double const rscale = 1.0 / RAND_MAX;

  int ME_flag=1, MB_flag=1, status=0;

  int latDist[] = {0, 0, 0, 0};
  int periods[] = {1, 1, 1, 1};
  int locSizes[4];
  int latSizes[4];
  int localVol = 1;
  int latVol = localVol;

  MPI_Comm cartesian;
  int i, j;

  md5_state_t state;
  md5_byte_t before[16];
  md5_byte_t after[16];
  
  int L;
  int iters; 

  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &mpisize);

  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  
  if (argc != 3)
  {
    usage(rank, argv);
    MPI_Finalize();
    return 1;
  }
  
  /* atoi yields 0 for non-numeric input, which the check below rejects.
     Stop here on bad values: continuing would size the buffers and the
     averages below from a non-positive count. */
  L = atoi(argv[1]);
  iters = atoi(argv[2]);
  if (L <= 0 || iters <= 0)
  {
    usage(rank, argv);
    MPI_Finalize();
    return 1;
  }

  timesWrite = (double*)calloc(iters, sizeof(double));
  timesRead = (double*)calloc(iters, sizeof(double));
  hashMatch = (int*)calloc(iters, sizeof(int));
  hashMatchAll = (int*)calloc(iters, sizeof(int));
  if (timesWrite == NULL || timesRead == NULL || hashMatch == NULL || hashMatchAll == NULL)
  {
    fprintf(stderr, "ERROR: Could not allocate memory.\n");
    free(timesWrite);
    free(timesRead);
    free(hashMatch);
    free(hashMatchAll);
    /* A plain return on this rank would leave the others waiting in the
       collectives below, so terminate the whole job. */
    MPI_Abort(MPI_COMM_WORLD, 1);
    return 1;
  }
  
  /* Construct a Cartesian topology, adjust lattice sizes where needed */
  MPI_Dims_create(mpisize, 4, latDist);
  
  for (i = 0; i < 4; ++i)
  {
    int div = (i == 3 ? (2 * L) : L) / latDist[i];
    locSizes[i] = div ? div : 1;
    localVol *= locSizes[i];
    latSizes[i] = locSizes[i] * latDist[i];
  }
  latVol = mpisize * localVol;
  ldsize = localVol * 72 * sizeof(double);
  fsize = (unsigned long long int)latVol * 72 * sizeof(double);
 
  MPI_Cart_create(MPI_COMM_WORLD, 4, latDist, periods, 1, &cartesian);
  /* Reordering is enabled above, so the rank has to be queried again. */
  MPI_Comm_rank(cartesian, &rank);
  
  if (rank == 0)
  {
    fprintf(stdout, "Benchmark on a block of data %s in size,\n", humanForm(fsize));
    fprintf(stdout, "representing a %d x %d x %d x %d lattice", latSizes[0], latSizes[1], latSizes[2], latSizes[3]);
    if (mpisize == 1)
      fprintf(stdout, ".\n\n");
    else
    {
      fprintf(stdout, ",\ndistributed over %d MPI processes\n", mpisize);
      fprintf(stdout, "for a local %d x %d x %d x %d lattice.\n\n", locSizes[0], locSizes[1], locSizes[2], locSizes[3]);
    }
  }

  /* Allocate a block of memory for dummy data to write */
  data = (double*)malloc(ldsize);
  if (data == (double*)NULL)
  {
    fprintf(stderr, "ERROR: Could not allocate memory.\n");
    free(timesWrite);
    free(timesRead);
    free(hashMatch);
    free(hashMatchAll);
    MPI_Abort(MPI_COMM_WORLD, 1);
    return 1;
  }
  srand(time(NULL) + rank);

  /* Start of test */
  for (i = 0; i < iters; ++i)
  {
    if (rank == 0)
      fprintf(stdout, "Measurement %d of %d.\n", i + 1, iters);
    /* Create a block of dummy data to write out 
       Fill with some random numbers to make sure we don't get coincidental matches here */
    for (j = 0; j < (localVol * 72); ++j)
      data[j] = rscale * (double)rand();

    /* Calculate a hash of the data, to check integrity against */
    md5_init(&state);
    md5_append(&state, (md5_byte_t const *)data, ldsize);
    md5_finish(&state, before);
    
    /* Note that the following is the only (?) way to truncate the file with MPI */
    MPI_File_open(cartesian, "benchmark.test", MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &fp);
    MPI_File_set_size(fp, 0);
    w = lemonCreateWriter(&fp, cartesian);

    /* NOTE(review): the record length passed here is latVol (a site count),
       whereas the payload written below is latVol * 72 * sizeof(double)
       bytes -- confirm which unit lemonCreateHeader expects. */
    h = lemonCreateHeader(MB_flag, ME_flag, "benchmark", latVol);
    /* NOTE(review): status is stored but never examined. */
    status = lemonWriteRecordHeader(h, w);

    lemonDestroyHeader(h);

    MPI_Barrier(cartesian);
    tick = MPI_Wtime();
    lemonWriteLatticeParallel(w, data, 72 * sizeof(double), latSizes);
    tock = MPI_Wtime();
    MPI_Barrier(cartesian);
    timesWrite[i] = tock - tick;
    if (rank == 0)
      fprintf(stdout, "Time spent writing was %4.2g s.\n", timesWrite[i]);

    lemonWriterCloseRecord(w);
    lemonDestroyWriter(w);
    MPI_File_close(&fp);

    /* Clear data to avoid an utterly failed read giving md5 hash matches from the old data */
    memset(data, 0, ldsize);

    /* Start of reading test */
    MPI_File_open(cartesian, "benchmark.test", MPI_MODE_RDONLY | MPI_MODE_DELETE_ON_CLOSE, MPI_INFO_NULL, &fp);
    r = lemonCreateReader(&fp, cartesian);

    if (lemonReaderNextRecord(r))
      fprintf(stderr, "Node %d reports: next record failed.\n", rank);

    /* Guard against a missing type string before comparing it. */
    type = lemonReaderType(r);
    if (type == NULL || strcmp(type, "benchmark") != 0)
      fprintf(stderr, "Node %d reports: wrong type read.\n", rank);

    MPI_Barrier(cartesian);
    tick = MPI_Wtime();
    lemonReadLatticeParallel(r, data, 72 * sizeof(double), latSizes);
    tock = MPI_Wtime();
    timesRead[i] = tock - tick;
    MPI_Barrier(cartesian);
    if (rank == 0)
      fprintf(stdout, "Time spent reading was %4.2g s.\n", timesRead[i]);

    lemonDestroyReader(r);
    MPI_File_close(&fp);

    md5_init(&state);
    md5_append(&state, (md5_byte_t const *)data, ldsize);
    md5_finish(&state, after);

    /* The digests are raw bytes and may contain zeros, so they must be
       compared with memcmp: a string comparison stops at the first zero
       byte and would miss any difference after it. 1 marks a mismatch. */
    hashMatch[i] = memcmp(before, after, sizeof(before)) != 0 ? 1 : 0;
    /* Sum the per-rank mismatch flags on rank 0. */
    MPI_Reduce(hashMatch + i, hashMatchAll + i, 1, MPI_INT, MPI_SUM, 0, cartesian);
    if (rank == 0)
    {
      if (hashMatchAll[i] == 0)
        fprintf(stdout, "All nodes report that MD5 hash matches.\n\n");
      else
        fprintf(stdout, "WARNING: MD5 hash failure detected!\n\n");
    }
  }

  /* Aggregate the data: element 0 of each array accumulates the total,
     stdWrite/stdRead accumulate the sum of squares. */
  stdWrite = timesWrite[0] * timesWrite[0];
  stdRead = timesRead[0] * timesRead[0];
  for (i = 1; i < iters; ++i)
  {
    hashMatchAll[0] += hashMatchAll[i];
    timesWrite[0] += timesWrite[i];
    stdWrite += timesWrite[i] * timesWrite[i];
    timesRead[0] += timesRead[i];
    stdRead += timesRead[i] * timesRead[i];
  }
  stdWrite /= iters;
  stdRead /= iters;
  timesWrite[0] /= iters;
  timesRead[0] /= iters;

  /* Variance = mean of squares - square of mean. Rounding can push this
     marginally below zero, which would make sqrt() return NaN. */
  stdWrite -= timesWrite[0] * timesWrite[0];
  stdRead -= timesRead[0] * timesRead[0];
  if (stdWrite < 0.0)
    stdWrite = 0.0;
  if (stdRead < 0.0)
    stdRead = 0.0;
  
  if (rank == 0)
  {
    fprintf(stdout, "Average time spent writing was %4.2e s, ", timesWrite[0]);
    fprintf(stdout, "with a standard deviation of %4.2e s.\n", sqrt(stdWrite));
    fprintf(stdout, "Average time spent reading was %4.2e s, ", timesRead[0]);
    fprintf(stdout, "with a standard deviation of %4.2e s.\n\n", sqrt(stdRead));
    
    fprintf(stdout, "Average writing speed was %s/s\n", humanForm((unsigned long long int)(fsize / timesWrite[0])));
    fprintf(stdout, "Average reading speed was %s/s\n", humanForm((unsigned long long int)(fsize / timesRead[0])));

    if (hashMatchAll[0] == 0)
      fprintf(stdout, "All data hashed correctly.\n");
    else
      fprintf(stdout, "WARNING: %d hash mismatches detected!.\n", hashMatchAll[0]);
  }

  MPI_Finalize();

  free(data);
  free(timesWrite);
  free(timesRead);
  free(hashMatch);
  free(hashMatchAll);
  
  return(0);
}