Beispiel #1
0
/*
 * Decide whether the ordered shell pair (a, b) is left out of the screened
 * pair list.  An off-diagonal pair is dropped when a > b and a + b is odd, or
 * when a < b and a + b is even, so exactly one of (a, b) / (b, a) survives.
 * Diagonal pairs (a == b) are never dropped.
 */
static int schwartz_pair_is_skipped(int a, int b)
{
    return (a > b && (a + b) % 2 == 1) || (a < b && (a + b) % 2 == 0);
}

/*
 * Build the shell-pair screening tables stored in `pfock`.
 *
 * Steps performed:
 *   1. For every shell pair (M, N) in this process's row/column shell range,
 *      compute the quartet (M N | M N) and record the largest absolute
 *      "diagonal" integral; the values are written into a distributed
 *      nshells x nshells array.
 *   2. Reduce the largest value over MPI_COMM_WORLD into pfock->maxvalue.
 *   3. Read the array back row by row and keep the pairs whose value exceeds
 *      eta = pfock->tolscr2 / pfock->maxvalue, subject to the parity rule in
 *      schwartz_pair_is_skipped().  Row M's entries occupy indices
 *      [shellptr[M], shellptr[M + 1]) of shellid / shellrid / shellvalue.
 *      Diagonal pairs store the value as-is, off-diagonal pairs store it
 *      negated.
 *
 * Returns 0 on success and -1 when the global array or a local buffer cannot
 * be allocated.  Local scratch memory and the ERD object are released on
 * every exit path.  Arrays already stored in `pfock` are left for the owner
 * of `pfock` to release.
 *
 * NOTE(review): the error returns are taken by a single rank while the other
 * ranks may continue into collective calls; the global array is therefore not
 * destroyed on those paths (GA_Destroy looks collective - confirm) and the
 * caller is presumably expected to abort the job.
 */
int schwartz_screening(PFock_t pfock, BasisSet_t basis)
{
    // per-thread integral engine used for the shell quartets
    ERD_t erd;
    int nthreads = omp_get_max_threads();
    CInt_createERD(basis, &erd, nthreads);
    int nshells = pfock->nshells;

    // create the irregularly distributed nshells x nshells screening array;
    // `map` holds the row block starts followed by the column block starts
    int dims[2];
    int nprow = pfock->nprow;
    int npcol = pfock->npcol;
    int block[2];
    int map[nprow + npcol];
    for (int i = 0; i < nprow; i++) {
        map[i] = pfock->rowptr_sh[i];
    }
    for (int i = 0; i < npcol; i++) {
        map[i + nprow] = pfock->colptr_sh[i];
    }
    block[0] = nprow;
    block[1] = npcol;
    dims[0] = nshells;
    dims[1] = nshells;
#if defined(USE_ELEMENTAL)
    ElGlobalArraysCreateIrreg_d( eldga, 2, dims, "array Screening", block, map, &pfock->ga_screening);
#else
    pfock->ga_screening =
        NGA_Create_irreg(C_DBL, 2, dims, "array Screening", block, map);
    if (0 == pfock->ga_screening) {
        CInt_destroyERD(erd);
        return -1;
    }
#endif

    // compute the max shell value for every locally owned pair
    double *sq_values = (double *)PFOCK_MALLOC(sizeof(double) *
        pfock->nshells_row * pfock->nshells_col);
    if (NULL == sq_values) {
        CInt_destroyERD(erd);
        return -1;
    }
    int startM = pfock->sshell_row;
    int startN = pfock->sshell_col;
    int endM = pfock->eshell_row;
    int endN = pfock->eshell_col;
    int ld = endN - startN + 1;
    double maxtmp = 0.0;
    #pragma omp parallel
    {
        int tid = omp_get_thread_num();
        #pragma omp for reduction(max:maxtmp)
        for (int M = startM; M <= endM; M++) {
            int dimM = CInt_getShellDim(basis, M);
            for (int N = startN; N <= endN; N++) {
                int dimN = CInt_getShellDim(basis, N);
                int nints;
                double *integrals;
                CInt_computeShellQuartet(basis, erd, tid, M, N, M, N,
                                         &integrals, &nints);
                double maxvalue = 0.0;
                if (nints != 0) {
                    for (int iM = 0; iM < dimM; iM++) {
                        for (int iN = 0; iN < dimN; iN++) {
                            // offset of element [iM][iN][iM][iN] in a
                            // dimM x dimN x dimM x dimN block
                            int index =
                                iM * (dimN*dimM*dimN+dimN) + iN * (dimM*dimN+1);
                            double absval = fabs(integrals[index]);
                            if (maxvalue < absval) {
                                maxvalue = absval;
                            }
                        }
                    }
                }
                sq_values[(M - startM) * ld + (N - startN)] = maxvalue;
                if (maxvalue > maxtmp) {
                    maxtmp = maxvalue;
                }
            }
        }
    }

    // publish the local patch into the distributed array
    int lo[2];
    int hi[2];
    lo[0] = startM;
    lo[1] = startN;
    hi[0] = endM;
    hi[1] = endN;
#if defined(USE_ELEMENTAL)
    ElGlobalArraysPut_d( eldga, pfock->ga_screening, lo, hi,
                         sq_values, &ld );
#else
    NGA_Put(pfock->ga_screening, lo, hi, sq_values, &ld);
#endif
    // global max value
    MPI_Allreduce(&maxtmp, &(pfock->maxvalue), 1,
                  MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
    CInt_destroyERD(erd);
    PFOCK_FREE(sq_values);

    // from here on sq_values is a one-row read buffer
    sq_values = (double *)PFOCK_MALLOC(sizeof(double) * nshells);
    if (NULL == sq_values) {
        return -1;
    }
    double eta = pfock->tolscr2 / pfock->maxvalue;
    pfock->shellptr = (int *)PFOCK_MALLOC(sizeof(int) * (nshells + 1));
    pfock->mem_cpu += 1.0 * sizeof(int) * (nshells + 1);
    if (NULL == pfock->shellptr) {
        PFOCK_FREE(sq_values);
        return -1;
    }
    memset(pfock->shellptr, 0, sizeof(int) * (nshells + 1));

    // first pass: count the surviving pairs per row
    int nnz = 0;
    for (int M = 0; M < nshells; M++) {
        pfock->shellptr[M] = nnz;
        lo[0] = M;
        hi[0] = M;
        lo[1] = 0;
        hi[1] = nshells - 1;
        ld = nshells;
#if defined(USE_ELEMENTAL)
        ElGlobalArraysGet_d( eldga, pfock->ga_screening, lo, hi,
                             sq_values, &ld );
#else
        NGA_Get(pfock->ga_screening, lo, hi, sq_values, &ld);
#endif
        for (int N = 0; N < nshells; N++) {
            if (sq_values[N] > eta && !schwartz_pair_is_skipped(M, N)) {
                nnz++;
            }
        }
        pfock->shellptr[M + 1] = nnz;
    }
    pfock->nnz = nnz;

    pfock->shellvalue  = (double *)PFOCK_MALLOC(sizeof(double) * nnz);
    pfock->shellid  = (int *)PFOCK_MALLOC(sizeof(int) * nnz);
    pfock->shellrid  = (int *)PFOCK_MALLOC(sizeof(int) * nnz);
    pfock->mem_cpu += 1.0 * sizeof(double) * nnz + 2.0 * sizeof(int) * nnz;
    if (pfock->shellvalue == NULL ||
        pfock->shellid == NULL ||
        pfock->shellrid == NULL) {
        PFOCK_FREE(sq_values);
        return -1;
    }

    // second pass: fill in value, column shell and row shell of each pair
    nnz = 0;
    for (int A = 0; A < nshells; A++) {
        pfock->shellptr[A] = nnz;
        lo[0] = A;
        hi[0] = A;
        lo[1] = 0;
        hi[1] = nshells - 1;
        ld = nshells;
#if defined(USE_ELEMENTAL)
        ElGlobalArraysGet_d( eldga, pfock->ga_screening, lo, hi,
                             sq_values, &ld );
#else
        NGA_Get(pfock->ga_screening, lo, hi, sq_values, &ld);
#endif
        for (int B = 0; B < nshells; B++) {
            double maxvalue = sq_values[B];
            if (!(maxvalue > eta) || schwartz_pair_is_skipped(A, B)) {
                continue;
            }
            // off-diagonal pairs are stored with a negative sign
            pfock->shellvalue[nnz] = (A == B) ? maxvalue : -maxvalue;
            pfock->shellid[nnz] = B;
            pfock->shellrid[nnz] = A;
            nnz++;
        }
    }
    PFOCK_FREE(sq_values);
#if defined(USE_ELEMENTAL)
    ElGlobalArraysDestroy_d( eldga, pfock->ga_screening );
#else
    GA_Destroy(pfock->ga_screening);
#endif
    return 0;
}
Beispiel #2
0
/*
 * Compare a reference diagonal against the diagonal rebuilt from a factor.
 * Entry (i, j) of the factor is read from G[i + j * dim]; the rebuilt value
 * for row i is the sum over j < rank of G(i, j)^2.  Every row whose squared
 * error exceeds `tol` is printed.  Offsets are computed in size_t because
 * j * dim can exceed the range of int.
 */
static void report_diag_errors(const double *G, const double *diag,
                               int dim, int rank, double tol)
{
  for (int i = 0; i < dim; i++) {
    double aii = 0;
    for (int j = 0; j < rank; j++) {
      const double g = G[(size_t) i + (size_t) j * (size_t) dim];
      aii += g * g;
    }
    double abserror2 = (diag[i] - aii)*(diag[i] - aii);
    if (abserror2 > tol)
      printf("i=%d, truth=%1.2e, approx=%1.2e, error: %1.2e\n", i, diag[i], aii, abserror2);
  }
}

/*
 * Evaluate every shell quartet twice - once from the factor G (with
 * structcholComputeShellQuartet when `structured` is nonzero, otherwise with
 * cholComputeShellQuartet) and once with CInt_computeShellQuartet - and
 * compare the integrals element by element.  A message is printed for each
 * integral whose squared difference exceeds `tol`.
 *
 * The time spent in the factor-based evaluation is added to *factor_time and
 * the time spent in CInt to *cint_time (seconds, TSC ticks divided by freq).
 *
 * Returns 1 when every integral is within tolerance, 0 otherwise.
 */
static int verify_shell_quartets(BasisSet_t basis, ERD_t erd, double *G,
                                 int rank, double tol, uint64_t freq,
                                 int structured,
                                 double *factor_time, double *cint_time)
{
  const int nshell = CInt_getNumShells(basis);
  int correct = 1;

  for (int M = 0; M < nshell; M++) {
    const int dimM = CInt_getShellDim (basis, M);
    for (int N = 0; N < nshell; N++) {
      const int dimN = CInt_getShellDim (basis, N);
      for (int P = 0; P < nshell; P++) {
        const int dimP = CInt_getShellDim (basis, P);
        for (int Q = 0; Q < nshell; Q++) {
          const int dimQ = CInt_getShellDim (basis, Q);

          // Compute shell from the Cholesky factor
          double *cholintegrals = NULL;
          int cholnints = 0;
          const uint64_t chol_start = __rdtsc();
          if (structured) {
            structcholComputeShellQuartet(basis, G, rank, M, N, P, Q, &cholintegrals, &cholnints);
          } else {
            cholComputeShellQuartet(basis, G, rank, M, N, P, Q, &cholintegrals, &cholnints);
          }
          const uint64_t chol_end = __rdtsc();
          // subtract in integer arithmetic first so large tick counts do not
          // lose precision in the conversion to double
          *factor_time += (double) (chol_end - chol_start) / freq;

          // Compute the same shell quartet with CInt
          double *integrals = NULL;
          int nints = 0;
          const uint64_t CInt_start = __rdtsc();
          CInt_computeShellQuartet(basis, erd, 0, M, N, P, Q, &integrals, &nints);
          const uint64_t CInt_end = __rdtsc();
          *cint_time += (double) (CInt_end - CInt_start) / freq;

          // Compare each integral individually
          for (int iM = 0; iM < dimM; iM++) {
            for (int iN = 0; iN < dimN; iN++) {
              for (int iP = 0; iP < dimP; iP++) {
                for (int iQ = 0; iQ < dimQ; iQ++) {
                  int idx = iM + dimM * (iN + dimN * (iP + dimP *(iQ)));
                  // NOTE(review): assumes nints == 0 means the quartet was
                  // screened out and the buffer must not be read, so the
                  // reference value is taken as zero - confirm against CInt.
                  const double ref = (nints != 0) ? integrals[idx] : 0.0;
                  double abserror2 = (ref - cholintegrals[idx]);
                  abserror2 = abserror2*abserror2;
                  if (abserror2 > tol) {
                    correct = 0;
                    printf("Integral does not satisfy error tolerance: error = %1.2e\n",abserror2);
                  }
                }
              }
            }
          }
          free(cholintegrals);
        }
      }
    }
  }
  return correct;
}

/*
 * Driver: loads a basis set and molecule, builds the lazy-evaluation Cholesky
 * factor of the ERIs and then the structured variant, and for each factor
 * checks the reconstructed diagonal and every shell quartet against CInt,
 * printing timings along the way.
 *
 * Arguments: <basisset> <xyz> <nthreads>.
 * Returns 0 on completion, -1 on bad arguments or allocation failure.
 *
 * NOTE(review): `basis` and `erd` are not released before exit; add the
 * library's destroy calls if this code is ever reused outside main().
 */
int main (int argc, char **argv)
{
  if (argc != 4) {
    printf ("Usage: %s <basisset> <xyz> <nthreads>\n", argv[0]);
    return -1;
  }

  const uint64_t freq = get_cpu_frequency();

  // validate the thread count instead of silently accepting atoi()'s 0
  char *end = NULL;
  const long nthreads_arg = strtol(argv[3], &end, 10);
  if (end == argv[3] || *end != '\0' || nthreads_arg < 1 ||
      nthreads_arg != (long) (int) nthreads_arg) {
    printf("Invalid number of threads: %s\n", argv[3]);
    return -1;
  }
  const int nthreads = (int) nthreads_arg;

  // load basis set
  BasisSet_t basis;
  CInt_createBasisSet(&basis);
  CInt_loadBasisSet(basis, argv[1], argv[2]);

  printf("Molecule info:\n");
  printf("  #Atoms\t= %d\n", CInt_getNumAtoms(basis));
  printf("  #Shells\t= %d\n", CInt_getNumShells(basis));
  printf("  #Funcs\t= %d\n", CInt_getNumFuncs(basis));
  printf("  #OccOrb\t= %d\n", CInt_getNumOccOrb(basis));

  ERD_t erd;
  CInt_createERD(basis, &erd, nthreads);

  printf("Computing Lazy Evaluation Cholesky of ERIs\n");

  int n = CInt_getNumFuncs(basis);
  int n2 = n * n;

  double *G_ERI = NULL;
  double tol = 1e-6;
  int max_rank = n2;
  int rank = 0;
  const uint64_t start_clock = __rdtsc();
  cholERI(basis, erd, &G_ERI, tol, max_rank, &rank);
  const uint64_t end_clock = __rdtsc();
  const uint64_t total_ticks = end_clock - start_clock;
  const double timepass = ((double) total_ticks) / freq;
  printf("Done\n");
  printf("Total GigaTicks: %.3lf, freq = %.3lf GHz\n", (double) (total_ticks) * 1.0e-9, (double)freq/1.0e9);
  printf("Total time: %.4lf secs\n", timepass);

  printf("n: %d, rank: %d, 7n: %d, n2: %d\n",n,rank,7*n,n2);
  double *diag = malloc((size_t) n2 * sizeof *diag);
  if (diag == NULL) {
    fprintf(stderr, "Out of memory allocating diagonal\n");
    free(G_ERI);
    return -1;
  }
  computeDiag(basis, erd, diag);
  report_diag_errors(G_ERI, diag, n2, rank, tol);
  free(diag);

  printf("Testing accuracy for each shell quartet\n");
  double chol_time_total = 0;
  double CInt_time_total = 0;
  int correct = verify_shell_quartets(basis, erd, G_ERI, rank, tol, freq, 0,
                                      &chol_time_total, &CInt_time_total);
  if (correct) {
    printf("All integrals in all shell quartets satisfy error tolerance\n");
  } else {
    printf("Some integrals did not satisfy error tolerance\n");
  }
  printf("Total time to eval shells from Cholesky factor: %.4lf secs\n", chol_time_total);
  printf("Total time to eval shells with CInt: %.4lf secs\n", CInt_time_total);
  printf("Total time to compute Cholesky factor and eval shells from Cholesky factor: %.4lf secs\n", timepass+chol_time_total);

  printf("Computing Structured Lazy Evaluation Cholesky of ERIs\n");
  double *G_structERI = NULL;
  max_rank = (n*(n+1))/2;
  const uint64_t struct_start_clock = __rdtsc();
  structcholERI(basis, erd, &G_structERI, tol, max_rank, &rank);
  const uint64_t struct_end_clock = __rdtsc();
  const uint64_t struct_total_ticks = struct_end_clock - struct_start_clock;
  const double struct_timepass = ((double) struct_total_ticks) / freq;
  printf("Done\n");
  printf("Total GigaTicks: %.3lf, freq = %.3lf GHz\n", (double) (struct_total_ticks) * 1.0e-9, (double)freq/1.0e9);
  printf("Total time: %.4lf secs\n", struct_timepass);

  double *structdiag = malloc((size_t) max_rank * sizeof *structdiag);
  if (structdiag == NULL) {
    fprintf(stderr, "Out of memory allocating structured diagonal\n");
    free(G_structERI);
    free(G_ERI);
    return -1;
  }
  computeStructDiag(basis, erd, structdiag);
  report_diag_errors(G_structERI, structdiag, max_rank, rank, tol);
  free(structdiag);

  printf("Testing accuracy for each shell quartet\n");
  double struct_chol_time_total = 0;
  CInt_time_total = 0;
  correct = verify_shell_quartets(basis, erd, G_structERI, rank, tol, freq, 1,
                                  &struct_chol_time_total, &CInt_time_total);
  if (correct) {
    printf("All integrals in all shell quartets satisfy error tolerance\n");
  } else {
    printf("Some integrals did not satisfy error tolerance\n");
  }
  printf("Total time to eval shells from structured Cholesky factor: %.4lf secs\n", struct_chol_time_total);
  printf("Total time to eval shells with CInt: %.4lf secs\n", CInt_time_total);
  printf("Total time to compute Cholesky factor and eval shells from Cholesky factor: %.4lf secs\n", struct_timepass+struct_chol_time_total);

  free(G_structERI);
  free(G_ERI);
  return 0;
}