// Sums a long double across the communicator `mycomm` and returns the total.
//
// Without HAVE_MPI the input is returned unchanged. With MPI, when
// MPI_LONG_DOUBLE is MPI_DATATYPE_NULL the value is narrowed to double and
// summed through the double overload instead.
long double sum_to_all(long double in) {
#ifdef HAVE_MPI
  if (MPI_LONG_DOUBLE != MPI_DATATYPE_NULL) {
    long double total = in;
    MPI_Allreduce(&in, &total, 1, MPI_LONG_DOUBLE, MPI_SUM, mycomm);
    return total;
  }
  // No long double datatype available: reduce at double precision.
  return sum_to_all(double(in));
#else
  return in;
#endif
}
double *dft_ldos::ldos() const { // we try to get the overall scale factor right (at least for a point source) // so that we can compare against the analytical formula for testing // ... in most practical cases, the scale factor won't matter because // the user will compute the relative LDOS of 2 cases (e.g. LDOS/vacuum) // overall scale factor double Jsum_all = sum_to_all(Jsum); double scale = 4.0/pi // from definition of LDOS comparison to power * -0.5 // power = -1/2 Re[E* J] / (Jsum_all * Jsum_all); // normalize to unit-integral current double *sum = new double[Nomega]; for (int i = 0; i < Nomega; ++i) /* 4/pi * work done by unit dipole */ sum[i] = scale * real(Fdft[i] * conj(Jdft[i])) / abs2(Jdft[i]); double *out = new double[Nomega]; sum_to_all(sum, out, Nomega); delete[] sum; return out; }
// Sums a complex<long double> across the communicator `mycomm` and returns
// the total.
//
// Without HAVE_MPI the input is returned unchanged. With MPI, when
// MPI_LONG_DOUBLE is MPI_DATATYPE_NULL both components are narrowed to double
// and summed through the complex<double> overload; otherwise the value is
// reduced as two consecutive long doubles.
complex<long double> sum_to_all(complex<long double> in) {
#ifdef HAVE_MPI
  if (MPI_LONG_DOUBLE == MPI_DATATYPE_NULL) {
    // No long double datatype available: reduce at double precision.
    const complex<double> narrowed(double(in.real()), double(in.imag()));
    const complex<double> reduced = sum_to_all(narrowed);
    return complex<long double>(reduced.real(), reduced.imag());
  }
  complex<long double> total = in;
  MPI_Allreduce(&in, &total, 2, MPI_LONG_DOUBLE, MPI_SUM, mycomm);
  return total;
#else
  return in;
#endif
}
static double norm2(size_t n, const realnum *x) { // note: we don't just do sqrt(dot(n, x, x)) in order to avoid overflow size_t i; double xmax = 0, scale; long double sum = 0; for (i = 0; i < n; ++i) { double xabs = fabs(x[i]); if (xabs > xmax) xmax = xabs; } xmax = max_to_all(xmax); if (xmax == 0) return 0; scale = 1.0 / xmax; for (i = 0; i < n; ++i) { double xs = scale * x[i]; sum += xs * xs; } return xmax * sqrt(sum_to_all(sum)); }
int main(int argc, char* argv[]) { int c, i, mype, num_pes, tests, passed; char *pgm; shmem_init(); mype = shmem_my_pe(); num_pes = shmem_n_pes(); if ((pgm=strrchr(argv[0],'/'))) pgm++; else pgm = argv[0]; while((c=getopt(argc,argv,"ampsSoxhv")) != -1) { switch(c) { case 'a': And++; // do not run and_to_all break; case 'm': Min++; // do not run min_to_all break; case 'o': Or++; // do not run or_to_all break; case 'p': Prod++; // do not run prod_to_all break; case 's': Sum++; // do not run sum_to_all break; case 'x': Xor++; // do not run xor_to_all break; case 'S': Serialize++; break; case 'v': Verbose++; break; case 'h': default: Rfprintf(stderr,"usage: %s {-v(verbose)|h(help)}\n",pgm); shmem_finalize(); return 1; } } for (i = 0; i < SHMEM_REDUCE_SYNC_SIZE; i++) { pSync[i] = SHMEM_SYNC_VALUE; pSync1[i] = SHMEM_SYNC_VALUE; } tests = passed = 0; shmem_barrier_all(); passed += max_to_all(mype, num_pes); tests++; if (!Min) { passed += min_to_all(mype, num_pes); tests++; } if (!Sum) { passed += sum_to_all(mype, num_pes); tests++; } if (!And) { passed += and_to_all(mype, num_pes); tests++; } if (!Prod) { passed += prod_to_all(mype, num_pes); tests++; } if (!Or) { passed += or_to_all(mype, num_pes); tests++; } if (!Xor) { passed += xor_to_all(mype, num_pes); tests++; } c = 0; if (mype == 0) { if ((Verbose || tests != passed)) fprintf(stderr,"to_all[%d] %d of %d tests passed\n", mype,passed,tests); c = (tests == passed ? 0 : 1); } shmem_finalize(); return c; }
// Array reduction from single-precision complex input to double-precision
// complex output. Each complex value is viewed as two consecutive scalars,
// so the work is forwarded to the (const float*, double*, int) overload with
// twice the element count.
void sum_to_all(const complex<float> *in, complex<double> *out, int size) {
  const float *in_scalars = reinterpret_cast<const float *>(in);
  double *out_scalars = reinterpret_cast<double *>(out);
  sum_to_all(in_scalars, out_scalars, 2 * size);
}
// Array reduction from float input to double output: the `size` input values
// are first copied into a temporary double buffer, which is then handed to
// the double-array overload together with `out`.
void sum_to_all(const float *in, double *out, int size) {
  double *widened = new double[size];

  int k = 0;
  while (k < size) {
    widened[k] = in[k];
    ++k;
  }

  sum_to_all(widened, out, size);
  delete[] widened;
}
// Returns a newly allocated array of Nomega complex values holding the
// result of sum_to_all applied to Jdft; the caller is responsible for
// releasing it with delete[].
complex<double> *dft_ldos::J() const {
  complex<double> *const reduced = new complex<double>[Nomega];
  sum_to_all(Jdft, reduced, Nomega);
  return reduced;
}
static double dot(size_t n, const realnum *x, const realnum *y) { double sum = 0; for (size_t i = 0; i < n; ++i) sum += x[i] * y[i]; return sum_to_all(sum); }