/*
 * dom_decomp_3d: pick a 3-D processor layout (rx, ry, rz) for an
 * Nx x Ny x Nz grid split over Np processes.
 *
 * Every rx in [rx_min, rx_max] that divides Np exactly is tried.  For each
 * one the remaining Np/rx processes are handed to dom_decomp_2d() (defined
 * elsewhere; a zero return is treated as success) to obtain (ry, rz) for
 * the transverse (y,z) plane.  The candidate with the smallest "amount of
 * messaging"
 *
 *     I = (rx-1)*Ny*Nz + (ry-1)*Nx*Nz + (rz-1)*Nx*Ny
 *
 * is kept; on ties the candidate with the smallest rx wins because the
 * comparison is strict.
 *
 * Parameters:
 *   Nx, Ny, Nz        grid extents; must all be >= 1
 *   Np                number of processes; must be >= 1
 *   pNGx, pNGy, pNGz  receive rx, ry, rz; written only on success
 *
 * Returns 0 on success, 1 if the arguments are invalid or no decomposition
 * was found.
 */
long long dom_decomp_3d(const long long Nx, const long long Ny,
                        const long long Nz, const long long Np,
                        long long *pNGx, long long *pNGy, long long *pNGz)
{
  long long rx_min, rx_max, rx, ry, rz, I;
  long long rx0 = 1, ry0 = 1, rz0 = 1, I0 = 1;
  long long init = 1;  /* 1 until the first valid candidate is recorded */
  long long err, t, Npt;

  /* Reject non-positive extents / process counts.  Without this, a zero
   * Ny or Nz makes t == 0 and the division below divides by zero, and a
   * negative extent can drive rx through 0 inside the search loop. */
  if (Nx < 1 || Ny < 1 || Nz < 1 || Np < 1)
    return 1;

  /* Constrain the decomposition */
  rx_max = Nx < Np ? Nx : Np;  /* Require ry >= 1, rz >= 1 and rx <= Nx */

  /* Compute a global minimum constraint on rx. */
  t = (Ny < Np ? Ny : Np) * (Nz < Np ? Nz : Np);  /* t = Max(ry)*Max(rz) */
  if (t < Np) {  /* Require rx >= 1, ry <= Ny and rz <= Nz */
    /* rx_min = the smallest integer >= Np/t */
    rx_min = Np / t + (Np % t > 0 ? 1 : 0);
  } else {
    rx_min = 1;
  }

  for (rx = rx_min; rx <= rx_max; rx++) {
    if (Np % rx != 0)
      continue;  /* rx must divide Np exactly */

    Npt = Np / rx;  /* Np for transverse (y,z) decomposition */
    err = dom_decomp_2d(Ny, Nz, Npt, &ry, &rz);
    if (err != 0)
      continue;

    /* Now compute the amount of messaging */
    I = (rx - 1)*Ny*Nz + (ry - 1)*Nx*Nz + (rz - 1)*Nx*Ny;
    if (I < 0)
      continue;  /* best-effort overflow filter kept from the original */

    if (init || I < I0) {
      rx0 = rx;
      ry0 = ry;
      rz0 = rz;
      I0 = I;
      init = 0;
    }
  }

  if (init)
    return 1;  /* Error locating a solution */

  *pNGx = rx0;
  *pNGy = ry0;
  *pNGz = rz0;

  return 0;
}
/*
 * dom_decomp_3d: pick a 3-D processor layout (rx, ry, rz) for an
 * Nx x Ny x Nz grid split over Np processes.
 *
 * Every rx in [rx_min, rx_max] that divides Np exactly is tried.  For each
 * one the remaining Np/rx processes are handed to dom_decomp_2d() (defined
 * elsewhere; a zero return is treated as success) to obtain (ry, rz) for
 * the transverse (y,z) plane.  The candidate with the smallest "amount of
 * messaging"
 *
 *     I = (rx-1)*Ny*Nz
 *       + (ry-1)*(Nx + 2*nghost*rx)*Nz
 *       + (rz-1)*(Nx + 2*nghost*rx)*(Ny + 2*nghost*ry)
 *
 * is kept; on ties the smallest rx wins because the comparison is strict.
 * nghost is defined elsewhere (not visible in this block).
 * NOTE(review): presumably the ghost-zone width, an integer constant --
 * confirm.
 *
 * The cost and the rx lower bound are evaluated in long long.  The previous
 * int arithmetic overflowed for large grids, and its "I < 0" test both
 * relied on undefined signed overflow and missed wraps to positive values.
 *
 * Parameters:
 *   Nx, Ny, Nz        grid extents; must all be >= 1
 *   Np                number of processes; must be >= 1
 *   pNGx, pNGy, pNGz  receive rx, ry, rz; written only on success
 *
 * Returns 0 on success, 1 if the arguments are invalid or no decomposition
 * was found.
 */
static int dom_decomp_3d(const int Nx, const int Ny, const int Nz,
                         const int Np, int *pNGx, int *pNGy, int *pNGz)
{
  int rx_min, rx_max, rx, ry, rz;
  int rx0 = 1, ry0 = 1, rz0 = 1;
  int init = 1;  /* 1 until the first valid candidate is recorded */
  int ierr, Npt;
  long long t, wx, wy, I, I0 = 0;

  /* Reject non-positive extents / process counts: t == 0 or rx == 0 would
   * otherwise lead to a division by zero below. */
  if (Nx < 1 || Ny < 1 || Nz < 1 || Np < 1)
    return 1;

  /* Constrain the decomposition */
  rx_max = Nx < Np ? Nx : Np;  /* Require ry >= 1, rz >= 1 and rx <= Nx */

  /* Compute a global minimum constraint on rx. */
  t = (long long)(Ny < Np ? Ny : Np) * (Nz < Np ? Nz : Np);  /* Max(ry)*Max(rz) */
  if (t < Np) {  /* Require rx >= 1, ry <= Ny and rz <= Nz */
    /* rx_min = the smallest integer >= Np/t; fits in int because t >= 1 */
    rx_min = (int)(Np / t) + (Np % t > 0 ? 1 : 0);
  } else {
    rx_min = 1;
  }

  for (rx = rx_min; rx <= rx_max; rx++) {
    if (Np % rx != 0)
      continue;  /* rx must divide Np exactly */

    Npt = Np / rx;  /* Np for transverse (y,z) decomposition */
    ierr = dom_decomp_2d(Ny, Nz, Npt, &ry, &rz);
    if (ierr != 0)
      continue;

    /* Now compute the amount of messaging, in 64-bit arithmetic */
    wx = (long long)Nx + 2LL*nghost*rx;
    wy = (long long)Ny + 2LL*nghost*ry;
    I = (long long)(rx - 1)*Ny*Nz
      + (long long)(ry - 1)*wx*Nz
      + (long long)(rz - 1)*wx*wy;

    /* Safety net retained from the original: never accept a negative cost. */
    if (I < 0)
      continue;

    if (init || I < I0) {
      rx0 = rx;
      ry0 = ry;
      rz0 = rz;
      I0 = I;
      init = 0;
    }
  }

  if (init)
    return 1;  /* Error locating a solution */

  *pNGx = rx0;
  *pNGy = ry0;
  *pNGz = rz0;

  return 0;
}
/*
 * dom_decomp: dispatch to the decomposition routine that matches the
 * dimensionality of the grid.
 *
 *   Nx > 1, Ny == 1, Nz == 1  (1-D): all Np processes go along x; fails
 *                                    unless 1 <= Np <= Nx.
 *   Nx > 1, Ny > 1,  Nz == 1  (2-D): *pNGz is set to 1 and the (x,y) split
 *                                    is delegated to dom_decomp_2d().
 *   Nx > 1, Ny > 1,  Nz > 1   (3-D): delegated to dom_decomp_3d().
 *
 * Any other combination of extents is reported as an error.
 *
 * Parameters:
 *   Nx, Ny, Nz        grid extents
 *   Np                number of processes
 *   pNGx, pNGy, pNGz  receive the number of grids along x, y and z
 *
 * Returns 0 on success, non-zero on failure (1 for the cases decided here,
 * otherwise whatever the delegated routine returns).
 */
static int dom_decomp(const int Nx, const int Ny, const int Nz, const int Np,
                      int *pNGx, int *pNGy, int *pNGz)
{
  if (Nx > 1 && Ny == 1 && Nz == 1) {  /* 1-D */
    /* Too many procs, or a non-positive count that would otherwise be
     * reported as a successful decomposition into <= 0 grids. */
    if (Np < 1 || Np > Nx)
      return 1;

    *pNGx = Np;
    *pNGy = 1;
    *pNGz = 1;
    return 0;
  }

  if (Nx > 1 && Ny > 1 && Nz == 1) {  /* 2-D */
    *pNGz = 1;
    return dom_decomp_2d(Nx, Ny, Np, pNGx, pNGy);
  }

  if (Nx > 1 && Ny > 1 && Nz > 1) {  /* 3-D */
    return dom_decomp_3d(Nx, Ny, Nz, Np, pNGx, pNGy, pNGz);
  }

  return 1;  /* Error - particular case not expected */
}
/*
 * Test driver for the domain-decomposition routines.
 *
 * Three alternative sections are selected with #if:
 *   1. (disabled) print the 2-D decomposition of a fixed 512 x 1024 grid
 *      for Np = 1..2048;
 *   2. (enabled)  for every Nx, Ny in 1..48 and Np in 1..Nx*Ny, compare
 *      dom_decomp_2d() with dom_decomp_2d_serial() and print any
 *      disagreement, any result exceeding the grid extents, and any case
 *      where the two error codes sum to 1;
 *   3. (disabled) the analogous comparison of dom_decomp_3d() with
 *      dom_decomp_3d_serial(), with Np capped at 100000.
 *
 * All integer values printed here are long long, so every conversion uses
 * %lld (the previous %d specifiers did not match their arguments).
 *
 * NOTE(review): the return type is kept as in the original, but standard C
 * expects main to return int -- confirm and change at file level.
 *
 * Returns 0.
 */
long long main(void){
  long long Nx=512, Ny=1024, Nz = 256, Np = 8, NGx, NGy, NGz, Np_max;
  long long rx,ry,rz,I;
  long long err1, err2;

#if 0
  for(Np = 1; Np <= 2048; Np++){
    dom_decomp_2d(Nx, Ny, Np, &NGx, &NGy);

    printf("Nx = %lld, Ny = %lld, Np = %lld, NGx = %lld, NGy = %lld\n",
           Nx,Ny,Np,NGx,NGy);

    printf("Grids measure: Nx/NGx = nx = %e, Ny/NGy = ny = %e\n\n",
           (double)Nx/(double)NGx, (double)Ny/(double)NGy);
  }
#endif

#if 1
  for(Nx = 1; Nx <= 48; Nx++){
    for(Ny = 1; Ny <= 48; Ny++){
      for(Np = 1; Np <= Nx*Ny; Np++){
        err1 = dom_decomp_2d(Nx, Ny, Np, &NGx, &NGy);
        err2 = dom_decomp_2d_serial(Nx, Ny, Np, &rx, &ry);

        if(err1 == 0 && err2 == 0){
          if(rx != NGx || ry != NGy){
            /* The two algorithms disagree */
            printf("Nx = %lld, Ny = %lld, Np = %lld\n",Nx,Ny,Np);
            printf("[std alg]: NGx = %lld, NGy = %lld\n",NGx,NGy);
            printf("[linear alg]: NGx = %lld, NGy = %lld\n",rx,ry);
          }
          else if(NGx > Nx || NGy > Ny){
            /* More grids than cells along some direction */
            printf("Error: Nx = %lld, Ny = %lld, Np = %lld, NGx = %lld, NGy = %lld\n",
                   Nx,Ny,Np,NGx,NGy);
          }
          /* else printf("Nx=%lld, Ny=%lld, Np=%lld, NGx=%lld, NGy=%lld\n",
             Nx,Ny,Np,NGx,NGy); */
        }
        else if(err1 + err2 == 1)
          printf("err1 = %lld, err2 = %lld\n",err1, err2);
      }
    }
  }
#endif

#if 0
  for(Nx = 1; Nx <= 48; Nx++){
    for(Ny = 1; Ny <= 48; Ny++){
      for(Nz = 1; Nz <= 48; Nz++){
        Np_max = Nx*Ny*Nz;
        Np_max = Np_max < 100000 ? Np_max : 100000;
        for(Np = 1; Np <= Np_max; Np++){
          err1 = dom_decomp_3d(Nx, Ny, Nz, Np, &NGx, &NGy, &NGz);
          err2 = dom_decomp_3d_serial(Nx, Ny, Nz, Np, &rx, &ry, &rz);

          if(err1 == 0 && err2 == 0){
            if(rx != NGx || ry != NGy || rz != NGz){
              /* The two algorithms disagree: show both results and costs */
              printf("Nx = %lld, Ny = %lld, Nz = %lld, Np = %lld\n",Nx,Ny,Nz,Np);
              printf("[std alg]: NGx = %lld, NGy = %lld, NGz = %lld\n",NGx,NGy,NGz);
              printf("[linear alg]: NGx = %lld, NGy = %lld, NGz = %lld\n",rx,ry,rz);

              I = (NGx - 1)*Ny*Nz + (NGy - 1)*Nx*Nz + (NGz - 1)*Nx*Ny;
              printf("[std alg]: I = %lld\n",I);

              I = (rx - 1)*Ny*Nz + (ry - 1)*Nx*Nz + (rz - 1)*Nx*Ny;
              printf("[linear alg]: I = %lld\n",I);
            }
            else if(NGx > Nx || NGy > Ny || NGz > Nz){
              printf("Error: Nx = %lld, Ny = %lld, Np = %lld, NGx = %lld, NGy = %lld\n",
                     Nx,Ny,Np,NGx,NGy);
            }
          }
          else if(err1 + err2 == 1)
            printf("err1 = %lld, err2 = %lld\n",err1, err2);
        }
      }
    }
  }
#endif

  return 0;
}