void Zoltan_RIB_reduce_double(double *in, double *out, int len, MPI_Comm comm, int nproc, int rank, int proc, int n) { int i, m, to, tag = 32107; MPI_Status status; /* this is a recursive function for Tflops_Special that takes a double vector in and returns the summed vector out for a subset of processors of the entire number of processors. rank is a processors rank within its partition of size nproc while proc is the rank of the processor with the entire number of processors being used. */ m = 2*n; if (rank%m) { to = proc - n; MPI_Send(in, len, MPI_DOUBLE, to, tag, comm); MPI_Recv(out, len, MPI_DOUBLE, to, tag, comm, &status); } else if (rank + n < nproc) { to = proc + n; MPI_Recv(out, len, MPI_DOUBLE, to, tag, comm, &status); for (i = 0; i < len; i++) in[i] += out[i]; if (m < nproc) Zoltan_RIB_reduce_double(in, out, len, comm, nproc, rank, proc, m); else for (i = 0; i < len; i++) out[i] = in[i]; MPI_Send(out, len, MPI_DOUBLE, to, tag, comm); } else Zoltan_RIB_reduce_double(in, out, len, comm, nproc, rank, proc, m); }
/* * Calculate a 3x3 inertial matrix representing the * locations of the 3-dimensional coordinates. */ static void inertial_matrix3D(ZZ *zstruct, double *X, int num_obj, double *cm, double (*im)[3]) { double tmp1[6], tmp2[6]; double xx, yy, zz, xy, xz, yz; double xdif, ydif, zdif; int j, rank; double cmt[3]; double xxt, yyt, zzt, xyt, xzt, yzt; double *c, num_coords, total_coords; MPI_Comm comm = zstruct->Communicator; int proc = zstruct->Proc; int nproc = zstruct->Num_Proc; int proclower = 0; num_coords = (double)num_obj; /* Much of this was taken from Zoltan_RIB_inertial3d() */ cm[0] = cm[1] = cm[2] = 0.0; for (j = 0, c = X; j < num_obj; j++, c += 3) { cm[0] += c[0]; cm[1] += c[1]; cm[2] += c[2]; } if (zstruct->Tflops_Special) { rank = proc - proclower; Zoltan_RIB_reduce_double(cm, cmt, 3, comm, nproc, rank, proc, 1); Zoltan_RIB_reduce_double(&num_coords, &total_coords, 1, comm, nproc, rank, proc, 1); } else { MPI_Allreduce(cm,cmt,3,MPI_DOUBLE,MPI_SUM,comm); MPI_Allreduce(&num_coords,&total_coords,1,MPI_DOUBLE,MPI_SUM,comm); } /* Global center of mass */ cm[0] = cmt[0]/total_coords; cm[1] = cmt[1]/total_coords; cm[2] = cmt[2]/total_coords; xx = yy = zz = xy = xz = yz = 0.0; for (j = 0, c = X; j < num_obj; j++, c += 3) { xdif = c[0] - cm[0]; ydif = c[1] - cm[1]; zdif = c[2] - cm[2]; xx += xdif*xdif; yy += ydif*ydif; zz += zdif*zdif; xy += xdif*ydif; xz += xdif*zdif; yz += ydif*zdif; } if (zstruct->Tflops_Special) { tmp1[0] = xx; tmp1[1] = yy; tmp1[2] = zz; tmp1[3] = xy; tmp1[4] = xz; tmp1[5] = yz; Zoltan_RIB_reduce_double(tmp1, tmp2, 6, comm, nproc, rank, proc, 1); xxt = tmp2[0]; yyt = tmp2[1]; zzt = tmp2[2]; xyt = tmp2[3]; xzt = tmp2[4]; yzt = tmp2[5]; } else { tmp1[0] = xx; tmp1[1] = yy; tmp1[2] = zz; tmp1[3] = xy; tmp1[4] = xz; tmp1[5] = yz; MPI_Allreduce(tmp1, tmp2, 6, MPI_DOUBLE, MPI_SUM, comm); xxt = tmp2[0]; yyt = tmp2[1]; zzt = tmp2[2]; xyt = tmp2[3]; xzt = tmp2[4]; yzt = tmp2[5]; } /* Global inertial tensor matrix */ im[0][0] = xxt; im[1][1] = yyt; im[2][2] = zzt; im[0][1] = im[1][0] = xyt; im[0][2] = im[2][0] = xzt; im[1][2] = im[2][1] = yzt; }
/* * Calculate a 2x2 inertial matrix representing the * locations of the 2-dimensional coordinates. */ static void inertial_matrix2D(ZZ *zstruct, double *X, int num_obj, double *cm, double (*im)[3]) { double tmp1[3], tmp2[3]; double xx, yy, xy; double xdif, ydif; int j, rank; double cmt[2]; double xxt, yyt, xyt; double *c, num_coords, total_coords; MPI_Comm comm = zstruct->Communicator; int proc = zstruct->Proc; int nproc = zstruct->Num_Proc; int proclower = 0; num_coords = (double)num_obj; cm[0] = cm[1] = 0.0; for (j = 0, c = X; j < num_obj; j++, c += 2) { cm[0] += c[0]; cm[1] += c[1]; } if (zstruct->Tflops_Special) { rank = proc - proclower; Zoltan_RIB_reduce_double(cm, cmt, 2, comm, nproc, rank, proc, 1); Zoltan_RIB_reduce_double(&num_coords, &total_coords, 1, comm, nproc, rank, proc, 1); } else { MPI_Allreduce(cm,cmt,2,MPI_DOUBLE,MPI_SUM,comm); MPI_Allreduce(&num_coords,&total_coords,1,MPI_DOUBLE,MPI_SUM,comm); } /* Global center of mass */ cm[0] = cmt[0]/total_coords; cm[1] = cmt[1]/total_coords; xx = yy = xy = 0.0; for (j = 0, c = X; j < num_obj; j++, c += 2) { xdif = c[0] - cm[0]; ydif = c[1] - cm[1]; xx += xdif*xdif; yy += ydif*ydif; xy += xdif*ydif; } if (zstruct->Tflops_Special) { tmp1[0] = xx; tmp1[1] = yy; tmp1[2] = xy; Zoltan_RIB_reduce_double(tmp1, tmp2, 3, comm, nproc, rank, proc, 1); xxt = tmp2[0]; yyt = tmp2[1]; xyt = tmp2[2]; } else { tmp1[0] = xx; tmp1[1] = yy; tmp1[2] = xy; MPI_Allreduce(tmp1, tmp2, 3, MPI_DOUBLE, MPI_SUM, comm); xxt = tmp2[0]; yyt = tmp2[1]; xyt = tmp2[2]; } /* Global inertial tensor matrix */ im[0][0] = xxt; im[1][1] = yyt; im[0][1] = im[1][0] = xyt; }
int Zoltan_RIB_inertial3d( int Tflops_Special, /* Use Tflops_Special communication; should be 0 if called from serial_rib */ struct Dot_Struct *dotpt, /* graph data structure */ int *dindx, /* index array into dotpt; if NULL, access dotpt directly */ int dotnum, /* number of vtxs in graph */ int wgtflag, /* are vertex weights being used? */ double cm[3], /* center of mass in each direction */ double evec[3], /* eigenvector */ double *value, /* array for value to sort on */ MPI_Comm comm, /* communicator for partition */ int proc, /* Global proc number (Tflops_Special) */ int nproc, /* Number of procs in partition (Tflops_Special) */ int proclower /* Lowest numbered proc in partition (Tflops_Special)*/ ) { double tensor[3][3]; /* inertia tensor */ double tmp1[6], tmp2[6];/* temporary variables for MPI_Allreduce */ double xx, yy, zz; /* elements of inertial tensor */ double xy, xz, yz; /* elements of inertial tensor */ double xdif, ydif, zdif;/* deviation from center of mass */ double eval, res; /* eigenvalue and error in eval calculation */ double wgt_sum; /* sum of all the vertex weights */ int i, j; /* loop counter */ double cmt[3], wgtt; /* temp for center of mass */ double xxt, yyt, zzt; /* temp for inertial tensor */ double xyt, xzt, yzt; /* temp for inertial tensor */ int rank = 0; /* rank in partition (Tflops_Special) */ /* Compute center of mass and total mass. */ cm[0] = cm[1] = cm[2] = 0.0; if (wgtflag) { wgt_sum = 0; for (j = 0; j < dotnum; j++) { i = (dindx ? dindx[j] : j); wgt_sum += dotpt[i].Weight[0]; cm[0] += dotpt[i].Weight[0]*dotpt[i].X[0]; cm[1] += dotpt[i].Weight[0]*dotpt[i].X[1]; cm[2] += dotpt[i].Weight[0]*dotpt[i].X[2]; } } else { wgt_sum = dotnum; for (j = 0; j < dotnum; j++) { i = (dindx ? dindx[j] : j); cm[0] += dotpt[i].X[0]; cm[1] += dotpt[i].X[1]; cm[2] += dotpt[i].X[2]; } } /* Sum weights across processors */ if (Tflops_Special) { rank = proc - proclower; Zoltan_RIB_reduce_double(cm, cmt, 3, comm, nproc, rank, proc, 1); Zoltan_RIB_reduce_double(&wgt_sum, &wgtt, 1, comm, nproc, rank, proc, 1); } else { MPI_Allreduce(cm,cmt,3,MPI_DOUBLE,MPI_SUM,comm); MPI_Allreduce(&wgt_sum,&wgtt,1,MPI_DOUBLE,MPI_SUM,comm); } cm[0] = cmt[0]/wgtt; cm[1] = cmt[1]/wgtt; cm[2] = cmt[2]/wgtt; /* Generate 6 elements of Inertial tensor. */ xx = yy = zz = xy = xz = yz = 0.0; if (wgtflag) for (j = 0; j < dotnum; j++) { i = (dindx ? dindx[j] : j); xdif = dotpt[i].X[0] - cm[0]; ydif = dotpt[i].X[1] - cm[1]; zdif = dotpt[i].X[2] - cm[2]; xx += dotpt[i].Weight[0]*xdif*xdif; yy += dotpt[i].Weight[0]*ydif*ydif; zz += dotpt[i].Weight[0]*zdif*zdif; xy += dotpt[i].Weight[0]*xdif*ydif; xz += dotpt[i].Weight[0]*xdif*zdif; yz += dotpt[i].Weight[0]*ydif*zdif; } else for (j = 0; j < dotnum; j++) { i = (dindx ? dindx[j] : j); xdif = dotpt[i].X[0] - cm[0]; ydif = dotpt[i].X[1] - cm[1]; zdif = dotpt[i].X[2] - cm[2]; xx += xdif*xdif; yy += ydif*ydif; zz += zdif*zdif; xy += xdif*ydif; xz += xdif*zdif; yz += ydif*zdif; } /* Sum tensor across processors */ if (Tflops_Special) { tmp1[0] = xx; tmp1[1] = yy; tmp1[2] = zz; tmp1[3] = xy; tmp1[4] = xz; tmp1[5] = yz; Zoltan_RIB_reduce_double(tmp1, tmp2, 6, comm, nproc, rank, proc, 1); xxt = tmp2[0]; yyt = tmp2[1]; zzt = tmp2[2]; xyt = tmp2[3]; xzt = tmp2[4]; yzt = tmp2[5]; } else { tmp1[0] = xx; tmp1[1] = yy; tmp1[2] = zz; tmp1[3] = xy; tmp1[4] = xz; tmp1[5] = yz; MPI_Allreduce(tmp1, tmp2, 6, MPI_DOUBLE, MPI_SUM, comm); xxt = tmp2[0]; yyt = tmp2[1]; zzt = tmp2[2]; xyt = tmp2[3]; xzt = tmp2[4]; yzt = tmp2[5]; } /* Compute eigenvector with maximum eigenvalue. */ tensor[0][0] = xxt; tensor[1][1] = yyt; tensor[2][2] = zzt; tensor[0][1] = tensor[1][0] = xyt; tensor[0][2] = tensor[2][0] = xzt; tensor[1][2] = tensor[2][1] = yzt; evals3(tensor, &res, &res, &eval); eigenvec3(tensor, eval, evec, &res); /* Calculate value to sort/split on for each cell. */ /* This is inner product with eigenvector. */ for (j = 0; j < dotnum; j++) { i = (dindx ? dindx[j] : j); value[j] = (dotpt[i].X[0] - cm[0])*evec[0] + (dotpt[i].X[1] - cm[1])*evec[1] + (dotpt[i].X[2] - cm[2])*evec[2]; } return(ZOLTAN_OK); }