// This does the exchange of the gauge field ghost zone and places it // into the ghost array. // This should be optimized so it is reused if called multiple times void cpuGaugeField::exchangeGhost() const { void **send = (void**)malloc(sizeof(void*)*QUDA_MAX_DIM); for (int d=0; d<nDim; d++) { send[d] = malloc(nFace * surface[d] * reconstruct * precision); } // get the links into a contiguous buffer if (precision == QUDA_DOUBLE_PRECISION) { packGhost((double**)gauge, (double**)send, nFace, x, volumeCB, surfaceCB); } else { packGhost((float**)gauge, (float**)send, nFace, x, volumeCB, surfaceCB); } // communicate between nodes FaceBuffer faceBuf(x, nDim, reconstruct, nFace, precision); faceBuf.exchangeCpuLink(ghost, send); for (int i=0; i<4; i++) { double sum = 0.0; for (int j=0; j<nFace*surface[i]*reconstruct; j++) { sum += ((double*)(ghost[i]))[j]; } } for (int d=0; d<nDim; d++) free(send[d]); free(send); }
// this actually applies the preconditioned dslash, e.g., D_ee^{-1} D_eo or D_oo^{-1} D_oe void wil_dslash(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param) { #ifndef MULTI_GPU if (precision == QUDA_DOUBLE_PRECISION) dslashReference((double*)out, (double**)gauge, (double*)in, oddBit, daggerBit); else dslashReference((float*)out, (float**)gauge, (float*)in, oddBit, daggerBit); #else GaugeFieldParam gauge_field_param(gauge, gauge_param); cpuGaugeField cpu(gauge_field_param); cpu.exchangeGhost(); void **ghostGauge = (void**)cpu.Ghost(); // Get spinor ghost fields // First wrap the input spinor into a ColorSpinorField ColorSpinorParam csParam; csParam.v = in; csParam.fieldLocation = QUDA_CPU_FIELD_LOCATION; csParam.nColor = 3; csParam.nSpin = 4; csParam.nDim = 4; for (int d=0; d<4; d++) csParam.x[d] = Z[d]; csParam.precision = precision; csParam.pad = 0; csParam.siteSubset = QUDA_PARITY_SITE_SUBSET; csParam.x[0] /= 2; csParam.siteOrder = QUDA_EVEN_ODD_SITE_ORDER; csParam.fieldOrder = QUDA_SPACE_SPIN_COLOR_FIELD_ORDER; csParam.gammaBasis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS; csParam.create = QUDA_REFERENCE_FIELD_CREATE; cpuColorSpinorField inField(csParam); { // Now do the exchange QudaParity otherParity = QUDA_INVALID_PARITY; if (oddBit == QUDA_EVEN_PARITY) otherParity = QUDA_ODD_PARITY; else if (oddBit == QUDA_ODD_PARITY) otherParity = QUDA_EVEN_PARITY; else errorQuda("ERROR: full parity not supported in function %s", __FUNCTION__); int nFace = 1; FaceBuffer faceBuf(Z, 4, mySpinorSiteSize, nFace, precision); faceBuf.exchangeCpuSpinor(inField, otherParity, daggerBit); } void** fwd_nbr_spinor = inField.fwdGhostFaceBuffer; void** back_nbr_spinor = inField.backGhostFaceBuffer; if (precision == QUDA_DOUBLE_PRECISION) { dslashReference((double*)out, (double**)gauge, (double**)ghostGauge, (double*)in, (double**)fwd_nbr_spinor, (double**)back_nbr_spinor, oddBit, daggerBit); } else{ dslashReference((float*)out, (float**)gauge, (float**)ghostGauge, (float*)in, (float**)fwd_nbr_spinor, (float**)back_nbr_spinor, oddBit, daggerBit); } #endif }
// This does the exchange of the gauge field ghost zone and places it // into the ghost array. void cpuGaugeField::exchangeGhost() { if (ghostExchange) return; void *send[QUDA_MAX_DIM]; for (int d=0; d<nDim; d++) send[d] = safe_malloc(nFace*surface[d]*reconstruct*precision); // get the links into contiguous buffers extractGaugeGhost(*this, send); // communicate between nodes FaceBuffer faceBuf(x, nDim, reconstruct, nFace, precision); faceBuf.exchangeLink(ghost, send, QUDA_CPU_FIELD_LOCATION); for (int d=0; d<nDim; d++) host_free(send[d]); ghostExchange = true; }