/**
 * Copy the contents of this device field into the host field `dest`.
 *
 * The two fields must agree in number of dimensions, volume, site order and
 * site subset, and this field must have nColor == 3; any mismatch is reported
 * through errorQuda.
 *
 * A half precision field is first copied into a single precision temporary
 * device field, which then performs the save.  Otherwise the device data `v`
 * is transferred into the host staging buffer `buffer` and unpackSpinor
 * writes it into `dest.v`, with the template instantiation selected by
 * nSpin, the two precisions and this field's fieldOrder.
 *
 * @param dest host field that receives the data
 */
void cudaColorSpinorField::saveCPUSpinorField(cpuColorSpinorField &dest) const {

  if (nDim != dest.Ndim()) {
    errorQuda("Number of dimensions %d %d don't match", nDim, dest.Ndim());
  }

  if (volume != dest.volume) {
    errorQuda("Volumes %d %d don't match", volume, dest.volume);
  }

  if (SiteOrder() != dest.SiteOrder()) {
    errorQuda("Subset orders don't match");
  }

  if (nColor != 3) {
    errorQuda("Nc != 3 not yet supported");
  }

  if (siteSubset != dest.siteSubset) {
    errorQuda("Subset types do not match %d %d", siteSubset, dest.siteSubset);
  }

  // Half precision is handled by converting to a single precision temporary
  // and letting that temporary do the save.
  if (precision == QUDA_HALF_PRECISION) {
    ColorSpinorParam param(*this); // acquire all attributes of this
    param.precision = QUDA_SINGLE_PRECISION; // change precision
    param.create = QUDA_COPY_FIELD_CREATE;
    cudaColorSpinorField tmp(*this, param);
    tmp.saveCPUSpinorField(dest);
    return;
  }

  // (temporary?) bug fix for padding
  memset(buffer, 0, bufferBytes);

  // Bring the device data into the host staging buffer.  The status is
  // checked so that a failed transfer is reported instead of silently
  // unpacking a zeroed buffer into dest.
  cudaError_t copyStatus = cudaMemcpy(buffer, v, bytes, cudaMemcpyDeviceToHost);
  if (copyStatus != cudaSuccess) {
    errorQuda("cudaMemcpy (device to host) failed: %s", cudaGetErrorString(copyStatus));
  }

  // Dispatch on (this precision, dest precision, this field order) for a
  // given number of spin components.  No comments inside the macro body:
  // a trailing backslash would splice the following line into them.
#define SAVE_SPINOR_GPU_TO_CPU(myNs)                                                                         \
  if (precision == QUDA_DOUBLE_PRECISION) {                                                                  \
    if (dest.precision == QUDA_DOUBLE_PRECISION) {                                                           \
      if (fieldOrder == QUDA_FLOAT_FIELD_ORDER) {                                                            \
        unpackSpinor<3,myNs,1>((double*)dest.v, (double*)buffer, volume, pad, x, dest.total_length, total_length, \
                               dest.SiteSubset(), dest.SiteOrder(), dest.GammaBasis(), gammaBasis, dest.FieldOrder()); \
      } else if (fieldOrder == QUDA_FLOAT2_FIELD_ORDER) {                                                    \
        unpackSpinor<3,myNs,2>((double*)dest.v, (double*)buffer, volume, pad, x, dest.total_length, total_length, \
                               dest.SiteSubset(), dest.SiteOrder(), dest.GammaBasis(), gammaBasis, dest.FieldOrder()); \
      } else if (fieldOrder == QUDA_FLOAT4_FIELD_ORDER) {                                                    \
        errorQuda("double4 not supported");                                                                  \
      }                                                                                                      \
    } else {                                                                                                 \
      if (fieldOrder == QUDA_FLOAT_FIELD_ORDER) {                                                            \
        unpackSpinor<3,myNs,1>((float*)dest.v, (double*)buffer, volume, pad, x, dest.total_length, total_length, \
                               dest.SiteSubset(), dest.SiteOrder(), dest.GammaBasis(), gammaBasis, dest.FieldOrder()); \
      } else if (fieldOrder == QUDA_FLOAT2_FIELD_ORDER) {                                                    \
        unpackSpinor<3,myNs,2>((float*)dest.v, (double*)buffer, volume, pad, x, dest.total_length, total_length, \
                               dest.SiteSubset(), dest.SiteOrder(), dest.GammaBasis(), gammaBasis, dest.FieldOrder()); \
      } else if (fieldOrder == QUDA_FLOAT4_FIELD_ORDER) {                                                    \
        errorQuda("double4 not supported");                                                                  \
      }                                                                                                      \
    }                                                                                                        \
  } else {                                                                                                   \
    if (dest.precision == QUDA_DOUBLE_PRECISION) {                                                           \
      if (fieldOrder == QUDA_FLOAT_FIELD_ORDER) {                                                            \
        unpackSpinor<3,myNs,1>((double*)dest.v, (float*)buffer, volume, pad, x, dest.total_length, total_length, \
                               dest.SiteSubset(), dest.SiteOrder(), dest.GammaBasis(), gammaBasis, dest.FieldOrder()); \
      } else if (fieldOrder == QUDA_FLOAT2_FIELD_ORDER) {                                                    \
        unpackSpinor<3,myNs,2>((double*)dest.v, (float*)buffer, volume, pad, x, dest.total_length, total_length, \
                               dest.SiteSubset(), dest.SiteOrder(), dest.GammaBasis(), gammaBasis, dest.FieldOrder()); \
      } else if (fieldOrder == QUDA_FLOAT4_FIELD_ORDER) {                                                    \
        unpackSpinor<3,myNs,4>((double*)dest.v, (float*)buffer, volume, pad, x, dest.total_length, total_length, \
                               dest.SiteSubset(), dest.SiteOrder(), dest.GammaBasis(), gammaBasis, dest.FieldOrder()); \
      }                                                                                                      \
    } else {                                                                                                 \
      if (fieldOrder == QUDA_FLOAT_FIELD_ORDER) {                                                            \
        unpackSpinor<3,myNs,1>((float*)dest.v, (float*)buffer, volume, pad, x, dest.total_length, total_length, \
                               dest.SiteSubset(), dest.SiteOrder(), dest.GammaBasis(), gammaBasis, dest.FieldOrder()); \
      } else if (fieldOrder == QUDA_FLOAT2_FIELD_ORDER) {                                                    \
        unpackSpinor<3,myNs,2>((float*)dest.v, (float*)buffer, volume, pad, x, dest.total_length, total_length, \
                               dest.SiteSubset(), dest.SiteOrder(), dest.GammaBasis(), gammaBasis, dest.FieldOrder()); \
      } else if (fieldOrder == QUDA_FLOAT4_FIELD_ORDER) {                                                    \
        unpackSpinor<3,myNs,4>((float*)dest.v, (float*)buffer, volume, pad, x, dest.total_length, total_length, \
                               dest.SiteSubset(), dest.SiteOrder(), dest.GammaBasis(), gammaBasis, dest.FieldOrder()); \
      }                                                                                                      \
    }                                                                                                        \
  }

  // Only 1 and 4 spin components are instantiated.
  switch(nSpin){
  case 1:
    SAVE_SPINOR_GPU_TO_CPU(1);
    break;
  case 4:
    SAVE_SPINOR_GPU_TO_CPU(4);
    break;
  default:
    errorQuda("invalid number of spinors in function %s\n", __FUNCTION__);
  }
#undef SAVE_SPINOR_GPU_TO_CPU

  return;
}
void cudaColorSpinorField::loadCPUSpinorField(const cpuColorSpinorField &src) { if (nDim != src.Ndim()) { errorQuda("Number of dimensions %d %d don't match", nDim, src.Ndim()); } if (volume != src.volume) { errorQuda("Volumes %d %d don't match", volume, src.volume); } if (SiteOrder() != src.SiteOrder()) { errorQuda("Subset orders don't match"); } if (nColor != 3) { errorQuda("Nc != 3 not yet supported"); } if (siteSubset != src.siteSubset) { errorQuda("Subset types do not match %d %d", siteSubset, src.siteSubset); } if (precision == QUDA_HALF_PRECISION) { ColorSpinorParam param(*this); // acquire all attributes of this param.precision = QUDA_SINGLE_PRECISION; // change precision param.create = QUDA_COPY_FIELD_CREATE; cudaColorSpinorField tmp(src, param); copy(tmp); return; } // (temporary?) bug fix for padding memset(buffer, 0, bufferBytes); #define LOAD_SPINOR_CPU_TO_GPU(myNs) \ if (precision == QUDA_DOUBLE_PRECISION) { \ if (src.precision == QUDA_DOUBLE_PRECISION) { \ if (fieldOrder == QUDA_FLOAT_FIELD_ORDER) { \ packSpinor<3,myNs,1>((double*)buffer, (double*)src.v, volume, pad, x, total_length, src.total_length, \ src.SiteSubset(), src.SiteOrder(), gammaBasis, src.GammaBasis(), src.FieldOrder()); \ } else if (fieldOrder == QUDA_FLOAT2_FIELD_ORDER) { \ packSpinor<3,myNs,2>((double*)buffer, (double*)src.v, volume, pad, x, total_length, src.total_length, \ src.SiteSubset(), src.SiteOrder(), gammaBasis, src.GammaBasis(), src.FieldOrder()); \ } else if (fieldOrder == QUDA_FLOAT4_FIELD_ORDER) { \ errorQuda("double4 not supported"); \ } \ } else { \ if (fieldOrder == QUDA_FLOAT_FIELD_ORDER) { \ packSpinor<3,myNs,1>((double*)buffer, (float*)src.v, volume, pad, x, total_length, src.total_length, \ src.SiteSubset(), src.SiteOrder(), gammaBasis, src.GammaBasis(), src.FieldOrder()); \ } else if (fieldOrder == QUDA_FLOAT2_FIELD_ORDER) { \ packSpinor<3,myNs,2>((double*)buffer, (float*)src.v, volume, pad, x, total_length, src.total_length, \ src.SiteSubset(), src.SiteOrder(), 
gammaBasis, src.GammaBasis(), src.FieldOrder()); \ } else if (fieldOrder == QUDA_FLOAT4_FIELD_ORDER) { \ errorQuda("double4 not supported"); \ } \ } \ } else { \ if (src.precision == QUDA_DOUBLE_PRECISION) { \ if (fieldOrder == QUDA_FLOAT_FIELD_ORDER) { \ packSpinor<3,myNs,1>((float*)buffer, (double*)src.v, volume, pad, x, total_length, src.total_length, \ src.SiteSubset(), src.SiteOrder(), gammaBasis, src.GammaBasis(), src.FieldOrder()); \ } else if (fieldOrder == QUDA_FLOAT2_FIELD_ORDER) { \ packSpinor<3,myNs,2>((float*)buffer, (double*)src.v, volume, pad, x, total_length, src.total_length, \ src.SiteSubset(), src.SiteOrder(), gammaBasis, src.GammaBasis(), src.FieldOrder()); \ } else if (fieldOrder == QUDA_FLOAT4_FIELD_ORDER) { \ packSpinor<3,myNs,4>((float*)buffer, (double*)src.v, volume, pad, x, total_length, src.total_length, \ src.SiteSubset(), src.SiteOrder(), gammaBasis, src.GammaBasis(), src.FieldOrder()); \ } \ } else { \ if (fieldOrder == QUDA_FLOAT_FIELD_ORDER) { \ packSpinor<3,myNs,1>((float*)buffer, (float*)src.v, volume, pad, x, total_length, src.total_length, \ src.SiteSubset(), src.SiteOrder(), gammaBasis, src.GammaBasis(), src.FieldOrder()); \ } else if (fieldOrder == QUDA_FLOAT2_FIELD_ORDER) { \ packSpinor<3,myNs,2>((float*)buffer, (float*)src.v, volume, pad, x, total_length, src.total_length, \ src.SiteSubset(), src.SiteOrder(), gammaBasis, src.GammaBasis(), src.FieldOrder()); \ } else if (fieldOrder == QUDA_FLOAT4_FIELD_ORDER) { \ packSpinor<3,myNs,4>((float*)buffer, (float*)src.v, volume, pad, x, total_length, src.total_length, \ src.SiteSubset(), src.SiteOrder(), gammaBasis, src.GammaBasis(), src.FieldOrder()); \ } \ } \ } switch(nSpin){ case 1: LOAD_SPINOR_CPU_TO_GPU(1); break; case 4: LOAD_SPINOR_CPU_TO_GPU(4); break; default: errorQuda("invalid number of spinors in function %s\n", __FUNCTION__); } #undef LOAD_SPINOR_CPU_TO_GPU /* for (int i=0; i<length; i++) { std::cout << i << " " << ((float*)src.v)[i] << " " << ((float*)buffer)[i] << 
std::endl; }*/ cudaMemcpy(v, buffer, bytes, cudaMemcpyHostToDevice); return; }