/**
 * Hand this field's device buffer and the host buffer of @p cpu to
 * retrieveGaugeField.
 *
 * The overload is selected by three things: the host precision reported by
 * cpu.Precision(), this field's precision, and -- for single and half device
 * precision -- whether the field is stored without reconstruction
 * (two-component vector type) or with it (four-component vector type).
 * Any precision combination not handled below results in no call at all.
 *
 * @param cpu host gauge field; cpu.gauge is passed as the first argument
 *            and cpu.order as the ordering argument of retrieveGaugeField.
 */
void cudaGaugeField::saveCPUField(cpuGaugeField &cpu) const
{
  // Copy the field parameters into the globals (presumably read by the
  // packing routines -- they are not passed as arguments below).
  anisotropy_ = anisotropy;
  fat_link_max_ = fat_link_max;
  X_ = x;
  t_boundary_ = t_boundary;

  // Uncompressed single/half fields use 2-component vectors, others 4-component.
  const bool uncompressed = (reconstruct == QUDA_RECONSTRUCT_NO);

  if (cpu.Precision() == QUDA_DOUBLE_PRECISION) {
    double *host_links = (double*)cpu.gauge;

    if (precision == QUDA_DOUBLE_PRECISION) {
      retrieveGaugeField(host_links, (double2*)(gauge), cpu.order, reconstruct, bytes, volumeCB, pad);
    } else if (precision == QUDA_SINGLE_PRECISION) {
      if (uncompressed) {
        retrieveGaugeField(host_links, (float2*)(gauge), cpu.order, reconstruct, bytes, volumeCB, pad);
      } else {
        retrieveGaugeField(host_links, (float4*)(gauge), cpu.order, reconstruct, bytes, volumeCB, pad);
      }
    } else if (precision == QUDA_HALF_PRECISION) {
      if (uncompressed) {
        retrieveGaugeField(host_links, (short2*)(gauge), cpu.order, reconstruct, bytes, volumeCB, pad);
      } else {
        retrieveGaugeField(host_links, (short4*)(gauge), cpu.order, reconstruct, bytes, volumeCB, pad);
      }
    }
  } else if (cpu.Precision() == QUDA_SINGLE_PRECISION) {
    float *host_links = (float*)cpu.gauge;

    if (precision == QUDA_DOUBLE_PRECISION) {
      retrieveGaugeField(host_links, (double2*)(gauge), cpu.order, reconstruct, bytes, volumeCB, pad);
    } else if (precision == QUDA_SINGLE_PRECISION) {
      if (uncompressed) {
        retrieveGaugeField(host_links, (float2*)(gauge), cpu.order, reconstruct, bytes, volumeCB, pad);
      } else {
        retrieveGaugeField(host_links, (float4*)(gauge), cpu.order, reconstruct, bytes, volumeCB, pad);
      }
    } else if (precision == QUDA_HALF_PRECISION) {
      if (uncompressed) {
        retrieveGaugeField(host_links, (short2*)(gauge), cpu.order, reconstruct, bytes, volumeCB, pad);
      } else {
        retrieveGaugeField(host_links, (short4*)(gauge), cpu.order, reconstruct, bytes, volumeCB, pad);
      }
    }
  }
}
/**
 * Hand this field's device data and the host buffer of @p cpu to the store
 * routine selected by @p pack_location.
 *
 * - QUDA_CUDA_FIELD_LOCATION: device-side reordering then copy. Requires
 *   matching precisions, no reconstruction, QUDA_FLOAT2_GAUGE_ORDER on this
 *   field and QUDA_MILC_GAUGE_ORDER on the host field; only double and single
 *   precision are dispatched.
 * - QUDA_CPU_FIELD_LOCATION: copy then host-side reorder. Fields whose
 *   reconstruct type is QUDA_RECONSTRUCT_10 go through storeMomToCPUArray,
 *   everything else through storeGaugeField.
 * - Any other value is reported through errorQuda.
 *
 * @param cpu           host gauge field whose `gauge` buffer is passed to the
 *                      store routines
 * @param pack_location where the reordering is performed
 */
void cudaGaugeField::saveCPUField(cpuGaugeField &cpu, const QudaFieldLocation &pack_location) const
{
  if (pack_location == QUDA_CUDA_FIELD_LOCATION) {
    // do device-side reordering then copy

    // check parameters are suitable for device-side packing
    if (precision != cpu.Precision()) {
      errorQuda("cpu precision %d and cuda precision %d must be the same", cpu.Precision(), precision);
    }
    if (reconstruct != QUDA_RECONSTRUCT_NO) {
      errorQuda("Only no reconstruction supported");
    }
    if (order != QUDA_FLOAT2_GAUGE_ORDER) {
      errorQuda("Only QUDA_FLOAT2_GAUGE_ORDER supported");
    }
    if (cpu.Order() != QUDA_MILC_GAUGE_ORDER) {
      errorQuda("Only QUDA_MILC_GAUGE_ORDER supported");
    }

    if (precision == QUDA_SINGLE_PRECISION) {
      storeGaugeField((float*)cpu.gauge, (float2*)gauge, bytes, volumeCB, stride, precision);
    } else if (precision == QUDA_DOUBLE_PRECISION) {
      storeGaugeField((double*)cpu.gauge, (double2*)gauge, bytes, volumeCB, stride, precision);
    } else {
      errorQuda("Half precision not supported");
    }
    return;
  }

  if (pack_location != QUDA_CPU_FIELD_LOCATION) {
    errorQuda("Invalid pack location %d", pack_location);
    return;
  }

  // do copy then host-side reorder

  // FIXME - nasty globals
  anisotropy_ = anisotropy;
  fat_link_max_ = fat_link_max;
  X_ = x;
  t_boundary_ = t_boundary;

  if (reconstruct == QUDA_RECONSTRUCT_10) {
    // reconstruct 10: handled by the momentum unpack routine
    if (cpu.Precision() != precision) {
      errorQuda("cpu and gpu precison has to be the same at this moment");
    }
    if (precision == QUDA_HALF_PRECISION) {
      errorQuda("half precision is not supported at this moment");
    }
    if (cpu.order != QUDA_MILC_GAUGE_ORDER) {
      errorQuda("Only MILC gauge order supported in momentum unpack, not %d", cpu.order);
    }

    if (precision == QUDA_DOUBLE_PRECISION) {
      storeMomToCPUArray((double*)cpu.gauge, (double2*)even, (double2*)odd, bytes, volume, pad);
    } else {
      // single precision
      storeMomToCPUArray((float*)cpu.gauge, (float2*)even, (float2*)odd, bytes, volume, pad);
    }
    return;
  }

  // Uncompressed single/half fields use 2-component vectors, others 4-component.
  const bool uncompressed = (reconstruct == QUDA_RECONSTRUCT_NO);

  if (cpu.Precision() == QUDA_DOUBLE_PRECISION) {
    double *host_links = (double*)cpu.gauge;

    if (precision == QUDA_DOUBLE_PRECISION) {
      storeGaugeField(host_links, (double2*)(gauge), cpu.order, reconstruct, bytes, volumeCB, pad);
    } else if (precision == QUDA_SINGLE_PRECISION) {
      if (uncompressed) {
        storeGaugeField(host_links, (float2*)(gauge), cpu.order, reconstruct, bytes, volumeCB, pad);
      } else {
        storeGaugeField(host_links, (float4*)(gauge), cpu.order, reconstruct, bytes, volumeCB, pad);
      }
    } else if (precision == QUDA_HALF_PRECISION) {
      if (uncompressed) {
        storeGaugeField(host_links, (short2*)(gauge), cpu.order, reconstruct, bytes, volumeCB, pad);
      } else {
        storeGaugeField(host_links, (short4*)(gauge), cpu.order, reconstruct, bytes, volumeCB, pad);
      }
    }
  } else if (cpu.Precision() == QUDA_SINGLE_PRECISION) {
    float *host_links = (float*)cpu.gauge;

    if (precision == QUDA_DOUBLE_PRECISION) {
      storeGaugeField(host_links, (double2*)(gauge), cpu.order, reconstruct, bytes, volumeCB, pad);
    } else if (precision == QUDA_SINGLE_PRECISION) {
      if (uncompressed) {
        storeGaugeField(host_links, (float2*)(gauge), cpu.order, reconstruct, bytes, volumeCB, pad);
      } else {
        storeGaugeField(host_links, (float4*)(gauge), cpu.order, reconstruct, bytes, volumeCB, pad);
      }
    } else if (precision == QUDA_HALF_PRECISION) {
      if (uncompressed) {
        storeGaugeField(host_links, (short2*)(gauge), cpu.order, reconstruct, bytes, volumeCB, pad);
      } else {
        storeGaugeField(host_links, (short4*)(gauge), cpu.order, reconstruct, bytes, volumeCB, pad);
      }
    }
  }
}
void cudaGaugeField::loadCPUField(const cpuGaugeField &cpu) { checkField(cpu); // FIXME anisotropy_ = anisotropy; X_ = x; t_boundary_ = t_boundary; #ifdef MULTI_GPU cpu.exchangeGhost(); #endif if (precision == QUDA_DOUBLE_PRECISION) { if (cpu.Precision() == QUDA_DOUBLE_PRECISION) { loadGaugeField((double2*)(even), (double2*)(odd), (double*)cpu.gauge, (double*)cpu.ghost, cpu.Order(), reconstruct, bytes, volumeCB, surfaceCB, pad, nFace, link_type, fat_link_max); } else if (cpu.Precision() == QUDA_SINGLE_PRECISION) { loadGaugeField((double2*)(even), (double2*)(odd), (float*)cpu.gauge, (float*)cpu.ghost, cpu.order, reconstruct, bytes, volumeCB, surfaceCB, pad, nFace, link_type, fat_link_max); } } else if (precision == QUDA_SINGLE_PRECISION) { if (cpu.Precision() == QUDA_DOUBLE_PRECISION) { if (reconstruct == QUDA_RECONSTRUCT_NO) { loadGaugeField((float2*)(even), (float2*)(odd), (double*)cpu.gauge, (double*)cpu.ghost, cpu.order, reconstruct, bytes, volumeCB, surfaceCB, pad, nFace, link_type, fat_link_max); } else { loadGaugeField((float4*)(even), (float4*)(odd), (double*)cpu.gauge, (double*)cpu.ghost, cpu.order, reconstruct, bytes, volumeCB, surfaceCB, pad, nFace, link_type, fat_link_max); } } else if (cpu.Precision() == QUDA_SINGLE_PRECISION) { if (reconstruct == QUDA_RECONSTRUCT_NO) { loadGaugeField((float2*)(even), (float2*)(odd), (float*)cpu.gauge, (float*)cpu.ghost, cpu.order, reconstruct, bytes, volumeCB, surfaceCB, pad, nFace, link_type, fat_link_max); } else { loadGaugeField((float4*)(even), (float4*)(odd), (float*)cpu.gauge, (float*)cpu.ghost, cpu.order, reconstruct, bytes, volumeCB, surfaceCB, pad, nFace, link_type, fat_link_max); } } } else if (precision == QUDA_HALF_PRECISION) { if (cpu.Precision() == QUDA_DOUBLE_PRECISION){ if (reconstruct == QUDA_RECONSTRUCT_NO) { loadGaugeField((short2*)(even), (short2*)(odd), (double*)cpu.gauge, (double*)cpu.ghost, cpu.order, reconstruct, bytes, volumeCB, surfaceCB, pad, nFace, link_type, fat_link_max); } else { 
loadGaugeField((short4*)(even), (short4*)(odd), (double*)cpu.gauge, (double*)cpu.ghost, cpu.order, reconstruct, bytes, volumeCB, surfaceCB, pad, nFace, link_type, fat_link_max); } } else if (cpu.Precision() == QUDA_SINGLE_PRECISION) { if (reconstruct == QUDA_RECONSTRUCT_NO) { loadGaugeField((short2*)(even), (short2*)(odd), (float*)cpu.gauge, (float*)cpu.ghost, cpu.order, reconstruct, bytes, volumeCB, surfaceCB, pad, nFace, link_type, fat_link_max); } else { loadGaugeField((short4*)(even), (short4*)(odd), (float*)cpu.gauge, (float*)cpu.ghost, cpu.order, reconstruct, bytes, volumeCB, surfaceCB, pad, nFace, link_type, fat_link_max); } } } }
void cudaGaugeField::loadCPUField(const cpuGaugeField &cpu, const QudaFieldLocation &pack_location) { checkField(cpu); if (pack_location == QUDA_CUDA_FIELD_LOCATION) { errorQuda("Not implemented"); // awaiting Guochun's new gauge packing } else if (pack_location == QUDA_CPU_FIELD_LOCATION) { // FIXME anisotropy_ = anisotropy; X_ = x; t_boundary_ = t_boundary; #ifdef MULTI_GPU //FIXME: if this is MOM field, we don't need exchange data if(link_type != QUDA_ASQTAD_MOM_LINKS){ cpu.exchangeGhost(); } #endif if (reconstruct != QUDA_RECONSTRUCT_10) { // gauge field if (precision == QUDA_DOUBLE_PRECISION) { if (cpu.Precision() == QUDA_DOUBLE_PRECISION) { loadGaugeField((double2*)(even), (double2*)(odd), (double*)cpu.gauge, (double**)cpu.ghost, cpu.Order(), reconstruct, bytes, volumeCB, surfaceCB, pad, nFace, link_type, fat_link_max); } else if (cpu.Precision() == QUDA_SINGLE_PRECISION) { loadGaugeField((double2*)(even), (double2*)(odd), (float*)cpu.gauge, (float**)cpu.ghost, cpu.order, reconstruct, bytes, volumeCB, surfaceCB, pad, nFace, link_type, fat_link_max); } } else if (precision == QUDA_SINGLE_PRECISION) { if (cpu.Precision() == QUDA_DOUBLE_PRECISION) { if (reconstruct == QUDA_RECONSTRUCT_NO) { loadGaugeField((float2*)(even), (float2*)(odd), (double*)cpu.gauge, (double**)cpu.ghost, cpu.order, reconstruct, bytes, volumeCB, surfaceCB, pad, nFace, link_type, fat_link_max); } else { loadGaugeField((float4*)(even), (float4*)(odd), (double*)cpu.gauge, (double**)cpu.ghost, cpu.order, reconstruct, bytes, volumeCB, surfaceCB, pad, nFace, link_type, fat_link_max); } } else if (cpu.Precision() == QUDA_SINGLE_PRECISION) { if (reconstruct == QUDA_RECONSTRUCT_NO) { loadGaugeField((float2*)(even), (float2*)(odd), (float*)cpu.gauge, (float**)cpu.ghost, cpu.order, reconstruct, bytes, volumeCB, surfaceCB, pad, nFace, link_type, fat_link_max); } else { loadGaugeField((float4*)(even), (float4*)(odd), (float*)cpu.gauge, (float**)cpu.ghost, cpu.order, reconstruct, bytes, volumeCB, 
surfaceCB, pad, nFace, link_type, fat_link_max); } } } else if (precision == QUDA_HALF_PRECISION) { if (cpu.Precision() == QUDA_DOUBLE_PRECISION){ if (reconstruct == QUDA_RECONSTRUCT_NO) { loadGaugeField((short2*)(even), (short2*)(odd), (double*)cpu.gauge, (double**)cpu.ghost, cpu.order, reconstruct, bytes, volumeCB, surfaceCB, pad, nFace, link_type, fat_link_max); } else { loadGaugeField((short4*)(even), (short4*)(odd), (double*)cpu.gauge, (double**)cpu.ghost, cpu.order, reconstruct, bytes, volumeCB, surfaceCB, pad, nFace, link_type, fat_link_max); } } else if (cpu.Precision() == QUDA_SINGLE_PRECISION) { if (reconstruct == QUDA_RECONSTRUCT_NO) { loadGaugeField((short2*)(even), (short2*)(odd), (float*)cpu.gauge, (float**)cpu.ghost, cpu.order, reconstruct, bytes, volumeCB, surfaceCB, pad, nFace, link_type, fat_link_max); } else { loadGaugeField((short4*)(even), (short4*)(odd), (float*)cpu.gauge, (float**)(cpu.ghost), cpu.order, reconstruct, bytes, volumeCB, surfaceCB, pad, nFace, link_type, fat_link_max); } } } } else { // momentum field if (precision == QUDA_DOUBLE_PRECISION) { if (cpu.Precision() == QUDA_DOUBLE_PRECISION) { loadMomField((double2*)(even), (double2*)(odd), (double*)cpu.gauge, bytes, volumeCB, pad); } else if (cpu.Precision() == QUDA_SINGLE_PRECISION) { loadMomField((double2*)(even), (double2*)(odd), (float*)cpu.gauge, bytes, volumeCB, pad); } } else { if (cpu.Precision() == QUDA_DOUBLE_PRECISION) { loadMomField((float2*)(even), (float2*)(odd), (double*)cpu.gauge, bytes, volumeCB, pad); } else if (cpu.Precision() == QUDA_SINGLE_PRECISION) { loadMomField((float2*)(even), (float2*)(odd), (float*)cpu.gauge, bytes, volumeCB, pad); } } } // gauge or momentum } else { errorQuda("Invalid pack location %d", pack_location); } }