Exemplo n.º 1
0
// Copy this device-resident field into the host field `cpu`.
//
// cpu           - destination host field; its `gauge` buffer is handed to the
//                 store routines as the output pointer.
// pack_location - where the reordering takes place:
//                   QUDA_CUDA_FIELD_LOCATION: reorder on the device, then copy
//                   QUDA_CPU_FIELD_LOCATION : copy, then reorder on the host
//
// Every unsupported combination of location, precision, reconstruction or
// ordering is reported through errorQuda rather than being silently skipped.
void cudaGaugeField::saveCPUField(cpuGaugeField &cpu, const QudaFieldLocation &pack_location) const
{

  // do device-side reordering then copy
  if (pack_location == QUDA_CUDA_FIELD_LOCATION) {
    // check parameters are suitable for device-side packing
    if (precision != cpu.Precision())
      errorQuda("cpu precision %d and cuda precision %d must be the same", 
                cpu.Precision(), precision );

    if (reconstruct != QUDA_RECONSTRUCT_NO)
      errorQuda("Only no reconstruction supported");

    if (order != QUDA_FLOAT2_GAUGE_ORDER)
      errorQuda("Only QUDA_FLOAT2_GAUGE_ORDER supported");

    if (cpu.Order() != QUDA_MILC_GAUGE_ORDER)
      errorQuda("Only QUDA_MILC_GAUGE_ORDER supported");

    // Host and device precision are equal here, so one switch picks both casts.
    if (precision == QUDA_DOUBLE_PRECISION){
      storeGaugeField((double*)cpu.gauge, (double2*)gauge, bytes, volumeCB, stride, precision);
    } else if (precision == QUDA_SINGLE_PRECISION){
      storeGaugeField((float*)cpu.gauge, (float2*)gauge, bytes, volumeCB, stride, precision);
    } else {
      errorQuda("Half precision not supported");
    }

  } else if (pack_location == QUDA_CPU_FIELD_LOCATION) { // do copy then host-side reorder

    // FIXME - nasty globals
    // NOTE(review): presumably read by the host-side store routines - confirm.
    anisotropy_ = anisotropy;
    fat_link_max_ = fat_link_max;
    X_ = x;
    t_boundary_ = t_boundary;

    if (reconstruct != QUDA_RECONSTRUCT_10) {

      // Only double and single precision host buffers have a matching cast
      // below; reject anything else instead of returning with cpu untouched.
      if (cpu.Precision() != QUDA_DOUBLE_PRECISION && cpu.Precision() != QUDA_SINGLE_PRECISION)
        errorQuda("Unsupported cpu precision %d", cpu.Precision());

      if (precision == QUDA_DOUBLE_PRECISION) {

        // double precision device storage is always accessed as double2
        if (cpu.Precision() == QUDA_DOUBLE_PRECISION) {
          storeGaugeField((double*)cpu.gauge, (double2*)(gauge),
                          cpu.order, reconstruct, bytes, volumeCB, pad);
        } else if (cpu.Precision() == QUDA_SINGLE_PRECISION) {
          storeGaugeField((float*)cpu.gauge, (double2*)(gauge),
                          cpu.order, reconstruct, bytes, volumeCB, pad);
        }

      } else if (precision == QUDA_SINGLE_PRECISION) {

        // float2 for unreconstructed links, float4 for every other reconstruct type
        if (cpu.Precision() == QUDA_DOUBLE_PRECISION) {
          if (reconstruct == QUDA_RECONSTRUCT_NO) {
            storeGaugeField((double*)cpu.gauge, (float2*)(gauge),
                            cpu.order, reconstruct, bytes, volumeCB, pad);
          } else {
            storeGaugeField((double*)cpu.gauge, (float4*)(gauge),
                            cpu.order, reconstruct, bytes, volumeCB, pad);
          }
        } else if (cpu.Precision() == QUDA_SINGLE_PRECISION) {
          if (reconstruct == QUDA_RECONSTRUCT_NO) {
            storeGaugeField((float*)cpu.gauge, (float2*)(gauge),
                            cpu.order, reconstruct, bytes, volumeCB, pad);
          } else {
            storeGaugeField((float*)cpu.gauge, (float4*)(gauge),
                            cpu.order, reconstruct, bytes, volumeCB, pad);
          }
        }

      } else if (precision == QUDA_HALF_PRECISION) {

        // short2 for unreconstructed links, short4 for every other reconstruct type
        if (cpu.Precision() == QUDA_DOUBLE_PRECISION) {
          if (reconstruct == QUDA_RECONSTRUCT_NO) {
            storeGaugeField((double*)cpu.gauge, (short2*)(gauge),
                            cpu.order, reconstruct, bytes, volumeCB, pad);
          } else {
            storeGaugeField((double*)cpu.gauge, (short4*)(gauge),
                            cpu.order, reconstruct, bytes, volumeCB, pad);
          }
        } else if (cpu.Precision() == QUDA_SINGLE_PRECISION) {
          if (reconstruct == QUDA_RECONSTRUCT_NO) {
            storeGaugeField((float*)cpu.gauge, (short2*)(gauge),
                            cpu.order, reconstruct, bytes, volumeCB, pad);
          } else {
            storeGaugeField((float*)cpu.gauge, (short4*)(gauge),
                            cpu.order, reconstruct, bytes, volumeCB, pad);
          }
        }

      } else {
        // Previously an unknown device precision fell through without a trace.
        errorQuda("Unsupported cuda precision %d", precision);
      }
    } else {

      // reconstruct == QUDA_RECONSTRUCT_10 is handled by the momentum unpack
      // routine, which has stricter requirements than the gauge path.
      if (cpu.Precision() != precision)
        errorQuda("cpu and gpu precison has to be the same at this moment");

      if (precision == QUDA_HALF_PRECISION)
        errorQuda("half precision is not supported at this moment");

      if (cpu.order != QUDA_MILC_GAUGE_ORDER)
        errorQuda("Only MILC gauge order supported in momentum unpack, not %d", cpu.order);

      if (precision == QUDA_DOUBLE_PRECISION) {
        storeMomToCPUArray( (double*)cpu.gauge, (double2*)even, (double2*)odd, bytes, volume, pad);
      }else { //SINGLE PRECISIONS
        storeMomToCPUArray( (float*)cpu.gauge, (float2*)even, (float2*)odd, bytes, volume, pad);
      }
    } // reconstruct 10
  } else {
    errorQuda("Invalid pack location %d", pack_location);
  }

}
Exemplo n.º 2
0
void cudaGaugeField::loadCPUField(const cpuGaugeField &cpu) {

  checkField(cpu);

  // FIXME
  anisotropy_ = anisotropy;
  X_ = x;
  t_boundary_ = t_boundary;

#ifdef MULTI_GPU
  cpu.exchangeGhost();
#endif

  if (precision == QUDA_DOUBLE_PRECISION) {

    if (cpu.Precision() == QUDA_DOUBLE_PRECISION) {
      loadGaugeField((double2*)(even), (double2*)(odd), (double*)cpu.gauge, (double*)cpu.ghost,
		     cpu.Order(), reconstruct, bytes, volumeCB, surfaceCB, pad, nFace, link_type, fat_link_max);
    } else if (cpu.Precision() == QUDA_SINGLE_PRECISION) {
      loadGaugeField((double2*)(even), (double2*)(odd), (float*)cpu.gauge, (float*)cpu.ghost,
		     cpu.order, reconstruct, bytes, volumeCB, surfaceCB, pad, nFace, link_type, fat_link_max);
    }

  } else if (precision == QUDA_SINGLE_PRECISION) {

    if (cpu.Precision() == QUDA_DOUBLE_PRECISION) {
      if (reconstruct == QUDA_RECONSTRUCT_NO) {
	loadGaugeField((float2*)(even), (float2*)(odd), (double*)cpu.gauge, (double*)cpu.ghost, 
		       cpu.order, reconstruct, bytes, volumeCB, surfaceCB, pad, nFace, link_type, fat_link_max);	      
      } else {
	loadGaugeField((float4*)(even), (float4*)(odd), (double*)cpu.gauge, (double*)cpu.ghost,
		       cpu.order, reconstruct, bytes, volumeCB, surfaceCB, pad, nFace, link_type, fat_link_max);
      }
    } else if (cpu.Precision() == QUDA_SINGLE_PRECISION) {
      if (reconstruct == QUDA_RECONSTRUCT_NO) {
	loadGaugeField((float2*)(even), (float2*)(odd), (float*)cpu.gauge, (float*)cpu.ghost,
		       cpu.order, reconstruct, bytes, volumeCB, surfaceCB, pad, nFace, link_type, fat_link_max);
      } else {
	loadGaugeField((float4*)(even), (float4*)(odd), (float*)cpu.gauge, (float*)cpu.ghost,
		       cpu.order, reconstruct, bytes, volumeCB, surfaceCB, pad, nFace, link_type, fat_link_max);
      }
    }

  } else if (precision == QUDA_HALF_PRECISION) {

    if (cpu.Precision() == QUDA_DOUBLE_PRECISION){
      if (reconstruct == QUDA_RECONSTRUCT_NO) {
	loadGaugeField((short2*)(even), (short2*)(odd), (double*)cpu.gauge, (double*)cpu.ghost,
		       cpu.order, reconstruct, bytes, volumeCB, surfaceCB, pad, nFace, link_type, fat_link_max);
      } else {
	loadGaugeField((short4*)(even), (short4*)(odd), (double*)cpu.gauge, (double*)cpu.ghost,
		       cpu.order, reconstruct, bytes, volumeCB, surfaceCB, pad, nFace, link_type, fat_link_max);	      
      }
    } else if (cpu.Precision() == QUDA_SINGLE_PRECISION) {
      if (reconstruct == QUDA_RECONSTRUCT_NO) {
	loadGaugeField((short2*)(even), (short2*)(odd), (float*)cpu.gauge, (float*)cpu.ghost,
		       cpu.order, reconstruct, bytes, volumeCB, surfaceCB, pad, nFace, link_type, fat_link_max);
      } else {
	loadGaugeField((short4*)(even), (short4*)(odd), (float*)cpu.gauge, (float*)cpu.ghost,
		       cpu.order, reconstruct, bytes, volumeCB, surfaceCB, pad, nFace, link_type, fat_link_max);	      
      }
    }

  }

}
Exemplo n.º 3
0
// Upload the host field `cpu` into this device field.
//
// cpu           - source host field; its `gauge` (and, for gauge fields,
//                 `ghost`) buffers are passed to the load routines, which fill
//                 the `even` and `odd` device buffers.
// pack_location - where the reordering takes place. Only
//                 QUDA_CPU_FIELD_LOCATION is implemented;
//                 QUDA_CUDA_FIELD_LOCATION raises "Not implemented".
//
// reconstruct == QUDA_RECONSTRUCT_10 selects the momentum-field loader; every
// other reconstruct type goes through the gauge-field loader. Precision
// combinations without a matching branch are reported through errorQuda
// instead of being skipped silently or reinterpreted as single precision.
void cudaGaugeField::loadCPUField(const cpuGaugeField &cpu, const QudaFieldLocation &pack_location)
{

  checkField(cpu);

  if (pack_location == QUDA_CUDA_FIELD_LOCATION) {
    errorQuda("Not implemented"); // awaiting Guochun's new gauge packing
  } else if (pack_location == QUDA_CPU_FIELD_LOCATION) {
    // FIXME
    // NOTE(review): presumably globals read by the packing routines - confirm.
    anisotropy_ = anisotropy;
    X_ = x;
    t_boundary_ = t_boundary;

#ifdef MULTI_GPU
    //FIXME: if this is MOM field, we don't need exchange data
    if(link_type != QUDA_ASQTAD_MOM_LINKS){ 
      cpu.exchangeGhost();
    }
#endif

    // Only double and single precision host buffers have a matching cast
    // below; reject anything else rather than returning without loading data.
    if (cpu.Precision() != QUDA_DOUBLE_PRECISION && cpu.Precision() != QUDA_SINGLE_PRECISION)
      errorQuda("Unsupported cpu precision %d", cpu.Precision());

    if (reconstruct != QUDA_RECONSTRUCT_10) { // gauge field
      if (precision == QUDA_DOUBLE_PRECISION) {

        // double precision device storage is always accessed as double2
        if (cpu.Precision() == QUDA_DOUBLE_PRECISION) {
          loadGaugeField((double2*)(even), (double2*)(odd), (double*)cpu.gauge, (double**)cpu.ghost,
                         cpu.Order(), reconstruct, bytes, volumeCB, surfaceCB, pad, nFace, link_type, fat_link_max);
        } else if (cpu.Precision() == QUDA_SINGLE_PRECISION) {
          loadGaugeField((double2*)(even), (double2*)(odd), (float*)cpu.gauge, (float**)cpu.ghost,
                         cpu.order, reconstruct, bytes, volumeCB, surfaceCB, pad, nFace, link_type, fat_link_max);
        }

      } else if (precision == QUDA_SINGLE_PRECISION) {

        // float2 for unreconstructed links, float4 for every other reconstruct type
        if (cpu.Precision() == QUDA_DOUBLE_PRECISION) {
          if (reconstruct == QUDA_RECONSTRUCT_NO) {
            loadGaugeField((float2*)(even), (float2*)(odd), (double*)cpu.gauge, (double**)cpu.ghost, 
                           cpu.order, reconstruct, bytes, volumeCB, surfaceCB, pad, nFace, link_type, fat_link_max);
          } else {
            loadGaugeField((float4*)(even), (float4*)(odd), (double*)cpu.gauge, (double**)cpu.ghost,
                           cpu.order, reconstruct, bytes, volumeCB, surfaceCB, pad, nFace, link_type, fat_link_max);
          }
        } else if (cpu.Precision() == QUDA_SINGLE_PRECISION) {
          if (reconstruct == QUDA_RECONSTRUCT_NO) {
            loadGaugeField((float2*)(even), (float2*)(odd), (float*)cpu.gauge, (float**)cpu.ghost,
                           cpu.order, reconstruct, bytes, volumeCB, surfaceCB, pad, nFace, link_type, fat_link_max);
          } else {
            loadGaugeField((float4*)(even), (float4*)(odd), (float*)cpu.gauge, (float**)cpu.ghost,
                           cpu.order, reconstruct, bytes, volumeCB, surfaceCB, pad, nFace, link_type, fat_link_max);
          }
        }

      } else if (precision == QUDA_HALF_PRECISION) {

        // short2 for unreconstructed links, short4 for every other reconstruct type
        if (cpu.Precision() == QUDA_DOUBLE_PRECISION){
          if (reconstruct == QUDA_RECONSTRUCT_NO) {
            loadGaugeField((short2*)(even), (short2*)(odd), (double*)cpu.gauge, (double**)cpu.ghost,
                           cpu.order, reconstruct, bytes, volumeCB, surfaceCB, pad, nFace, link_type, fat_link_max);
          } else {
            loadGaugeField((short4*)(even), (short4*)(odd), (double*)cpu.gauge, (double**)cpu.ghost,
                           cpu.order, reconstruct, bytes, volumeCB, surfaceCB, pad, nFace, link_type, fat_link_max);
          }
        } else if (cpu.Precision() == QUDA_SINGLE_PRECISION) {
          if (reconstruct == QUDA_RECONSTRUCT_NO) {
            loadGaugeField((short2*)(even), (short2*)(odd), (float*)cpu.gauge, (float**)cpu.ghost,
                           cpu.order, reconstruct, bytes, volumeCB, surfaceCB, pad, nFace, link_type, fat_link_max);
          } else {
            loadGaugeField((short4*)(even), (short4*)(odd), (float*)cpu.gauge, (float**)(cpu.ghost),
                           cpu.order, reconstruct, bytes, volumeCB, surfaceCB, pad, nFace, link_type, fat_link_max);
          }
        }

      } else {
        // Previously an unknown device precision fell through without a trace.
        errorQuda("Unsupported cuda precision %d", precision);
      }
    } else { // momentum field
      if  (precision == QUDA_DOUBLE_PRECISION) {
        if (cpu.Precision() == QUDA_DOUBLE_PRECISION) {
          loadMomField((double2*)(even), (double2*)(odd), (double*)cpu.gauge, bytes, volumeCB, pad);
        } else if (cpu.Precision() == QUDA_SINGLE_PRECISION) {
          loadMomField((double2*)(even), (double2*)(odd), (float*)cpu.gauge, bytes, volumeCB, pad);
        } 
      } else if (precision == QUDA_SINGLE_PRECISION) {
        if (cpu.Precision() == QUDA_DOUBLE_PRECISION) {
          loadMomField((float2*)(even), (float2*)(odd), (double*)cpu.gauge, bytes, volumeCB, pad);
        } else if (cpu.Precision() == QUDA_SINGLE_PRECISION) {
          loadMomField((float2*)(even), (float2*)(odd), (float*)cpu.gauge, bytes, volumeCB, pad);
        } 
      } else {
        // The former catch-all branch cast every non-double device buffer to
        // float2, which would misread half precision storage. Fail instead.
        errorQuda("Unsupported cuda precision %d for momentum field", precision);
      }
    } // gauge or momentum
  } else {
    errorQuda("Invalid pack location %d", pack_location);
  }

}