Ejemplo n.º 1
0
/**
 * Pack one parity of a host clover field into the device layout and copy it
 * to the GPU.
 *
 * The host data is first repacked into a temporary staging buffer (pinned
 * memory from cudaMallocHost, or plain malloc when __DEVICE_EMULATION__ is
 * defined) and then transferred with a blocking cudaMemcpy.  For half
 * precision a second staging buffer carries the norm array, which is copied
 * to ret.cloverNorm.
 *
 * @param ret          Destination descriptor; this routine reads its
 *                     precision, bytes, volume, pad, clover and cloverNorm
 *                     fields.
 * @param clover       Host clover data, interpreted as double* or float*
 *                     according to cpu_prec.
 * @param cpu_prec     Precision of the host data.
 * @param clover_order Host ordering; only QUDA_PACKED_CLOVER_ORDER is accepted.
 *
 * Errors are reported through errorQuda().
 * NOTE(review): errorQuda() is presumably fatal (does not return) -- confirm;
 * the code below does not attempt to unwind after reporting an error.
 */
void loadParityClover(ParityClover ret, void *clover, QudaPrecision cpu_prec, 
		      CloverFieldOrder clover_order)
{
  if (ret.precision == QUDA_DOUBLE_PRECISION && cpu_prec != QUDA_DOUBLE_PRECISION) {
    errorQuda("Cannot have CUDA double precision without CPU double precision");
  }
  if (clover_order != QUDA_PACKED_CLOVER_ORDER) {
    errorQuda("Invalid clover_order");
  }

  const bool halfPrec = (ret.precision == QUDA_HALF_PRECISION);

  // Size of the half-precision norm array.
  // NOTE(review): the factor 18 is taken verbatim from the original code;
  // presumably it is the ratio of packed half-precision clover bytes to norm
  // bytes per site -- confirm against the allocation of ret.cloverNorm.
  const size_t normBytes = ret.bytes / 18;

  // Staging buffers; initialised so that a skipped allocation is never
  // mistaken for a valid pointer.
  void *packedClover = 0;
  void *packedCloverNorm = 0;

#ifndef __DEVICE_EMULATION__
  // Any status other than cudaSuccess is a failure, not only
  // cudaErrorMemoryAllocation.
  if (cudaMallocHost(&packedClover, ret.bytes) != cudaSuccess) {
    errorQuda("Error allocating clover pinned memory");
  }
  if (halfPrec) {
    if (cudaMallocHost(&packedCloverNorm, normBytes) != cudaSuccess) {
      errorQuda("Error allocating clover pinned memory");
    }
  }
#else
  packedClover = malloc(ret.bytes);
  // malloc(0) may legitimately return a null pointer, so only treat null as
  // an error for a non-empty request.
  if (!packedClover && ret.bytes > 0) {
    errorQuda("Error allocating clover host memory");
  }
  if (halfPrec) {
    packedCloverNorm = malloc(normBytes);
    if (!packedCloverNorm && normBytes > 0) {
      errorQuda("Error allocating clover host memory");
    }
  }
#endif

  // Repack the host field into the staging buffer, selecting the overload
  // that matches the (device precision, host precision) pair.
  if (ret.precision == QUDA_DOUBLE_PRECISION) {
    packParityClover((double2 *)packedClover, (double *)clover, ret.volume, ret.pad);
  } else if (ret.precision == QUDA_SINGLE_PRECISION) {
    if (cpu_prec == QUDA_DOUBLE_PRECISION) {
      packParityClover((float4 *)packedClover, (double *)clover, ret.volume, ret.pad);
    } else {
      packParityClover((float4 *)packedClover, (float *)clover, ret.volume, ret.pad);
    }
  } else {
    if (cpu_prec == QUDA_DOUBLE_PRECISION) {
      packParityCloverHalf((short4 *)packedClover, (float *)packedCloverNorm, 
			   (double *)clover, ret.volume, ret.pad);
    } else {
      packParityCloverHalf((short4 *)packedClover, (float *)packedCloverNorm, 
			   (float *)clover, ret.volume, ret.pad);
    }
  }

  // Upload to the device; a failed copy would otherwise leave the device
  // field silently stale or uninitialised.
  if (cudaMemcpy(ret.clover, packedClover, ret.bytes, cudaMemcpyHostToDevice) != cudaSuccess) {
    errorQuda("Error copying clover field to device");
  }
  if (halfPrec) {
    if (cudaMemcpy(ret.cloverNorm, packedCloverNorm, normBytes, cudaMemcpyHostToDevice) != cudaSuccess) {
      errorQuda("Error copying clover norm field to device");
    }
  }

  // Release the staging buffers with the deallocator matching the allocator.
#ifndef __DEVICE_EMULATION__
  cudaFreeHost(packedClover);
  if (halfPrec) cudaFreeHost(packedCloverNorm);
#else
  free(packedClover);
  if (halfPrec) free(packedCloverNorm);
#endif

}
Ejemplo n.º 2
0
  void cudaCloverField::loadParityField(void *clover, void *cloverNorm, const void *h_clover, 
					const QudaPrecision cpu_prec, const CloverFieldOrder cpu_order)
  {
    // use pinned memory                                                                                           
    void *packedClover, *packedCloverNorm;

    if (precision == QUDA_DOUBLE_PRECISION && cpu_prec != QUDA_DOUBLE_PRECISION) {
      errorQuda("Cannot have CUDA double precision without CPU double precision");
    }
    if (cpu_order != QUDA_PACKED_CLOVER_ORDER && cpu_order != QUDA_BQCD_CLOVER_ORDER) 
      errorQuda("Invalid clover order %d", cpu_order);

    if (cudaMallocHost(&packedClover, bytes/2) == cudaErrorMemoryAllocation)
      errorQuda("Error allocating clover pinned memory");

    if (precision == QUDA_HALF_PRECISION) {
      if (cudaMallocHost(&packedCloverNorm, norm_bytes/2) == cudaErrorMemoryAllocation)
	{
	  errorQuda("Error allocating clover pinned memory");
	} 
    }
    
    if (precision == QUDA_DOUBLE_PRECISION) {
      packParityClover((double2 *)packedClover, (double *)h_clover, volumeCB, pad, cpu_order);
    } else if (precision == QUDA_SINGLE_PRECISION) {
      if (cpu_prec == QUDA_DOUBLE_PRECISION) {
	packParityClover((float4 *)packedClover, (double *)h_clover, volumeCB, pad, cpu_order);
      } else {
	packParityClover((float4 *)packedClover, (float *)h_clover, volumeCB, pad, cpu_order);
      }
    } else {
      if (cpu_prec == QUDA_DOUBLE_PRECISION) {
	packParityCloverHalf((short4 *)packedClover, (float *)packedCloverNorm, 
			     (double *)h_clover, volumeCB, pad, cpu_order);
      } else {
	packParityCloverHalf((short4 *)packedClover, (float *)packedCloverNorm, 
			     (float *)h_clover, volumeCB, pad, cpu_order);
      }
    }
  
    cudaMemcpy(clover, packedClover, bytes/2, cudaMemcpyHostToDevice);
    if (precision == QUDA_HALF_PRECISION)
      cudaMemcpy(cloverNorm, packedCloverNorm, norm_bytes/2, cudaMemcpyHostToDevice);

    cudaFreeHost(packedClover);
    if (precision == QUDA_HALF_PRECISION) cudaFreeHost(packedCloverNorm);
  }
Ejemplo n.º 3
0
  void cudaCloverField::loadParityField(void *clover, void *cloverNorm, const void *h_clover, 
					const QudaPrecision cpu_prec, const CloverFieldOrder cpu_order)
  {
    // use pinned memory                                                                                           
    void *packedClover, *packedCloverNorm=0;

    if (precision == QUDA_DOUBLE_PRECISION && cpu_prec != QUDA_DOUBLE_PRECISION) {
      errorQuda("Cannot have CUDA double precision without CPU double precision");
    }
    if (cpu_order != QUDA_PACKED_CLOVER_ORDER && cpu_order != QUDA_BQCD_CLOVER_ORDER) 
      errorQuda("Invalid clover order %d", cpu_order);

    resizeBuffer(bytes/2 + norm_bytes/2);
    packedClover = bufferPinned;
    if (precision == QUDA_HALF_PRECISION) packedCloverNorm = (char*)bufferPinned + bytes/2;

    if (precision == QUDA_DOUBLE_PRECISION) {
      packParityClover((double2 *)packedClover, (double *)h_clover, volumeCB, pad, cpu_order);
    } else if (precision == QUDA_SINGLE_PRECISION) {
      if (cpu_prec == QUDA_DOUBLE_PRECISION) {
	packParityClover((float4 *)packedClover, (double *)h_clover, volumeCB, pad, cpu_order);
      } else {
	packParityClover((float4 *)packedClover, (float *)h_clover, volumeCB, pad, cpu_order);
      }
    } else {
      if (cpu_prec == QUDA_DOUBLE_PRECISION) {
	packParityCloverHalf((short4 *)packedClover, (float *)packedCloverNorm, 
			     (double *)h_clover, volumeCB, pad, cpu_order);
      } else {
	packParityCloverHalf((short4 *)packedClover, (float *)packedCloverNorm, 
			     (float *)h_clover, volumeCB, pad, cpu_order);
      }
    }
  
    cudaMemcpy(clover, packedClover, bytes/2, cudaMemcpyHostToDevice);
    if (precision == QUDA_HALF_PRECISION)
      cudaMemcpy(cloverNorm, packedCloverNorm, norm_bytes/2, cudaMemcpyHostToDevice);
  }