示例#1
0
  /**
     Construct a device clover field.  For each of the two host buffers
     that is non-null, device storage is allocated and the host data is
     handed to loadCPUField() to populate it.

     @param h_clov     Host clover term; if null, no clover storage is
                       allocated by this constructor.
     @param h_clov_inv Host inverse clover term; if null, no inverse
                       storage is allocated by this constructor.
     @param cpu_prec   Precision of the host buffers (forwarded to loadCPUField).
     @param cpu_order  Ordering of the host buffers (forwarded to loadCPUField).
     @param param      Parameters forwarded to the CloverField base class.
  */
  cudaCloverField::cudaCloverField(const void *h_clov, const void *h_clov_inv,
                                   const QudaPrecision cpu_prec,
                                   const QudaCloverFieldOrder cpu_order,
                                   const CloverFieldParam &param)
    : CloverField(param), clover(0), norm(0), cloverInv(0), invNorm(0)
  {
    // NOTE(review): bytes, norm_bytes, precision and total_bytes are not
    // set here; presumably they are initialised by CloverField(param).

    if (h_clov) {
      clover = device_malloc(bytes);

      // a separate norm array is only allocated for half precision
      if (precision == QUDA_HALF_PRECISION) {
        norm = device_malloc(norm_bytes);
      }

      // the even parity occupies the first half of each allocation and
      // the odd parity the second half
      even = clover;
      odd = (char*)clover + bytes/2;

      // when norm is null (non-half precision) oddNorm is merely an
      // offset from null and must not be dereferenced
      evenNorm = norm;
      oddNorm = (char*)norm + norm_bytes/2;

      loadCPUField(clover, norm, h_clov, cpu_prec, cpu_order);
    }

    if (h_clov_inv) {
      cloverInv = device_malloc(bytes);

      if (precision == QUDA_HALF_PRECISION) {
        // sized with norm_bytes (not bytes) so that it matches the
        // clover norm allocation, the norm_bytes/2 parity split below
        // and the total_bytes accounting
        invNorm = device_malloc(norm_bytes);
      }

      evenInv = cloverInv;
      oddInv = (char*)cloverInv + bytes/2;

      evenInvNorm = invNorm;
      oddInvNorm = (char*)invNorm + norm_bytes/2;

      // NOTE(review): only the inverse adds to total_bytes here;
      // presumably the base class already accounts for the clover
      // term itself -- confirm.
      total_bytes += bytes + norm_bytes;

      loadCPUField(cloverInv, invNorm, h_clov_inv, cpu_prec, cpu_order);

      // this is a hack to ensure that we can autotune the clover
      // operator when just using symmetric preconditioning
      if (!clover) {
        clover = cloverInv;
        even = evenInv;
        odd = oddInv;
      }
      if (!norm) {
        norm = invNorm;
        evenNorm = evenInvNorm;
        oddNorm = oddInvNorm;
      }
    }

#ifdef USE_TEXTURE_OBJECTS
    // NOTE(review): if h_clov_inv is null the *Inv pointers are never
    // assigned in this constructor (and likewise even/odd when both
    // host pointers are null); confirm they are initialised elsewhere
    // before being used to create texture objects.
    createTexObject(evenTex, evenNormTex, even, evenNorm);
    createTexObject(oddTex, oddNormTex, odd, oddNorm);
    createTexObject(evenInvTex, evenInvNormTex, evenInv, evenInvNorm);
    createTexObject(oddInvTex, oddInvNormTex, oddInv, oddInvNorm);
#endif

  }
示例#2
0
  /**
     Construct a device clover field.  For each of the two host buffers
     that is non-null, device storage is allocated with cudaMalloc and
     the host data is handed to loadCPUField() to populate it.

     @param h_clov     Host clover term; if null, no clover storage is
                       allocated by this constructor.
     @param h_clov_inv Host inverse clover term; if null, no inverse
                       storage is allocated by this constructor.
     @param cpu_prec   Precision of the host buffers (forwarded to loadCPUField).
     @param cpu_order  Ordering of the host buffers (forwarded to loadCPUField).
     @param param      Parameters forwarded to the CloverField base class.
  */
  cudaCloverField::cudaCloverField(const void *h_clov, const void *h_clov_inv,
                                   const QudaPrecision cpu_prec,
                                   const QudaCloverFieldOrder cpu_order,
                                   const CloverFieldParam &param)
    : CloverField(param), clover(0), norm(0), cloverInv(0), invNorm(0)
  {
    // NOTE(review): bytes, norm_bytes, precision and total_bytes are not
    // set here; presumably they are initialised by CloverField(param).

    // Every allocation is checked against cudaSuccess rather than only
    // cudaErrorMemoryAllocation, so that any failure is reported instead
    // of continuing with an unallocated pointer.

    if (h_clov) {
      if (cudaMalloc((void**)&clover, bytes) != cudaSuccess) {
        errorQuda("Error allocating clover term");
      }

      // a separate norm array is only allocated for half precision
      if (precision == QUDA_HALF_PRECISION) {
        if (cudaMalloc((void**)&norm, norm_bytes) != cudaSuccess) {
          errorQuda("Error allocating clover norm");
        }
      }

      // the even parity occupies the first half of each allocation and
      // the odd parity the second half
      even = clover;
      odd = (char*)clover + bytes/2;

      // when norm is null (non-half precision) oddNorm is merely an
      // offset from null and must not be dereferenced
      evenNorm = norm;
      oddNorm = (char*)norm + norm_bytes/2;

      loadCPUField(clover, norm, h_clov, cpu_prec, cpu_order);
    }

    if (h_clov_inv) {
      if (cudaMalloc((void**)&cloverInv, bytes) != cudaSuccess) {
        errorQuda("Error allocating clover inverse term");
      }

      if (precision == QUDA_HALF_PRECISION) {
        if (cudaMalloc((void**)&invNorm, norm_bytes) != cudaSuccess) {
          errorQuda("Error allocating clover inverse norm");
        }
      }

      evenInv = cloverInv;
      oddInv = (char*)cloverInv + bytes/2;

      evenInvNorm = invNorm;
      oddInvNorm = (char*)invNorm + norm_bytes/2;

      // NOTE(review): only the inverse adds to total_bytes here;
      // presumably the base class already accounts for the clover
      // term itself -- confirm.
      total_bytes += bytes + norm_bytes;

      loadCPUField(cloverInv, invNorm, h_clov_inv, cpu_prec, cpu_order);

      // this is a hack to ensure that we can autotune the clover
      // operator when just using symmetric preconditioning
      if (!clover) {
        clover = cloverInv;
        even = evenInv;
        odd = oddInv;
      }
      if (!norm) {
        norm = invNorm;
        evenNorm = evenInvNorm;
        oddNorm = oddInvNorm;
      }
    }

  }