/**
 * Build a device clover field from host data.
 *
 * For each non-null host pointer a device buffer of `bytes` is allocated with
 * device_malloc and filled via loadCPUField.  When `precision` is
 * QUDA_HALF_PRECISION an additional norm buffer of `norm_bytes` is allocated
 * alongside it.  Each buffer is exposed through two pointers: one at the start
 * of the buffer (even*) and one at its midpoint (odd*).
 *
 * @param h_clov      Host clover data; if null, no direct-term buffer is
 *                    allocated here.
 * @param h_clov_inv  Host inverse clover data; if null, no inverse buffer is
 *                    allocated here.
 * @param cpu_prec    Forwarded to loadCPUField.
 * @param cpu_order   Forwarded to loadCPUField.
 * @param param       Forwarded to the CloverField base-class constructor.
 */
cudaCloverField::cudaCloverField(const void *h_clov, const void *h_clov_inv,
                                 const QudaPrecision cpu_prec,
                                 const QudaCloverFieldOrder cpu_order,
                                 const CloverFieldParam &param)
  : CloverField(param), clover(0), norm(0), cloverInv(0), invNorm(0)
{
  if (h_clov) {
    clover = device_malloc(bytes);
    if (precision == QUDA_HALF_PRECISION) {
      norm = device_malloc(norm_bytes);
    }

    // The two halves of the buffer are addressed separately.
    even = clover;
    odd = (char*)clover + bytes/2;

    // NOTE(review): norm stays null unless precision is half, so oddNorm is
    // then an offset from a null pointer -- presumably never dereferenced in
    // that case; confirm against the consumers of oddNorm.
    evenNorm = norm;
    oddNorm = (char*)norm + norm_bytes/2;

    // NOTE(review): unlike the inverse branch below, total_bytes is not
    // updated here -- confirm whether that asymmetry is intentional.
    loadCPUField(clover, norm, h_clov, cpu_prec, cpu_order);
  }

  if (h_clov_inv) {
    cloverInv = device_malloc(bytes);
    if (precision == QUDA_HALF_PRECISION) {
      // Sized by norm_bytes (not bytes): the norm buffer is split at
      // norm_bytes/2 below and accounted for as norm_bytes in total_bytes.
      invNorm = device_malloc(norm_bytes);
    }

    evenInv = cloverInv;
    oddInv = (char*)cloverInv + bytes/2;

    evenInvNorm = invNorm;
    oddInvNorm = (char*)invNorm + norm_bytes/2;

    total_bytes += bytes + norm_bytes;

    loadCPUField(cloverInv, invNorm, h_clov_inv, cpu_prec, cpu_order);

    // this is a hack to ensure that we can autotune the clover
    // operator when just using symmetric preconditioning
    if (!clover) {
      clover = cloverInv;
      even = evenInv;
      odd = oddInv;
    }
    if (!norm) {
      norm = invNorm;
      evenNorm = evenInvNorm;
      oddNorm = oddInvNorm;
    }
  }

#ifdef USE_TEXTURE_OBJECTS
  // NOTE(review): these calls run even when one of the host pointers was null,
  // in which case the corresponding even*/odd* members were not assigned in
  // this constructor -- verify they are initialised elsewhere.
  createTexObject(evenTex, evenNormTex, even, evenNorm);
  createTexObject(oddTex, oddNormTex, odd, oddNorm);
  createTexObject(evenInvTex, evenInvNormTex, evenInv, evenInvNorm);
  createTexObject(oddInvTex, oddInvNormTex, oddInv, oddInvNorm);
#endif
}
/**
 * Build a device clover field from host data.
 *
 * For each non-null host pointer a device buffer of `bytes` is allocated with
 * cudaMalloc and filled via loadCPUField.  When `precision` is
 * QUDA_HALF_PRECISION an additional norm buffer of `norm_bytes` is allocated
 * alongside it.  Each buffer is exposed through two pointers: one at the start
 * of the buffer (even*) and one at its midpoint (odd*).
 *
 * Any cudaMalloc result other than cudaSuccess is reported through errorQuda.
 *
 * @param h_clov      Host clover data; if null, no direct-term buffer is
 *                    allocated here.
 * @param h_clov_inv  Host inverse clover data; if null, no inverse buffer is
 *                    allocated here.
 * @param cpu_prec    Forwarded to loadCPUField.
 * @param cpu_order   Forwarded to loadCPUField.
 * @param param       Forwarded to the CloverField base-class constructor.
 */
cudaCloverField::cudaCloverField(const void *h_clov, const void *h_clov_inv,
                                 const QudaPrecision cpu_prec,
                                 const QudaCloverFieldOrder cpu_order,
                                 const CloverFieldParam &param)
  : CloverField(param), clover(0), norm(0), cloverInv(0), invNorm(0)
{
  if (h_clov) {
    // Compare against cudaSuccess so that every failure code is caught, not
    // only cudaErrorMemoryAllocation.
    if (cudaMalloc((void**)&clover, bytes) != cudaSuccess) {
      errorQuda("Error allocating clover term");
    }
    if (precision == QUDA_HALF_PRECISION) {
      if (cudaMalloc((void**)&norm, norm_bytes) != cudaSuccess) {
        errorQuda("Error allocating clover norm");
      }
    }

    // The two halves of the buffer are addressed separately.
    even = clover;
    odd = (char*)clover + bytes/2;

    // NOTE(review): norm stays null unless precision is half, so oddNorm is
    // then an offset from a null pointer -- presumably never dereferenced in
    // that case; confirm against the consumers of oddNorm.
    evenNorm = norm;
    oddNorm = (char*)norm + norm_bytes/2;

    // NOTE(review): unlike the inverse branch below, total_bytes is not
    // updated here -- confirm whether that asymmetry is intentional.
    loadCPUField(clover, norm, h_clov, cpu_prec, cpu_order);
  }

  if (h_clov_inv) {
    if (cudaMalloc((void**)&cloverInv, bytes) != cudaSuccess) {
      errorQuda("Error allocating clover inverse term");
    }
    if (precision == QUDA_HALF_PRECISION) {
      if (cudaMalloc((void**)&invNorm, norm_bytes) != cudaSuccess) {
        errorQuda("Error allocating clover inverse norm");
      }
    }

    evenInv = cloverInv;
    oddInv = (char*)cloverInv + bytes/2;

    evenInvNorm = invNorm;
    oddInvNorm = (char*)invNorm + norm_bytes/2;

    total_bytes += bytes + norm_bytes;

    loadCPUField(cloverInv, invNorm, h_clov_inv, cpu_prec, cpu_order);

    // this is a hack to ensure that we can autotune the clover
    // operator when just using symmetric preconditioning
    if (!clover) {
      clover = cloverInv;
      even = evenInv;
      odd = oddInv;
    }
    if (!norm) {
      norm = invNorm;
      evenNorm = evenInvNorm;
      oddNorm = oddInvNorm;
    }
  }
}