// Fill a device buffer with a repeated 16-bit value.
//
// dest  - device pointer to the destination buffer (must be 2-byte aligned)
// val   - 16-bit pattern written to every element
// count - number of 16-bit elements (NOT bytes) to set
//
// Delegates to the CUDA driver call cuMemsetD16; SAFE_CUPP is the file's
// standard driver-API error-checking wrapper.
void GPUInterface::MemsetShort(GPUPtr dest, unsigned short val, size_t count) {
#ifdef BEAGLE_DEBUG_FLOW
    fprintf(stderr, "\t\t\tEntering GPUInterface::MemsetShort\n");
#endif

    SAFE_CUPP(cuMemsetD16(dest, val, count));

#ifdef BEAGLE_DEBUG_FLOW
    fprintf(stderr, "\t\t\tLeaving GPUInterface::MemsetShort\n");
#endif
}
// Valgrind interception wrapper for cuMemsetD16Async.
//
// Checks that the caller passed a defined stream handle, then consults the
// tool's context/memory bookkeeping: if the destination buffer is already
// write-locked (bit 2) by a *different* stream, a concurrent-access error is
// reported. The buffer is then marked write-locked and tagged with hStream.
//
// BUG FIX: the original wrapper invoked the synchronous cuMemsetD16 directly,
// silently dropping hStream (breaking stream ordering) and bypassing the
// Valgrind wrapping protocol. We now fetch the wrapped function with
// VALGRIND_GET_ORIG_FN and forward all four arguments via CALL_FN_W_WWWW.
CUresult I_WRAP_SONAME_FNNAME_ZZ(libcudaZdsoZa, cuMemsetD16Async)(CUdeviceptr dstDevice, unsigned short us, size_t N, CUstream hStream) {
    OrigFn fn;
    CUresult result;
    int error = 0;
    long vgErrorAddress;

    // Must be fetched before any other wrapped call can disturb the TLS slot.
    VALGRIND_GET_ORIG_FN(fn);

    // The stream handle is read by the driver; flag it if uninitialized.
    vgErrorAddress = VALGRIND_CHECK_MEM_IS_DEFINED(&hStream, sizeof(CUstream));
    if (vgErrorAddress) {
        error++;
        VALGRIND_PRINTF("Error: 'hStream' in call to cuMemsetD16Async not defined.\n");
    }

    cgLock();

    CUcontext ctx = NULL;
    cgCtxListType *nodeCtx;
    cgMemListType *nodeMemDst;

    // Get current context ..
    cgGetCtx(&ctx);
    nodeCtx = cgFindCtx(ctx);
    // .. and locate memory if we are handling device memory
    nodeMemDst = cgFindMem(nodeCtx, dstDevice);

    // A write lock (bit 2) held by another stream means two streams touch the
    // same allocation without ordering between them.
    if (nodeMemDst && nodeMemDst->locked & 2 && nodeMemDst->stream != hStream) {
        error++;
        VALGRIND_PRINTF("Error: Concurrent write and read access by different streams.\n");
    }

    // Record this stream as the current writer of the destination buffer.
    if (nodeMemDst) {
        nodeMemDst->locked = nodeMemDst->locked | 2;
        nodeMemDst->stream = hStream;
    }

    cgUnlock();

    if (error) {
        VALGRIND_PRINTF_BACKTRACE("");
    }

    // Forward to the real cuMemsetD16Async, preserving the stream argument.
    CALL_FN_W_WWWW(result, fn, dstDevice, us, N, hStream);
    return result;
}