void GPUInterface::MemsetShort(GPUPtr dest,
                               unsigned short val,
                               size_t count) {
#ifdef BEAGLE_DEBUG_FLOW
    fprintf(stderr, "\t\t\tEntering GPUInterface::MemsetShort\n");
#endif

    SAFE_CUPP(cuMemsetD16(dest, val, count));

#ifdef BEAGLE_DEBUG_FLOW
    fprintf(stderr, "\t\t\tLeaving  GPUInterface::MemsetShort\n");
#endif

}
Пример #2
0
CUresult I_WRAP_SONAME_FNNAME_ZZ(libcudaZdsoZa, cuMemsetD16Async)(CUdeviceptr dstDevice, unsigned short us, size_t N, CUstream hStream) {
   int error = 0;
   long vgErrorAddress;
   
   vgErrorAddress = VALGRIND_CHECK_MEM_IS_DEFINED(&hStream, sizeof(CUstream));
   if (vgErrorAddress) {
      error++;
      VALGRIND_PRINTF("Error: 'hStream' in call to cuMemsetD16Async not defined.\n");
   }
   
   cgLock();
   
   CUcontext ctx = NULL;
   cgCtxListType *nodeCtx;
   cgMemListType *nodeMemDst;
   
   
   // Get current context ..
   cgGetCtx(&ctx);
   nodeCtx = cgFindCtx(ctx);
   
   // .. and locate memory if we are handling device memory
   nodeMemDst = cgFindMem(nodeCtx, dstDevice);
   
   if (nodeMemDst && nodeMemDst->locked & 2 && nodeMemDst->stream != hStream) {
      error++;
      VALGRIND_PRINTF("Error: Concurrent write and read access by different streams.\n");
   }
   
   if (nodeMemDst) {
      nodeMemDst->locked = nodeMemDst->locked | 2;
      nodeMemDst->stream = hStream;
   }
   
   cgUnlock();
   
   if (error) {
      VALGRIND_PRINTF_BACKTRACE("");
   }
   
   return cuMemsetD16(dstDevice, us, N);
}