// pthread_mutex_destroy PTH_FUNC(int, pthreadZumutexZudestroy, // pthread_mutex_destroy pthread_mutex_t *mutex) { int ret; OrigFn fn; VALGRIND_GET_ORIG_FN(fn); if (TRACE_PTH_FNS) { fprintf(stderr, "<< pthread_mxdestroy %p", mutex); fflush(stderr); } DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_MUTEX_DESTROY_PRE, pthread_mutex_t*,mutex); CALL_FN_W_W(ret, fn, mutex); if (ret != 0) { DO_PthAPIerror( "pthread_mutex_destroy", ret ); } if (TRACE_PTH_FNS) { fprintf(stderr, " :: mxdestroy -> %d >>\n", ret); } return ret; }
/*
 * Checked wrapper for cuMemsetD2D32: fills a 2D region of device memory with
 * the 32-bit value 'ui'.  Per the CUDA driver API, dstPitch is given in
 * BYTES while Width and Height count 32-bit elements / rows.  The wrapper
 * forwards the call, then validates parameter definedness, allocation size,
 * alignment and pitch consistency, printing Valgrind errors on violations.
 * Returns the driver's result unchanged.
 */
CUresult I_WRAP_SONAME_FNNAME_ZZ(libcudaZdsoZa, cuMemsetD2D32)(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height)
{
   OrigFn fn;
   CUresult result;
   CUcontext ctx = NULL;
   cgMemListType *nodeMemDst;
   int error = 0;
   long vgErrorAddress;
   size_t dstSize;
   size_t bytesNeeded;

   VALGRIND_GET_ORIG_FN(fn);
   cgLock();
   CALL_FN_W_5W(result, fn, dstDevice, dstPitch, ui, Width, Height);

   // Check if function parameters are defined.
   // TODO: Warning or error in case of a partially undefined ui?
   vgErrorAddress = VALGRIND_CHECK_MEM_IS_DEFINED(&dstDevice, sizeof(CUdeviceptr));
   if (vgErrorAddress) {
      error++;
      VALGRIND_PRINTF("Error: 'dstDevice' in call to cuMemsetD2D32 not defined.\n");
   }
   vgErrorAddress = VALGRIND_CHECK_MEM_IS_DEFINED(&dstPitch, sizeof(size_t));
   if (vgErrorAddress) {
      error++;
      VALGRIND_PRINTF("Error: 'dstPitch' in call to cuMemsetD2D32 not defined.\n");
   }
   vgErrorAddress = VALGRIND_CHECK_MEM_IS_DEFINED(&ui, sizeof(ui));
   if (vgErrorAddress) {
      error++;
      VALGRIND_PRINTF("Warning: 'ui' in call to cuMemsetD2D32 is not fully defined.\n");
   }
   vgErrorAddress = VALGRIND_CHECK_MEM_IS_DEFINED(&Width, sizeof(size_t));
   if (vgErrorAddress) {
      error++;
      VALGRIND_PRINTF("Error: 'Width' in call to cuMemsetD2D32 not defined.\n");
   }
   vgErrorAddress = VALGRIND_CHECK_MEM_IS_DEFINED(&Height, sizeof(size_t));
   if (vgErrorAddress) {
      error++;
      VALGRIND_PRINTF("Error: 'Height' in call to cuMemsetD2D32 not defined.\n");
   }

   // Fetch current context
   cgGetCtx(&ctx);
   nodeMemDst = cgFindMem(cgFindCtx(ctx), dstDevice);

   // Check if memory has been allocated
   if (!nodeMemDst) {
      error++;
      VALGRIND_PRINTF("Error: Destination device memory not allocated in call to cuMemsetD2D32.\n");
   } else {
      // If memory is allocated, check size of available memory
      dstSize = nodeMemDst->size - (dstDevice - nodeMemDst->dptr);
      // The region touched consists of Height rows of Width 32-bit elements,
      // consecutive rows starting dstPitch BYTES apart.  The last byte
      // written is therefore at dstPitch*(Height-1) + Width*sizeof(ui).
      // (The previous check used sizeof(ui)*dstPitch*Height, which multiplies
      // the byte pitch by the element size again and over-estimates the
      // required allocation.)
      if (Height > 0) {
         bytesNeeded = dstPitch * (Height - 1) + Width * sizeof(ui);
         if (dstSize < bytesNeeded) {
            error++;
            VALGRIND_PRINTF("Error: Allocated device memory too small in call to cuMemsetD2D32.\n"
                            " Expected %lu allocated bytes but only found %lu.\n",
                            (unsigned long)bytesNeeded, (unsigned long)dstSize);
         }
      }
      // Check if dstDevice and dstPitch are both properly aligned
      // TODO: Is this a valid check? (see also cuMemsetD32)
      if (dstDevice % 4) {
         error++;
         VALGRIND_PRINTF("Error: Pointer dstDevice in call to cuMemsetD2D32 not four byte aligned.\n");
      }
      if (dstPitch % 4) {
         error++;
         VALGRIND_PRINTF("Error: Destination pitch in call to cuMemsetD2D32 not four byte aligned.\n");
      }
   }

   // Make sure pitch (bytes) is big enough to accommodate one row of Width
   // 32-bit elements.  Width counts elements, so compare against bytes.
   if (dstPitch < Width * sizeof(ui)) {
      error++;
      VALGRIND_PRINTF("Error: dstPitch smaller than Width*sizeof(ui) in call to cuMemsetD2D32.\n");
   }

   if (error) {
      VALGRIND_PRINTF_BACKTRACE("");
   }

   cgUnlock();
   return result;
}
// pthread_create PTH_FUNC(int, pthreadZucreateZa, // pthread_create* pthread_t *thread, const pthread_attr_t *attr, void *(*start) (void *), void *arg) { int res; int ret; OrigFn fn; #if defined(ALLOCATE_THREAD_ARGS_ON_THE_STACK) DrdPosixThreadArgs thread_args; #endif DrdPosixThreadArgs* thread_args_p; VALGRIND_GET_ORIG_FN(fn); #if defined(ALLOCATE_THREAD_ARGS_ON_THE_STACK) thread_args_p = &thread_args; #else thread_args_p = malloc(sizeof(*thread_args_p)); #endif assert(thread_args_p); thread_args_p->start = start; thread_args_p->arg = arg; #if defined(WAIT_UNTIL_CREATED_THREAD_STARTED) DRD_IGNORE_VAR(thread_args_p->wrapper_started); thread_args_p->wrapper_started = 0; #endif /* * Find out whether the thread will be started as a joinable thread * or as a detached thread. If no thread attributes have been specified, * this means that the new thread will be started as a joinable thread. */ thread_args_p->detachstate = PTHREAD_CREATE_JOINABLE; if (attr) { if (pthread_attr_getdetachstate(attr, &thread_args_p->detachstate) != 0) { assert(0); } } assert(thread_args_p->detachstate == PTHREAD_CREATE_JOINABLE || thread_args_p->detachstate == PTHREAD_CREATE_DETACHED); DRD_(entering_pthread_create)(); CALL_FN_W_WWWW(ret, fn, thread, attr, DRD_(thread_wrapper), thread_args_p); DRD_(left_pthread_create)(); #if defined(WAIT_UNTIL_CREATED_THREAD_STARTED) if (ret == 0) { /* * Wait until the thread wrapper started. * @todo Find out why some regression tests fail if thread arguments are * passed via dynamically allocated memory and if the loop below is * removed. */ while (! thread_args_p->wrapper_started) { sched_yield(); } } #if defined(ALLOCATE_THREAD_ARGS_DYNAMICALLY) free(thread_args_p); #endif #endif VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__DRD_START_NEW_SEGMENT, pthread_self(), 0, 0, 0, 0); return ret; }
CUresult I_WRAP_SONAME_FNNAME_ZZ(libcudaZdsoZa, cuMemcpy3D)(const CUDA_MEMCPY3D *pCopy) { OrigFn fn; CUresult result; CUcontext ctx = NULL; int error = 0, error_addressable, error_defined; long vgErrorAddress = 0, vgErrorAddressDefined = 0; VALGRIND_GET_ORIG_FN(fn); cgLock(); CALL_FN_W_W(result, fn, pCopy); // Check if pCopy is null, not allocated or undefined. // For obvious reasons we skip the following checks if either condition is true. if (!pCopy) { error++; VALGRIND_PRINTF_BACKTRACE("Error: pCopy in call to cuMemcpy3D is NULL.\n"); cgUnlock(); return result; } else if ( vgErrorAddress = VALGRIND_CHECK_MEM_IS_ADDRESSABLE(pCopy, sizeof(CUDA_MEMCPY3D)) ) { error++; VALGRIND_PRINTF_BACKTRACE("Error: pCopy in call to cuMemcpy3D points to unallocated memory.\n"); cgUnlock(); return result; } // It makes no sense to check _IS_DEFINED on the whole structure, since only part of it is used! // General checks of constaints imposed by reference manual if (pCopy->srcMemoryType != CU_MEMORYTYPE_ARRAY) { if (pCopy->srcPitch && pCopy->srcPitch < pCopy->WidthInBytes + pCopy->srcXInBytes) { error++; VALGRIND_PRINTF("Error: srcPitch < WidthInBytes+srcXInBytes in cuMemcpy3D.\n"); } if (pCopy->srcHeight && pCopy->srcHeight < pCopy->Height + pCopy->srcY) { error++; VALGRIND_PRINTF("Error: srcHeight < Height+srcY in cuMemcpy3D.\n"); } } if (pCopy->dstMemoryType != CU_MEMORYTYPE_ARRAY) { if (pCopy->dstPitch && pCopy->dstPitch < pCopy->WidthInBytes + pCopy->dstXInBytes) { error++; VALGRIND_PRINTF("Error: dstPitch < WidthInBytes+dstXInBytes in cuMemcpy3D.\n"); } if (pCopy->dstHeight && pCopy->dstHeight < pCopy->Height + pCopy->dstY) { error++; VALGRIND_PRINTF("Error: dstHeight < Height+dstY in cuMemcpy3D.\n"); } } switch (pCopy->srcMemoryType) { case CU_MEMORYTYPE_UNIFIED: // TODO: How do we handle unified memory? break; case CU_MEMORYTYPE_HOST: { void *line; error_addressable = 0; error_defined = 0; // TODO: Is Height, Depth > 1, even for 1D/2D copy operations? 
for (int i = 0 ; i < pCopy->Height ; i++) { for (int j = 0 ; j < pCopy->Depth ; j++) { line = (void*)( (char*)pCopy->srcHost + ((pCopy->srcZ + j) * pCopy->srcHeight + (pCopy->srcY + i))*pCopy->srcPitch + pCopy->srcXInBytes ); vgErrorAddress = VALGRIND_CHECK_MEM_IS_ADDRESSABLE(line, (size_t)pCopy->WidthInBytes); if (vgErrorAddress) { error_addressable++; } else { vgErrorAddress = VALGRIND_CHECK_MEM_IS_DEFINED(line, (size_t)pCopy->WidthInBytes); if (vgErrorAddress) { error_defined++; } } } } // TODO: Can we give precise information about location of error? if (error_addressable) { error++; VALGRIND_PRINTF("Error: (Part of) source host memory not allocated\n" " in call to cuMemcpy3D.\n"); } if (error_defined) { error++; VALGRIND_PRINTF("Error: (Part of) source host memory not defined\n" " in call to cuMemcpy3D.\n"); } break; } case CU_MEMORYTYPE_DEVICE: { // ptrEnd points to the end of the memory area which pCopy->srcDevice points into CUdeviceptr line, ptrEnd; cgMemListType *nodeMem; // TODO: Check if pCopy->srcDevice is defined? cgGetCtx(&ctx); nodeMem = cgFindMem(cgFindCtx(ctx), pCopy->srcDevice); // We only track addressable status (whether memory is allocated) for device memory regions error_addressable = 0; if (nodeMem) { ptrEnd = nodeMem->dptr + nodeMem->size; /* for (int i = 0 ; i < pCopy->Height ; i++) { for (int j = 0 ; j < pCopy->Depth ; j++) { line = (CUdeviceptr)( pCopy->srcDevice + ((pCopy->srcZ + j) * pCopy->srcHeight + (pCopy->srcY + i)) * pCopy->srcPitch + pCopy->srcXInBytes ); // Is there enough allocated memory left to statisfy the current line? 
if (ptrEnd - line < pCopy->WidthInBytes) { error_addressable++; } } } */ // Device memory should not be fragmented, so we only check the very last slice of memory line = (CUdeviceptr)( pCopy->srcDevice + ( (pCopy->srcZ + pCopy->Depth - 1) * pCopy->srcHeight + (pCopy->srcY + pCopy->Height - 1) ) * pCopy->srcPitch + pCopy->srcXInBytes); if (ptrEnd - line < pCopy->WidthInBytes) { error_addressable++; } } else { error_addressable++; } if (error_addressable) { error++; VALGRIND_PRINTF("Error: (Part of) source device memory not allocated\n" " in call to cuMemcpy3D.\n"); } break; } case CU_MEMORYTYPE_ARRAY: { CUDA_ARRAY3D_DESCRIPTOR descriptor; int bytesPerElement; int widthInBytes; // Fetch array descriptor .. cuArray3DGetDescriptor(&descriptor, pCopy->srcArray); bytesPerElement = cgArrDescBytesPerElement(&descriptor); if (!bytesPerElement) { error++; VALGRIND_PRINTF("Error: Unknown Format value in src array descriptor in cuMemcpy3D.\n"); } widthInBytes = bytesPerElement * descriptor.Width; // .. 
and check if dimensions are conform to the ones requested in pCopy if (widthInBytes - pCopy->srcXInBytes < pCopy->WidthInBytes) { error++; VALGRIND_PRINTF("Error: Available width of %u bytes in source array is smaller than\n" " requested Width of %u bytes in pCopy of cuMemcpy3D.\n", widthInBytes - pCopy->srcXInBytes, pCopy->WidthInBytes); } if (pCopy->Height > 1 && descriptor.Height - pCopy->srcY < pCopy->Height) { error++; VALGRIND_PRINTF("Error: Available Height of %u in source array is smaller than\n" " requested Height of %u in pCopy of cuMemcpy3D.\n", descriptor.Height - pCopy->srcY, pCopy->Height); } if (pCopy->Depth > 1 && descriptor.Depth - pCopy->srcZ < pCopy->Depth) { error++; VALGRIND_PRINTF("Error: Available Depth of %u in source array is smaller than\n" " requested Depth of %u in pCopy of cuMemcpy3D.\n", descriptor.Depth - pCopy->srcY, pCopy->Height); } break; } default: error++; VALGRIND_PRINTF("Error: Unknown source memory type %d in cuMemcpy3D\n"); break; } switch (pCopy->dstMemoryType) { case CU_MEMORYTYPE_UNIFIED: // TODO: How do we handle unified memory? break; case CU_MEMORYTYPE_HOST: { void *line; error_addressable = 0; error_defined = 0; // TODO: Is Height, Depth > 1, even for 1D/2D copy operations? for (int i = 0 ; i < pCopy->Height ; i++) { for (int j = 0 ; j < pCopy->Depth ; j++) { line = (void*)( (char*)pCopy->dstHost + ((pCopy->dstZ + j) * pCopy->dstHeight + (pCopy->dstY + i))*pCopy->dstPitch + pCopy->dstXInBytes ); // Unlike for the source operand we only need to check allocation status here vgErrorAddress = VALGRIND_CHECK_MEM_IS_ADDRESSABLE(line, (size_t)pCopy->WidthInBytes); if (vgErrorAddress) { error_addressable++; } } } // TODO: Can we give precise information about location of error? 
if (error_addressable) { error++; VALGRIND_PRINTF("Error: (Part of) destination host memory not allocated\n" " in call to cuMemcpy3D.\n"); } break; } case CU_MEMORYTYPE_DEVICE: { // ptrEnd points to the end of the memory area which pCopy->dstDevice points into CUdeviceptr line, ptrEnd; cgMemListType *nodeMem; // TODO: Check if pCopy->dstDevice is defined? cgGetCtx(&ctx); nodeMem = cgFindMem(cgFindCtx(ctx), pCopy->dstDevice); // We only track addressable status (whether memory is allocated) for device memory regions error_addressable = 0; if (nodeMem) { ptrEnd = nodeMem->dptr + nodeMem->size; /* for (int i = 0 ; i < pCopy->Height ; i++) { for (int j = 0 ; j < pCopy->Depth ; j++) { line = (CUdeviceptr)( pCopy->dstDevice + ((pCopy->dstZ + j) * pCopy->dstHeight + (pCopy->dstY + i)) * pCopy->dstPitch + pCopy->dstXInBytes ); // Is there enough allocated memory left to statisfy the current line? if (ptrEnd - line < pCopy->WidthInBytes) { error_addressable++; } } } */ // Device memory should not be fragmented, so we only check the very last slice of memory line = (CUdeviceptr)( pCopy->dstDevice + ( (pCopy->dstZ + pCopy->Depth - 1) * pCopy->dstHeight + (pCopy->dstY + pCopy->Height - 1) ) * pCopy->dstPitch + pCopy->dstXInBytes); if (ptrEnd - line < pCopy->WidthInBytes) { error_addressable++; } } else { error_addressable++; } if (error_addressable) { error++; VALGRIND_PRINTF("Error: (Part of) destination device memory not allocated\n" " in call to cuMemcpy3D.\n"); } break; } case CU_MEMORYTYPE_ARRAY: { CUDA_ARRAY3D_DESCRIPTOR descriptor; int bytesPerElement; int widthInBytes; // Fetch array descriptor .. cuArray3DGetDescriptor(&descriptor, pCopy->dstArray); bytesPerElement = cgArrDescBytesPerElement(&descriptor); if (!bytesPerElement) { error++; VALGRIND_PRINTF("Error: Unknown Format value in dst array descriptor in cuMemcpy3D.\n"); } widthInBytes = bytesPerElement * descriptor.Width; // .. 
and check if dimensions are conform to the ones requested in pCopy if (widthInBytes - pCopy->dstXInBytes < pCopy->WidthInBytes) { error++; VALGRIND_PRINTF("Error: Available width of %u bytes in destination array is smaller than\n" " requested Width of %u bytes in pCopy of cuMemcpy3D.\n", widthInBytes - pCopy->dstXInBytes, pCopy->WidthInBytes); } if (pCopy->Height > 1 && descriptor.Height - pCopy->dstY < pCopy->Height) { error++; VALGRIND_PRINTF("Error: Available Height of %u in destination array is smaller than\n" " requested Height of %u in pCopy of cuMemcpy3D.\n", descriptor.Height - pCopy->dstY, pCopy->Height); } if (pCopy->Depth > 1 && descriptor.Depth - pCopy->dstZ < pCopy->Depth) { error++; VALGRIND_PRINTF("Error: Available Depth of %u in destination array is smaller than\n" " requested Depth of %u in pCopy of cuMemcpy3D.\n", descriptor.Depth - pCopy->dstZ, pCopy->Depth); } break; } default: error++; VALGRIND_PRINTF("Error: Unknown destination memory type %d in cuMemcpy3D\n"); break; } if (error) { VALGRIND_PRINTF_BACKTRACE(" %d errors detected in call to cuMemcpy3D.", error); } cgUnlock(); return result; }
/*
 * Checked wrapper for cuMemcpyHtoA (host -> CUDA array copy).  Forwards the
 * call, then validates parameter definedness, source host memory allocation
 * and definedness, and the destination array's dimensions.
 * Fixes relative to the previous revision:
 *  - invalid "%l" format specifiers (undefined behavior) replaced by %lu/%ld;
 *  - error messages named the wrong function (cuMemcpyAtoD) and the wrong
 *    parameter (srcDevice), and one lacked a trailing newline;
 *  - "not not allocated" typo; unused nodeMemSrc removed;
 *  - a final backtrace is emitted when errors were found, matching the
 *    sibling wrappers.
 * Returns the driver's result unchanged.
 */
CUresult I_WRAP_SONAME_FNNAME_ZZ(libcudaZdsoZa, cuMemcpyHtoA)(CUarray dstArray, size_t dstOffset, const void *srcHost, size_t ByteCount)
{
   OrigFn fn;
   CUresult result;
   CUcontext ctx = NULL;
   cgCtxListType *nodeCtx;
   cgArrListType *nodeArrDst;
   long vgErrorAddress;
   int error = 0;

   VALGRIND_GET_ORIG_FN(fn);
   cgLock();
   CALL_FN_W_WWWW(result, fn, dstArray, dstOffset, srcHost, ByteCount);

   // Check if actual function parameters are defined
   if (VALGRIND_CHECK_MEM_IS_DEFINED(&dstArray, sizeof(CUarray))) {
      error++;
      VALGRIND_PRINTF("Error: dstArray in call to cuMemcpyHtoA is not defined.\n");
   } else if (!dstArray) {
      error++;
      VALGRIND_PRINTF("Error: dstArray in call to cuMemcpyHtoA is NULL.\n");
   }
   if (VALGRIND_CHECK_MEM_IS_DEFINED(&dstOffset, sizeof(size_t))) {
      error++;
      VALGRIND_PRINTF("Error: dstOffset in call to cuMemcpyHtoA is not defined.\n");
   }
   if (VALGRIND_CHECK_MEM_IS_DEFINED(&srcHost, sizeof(void*))) {
      error++;
      VALGRIND_PRINTF("Error: srcHost in call to cuMemcpyHtoA is not defined.\n");
   } else if (!srcHost) {
      error++;
      VALGRIND_PRINTF("Error: srcHost in call to cuMemcpyHtoA is NULL.\n");
   }
   if (VALGRIND_CHECK_MEM_IS_DEFINED(&ByteCount, sizeof(size_t))) {
      error++;
      VALGRIND_PRINTF("Error: ByteCount in call to cuMemcpyHtoA is not defined.\n");
   }

   cgGetCtx(&ctx);
   nodeCtx = cgFindCtx(ctx);
   nodeArrDst = cgFindArr(nodeCtx, dstArray);

   if (srcHost) {
      vgErrorAddress = VALGRIND_CHECK_MEM_IS_ADDRESSABLE(srcHost, ByteCount);
      // Check if memory referenced by srcHost has been allocated.
      if (vgErrorAddress) {
         error++;
         VALGRIND_PRINTF("Error: Source host memory in cuMemcpyHtoA is not allocated.\n"
                         " Expected %lu bytes but only found %ld.\n",
                         (unsigned long)ByteCount, vgErrorAddress - (long)srcHost);
      } else {
         // If allocated, now check if host memory is defined.
         vgErrorAddress = VALGRIND_CHECK_MEM_IS_DEFINED(srcHost, ByteCount);
         if (vgErrorAddress) {
            error++;
            VALGRIND_PRINTF("Error: Source host memory in cuMemcpyHtoA is not defined.\n"
                            " Expected %lu bytes but only found %ld.\n",
                            (unsigned long)ByteCount, vgErrorAddress - (long)srcHost);
         }
      }
   }

   if (nodeArrDst) {
      // Check if array is 1-dimensional or big enough in first dimension
      if (nodeArrDst->desc.Height > 1 || nodeArrDst->desc.Depth > 1) {
         if (nodeArrDst->desc.Width - dstOffset < ByteCount) {
            error++;
            VALGRIND_PRINTF("Error: Destination array in cuMemcpyHtoA is 2-dimensional\n"
                            " and ByteCount bigger than available width in first dimension.\n");
         } else {
            VALGRIND_PRINTF("Warning: Destination array in cuMemcpyHtoA is 2-dimensional.\n");
         }
      } else if (nodeArrDst->desc.Width - dstOffset < ByteCount) {
         // If array is 1D, check size.
         VALGRIND_PRINTF("Error: Destination array in cuMemcpyHtoA is too small.\n"
                         " Expected %lu bytes but only found %lu.\n",
                         (unsigned long)ByteCount,
                         (unsigned long)(nodeArrDst->desc.Width - dstOffset));
         error++;
      }
   } else {
      error++;
      VALGRIND_PRINTF("Error: Destination array not allocated in call to cuMemcpyHtoA.\n");
   }

   if (error) {
      VALGRIND_PRINTF_BACKTRACE("");
   }

   cgUnlock();
   return result;
}
// Copy Host->Device CUresult I_WRAP_SONAME_FNNAME_ZZ(libcudaZdsoZa, cuMemcpyHtoD)(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount) { OrigFn fn; CUresult result; CUcontext ctx = NULL; cgCtxListType *nodeCtx; cgMemListType *nodeMem; size_t dstSize; long vgErrorAddress, vgErrorAddressDstDevice, vgErrorAddressSrcHost; VALGRIND_GET_ORIG_FN(fn); cgLock(); vgErrorAddressDstDevice = VALGRIND_CHECK_MEM_IS_DEFINED(&dstDevice, sizeof(void*)); vgErrorAddressSrcHost = VALGRIND_CHECK_MEM_IS_DEFINED(&srcHost, sizeof(CUdeviceptr)); // TODO: Currently errors are exclusive .. i.e. with undefined src and NULL // dst pointer, only the undefined pointer is reported. if (vgErrorAddressDstDevice || vgErrorAddressSrcHost) { VALGRIND_PRINTF("Error:"); if (vgErrorAddressDstDevice) { VALGRIND_PRINTF(" destination device"); if (vgErrorAddressSrcHost) { VALGRIND_PRINTF(" and"); } } if (vgErrorAddressSrcHost) { VALGRIND_PRINTF(" source host"); } VALGRIND_PRINTF_BACKTRACE(" pointer in cuMemcpyHtoD not defined.\n"); } else if (dstDevice != 0 && srcHost != NULL) { cgGetCtx(&ctx); // Check allocation status and available size on device nodeCtx = cgFindCtx(ctx); nodeMem = cgFindMem(nodeCtx, dstDevice); if (!nodeMem) { VALGRIND_PRINTF("Error: Device memory during host->device memory copy is not allocated."); } else { dstSize = nodeMem->size - (dstDevice - nodeMem->dptr); if (dstSize < ByteCount) { VALGRIND_PRINTF("Error: Allocated device memory too small for host->device memory copy.\n"); VALGRIND_PRINTF(" Expected %lu allocated bytes but only found %lu.", ByteCount, dstSize); } } if (!nodeMem || dstSize < ByteCount) { VALGRIND_PRINTF_BACKTRACE("\n"); } // Check allocation and definedness for host memory vgErrorAddress = VALGRIND_CHECK_MEM_IS_ADDRESSABLE(srcHost, ByteCount); if (vgErrorAddress) { VALGRIND_PRINTF("Error: Host memory during host->device memory copy is not allocated.\n"); VALGRIND_PRINTF(" Expected %lu allocated bytes but only found %lu.", ByteCount, vgErrorAddress - 
(long)srcHost); } else { vgErrorAddress = VALGRIND_CHECK_MEM_IS_DEFINED(srcHost, ByteCount); if (vgErrorAddress) { VALGRIND_PRINTF("Error: Host memory during host->device memory copy is not defined.\n"); VALGRIND_PRINTF(" Expected %lu defined bytes but only found %lu.", ByteCount, vgErrorAddress - (long)srcHost); } } if (vgErrorAddress) { VALGRIND_PRINTF_BACKTRACE("\n"); } } else { VALGRIND_PRINTF("Error: cuMemcpyHtoD called with NULL"); if (dstDevice == 0) { VALGRIND_PRINTF(" device"); if (srcHost == NULL) VALGRIND_PRINTF(" and"); } if (srcHost == NULL) { VALGRIND_PRINTF(" host"); } VALGRIND_PRINTF_BACKTRACE(" pointer.\n"); } CALL_FN_W_WWW(result, fn, dstDevice, srcHost, ByteCount); cgUnlock(); return result; }
/*
 * Checked wrapper for cuMemsetD16: fills N 16-bit words of device memory
 * starting at dstDevice with the value 'us'.  The real driver call is made
 * first; afterwards the wrapper verifies that every parameter is defined,
 * that the target region is an allocation known to cudagrind and large
 * enough for N shorts, and that the pointer is two-byte aligned.  A
 * backtrace is printed whenever at least one problem was found.  The
 * driver's result is returned unchanged.
 */
CUresult I_WRAP_SONAME_FNNAME_ZZ(libcudaZdsoZa, cuMemsetD16)(CUdeviceptr dstDevice, unsigned short us, size_t N)
{
   OrigFn         origFn;
   CUresult       result;
   CUcontext      ctx = NULL;
   cgMemListType *dstNode;
   int            problems = 0;
   long           vgErrAddr;
   size_t         bytesAvail;

   VALGRIND_GET_ORIG_FN(origFn);
   cgLock();

   // Run the real memset before validating its arguments.
   CALL_FN_W_WWW(result, origFn, dstDevice, us, N);

   // Definedness of each actual parameter.
   // TODO: Warning or error in case of a partially undefined us?
   vgErrAddr = VALGRIND_CHECK_MEM_IS_DEFINED(&dstDevice, sizeof(CUdeviceptr));
   if (vgErrAddr) {
      problems++;
      VALGRIND_PRINTF("Error: 'dstDevice' in call to cuMemsetD16 not defined.\n");
   }

   vgErrAddr = VALGRIND_CHECK_MEM_IS_DEFINED(&us, sizeof(us));
   if (vgErrAddr) {
      problems++;
      VALGRIND_PRINTF("Warning: 'us' in call to cuMemsetD16 is not fully defined.\n");
   }

   vgErrAddr = VALGRIND_CHECK_MEM_IS_DEFINED(&N, sizeof(size_t));
   if (vgErrAddr) {
      problems++;
      VALGRIND_PRINTF("Error: 'N' in call to cuMemsetD16 not defined.\n");
   }

   // Look up the allocation containing dstDevice in the current context.
   cgGetCtx(&ctx);
   dstNode = cgFindMem(cgFindCtx(ctx), dstDevice);

   if (!dstNode) {
      // No known allocation covers the destination pointer.
      problems++;
      VALGRIND_PRINTF("Error: Destination device memory not allocated in call to cuMemsetD16.\n");
   } else {
      // Bytes remaining in the allocation from dstDevice onwards.
      bytesAvail = dstNode->size - (dstDevice - dstNode->dptr);
      if (bytesAvail < sizeof(unsigned short) * N) {
         problems++;
         VALGRIND_PRINTF("Error: Allocated device memory too small in call to cuMemsetD16.\n"
                         " Expected %lu allocated bytes but only found %lu.\n",
                         sizeof(unsigned short) * N, bytesAvail);
      }

      // Check if pointer is properly two byte aligned.
      // TODO: Is this a valid check?
      if (dstDevice % 2) {
         problems++;
         VALGRIND_PRINTF("Error: Pointer dstDevice in call to cuMemsetD16 not two byte aligned.\n");
      }
   }

   if (problems) {
      VALGRIND_PRINTF_BACKTRACE("");
   }

   cgUnlock();
   return result;
}