// TODO: Can we do additional checks?
/*
 * Wrapper for cuArrayGetDescriptor.
 *
 * Forwards the call to the original driver function and afterwards compares
 * its result with cudagrind's own bookkeeping: the array is looked up in the
 * array list of the context returned by cgGetCtx. A message plus backtrace is
 * printed whenever the driver result and the bookkeeping disagree, or when
 * the driver reports an error.
 *
 * Input:
 *   CUDA_ARRAY_DESCRIPTOR *pArrayDescriptor - Passed through to the driver
 *   CUarray               hArray            - Array whose descriptor is queried
 *
 * Returns the CUresult of the original cuArrayGetDescriptor unchanged.
 */
CUresult I_WRAP_SONAME_FNNAME_ZZ(libcudaZdsoZa, cuArrayGetDescriptor)(CUDA_ARRAY_DESCRIPTOR *pArrayDescriptor, CUarray hArray) {
   OrigFn         origFn;
   CUresult       rc;
   CUcontext      currentCtx = NULL;
   cgArrListType  *tracked;

   VALGRIND_GET_ORIG_FN(origFn);
   cgLock();
   CALL_FN_W_WW(rc, origFn, pArrayDescriptor, hArray);

   // Context -> context node -> entry of hArray in that node's array list.
   // The lookup is performed regardless of the driver's result.
   cgGetCtx(&currentCtx);
   tracked = cgFindArr(cgFindCtx(currentCtx), hArray);

   if (rc == CUDA_SUCCESS) {
      // Driver knows the array, cudagrind does not.
      if (!tracked) {
         VALGRIND_PRINTF("cuArrayGetDescriptor returned successfully, but array not found\n");
         VALGRIND_PRINTF_BACKTRACE("   in cudagrind's internal list. Reason: Unknown\n");
      }
   } else {
      // Both failure cases share the first line and differ in the explanation.
      VALGRIND_PRINTF("cuArrayGetDescriptor returned with error code: %d,\n", rc);
      if (tracked) {
         VALGRIND_PRINTF_BACKTRACE("   but array is found in cudagrind's internal list.\n");
      } else {
         VALGRIND_PRINTF_BACKTRACE("   possible reason: Wrong context or array not previously created.\n");
      }
   }

   cgUnlock();
   return rc;
}
Example #2
0
// Helper function that fetches the current CUDA context via cuCtxGetCurrent.
//
// Output:
//   CUcontext *ctx - Receives whatever cuCtxGetCurrent stores in it.
//
// The function returns nothing; problems are only reported through Valgrind:
// a backtrace is printed if the driver call fails, or if it succeeds but the
// retrieved context is NULL.
void cgGetCtx(CUcontext *ctx) {
   CUresult res;
   
   res = cuCtxGetCurrent(ctx);
   if (res != CUDA_SUCCESS) {
      VALGRIND_PRINTF_BACKTRACE("Error: Retrieving CUDA context in VG-wrapper failed.\n");
   } else if (*ctx == NULL) {
      // The message used to end in "\n:", which printed a stray ':' at the
      // beginning of the next output line.
      VALGRIND_PRINTF_BACKTRACE("Error: Retrieved NULL context in Valgrind wrapper.\n");
   }
}
Example #3
0
/**
 * Frees every object recorded in extensions->memcheckHashTable whose address
 * lies in [baseAddress, baseAddress + size) and then marks the whole range as
 * noaccess. A zero-sized range is ignored.
 *
 * @param extensions   provides the hash table of recorded addresses and its mutex
 * @param baseAddress  first address of the range
 * @param size         length of the range in bytes
 */
void valgrindClearRange(MM_GCExtensionsBase *extensions, uintptr_t baseAddress, uintptr_t size)
{
    if (0 == size)
    {
        return;
    }

    /* Inclusive upper bound of the range. */
    const uintptr_t topInclusiveAddr = baseAddress + size - 1;

#if defined(VALGRIND_REQUEST_LOGS)
    VALGRIND_PRINTF_BACKTRACE("Clearing objects in range b/w 0x%lx and  0x%lx\n", baseAddress, topInclusiveAddr);
#endif /* defined(VALGRIND_REQUEST_LOGS) */

    MUTEX_ENTER(extensions->memcheckHashTableMutex);
    GC_HashTableIterator it(extensions->memcheckHashTable);
    for (uintptr_t *slot = (uintptr_t *)it.nextSlot(); NULL != slot; slot = (uintptr_t *)it.nextSlot())
    {
        const bool atOrAboveBase = (baseAddress <= *slot);
        if (atOrAboveBase && (topInclusiveAddr >= *slot))
        {
            /* Free the object first, then drop its slot from the table. */
            valgrindFreeObjectDirect(extensions, *slot);
            it.removeSlot();
        }
    }
    MUTEX_EXIT(extensions->memcheckHashTableMutex);

    /* Freed objects are already noaccess; cover the rest of the region too. */
    valgrindMakeMemNoaccess(baseAddress, size);
}
Example #4
0
/*
 * Unlinks and frees the first entry of ctxNode's array list whose dptr
 * matches the given array reference.
 *
 * Input:
 *   cgCtxListType *ctxNode - The context node whose array list is searched
 *   CUarray       dptr     - The array reference of the entry to be removed
 *
 * If no entry matches, an error message and backtrace are printed and the
 * list is left untouched.
 */
void cgCtxDelArr(cgCtxListType *ctxNode, CUarray dptr) {
   // 'link' always points at the pointer that refers to the current entry:
   // first the list head, afterwards the 'next' field of the previous entry.
   cgArrListType **link = &ctxNode->array;
   cgArrListType *victim;

   while (*link && (*link)->dptr != dptr) {
      link = &(*link)->next;
   }

   victim = *link;
   if (victim) {
      // Bypass the entry, then release its memory.
      *link = victim->next;
      free(victim);
      // TODO: Also remove the context entry if it's empty? And where?
   } else {
      // Reached the end of the list without a match.
      VALGRIND_PRINTF("Error: Tried to remove non-existant device array reference in cgCtxDelArr.\n");
      VALGRIND_PRINTF_BACKTRACE("Possible reason: Wrong CUDA context or double free on device array pointer.\n");
   }
}
Example #5
0
/*
 * Appends an entry for the array referenced by dptr, with the given
 * descriptor desc, to the array list of the context node ctxNode.
 *
 * cgCtxListType *ctxNode       - Node containing memory information for context
 * CUarray       dptr           - Device array reference (1D, 2D or 3D)
 * CUDA_ARRAY3D_DESCRIPTOR desc - Descriptor of the array referenced by dptr
 *
 * If an entry for dptr already exists, nothing is added and an error is
 * printed. If no memory for the new entry can be allocated, an error is
 * printed and the list is left unchanged.
 */
void cgCtxAddArr(cgCtxListType *ctxNode, CUarray dptr, CUDA_ARRAY3D_DESCRIPTOR desc) {
   // 'link' points at the pointer that will receive the new entry: the list
   // head for an empty list, otherwise the 'next' field of the last entry.
   cgArrListType **link = &ctxNode->array;
   cgArrListType *node;

   // Walk to the end of the list; every entry is checked for a duplicate.
   while (*link) {
      if ((*link)->dptr == dptr) {
         VALGRIND_PRINTF("Error: Tried to add already existing array reference in cgCtxAddArr.\n");
         VALGRIND_PRINTF_BACKTRACE("Possible reason: Unknown. This should not have happened!");
         return;
      }
      link = &(*link)->next;
   }

   node = (cgArrListType*)malloc(sizeof(cgArrListType));
   if (!node) {
      // Previously the NULL result of malloc was dereferenced right away.
      VALGRIND_PRINTF_BACKTRACE("Error: Out of memory while adding array reference in cgCtxAddArr.\n");
      return;
   }
   node->dptr = dptr;
   node->desc = desc;
   node->locked = 0;
   // Do not have to set node->stream here
   node->next = NULL;

   // Publish the fully initialized entry.
   *link = node;
}
Example #6
0
/*
 * Appends an entry for the device memory referenced by dptr, of the given
 * size, to the memory list of the context node ctxNode.
 *
 * cgCtxListType *ctxNode - Node containing memory information for context
 * CUdeviceptr   dptr     - Device pointer to memory
 * size_t        size     - Size of memory referenced by dptr
 *
 * New entries are created with isSymbol and locked set to 0. If an entry for
 * dptr already exists, nothing is added and an error is printed. If no memory
 * for the new entry can be allocated, an error is printed and the list is
 * left unchanged.
 */
void cgCtxAddMem(cgCtxListType *ctxNode, CUdeviceptr dptr, size_t size) {
   // 'link' points at the pointer that will receive the new entry: the list
   // head for an empty list, otherwise the 'next' field of the last entry.
   cgMemListType **link = &ctxNode->memory;
   cgMemListType *node;

   // Walk to the end of the list; every entry is checked for a duplicate.
   while (*link) {
      if ((*link)->dptr == dptr) {
         VALGRIND_PRINTF("Error: Tried to add already existing device pointer in cgCtxAddMem.\n");
         VALGRIND_PRINTF_BACKTRACE("Possible reason: Unknown. This should not have happened!");
         return;
      }
      link = &(*link)->next;
   }

   node = (cgMemListType*)malloc(sizeof(cgMemListType));
   if (!node) {
      // Previously the NULL result of malloc was dereferenced right away.
      VALGRIND_PRINTF_BACKTRACE("Error: Out of memory while adding device pointer in cgCtxAddMem.\n");
      return;
   }
   node->dptr = dptr;
   node->isSymbol = 0;
   node->size = size;
   node->locked = 0;
   // Do not have to set node->stream here
   node->next = NULL;

   // Publish the fully initialized entry.
   *link = node;
}
Example #7
0
/**
 * Reports the object at baseAddress as freed to the Valgrind memory pool and
 * removes its address from extensions->memcheckHashTable.
 *
 * @param extensions   provides the object model, the mempool address and the
 *                     hash table with its mutex
 * @param baseAddress  address of the object being freed
 */
void valgrindFreeObject(MM_GCExtensionsBase *extensions, uintptr_t baseAddress)
{
    /* Object whose size is measured. For a forwarded pointer (scavenger: the
       object merely points to its replica in another region) the size query
       on the original returns a junk value, so the replica is measured
       instead and its size is used for the original. */
    omrobjectptr_t sizedObject = (omrobjectptr_t)baseAddress;
    if (MM_ForwardedHeader((omrobjectptr_t)baseAddress).isForwardedPointer())
    {
        sizedObject = MM_ForwardedHeader((omrobjectptr_t)baseAddress).getForwardedObject();
    }
    int objSize = (int)((GC_ObjectModel)extensions->objectModel).getConsumedSizeInBytesWithHeader(sizedObject);

#if defined(VALGRIND_REQUEST_LOGS)
    VALGRIND_PRINTF_BACKTRACE("Clearing an object at 0x%lx of size %d\n", baseAddress, objSize);
#endif /* defined(VALGRIND_REQUEST_LOGS) */

    /* Definedness check over the object's bytes, then release it from the pool. */
    VALGRIND_CHECK_MEM_IS_DEFINED(baseAddress, objSize);
    VALGRIND_MEMPOOL_FREE(extensions->valgrindMempoolAddr, baseAddress);

    /* Forget the address; the table is only touched under its mutex. */
    MUTEX_ENTER(extensions->memcheckHashTableMutex);
    hashTableRemove(extensions->memcheckHashTable, &baseAddress);
    MUTEX_EXIT(extensions->memcheckHashTableMutex);
}
Example #8
0
/**
 * Issues a VALGRIND_MAKE_MEM_DEFINED request for [address, address + size).
 * With VALGRIND_REQUEST_LOGS defined, the request is logged with a backtrace
 * first.
 *
 * @param address  start of the area
 * @param size     length of the area in bytes
 */
void valgrindMakeMemDefined(uintptr_t address, uintptr_t size)
{
#ifdef VALGRIND_REQUEST_LOGS
    VALGRIND_PRINTF_BACKTRACE("Marking an area as defined at 0x%lx of size %lu\n", address, size);
#endif /* VALGRIND_REQUEST_LOGS */

    VALGRIND_MAKE_MEM_DEFINED(address, size);
}
Example #9
0
/**
 * Issues a VALGRIND_MAKE_MEM_NOACCESS request for [address, address + size).
 * With VALGRIND_REQUEST_LOGS defined, the request is logged with a backtrace
 * first.
 *
 * @param address  start of the area
 * @param size     length of the area in bytes
 */
void valgrindMakeMemNoaccess(uintptr_t address, uintptr_t size)
{
#ifdef VALGRIND_REQUEST_LOGS
    VALGRIND_PRINTF_BACKTRACE("Marking an area as noaccess at 0x%lx of size %lu\n", address, size);
#endif /* VALGRIND_REQUEST_LOGS */

    VALGRIND_MAKE_MEM_NOACCESS(address, size);
}
Example #10
0
/**
 * Reports the object at baseAddress as freed to the Valgrind memory pool.
 * Unlike valgrindFreeObject this neither looks at forwarded headers nor
 * touches extensions->memcheckHashTable.
 *
 * @param extensions   provides the object model and the mempool address
 * @param baseAddress  address of the object being freed
 */
MMINLINE void valgrindFreeObjectDirect(MM_GCExtensionsBase *extensions, uintptr_t baseAddress)
{
    omrobjectptr_t object = (omrobjectptr_t)baseAddress;
    int objSize = (int)((GC_ObjectModel)extensions->objectModel).getConsumedSizeInBytesWithHeader(object);

#ifdef VALGRIND_REQUEST_LOGS
    VALGRIND_PRINTF_BACKTRACE("Clearing an object at 0x%lx of size %d\n", baseAddress, objSize);
#endif /* VALGRIND_REQUEST_LOGS */

    /* Definedness check over the object's bytes, then release it from the pool. */
    VALGRIND_CHECK_MEM_IS_DEFINED(baseAddress, objSize);
    VALGRIND_MEMPOOL_FREE(extensions->valgrindMempoolAddr, baseAddress);
}
/*
 * Prints "Yo Yo Ma" in three VALGRIND_PRINTF pieces and writes the sum of the
 * three return values to stderr, then prints a two-line message through
 * VALGRIND_PRINTF_BACKTRACE and writes that call's return value to stderr.
 */
int
main (int argc, char **argv)
{
   int total;
   int backtraceResult;

   /* Accumulate the return values of the three partial prints. */
   total  = VALGRIND_PRINTF("Yo ");
   total += VALGRIND_PRINTF("Yo ");
   total += VALGRIND_PRINTF("Ma\n");
   fprintf(stderr, "%d\n", total);

   backtraceResult = VALGRIND_PRINTF_BACKTRACE("Backtrace line one\nLine two:\n");
   fprintf(stderr, "%d\n", backtraceResult);

   return 0;
}
Example #12
0
/**
 * Registers an object of the given size at baseAddress with the Valgrind
 * memory pool and records its address in extensions->memcheckHashTable.
 *
 * @param extensions   provides the mempool address and the hash table with its mutex
 * @param baseAddress  address of the newly allocated object
 * @param size         size of the object in bytes
 */
void valgrindMempoolAlloc(MM_GCExtensionsBase *extensions, uintptr_t baseAddress, uintptr_t size)
{
#ifdef VALGRIND_REQUEST_LOGS
    VALGRIND_PRINTF_BACKTRACE("Allocating an object at 0x%lx of size %lu\n", baseAddress, size);
#endif /* VALGRIND_REQUEST_LOGS */

    /* Tell Valgrind about the object first ... */
    VALGRIND_MEMPOOL_ALLOC(extensions->valgrindMempoolAddr, baseAddress, size);

    /* ... then remember its address; the table is only touched under its mutex. */
    MUTEX_ENTER(extensions->memcheckHashTableMutex);
    hashTableAdd(extensions->memcheckHashTable, &baseAddress);
    MUTEX_EXIT(extensions->memcheckHashTableMutex);
}
Example #13
0
/*
 * Wrapper for cuMemcpyDtoDAsync.
 *
 * Checks that hStream is defined, reports source/destination regions that
 * are currently marked as locked by a different stream, and then records
 * hStream on both regions (bit 2 of 'locked' for the destination, bit 1 for
 * the source).
 *
 * The copy itself is delegated to cuMemcpyDtoD; hStream is not passed on and
 * the original cuMemcpyDtoDAsync is not called from here.
 *
 * Returns the CUresult of cuMemcpyDtoD.
 */
CUresult I_WRAP_SONAME_FNNAME_ZZ(libcudaZdsoZa, cuMemcpyDtoDAsync)(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream) {
   int problems = 0;
   long undefinedAt;

   undefinedAt = VALGRIND_CHECK_MEM_IS_DEFINED(&hStream, sizeof(CUstream));
   if (undefinedAt) {
      problems++;
      VALGRIND_PRINTF("Error: 'hStream' in call to cuMemcpyDtoDAsync not defined.\n");
   }

   cgLock();

   CUcontext currentCtx = NULL;
   cgCtxListType *ctxNode;
   cgMemListType *dstMem, *srcMem;

   // Look up both regions in the list of the current context.
   cgGetCtx(&currentCtx);
   ctxNode = cgFindCtx(currentCtx);
   dstMem = cgFindMem(ctxNode, dstDevice);
   srcMem = cgFindMem(ctxNode, srcDevice);

   // Both conflict checks run before any lock state is modified, so a copy
   // whose source and destination resolve to the same entry is judged on
   // the state found on entry.
   if (dstMem) {
      if ((dstMem->locked & 2) && dstMem->stream != hStream) {
         problems++;
         VALGRIND_PRINTF("Error: Concurrent write and read access by different streams.\n");
      }
   }
   if (srcMem) {
      if (srcMem->locked && srcMem->stream != hStream) {
         problems++;
         VALGRIND_PRINTF("Error: Concurrent write and read access by different streams.\n");
      }
   }

   // Record this stream on the regions it touches.
   if (dstMem) {
      dstMem->locked |= 2;
      dstMem->stream = hStream;
   }
   if (srcMem) {
      srcMem->locked |= 1;
      srcMem->stream = hStream;
   }

   cgUnlock();

   // One backtrace for all messages printed above.
   if (problems) {
      VALGRIND_PRINTF_BACKTRACE("");
   }

   return cuMemcpyDtoD(dstDevice, srcDevice, ByteCount);
}
Example #14
0
/**
 * Tells Valgrind that the object at baseAddress changed its size, by freeing
 * it from the memory pool and allocating it again with newSize.
 *
 * @param extensions   provides the mempool address
 * @param baseAddress  address of the object (unchanged by the resize)
 * @param oldSize      previous size; only used for logging
 * @param newSize      size the object is re-registered with
 */
void valgrindResizeObject(MM_GCExtensionsBase *extensions, uintptr_t baseAddress, uintptr_t oldSize, uintptr_t newSize)
{
#ifdef VALGRIND_REQUEST_LOGS
    VALGRIND_PRINTF_BACKTRACE("Resizing an object at 0x%lx from size %d to %d\n", baseAddress, (int)oldSize, (int)newSize);
#endif /* VALGRIND_REQUEST_LOGS */

    /* VALGRIND_MEMPOOL_CHANGE would express a moved object directly, but it is
       very slow without an internal hack
       (https://bugs.kde.org/show_bug.cgi?id=366817), hence free + alloc. */
    // VALGRIND_CHECK_MEM_IS_DEFINED(baseAddress, oldSize);

    /* Valgrind remembers the size the object was allocated with, so the free
       request marks the old extent noaccess without being told oldSize. */
    VALGRIND_MEMPOOL_FREE(extensions->valgrindMempoolAddr, baseAddress);

    /* Same address as before, so extensions->_allocatedObjects needs no
       remove/add. */
    VALGRIND_MEMPOOL_ALLOC(extensions->valgrindMempoolAddr, baseAddress, newSize);
}
Example #15
0
/*
 * Wrapper for cuArrayDestroy.
 *
 * For a non-NULL handle the array is removed from cudagrind's bookkeeping
 * (cgDelArr on the current context) before the original driver function is
 * called; for a NULL handle an error with backtrace is printed instead. The
 * original function is called in both cases.
 *
 * Returns the CUresult of the original cuArrayDestroy unchanged.
 */
CUresult I_WRAP_SONAME_FNNAME_ZZ(libcudaZdsoZa, cuArrayDestroy)(CUarray hArray) {
   OrigFn      origFn;
   CUresult    rc;
   CUcontext   currentCtx = NULL;

   VALGRIND_GET_ORIG_FN(origFn);
   cgLock();

   if (hArray == 0) {
      VALGRIND_PRINTF_BACKTRACE("Error: cuArrayDestroy called with invalid NULL pointer.\n");
   } else {
      // Drop the entry from the list of the current context.
      cgGetCtx(&currentCtx);
      cgDelArr(currentCtx, hArray);
   }

   // Forward to the driver even for a NULL handle; its result is returned.
   CALL_FN_W_W(rc, origFn, hArray);

   cgUnlock();
   return rc;
}
Example #16
0
/**
 * When running under Valgrind, check whether all bytes in the range [packet,
 * packet+length) are defined. Let Valgrind print a backtrace if one or more
 * bytes with uninitialized values have been found. This function can help to
 * find the cause of undefined value errors if --track-origins=yes is not
 * sufficient. Does nothing when not running under Valgrind.
 *
 * The body is only compiled when the Valgrind headers report version 3.6 or
 * newer; otherwise the function is empty.
 *
 * @param packet  Start of the byte range to inspect.
 * @param length  Number of bytes in the range.
 *
 * Note: this requires a fairly recent valgrind.
 */
void
netsnmp_check_definedness(const void *packet, size_t length)
{
#if defined(__VALGRIND_MAJOR__) && defined(__VALGRIND_MINOR__)   \
    && (__VALGRIND_MAJOR__ > 3                                   \
        || (__VALGRIND_MAJOR__ == 3 && __VALGRIND_MINOR__ >= 6))

    if (RUNNING_ON_VALGRIND) {
        const char *bytes = (const char *)packet;
        /* size_t index: an int index was compared against the unsigned
         * length and would overflow for ranges longer than INT_MAX. */
        size_t i;
        char vbits;

        for (i = 0; i < length; ++i) {
            /* One byte at a time; a non-zero V-bit mask means at least one
             * undefined bit in that byte. */
            if (VALGRIND_GET_VBITS(bytes + i, &vbits, 1) == 1
                && vbits)
                VALGRIND_PRINTF_BACKTRACE("Undefined: byte %lu/%lu",
                                          (unsigned long)i,
                                          (unsigned long)length);
        }
    }

#endif
}
Example #17
0
// Copy Host->Device
//
// Wrapper for cuMemcpyHtoD. Before forwarding the call to the original driver
// function it checks, in this order:
//   - that the parameters dstDevice and srcHost themselves are defined,
//   - that neither of them is NULL,
//   - that dstDevice lies in a device allocation known to cudagrind which
//     has at least ByteCount bytes left from dstDevice on,
//   - that the ByteCount bytes at srcHost are addressable and defined.
// Every violation is reported through Valgrind with a backtrace. The original
// function is called regardless of the outcome and its result is returned.
CUresult I_WRAP_SONAME_FNNAME_ZZ(libcudaZdsoZa, cuMemcpyHtoD)(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount) {
   OrigFn fn;
   CUresult result;
   CUcontext ctx = NULL;
   cgCtxListType *nodeCtx;
   cgMemListType *nodeMem;
   // Only meaningful when nodeMem is non-NULL; initialized so it is never
   // read uninitialized.
   size_t dstSize = 0;
   long vgErrorAddress, vgErrorAddressDstDevice, vgErrorAddressSrcHost;
   
   VALGRIND_GET_ORIG_FN(fn);
   cgLock();

   // Each parameter is checked with the size of its own type (the two sizeof
   // operands used to be swapped).
   vgErrorAddressDstDevice = VALGRIND_CHECK_MEM_IS_DEFINED(&dstDevice, sizeof(CUdeviceptr));
   vgErrorAddressSrcHost   = VALGRIND_CHECK_MEM_IS_DEFINED(&srcHost, sizeof(void*));
   // TODO: Currently errors are exclusive .. i.e. with undefined src and NULL
   //       dst pointer, only the undefined pointer is reported.
   if (vgErrorAddressDstDevice || vgErrorAddressSrcHost) {
      // Message is assembled piecewise: "Error: [destination device] [and] [source host] pointer ..."
      VALGRIND_PRINTF("Error:");
      if (vgErrorAddressDstDevice) {
         VALGRIND_PRINTF(" destination device");
         if (vgErrorAddressSrcHost) {
            VALGRIND_PRINTF(" and");
         }
      }
      if (vgErrorAddressSrcHost) {
         VALGRIND_PRINTF(" source host");
      }
      VALGRIND_PRINTF_BACKTRACE(" pointer in cuMemcpyHtoD not defined.\n");
   } else if (dstDevice != 0 && srcHost != NULL) {
      cgGetCtx(&ctx);
      // Check allocation status and available size on device
      nodeCtx = cgFindCtx(ctx);
      nodeMem = cgFindMem(nodeCtx, dstDevice);
      if (!nodeMem) {
         VALGRIND_PRINTF("Error: Device memory during host->device memory copy is not allocated.");
      } else {
         // Bytes left in the allocation, counted from dstDevice.
         dstSize = nodeMem->size - (dstDevice - nodeMem->dptr);
         if (dstSize < ByteCount) {
            VALGRIND_PRINTF("Error: Allocated device memory too small for host->device memory copy.\n");
            VALGRIND_PRINTF("       Expected %lu allocated bytes but only found %lu.", ByteCount, dstSize);
         }
      }
      // Terminates the device-side message (if any) and prints the backtrace.
      if (!nodeMem || dstSize < ByteCount) {
         VALGRIND_PRINTF_BACKTRACE("\n");
      }
      // Check allocation and definedness for host memory
      vgErrorAddress = VALGRIND_CHECK_MEM_IS_ADDRESSABLE(srcHost, ByteCount);
      if (vgErrorAddress) {
         VALGRIND_PRINTF("Error: Host memory during host->device memory copy is not allocated.\n");
         VALGRIND_PRINTF("       Expected %lu allocated bytes but only found %lu.", ByteCount, vgErrorAddress - (long)srcHost);
      } else {
         vgErrorAddress = VALGRIND_CHECK_MEM_IS_DEFINED(srcHost, ByteCount);
         if (vgErrorAddress) {
            VALGRIND_PRINTF("Error: Host memory during host->device memory copy is not defined.\n");
            VALGRIND_PRINTF("       Expected %lu defined bytes but only found %lu.", ByteCount, vgErrorAddress - (long)srcHost);
         }
      }
      // Terminates the host-side message (if any) and prints the backtrace.
      if (vgErrorAddress) {
         VALGRIND_PRINTF_BACKTRACE("\n");
      }
   } else {
      // At least one of the two pointers is NULL.
      VALGRIND_PRINTF("Error: cuMemcpyHtoD called with NULL");
      if (dstDevice == 0) {
         VALGRIND_PRINTF(" device");
         if (srcHost == NULL) VALGRIND_PRINTF(" and");
      }
      if (srcHost == NULL) {
         VALGRIND_PRINTF(" host");
      }
      VALGRIND_PRINTF_BACKTRACE(" pointer.\n");
   }
   
   CALL_FN_W_WWW(result, fn, dstDevice, srcHost, ByteCount);
   cgUnlock();
   return result;
}
Example #18
0
/*
 * Prints "Program aborting because of call to <str>" together with a
 * Valgrind backtrace and terminates the process via _exit with status 99.
 *
 * str - text inserted into the message.
 */
static void panic(const char *str)
{
   VALGRIND_PRINTF_BACKTRACE("Program aborting because of call to %s", str);
   _exit(99);
   /* Only reached if _exit() were to return: a write through a null pointer.
      NOTE(review): presumably a last-resort crash / no-return hint for the
      compiler -- confirm before removing. */
   *(int *)0 = 'x';
}
Example #19
0
/*
 * Wrapper for cuMemsetD2D32.
 *
 * Calls the original driver function first and afterwards checks
 *   - that all parameters are defined,
 *   - that dstDevice lies in a device allocation known to cudagrind,
 *   - that the allocation is large enough for the 2D range being set,
 *   - that dstDevice and dstPitch are four byte aligned,
 *   - that a row of Width 32-bit values fits into dstPitch.
 * All findings are printed through Valgrind, followed by a single backtrace.
 *
 * Units: Width counts 32-bit elements, dstPitch counts bytes between rows
 * (see the driver API reference; the pitch is unused when Height is 1).
 * The size and pitch checks previously mixed the two: the required size was
 * computed as sizeof(ui)*dstPitch*Height (four times too large) and dstPitch
 * was compared against Width in elements instead of bytes.
 *
 * Returns the CUresult of the original cuMemsetD2D32 unchanged.
 */
CUresult I_WRAP_SONAME_FNNAME_ZZ(libcudaZdsoZa, cuMemsetD2D32)(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height) {
   OrigFn fn;
   CUresult result;
   CUcontext ctx = NULL;
   cgMemListType *nodeMemDst;
   
   int error = 0;
   long vgErrorAddress;
   size_t dstSize;
   // Bytes written per row, and bytes from dstDevice to the end of the last row.
   size_t rowBytes, requiredSize;

   VALGRIND_GET_ORIG_FN(fn);
   cgLock();
   CALL_FN_W_5W(result, fn, dstDevice, dstPitch, ui, Width, Height);
   
   // Check if function parameters are defined.
   // TODO: Warning or error in case of a partially undefined ui?
   vgErrorAddress = VALGRIND_CHECK_MEM_IS_DEFINED(&dstDevice, sizeof(CUdeviceptr));
   if (vgErrorAddress) {
      error++;
      VALGRIND_PRINTF("Error: 'dstDevice' in call to cuMemsetD2D32 not defined.\n");
   }
   vgErrorAddress = VALGRIND_CHECK_MEM_IS_DEFINED(&dstPitch, sizeof(size_t));
   if (vgErrorAddress) {
      error++;
      VALGRIND_PRINTF("Error: 'dstPitch' in call to cuMemsetD2D32 not defined.\n");
   }
   vgErrorAddress = VALGRIND_CHECK_MEM_IS_DEFINED(&ui, sizeof(ui));
   if (vgErrorAddress) {
      error++;
      VALGRIND_PRINTF("Warning: 'ui' in call to cuMemsetD2D32 is not fully defined.\n");
   }
   vgErrorAddress = VALGRIND_CHECK_MEM_IS_DEFINED(&Width, sizeof(size_t));
   if (vgErrorAddress) {
      error++;
      VALGRIND_PRINTF("Error: 'Width' in call to cuMemsetD2D32 not defined.\n");
   }
   vgErrorAddress = VALGRIND_CHECK_MEM_IS_DEFINED(&Height, sizeof(size_t));
   if (vgErrorAddress) {
      error++;
      VALGRIND_PRINTF("Error: 'Height' in call to cuMemsetD2D32 not defined.\n");
   }
   
   rowBytes = sizeof(ui) * Width;
   
   // Fetch current context
   cgGetCtx(&ctx);
   nodeMemDst = cgFindMem(cgFindCtx(ctx), dstDevice);
   
   // Check if memory has been allocated
   if (!nodeMemDst) {
      error++;
      VALGRIND_PRINTF("Error: Destination device memory not allocated in call to cuMemsetD2D32.\n");
   } else {
      // If memory is allocated, check size of available memory
      dstSize = nodeMemDst->size - (dstDevice - nodeMemDst->dptr);
      // Rows start dstPitch bytes apart and each covers rowBytes, so the last
      // byte touched is at (Height-1)*dstPitch + rowBytes. Nothing is written
      // for an empty range.
      requiredSize = (Height && Width) ? (Height - 1) * dstPitch + rowBytes : 0;
      if (dstSize < requiredSize) {
         error++;
         VALGRIND_PRINTF("Error: Allocated device memory too small in call to cuMemsetD2D32.\n"
                         "       Expected %lu allocated bytes but only found %lu.\n",
                         requiredSize, dstSize);
      }
      
      // Check if dstDevice and dstPitch are both properly aligned
      // TODO: Is this a valid check? (see also cuMemsetD32)
      if (dstDevice % 4) {
         error++;
         VALGRIND_PRINTF("Error: Pointer dstDevice in call to cuMemsetD2D32 not four byte aligned.\n");
      }
      if (dstPitch % 4) {
         error++;
         VALGRIND_PRINTF("Error: Destination pitch in call to cuMemsetD2D32 not four byte aligned.\n");
      }
   }
   
   // Make sure pitch is big enough to accommodate asked for Width.
   // Compared in bytes; skipped for a single row, where the pitch is unused.
   if (Height > 1 && dstPitch < rowBytes) {
      error++;
      VALGRIND_PRINTF("Error: dstPitch smaller than Width in call to cuMemsetD2D32.\n");
   }
   
   // One backtrace for all messages printed above.
   if (error) {
      VALGRIND_PRINTF_BACKTRACE("");
   }
   
   cgUnlock();
   return result;
}
Example #20
0
CUresult I_WRAP_SONAME_FNNAME_ZZ(libcudaZdsoZa, cuMemcpy3D)(const CUDA_MEMCPY3D *pCopy) {
   OrigFn      fn;
   CUresult    result;
   CUcontext  ctx = NULL;
   int error = 0, error_addressable, error_defined;
   long vgErrorAddress = 0, vgErrorAddressDefined = 0;
   
     
   VALGRIND_GET_ORIG_FN(fn);
   cgLock();
   CALL_FN_W_W(result, fn, pCopy);

   // Check if pCopy is null, not allocated or undefined.
   // For obvious reasons we skip the following checks if either condition is true.
   if (!pCopy) {
      error++;
      VALGRIND_PRINTF_BACKTRACE("Error: pCopy in call to cuMemcpy3D is NULL.\n");
      cgUnlock();
      return result;
   } else if ( vgErrorAddress = VALGRIND_CHECK_MEM_IS_ADDRESSABLE(pCopy, sizeof(CUDA_MEMCPY3D)) ) {
      error++;
      VALGRIND_PRINTF_BACKTRACE("Error: pCopy in call to cuMemcpy3D points to unallocated memory.\n");
      cgUnlock();
      return result;
   } // It makes no sense to check _IS_DEFINED on the whole structure, since only part of it is used!
   
   // General checks of constaints imposed by reference manual
   if (pCopy->srcMemoryType != CU_MEMORYTYPE_ARRAY) {
      if (pCopy->srcPitch && pCopy->srcPitch < pCopy->WidthInBytes + pCopy->srcXInBytes) {
         error++;
         VALGRIND_PRINTF("Error: srcPitch < WidthInBytes+srcXInBytes in cuMemcpy3D.\n");
      }
      if (pCopy->srcHeight && pCopy->srcHeight < pCopy->Height + pCopy->srcY) {
         error++;
         VALGRIND_PRINTF("Error: srcHeight < Height+srcY in cuMemcpy3D.\n");
      }
   }
   if (pCopy->dstMemoryType != CU_MEMORYTYPE_ARRAY) {
      if (pCopy->dstPitch && pCopy->dstPitch < pCopy->WidthInBytes + pCopy->dstXInBytes) {
         error++;
         VALGRIND_PRINTF("Error: dstPitch < WidthInBytes+dstXInBytes in cuMemcpy3D.\n");
      }
      if (pCopy->dstHeight && pCopy->dstHeight < pCopy->Height + pCopy->dstY) {
         error++;
         VALGRIND_PRINTF("Error: dstHeight < Height+dstY in cuMemcpy3D.\n");
      }
   }
   switch (pCopy->srcMemoryType) {
      case CU_MEMORYTYPE_UNIFIED:
         // TODO: How do we handle unified memory?
         break;
      case CU_MEMORYTYPE_HOST: {
         void *line;
      
         error_addressable = 0;
         error_defined = 0;
         // TODO: Is Height, Depth > 1, even for 1D/2D copy operations?   
         for (int i = 0 ; i < pCopy->Height ; i++) {
            for (int j = 0 ; j < pCopy->Depth ; j++) {
               line = (void*)(
                        (char*)pCopy->srcHost 
                        + ((pCopy->srcZ + j) * pCopy->srcHeight + (pCopy->srcY + i))*pCopy->srcPitch 
                        + pCopy->srcXInBytes
                     );
               vgErrorAddress = VALGRIND_CHECK_MEM_IS_ADDRESSABLE(line, (size_t)pCopy->WidthInBytes);
               if (vgErrorAddress) {
                  error_addressable++;
               } else {
                  vgErrorAddress = VALGRIND_CHECK_MEM_IS_DEFINED(line, (size_t)pCopy->WidthInBytes);
                  if (vgErrorAddress) {
                     error_defined++;
                  }
               }
            }
         }
         // TODO: Can we give precise information about location of error?
         if (error_addressable) {
            error++;
            VALGRIND_PRINTF("Error: (Part of) source host memory not allocated\n"
                            "       in call to cuMemcpy3D.\n");
         }
         if (error_defined) {
            error++;
            VALGRIND_PRINTF("Error: (Part of) source host memory not defined\n"
                            "       in call to cuMemcpy3D.\n");
         }
         break;
      }
      case CU_MEMORYTYPE_DEVICE: {
         // ptrEnd points to the end of the memory area which pCopy->srcDevice points into
         CUdeviceptr line, ptrEnd;
         cgMemListType *nodeMem;
         
         // TODO: Check if pCopy->srcDevice is defined?
         cgGetCtx(&ctx);
         nodeMem = cgFindMem(cgFindCtx(ctx), pCopy->srcDevice);
         
         // We only track addressable status (whether memory is allocated) for device memory regions
         error_addressable = 0;
         if (nodeMem) {
            ptrEnd = nodeMem->dptr + nodeMem->size;
            /*
            for (int i = 0 ; i < pCopy->Height ; i++) {
               for (int j = 0 ; j < pCopy->Depth ; j++) {
                  line = (CUdeviceptr)(
                           pCopy->srcDevice
                           + ((pCopy->srcZ + j) * pCopy->srcHeight + (pCopy->srcY + i)) * pCopy->srcPitch
                           + pCopy->srcXInBytes
                         );
                  
                  // Is there enough allocated memory left to statisfy the current line?
                  if (ptrEnd - line < pCopy->WidthInBytes) {
                     error_addressable++;
                  }
               }
            }
            */
            
            // Device memory should not be fragmented, so we only check the very last slice of memory
            line = (CUdeviceptr)(
                     pCopy->srcDevice 
                     + (
                        (pCopy->srcZ + pCopy->Depth - 1) * pCopy->srcHeight 
                        + (pCopy->srcY + pCopy->Height - 1)
                       ) * pCopy->srcPitch 
                     + pCopy->srcXInBytes);
            if (ptrEnd - line < pCopy->WidthInBytes) {
               error_addressable++;
            }
         } else {
            error_addressable++;
         }
         
         if (error_addressable) {
            error++;
            VALGRIND_PRINTF("Error: (Part of) source device memory not allocated\n"
                            "       in call to cuMemcpy3D.\n");
         }
         break;
      }
      case CU_MEMORYTYPE_ARRAY: {
         CUDA_ARRAY3D_DESCRIPTOR descriptor;
         int bytesPerElement;
         int widthInBytes;
         
         // Fetch array descriptor ..
         cuArray3DGetDescriptor(&descriptor, pCopy->srcArray);
         bytesPerElement = cgArrDescBytesPerElement(&descriptor);
         if (!bytesPerElement) {
            error++;
            VALGRIND_PRINTF("Error: Unknown Format value in src array descriptor in cuMemcpy3D.\n");
         }
         widthInBytes = bytesPerElement * descriptor.Width;
         // .. and check if dimensions are conform to the ones requested in pCopy
         if (widthInBytes - pCopy->srcXInBytes < pCopy->WidthInBytes) {
            error++;
            VALGRIND_PRINTF("Error: Available width of %u bytes in source array is smaller than\n"
                            "       requested Width of %u bytes in pCopy of cuMemcpy3D.\n", 
                                    widthInBytes - pCopy->srcXInBytes, pCopy->WidthInBytes);
         }
         if (pCopy->Height > 1 && descriptor.Height - pCopy->srcY < pCopy->Height) {
            error++;
            VALGRIND_PRINTF("Error: Available Height of %u in source array is smaller than\n"
                            "       requested Height of %u in pCopy of cuMemcpy3D.\n",
                            descriptor.Height - pCopy->srcY, pCopy->Height);
         }
         if (pCopy->Depth > 1 && descriptor.Depth - pCopy->srcZ < pCopy->Depth) {
            error++;
            VALGRIND_PRINTF("Error: Available Depth of %u in source array is smaller than\n"
                            "       requested Depth of %u in pCopy of cuMemcpy3D.\n",
                            descriptor.Depth - pCopy->srcY, pCopy->Height);
         }
         break;
      }
      default:
         error++;
         VALGRIND_PRINTF("Error: Unknown source memory type %d in cuMemcpy3D\n");
         break;
   }
   
   switch (pCopy->dstMemoryType) {
      case CU_MEMORYTYPE_UNIFIED:
         // TODO: How do we handle unified memory?
         break;
      case CU_MEMORYTYPE_HOST: {
         void *line;
         
         error_addressable = 0;
         error_defined = 0;
         // TODO: Is Height, Depth > 1, even for 1D/2D copy operations?
         for (int i = 0 ; i < pCopy->Height ; i++) {
            for (int j = 0 ; j < pCopy->Depth ; j++) {
               line = (void*)(
                        (char*)pCopy->dstHost 
                        + ((pCopy->dstZ + j) * pCopy->dstHeight + (pCopy->dstY + i))*pCopy->dstPitch 
                        + pCopy->dstXInBytes
                     );
               // Unlike for the source operand we only need to check allocation status here
               vgErrorAddress = VALGRIND_CHECK_MEM_IS_ADDRESSABLE(line, (size_t)pCopy->WidthInBytes);
               if (vgErrorAddress) {
                  error_addressable++;
               }
            }
         }
         // TODO: Can we give precise information about location of error?
         if (error_addressable) {
            error++;
            VALGRIND_PRINTF("Error: (Part of) destination host memory not allocated\n"
                            "       in call to cuMemcpy3D.\n");
         }
         break;
      }
      case CU_MEMORYTYPE_DEVICE: {
         // ptrEnd points to the end of the memory area which pCopy->dstDevice points into
         CUdeviceptr line, ptrEnd;
         cgMemListType *nodeMem;
         
         // TODO: Check if pCopy->dstDevice is defined?
         cgGetCtx(&ctx);
         nodeMem = cgFindMem(cgFindCtx(ctx), pCopy->dstDevice);
         
         // We only track addressable status (whether memory is allocated) for device memory regions
         error_addressable = 0;
         if (nodeMem) {
            ptrEnd = nodeMem->dptr + nodeMem->size;
            /*
            for (int i = 0 ; i < pCopy->Height ; i++) {
               for (int j = 0 ; j < pCopy->Depth ; j++) {
                  line = (CUdeviceptr)(
                           pCopy->dstDevice
                           + ((pCopy->dstZ + j) * pCopy->dstHeight + (pCopy->dstY + i)) * pCopy->dstPitch
                           + pCopy->dstXInBytes
                         );
                  
                  // Is there enough allocated memory left to statisfy the current line?
                  if (ptrEnd - line < pCopy->WidthInBytes) {
                     error_addressable++;
                  }
               }
            }
            */
            
            // Device memory should not be fragmented, so we only check the very last slice of memory
            line = (CUdeviceptr)(
                     pCopy->dstDevice 
                     + (
                        (pCopy->dstZ + pCopy->Depth - 1) * pCopy->dstHeight 
                        + (pCopy->dstY + pCopy->Height - 1)
                       ) * pCopy->dstPitch 
                     + pCopy->dstXInBytes);
            if (ptrEnd - line < pCopy->WidthInBytes) {
               error_addressable++;
            }
         } else {
            error_addressable++;
         }
         
         if (error_addressable) {
            error++;
            VALGRIND_PRINTF("Error: (Part of) destination device memory not allocated\n"
                            "       in call to cuMemcpy3D.\n");
         }
         break;
      }
      case CU_MEMORYTYPE_ARRAY: {
         CUDA_ARRAY3D_DESCRIPTOR descriptor;
         int bytesPerElement;
         int widthInBytes;
         
         // Fetch array descriptor ..
         cuArray3DGetDescriptor(&descriptor, pCopy->dstArray);
         bytesPerElement = cgArrDescBytesPerElement(&descriptor);
         if (!bytesPerElement) {
               error++;
               VALGRIND_PRINTF("Error: Unknown Format value in dst array descriptor in cuMemcpy3D.\n");
         }
         widthInBytes = bytesPerElement * descriptor.Width;
         // .. and check if dimensions are conform to the ones requested in pCopy
         if (widthInBytes - pCopy->dstXInBytes < pCopy->WidthInBytes) {
            error++;
            VALGRIND_PRINTF("Error: Available width of %u bytes in destination array is smaller than\n"
                            "       requested Width of %u bytes in pCopy of cuMemcpy3D.\n", 
                                    widthInBytes - pCopy->dstXInBytes, pCopy->WidthInBytes);
         }
         if (pCopy->Height > 1 && descriptor.Height - pCopy->dstY < pCopy->Height) {
            error++;
            VALGRIND_PRINTF("Error: Available Height of %u in destination array is smaller than\n"
                            "       requested Height of %u in pCopy of cuMemcpy3D.\n",
                            descriptor.Height - pCopy->dstY, pCopy->Height);
         }
         if (pCopy->Depth > 1 && descriptor.Depth - pCopy->dstZ < pCopy->Depth) {
            error++;
            VALGRIND_PRINTF("Error: Available Depth of %u in destination array is smaller than\n"
                            "       requested Depth of %u in pCopy of cuMemcpy3D.\n",
                            descriptor.Depth - pCopy->dstZ, pCopy->Depth);
         }
         break;
      }
      default:
         error++;
         VALGRIND_PRINTF("Error: Unknown destination memory type %d in cuMemcpy3D\n");
         break;
   }
   if (error) {
      VALGRIND_PRINTF_BACKTRACE("   %d errors detected in call to cuMemcpy3D.", error);
   }
   
   cgUnlock();
   return result;
}
/*
 * Trace a change of an object's reference count.
 *
 * obj_name      Label printed in front of the object pointer.
 * obj           The object whose refcount changed.
 * old_refcount  Previous count; -1 together with new_refcount == -1 selects
 *               the "ref stolen" message instead of the "a -> b" message.
 * new_refcount  New count.
 * why           Free-form reason included in the message.
 * env_var       Environment variable consulted (once) to decide whether
 *               tracing is on; it is expected to hold "0" or "1".
 * enabled       In/out flag. A negative value means "not decided yet": the
 *               variable is then read and *enabled becomes TRUE or FALSE.
 *
 * When tracing is on, the message goes both to VALGRIND_PRINTF_BACKTRACE and
 * to _dbus_verbose.
 */
void
_dbus_trace_ref (const char *obj_name,
                 void       *obj,
                 int         old_refcount,
                 int         new_refcount,
                 const char *why,
                 const char *env_var,
                 int        *enabled)
{
  _dbus_assert (obj_name != NULL);
  _dbus_assert (obj != NULL);
  _dbus_assert (old_refcount >= -1);
  _dbus_assert (new_refcount >= -1);

  if (old_refcount != -1)
    {
      /* Ordinary transition: both counts are real and not both zero. */
      _dbus_assert (new_refcount >= 0);
      _dbus_assert (old_refcount >= 0);
      _dbus_assert (old_refcount > 0 || new_refcount > 0);
    }
  else
    {
      /* -1 must be used for both counts or for neither. */
      _dbus_assert (new_refcount == -1);
    }

  _dbus_assert (why != NULL);
  _dbus_assert (env_var != NULL);
  _dbus_assert (enabled != NULL);

  /* Lazily resolve the on/off switch from the environment. */
  if (*enabled < 0)
    {
      const char *setting = _dbus_getenv (env_var);

      /* Default to off; only a leading '1' turns tracing on. */
      *enabled = FALSE;

      if (setting != NULL && setting[0] != '\0')
        {
          switch (setting[0])
            {
            case '0':
              *enabled = FALSE;
              break;

            case '1':
              *enabled = TRUE;
              break;

            default:
              _dbus_warn ("%s should be 0 or 1 if set, not '%s'", env_var, setting);
              break;
            }
        }
    }

  /* Nothing to report when tracing is off. */
  if (!*enabled)
    return;

  if (old_refcount == -1)
    {
      VALGRIND_PRINTF_BACKTRACE ("%s %p ref stolen (%s)",
                                 obj_name, obj, why);
      _dbus_verbose ("%s %p ref stolen (%s)\n",
                     obj_name, obj, why);
    }
  else
    {
      VALGRIND_PRINTF_BACKTRACE ("%s %p %d -> %d refs (%s)",
                                 obj_name, obj,
                                 old_refcount, new_refcount, why);
      _dbus_verbose ("%s %p %d -> %d refs (%s)\n",
                     obj_name, obj, old_refcount, new_refcount, why);
    }
}
Example #22
0
// Valgrind wrapper for cuMemsetD16.
//
// The original function is invoked first; afterwards the arguments are
// validated and any problem is reported through VALGRIND_PRINTF, followed by a
// single backtrace if at least one problem was found:
//   - dstDevice, us and N must be fully defined,
//   - dstDevice must lie inside a device allocation known to cudagrind for
//     the current context,
//   - the allocation must have room for N 16-bit values starting at dstDevice,
//   - dstDevice must be two byte aligned.
//
// Returns the result of the original cuMemsetD16 unchanged.
CUresult I_WRAP_SONAME_FNNAME_ZZ(libcudaZdsoZa, cuMemsetD16)(CUdeviceptr dstDevice, unsigned short us, size_t N) {
   OrigFn fn;
   CUresult result;
   CUcontext ctx = NULL;
   cgMemListType *nodeMemDst;
   
   int error = 0;
   long vgErrorAddress;
   size_t dstSize;

   VALGRIND_GET_ORIG_FN(fn);
   cgLock();
   CALL_FN_W_WWW(result, fn, dstDevice, us, N);
   
   // Check if function parameters are defined.
   // TODO: Warning or error in case of a partially undefined us?
   vgErrorAddress = VALGRIND_CHECK_MEM_IS_DEFINED(&dstDevice, sizeof(CUdeviceptr));
   if (vgErrorAddress) {
      error++;
      VALGRIND_PRINTF("Error: 'dstDevice' in call to cuMemsetD16 not defined.\n");
   }
   vgErrorAddress = VALGRIND_CHECK_MEM_IS_DEFINED(&us, sizeof(us));
   if (vgErrorAddress) {
      error++;
      VALGRIND_PRINTF("Warning: 'us' in call to cuMemsetD16 is not fully defined.\n");
   }
   vgErrorAddress = VALGRIND_CHECK_MEM_IS_DEFINED(&N, sizeof(size_t));
   if (vgErrorAddress) {
      error++;
      VALGRIND_PRINTF("Error: 'N' in call to cuMemsetD16 not defined.\n");
   }
   
   
   // Fetch current context and look up the allocation dstDevice points into
   cgGetCtx(&ctx);
   nodeMemDst = cgFindMem(cgFindCtx(ctx), dstDevice);
   
   // Check if memory has been allocated
   if (!nodeMemDst) {
      error++;
      VALGRIND_PRINTF("Error: Destination device memory not allocated in call to cuMemsetD16.\n");
   } else {
      // If memory is allocated, check size of available memory:
      // bytes left between dstDevice and the end of the allocation.
      dstSize = nodeMemDst->size - (dstDevice - nodeMemDst->dptr);
      // Compare element counts instead of byte counts: the product
      // sizeof(unsigned short) * N wraps around for very large N, which would
      // let an oversized request slip through. For all N whose product does
      // not wrap this is equivalent to 'dstSize < sizeof(unsigned short) * N'.
      if (N > dstSize / sizeof(unsigned short)) {
         error++;
         // NOTE: the expected byte count printed below can itself wrap when N
         //       is large enough to overflow size_t.
         VALGRIND_PRINTF("Error: Allocated device memory too small in call to cuMemsetD16.\n"
                         "       Expected %lu allocated bytes but only found %lu.\n",
                         sizeof(unsigned short) * N, dstSize);
      }
      
      // Check if pointer is properly two byte aligned.
      // TODO: Is this a valid check?
      if (dstDevice % 2) {
         error++;
         VALGRIND_PRINTF("Error: Pointer dstDevice in call to cuMemsetD16 not two byte aligned.\n");
      }
   }
   
   // Emit one backtrace for all problems reported above
   if (error) {
      VALGRIND_PRINTF_BACKTRACE("");
   }
   
   cgUnlock();
   return result;
}
Example #23
0
// Read the object stored on `page`, deserialized with `type`, into *obj.
//
// The page cache is consulted first (rl_read_from_cache). On a cache hit with
// `cache` == 0 the cached object is serialized and deserialized again, so *obj
// is replaced by whatever type->deserialize produces from that round trip. On
// a miss the raw page is loaded through the configured driver (file or
// memory), deserialized, and - when `cache` is non-zero - inserted into
// db->read_pages at the position reported by rl_search_cache.
//
// Returns:
//   RL_FOUND          the object was read (RL_OK is mapped to RL_FOUND)
//   RL_NOT_FOUND      the page could not be read from the driver
//   RL_UNEXPECTED     unknown driver type, or the page was already in
//                     db->read_pages although the cache lookup missed
//   RL_OUT_OF_MEMORY  an allocation failed
//   or any error code returned by type->serialize / type->deserialize, or
//   whatever rl_read_from_cache returned on a cache hit with `cache` set.
//
// NOTE(review): RL_MALLOC and RL_CALL are project macros not visible here;
// presumably they set retval and jump to `cleanup` on failure - confirm.
int rl_read(rlite *db, rl_data_type *type, long page, void *context, void **obj, int cache)
{
	// fprintf(stderr, "r %ld %s\n", page, type->name);
#ifdef RL_DEBUG
	// keep: set once `data` has been handed over to the cached page object
	int keep = 0;
	long initial_page_size = db->page_size;
	// Page 0 may only be read as the header type
	if (page == 0 && type != &rl_data_type_header) {
		VALGRIND_PRINTF_BACKTRACE("Unexpected");
		return RL_UNEXPECTED;
	}
#endif
	unsigned char *data = NULL;
	int retval;
	unsigned char *serialize_data;
	retval = rl_read_from_cache(db, type, page, context, obj);
	if (retval != RL_NOT_FOUND) {
		if (!cache) {
			// Round-trip the cached object through serialize/deserialize
			RL_MALLOC(serialize_data, db->page_size * sizeof(unsigned char));
			retval = type->serialize(db, *obj, serialize_data);
			if (retval != RL_OK) {
				rl_free(serialize_data);
				return retval;
			}
			retval = type->deserialize(db, obj, context, serialize_data);
			rl_free(serialize_data);
			if (retval != RL_OK) {
				return retval;
			}
			retval = RL_FOUND;
		}
		return retval;
	}
	// Cache miss: load the raw page bytes from the driver
	RL_MALLOC(data, db->page_size * sizeof(unsigned char));
	if (db->driver_type == RL_FILE_DRIVER) {
		rl_file_driver *driver = db->driver;
		RL_CALL(file_driver_fp, RL_OK, db);
		fseek(driver->fp, page * db->page_size, SEEK_SET);
		size_t read = fread(data, sizeof(unsigned char), db->page_size, driver->fp);
		if (read != (size_t)db->page_size) {
			// A short read of page 0 is reported silently
			if (page > 0) {
#ifdef RL_DEBUG
				print_cache(db);
#endif
				fprintf(stderr, "Unable to read page %ld on line %d\n", page, __LINE__);
				perror(NULL);
			}
			retval = RL_NOT_FOUND;
			goto cleanup;
		}
	}
	else if (db->driver_type == RL_MEMORY_DRIVER) {
		rl_memory_driver *driver = db->driver;
		// The whole page has to lie inside the in-memory buffer
		if ((page + 1) * db->page_size > driver->datalen) {
			fprintf(stderr, "Unable to read page %ld on line %d\n", page, __LINE__);
			retval = RL_NOT_FOUND;
			goto cleanup;
		}
		memcpy(data, &driver->data[page * db->page_size], sizeof(unsigned char) * db->page_size);
	}
	else {
		fprintf(stderr, "Unexpected driver %d when asking for page %ld\n", db->driver_type, page);
		retval = RL_UNEXPECTED;
		goto cleanup;
	}

	// Locate the insertion position; the page must not be listed yet
	long pos;
	retval = rl_search_cache(db, type, page, NULL, &pos, context, db->read_pages, db->read_pages_len);
	if (retval != RL_NOT_FOUND) {
		fprintf(stderr, "Unexpectedly found page in cache\n");
		retval = RL_UNEXPECTED;
		goto cleanup;
	}

	retval = type->deserialize(db, obj, context ? context : type, data);
	if (retval != RL_OK) {
		goto cleanup;
	}

	if (cache) {
		rl_ensure_pages(db);
		rl_page *page_obj;
		page_obj = rl_malloc(sizeof(*page_obj));
		if (!page_obj) {
			// Without a cache entry nobody would own the object: destroy it
			if (obj) {
				if (type->destroy && *obj) {
					type->destroy(db, *obj);
				}
				*obj = NULL;
			}
			retval = RL_OUT_OF_MEMORY;
			goto cleanup;
		}
		page_obj->page_number = page;
		page_obj->type = type;
		page_obj->obj = obj ? *obj : NULL;
#ifdef RL_DEBUG
		keep = 1;
		if (initial_page_size != db->page_size) {
			// Page size changed while deserializing: resize the raw buffer
			page_obj->serialized_data = rl_realloc(data, db->page_size * sizeof(unsigned char));
			if (page_obj->serialized_data == NULL) {
				rl_free(page_obj);
				retval = RL_OUT_OF_MEMORY;
				goto cleanup;
			}
			data = page_obj->serialized_data;
			if (db->page_size > initial_page_size) {
				memset(&data[initial_page_size], 0, db->page_size - initial_page_size);
			}
		}
		else {
			page_obj->serialized_data = data;
		}

		// From here on `data` and page_obj->serialized_data are the same buffer.
		serialize_data = calloc(db->page_size, sizeof(unsigned char));
		if (!serialize_data) {
			// Bail out instead of continuing with a NULL buffer and a freed
			// page_obj. The raw buffer is released exactly once, by the
			// cleanup section below (keep is reset so it always runs).
			rl_free(page_obj);
			keep = 0;
			retval = RL_OUT_OF_MEMORY;
			goto cleanup;
		}
		retval = type->serialize(db, obj ? *obj : NULL, serialize_data);
		if (retval != RL_OK) {
			// The page is not going to be cached: release what was built for it
			rl_free(serialize_data);
			rl_free(page_obj);
			keep = 0;
			goto cleanup;
		}
		// Sanity check: re-serializing must reproduce the bytes that were read
		if (memcmp(data, serialize_data, db->page_size) != 0) {
			fprintf(stderr, "serialize unserialized data mismatch\n");
			long i;
			for (i = 0; i < db->page_size; i++) {
				if (serialize_data[i] != data[i]) {
					fprintf(stderr, "at position %ld expected %d, got %d\n", i, serialize_data[i], data[i]);
				}
			}
		}
		rl_free(serialize_data);
#endif
		// Open a slot at `pos` and insert the new page entry
		if (pos < db->read_pages_len) {
			memmove(&db->read_pages[pos + 1], &db->read_pages[pos], sizeof(rl_page *) * (db->read_pages_len - pos));
		}
		db->read_pages[pos] = page_obj;
		db->read_pages_len++;
	}
	if (retval == RL_OK) {
		retval = RL_FOUND;
	}
cleanup:
#ifdef RL_DEBUG
	// In debug builds the raw buffer lives on inside the cached page object
	if (retval != RL_FOUND || !keep) {
		rl_free(data);
	}
#endif
#ifndef RL_DEBUG
	rl_free(data);
#endif
	return retval;
}
Example #24
0
// Valgrind wrapper for cuMemcpy2DAsync.
//
// Verifies that hStream is defined, then records on the tracked destination
// and source device memory/arrays which stream is using them. The `locked`
// field is used as a bit set: the destination side sets bit 1, the source
// side sets bit 2. A conflict is reported when a region is already marked and
// the recorded stream differs from hStream. Host memory types are not tracked.
//
// The copy itself is carried out by calling cuMemcpy2D(pCopy); hStream is not
// passed on. Returns the result of that call.
CUresult I_WRAP_SONAME_FNNAME_ZZ(libcudaZdsoZa, cuMemcpy2DAsync)(const CUDA_MEMCPY2D *pCopy, CUstream hStream) {
    CUcontext currentCtx = NULL;
    int       problems = 0;
    long      undefinedAt;

    undefinedAt = VALGRIND_CHECK_MEM_IS_DEFINED(&hStream, sizeof(CUstream));
    if (undefinedAt != 0) {
        problems += 1;
        VALGRIND_PRINTF("Error: 'hStream' in call to cuMemcpy2DAsync not defined.\n");
    }

    cgLock();
    cgGetCtx(&currentCtx);

    // Destination side: complain if bit 2 is set by another stream, then set bit 1.
    if (pCopy->dstMemoryType == CU_MEMORYTYPE_DEVICE) {
        cgMemListType *dstMem = cgFindMem(cgFindCtx(currentCtx), pCopy->dstDevice);

        if (dstMem != NULL) {
            if ((dstMem->locked & 2) != 0 && dstMem->stream != hStream) {
                problems += 1;
                VALGRIND_PRINTF("Error: Concurrent write and read access by different streams.\n");
            }

            dstMem->locked |= 1;
            dstMem->stream = hStream;
        }
    } else if (pCopy->dstMemoryType == CU_MEMORYTYPE_ARRAY) {
        cgArrListType *dstArr = cgFindArr(cgFindCtx(currentCtx), pCopy->dstArray);

        if (dstArr != NULL) {
            if ((dstArr->locked & 2) != 0 && dstArr->stream != hStream) {
                problems += 1;
                VALGRIND_PRINTF("Error: Concurrent write and read access to array by different streams.\n");
            }

            dstArr->locked |= 1;
            dstArr->stream = hStream;
        }
    }

    // Source side: complain if any lock bit is set by another stream, then set bit 2.
    if (pCopy->srcMemoryType == CU_MEMORYTYPE_DEVICE) {
        cgMemListType *srcMem = cgFindMem(cgFindCtx(currentCtx), pCopy->srcDevice);

        if (srcMem != NULL) {
            if (srcMem->locked != 0 && srcMem->stream != hStream) {
                problems += 1;
                VALGRIND_PRINTF("Error: Concurrent write and read access by different streams.\n");
            }

            srcMem->locked |= 2;
            srcMem->stream = hStream;
        }
    } else if (pCopy->srcMemoryType == CU_MEMORYTYPE_ARRAY) {
        cgArrListType *srcArr = cgFindArr(cgFindCtx(currentCtx), pCopy->srcArray);

        if (srcArr != NULL) {
            if (srcArr->locked != 0 && srcArr->stream != hStream) {
                problems += 1;
                VALGRIND_PRINTF("Error: Concurrent write and read access to array by different streams.\n");
            }

            srcArr->locked |= 2;
            srcArr->stream = hStream;
        }
    }

    cgUnlock();

    // One backtrace covers every problem printed above.
    if (problems != 0) {
        VALGRIND_PRINTF_BACKTRACE("");
    }

    return cuMemcpy2D(pCopy);
}