// pthread_mutex_destroy
PTH_FUNC(int, pthreadZumutexZudestroy, // pthread_mutex_destroy
              pthread_mutex_t *mutex)
{
   int    ret;
   OrigFn fn;
   VALGRIND_GET_ORIG_FN(fn);
   if (TRACE_PTH_FNS) {
      fprintf(stderr, "<< pthread_mxdestroy %p", mutex); fflush(stderr);
   }

   DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_MUTEX_DESTROY_PRE,
               pthread_mutex_t*,mutex);

   CALL_FN_W_W(ret, fn, mutex);

   if (ret != 0) {
      DO_PthAPIerror( "pthread_mutex_destroy", ret );
   }

   if (TRACE_PTH_FNS) {
      fprintf(stderr, " :: mxdestroy -> %d >>\n", ret);
   }
   return ret;
}
Example #2
0
CUresult I_WRAP_SONAME_FNNAME_ZZ(libcudaZdsoZa, cuMemsetD2D32)(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height) {
   OrigFn fn;
   CUresult result;
   CUcontext ctx = NULL;
   cgMemListType *nodeMemDst;
   
   int error = 0;
   long vgErrorAddress;
   size_t dstSize;

   VALGRIND_GET_ORIG_FN(fn);
   cgLock();
   CALL_FN_W_5W(result, fn, dstDevice, dstPitch, ui, Width, Height);
   
   // Check if function parameters are defined.
   // TODO: Warning or error in case of a partially undefined ui?
   vgErrorAddress = VALGRIND_CHECK_MEM_IS_DEFINED(&dstDevice, sizeof(CUdeviceptr));
   if (vgErrorAddress) {
      error++;
      VALGRIND_PRINTF("Error: 'dstDevice' in call to cuMemsetD2D32 not defined.\n");
   }
   vgErrorAddress = VALGRIND_CHECK_MEM_IS_DEFINED(&dstPitch, sizeof(size_t));
   if (vgErrorAddress) {
      error++;
      VALGRIND_PRINTF("Error: 'dstPitch' in call to cuMemsetD2D32 not defined.\n");
   }
   vgErrorAddress = VALGRIND_CHECK_MEM_IS_DEFINED(&ui, sizeof(ui));
   if (vgErrorAddress) {
      error++;
      VALGRIND_PRINTF("Warning: 'ui' in call to cuMemsetD2D32 is not fully defined.\n");
   }
   vgErrorAddress = VALGRIND_CHECK_MEM_IS_DEFINED(&Width, sizeof(size_t));
   if (vgErrorAddress) {
      error++;
      VALGRIND_PRINTF("Error: 'Width' in call to cuMemsetD2D32 not defined.\n");
   }
   vgErrorAddress = VALGRIND_CHECK_MEM_IS_DEFINED(&Height, sizeof(size_t));
   if (vgErrorAddress) {
      error++;
      VALGRIND_PRINTF("Error: 'Height' in call to cuMemsetD2D32 not defined.\n");
   }
   
   // Fetch current context
   cgGetCtx(&ctx);
   nodeMemDst = cgFindMem(cgFindCtx(ctx), dstDevice);
   
   // Check if memory has been allocated
   if (!nodeMemDst) {
      error++;
      VALGRIND_PRINTF("Error: Destination device memory not allocated in call to cuMemsetD2D32.\n");
   } else {
      // If memory is allocated, check size of available memory
      dstSize = nodeMemDst->size - (dstDevice - nodeMemDst->dptr);
      // The whole memory block of dstPitch*Height must fit into memory
      if (dstSize < sizeof(ui) * dstPitch * Height) {
         error++;
         VALGRIND_PRINTF("Error: Allocated device memory too small in call to cuMemsetD2D32.\n"
                         "       Expected %lu allocated bytes but only found %lu.\n",
                         sizeof(ui) * dstPitch * Height, dstSize);
      }
      
      // Check if dstDevice and dstPitch are both properly aligned
      // TODO: Is this a valid check? (see also cuMemsetD32)
      if (dstDevice % 4) {
         error++;
         VALGRIND_PRINTF("Error: Pointer dstDevice in call to cuMemsetD2D32 not four byte aligned.\n");
      }
      if (dstPitch % 4) {
         error++;
         VALGRIND_PRINTF("Error: Destination pitch in call to cuMemsetD2D32 not four byte aligned.\n");
      }
   }
   
   // Make sure pitch is big enough to accommodate asked for Width
   if (dstPitch < Width) {
      error++;
      VALGRIND_PRINTF("Error: dstPitch smaller than Width in call to cuMemsetD2D32.\n");
   }
   
   if (error) {
      VALGRIND_PRINTF_BACKTRACE("");
   }
   
   cgUnlock();
   return result;
}
// pthread_create
PTH_FUNC(int, pthreadZucreateZa, // pthread_create*
         pthread_t *thread, const pthread_attr_t *attr,
         void *(*start) (void *), void *arg)
{
   int    res;
   int    ret;
   OrigFn fn;
#if defined(ALLOCATE_THREAD_ARGS_ON_THE_STACK)
   DrdPosixThreadArgs thread_args;
#endif
   DrdPosixThreadArgs* thread_args_p;

   VALGRIND_GET_ORIG_FN(fn);

#if defined(ALLOCATE_THREAD_ARGS_ON_THE_STACK)
   thread_args_p = &thread_args;
#else
   thread_args_p = malloc(sizeof(*thread_args_p));
#endif
   assert(thread_args_p);

   thread_args_p->start           = start;
   thread_args_p->arg             = arg;
#if defined(WAIT_UNTIL_CREATED_THREAD_STARTED)
   DRD_IGNORE_VAR(thread_args_p->wrapper_started);
   thread_args_p->wrapper_started = 0;
#endif
   /*
    * Find out whether the thread will be started as a joinable thread
    * or as a detached thread. If no thread attributes have been specified,
    * this means that the new thread will be started as a joinable thread.
    */
   thread_args_p->detachstate = PTHREAD_CREATE_JOINABLE;
   if (attr)
   {
      if (pthread_attr_getdetachstate(attr, &thread_args_p->detachstate) != 0)
      {
         assert(0);
      }
   }
   assert(thread_args_p->detachstate == PTHREAD_CREATE_JOINABLE
          || thread_args_p->detachstate == PTHREAD_CREATE_DETACHED);


   DRD_(entering_pthread_create)();
   CALL_FN_W_WWWW(ret, fn, thread, attr, DRD_(thread_wrapper), thread_args_p);
   DRD_(left_pthread_create)();

#if defined(WAIT_UNTIL_CREATED_THREAD_STARTED)
   if (ret == 0)
   {
      /*
       * Wait until the thread wrapper started.
       * @todo Find out why some regression tests fail if thread arguments are
       *   passed via dynamically allocated memory and if the loop below is
       *   removed.
       */
      while (! thread_args_p->wrapper_started)
      {
         sched_yield();
      }
   }

#if defined(ALLOCATE_THREAD_ARGS_DYNAMICALLY)
   free(thread_args_p);
#endif

#endif

   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__DRD_START_NEW_SEGMENT,
                              pthread_self(), 0, 0, 0, 0);

   return ret;
}
Example #4
0
CUresult I_WRAP_SONAME_FNNAME_ZZ(libcudaZdsoZa, cuMemcpy3D)(const CUDA_MEMCPY3D *pCopy) {
   OrigFn      fn;
   CUresult    result;
   CUcontext  ctx = NULL;
   int error = 0, error_addressable, error_defined;
   long vgErrorAddress = 0, vgErrorAddressDefined = 0;
   
     
   VALGRIND_GET_ORIG_FN(fn);
   cgLock();
   CALL_FN_W_W(result, fn, pCopy);

   // Check if pCopy is null, not allocated or undefined.
   // For obvious reasons we skip the following checks if either condition is true.
   if (!pCopy) {
      error++;
      VALGRIND_PRINTF_BACKTRACE("Error: pCopy in call to cuMemcpy3D is NULL.\n");
      cgUnlock();
      return result;
   } else if ( vgErrorAddress = VALGRIND_CHECK_MEM_IS_ADDRESSABLE(pCopy, sizeof(CUDA_MEMCPY3D)) ) {
      error++;
      VALGRIND_PRINTF_BACKTRACE("Error: pCopy in call to cuMemcpy3D points to unallocated memory.\n");
      cgUnlock();
      return result;
   } // It makes no sense to check _IS_DEFINED on the whole structure, since only part of it is used!
   
   // General checks of constaints imposed by reference manual
   if (pCopy->srcMemoryType != CU_MEMORYTYPE_ARRAY) {
      if (pCopy->srcPitch && pCopy->srcPitch < pCopy->WidthInBytes + pCopy->srcXInBytes) {
         error++;
         VALGRIND_PRINTF("Error: srcPitch < WidthInBytes+srcXInBytes in cuMemcpy3D.\n");
      }
      if (pCopy->srcHeight && pCopy->srcHeight < pCopy->Height + pCopy->srcY) {
         error++;
         VALGRIND_PRINTF("Error: srcHeight < Height+srcY in cuMemcpy3D.\n");
      }
   }
   if (pCopy->dstMemoryType != CU_MEMORYTYPE_ARRAY) {
      if (pCopy->dstPitch && pCopy->dstPitch < pCopy->WidthInBytes + pCopy->dstXInBytes) {
         error++;
         VALGRIND_PRINTF("Error: dstPitch < WidthInBytes+dstXInBytes in cuMemcpy3D.\n");
      }
      if (pCopy->dstHeight && pCopy->dstHeight < pCopy->Height + pCopy->dstY) {
         error++;
         VALGRIND_PRINTF("Error: dstHeight < Height+dstY in cuMemcpy3D.\n");
      }
   }
   switch (pCopy->srcMemoryType) {
      case CU_MEMORYTYPE_UNIFIED:
         // TODO: How do we handle unified memory?
         break;
      case CU_MEMORYTYPE_HOST: {
         void *line;
      
         error_addressable = 0;
         error_defined = 0;
         // TODO: Is Height, Depth > 1, even for 1D/2D copy operations?   
         for (int i = 0 ; i < pCopy->Height ; i++) {
            for (int j = 0 ; j < pCopy->Depth ; j++) {
               line = (void*)(
                        (char*)pCopy->srcHost 
                        + ((pCopy->srcZ + j) * pCopy->srcHeight + (pCopy->srcY + i))*pCopy->srcPitch 
                        + pCopy->srcXInBytes
                     );
               vgErrorAddress = VALGRIND_CHECK_MEM_IS_ADDRESSABLE(line, (size_t)pCopy->WidthInBytes);
               if (vgErrorAddress) {
                  error_addressable++;
               } else {
                  vgErrorAddress = VALGRIND_CHECK_MEM_IS_DEFINED(line, (size_t)pCopy->WidthInBytes);
                  if (vgErrorAddress) {
                     error_defined++;
                  }
               }
            }
         }
         // TODO: Can we give precise information about location of error?
         if (error_addressable) {
            error++;
            VALGRIND_PRINTF("Error: (Part of) source host memory not allocated\n"
                            "       in call to cuMemcpy3D.\n");
         }
         if (error_defined) {
            error++;
            VALGRIND_PRINTF("Error: (Part of) source host memory not defined\n"
                            "       in call to cuMemcpy3D.\n");
         }
         break;
      }
      case CU_MEMORYTYPE_DEVICE: {
         // ptrEnd points to the end of the memory area which pCopy->srcDevice points into
         CUdeviceptr line, ptrEnd;
         cgMemListType *nodeMem;
         
         // TODO: Check if pCopy->srcDevice is defined?
         cgGetCtx(&ctx);
         nodeMem = cgFindMem(cgFindCtx(ctx), pCopy->srcDevice);
         
         // We only track addressable status (whether memory is allocated) for device memory regions
         error_addressable = 0;
         if (nodeMem) {
            ptrEnd = nodeMem->dptr + nodeMem->size;
            /*
            for (int i = 0 ; i < pCopy->Height ; i++) {
               for (int j = 0 ; j < pCopy->Depth ; j++) {
                  line = (CUdeviceptr)(
                           pCopy->srcDevice
                           + ((pCopy->srcZ + j) * pCopy->srcHeight + (pCopy->srcY + i)) * pCopy->srcPitch
                           + pCopy->srcXInBytes
                         );
                  
                  // Is there enough allocated memory left to statisfy the current line?
                  if (ptrEnd - line < pCopy->WidthInBytes) {
                     error_addressable++;
                  }
               }
            }
            */
            
            // Device memory should not be fragmented, so we only check the very last slice of memory
            line = (CUdeviceptr)(
                     pCopy->srcDevice 
                     + (
                        (pCopy->srcZ + pCopy->Depth - 1) * pCopy->srcHeight 
                        + (pCopy->srcY + pCopy->Height - 1)
                       ) * pCopy->srcPitch 
                     + pCopy->srcXInBytes);
            if (ptrEnd - line < pCopy->WidthInBytes) {
               error_addressable++;
            }
         } else {
            error_addressable++;
         }
         
         if (error_addressable) {
            error++;
            VALGRIND_PRINTF("Error: (Part of) source device memory not allocated\n"
                            "       in call to cuMemcpy3D.\n");
         }
         break;
      }
      case CU_MEMORYTYPE_ARRAY: {
         CUDA_ARRAY3D_DESCRIPTOR descriptor;
         int bytesPerElement;
         int widthInBytes;
         
         // Fetch array descriptor ..
         cuArray3DGetDescriptor(&descriptor, pCopy->srcArray);
         bytesPerElement = cgArrDescBytesPerElement(&descriptor);
         if (!bytesPerElement) {
            error++;
            VALGRIND_PRINTF("Error: Unknown Format value in src array descriptor in cuMemcpy3D.\n");
         }
         widthInBytes = bytesPerElement * descriptor.Width;
         // .. and check if dimensions are conform to the ones requested in pCopy
         if (widthInBytes - pCopy->srcXInBytes < pCopy->WidthInBytes) {
            error++;
            VALGRIND_PRINTF("Error: Available width of %u bytes in source array is smaller than\n"
                            "       requested Width of %u bytes in pCopy of cuMemcpy3D.\n", 
                                    widthInBytes - pCopy->srcXInBytes, pCopy->WidthInBytes);
         }
         if (pCopy->Height > 1 && descriptor.Height - pCopy->srcY < pCopy->Height) {
            error++;
            VALGRIND_PRINTF("Error: Available Height of %u in source array is smaller than\n"
                            "       requested Height of %u in pCopy of cuMemcpy3D.\n",
                            descriptor.Height - pCopy->srcY, pCopy->Height);
         }
         if (pCopy->Depth > 1 && descriptor.Depth - pCopy->srcZ < pCopy->Depth) {
            error++;
            VALGRIND_PRINTF("Error: Available Depth of %u in source array is smaller than\n"
                            "       requested Depth of %u in pCopy of cuMemcpy3D.\n",
                            descriptor.Depth - pCopy->srcY, pCopy->Height);
         }
         break;
      }
      default:
         error++;
         VALGRIND_PRINTF("Error: Unknown source memory type %d in cuMemcpy3D\n");
         break;
   }
   
   switch (pCopy->dstMemoryType) {
      case CU_MEMORYTYPE_UNIFIED:
         // TODO: How do we handle unified memory?
         break;
      case CU_MEMORYTYPE_HOST: {
         void *line;
         
         error_addressable = 0;
         error_defined = 0;
         // TODO: Is Height, Depth > 1, even for 1D/2D copy operations?
         for (int i = 0 ; i < pCopy->Height ; i++) {
            for (int j = 0 ; j < pCopy->Depth ; j++) {
               line = (void*)(
                        (char*)pCopy->dstHost 
                        + ((pCopy->dstZ + j) * pCopy->dstHeight + (pCopy->dstY + i))*pCopy->dstPitch 
                        + pCopy->dstXInBytes
                     );
               // Unlike for the source operand we only need to check allocation status here
               vgErrorAddress = VALGRIND_CHECK_MEM_IS_ADDRESSABLE(line, (size_t)pCopy->WidthInBytes);
               if (vgErrorAddress) {
                  error_addressable++;
               }
            }
         }
         // TODO: Can we give precise information about location of error?
         if (error_addressable) {
            error++;
            VALGRIND_PRINTF("Error: (Part of) destination host memory not allocated\n"
                            "       in call to cuMemcpy3D.\n");
         }
         break;
      }
      case CU_MEMORYTYPE_DEVICE: {
         // ptrEnd points to the end of the memory area which pCopy->dstDevice points into
         CUdeviceptr line, ptrEnd;
         cgMemListType *nodeMem;
         
         // TODO: Check if pCopy->dstDevice is defined?
         cgGetCtx(&ctx);
         nodeMem = cgFindMem(cgFindCtx(ctx), pCopy->dstDevice);
         
         // We only track addressable status (whether memory is allocated) for device memory regions
         error_addressable = 0;
         if (nodeMem) {
            ptrEnd = nodeMem->dptr + nodeMem->size;
            /*
            for (int i = 0 ; i < pCopy->Height ; i++) {
               for (int j = 0 ; j < pCopy->Depth ; j++) {
                  line = (CUdeviceptr)(
                           pCopy->dstDevice
                           + ((pCopy->dstZ + j) * pCopy->dstHeight + (pCopy->dstY + i)) * pCopy->dstPitch
                           + pCopy->dstXInBytes
                         );
                  
                  // Is there enough allocated memory left to statisfy the current line?
                  if (ptrEnd - line < pCopy->WidthInBytes) {
                     error_addressable++;
                  }
               }
            }
            */
            
            // Device memory should not be fragmented, so we only check the very last slice of memory
            line = (CUdeviceptr)(
                     pCopy->dstDevice 
                     + (
                        (pCopy->dstZ + pCopy->Depth - 1) * pCopy->dstHeight 
                        + (pCopy->dstY + pCopy->Height - 1)
                       ) * pCopy->dstPitch 
                     + pCopy->dstXInBytes);
            if (ptrEnd - line < pCopy->WidthInBytes) {
               error_addressable++;
            }
         } else {
            error_addressable++;
         }
         
         if (error_addressable) {
            error++;
            VALGRIND_PRINTF("Error: (Part of) destination device memory not allocated\n"
                            "       in call to cuMemcpy3D.\n");
         }
         break;
      }
      case CU_MEMORYTYPE_ARRAY: {
         CUDA_ARRAY3D_DESCRIPTOR descriptor;
         int bytesPerElement;
         int widthInBytes;
         
         // Fetch array descriptor ..
         cuArray3DGetDescriptor(&descriptor, pCopy->dstArray);
         bytesPerElement = cgArrDescBytesPerElement(&descriptor);
         if (!bytesPerElement) {
               error++;
               VALGRIND_PRINTF("Error: Unknown Format value in dst array descriptor in cuMemcpy3D.\n");
         }
         widthInBytes = bytesPerElement * descriptor.Width;
         // .. and check if dimensions are conform to the ones requested in pCopy
         if (widthInBytes - pCopy->dstXInBytes < pCopy->WidthInBytes) {
            error++;
            VALGRIND_PRINTF("Error: Available width of %u bytes in destination array is smaller than\n"
                            "       requested Width of %u bytes in pCopy of cuMemcpy3D.\n", 
                                    widthInBytes - pCopy->dstXInBytes, pCopy->WidthInBytes);
         }
         if (pCopy->Height > 1 && descriptor.Height - pCopy->dstY < pCopy->Height) {
            error++;
            VALGRIND_PRINTF("Error: Available Height of %u in destination array is smaller than\n"
                            "       requested Height of %u in pCopy of cuMemcpy3D.\n",
                            descriptor.Height - pCopy->dstY, pCopy->Height);
         }
         if (pCopy->Depth > 1 && descriptor.Depth - pCopy->dstZ < pCopy->Depth) {
            error++;
            VALGRIND_PRINTF("Error: Available Depth of %u in destination array is smaller than\n"
                            "       requested Depth of %u in pCopy of cuMemcpy3D.\n",
                            descriptor.Depth - pCopy->dstZ, pCopy->Depth);
         }
         break;
      }
      default:
         error++;
         VALGRIND_PRINTF("Error: Unknown destination memory type %d in cuMemcpy3D\n");
         break;
   }
   if (error) {
      VALGRIND_PRINTF_BACKTRACE("   %d errors detected in call to cuMemcpy3D.", error);
   }
   
   cgUnlock();
   return result;
}
Example #5
0
CUresult I_WRAP_SONAME_FNNAME_ZZ(libcudaZdsoZa, cuMemcpyHtoA)(CUarray dstArray, size_t dstOffset, const void *srcHost, size_t ByteCount) {
   OrigFn fn;
   CUresult result;
   CUcontext ctx = NULL;
   cgCtxListType *nodeCtx;
   cgArrListType *nodeArrDst;
   cgMemListType *nodeMemSrc;
   
   long vgErrorAddress;
   int error = 0;
   
   VALGRIND_GET_ORIG_FN(fn);
   cgLock();
   CALL_FN_W_WWWW(result, fn, dstArray, dstOffset, srcHost, ByteCount);
   
   // Check if actual function parameters are defined
   if (VALGRIND_CHECK_MEM_IS_DEFINED(&dstArray, sizeof(CUarray))) {
      error++;
      VALGRIND_PRINTF("Error: dstArray in call to cuMemcpyAtoD is not defined.\n");
   } else if (!dstArray) {
      error++;
      VALGRIND_PRINTF("Error: dstArray in call to cuMemcpyAtoD is NULL.\n");
   }
   if (VALGRIND_CHECK_MEM_IS_DEFINED(&dstOffset, sizeof(size_t))) {
      error++;
      VALGRIND_PRINTF("Error: dstOffset in call to cuMemcpyAtoD is not defined.\n");
   }
   if (VALGRIND_CHECK_MEM_IS_DEFINED(&srcHost, sizeof(CUdeviceptr))) {
      error++;
      VALGRIND_PRINTF("Error: srcHost in call to cuMemcpyAtoD is not defined.\n");
   } else if (!srcHost) {
      error++;
      VALGRIND_PRINTF("Error: srcDevice in call to cuMemcpyAtoD is NULL");
   }
   if (VALGRIND_CHECK_MEM_IS_DEFINED(&ByteCount, sizeof(size_t))) {
      error++;
      VALGRIND_PRINTF("Error: ByteCount in call to cuMemcpyAtoD is not defined.\n");
   }
   
   cgGetCtx(&ctx);
   
   nodeCtx = cgFindCtx(ctx);
   
   nodeArrDst = cgFindArr(nodeCtx, dstArray);
   
   if (srcHost) {
      vgErrorAddress = VALGRIND_CHECK_MEM_IS_ADDRESSABLE(srcHost, ByteCount);
      // Check if memory referenced by srcHost has been allocated.
      if (vgErrorAddress) {
         error++;
         VALGRIND_PRINTF("Error: Source host memory in cuMemcpyHtoA is not not allocated.\n"
                         "       Expected %l bytes but only found %l.\n",
                         ByteCount, vgErrorAddress - (long)srcHost);
      } else {
         // If allocated, now check if host memory is defined.
         vgErrorAddress = VALGRIND_CHECK_MEM_IS_DEFINED(srcHost, ByteCount);
         if (vgErrorAddress) {
            error++;
            VALGRIND_PRINTF("Error: Source host memory in cuMemcpyHtoA is not defined.\n"
                            "       Expected %l bytes but only found %l.\n",
                            ByteCount, vgErrorAddress - (long)srcHost);
         }
      }
   }
   
   if (nodeArrDst) {
      // Check if array is 1-dimensional or big enough in first dimension
      if (nodeArrDst->desc.Height > 1 || nodeArrDst->desc.Depth > 1) {
         if (nodeArrDst->desc.Width - dstOffset < ByteCount) {
            error++;
            VALGRIND_PRINTF("Error: Destination array in cuMemcpyAtoD is 2-dimensional\n"
                            "       and ByteCount bigger than available width in first dimension.\n");
         } else {
            VALGRIND_PRINTF("Warning: Destination array in cuMemcpyAtoD is 2-dimensional.\n");
         }
      } else if (nodeArrDst->desc.Width - dstOffset < ByteCount) { 
            // If array is 1D, check size.
            VALGRIND_PRINTF("Error: Destination array in cuMemcpyAtoD is too small.\n"
                            "       Expected %l bytes but only found %l.\n", 
                            ByteCount, nodeArrDst->desc.Width - dstOffset);
            error++;
         }
   } else {
      error++;
      VALGRIND_PRINTF("Error: Destination array not allocated in call to cuMemcpyAtoD.\n");
   }
   
   cgUnlock();
   return result;
}
Example #6
0
// Copy Host->Device
CUresult I_WRAP_SONAME_FNNAME_ZZ(libcudaZdsoZa, cuMemcpyHtoD)(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount) {
   OrigFn fn;
   CUresult result;
   CUcontext ctx = NULL;
   cgCtxListType *nodeCtx;
   cgMemListType *nodeMem;
   size_t dstSize;
   long vgErrorAddress, vgErrorAddressDstDevice, vgErrorAddressSrcHost;
   
   VALGRIND_GET_ORIG_FN(fn);
   cgLock();

   vgErrorAddressDstDevice = VALGRIND_CHECK_MEM_IS_DEFINED(&dstDevice, sizeof(void*));
   vgErrorAddressSrcHost   = VALGRIND_CHECK_MEM_IS_DEFINED(&srcHost, sizeof(CUdeviceptr));
   // TODO: Currently errors are exclusive .. i.e. with undefined src and NULL
   //       dst pointer, only the undefined pointer is reported.
   if (vgErrorAddressDstDevice || vgErrorAddressSrcHost) {
      VALGRIND_PRINTF("Error:");
      if (vgErrorAddressDstDevice) {
         VALGRIND_PRINTF(" destination device");
         if (vgErrorAddressSrcHost) {
            VALGRIND_PRINTF(" and");
         }
      }
      if (vgErrorAddressSrcHost) {
         VALGRIND_PRINTF(" source host");
      }
      VALGRIND_PRINTF_BACKTRACE(" pointer in cuMemcpyHtoD not defined.\n");
   } else if (dstDevice != 0 && srcHost != NULL) {
      cgGetCtx(&ctx);
      // Check allocation status and available size on device
      nodeCtx = cgFindCtx(ctx);
      nodeMem = cgFindMem(nodeCtx, dstDevice);
      if (!nodeMem) {
         VALGRIND_PRINTF("Error: Device memory during host->device memory copy is not allocated.");
      } else {
         dstSize = nodeMem->size - (dstDevice - nodeMem->dptr);
         if (dstSize < ByteCount) {
            VALGRIND_PRINTF("Error: Allocated device memory too small for host->device memory copy.\n");
            VALGRIND_PRINTF("       Expected %lu allocated bytes but only found %lu.", ByteCount, dstSize);
         }
      }
      if (!nodeMem || dstSize < ByteCount) {
         VALGRIND_PRINTF_BACKTRACE("\n");
      }
      // Check allocation and definedness for host memory
      vgErrorAddress = VALGRIND_CHECK_MEM_IS_ADDRESSABLE(srcHost, ByteCount);
      if (vgErrorAddress) {
         VALGRIND_PRINTF("Error: Host memory during host->device memory copy is not allocated.\n");
         VALGRIND_PRINTF("       Expected %lu allocated bytes but only found %lu.", ByteCount, vgErrorAddress - (long)srcHost);
      } else {
         vgErrorAddress = VALGRIND_CHECK_MEM_IS_DEFINED(srcHost, ByteCount);
         if (vgErrorAddress) {
            VALGRIND_PRINTF("Error: Host memory during host->device memory copy is not defined.\n");
            VALGRIND_PRINTF("       Expected %lu defined bytes but only found %lu.", ByteCount, vgErrorAddress - (long)srcHost);
         }
      }
      if (vgErrorAddress) {
         VALGRIND_PRINTF_BACKTRACE("\n");
      }
   } else {
      VALGRIND_PRINTF("Error: cuMemcpyHtoD called with NULL");
      if (dstDevice == 0) {
	       VALGRIND_PRINTF(" device");
	       if (srcHost == NULL) VALGRIND_PRINTF(" and");
	   }
	   if (srcHost == NULL) {
	      VALGRIND_PRINTF(" host");
	   }
	   VALGRIND_PRINTF_BACKTRACE(" pointer.\n");
   }
   
   CALL_FN_W_WWW(result, fn, dstDevice, srcHost, ByteCount);
   cgUnlock();
   return result;
}
Example #7
0
CUresult I_WRAP_SONAME_FNNAME_ZZ(libcudaZdsoZa, cuMemsetD16)(CUdeviceptr dstDevice, unsigned short us, size_t N) {
   OrigFn fn;
   CUresult result;
   CUcontext ctx = NULL;
   cgMemListType *nodeMemDst;
   
   int error = 0;
   long vgErrorAddress;
   size_t dstSize;

   VALGRIND_GET_ORIG_FN(fn);
   cgLock();
   CALL_FN_W_WWW(result, fn, dstDevice, us, N);
   
   // Check if function parameters are defined.
   // TODO: Warning or error in case of a partially undefined us?
   vgErrorAddress = VALGRIND_CHECK_MEM_IS_DEFINED(&dstDevice, sizeof(CUdeviceptr));
   if (vgErrorAddress) {
      error++;
      VALGRIND_PRINTF("Error: 'dstDevice' in call to cuMemsetD16 not defined.\n");
   }
   vgErrorAddress = VALGRIND_CHECK_MEM_IS_DEFINED(&us, sizeof(us));
   if (vgErrorAddress) {
      error++;
      VALGRIND_PRINTF("Warning: 'us' in call to cuMemsetD16 is not fully defined.\n");
   }
   vgErrorAddress = VALGRIND_CHECK_MEM_IS_DEFINED(&N, sizeof(size_t));
   if (vgErrorAddress) {
      error++;
      VALGRIND_PRINTF("Error: 'N' in call to cuMemsetD16 not defined.\n");
   }
   
   
   // Fetch current context
   cgGetCtx(&ctx);
   nodeMemDst = cgFindMem(cgFindCtx(ctx), dstDevice);
   
   // Check if memory has been allocated
   if (!nodeMemDst) {
      error++;
      VALGRIND_PRINTF("Error: Destination device memory not allocated in call to cuMemsetD16.\n");
   } else {
      // If memory is allocated, check size of available memory
      dstSize = nodeMemDst->size - (dstDevice - nodeMemDst->dptr);
      if (dstSize < sizeof(unsigned short) * N) {
         error++;
         VALGRIND_PRINTF("Error: Allocated device memory too small in call to cuMemsetD16.\n"
                         "       Expected %lu allocated bytes but only found %lu.\n",
                         sizeof(unsigned short) * N, dstSize);
      }
      
      // Check if pointer is properly two byte aligned.
      // TODO: Is this a valid check?
      if (dstDevice % 2) {
         error++;
         VALGRIND_PRINTF("Error: Pointer dstDevice in call to cuMemsetD16 not two byte aligned.\n");
      }
   }
   
   if (error) {
      VALGRIND_PRINTF_BACKTRACE("");
   }
   
   cgUnlock();
   return result;
}