Пример #1
0
TEST(HostAlloc, FlagRetrieval) {
    cudaError_t ret;
    void * ptrs[8];
    unsigned int flags[8];

    int device;

    ret = cudaGetDevice(&device);
    ASSERT_EQ(cudaSuccess, ret);

    struct cudaDeviceProp prop;
    ret = cudaGetDeviceProperties(&prop, device);
    ASSERT_EQ(cudaSuccess, ret);

    for (size_t i = 0; i < (sizeof(flags) / sizeof(flags[0])); i++) {
        unsigned int flag = cudaHostAllocDefault;

        if (i & 0x1) {
            flag |= cudaHostAllocPortable;
        }

        if (i & 0x2) {
            flag |= cudaHostAllocMapped;
        }

        if (i & 0x4) {
            flag |= cudaHostAllocWriteCombined;
        }

        ret = cudaHostAlloc(&ptrs[i], 4, flag);
        ASSERT_EQ(cudaSuccess, ret);

        flags[i] = flag;
    }

    for (size_t i = 0; i < (sizeof(flags) / sizeof(flags[0])); i++) {
        unsigned int flag;
        ret = cudaHostGetFlags(&flag, ptrs[i]);
        ASSERT_EQ(cudaSuccess, ret);

        const unsigned int expected = flags[i] |
                                      (prop.canMapHostMemory ? cudaHostAllocMapped : 0);
        EXPECT_EQ(expected, flag);
    }

    for (size_t i = 0; i < (sizeof(flags) / sizeof(flags[0])); i++) {
        ret = cudaFreeHost(ptrs[i]);
        EXPECT_EQ(cudaSuccess, ret);
    }
}
Пример #2
0
void AsyncCopier::copyHtoD(void* dest, const void* src, size_t size) {
  VLOG(1) << "copyHtoD " << size;
  auto pdest = static_cast<uint8_t*>(dest);
  auto psrc = static_cast<const uint8_t*>(src);

  unsigned int flags;
  auto err = cudaHostGetFlags(&flags, const_cast<void*>(src));
  if (err == cudaSuccess) {
    // Page-locked using cudaHostAlloc / cudaHostRegister, copy directly.
    checkCudaError(cudaMemcpyAsync(dest, src, size, cudaMemcpyHostToDevice),
                   "cudaMemcpyAsync");
    return;
  } else if (err != cudaErrorInvalidValue) {
    checkCudaError(err, "invalid return code from cudaMemcpyAsync");
  }
  cudaGetLastError();  // reset last error
  // This is dicey -- what if another kernel has completed with an error?
  // But there's nothing else we can do, as any cuda function may return an
  // error from a previous kernel launch.

  if (size > bufferSize_) {
    // Copy synchronously.
    checkCudaError(cudaMemcpy(dest, src, size, cudaMemcpyHostToDevice),
                   "cudaMemcpy");
    return;
  }

  Event* eventToWait = nullptr;

  auto copyRange = [this, &size, &pdest, &psrc] (AllocatedBlock& range) {
    size_t n = std::min(size, range.length);
    range.length = n;
    VLOG(1) << "Copy " << range.start << " + " << n;
    auto bufPtr = buffer_.get() + range.start;
    memcpy(bufPtr, psrc, n);
    checkCudaError(cudaMemcpyAsync(pdest, bufPtr, n, cudaMemcpyHostToDevice),
                   "cudaMemcpyAsync");
    pdest += n;
    psrc += n;
    size -= n;
    checkCudaError(cudaEventRecord(*range.event->event), "cudaEventRecord");
    allocated_.push_back(range);
  };

  for (;;) {
    {
      std::lock_guard<std::mutex> lock(mutex_);

      if (eventToWait) {
        releaseEventLocked(eventToWait);
        eventToWait = nullptr;
      }

      // Always reap
      while (!allocated_.empty() && pollEvent(allocated_.front().event)) {
        releaseEventLocked(allocated_.front().event);
        allocated_.pop_front();
      }

      auto ranges = getRangesLocked();
      if (!ranges.empty()) {
        auto ev = getEventLocked();
        for (auto it = ranges.begin(); size != 0 && it != ranges.end(); ++it) {
          auto& range = *it;
          ++ev->refCount;
          range.event = ev;
          copyRange(range);
        }
        releaseEventLocked(ev);
        if (size == 0) {
          break;
        }
      }
      // Sigh, we have to wait.
      eventToWait = allocated_.front().event;
      ++eventToWait->refCount;
    }

    DCHECK(eventToWait);
    VLOG(1) << "Waiting, remaining " << size;
    waitEvent(eventToWait);
  }
  VLOG(1) << "End copyHtoD";

  DCHECK(!eventToWait);
}
Пример #3
0
cudaError_t WINAPI wine_cudaHostGetFlags(unsigned int *pFlags, void *pHost) {
    WINE_TRACE("\n");
    return cudaHostGetFlags( pFlags, pHost );
}