Esempio n. 1
0
// Queues an asynchronous host-to-device copy of numElements values of type
// ElemType from cpuBuffer to gpuBuffer on m_assignStream, then records
// m_assignCompleteEvent on that same stream.
//
// Each CUDA call's status is combined with a message via `||`; the operator that
// consumes that pair is declared outside this excerpt (presumably it reports or
// raises on a non-success status -- confirm against its definition).
void GPUDataTransferer<ElemType>::CopyCPUToGPUAsync(ElemType* cpuBuffer, size_t numElements, ElemType* gpuBuffer)
{
    PrepareDevice(m_deviceId);

    // Size of the transfer in bytes.
    const size_t numBytes = numElements * sizeof(ElemType);

    cudaMemcpyAsync(gpuBuffer, cpuBuffer, numBytes, cudaMemcpyHostToDevice, m_assignStream) || "cudaMemcpyAsync failed";

    // Mark the point in the assign stream that follows the copy.
    cudaEventRecord(m_assignCompleteEvent, m_assignStream) || "cudaEventRecord failed";
}
Esempio n. 2
0
// Queues an asynchronous device-to-host copy of numElements values of type
// ElemType from gpuBuffer to cpuBuffer on m_fetchStream, then records
// m_fetchCompleteEvent on that same stream.
//
// Each CUDA call's status is combined with a message via `||`; the operator that
// consumes that pair is declared outside this excerpt (presumably it reports or
// raises on a non-success status -- confirm against its definition).
void GPUDataTransferer<ElemType>::CopyGPUToCPUAsync(ElemType* gpuBuffer, size_t numElements, ElemType* cpuBuffer)
{
    PrepareDevice(m_deviceId);

    // Size of the transfer in bytes.
    const size_t numBytes = numElements * sizeof(ElemType);

    cudaMemcpyAsync(cpuBuffer, gpuBuffer, numBytes, cudaMemcpyDeviceToHost, m_fetchStream) || "cudaMemcpyAsync failed";

    // Mark the point in the fetch stream that follows the copy.
    cudaEventRecord(m_fetchCompleteEvent, m_fetchStream) || "cudaEventRecord failed";
}
Esempio n. 3
0
// Constructs a transferer bound to deviceId.
//
// Always creates the fetch-complete and assign-complete events (timing disabled).
// When useConcurrentStreams is true and m_fetchStream is still NULL, also creates
// the non-blocking fetch and assign streams.
// NOTE(review): the NULL test implies m_fetchStream already holds a defined value
// when this constructor runs (e.g. a static or default-initialized member); its
// declaration is not visible in this excerpt -- confirm.
GPUDataTransferer<ElemType>::GPUDataTransferer(int deviceId, bool useConcurrentStreams)
    : m_deviceId(deviceId)
{
    PrepareDevice(m_deviceId);

    // Completion events.
    // Note: cudaEventBlockingSync (which supposedly yields the process) must NOT be used here --
    // it totally breaks cudaEventSynchronize(), making it randomly take 50 or 100 ms.
    const unsigned int eventFlags = cudaEventDisableTiming;
    cudaEventCreateWithFlags(&m_fetchCompleteEvent, eventFlags) || "cudaEventCreateWithFlags failed";
    cudaEventCreateWithFlags(&m_assignCompleteEvent, eventFlags) || "cudaEventCreateWithFlags failed";

#pragma warning(disable : 4127)
    // Nothing more to do unless concurrent streams were requested and do not exist yet.
    if (!useConcurrentStreams || (m_fetchStream != NULL))
        return;

    const unsigned int streamFlags = cudaStreamNonBlocking;
    cudaStreamCreateWithFlags(&m_fetchStream, streamFlags) || "cudaStreamCreateWithFlags failed";
    cudaStreamCreateWithFlags(&m_assignStream, streamFlags) || "cudaStreamCreateWithFlags failed";
}
Esempio n. 4
0
// Destroys m_stream on the device identified by m_deviceId.
//
// Never lets an exception escape: if selecting the device throws, the destructor
// returns without destroying the stream; if cudaStreamDestroy fails, the failure
// is written to std::cerr and otherwise ignored.
PrefetchGPUDataTransferer::~PrefetchGPUDataTransferer()
{
    try
    {
        PrepareDevice(m_deviceId);
    }
    catch (...)
    {
        // Per the original author's note, the error has already been logged by
        // the time it reaches this handler; just give up on the cleanup.
        return;
    }

    const cudaError_t status = cudaStreamDestroy(m_stream);
    if (status == cudaSuccess)
        return;

    // Report the failure; throwing from a destructor is not an option.
    std::cerr << "cudaStreamDestroy failed (PrefetchGPUDataTransferer dtor): "
              << cudaGetErrorString(status) << " (cuda error " << status << ")" << std::endl;
}
Esempio n. 5
0
void GranularGPUDataTransferer::CopyCPUToGPUAsync(const void* cpuBuffer, size_t numElements, size_t elementSize, void* gpuBuffer)
{
    PrepareDevice(m_deviceId);
    cudaMemcpyAsync(gpuBuffer, cpuBuffer, numElements * elementSize, cudaMemcpyHostToDevice, m_assignStream) || "cudaMemcpyAsync failed";
}
Esempio n. 6
0
void GranularGPUDataTransferer::WaitForCopyGPUToCPU()
{
    PrepareDevice(m_deviceId);
    cudaEventSynchronize(m_fetchCompleteEvent) || "cudaEventSynchronize failed";
}
Esempio n. 7
0
void GPUDataTransferer::WaitForCopyCPUToGPUAsync()
{
    PrepareDevice(m_inner->m_deviceId);
    SyncEvent(m_inner->m_assignCompleteEvent);
}
Esempio n. 8
0
// Calls cudaStreamWaitEvent so that m_assignStream waits on m_syncEvent, after
// selecting the device identified by m_deviceId.
//
// The CUDA status is combined with a message via `||`; the operator consuming
// that pair is declared outside this excerpt (presumably it reports or raises on
// a non-success status -- confirm against its definition).
void GranularGPUDataTransferer::WaitForSyncPointOnAssignStreamAsync()
{
    PrepareDevice(m_deviceId);

    const unsigned int noFlags = 0; // flags 'must be 0'
    cudaStreamWaitEvent(m_assignStream, m_syncEvent, noFlags) || "cudaStreamWaitEvent failed";
}
Esempio n. 9
0
// Records m_syncEvent on the stream returned by GetStream(), after selecting the
// device identified by m_deviceId.
// NOTE(review): GetStream() is defined outside this excerpt; judging by this
// function's name it returns the compute stream -- confirm.
//
// The CUDA status is combined with a message via `||`; the operator consuming
// that pair is declared outside this excerpt (presumably it reports or raises on
// a non-success status -- confirm against its definition).
void GranularGPUDataTransferer::RecordComputeStreamSyncPoint()
{
    PrepareDevice(m_deviceId);

    // The message names the actual API ("cudaEventRecord") so that failures can be
    // found by searching logs for the CUDA function name.
    cudaEventRecord(m_syncEvent, GetStream()) || "cudaEventRecord failed";
}
Esempio n. 10
0
// Selects the device identified by m_deviceId and passes m_fetchCompleteEvent to
// SyncEvent.
// NOTE(review): SyncEvent is defined outside this excerpt; presumably it waits
// for the event to complete -- confirm.
void GPUDataTransferer<ElemType>::WaitForCopyGPUToCPUAsync()
{
    // The device must be current before the event is handed to SyncEvent.
    PrepareDevice(m_deviceId);
    SyncEvent(m_fetchCompleteEvent);
}