예제 #1
0
파일: gpu_data.cpp 프로젝트: 20337112/dlib
    void memcpy (
        gpu_data& dest, 
        const gpu_data& src
    )
    {
        DLIB_CASSERT(dest.size() == src.size());
        if (src.size() == 0 || &dest == &src)
            return;

        memcpy(dest,0, src, 0, src.size());
    }
예제 #2
0
파일: gpu_data.cpp 프로젝트: 20337112/dlib
    void memcpy (
        gpu_data& dest, 
        size_t dest_offset,
        const gpu_data& src,
        size_t src_offset,
        size_t num
    )
    {
        DLIB_CASSERT(dest_offset + num <= dest.size());
        DLIB_CASSERT(src_offset + num <= src.size());
        if (num == 0)
            return;

        // if there is aliasing
        if (&dest == &src && std::max(dest_offset, src_offset) < std::min(dest_offset,src_offset)+num)
        {
            // if they perfectly alias each other then there is nothing to do
            if (dest_offset == src_offset)
                return;
            else
                std::memmove(dest.host()+dest_offset, src.host()+src_offset, sizeof(float)*num);
        }
        else
        {
            // if we write to the entire thing then we can use device_write_only()
            if (dest_offset == 0 && num == dest.size())
            {
                // copy the memory efficiently based on which copy is current in each object.
                if (src.device_ready())
                    CHECK_CUDA(cudaMemcpy(dest.device_write_only(), src.device()+src_offset,  num*sizeof(float), cudaMemcpyDeviceToDevice));
                else 
                    CHECK_CUDA(cudaMemcpy(dest.device_write_only(), src.host()+src_offset,    num*sizeof(float), cudaMemcpyHostToDevice));
            }
            else
            {
                // copy the memory efficiently based on which copy is current in each object.
                if (dest.device_ready() && src.device_ready())
                    CHECK_CUDA(cudaMemcpy(dest.device()+dest_offset, src.device()+src_offset, num*sizeof(float), cudaMemcpyDeviceToDevice));
                else if (!dest.device_ready() && src.device_ready())
                    CHECK_CUDA(cudaMemcpy(dest.host()+dest_offset, src.device()+src_offset,   num*sizeof(float), cudaMemcpyDeviceToHost));
                else if (dest.device_ready() && !src.device_ready())
                    CHECK_CUDA(cudaMemcpy(dest.device()+dest_offset, src.host()+src_offset,   num*sizeof(float), cudaMemcpyHostToDevice));
                else 
                    CHECK_CUDA(cudaMemcpy(dest.host()+dest_offset, src.host()+src_offset,     num*sizeof(float), cudaMemcpyHostToHost));
            }
        }
    }
예제 #3
0
파일: gpu_data.cpp 프로젝트: AbdealiJK/dlib
    void memcpy (
        gpu_data& dest, 
        const gpu_data& src
    )
    {
        DLIB_CASSERT(dest.size() == src.size(), "");
        if (src.size() == 0)
            return;

        // copy the memory efficiently based on which copy is current in each object.
        if (dest.device_ready() && src.device_ready())
            CHECK_CUDA(cudaMemcpy(dest.device(), src.device(),          src.size()*sizeof(float), cudaMemcpyDeviceToDevice));
        else if (!dest.device_ready() && src.device_ready())
            CHECK_CUDA(cudaMemcpy(dest.host_write_only(), src.device(), src.size()*sizeof(float), cudaMemcpyDeviceToHost));
        else if (dest.device_ready() && !src.device_ready())
            CHECK_CUDA(cudaMemcpy(dest.device(), src.host(),            src.size()*sizeof(float), cudaMemcpyHostToDevice));
        else 
            CHECK_CUDA(cudaMemcpy(dest.host_write_only(), src.host(),   src.size()*sizeof(float), cudaMemcpyHostToHost));
    }