Ejemplo n.º 1
0
/**
 * @returns #hipSuccess #hipErrorMemoryAllocation
 */
hipError_t hipMalloc(void** ptr, size_t sizeBytes)
{
    HIP_INIT_API(ptr, sizeBytes);

    hipError_t  hip_status = hipSuccess;

	auto device = ihipGetTlsDefaultDevice();

    if (device) {
        const unsigned am_flags = 0;
        *ptr = hc::am_alloc(sizeBytes, device->_acc, am_flags);

        if (sizeBytes && (*ptr == NULL)) {
            hip_status = hipErrorMemoryAllocation;
        } else {
            hc::am_memtracker_update(*ptr, device->_device_index, 0);
            {
                LockedAccessor_DeviceCrit_t crit(device->criticalData());
                if (crit->peerCnt()) {
                    hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr);
                }
            }
        }
    } else {
        hip_status = hipErrorMemoryAllocation;
    }

    return ihipLogStatus(hip_status);
}
Ejemplo n.º 2
0
hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags)
{
    HIP_INIT_API(ptr, sizeBytes, flags);

    hipError_t hip_status = hipSuccess;

    auto device = ihipGetTlsDefaultDevice();

    if(device){
        if(flags == hipHostMallocDefault){
            *ptr = hc::am_alloc(sizeBytes, device->_acc, amHostPinned);
            if(sizeBytes < 1 && (*ptr == NULL)){
                hip_status = hipErrorMemoryAllocation;
            }else{
                hc::am_memtracker_update(*ptr, device->_device_index, amHostPinned);
            }
            tprintf(DB_MEM, " %s: pinned ptr=%p\n", __func__, *ptr);
        } else if(flags & hipHostMallocMapped){
            *ptr = hc::am_alloc(sizeBytes, device->_acc, amHostPinned);
            if(sizeBytes && (*ptr == NULL)){
                hip_status = hipErrorMemoryAllocation;
            }else{
                hc::am_memtracker_update(*ptr, device->_device_index, flags);
                {
                    LockedAccessor_DeviceCrit_t crit(device->criticalData());
                    if (crit->peerCnt()) {
                        hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr);
                    }
                }
            }
            tprintf(DB_MEM, " %s: pinned ptr=%p\n", __func__, *ptr);
        }
    }
    return ihipLogStatus(hip_status);
}
Ejemplo n.º 3
0
//-------------------------------------------------------------------------------------------------
UnpinnedCopyEngine::UnpinnedCopyEngine(hsa_agent_t hsaAgent, hsa_agent_t cpuAgent, size_t bufferSize, int numBuffers, 
                                       bool isLargeBar, int thresholdH2DDirectStaging, 
                                       int thresholdH2DStagingPinInPlace, int thresholdD2H) :
    _hsaAgent(hsaAgent),
    _cpuAgent(cpuAgent),
    _bufferSize(bufferSize),
    _numBuffers(numBuffers > _max_buffers ? _max_buffers : numBuffers),
    _isLargeBar(isLargeBar),
    _hipH2DTransferThresholdDirectOrStaging(thresholdH2DDirectStaging),
    _hipH2DTransferThresholdStagingOrPininplace(thresholdH2DStagingPinInPlace),
    _hipD2HTransferThreshold(thresholdD2H)
{
    hsa_amd_memory_pool_t sys_pool;
    hsa_status_t err = hsa_amd_agent_iterate_memory_pools(_cpuAgent, findGlobalPool, &sys_pool);

    // Generate a packed C-style array of agents, for use below with hsa_amd_agents_allow_access
    std::vector<hsa_agent_t> agents;
    err = hsa_iterate_agents(&find_gpu, &agents);
    ErrorCheck(err);
    hsa_agent_t * agentBlock = new hsa_agent_t[agents.size()];
    int i=0;
    for (auto iter=agents.begin(); iter!= agents.end(); iter++) {
        agentBlock[i++] = *iter;
        assert (i<=agents.size());
    };

    ErrorCheck(err);
    for (int i=0; i<_numBuffers; i++) {
        // TODO - experiment with alignment here.
        err = hsa_amd_memory_pool_allocate(sys_pool, _bufferSize, 0, (void**)(&_pinnedStagingBuffer[i]));
        ErrorCheck(err);

        if ((err != HSA_STATUS_SUCCESS) || (_pinnedStagingBuffer[i] == NULL)) {
            THROW_ERROR(hipErrorMemoryAllocation, err);
        }

        // Allow access from every agent:
        // This is used in peer-to-peer copies, since we use the buffers to copy from different agents.
        // TODO - may want to review this algorithm for NUMA locality - it might be faster to use staging buffer closer to devices?
        err = hsa_amd_agents_allow_access(agents.size(), agentBlock, NULL, _pinnedStagingBuffer[i]);
        ErrorCheck(err);

        hsa_signal_create(0, 0, NULL, &_completionSignal[i]);
        hsa_signal_create(0, 0, NULL, &_completionSignal2[i]);
    }

};