//--- hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) { std::call_once(hip_initialized, ihipInit); ihipEvent_t *eh = event._handle; if (eh && eh->_state != hipEventStatusUnitialized) { eh->_stream = stream; if (stream == NULL) { // If stream == NULL, wait on all queues. // Behavior matches "use standard default semantics". // TODO-HCC fix this - conservative or use device timestamps? // TODO-HCC can we use barrier or event marker to implement better solution? ihipDevice_t *device = ihipGetTlsDefaultDevice(); device->locked_syncDefaultStream(true); eh->_timestamp = hc::get_system_ticks(); eh->_state = hipEventStatusRecorded; return ihipLogStatus(hipSuccess); } else { eh->_state = hipEventStatusRecording; // Clear timestamps eh->_timestamp = 0; eh->_marker = stream->_av.create_marker(); eh->_copy_seq_id = stream->locked_lastCopySeqId(); return ihipLogStatus(hipSuccess); } } else { return ihipLogStatus(hipErrorInvalidResourceHandle); } }
//--- hipError_t hipEventSynchronize(hipEvent_t event) { std::call_once(hip_initialized, ihipInit); ihipEvent_t *eh = event._handle; if (eh) { if (eh->_state == hipEventStatusUnitialized) { return ihipLogStatus(hipErrorInvalidResourceHandle); } else if (eh->_state == hipEventStatusCreated ) { // Created but not actually recorded on any device: return ihipLogStatus(hipSuccess); } else if (eh->_stream == NULL) { ihipDevice_t *device = ihipGetTlsDefaultDevice(); device->locked_syncDefaultStream(true); return ihipLogStatus(hipSuccess); } else { #if __hcc_workweek__ >= 16033 eh->_marker.wait((eh->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked : hc::hcWaitModeActive); #else eh->_marker.wait(); #endif eh->_stream->locked_reclaimSignals(eh->_copy_seq_id); return ihipLogStatus(hipSuccess); } } else { return ihipLogStatus(hipErrorInvalidResourceHandle); } }
//--- hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int* flags) { HIP_INIT_API(hipStreamGetFlags, stream, flags); if (flags == NULL) { return ihipLogStatus(hipErrorInvalidValue); } else if (stream == hipStreamNull) { return ihipLogStatus(hipErrorInvalidResourceHandle); } else { *flags = stream->_flags; return ihipLogStatus(hipSuccess); } }
//--- hipError_t hipEventQuery(hipEvent_t event) { std::call_once(hip_initialized, ihipInit); ihipEvent_t *eh = event._handle; // TODO-stream - need to read state of signal here: The event may have become ready after recording.. // TODO-HCC - use get_hsa_signal here. if (eh->_state == hipEventStatusRecording) { return ihipLogStatus(hipErrorNotReady); } else { return ihipLogStatus(hipSuccess); } }
hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { HIP_INIT_API(ptr, sizeBytes, flags); hipError_t hip_status = hipSuccess; auto device = ihipGetTlsDefaultDevice(); if(device){ if(flags == hipHostMallocDefault){ *ptr = hc::am_alloc(sizeBytes, device->_acc, amHostPinned); if(sizeBytes < 1 && (*ptr == NULL)){ hip_status = hipErrorMemoryAllocation; }else{ hc::am_memtracker_update(*ptr, device->_device_index, amHostPinned); } tprintf(DB_MEM, " %s: pinned ptr=%p\n", __func__, *ptr); } else if(flags & hipHostMallocMapped){ *ptr = hc::am_alloc(sizeBytes, device->_acc, amHostPinned); if(sizeBytes && (*ptr == NULL)){ hip_status = hipErrorMemoryAllocation; }else{ hc::am_memtracker_update(*ptr, device->_device_index, flags); { LockedAccessor_DeviceCrit_t crit(device->criticalData()); if (crit->peerCnt()) { hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr); } } } tprintf(DB_MEM, " %s: pinned ptr=%p\n", __func__, *ptr); } } return ihipLogStatus(hip_status); }
/** * @return #hipSuccess, #hipErrorInvalidResourceHandle */ hipError_t hipStreamDestroy(hipStream_t stream) { HIP_INIT_API(stream); hipError_t e = hipSuccess; //--- Drain the stream: if (stream == NULL) { ihipDevice_t *device = ihipGetTlsDefaultDevice(); device->locked_syncDefaultStream(true/*waitOnSelf*/); } else { stream->locked_wait(); e = hipSuccess; } ihipDevice_t *device = stream->getDevice(); if (device) { device->locked_removeStream(stream); delete stream; } else { e = hipErrorInvalidResourceHandle; } return ihipLogStatus(e); }
/** * @return #hipSuccess, #hipErrorInvalidResourceHandle */ hipError_t hipStreamDestroy(hipStream_t stream) { HIP_INIT_API(hipStreamDestroy, stream); hipError_t e = hipSuccess; //--- Drain the stream: if (stream == NULL) { if (!HIP_FORCE_NULL_STREAM) { e = hipErrorInvalidResourceHandle; } } else { stream->locked_wait(); ihipCtx_t* ctx = stream->getCtx(); if (ctx) { ctx->locked_removeStream(stream); delete stream; } else { e = hipErrorInvalidResourceHandle; } } return ihipLogStatus(e); }
/* * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue (if free != NULL due to bug)S * @warning On HCC, the free memory only accounts for memory allocated by this process and may be optimistic. */ hipError_t hipMemGetInfo (size_t *free, size_t *total) { HIP_INIT_API(free, total); hipError_t e = hipSuccess; ihipDevice_t * hipDevice = ihipGetTlsDefaultDevice(); if (hipDevice) { if (total) { *total = hipDevice->_props.totalGlobalMem; } if (free) { // TODO - replace with kernel-level for reporting free memory: size_t deviceMemSize, hostMemSize, userMemSize; hc::am_memtracker_sizeinfo(hipDevice->_acc, &deviceMemSize, &hostMemSize, &userMemSize); printf ("deviceMemSize=%zu\n", deviceMemSize); *free = hipDevice->_props.totalGlobalMem - deviceMemSize; } } else { e = hipErrorInvalidDevice; } return ihipLogStatus(e); }
hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags) { HIP_INIT_SPECIAL_API(hipStreamWaitEvent, TRACE_SYNC, stream, event, flags); hipError_t e = hipSuccess; auto ecd = event->locked_copyCrit(); if (event == nullptr) { e = hipErrorInvalidResourceHandle; } else if ((ecd._state != hipEventStatusUnitialized) && (ecd._state != hipEventStatusCreated)) { if (HIP_SYNC_STREAM_WAIT || (HIP_SYNC_NULL_STREAM && (stream == 0))) { // conservative wait on host for the specified event to complete: // return _stream->locked_eventWaitComplete(this, waitMode); // ecd._stream->locked_eventWaitComplete( ecd.marker(), (event->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked : hc::hcWaitModeActive); } else { stream = ihipSyncAndResolveStream(stream); // This will use create_blocking_marker to wait on the specified queue. stream->locked_streamWaitEvent(ecd); } } // else event not recorded, return immediately and don't create marker. return ihipLogStatus(e); };
//--- hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPriority) { HIP_INIT_API(hipDeviceGetStreamPriorityRange, leastPriority, greatestPriority); if (leastPriority != NULL) *leastPriority = priority_low; if (greatestPriority != NULL) *greatestPriority = priority_high; return ihipLogStatus(hipSuccess); }
//--- hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags) { HIP_INIT_API(stream, flags); return ihipLogStatus(ihipStreamCreate(stream, flags)); }
/** * @returns #hipSuccess #hipErrorMemoryAllocation */ hipError_t hipMalloc(void** ptr, size_t sizeBytes) { HIP_INIT_API(ptr, sizeBytes); hipError_t hip_status = hipSuccess; auto device = ihipGetTlsDefaultDevice(); if (device) { const unsigned am_flags = 0; *ptr = hc::am_alloc(sizeBytes, device->_acc, am_flags); if (sizeBytes && (*ptr == NULL)) { hip_status = hipErrorMemoryAllocation; } else { hc::am_memtracker_update(*ptr, device->_device_index, 0); { LockedAccessor_DeviceCrit_t crit(device->criticalData()); if (crit->peerCnt()) { hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr); } } } } else { hip_status = hipErrorMemoryAllocation; } return ihipLogStatus(hip_status); }
//--- hipError_t hipStreamCreateWithPriority(hipStream_t* stream, unsigned int flags, int priority) { HIP_INIT_API(hipStreamCreateWithPriority, stream, flags, priority); // clamp priority to range [priority_high:priority_low] priority = (priority < priority_high ? priority_high : (priority > priority_low ? priority_low : priority)); return ihipLogStatus(ihipStreamCreate(stream, flags, priority)); }
hipError_t hipPeekAtLastError() { HIP_INIT_API(); // peek at last error, but don't reset it. return ihipLogStatus(tls_lastHipError); }
hipError_t hipGetLastError() { HIP_INIT_API(hipGetLastError); // Return last error, but then reset the state: hipError_t e = ihipLogStatus(tls_lastHipError); tls_lastHipError = hipSuccess; return e; }
//-- hipError_t hipStreamGetPriority(hipStream_t stream, int* priority) { HIP_INIT_API(hipStreamGetPriority, stream, priority); if (priority == NULL) { return ihipLogStatus(hipErrorInvalidValue); } else if (stream == hipStreamNull) { return ihipLogStatus(hipErrorInvalidResourceHandle); } else { #if defined(__HCC__) && (__hcc_minor__ < 3) *priority = 0; #else LockedAccessor_StreamCrit_t crit(stream->_criticalData); *priority = crit->_av.get_queue_priority(); #endif return ihipLogStatus(hipSuccess); } }
//--- hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags) { HIP_INIT_API(hostPtr, sizeBytes, flags); hipError_t hip_status = hipSuccess; auto device = ihipGetTlsDefaultDevice(); if(hostPtr == NULL){ return ihipLogStatus(hipErrorInvalidValue); } hc::accelerator acc; hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0); am_status_t am_status = hc::am_memtracker_getinfo(&amPointerInfo, hostPtr); if(am_status == AM_SUCCESS){ hip_status = hipErrorHostMemoryAlreadyRegistered; }else{ auto device = ihipGetTlsDefaultDevice(); if(hostPtr == NULL){ return ihipLogStatus(hipErrorInvalidValue); } if(device){ if(flags == hipHostRegisterDefault || flags == hipHostRegisterPortable || flags == hipHostRegisterMapped){ std::vector<hc::accelerator>vecAcc; for(int i=0;i<g_deviceCnt;i++){ vecAcc.push_back(g_devices[i]._acc); } #if USE_HCC_LOCK_API am_status = hc::am_memory_host_lock(device->_acc, hostPtr, sizeBytes, &vecAcc[0], vecAcc.size()); #else am_status = AM_ERROR_MISC; #endif if(am_status == AM_SUCCESS){ hip_status = hipSuccess; }else{ hip_status = hipErrorMemoryAllocation; } }else{ hip_status = hipErrorInvalidValue; } } } return ihipLogStatus(hip_status); }
//--- hipError_t hipMemcpyToSymbol(const char* symbolName, const void *src, size_t count, size_t offset, hipMemcpyKind kind) { HIP_INIT_API(symbolName, src, count, offset, kind); #ifdef USE_MEMCPYTOSYMBOL if(kind != hipMemcpyHostToDevice) { return ihipLogStatus(hipErrorInvalidValue); } auto device = ihipGetTlsDefaultDevice(); //hsa_signal_t depSignal; //int depSignalCnt = device._default_stream->preCopyCommand(NULL, &depSignal, ihipCommandCopyH2D); assert(0); // Need to properly synchronize the copy - do something with depSignal if != NULL. device->_acc.memcpy_symbol(symbolName, (void*) src,count, offset); #endif return ihipLogStatus(hipSuccess); }
//--- hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void* userData, unsigned int flags) { HIP_INIT_API(hipStreamAddCallback, stream, callback, userData, flags); hipError_t e = hipSuccess; // Create a thread in detached mode to handle callback ihipStreamCallback_t* cb = new ihipStreamCallback_t(stream, callback, userData); std::thread(ihipStreamCallbackHandler, cb).detach(); return ihipLogStatus(e); }
//--- hipError_t hipEventDestroy(hipEvent_t event) { std::call_once(hip_initialized, ihipInit); event._handle->_state = hipEventStatusUnitialized; delete event._handle; event._handle = NULL; // TODO - examine return additional error codes return ihipLogStatus(hipSuccess); }
hipError_t hipMemset(void* dst, int value, size_t sizeBytes ) { hipStream_t stream = hipStreamNull; // TODO - call an ihip memset so HIP_TRACE is correct. HIP_INIT_API(dst, value, sizeBytes, stream); hipError_t e = hipSuccess; stream = ihipSyncAndResolveStream(stream); if (stream) { stream->lockopen_preKernelCommand(); hc::completion_future cf ; if ((sizeBytes & 0x3) == 0) { // use a faster dword-per-workitem copy: try { value = value & 0xff; unsigned value32 = (value << 24) | (value << 16) | (value << 8) | (value) ; cf = ihipMemsetKernel<unsigned> (stream, static_cast<unsigned*> (dst), value32, sizeBytes/sizeof(unsigned)); } catch (std::exception &ex) { e = hipErrorInvalidValue; } } else { // use a slow byte-per-workitem copy: try { cf = ihipMemsetKernel<char> (stream, static_cast<char*> (dst), value, sizeBytes); } catch (std::exception &ex) { e = hipErrorInvalidValue; } } cf.wait(); stream->lockclose_postKernelCommand(cf); if (HIP_LAUNCH_BLOCKING) { tprintf (DB_SYNC, "'%s' LAUNCH_BLOCKING wait for memset [stream:%p].\n", __func__, (void*)stream); cf.wait(); tprintf (DB_SYNC, "'%s' LAUNCH_BLOCKING memset completed [stream:%p].\n", __func__, (void*)stream); } } else { e = hipErrorInvalidValue; } return ihipLogStatus(e); }
/** * @bug This function conservatively waits for all work in the specified stream to complete. */ hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags) { HIP_INIT_API(stream, event, flags); hipError_t e = hipSuccess; { // TODO-hcc Convert to use create_blocking_marker(...) functionality. // Currently we have a super-conservative version of this - block on host, and drain the queue. // This should create a barrier packet in the target queue. stream->locked_wait(); e = hipSuccess; } return ihipLogStatus(e); };
//--- hipError_t hipStreamSynchronize(hipStream_t stream) { HIP_INIT_API(stream); hipError_t e = hipSuccess; if (stream == NULL) { ihipDevice_t *device = ihipGetTlsDefaultDevice(); device->locked_syncDefaultStream(true/*waitOnSelf*/); } else { stream->locked_wait(); e = hipSuccess; } return ihipLogStatus(e); };
//--- hipError_t hipHostUnregister(void *hostPtr) { HIP_INIT_API(hostPtr); auto device = ihipGetTlsDefaultDevice(); hipError_t hip_status = hipSuccess; if(hostPtr == NULL){ hip_status = hipErrorInvalidValue; }else{ #if USE_HCC_LOCK_API am_status_t am_status = hc::am_memory_host_unlock(device->_acc, hostPtr); #else am_status_t am_status = AM_ERROR_MISC; #endif if(am_status != AM_SUCCESS){ hip_status = hipErrorHostMemoryNotRegistered; } } return ihipLogStatus(hip_status); }
/** * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDevice */ hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, void* ptr) { std::call_once(hip_initialized, ihipInit); hipError_t e = hipSuccess; hc::accelerator acc; hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0); am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, ptr); if (status == AM_SUCCESS) { attributes->memoryType = amPointerInfo._isInDeviceMem ? hipMemoryTypeDevice: hipMemoryTypeHost; attributes->hostPointer = amPointerInfo._hostPointer; attributes->devicePointer = amPointerInfo._devicePointer; attributes->isManaged = 0; if(attributes->memoryType == hipMemoryTypeHost){ attributes->hostPointer = ptr; } if(attributes->memoryType == hipMemoryTypeDevice){ attributes->devicePointer = ptr; } attributes->allocationFlags = amPointerInfo._appAllocationFlags; attributes->device = amPointerInfo._appId; if (attributes->device < 0) { e = hipErrorInvalidDevice; } } else { attributes->memoryType = hipMemoryTypeDevice; attributes->hostPointer = 0; attributes->devicePointer = 0; attributes->device = -1; attributes->isManaged = 0; attributes->allocationFlags = 0; e = hipErrorUnknown; // TODO - should be hipErrorInvalidValue ? } return ihipLogStatus(e); }
//--- hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop) { std::call_once(hip_initialized, ihipInit); ihipEvent_t *start_eh = start._handle; ihipEvent_t *stop_eh = stop._handle; ihipSetTs(start); ihipSetTs(stop); hipError_t status = hipSuccess; *ms = 0.0f; if (start_eh && stop_eh) { if ((start_eh->_state == hipEventStatusRecorded) && (stop_eh->_state == hipEventStatusRecorded)) { // Common case, we have good information for both events. int64_t tickDiff = (stop_eh->_timestamp - start_eh->_timestamp); // TODO-move this to a variable saved with each agent. uint64_t freqHz; hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &freqHz); if (freqHz) { *ms = ((double)(tickDiff) / (double)(freqHz)) * 1000.0f; status = hipSuccess; } else { * ms = 0.0f; status = hipErrorInvalidValue; } } else if ((start_eh->_state == hipEventStatusRecording) || (stop_eh->_state == hipEventStatusRecording)) { status = hipErrorNotReady; } else if ((start_eh->_state == hipEventStatusUnitialized) || (stop_eh->_state == hipEventStatusUnitialized)) { status = hipErrorInvalidResourceHandle; } } return ihipLogStatus(status); }
//--- hipError_t hipStreamQuery(hipStream_t stream) { HIP_INIT_SPECIAL_API(hipStreamQuery, TRACE_QUERY, stream); // Use default stream if 0 specified: if (stream == hipStreamNull) { ihipCtx_t* device = ihipGetTlsDefaultCtx(); stream = device->_defaultStream; } bool isEmpty = 0; { LockedAccessor_StreamCrit_t crit(stream->_criticalData); isEmpty = crit->_av.get_is_empty(); } hipError_t e = isEmpty ? hipSuccess : hipErrorNotReady; return ihipLogStatus(e); }
/** * @returns #hipSuccess, * @returns #hipErrorInvalidValue if flags are not 0 * @returns #hipErrorMemoryAllocation if hostPointer is not a tracked allocation. */ hipError_t hipHostGetDevicePointer(void **devicePointer, void *hostPointer, unsigned flags) { std::call_once(hip_initialized, ihipInit); hipError_t e = hipSuccess; // Flags must be 0: if (flags != 0) { e = hipErrorInvalidValue; } else { hc::accelerator acc; hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0); am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, hostPointer); if (status == AM_SUCCESS) { *devicePointer = amPointerInfo._devicePointer; } else { e = hipErrorMemoryAllocation; *devicePointer = NULL; } } return ihipLogStatus(e); }
hipError_t hipHostFree(void* ptr) { HIP_INIT_API(ptr); // TODO - ensure this pointer was created by hipMallocHost and not hipMalloc std::call_once(hip_initialized, ihipInit); hipError_t hipStatus = hipErrorInvalidDevicePointer; if (ptr) { hc::accelerator acc; hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0); am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, ptr); if(status == AM_SUCCESS){ if(amPointerInfo._hostPointer == ptr){ hc::am_free(ptr); hipStatus = hipSuccess; } } } return ihipLogStatus(hipStatus); };
//--- hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) { HIP_INIT_API(dst, src, sizeBytes, kind); hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull); hc::completion_future marker; hipError_t e = hipSuccess; try { stream->locked_copySync(dst, src, sizeBytes, kind); } catch (ihipException ex) { e = ex._code; } return ihipLogStatus(e); }