// Frees 'ptr'.  When the MYO library can be loaded the request is forwarded
// to the MYO wrapper's SharedAlignedArenaFree for the given arena; otherwise
// the pointer is handed to _mm_free.
extern "C" void _Offload_shared_aligned_arena_free(
    MyoArena arena,
    void *ptr
)
{
    OFFLOAD_DEBUG_TRACE(3, "%s(%u, %p)\n", __func__, arena, ptr);

    if (!__offload_myoLoadLibrary()) {
        // MYO could not be loaded: fall back to the host deallocator.
        _mm_free(ptr);
        return;
    }

    myo_wrapper.SharedAlignedArenaFree(arena, ptr);
}
// Allocates 'size' bytes aligned to 'align'.  When the MYO library can be
// loaded the request is forwarded to the MYO wrapper's SharedAlignedMalloc
// unchanged; otherwise _mm_malloc is used with the alignment raised to at
// least sizeof(void*).
extern "C" void* _Offload_shared_aligned_malloc(size_t size, size_t align)
{
    OFFLOAD_DEBUG_TRACE(3, "%s(%lld, %lld)\n", __func__, size, align);

    if (__offload_myoLoadLibrary()) {
        return myo_wrapper.SharedAlignedMalloc(size, align);
    }

    // Host fallback: never request less than pointer alignment.
    const size_t min_align = sizeof(void*);
    return _mm_malloc(size, align < min_align ? min_align : align);
}
// Walks a shared vtable table up to MYO_TABLE_END_MARKER().  For every entry
// the pointer stored in 'sharedAddr' is called as void(*)(MyoArena) with the
// wrapper's vtable arena.  If at least one entry was processed, the table is
// appended to __myo_table_list under __myo_table_lock and __myo_tables is set.
static void __offload_myo_shared_vtable_process(
    SharedTableEntry *entry
)
{
    typedef void (*vtable_create_fn)(MyoArena);

    SharedTableEntry *const first = entry;
    int registered = 0;

    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry);

    // allocate shared memory for vtables
    while (entry->varName != MYO_TABLE_END_MARKER()) {
        SharedTableEntry *const current = entry++;

#ifdef TARGET_WINNT
        // On Windows the table may contain NULL-named entries; skip them.
        if (current->varName == 0) {
            OFFLOAD_DEBUG_TRACE(4,
                "skip registering a NULL MyoSharedVTable entry\n");
            continue;
        }
#endif // TARGET_WINNT

        OFFLOAD_DEBUG_TRACE(4,
            "registering MyoSharedVTable entry for %s @%p\n",
            current->varName, current);

        // The entry's sharedAddr holds the routine that creates the
        // shared memory; invoke it with the vtable arena.
        reinterpret_cast<vtable_create_fn>(current->sharedAddr)(
            myo_wrapper.GetVtableArena());

        ++registered;
    }

    // Nothing was registered: the table is not recorded.
    if (registered <= 0) {
        return;
    }

    mutex_locker_t locker(__myo_table_lock);
    __myo_table_list.push_back(MyoTable(first, registered));
    __myo_tables = true;
}
// Aligned allocation that never returns NULL: the alignment is raised to at
// least sizeof(void*), the block is obtained from _mm_malloc, and on failure
// c_offload_malloc is reported and the process exits with status 1.
extern void *OFFLOAD_MALLOC(
    size_t size,
    size_t align
)
{
    OFFLOAD_DEBUG_TRACE(2, "%s(%lld, %lld)\n", __func__, size, align);

    const size_t min_align = sizeof(void*);
    if (align < min_align) {
        align = min_align;
    }

    void *const block = _mm_malloc(size, align);
    if (block == NULL) {
        // The error message carries the (possibly raised) alignment.
        LIBOFFLOAD_ERROR(c_offload_malloc, size, align);
        exit(1);
    }

    OFFLOAD_DEBUG_TRACE(2, "%s returned %p\n", __func__, block);
    return block;
}
// Runs 'thunk' with 'arg' on device 'target_number' through the MYO wrapper.
// The call sequence is: myo_wrapper.Release(), the remote thunk call,
// myo_wrapper.Acquire(), and finally ORSL::release(target_number).
// NOTE(review): the Release/Acquire pair presumably hands MYO shared-memory
// ownership over for the duration of the remote call, and ORSL::release
// presumably undoes a reservation made by the caller -- confirm against the
// MYO/ORSL interfaces; neither is visible here.
extern "C" void __offload_myoiRemoteIThunkCall(
    void *thunk,
    void *arg,
    int target_number
)
{
    OFFLOAD_DEBUG_TRACE(3, "%s(%p, %p, %d)\n", __func__, thunk, arg,
                        target_number);

    // The order of the following calls is significant; do not reorder.
    myo_wrapper.Release();
    myo_wrapper.RemoteThunkCall(thunk, arg, target_number);
    myo_wrapper.Acquire();

    ORSL::release(target_number);
}
// Calls the init routine of every entry in an init table, passing the
// wrapper's vtable arena.  On Windows the table ends at an entry whose
// funcName equals MYO_TABLE_END_MARKER() and NULL-named entries are skipped;
// elsewhere the table ends at the first entry whose func pointer is 0.
void __offload_myo_shared_init_table_process(InitTableEntry* entry)
{
    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry);

#ifdef TARGET_WINNT
    while (entry->funcName != MYO_TABLE_END_MARKER()) {
        InitTableEntry *const current = entry++;

        if (current->funcName != 0) {
            // Invoke the function to init the shared memory
            OFFLOAD_DEBUG_TRACE(4, "execute MyoSharedInit routine for %s\n",
                current->funcName);
            current->func(myo_wrapper.GetVtableArena());
        }
        else {
            OFFLOAD_DEBUG_TRACE(4,
                "skip registering a NULL MyoSharedInit entry\n");
        }
    }
#else // TARGET_WINNT
    while (entry->func != 0) {
        // Invoke the function to init the shared memory
        entry->func(myo_wrapper.GetVtableArena());
        ++entry;
    }
#endif // TARGET_WINNT
}
// Process contents of all Init tables void MYOInitTableList::process() { OFFLOAD_DEBUG_TRACE(2, "Process MYO Init tables:\n"); m_lock.lock(); for (Node *n = m_head; n != 0; n = n->next) { __offload_myo_shared_init_table_process( (InitTableEntry*)n->table.entries); } for (Node *n = m_head; n != 0; n = n->next) { remove_table(n); } m_lock.unlock(); }
void MyoWrapper::CreateVtableArena() { MyoArena* vtable_arena; // Check if this MYO supports arenas for vtables if (myo_wrapper.PostInitFuncSupported()) { // Create arena for vtables vtable_arena = (MyoArena *)myo_wrapper.SharedMalloc(sizeof(MyoArena)); myo_wrapper.ArenaCreate( MYO_ARENA_OURS, MYO_NO_CONSISTENCY, vtable_arena); m_vtable_arena = *vtable_arena; OFFLOAD_DEBUG_TRACE(4, "created arena = %d\n", m_vtable_arena); } else { m_vtable_arena = 0; } }
// Returns true if the function-pointer table holds at least one usable entry
// before MYO_TABLE_END_MARKER().  On Windows, entries with a NULL funcName
// do not count.
static bool fptr_table_entries(
    FptrTableEntry *entry
)
{
    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry);

    while (entry->funcName != MYO_TABLE_END_MARKER()) {
#ifdef TARGET_WINNT
        if (entry->funcName == 0) {
            // NULL placeholder: keep looking.
            ++entry;
            continue;
        }
#endif // TARGET_WINNT
        // Found a real entry.
        return true;
    }

    return false;
}
// Registers a module's shared-variable and function-pointer tables.
// __offload_myo_once_init is run exactly once per process via pthread_once.
// Nothing is registered when the first entry of both tables has a null name.
extern "C" void __offload_myoRegisterTables(
    SharedTableEntry *shared_table,
    FptrTableEntry *fptr_table
)
{
    OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);

    // one time registration of Intel(R) Cilk(TM) language entries
    static pthread_once_t once_control = PTHREAD_ONCE_INIT;
    pthread_once(&once_control, __offload_myo_once_init);

    // register module's tables, unless both start with a null name
    if (shared_table->varName != 0 || fptr_table->funcName != 0) {
        __offload_myo_shared_table_register(shared_table);
        __offload_myo_fptr_table_register(fptr_table);
    }
}
extern "C" int __offload_myoIsAvailable(int target_number) { OFFLOAD_DEBUG_TRACE(3, "%s(%d)\n", __func__, target_number); if (target_number >= -2) { bool is_default_number = (target_number == -2); if (__offload_myoInit()) { if (target_number >= 0) { // User provided the device number int num = target_number % mic_engines_total; // reserve device in ORSL target_number = ORSL::reserve(num) ? num : -1; } else { // try to use device 0 target_number = ORSL::reserve(0) ? 0 : -1; } // make sure device is initialized if (target_number >= 0) { mic_engines[target_number].init(); } } else { // fallback to CPU target_number = -1; } if (target_number < 0 && !is_default_number) { LIBOFFLOAD_ERROR(c_device_is_not_available); exit(1); } } else { LIBOFFLOAD_ERROR(c_invalid_device_number); exit(1); } return target_number; }
extern "C" bool __offload_myoProcessTables( const void* image, MYOInitTableList::Node *init_table, MYOVarTableList::Node *shared_table, MYOVarTableList::Node *shared_vtable, MYOFuncTableList::Node *fptr_table ) { OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__); // Collect the tables in this .dll/.so __offload_myoRegisterTables1( init_table, shared_table, shared_vtable, fptr_table); // Now check what type of module we are dealing with if (__offload_target_image_is_executable(image)) { OFFLOAD_DEBUG_TRACE(2, "Main encountered\n"); OFFLOAD_DEBUG_TRACE(2, "MYO initialization not deferred\n"); // MYO tables across dlls have been collected // Now init MYO and process the tables __offload_myoProcessDeferredTables(); // Return true to indicate that atexit needs to be calld by ofldbegin return true; } else { // This is a shared library, either auto-loaded or dynamically loaded // If __target_exe is set, then main has started running if (__target_exe != 0) { // Main is running: this is a dynamic load of a shared library // Finish processing the tables in this library OFFLOAD_DEBUG_TRACE(2, "Dynamically loaded shared library encountered\n"); OFFLOAD_DEBUG_TRACE(2, "MYO initialization not deferred\n"); __offload_myoProcessDeferredTables(); } else { // Main is not running: this is an auto-loaded shared library // Tables have been collected, nothing else to do OFFLOAD_DEBUG_TRACE(2, "Auto-loaded shared library encountered\n"); OFFLOAD_DEBUG_TRACE(2, "Deferring initialization of MYO\n"); } return false; } }
void Engine::unload_library(const void *data, const char *name) { if (m_process == 0) { return; } for (DynLibList::iterator it = m_dyn_libs.begin(); it != m_dyn_libs.end(); it++) { if (it->data == data) { COIRESULT res; OFFLOAD_DEBUG_TRACE(2, "Unloading library \"%s\"\n",name); res = COI::ProcessUnloadLibrary(m_process,it->lib); m_dyn_libs.erase(it); if (res != COI_SUCCESS) { check_result(res, c_unload_library, m_index, res); } return; } } }
pid_t Engine::init_device(void) { struct init_data { int device_index; int devices_total; int console_level; int offload_report_level; } data; COIRESULT res; COIEVENT event; pid_t pid; OFFLOAD_DEBUG_TRACE_1(2, 0, c_offload_init, "Initializing device with logical index %d " "and physical index %d\n", m_index, m_physical_index); // setup misc data data.device_index = m_index; data.devices_total = mic_engines_total; data.console_level = console_enabled; data.offload_report_level = offload_report_level; res = COI::PipelineRunFunction(get_pipeline(), m_funcs[c_func_init], 0, 0, 0, 0, 0, &data, sizeof(data), &pid, sizeof(pid), &event); check_result(res, c_pipeline_run_func, m_index, res); res = COI::EventWait(1, &event, -1, 1, 0, 0); check_result(res, c_event_wait, res); OFFLOAD_DEBUG_TRACE(2, "Device process pid is %d\n", pid); return pid; }
// Shuts MYO down.  Does nothing unless myo_is_available is set.  Otherwise a
// fini_myo call is started on every engine, LibFini is called on the host
// wrapper, and then all the engine events are waited for.  A failed wait is
// reported as c_event_wait and terminates the process.
DLL_LOCAL void __offload_myoFini(void)
{
    if (!myo_is_available) {
        return;
    }

    OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);

    COIEVENT events[MIC_ENGINES_MAX];

    // kick off myoiLibFini calls on all devices
    int engine = 0;
    while (engine < mic_engines_total) {
        mic_engines[engine].fini_myo(&events[engine]);
        ++engine;
    }

    // cleanup myo runtime on host
    myo_wrapper.LibFini();

    // wait for the target fini calls to finish
    COIRESULT res = COI::EventWait(mic_engines_total, events, -1, 1, 0, 0);
    if (res == COI_SUCCESS) {
        return;
    }

    LIBOFFLOAD_ERROR(c_event_wait, res);
    exit(1);
}
void OffloadDescriptor::offload( uint32_t buffer_count, void** buffers, void* misc_data, uint16_t misc_data_len, void* return_data, uint16_t return_data_len ) { FunctionDescriptor *func = (FunctionDescriptor*) misc_data; const char *name = func->data; OffloadDescriptor ofld; char *in_data = 0; char *out_data = 0; char *timer_data = 0; console_enabled = func->console_enabled; timer_enabled = func->timer_enabled; offload_report_level = func->offload_report_level; offload_number = func->offload_number; ofld.set_offload_number(func->offload_number); #ifdef SEP_SUPPORT if (sep_monitor) { if (__sync_fetch_and_add(&sep_counter, 1) == 0) { OFFLOAD_DEBUG_TRACE(2, "VTResumeSampling\n"); VTResumeSampling(); } } #endif // SEP_SUPPORT OFFLOAD_DEBUG_TRACE_1(2, ofld.get_offload_number(), c_offload_start_target_func, "Offload \"%s\" started\n", name); // initialize timer data OFFLOAD_TIMER_INIT(); OFFLOAD_TIMER_START(c_offload_target_total_time); OFFLOAD_TIMER_START(c_offload_target_descriptor_setup); // get input/output buffer addresses if (func->in_datalen > 0 || func->out_datalen > 0) { if (func->data_offset != 0) { in_data = (char*) misc_data + func->data_offset; out_data = (char*) return_data; } else { char *inout_buf = (char*) buffers[--buffer_count]; in_data = inout_buf; out_data = inout_buf; } } // assign variable descriptors ofld.m_vars_total = func->vars_num; if (ofld.m_vars_total > 0) { uint64_t var_data_len = ofld.m_vars_total * sizeof(VarDesc); ofld.m_vars = (VarDesc*) malloc(var_data_len); if (ofld.m_vars == NULL) LIBOFFLOAD_ERROR(c_malloc); memcpy(ofld.m_vars, in_data, var_data_len); in_data += var_data_len; func->in_datalen -= var_data_len; } // timer data if (func->timer_enabled) { uint64_t timer_data_len = OFFLOAD_TIMER_DATALEN(); timer_data = out_data; out_data += timer_data_len; func->out_datalen -= timer_data_len; } // init Marshallers ofld.m_in.init_buffer(in_data, func->in_datalen); ofld.m_out.init_buffer(out_data, func->out_datalen); // copy buffers to 
offload descriptor std::copy(buffers, buffers + buffer_count, std::back_inserter(ofld.m_buffers)); OFFLOAD_TIMER_STOP(c_offload_target_descriptor_setup); // find offload entry address OFFLOAD_TIMER_START(c_offload_target_func_lookup); offload_func_with_parms entry = (offload_func_with_parms) __offload_entries.find_addr(name); if (entry == NULL) { #if OFFLOAD_DEBUG > 0 if (console_enabled > 2) { __offload_entries.dump(); } #endif LIBOFFLOAD_ERROR(c_offload_descriptor_offload, name); exit(1); } OFFLOAD_TIMER_STOP(c_offload_target_func_lookup); OFFLOAD_TIMER_START(c_offload_target_func_time); // execute offload entry entry(&ofld); OFFLOAD_TIMER_STOP(c_offload_target_func_time); OFFLOAD_TIMER_STOP(c_offload_target_total_time); // copy timer data to the buffer OFFLOAD_TIMER_TARGET_DATA(timer_data); OFFLOAD_DEBUG_TRACE(2, "Offload \"%s\" finished\n", name); #ifdef SEP_SUPPORT if (sep_monitor) { if (__sync_sub_and_fetch(&sep_counter, 1) == 0) { OFFLOAD_DEBUG_TRACE(2, "VTPauseSampling\n"); VTPauseSampling(); } } #endif // SEP_SUPPORT }
void Engine::init_process(void) { COIENGINE engine; COIRESULT res; const char **environ; char buf[4096]; // For exe path name // create environment for the target process environ = (const char**) mic_env_vars.create_environ_for_card(m_index); if (environ != 0) { for (const char **p = environ; *p != 0; p++) { OFFLOAD_DEBUG_TRACE(3, "Env Var for card %d: %s\n", m_index, *p); } } // Create execution context in the specified device OFFLOAD_DEBUG_TRACE(2, "Getting device %d (engine %d) handle\n", m_index, m_physical_index); res = COI::EngineGetHandle(COI_ISA_MIC, m_physical_index, &engine); check_result(res, c_get_engine_handle, m_index, res); // Get engine info on threads and cores. // The values of core number and thread number will be used later at stream // creation by call to _Offload_stream_create(device,number_of_cpus). COI_ENGINE_INFO engine_info; res = COI::EngineGetInfo(engine, sizeof(COI_ENGINE_INFO), &engine_info); check_result(res, c_get_engine_info, m_index, res); // m_cpus bitset has 1 for available thread. At the begining all threads // are available and m_cpus(i) is set to // 1 for i = [0...engine_info.NumThreads]. m_cpus.reset(); for (int i = 0; i < engine_info.NumThreads; i++) { m_cpus.set(i); } // The following values will be used at pipeline creation for streams m_num_cores = engine_info.NumCores; m_num_threads = engine_info.NumThreads; // Check if OFFLOAD_DMA_CHANNEL_COUNT is set to 2 // Only the value 2 is supported in 16.0 if (mic_dma_channel_count == 2) { if (COI::ProcessConfigureDMA) { // Set DMA channels using COI API COI::ProcessConfigureDMA(2, COI::DMA_MODE_READ_WRITE); } else { // Set environment variable COI_DMA_CHANNEL_COUNT // use putenv instead of setenv as Windows has no setenv. // Note: putenv requires its argument can't be freed or modified. // So no free after call to putenv or elsewhere. 
char * env_var = strdup("COI_DMA_CHANNEL_COUNT=2"); if (env_var == NULL) LIBOFFLOAD_ERROR(c_malloc); putenv(env_var); } } // Target executable is not available then use compiler provided offload_main if (__target_exe == 0) { if (mic_device_main == 0) LIBOFFLOAD_ERROR(c_report_no_host_exe); OFFLOAD_DEBUG_TRACE(2, "Loading target executable %s\n",mic_device_main); res = COI::ProcessCreateFromFile( engine, // in_Engine mic_device_main, // in_pBinaryName 0, // in_Argc 0, // in_ppArgv environ == 0, // in_DupEnv environ, // in_ppAdditionalEnv mic_proxy_io, // in_ProxyActive mic_proxy_fs_root, // in_ProxyfsRoot mic_buffer_size, // in_BufferSpace mic_library_path, // in_LibrarySearchPath &m_process // out_pProcess ); } else { // Target executable should be available by the time when we // attempt to initialize the device // Need the full path of the FAT exe for VTUNE { #ifndef TARGET_WINNT ssize_t len = readlink("/proc/self/exe", buf,1000); #else int len = GetModuleFileName(NULL, buf,1000); #endif // TARGET_WINNT if (len == -1) { LIBOFFLOAD_ERROR(c_report_no_host_exe); exit(1); } else if (len > 999) { LIBOFFLOAD_ERROR(c_report_path_buff_overflow); exit(1); } buf[len] = '\0'; } OFFLOAD_DEBUG_TRACE(2, "Loading target executable \"%s\" from %p, size %lld, host file %s\n", __target_exe->name, __target_exe->data, __target_exe->size, buf); res = COI::ProcessCreateFromMemory( engine, // in_Engine __target_exe->name, // in_pBinaryName __target_exe->data, // in_pBinaryBuffer __target_exe->size, // in_BinaryBufferLength, 0, // in_Argc 0, // in_ppArgv environ == 0, // in_DupEnv environ, // in_ppAdditionalEnv mic_proxy_io, // in_ProxyActive mic_proxy_fs_root, // in_ProxyfsRoot mic_buffer_size, // in_BufferSpace mic_library_path, // in_LibrarySearchPath buf, // in_FileOfOrigin -1, // in_FileOfOriginOffset use -1 to indicate to // COI that is is a FAT binary &m_process // out_pProcess ); } check_result(res, c_process_create, m_index, res); if ((mic_4k_buffer_size != 0) || 
(mic_2m_buffer_size !=0)) { // available only in MPSS 4.2 and greater if (COI::ProcessSetCacheSize != 0 ) { int flags; // Need compiler to use MPSS 3.2 or greater to get these // definition so currently hardcoding it // COI_CACHE_ACTION_GROW_NOW && COI_CACHE_MODE_ONDEMAND_SYNC; flags = 0x00020002; res = COI::ProcessSetCacheSize( m_process, // in_Process mic_2m_buffer_size, // in_HugePagePoolSize flags, // inHugeFlags mic_4k_buffer_size, // in_SmallPagePoolSize flags, // inSmallFlags 0, // in_NumDependencies 0, // in_pDependencies 0 // out_PCompletion ); OFFLOAD_DEBUG_TRACE(2, "Reserve target buffers 4K pages = %d 2M pages = %d\n", mic_4k_buffer_size, mic_2m_buffer_size); check_result(res, c_process_set_cache_size, m_index, res); } else { OFFLOAD_DEBUG_TRACE(2, "Reserve target buffers not supported in current MPSS\n"); } } // get function handles res = COI::ProcessGetFunctionHandles(m_process, c_funcs_total, m_func_names, m_funcs); check_result(res, c_process_get_func_handles, m_index, res); // initialize device side pid_t pid = init_device(); // For IDB if (__dbg_is_attached) { // TODO: we have in-memory executable now. // Check with IDB team what should we provide them now? if (strlen(__target_exe->name) < MAX_TARGET_NAME) { strcpy(__dbg_target_exe_name, __target_exe->name); } __dbg_target_so_pid = pid; __dbg_target_id = m_physical_index; __dbg_target_so_loaded(); } }
void Engine::init_process(void) { COIENGINE engine; COIRESULT res; const char **environ; // create environment for the target process environ = (const char**) mic_env_vars.create_environ_for_card(m_index); if (environ != 0) { for (const char **p = environ; *p != 0; p++) { OFFLOAD_DEBUG_TRACE(3, "Env Var for card %d: %s\n", m_index, *p); } } // Create execution context in the specified device OFFLOAD_DEBUG_TRACE(2, "Getting device %d (engine %d) handle\n", m_index, m_physical_index); res = COI::EngineGetHandle(COI_ISA_KNC, m_physical_index, &engine); check_result(res, c_get_engine_handle, m_index, res); // Target executable should be available by the time when we // attempt to initialize the device if (__target_exe == 0) { LIBOFFLOAD_ERROR(c_no_target_exe); exit(1); } OFFLOAD_DEBUG_TRACE(2, "Loading target executable \"%s\" from %p, size %lld\n", __target_exe->name, __target_exe->data, __target_exe->size); res = COI::ProcessCreateFromMemory( engine, // in_Engine __target_exe->name, // in_pBinaryName __target_exe->data, // in_pBinaryBuffer __target_exe->size, // in_BinaryBufferLength, 0, // in_Argc 0, // in_ppArgv environ == 0, // in_DupEnv environ, // in_ppAdditionalEnv mic_proxy_io, // in_ProxyActive mic_proxy_fs_root, // in_ProxyfsRoot mic_buffer_size, // in_BufferSpace mic_library_path, // in_LibrarySearchPath __target_exe->origin, // in_FileOfOrigin __target_exe->offset, // in_FileOfOriginOffset &m_process // out_pProcess ); check_result(res, c_process_create, m_index, res); // get function handles res = COI::ProcessGetFunctionHandles(m_process, c_funcs_total, m_func_names, m_funcs); check_result(res, c_process_get_func_handles, m_index, res); // initialize device side pid_t pid = init_device(); // For IDB if (__dbg_is_attached) { // TODO: we have in-memory executable now. // Check with IDB team what should we provide them now? 
if (strlen(__target_exe->name) < MAX_TARGET_NAME) { strcpy(__dbg_target_exe_name, __target_exe->name); } __dbg_target_so_pid = pid; __dbg_target_id = m_physical_index; __dbg_target_so_loaded(); } }
bool init(void) { #ifndef TARGET_WINNT const char *lib_name = "libcoi_host.so.0"; #else // TARGET_WINNT const char *lib_name = "coi_host.dll"; #endif // TARGET_WINNT OFFLOAD_DEBUG_TRACE(2, "Loading COI library %s ...\n", lib_name); lib_handle = DL_open(lib_name); if (lib_handle == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to load the library\n"); return false; } EngineGetCount = (COIRESULT (*)(COI_ISA_TYPE, uint32_t*)) DL_sym(lib_handle, "COIEngineGetCount", COI_VERSION1); if (EngineGetCount == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIEngineGetCount"); fini(); return false; } EngineGetHandle = (COIRESULT (*)(COI_ISA_TYPE, uint32_t, COIENGINE*)) DL_sym(lib_handle, "COIEngineGetHandle", COI_VERSION1); if (EngineGetHandle == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIEngineGetHandle"); fini(); return false; } ProcessCreateFromMemory = (COIRESULT (*)(COIENGINE, const char*, const void*, uint64_t, int, const char**, uint8_t, const char**, uint8_t, const char*, uint64_t, const char*, const char*, uint64_t, COIPROCESS*)) DL_sym(lib_handle, "COIProcessCreateFromMemory", COI_VERSION1); if (ProcessCreateFromMemory == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIProcessCreateFromMemory"); fini(); return false; } ProcessDestroy = (COIRESULT (*)(COIPROCESS, int32_t, uint8_t, int8_t*, uint32_t*)) DL_sym(lib_handle, "COIProcessDestroy", COI_VERSION1); if (ProcessDestroy == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIProcessDestroy"); fini(); return false; } ProcessGetFunctionHandles = (COIRESULT (*)(COIPROCESS, uint32_t, const char**, COIFUNCTION*)) DL_sym(lib_handle, "COIProcessGetFunctionHandles", COI_VERSION1); if (ProcessGetFunctionHandles == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIProcessGetFunctionHandles"); fini(); return false; } ProcessLoadLibraryFromMemory = (COIRESULT (*)(COIPROCESS, const void*, uint64_t, const char*, const char*, const 
char*, uint64_t, uint32_t, COILIBRARY*)) DL_sym(lib_handle, "COIProcessLoadLibraryFromMemory", COI_VERSION2); if (ProcessLoadLibraryFromMemory == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIProcessLoadLibraryFromMemory"); fini(); return false; } ProcessRegisterLibraries = (COIRESULT (*)(uint32_t, const void**, const uint64_t*, const char**, const uint64_t*)) DL_sym(lib_handle, "COIProcessRegisterLibraries", COI_VERSION1); if (ProcessRegisterLibraries == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIProcessRegisterLibraries"); fini(); return false; } PipelineCreate = (COIRESULT (*)(COIPROCESS, COI_CPU_MASK, uint32_t, COIPIPELINE*)) DL_sym(lib_handle, "COIPipelineCreate", COI_VERSION1); if (PipelineCreate == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIPipelineCreate"); fini(); return false; } PipelineDestroy = (COIRESULT (*)(COIPIPELINE)) DL_sym(lib_handle, "COIPipelineDestroy", COI_VERSION1); if (PipelineDestroy == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIPipelineDestroy"); fini(); return false; } PipelineRunFunction = (COIRESULT (*)(COIPIPELINE, COIFUNCTION, uint32_t, const COIBUFFER*, const COI_ACCESS_FLAGS*, uint32_t, const COIEVENT*, const void*, uint16_t, void*, uint16_t, COIEVENT*)) DL_sym(lib_handle, "COIPipelineRunFunction", COI_VERSION1); if (PipelineRunFunction == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIPipelineRunFunction"); fini(); return false; } BufferCreate = (COIRESULT (*)(uint64_t, COI_BUFFER_TYPE, uint32_t, const void*, uint32_t, const COIPROCESS*, COIBUFFER*)) DL_sym(lib_handle, "COIBufferCreate", COI_VERSION1); if (BufferCreate == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIBufferCreate"); fini(); return false; } BufferCreateFromMemory = (COIRESULT (*)(uint64_t, COI_BUFFER_TYPE, uint32_t, void*, uint32_t, const COIPROCESS*, COIBUFFER*)) DL_sym(lib_handle, "COIBufferCreateFromMemory", 
COI_VERSION1); if (BufferCreateFromMemory == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIBufferCreateFromMemory"); fini(); return false; } BufferDestroy = (COIRESULT (*)(COIBUFFER)) DL_sym(lib_handle, "COIBufferDestroy", COI_VERSION1); if (BufferDestroy == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIBufferDestroy"); fini(); return false; } BufferMap = (COIRESULT (*)(COIBUFFER, uint64_t, uint64_t, COI_MAP_TYPE, uint32_t, const COIEVENT*, COIEVENT*, COIMAPINSTANCE*, void**)) DL_sym(lib_handle, "COIBufferMap", COI_VERSION1); if (BufferMap == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIBufferMap"); fini(); return false; } BufferUnmap = (COIRESULT (*)(COIMAPINSTANCE, uint32_t, const COIEVENT*, COIEVENT*)) DL_sym(lib_handle, "COIBufferUnmap", COI_VERSION1); if (BufferUnmap == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIBufferUnmap"); fini(); return false; } BufferWrite = (COIRESULT (*)(COIBUFFER, uint64_t, const void*, uint64_t, COI_COPY_TYPE, uint32_t, const COIEVENT*, COIEVENT*)) DL_sym(lib_handle, "COIBufferWrite", COI_VERSION1); if (BufferWrite == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIBufferWrite"); fini(); return false; } BufferRead = (COIRESULT (*)(COIBUFFER, uint64_t, void*, uint64_t, COI_COPY_TYPE, uint32_t, const COIEVENT*, COIEVENT*)) DL_sym(lib_handle, "COIBufferRead", COI_VERSION1); if (BufferRead == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIBufferRead"); fini(); return false; } BufferCopy = (COIRESULT (*)(COIBUFFER, COIBUFFER, uint64_t, uint64_t, uint64_t, COI_COPY_TYPE, uint32_t, const COIEVENT*, COIEVENT*)) DL_sym(lib_handle, "COIBufferCopy", COI_VERSION1); if (BufferCopy == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIBufferCopy"); fini(); return false; } BufferGetSinkAddress = (COIRESULT (*)(COIBUFFER, uint64_t*)) DL_sym(lib_handle, "COIBufferGetSinkAddress", 
COI_VERSION1); if (BufferGetSinkAddress == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIBufferGetSinkAddress"); fini(); return false; } BufferSetState = (COIRESULT(*)(COIBUFFER, COIPROCESS, COI_BUFFER_STATE, COI_BUFFER_MOVE_FLAG, uint32_t, const COIEVENT*, COIEVENT*)) DL_sym(lib_handle, "COIBufferSetState", COI_VERSION1); if (BufferSetState == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIBufferSetState"); fini(); return false; } EventWait = (COIRESULT (*)(uint16_t, const COIEVENT*, int32_t, uint8_t, uint32_t*, uint32_t*)) DL_sym(lib_handle, "COIEventWait", COI_VERSION1); if (EventWait == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIEventWait"); fini(); return false; } PerfGetCycleFrequency = (uint64_t (*)(void)) DL_sym(lib_handle, "COIPerfGetCycleFrequency", COI_VERSION1); if (PerfGetCycleFrequency == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIPerfGetCycleFrequency"); fini(); return false; } is_available = true; return true; }
bool init(void) { #ifndef TARGET_WINNT const char *lib_name = "libcoi_host.so.0"; #else // TARGET_WINNT const char *lib_name = "coi_host.dll"; #endif // TARGET_WINNT OFFLOAD_DEBUG_TRACE(2, "Loading COI library %s ...\n", lib_name); lib_handle = DL_open(lib_name); if (lib_handle == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to load the library\n"); return false; } EngineGetCount = (COIRESULT (*)(COI_ISA_TYPE, uint32_t*)) DL_sym(lib_handle, "COIEngineGetCount", COI_VERSION1); if (EngineGetCount == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIEngineGetCount"); fini(); return false; } EngineGetHandle = (COIRESULT (*)(COI_ISA_TYPE, uint32_t, COIENGINE*)) DL_sym(lib_handle, "COIEngineGetHandle", COI_VERSION1); if (EngineGetHandle == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIEngineGetHandle"); fini(); return false; } ProcessCreateFromMemory = (COIRESULT (*)(COIENGINE, const char*, const void*, uint64_t, int, const char**, uint8_t, const char**, uint8_t, const char*, uint64_t, const char*, const char*, uint64_t, COIPROCESS*)) DL_sym(lib_handle, "COIProcessCreateFromMemory", COI_VERSION1); if (ProcessCreateFromMemory == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIProcessCreateFromMemory"); fini(); return false; } ProcessSetCacheSize = (COIRESULT (*)(COIPROCESS, uint64_t, uint32_t, uint64_t, uint32_t, uint32_t, const COIEVENT*, COIEVENT*)) DL_sym(lib_handle, "COIProcessSetCacheSize", COI_VERSION1); if (ProcessSetCacheSize == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIProcessSetCacheSize"); #if 0 // for now disable as ProcessSetCacheSize is not available on < MPSS 3.4 fini(); return false; #endif } ProcessCreateFromFile = (COIRESULT (*)(COIENGINE, const char*, int, const char**, uint8_t, const char**, uint8_t, const char*, uint64_t, const char*, COIPROCESS*)) DL_sym(lib_handle, "COIProcessCreateFromFile", COI_VERSION1); if (ProcessCreateFromFile == 0) { OFFLOAD_DEBUG_TRACE(2, 
"Failed to find %s in COI library\n", "COIProcessCreateFromFile"); fini(); return false; } ProcessDestroy = (COIRESULT (*)(COIPROCESS, int32_t, uint8_t, int8_t*, uint32_t*)) DL_sym(lib_handle, "COIProcessDestroy", COI_VERSION1); if (ProcessDestroy == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIProcessDestroy"); fini(); return false; } ProcessGetFunctionHandles = (COIRESULT (*)(COIPROCESS, uint32_t, const char**, COIFUNCTION*)) DL_sym(lib_handle, "COIProcessGetFunctionHandles", COI_VERSION1); if (ProcessGetFunctionHandles == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIProcessGetFunctionHandles"); fini(); return false; } ProcessLoadLibraryFromMemory = (COIRESULT (*)(COIPROCESS, const void*, uint64_t, const char*, const char*, const char*, uint64_t, uint32_t, COILIBRARY*)) DL_sym(lib_handle, "COIProcessLoadLibraryFromMemory", COI_VERSION2); if (ProcessLoadLibraryFromMemory == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIProcessLoadLibraryFromMemory"); fini(); return false; } ProcessUnloadLibrary = (COIRESULT (*)(COIPROCESS, COILIBRARY)) DL_sym(lib_handle, "COIProcessUnloadLibrary", COI_VERSION1); if (ProcessUnloadLibrary == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIProcessUnloadLibrary"); fini(); return false; } ProcessRegisterLibraries = (COIRESULT (*)(uint32_t, const void**, const uint64_t*, const char**, const uint64_t*)) DL_sym(lib_handle, "COIProcessRegisterLibraries", COI_VERSION1); if (ProcessRegisterLibraries == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIProcessRegisterLibraries"); fini(); return false; } PipelineCreate = (COIRESULT (*)(COIPROCESS, COI_CPU_MASK, uint32_t, COIPIPELINE*)) DL_sym(lib_handle, "COIPipelineCreate", COI_VERSION1); if (PipelineCreate == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIPipelineCreate"); fini(); return false; } PipelineDestroy = (COIRESULT (*)(COIPIPELINE)) 
DL_sym(lib_handle, "COIPipelineDestroy", COI_VERSION1); if (PipelineDestroy == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIPipelineDestroy"); fini(); return false; } PipelineRunFunction = (COIRESULT (*)(COIPIPELINE, COIFUNCTION, uint32_t, const COIBUFFER*, const COI_ACCESS_FLAGS*, uint32_t, const COIEVENT*, const void*, uint16_t, void*, uint16_t, COIEVENT*)) DL_sym(lib_handle, "COIPipelineRunFunction", COI_VERSION1); if (PipelineRunFunction == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIPipelineRunFunction"); fini(); return false; } BufferCreate = (COIRESULT (*)(uint64_t, COI_BUFFER_TYPE, uint32_t, const void*, uint32_t, const COIPROCESS*, COIBUFFER*)) DL_sym(lib_handle, "COIBufferCreate", COI_VERSION1); if (BufferCreate == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIBufferCreate"); fini(); return false; } BufferCreateFromMemory = (COIRESULT (*)(uint64_t, COI_BUFFER_TYPE, uint32_t, void*, uint32_t, const COIPROCESS*, COIBUFFER*)) DL_sym(lib_handle, "COIBufferCreateFromMemory", COI_VERSION1); if (BufferCreateFromMemory == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIBufferCreateFromMemory"); fini(); return false; } BufferDestroy = (COIRESULT (*)(COIBUFFER)) DL_sym(lib_handle, "COIBufferDestroy", COI_VERSION1); if (BufferDestroy == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIBufferDestroy"); fini(); return false; } BufferMap = (COIRESULT (*)(COIBUFFER, uint64_t, uint64_t, COI_MAP_TYPE, uint32_t, const COIEVENT*, COIEVENT*, COIMAPINSTANCE*, void**)) DL_sym(lib_handle, "COIBufferMap", COI_VERSION1); if (BufferMap == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIBufferMap"); fini(); return false; } BufferUnmap = (COIRESULT (*)(COIMAPINSTANCE, uint32_t, const COIEVENT*, COIEVENT*)) DL_sym(lib_handle, "COIBufferUnmap", COI_VERSION1); if (BufferUnmap == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", 
"COIBufferUnmap"); fini(); return false; } BufferWrite = (COIRESULT (*)(COIBUFFER, uint64_t, const void*, uint64_t, COI_COPY_TYPE, uint32_t, const COIEVENT*, COIEVENT*)) DL_sym(lib_handle, "COIBufferWrite", COI_VERSION1); if (BufferWrite == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIBufferWrite"); fini(); return false; } BufferRead = (COIRESULT (*)(COIBUFFER, uint64_t, void*, uint64_t, COI_COPY_TYPE, uint32_t, const COIEVENT*, COIEVENT*)) DL_sym(lib_handle, "COIBufferRead", COI_VERSION1); if (BufferRead == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIBufferRead"); fini(); return false; } BufferReadMultiD = (COIRESULT (*)(COIBUFFER, uint64_t, void *, void *, COI_COPY_TYPE, uint32_t, const COIEVENT*, COIEVENT*)) DL_sym(lib_handle, "COIBufferReadMultiD", COI_VERSION1); // We accept that coi library has no COIBufferReadMultiD routine. // So there is no check for zero value BufferWriteMultiD = (COIRESULT (*)(COIBUFFER, const COIPROCESS, uint64_t, void *, void *, COI_COPY_TYPE, uint32_t, const COIEVENT*, COIEVENT*)) DL_sym(lib_handle, "COIBufferWriteMultiD", COI_VERSION1); // We accept that coi library has no COIBufferWriteMultiD routine. 
// So there is no check for zero value BufferCopy = (COIRESULT (*)(COIBUFFER, COIBUFFER, uint64_t, uint64_t, uint64_t, COI_COPY_TYPE, uint32_t, const COIEVENT*, COIEVENT*)) DL_sym(lib_handle, "COIBufferCopy", COI_VERSION1); if (BufferCopy == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIBufferCopy"); fini(); return false; } BufferGetSinkAddress = (COIRESULT (*)(COIBUFFER, uint64_t*)) DL_sym(lib_handle, "COIBufferGetSinkAddress", COI_VERSION1); if (BufferGetSinkAddress == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIBufferGetSinkAddress"); fini(); return false; } BufferSetState = (COIRESULT(*)(COIBUFFER, COIPROCESS, COI_BUFFER_STATE, COI_BUFFER_MOVE_FLAG, uint32_t, const COIEVENT*, COIEVENT*)) DL_sym(lib_handle, "COIBufferSetState", COI_VERSION1); if (BufferSetState == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIBufferSetState"); fini(); return false; } EventWait = (COIRESULT (*)(uint16_t, const COIEVENT*, int32_t, uint8_t, uint32_t*, uint32_t*)) DL_sym(lib_handle, "COIEventWait", COI_VERSION1); if (EventWait == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIEventWait"); fini(); return false; } PerfGetCycleFrequency = (uint64_t (*)(void)) DL_sym(lib_handle, "COIPerfGetCycleFrequency", COI_VERSION1); if (PerfGetCycleFrequency == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIPerfGetCycleFrequency"); fini(); return false; } PipelineClearCPUMask = (COIRESULT (*)(COI_CPU_MASK)) DL_sym(lib_handle, "COIPipelineClearCPUMask", COI_VERSION1); if (PipelineClearCPUMask == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIPipelineClearCPUMask"); fini(); return false; } PipelineSetCPUMask = (COIRESULT (*)(COIPROCESS, uint32_t,uint8_t, COI_CPU_MASK)) DL_sym(lib_handle, "COIPipelineSetCPUMask", COI_VERSION1); if (PipelineSetCPUMask == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIPipelineSetCPUMask"); fini(); return 
false; } EngineGetInfo = (COIRESULT (*)(COIENGINE, uint32_t, COI_ENGINE_INFO*)) DL_sym(lib_handle, "COIEngineGetInfo", COI_VERSION1); if (EngineGetInfo == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", "COIEngineGetInfo"); fini(); return false; } EventRegisterCallback = (COIRESULT (*)(COIEVENT, void (*)(COIEVENT, const COIRESULT, const void*), const void*, const uint64_t)) DL_sym(lib_handle, "COIEventRegisterCallback", COI_VERSION1); ProcessConfigureDMA = (COIRESULT (*)(const uint64_t, const int)) DL_sym(lib_handle, "COIProcessConfigureDMA", COI_VERSION1); is_available = true; return true; }
// C-linkage wrapper around myoAcquire(): traces the call and passes the
// value returned by myoAcquire() to CheckResult() together with the API name.
extern "C" void __offload_myoAcquire(void)
{
    OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);

    CheckResult(
        "myoAcquire",
        myoAcquire());
}
// C-linkage wrapper around myoiLibFini(): traces the call and then invokes
// myoiLibFini(). Nothing is returned or checked.
extern "C" void __offload_myoLibFini(void)
{
    OFFLOAD_DEBUG_TRACE(3, "%s()\n", __func__);

    myoiLibFini();
}
// temporary workaround for blocking behavior of myoiLibInit/Fini calls extern "C" void __offload_myoLibInit() { OFFLOAD_DEBUG_TRACE(3, "%s()\n", __func__); CheckResult("myoiLibInit", myoiLibInit(0, 0)); }
// C-linkage wrapper around myoSharedAlignedFree(): traces the pointer and
// forwards it unchanged to myoSharedAlignedFree().
//   ptr - pointer handed to myoSharedAlignedFree().
extern "C" void _Offload_shared_aligned_free(void *ptr)
{
    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, ptr);

    myoSharedAlignedFree(ptr);
}
// C-linkage wrapper around myoSharedAlignedMalloc(): traces the request and
// returns whatever myoSharedAlignedMalloc(size, align) returns.
//   size  - forwarded as the first argument.
//   align - forwarded as the second argument.
extern "C" void* _Offload_shared_aligned_malloc(size_t size, size_t align)
{
    OFFLOAD_DEBUG_TRACE(3, "%s(%lld, %lld)\n", __func__, size, align);

    void *shared_mem = myoSharedAlignedMalloc(size, align);
    return shared_mem;
}
DLL_LOCAL bool MyoWrapper::LoadLibrary(void) { #ifndef TARGET_WINNT const char *lib_name = "libmyo-client.so"; #else // TARGET_WINNT const char *lib_name = "myo-client.dll"; #endif // TARGET_WINNT OFFLOAD_DEBUG_TRACE(2, "Loading MYO library %s ...\n", lib_name); m_lib_handle = DL_open(lib_name); if (m_lib_handle == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to load the library. errno = %d\n", errno); return false; } m_lib_init = (MyoError (*)(void*, void*)) DL_sym(m_lib_handle, "myoiLibInit", MYO_VERSION1); if (m_lib_init == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", "myoiLibInit"); UnloadLibrary(); return false; } m_lib_fini = (void (*)(void)) DL_sym(m_lib_handle, "myoiLibFini", MYO_VERSION1); if (m_lib_fini == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", "myoiLibFini"); UnloadLibrary(); return false; } m_shared_malloc = (void* (*)(size_t)) DL_sym(m_lib_handle, "myoSharedMalloc", MYO_VERSION1); if (m_shared_malloc == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", "myoSharedMalloc"); UnloadLibrary(); return false; } m_shared_free = (void (*)(void*)) DL_sym(m_lib_handle, "myoSharedFree", MYO_VERSION1); if (m_shared_free == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", "myoSharedFree"); UnloadLibrary(); return false; } m_shared_aligned_malloc = (void* (*)(size_t, size_t)) DL_sym(m_lib_handle, "myoSharedAlignedMalloc", MYO_VERSION1); if (m_shared_aligned_malloc == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", "myoSharedAlignedMalloc"); UnloadLibrary(); return false; } m_shared_aligned_free = (void (*)(void*)) DL_sym(m_lib_handle, "myoSharedAlignedFree", MYO_VERSION1); if (m_shared_aligned_free == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", "myoSharedAlignedFree"); UnloadLibrary(); return false; } m_acquire = (MyoError (*)(void)) DL_sym(m_lib_handle, "myoAcquire", MYO_VERSION1); if (m_acquire == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO 
library\n", "myoAcquire"); UnloadLibrary(); return false; } m_release = (MyoError (*)(void)) DL_sym(m_lib_handle, "myoRelease", MYO_VERSION1); if (m_release == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", "myoRelease"); UnloadLibrary(); return false; } m_host_var_table_propagate = (MyoError (*)(void*, int)) DL_sym(m_lib_handle, "myoiHostVarTablePropagate", MYO_VERSION1); if (m_host_var_table_propagate == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", "myoiHostVarTablePropagate"); UnloadLibrary(); return false; } m_host_fptr_table_register = (MyoError (*)(void*, int, int)) DL_sym(m_lib_handle, "myoiHostFptrTableRegister", MYO_VERSION1); if (m_host_fptr_table_register == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", "myoiHostFptrTableRegister"); UnloadLibrary(); return false; } m_remote_thunk_call = (MyoError (*)(void*, void*, int)) DL_sym(m_lib_handle, "myoiRemoteThunkCall", MYO_VERSION1); if (m_remote_thunk_call == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", "myoiRemoteThunkCall"); UnloadLibrary(); return false; } m_remote_call = (MyoiRFuncCallHandle (*)(const char*, void*, int)) DL_sym(m_lib_handle, "myoiRemoteCall", MYO_VERSION1); if (m_remote_call == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", "myoiRemoteCall"); UnloadLibrary(); return false; } m_get_result = (MyoError (*)(MyoiRFuncCallHandle)) DL_sym(m_lib_handle, "myoiGetResult", MYO_VERSION1); if (m_get_result == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", "myoiGetResult"); UnloadLibrary(); return false; } m_arena_create = (MyoError (*)(MyoOwnershipType, int, MyoArena*)) DL_sym(m_lib_handle, "myoArenaCreate", MYO_VERSION1); if (m_arena_create == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", "myoArenaCreate"); UnloadLibrary(); return false; } m_arena_aligned_malloc = (void* (*)(MyoArena, size_t, size_t)) DL_sym(m_lib_handle, "myoArenaAlignedMalloc", MYO_VERSION1); if 
(m_arena_aligned_malloc == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", "myoArenaAlignedMalloc"); UnloadLibrary(); return false; } m_arena_aligned_free = (void* (*)(MyoArena, void*)) DL_sym(m_lib_handle, "myoArenaAlignedFree", MYO_VERSION1); if (m_arena_aligned_free == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", "myoArenaAlignedFree"); UnloadLibrary(); return false; } m_arena_acquire = (MyoError (*)(MyoArena)) DL_sym(m_lib_handle, "myoArenaAcquire", MYO_VERSION1); if (m_acquire == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", "myoArenaAcquire"); UnloadLibrary(); return false; } m_arena_release = (MyoError (*)(MyoArena)) DL_sym(m_lib_handle, "myoArenaRelease", MYO_VERSION1); if (m_release == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", "myoArenaRelease"); UnloadLibrary(); return false; } // Check for "feature-available" API added in MPSS 3.3. // Not finding it is not an error. m_feature_available = (MyoError (*)(int)) DL_sym(m_lib_handle, "myoiSupportsFeature", MYO_VERSION1); if (m_feature_available == 0) { OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", "myoiSupportsFeature"); } OFFLOAD_DEBUG_TRACE(2, "The library was successfully loaded\n"); // Create arena if supported CreateVtableArena(); OFFLOAD_DEBUG_TRACE(3, "Vtable arena created\n"); m_is_available = true; return true; }
void OffloadDescriptor::gather_copyout_data() { OFFLOAD_TIMER_STOP(c_offload_target_compute); OFFLOAD_TIMER_START(c_offload_target_gather_outputs); for (int i = 0; i < m_vars_total; i++) { bool src_is_for_mic = (m_vars[i].direction.out || m_vars[i].into == NULL); switch (m_vars[i].type.src) { case c_data_ptr_array: break; case c_data: case c_void_ptr: case c_cean_var: if (m_vars[i].direction.out && !m_vars[i].flags.is_static) { m_out.send_data( static_cast<char*>(m_vars[i].ptr) + m_vars[i].disp, m_vars[i].size); } break; case c_dv: break; case c_string_ptr: case c_data_ptr: case c_cean_var_ptr: case c_dv_ptr: if (m_vars[i].free_if && src_is_for_mic && !m_vars[i].flags.is_static) { void *buf = *static_cast<char**>(m_vars[i].ptr) - m_vars[i].mic_offset - (m_vars[i].flags.is_stack_buf? 0 : m_vars[i].offset); if (buf == NULL) { break; } // decrement buffer reference count OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs); BufReleaseRef(buf); OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs); } break; case c_func_ptr: if (m_vars[i].direction.out) { m_out.send_func_ptr(*((void**) m_vars[i].ptr)); } break; case c_dv_data: case c_dv_ptr_data: case c_dv_data_slice: case c_dv_ptr_data_slice: if (src_is_for_mic && m_vars[i].free_if && !m_vars[i].flags.is_static) { ArrDesc *dvp = (m_vars[i].type.src == c_dv_data || m_vars[i].type.src == c_dv_data_slice) ? 
static_cast<ArrDesc*>(m_vars[i].ptr) : *static_cast<ArrDesc**>(m_vars[i].ptr); void *buf = reinterpret_cast<char*>(dvp->Base) - m_vars[i].mic_offset - m_vars[i].offset; if (buf == NULL) { break; } // decrement buffer reference count OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs); BufReleaseRef(buf); OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs); } break; default: LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst); abort(); } if (m_vars[i].into) { switch (m_vars[i].type.dst) { case c_data_ptr_array: break; case c_data: case c_void_ptr: case c_cean_var: case c_dv: break; case c_string_ptr: case c_data_ptr: case c_cean_var_ptr: case c_dv_ptr: if (m_vars[i].direction.in && m_vars[i].free_if && !m_vars[i].flags.is_static_dstn) { void *buf = *static_cast<char**>(m_vars[i].into) - m_vars[i].mic_offset - (m_vars[i].flags.is_stack_buf? 0 : m_vars[i].offset); if (buf == NULL) { break; } // decrement buffer reference count OFFLOAD_TIMER_START( c_offload_target_release_buffer_refs); BufReleaseRef(buf); OFFLOAD_TIMER_STOP( c_offload_target_release_buffer_refs); } break; case c_func_ptr: break; case c_dv_data: case c_dv_ptr_data: case c_dv_data_slice: case c_dv_ptr_data_slice: if (m_vars[i].free_if && m_vars[i].direction.in && !m_vars[i].flags.is_static_dstn) { ArrDesc *dvp = (m_vars[i].type.dst == c_dv_data_slice || m_vars[i].type.dst == c_dv_data) ? 
static_cast<ArrDesc*>(m_vars[i].into) : *static_cast<ArrDesc**>(m_vars[i].into); void *buf = reinterpret_cast<char*>(dvp->Base) - m_vars[i].mic_offset - m_vars[i].offset; if (buf == NULL) { break; } // decrement buffer reference count OFFLOAD_TIMER_START( c_offload_target_release_buffer_refs); BufReleaseRef(buf); OFFLOAD_TIMER_STOP( c_offload_target_release_buffer_refs); } break; default: LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst); abort(); } } } OFFLOAD_DEBUG_TRACE(2, "OUT buffer @ p %p size %lld\n", m_out.get_buffer_start(), m_out.get_buffer_size()); OFFLOAD_DEBUG_DUMP_BYTES(2, m_out.get_buffer_start(), m_out.get_buffer_size()); OFFLOAD_DEBUG_TRACE_1(1, get_offload_number(), c_offload_copyout_data, "Total copyout data sent to host: [%lld] bytes\n", m_out.get_tfr_size()); OFFLOAD_TIMER_STOP(c_offload_target_gather_outputs); }
static void __offload_myoInit_once(void) { if (!__offload_myoLoadLibrary()) { return; } // initialize all devices for (int i = 0; i < mic_engines_total; i++) { mic_engines[i].init(); } // load and initialize MYO library OFFLOAD_DEBUG_TRACE(2, "Initializing MYO library ...\n"); COIEVENT events[MIC_ENGINES_MAX]; // One entry per device + // A pair of entries for the Host postInit func + // A pair of entries for the MIC postInit func + // end marker MyoiUserParams params[MIC_ENGINES_MAX+5]; // Load target library to all devices and // create libinit parameters for all devices for (int i = 0; i < mic_engines_total; i++) { mic_engines[i].init_myo(&events[i]); params[i].type = MYOI_USERPARAMS_DEVID; params[i].nodeid = mic_engines[i].get_physical_index() + 1; OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %d, %d }\n", i, params[i].type, params[i].nodeid); } // Check if V2 myoLibInit is available if (myo_wrapper.PostInitFuncSupported()) { // Set the host post libInit function indicator params[mic_engines_total].type = MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC; params[mic_engines_total].nodeid = MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC_HOST_NODE; OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %d, %d }\n", mic_engines_total, params[mic_engines_total].type, params[mic_engines_total].nodeid); // Set the host post libInit host function address ((MyoiUserParamsPostLibInit*)(¶ms[mic_engines_total+1]))-> postLibInitHostFuncAddress = (void (*)())&__offload_propagate_shared_vars; OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %p }\n", mic_engines_total+1, ((MyoiUserParamsPostLibInit*)(¶ms[mic_engines_total+1]))-> postLibInitHostFuncAddress); // Set the target post libInit function indicator params[mic_engines_total+2].type = MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC; params[mic_engines_total+2].nodeid = MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC_ALL_NODES; // Set the target post libInit target function name ((MyoiUserParamsPostLibInit*)(¶ms[mic_engines_total+3]))-> postLibInitRemoveFuncName = "--vtable_initializer--"; 
OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %s }\n", mic_engines_total+3, ((MyoiUserParamsPostLibInit*)(¶ms[mic_engines_total+1]))-> postLibInitRemoveFuncName); params[mic_engines_total+4].type = MYOI_USERPARAMS_LAST_MSG; params[mic_engines_total+4].nodeid = 0; OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %d, %d }\n", mic_engines_total+4, params[mic_engines_total+4].type, params[mic_engines_total+4].nodeid); } else { params[mic_engines_total].type = MYOI_USERPARAMS_LAST_MSG; params[mic_engines_total].nodeid = 0; OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %d, %d }\n", mic_engines_total, params[mic_engines_total].type, params[mic_engines_total].nodeid); } // initialize myo runtime on host myo_wrapper.LibInit(params, 0); // wait for the target init calls to finish COIRESULT res; res = COI::EventWait(mic_engines_total, events, -1, 1, 0, 0); if (res != COI_SUCCESS) { LIBOFFLOAD_ERROR(c_event_wait, res); exit(1); } myo_is_available = true; OFFLOAD_DEBUG_TRACE(2, "setting myo_is_available=%d\n", myo_is_available); OFFLOAD_DEBUG_TRACE(2, "Initializing MYO library ... done\n"); }
// Returns the COI pipeline associated with the stream identified by
// `handle`, creating it on first use.
//
// If Stream::find_stream() finds no stream, the error is reported and
// LIBOFFLOAD_ABORT is invoked. If the stream already has a pipeline it is
// returned as is. Otherwise, the function:
//   - bumps m_proc_number and aborts if the limit check fails,
//   - under m_stream_lock: starts the process if needed, builds a CPU mask
//     from threads still marked available in m_cpus (thread 0 is skipped),
//     creates the pipeline, and runs the c_func_set_stream_affinity function
//     on it with an affinity_spec derived from OFFLOAD_STREAM_AFFINITY,
//   - stores the new pipeline in the stream.
COIPIPELINE Engine::get_pipeline(_Offload_stream handle)
{
    Stream * stream = Stream::find_stream(handle, false);

    if (!stream) {
        LIBOFFLOAD_ERROR(c_offload_no_stream, m_index);
        LIBOFFLOAD_ABORT;
    }

    COIPIPELINE pipeline = stream->get_pipeline();

    // Fast path: the stream already owns a pipeline.
    if (pipeline != 0) {
        return pipeline;
    }

    COIRESULT     res;
    int           proc_num;
    COI_CPU_MASK  in_Mask ;

    // NOTE(review): __sync_fetch_and_add yields the value before the
    // increment while _InterlockedIncrement yields the value after it, so
    // the limit check below is off by one between the two platforms -
    // confirm which is intended.
#ifndef TARGET_WINNT
    proc_num = __sync_fetch_and_add(&m_proc_number, 1);
#else // TARGET_WINNT
    proc_num = _InterlockedIncrement(&m_proc_number);
#endif // TARGET_WINNT

    if (proc_num > COI_PIPELINE_MAX_PIPELINES) {
        LIBOFFLOAD_ERROR(c_coipipe_max_number, COI_PIPELINE_MAX_PIPELINES);
        LIBOFFLOAD_ABORT;
    }

    m_stream_lock.lock();

    // start process if not done yet
    if (m_process == 0) {
        init_process();
    }

    // create CPUmask
    res = COI::PipelineClearCPUMask(in_Mask);
    check_result(res, c_clear_cpu_mask, m_index, res);

    int stream_cpu_num = stream->get_cpu_number();

    stream->m_stream_cpus.reset();

    int threads_per_core = m_num_threads / m_num_cores;

    // The "stream_cpu_num" available threads is set in mask.
    // Available threads are defined by examining of m_cpus bitset.
    // We skip thread 0 .
    for (int i = 1; i < m_num_threads; i++) {
        // for available thread i m_cpus[i] is equal to 1
        if (m_cpus[i]) {
            res = COI::PipelineSetCPUMask(m_process,
                                          i / threads_per_core,
                                          i % threads_per_core,
                                          in_Mask);

            check_result(res, c_set_cpu_mask, res);
            // mark thread i as nonavailable
            m_cpus.set(i,0);
            // Mark thread i as given for the stream.
            // In case of stream destroying by call to
            // _Offload_stream_destroy we can mark the thread i as
            // available.
            stream->m_stream_cpus.set(i);
            if (--stream_cpu_num <= 0) {
                break;
            }
        }
    }

    // if stream_cpu_num is greater than 0 there are not enough
    // available threads
    if (stream_cpu_num > 0) {
        LIBOFFLOAD_ERROR(c_create_pipeline_for_stream, m_num_threads);
        LIBOFFLOAD_ABORT;
    }

    // create pipeline for this thread
    OFFLOAD_DEBUG_TRACE(2, "COIPipelineCreate Mask\n"
        "%016lx %016lx %016lx %016lx\n%016lx %016lx %016lx %016lx\n"
        "%016lx %016lx %016lx %016lx\n%016lx %016lx %016lx %016lx\n",
        in_Mask[0], in_Mask[1], in_Mask[2], in_Mask[3],
        in_Mask[4], in_Mask[5], in_Mask[6], in_Mask[7],
        in_Mask[8], in_Mask[9], in_Mask[10], in_Mask[11],
        in_Mask[12], in_Mask[13], in_Mask[14], in_Mask[15]);
    res = COI::PipelineCreate(m_process, in_Mask,
                              mic_stack_size, &pipeline);
    check_result(res, c_pipeline_create, m_index, res);

    // Set stream's affinities
    {
        struct affinity_spec affinity_spec;
        char* affinity_type;
        int i;

        // "compact" by default
        affinity_spec.affinity_type = affinity_compact;

        // Check if user has specified type of affinity
        if ((affinity_type = getenv("OFFLOAD_STREAM_AFFINITY")) !=
                                    NULL)
        {
            char affinity_str[16];
            int affinity_str_len;

            OFFLOAD_DEBUG_TRACE(2,
                "User has specified OFFLOAD_STREAM_AFFINITY=%s\n",
                affinity_type);

            // Set type of affinity requested
            affinity_str_len = strlen(affinity_type);
            for (i=0; i<affinity_str_len && i<15; i++)
            {
                // tolower() requires a value representable as unsigned
                // char (or EOF); a plain char from the environment may be
                // negative, so convert first.
                affinity_str[i] = static_cast<char>(
                    tolower(static_cast<unsigned char>(affinity_type[i])));
            }
            affinity_str[i] = '\0';
            if (strcmp(affinity_str, "compact") == 0) {
                affinity_spec.affinity_type = affinity_compact;
                OFFLOAD_DEBUG_TRACE(2, "Setting affinity=compact\n");
            } else if (strcmp(affinity_str, "scatter") == 0) {
                affinity_spec.affinity_type = affinity_scatter;
                OFFLOAD_DEBUG_TRACE(2, "Setting affinity=scatter\n");
            } else {
                LIBOFFLOAD_ERROR(c_incorrect_affinity, affinity_str);
                affinity_spec.affinity_type = affinity_compact;
                OFFLOAD_DEBUG_TRACE(2, "Setting affinity=compact\n");
            }
        }
        // Make flat copy of sink mask because COI's mask is opaque
        for (i=0; i<16; i++) {
            affinity_spec.sink_mask[i] = in_Mask[i];
        }
        // Set number of cores and threads
        affinity_spec.num_cores = m_num_cores;
        affinity_spec.num_threads = m_num_threads;

        COIEVENT event;
        res = COI::PipelineRunFunction(pipeline,
                        m_funcs[c_func_set_stream_affinity],
                        0, 0, 0,
                        0, 0,
                        &affinity_spec, sizeof(affinity_spec),
                        0, 0,
                        &event);
        check_result(res, c_pipeline_run_func, m_index, res);

        res = COI::EventWait(1, &event, -1, 1, 0, 0);
        check_result(res, c_event_wait, res);
    }

    m_stream_lock.unlock();
    stream->set_pipeline(pipeline);

    return pipeline;
}
void OffloadDescriptor::scatter_copyin_data() { OFFLOAD_TIMER_START(c_offload_target_scatter_inputs); OFFLOAD_DEBUG_TRACE(2, "IN buffer @ %p size %lld\n", m_in.get_buffer_start(), m_in.get_buffer_size()); OFFLOAD_DEBUG_DUMP_BYTES(2, m_in.get_buffer_start(), m_in.get_buffer_size()); // receive data for (int i = 0; i < m_vars_total; i++) { bool src_is_for_mic = (m_vars[i].direction.out || m_vars[i].into == NULL); void** ptr_addr = src_is_for_mic ? static_cast<void**>(m_vars[i].ptr) : static_cast<void**>(m_vars[i].into); int type = src_is_for_mic ? m_vars[i].type.src : m_vars[i].type.dst; bool is_static = src_is_for_mic ? m_vars[i].flags.is_static : m_vars[i].flags.is_static_dstn; void *ptr = NULL; if (m_vars[i].flags.alloc_disp) { int64_t offset = 0; m_in.receive_data(&offset, sizeof(offset)); m_vars[i].offset = -offset; } if (VAR_TYPE_IS_DV_DATA_SLICE(type) || VAR_TYPE_IS_DV_DATA(type)) { ArrDesc *dvp = (type == c_dv_data_slice || type == c_dv_data)? reinterpret_cast<ArrDesc*>(ptr_addr) : *reinterpret_cast<ArrDesc**>(ptr_addr); ptr_addr = reinterpret_cast<void**>(&dvp->Base); } // Set pointer values switch (type) { case c_data_ptr_array: { int j = m_vars[i].ptr_arr_offset; int max_el = j + m_vars[i].count; char *dst_arr_ptr = (src_is_for_mic)? 
*(reinterpret_cast<char**>(m_vars[i].ptr)) : reinterpret_cast<char*>(m_vars[i].into); for (; j < max_el; j++) { if (src_is_for_mic) { m_vars[j].ptr = dst_arr_ptr + m_vars[j].ptr_arr_offset; } else { m_vars[j].into = dst_arr_ptr + m_vars[j].ptr_arr_offset; } } } break; case c_data: case c_void_ptr: case c_cean_var: case c_dv: break; case c_string_ptr: case c_data_ptr: case c_cean_var_ptr: case c_dv_ptr: if (m_vars[i].alloc_if) { void *buf; if (m_vars[i].flags.sink_addr) { m_in.receive_data(&buf, sizeof(buf)); } else { buf = m_buffers.front(); m_buffers.pop_front(); } if (buf) { if (!is_static) { if (!m_vars[i].flags.sink_addr) { // increment buffer reference OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs); BufferAddRef(buf); OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs); } add_ref_count(buf, 0 == m_vars[i].flags.sink_addr); } ptr = static_cast<char*>(buf) + m_vars[i].mic_offset + (m_vars[i].flags.is_stack_buf ? 0 : m_vars[i].offset); } *ptr_addr = ptr; } else if (m_vars[i].flags.sink_addr) { void *buf; m_in.receive_data(&buf, sizeof(buf)); void *ptr = static_cast<char*>(buf) + m_vars[i].mic_offset + (m_vars[i].flags.is_stack_buf ? 
0 : m_vars[i].offset); *ptr_addr = ptr; } break; case c_func_ptr: break; case c_dv_data: case c_dv_ptr_data: case c_dv_data_slice: case c_dv_ptr_data_slice: if (m_vars[i].alloc_if) { void *buf; if (m_vars[i].flags.sink_addr) { m_in.receive_data(&buf, sizeof(buf)); } else { buf = m_buffers.front(); m_buffers.pop_front(); } if (buf) { if (!is_static) { if (!m_vars[i].flags.sink_addr) { // increment buffer reference OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs); BufferAddRef(buf); OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs); } add_ref_count(buf, 0 == m_vars[i].flags.sink_addr); } ptr = static_cast<char*>(buf) + m_vars[i].mic_offset + m_vars[i].offset; } *ptr_addr = ptr; } else if (m_vars[i].flags.sink_addr) { void *buf; m_in.receive_data(&buf, sizeof(buf)); ptr = static_cast<char*>(buf) + m_vars[i].mic_offset + m_vars[i].offset; *ptr_addr = ptr; } break; default: LIBOFFLOAD_ERROR(c_unknown_var_type, type); abort(); } // Release obsolete buffers for stack of persistent objects if (type = c_data_ptr && m_vars[i].flags.is_stack_buf && !m_vars[i].direction.bits && m_vars[i].alloc_if && m_vars[i].size != 0) { for (int j=0; j < m_vars[i].size; j++) { void *buf; m_in.receive_data(&buf, sizeof(buf)); BufferReleaseRef(buf); ref_data.erase(buf); } } // Do copyin switch (m_vars[i].type.dst) { case c_data_ptr_array: break; case c_data: case c_void_ptr: case c_cean_var: if (m_vars[i].direction.in && !m_vars[i].flags.is_static_dstn) { int64_t size; int64_t disp; char* ptr = m_vars[i].into ? static_cast<char*>(m_vars[i].into) : static_cast<char*>(m_vars[i].ptr); if (m_vars[i].type.dst == c_cean_var) { m_in.receive_data((&size), sizeof(int64_t)); m_in.receive_data((&disp), sizeof(int64_t)); } else { size = m_vars[i].size; disp = 0; } m_in.receive_data(ptr + disp, size); } break; case c_dv: if (m_vars[i].direction.bits || m_vars[i].alloc_if || m_vars[i].free_if) { char* ptr = m_vars[i].into ? 
static_cast<char*>(m_vars[i].into) : static_cast<char*>(m_vars[i].ptr); m_in.receive_data(ptr + sizeof(uint64_t), m_vars[i].size - sizeof(uint64_t)); } break; case c_string_ptr: case c_data_ptr: case c_cean_var_ptr: case c_dv_ptr: case c_dv_data: case c_dv_ptr_data: case c_dv_data_slice: case c_dv_ptr_data_slice: break; case c_func_ptr: if (m_vars[i].direction.in) { m_in.receive_func_ptr((const void**) m_vars[i].ptr); } break; default: LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst); abort(); } } OFFLOAD_TRACE(1, "Total copyin data received from host: [%lld] bytes\n", m_in.get_tfr_size()); OFFLOAD_TIMER_STOP(c_offload_target_scatter_inputs); OFFLOAD_TIMER_START(c_offload_target_compute); }