// Merge the variable descriptors received from the host into the locally
// generated descriptors (m_vars).
//   vars       - descriptors received from the host
//   vars2      - optional companion descriptors holding the names ("sname",
//                "dname") printed in the traces; may be NULL
//   vars_total - number of descriptors received from the host
// Exits the process when the host sent more descriptors than exist locally
// or when the type bits of a matched pair differ.
void OffloadDescriptor::merge_var_descs(
    VarDesc *vars,
    VarDesc2 *vars2,
    int vars_total
)
{
    // the host must not provide more descriptors than were generated locally
    if (vars_total > m_vars_total) {
        LIBOFFLOAD_ERROR(c_merge_var_descs1);
        exit(1);
    }

    const bool have_names = (vars2 != NULL);

    for (int idx = 0; idx < m_vars_total; idx++) {
        const bool has_host_desc = (idx < vars_total);

        if (has_host_desc) {
            // both sides must agree on the variable type
            if (m_vars[idx].type.bits != vars[idx].type.bits) {
                LIBOFFLOAD_ERROR(c_merge_var_descs2);
                exit(1);
            }

            // take over the addresses supplied by the host
            m_vars[idx].ptr = vars[idx].ptr;
            m_vars[idx].into = vars[idx].into;

            // an absent source name is traced as an empty string
            const char *var_sname =
                (have_names && vars2[idx].sname != NULL) ?
                    vars2[idx].sname : "";

            OFFLOAD_DEBUG_TRACE_1(2, get_offload_number(), c_offload_var,
                " VarDesc %d, var=%s, %s, %s\n",
                idx, var_sname,
                vardesc_direction_as_string[m_vars[idx].direction.bits],
                vardesc_type_as_string[m_vars[idx].type.src]);

            if (have_names && vars2[idx].dname != NULL) {
                OFFLOAD_TRACE(2, " into=%s, %s\n", vars2[idx].dname,
                    vardesc_type_as_string[m_vars[idx].type.dst]);
            }
        }

        // the full descriptor is dumped for every local entry
        OFFLOAD_TRACE(2,
            " type_src=%d, type_dstn=%d, direction=%d, "
            "alloc_if=%d, free_if=%d, align=%d, mic_offset=%d, flags=0x%x, "
            "offset=%lld, size=%lld, count/disp=%lld, ptr=%p into=%p\n",
            m_vars[idx].type.src,
            m_vars[idx].type.dst,
            m_vars[idx].direction.bits,
            m_vars[idx].alloc_if,
            m_vars[idx].free_if,
            m_vars[idx].align,
            m_vars[idx].mic_offset,
            m_vars[idx].flags.bits,
            m_vars[idx].offset,
            m_vars[idx].size,
            m_vars[idx].count,
            m_vars[idx].ptr,
            m_vars[idx].into);
    }
}
char const * report_get_target_stage_str(int i) { switch (i) { case c_offload_target_total_time: return (offload_get_message_str(msg_c_report_target_total_time)); case c_offload_target_descriptor_setup: return ( offload_get_message_str(msg_c_report_target_descriptor_setup)); case c_offload_target_func_lookup: return (offload_get_message_str(msg_c_report_target_func_lookup)); case c_offload_target_func_time: return (offload_get_message_str(msg_c_report_target_func_time)); case c_offload_target_scatter_inputs: return ( offload_get_message_str(msg_c_report_target_scatter_inputs)); case c_offload_target_add_buffer_refs: return ( offload_get_message_str(msg_c_report_target_add_buffer_refs)); case c_offload_target_compute: return (offload_get_message_str(msg_c_report_target_compute)); case c_offload_target_gather_outputs: return (offload_get_message_str (msg_c_report_target_gather_outputs)); case c_offload_target_release_buffer_refs: return (offload_get_message_str( msg_c_report_target_release_buffer_refs)); default: LIBOFFLOAD_ERROR(c_report_unknown_timer_node); abort(); } }
char const * report_get_host_stage_str(int i) { switch (i) { case c_offload_host_total_offload: return ( offload_get_message_str(msg_c_report_host_total_offload_time)); case c_offload_host_initialize: return (offload_get_message_str(msg_c_report_host_initialize)); case c_offload_host_target_acquire: return ( offload_get_message_str(msg_c_report_host_target_acquire)); case c_offload_host_wait_deps: return (offload_get_message_str(msg_c_report_host_wait_deps)); case c_offload_host_setup_buffers: return (offload_get_message_str(msg_c_report_host_setup_buffers)); case c_offload_host_alloc_buffers: return (offload_get_message_str(msg_c_report_host_alloc_buffers)); case c_offload_host_setup_misc_data: return ( offload_get_message_str(msg_c_report_host_setup_misc_data)); case c_offload_host_alloc_data_buffer: return ( offload_get_message_str(msg_c_report_host_alloc_data_buffer)); case c_offload_host_send_pointers: return (offload_get_message_str(msg_c_report_host_send_pointers)); case c_offload_host_gather_inputs: return (offload_get_message_str(msg_c_report_host_gather_inputs)); case c_offload_host_map_in_data_buffer: return ( offload_get_message_str(msg_c_report_host_map_in_data_buffer)); case c_offload_host_unmap_in_data_buffer: return (offload_get_message_str( msg_c_report_host_unmap_in_data_buffer)); case c_offload_host_start_compute: return (offload_get_message_str(msg_c_report_host_start_compute)); case c_offload_host_wait_compute: return (offload_get_message_str(msg_c_report_host_wait_compute)); case c_offload_host_start_buffers_reads: return (offload_get_message_str( msg_c_report_host_start_buffers_reads)); case c_offload_host_scatter_outputs: return ( offload_get_message_str(msg_c_report_host_scatter_outputs)); case c_offload_host_map_out_data_buffer: return (offload_get_message_str( msg_c_report_host_map_out_data_buffer)); case c_offload_host_unmap_out_data_buffer: return (offload_get_message_str( msg_c_report_host_unmap_out_data_buffer)); case 
c_offload_host_wait_buffers_reads: return ( offload_get_message_str(msg_c_report_host_wait_buffers_reads)); case c_offload_host_destroy_buffers: return ( offload_get_message_str(msg_c_report_host_destroy_buffers)); default: LIBOFFLOAD_ERROR(c_report_unknown_timer_node); abort(); } }
// Return the COI pipeline the calling thread uses on this engine, creating
// the per-thread Thread object and the pipeline on first use.
// Reports c_coipipe_max_number and aborts when the pipeline counter exceeds
// COI_PIPELINE_MAX_PIPELINES.
COIPIPELINE Engine::get_pipeline(void)
{
    // look up the calling thread's Thread object, creating it if missing
    Thread* worker = (Thread*) thread_getspecific(mic_thread_key);
    if (worker == 0) {
        worker = new Thread(&m_proc_number);
        thread_setspecific(mic_thread_key, worker);
    }

    COIPIPELINE pipeline = worker->get_pipeline(m_index);
    if (pipeline != 0) {
        // this thread already owns a pipeline on this engine
        return pipeline;
    }

    // NOTE(review): __sync_fetch_and_add yields the value *before* the
    // increment while _InterlockedIncrement yields the value *after* it,
    // so the limit check below differs by one between the two platforms.
    // Confirm which behavior is intended.
#ifndef TARGET_WINNT
    int proc_count = __sync_fetch_and_add(&m_proc_number, 1);
#else // TARGET_WINNT
    int proc_count = _InterlockedIncrement(&m_proc_number);
#endif // TARGET_WINNT

    if (proc_count > COI_PIPELINE_MAX_PIPELINES) {
        LIBOFFLOAD_ERROR(c_coipipe_max_number, COI_PIPELINE_MAX_PIPELINES);
        LIBOFFLOAD_ABORT;
    }

    // create the pipeline and remember it for this thread
    COIRESULT status = COI::PipelineCreate(m_process, 0, mic_stack_size,
                                           &pipeline);
    check_result(status, c_pipeline_create, m_index, status);
    worker->set_pipeline(m_index, pipeline);

    return pipeline;
}
// Send function pointer void Marshaller::send_func_ptr( const void* data ) { const char* name; size_t length; if (data != 0) { name = __offload_funcs.find_name(data); if (name == 0) { #if OFFLOAD_DEBUG > 0 if (console_enabled > 2) { __offload_funcs.dump(); } #endif // OFFLOAD_DEBUG > 0 LIBOFFLOAD_ERROR(c_send_func_ptr, data); exit(1); } length = strlen(name) + 1; } else { name = ""; length = 1; } memcpy(buffer_ptr, name, length); buffer_ptr += length; tfr_size += length; }
// Receive function pointer void Marshaller::receive_func_ptr( const void** data ) { const char* name; size_t length; name = (const char*) buffer_ptr; if (name[0] != '\0') { *data = __offload_funcs.find_addr(name); if (*data == 0) { #if OFFLOAD_DEBUG > 0 if (console_enabled > 2) { __offload_funcs.dump(); } #endif // OFFLOAD_DEBUG > 0 LIBOFFLOAD_ERROR(c_receive_func_ptr, name); exit(1); } length = strlen(name) + 1; } else { *data = 0; length = 1; } buffer_ptr += length; tfr_size += length; }
extern "C" int __offload_myoIsAvailable(int target_number) { OFFLOAD_DEBUG_TRACE(3, "%s(%d)\n", __func__, target_number); if (target_number >= -2) { bool is_default_number = (target_number == -2); if (__offload_myoInit()) { if (target_number >= 0) { // User provided the device number int num = target_number % mic_engines_total; // reserve device in ORSL target_number = ORSL::reserve(num) ? num : -1; } else { // try to use device 0 target_number = ORSL::reserve(0) ? 0 : -1; } // make sure device is initialized if (target_number >= 0) { mic_engines[target_number].init(); } } else { // fallback to CPU target_number = -1; } if (target_number < 0 && !is_default_number) { LIBOFFLOAD_ERROR(c_device_is_not_available); exit(1); } } else { LIBOFFLOAD_ERROR(c_invalid_device_number); exit(1); } return target_number; }
extern "C" void __intel_cilk_for_64_offload( int size, void (*copy_constructor)(void*, void*), int target_number, void *raddr, void *closure_object, uint64_t iters, uint64_t grain_size) { OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__); target_number = __offload_myoIsAvailable(target_number); if (target_number >= 0) { struct S { void *M1; uint64_t M2; uint64_t M3; char closure[]; } *args; args = (struct S*) _Offload_shared_malloc(sizeof(struct S) + size); if (args == NULL) LIBOFFLOAD_ERROR(c_malloc); args->M1 = raddr; args->M2 = iters; args->M3 = grain_size; if (copy_constructor == 0) { memcpy(args->closure, closure_object, size); } else { copy_constructor(args->closure, closure_object); } myo_wrapper.Release(); myo_wrapper.GetResult( myo_wrapper.RemoteCall("__intel_cilk_for_64_offload", args, target_number) ); myo_wrapper.Acquire(); _Offload_shared_free(args); ORSL::release(target_number); } else { __cilkrts_cilk_for_64(raddr, closure_object, iters, grain_size); } }
void Engine::fini_process(bool verbose) { if (m_process != 0) { uint32_t sig; int8_t ret; // destroy target process OFFLOAD_DEBUG_TRACE(2, "Destroying process on the device %d\n", m_index); COIRESULT res = COI::ProcessDestroy(m_process, -1, 0, &ret, &sig); m_process = 0; if (res == COI_SUCCESS) { OFFLOAD_DEBUG_TRACE(3, "Device process: signal %d, exit code %d\n", sig, ret); if (verbose) { if (sig != 0) { LIBOFFLOAD_ERROR( c_mic_process_exit_sig, m_index, sig, c_signal_names[sig >= c_signal_max ? 0 : sig]); } else { LIBOFFLOAD_ERROR(c_mic_process_exit_ret, m_index, ret); } } // for idb if (__dbg_is_attached) { __dbg_target_so_unloaded(); } } else { if (verbose) { LIBOFFLOAD_ERROR(c_mic_process_exit, m_index); } } } }
// Destroy the stream identified by handle and mark every thread recorded in
// the stream's m_stream_cpus as available again in m_cpus.
// Reports c_offload_no_stream and aborts when the handle is unknown.
void Engine::stream_destroy(_Offload_stream handle)
{
    // get stream
    Stream * stream = Stream::find_stream(handle, true);

    if (!stream) {
        LIBOFFLOAD_ERROR(c_offload_no_stream, m_index);
        LIBOFFLOAD_ABORT;
        return;
    }

    // return cpus for future use
    for (int cpu = 0; cpu < m_num_threads; cpu++) {
        if (stream->m_stream_cpus.test(cpu)) {
            m_cpus.set(cpu);
        }
    }

    delete stream;
}
// Create CeanReadRanges data for reading contiguous ranges of // noncontiguous array defined by the argument CeanReadRanges * init_read_ranges_dv(const ArrDesc *dvp) { int64_t len; int count; int rank = dvp->Rank; CeanReadRanges *res = NULL; if (rank != 0) { int i = 0; len = dvp->Len; if (dvp->Dim[0].Mult == len) { for (i = 1; i < rank; i++) { len *= dvp->Dim[i-1].Extent; if (dvp->Dim[i].Mult != len) { break; } } } res = (CeanReadRanges *)malloc( sizeof(CeanReadRanges) + (rank - i) * sizeof(CeanReadDim)); if (res == NULL) LIBOFFLOAD_ERROR(c_malloc); res -> last_noncont_ind = rank - i - 1; count = 1; for (; i < rank; i++) { res->Dim[rank - i - 1].count = count; res->Dim[rank - i - 1].size = dvp->Dim[i].Mult; count *= dvp->Dim[i].Extent; } res -> range_max_number = count; res -> range_size = len; res -> ptr = (void*)dvp->Base; res -> current_number = 0; res -> init_offset = 0; } return res; }
// Shut down MYO: start the fini call on every engine, finalize the MYO
// runtime on the host, then wait for the engines to finish.
// Does nothing when MYO is not available; exits the process when waiting
// for the engines fails.
DLL_LOCAL void __offload_myoFini(void)
{
    if (!myo_is_available) {
        return;
    }

    OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);

    COIEVENT events[MIC_ENGINES_MAX];

    // kick off myoiLibFini calls on all devices
    for (int dev = 0; dev < mic_engines_total; dev++) {
        mic_engines[dev].fini_myo(&events[dev]);
    }

    // cleanup myo runtime on host
    myo_wrapper.LibFini();

    // wait for the target fini calls to finish
    COIRESULT status = COI::EventWait(mic_engines_total, events,
                                      -1, 1, 0, 0);
    if (status != COI_SUCCESS) {
        LIBOFFLOAD_ERROR(c_event_wait, status);
        exit(1);
    }
}
// Allocate size bytes aligned to at least align bytes.
//   size  - number of bytes to allocate
//   align - requested alignment; values smaller than sizeof(void*) are
//           raised to sizeof(void*)
// Returns the pointer obtained from _mm_malloc. Exits the process when the
// allocation fails.
extern void *OFFLOAD_MALLOC(
    size_t size,
    size_t align
)
{
    void *ptr;

    // size_t need not have the width of long long, so the arguments are
    // converted explicitly to match the %lld conversions
    OFFLOAD_DEBUG_TRACE(2, "%s(%lld, %lld)\n", __func__,
                        static_cast<long long>(size),
                        static_cast<long long>(align));

    if (align < sizeof(void*)) {
        align = sizeof(void*);
    }

    ptr = _mm_malloc(size, align);
    if (ptr == NULL) {
        // NOTE(review): size and align are passed as size_t here; confirm
        // that the c_offload_malloc message format expects that width.
        LIBOFFLOAD_ERROR(c_offload_malloc, size, align);
        exit(1);
    }

    OFFLOAD_DEBUG_TRACE(2, "%s returned %p\n", __func__, ptr);

    return ptr;
}
// Return the COI pipeline of the stream identified by handle, creating it on
// first use.
// Creating the pipeline, done while holding m_stream_lock:
//   - starts the target process if it is not running yet,
//   - gives the stream the number of currently available threads it asked
//     for (thread 0 is never handed out),
//   - creates the pipeline with the resulting CPU mask,
//   - runs the c_func_set_stream_affinity function on the pipeline with the
//     affinity type taken from OFFLOAD_STREAM_AFFINITY ("compact" or
//     "scatter", case-insensitive; anything else is reported and treated as
//     "compact").
// Aborts when the handle is unknown, when the pipeline counter exceeds
// COI_PIPELINE_MAX_PIPELINES, or when there are not enough available threads.
COIPIPELINE Engine::get_pipeline(_Offload_stream handle)
{
    Stream * stream = Stream::find_stream(handle, false);

    if (!stream) {
        LIBOFFLOAD_ERROR(c_offload_no_stream, m_index);
        LIBOFFLOAD_ABORT;
    }

    COIPIPELINE pipeline = stream->get_pipeline();

    if (pipeline == 0) {
        COIRESULT res;
        int proc_num;
        COI_CPU_MASK in_Mask;

        // NOTE(review): __sync_fetch_and_add yields the value before the
        // increment, _InterlockedIncrement the value after it, so the limit
        // check below differs by one between the two platforms.
#ifndef TARGET_WINNT
        proc_num = __sync_fetch_and_add(&m_proc_number, 1);
#else // TARGET_WINNT
        proc_num = _InterlockedIncrement(&m_proc_number);
#endif // TARGET_WINNT

        if (proc_num > COI_PIPELINE_MAX_PIPELINES) {
            LIBOFFLOAD_ERROR(c_coipipe_max_number, COI_PIPELINE_MAX_PIPELINES);
            LIBOFFLOAD_ABORT;
        }

        m_stream_lock.lock();

        // start process if not done yet
        if (m_process == 0) {
            init_process();
        }

        // create CPUmask
        res = COI::PipelineClearCPUMask(in_Mask);
        check_result(res, c_clear_cpu_mask, m_index, res);

        int stream_cpu_num = stream->get_cpu_number();

        stream->m_stream_cpus.reset();

        int threads_per_core = m_num_threads / m_num_cores;

        // The "stream_cpu_num" available threads is set in mask.
        // Available threads are defined by examining of m_cpus bitset.
        // We skip thread 0 .
        for (int i = 1; i < m_num_threads; i++) {
            // for available thread i m_cpus[i] is equal to 1
            if (m_cpus[i]) {
                res = COI::PipelineSetCPUMask(m_process,
                                              i / threads_per_core,
                                              i % threads_per_core,
                                              in_Mask);
                check_result(res, c_set_cpu_mask, res);

                // mark thread i as nonavailable
                m_cpus.set(i,0);

                // Mark thread i as given for the stream.
                // In case of stream destroying by call to
                // _Offload_stream_destroy we can mark the thread i as
                // available.
                stream->m_stream_cpus.set(i);

                if (--stream_cpu_num <= 0) {
                    break;
                }
            }
        }

        // if stream_cpu_num is greater than 0 there are not enough
        // available threads
        if (stream_cpu_num > 0) {
            LIBOFFLOAD_ERROR(c_create_pipeline_for_stream, m_num_threads);
            LIBOFFLOAD_ABORT;
        }

        // create pipeline for this thread
        OFFLOAD_DEBUG_TRACE(2, "COIPipelineCreate Mask\n"
            "%016lx %016lx %016lx %016lx\n%016lx %016lx %016lx %016lx\n"
            "%016lx %016lx %016lx %016lx\n%016lx %016lx %016lx %016lx\n",
            in_Mask[0], in_Mask[1], in_Mask[2], in_Mask[3],
            in_Mask[4], in_Mask[5], in_Mask[6], in_Mask[7],
            in_Mask[8], in_Mask[9], in_Mask[10], in_Mask[11],
            in_Mask[12], in_Mask[13], in_Mask[14], in_Mask[15]);

        res = COI::PipelineCreate(m_process, in_Mask,
                                  mic_stack_size, &pipeline);
        check_result(res, c_pipeline_create, m_index, res);

        // Set stream's affinities
        {
            struct affinity_spec affinity_spec;
            char* affinity_type;
            int i;

            // "compact" by default
            affinity_spec.affinity_type = affinity_compact;

            // Check if user has specified type of affinity
            if ((affinity_type = getenv("OFFLOAD_STREAM_AFFINITY")) != NULL) {
                char affinity_str[16];
                int affinity_str_len;

                OFFLOAD_DEBUG_TRACE(2,
                    "User has specified OFFLOAD_STREAM_AFFINITY=%s\n",
                    affinity_type);

                // Set type of affinity requested: lower-case copy of at
                // most 15 characters. tolower() is only defined for values
                // representable as unsigned char (or EOF), so the
                // possibly negative char is converted first.
                affinity_str_len = strlen(affinity_type);
                for (i=0; i<affinity_str_len && i<15; i++) {
                    affinity_str[i] = tolower(
                        static_cast<unsigned char>(affinity_type[i]));
                }
                affinity_str[i] = '\0';

                if (strcmp(affinity_str, "compact") == 0) {
                    affinity_spec.affinity_type = affinity_compact;
                    OFFLOAD_DEBUG_TRACE(2, "Setting affinity=compact\n");
                }
                else if (strcmp(affinity_str, "scatter") == 0) {
                    affinity_spec.affinity_type = affinity_scatter;
                    OFFLOAD_DEBUG_TRACE(2, "Setting affinity=scatter\n");
                }
                else {
                    LIBOFFLOAD_ERROR(c_incorrect_affinity, affinity_str);
                    affinity_spec.affinity_type = affinity_compact;
                    OFFLOAD_DEBUG_TRACE(2, "Setting affinity=compact\n");
                }
            }

            // Make flat copy of sink mask because COI's mask is opaque
            for (i=0; i<16; i++) {
                affinity_spec.sink_mask[i] = in_Mask[i];
            }

            // Set number of cores and threads
            affinity_spec.num_cores = m_num_cores;
            affinity_spec.num_threads = m_num_threads;

            COIEVENT event;
            res = COI::PipelineRunFunction(pipeline,
                                           m_funcs[c_func_set_stream_affinity],
                                           0, 0, 0,
                                           0, 0,
                                           &affinity_spec,
                                           sizeof(affinity_spec),
                                           0, 0,
                                           &event);
            check_result(res, c_pipeline_run_func, m_index, res);

            res = COI::EventWait(1, &event, -1, 1, 0, 0);
            check_result(res, c_event_wait, res);
        }

        m_stream_lock.unlock();
        stream->set_pipeline(pipeline);
    }
    return pipeline;
}
static void __offload_myoInit_once(void) { if (!__offload_myoLoadLibrary()) { return; } // initialize all devices for (int i = 0; i < mic_engines_total; i++) { mic_engines[i].init(); } // load and initialize MYO library OFFLOAD_DEBUG_TRACE(2, "Initializing MYO library ...\n"); COIEVENT events[MIC_ENGINES_MAX]; // One entry per device + // A pair of entries for the Host postInit func + // A pair of entries for the MIC postInit func + // end marker MyoiUserParams params[MIC_ENGINES_MAX+5]; // Load target library to all devices and // create libinit parameters for all devices for (int i = 0; i < mic_engines_total; i++) { mic_engines[i].init_myo(&events[i]); params[i].type = MYOI_USERPARAMS_DEVID; params[i].nodeid = mic_engines[i].get_physical_index() + 1; OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %d, %d }\n", i, params[i].type, params[i].nodeid); } // Check if V2 myoLibInit is available if (myo_wrapper.PostInitFuncSupported()) { // Set the host post libInit function indicator params[mic_engines_total].type = MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC; params[mic_engines_total].nodeid = MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC_HOST_NODE; OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %d, %d }\n", mic_engines_total, params[mic_engines_total].type, params[mic_engines_total].nodeid); // Set the host post libInit host function address ((MyoiUserParamsPostLibInit*)(¶ms[mic_engines_total+1]))-> postLibInitHostFuncAddress = (void (*)())&__offload_propagate_shared_vars; OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %p }\n", mic_engines_total+1, ((MyoiUserParamsPostLibInit*)(¶ms[mic_engines_total+1]))-> postLibInitHostFuncAddress); // Set the target post libInit function indicator params[mic_engines_total+2].type = MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC; params[mic_engines_total+2].nodeid = MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC_ALL_NODES; // Set the target post libInit target function name ((MyoiUserParamsPostLibInit*)(¶ms[mic_engines_total+3]))-> postLibInitRemoveFuncName = "--vtable_initializer--"; 
OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %s }\n", mic_engines_total+3, ((MyoiUserParamsPostLibInit*)(¶ms[mic_engines_total+1]))-> postLibInitRemoveFuncName); params[mic_engines_total+4].type = MYOI_USERPARAMS_LAST_MSG; params[mic_engines_total+4].nodeid = 0; OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %d, %d }\n", mic_engines_total+4, params[mic_engines_total+4].type, params[mic_engines_total+4].nodeid); } else { params[mic_engines_total].type = MYOI_USERPARAMS_LAST_MSG; params[mic_engines_total].nodeid = 0; OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %d, %d }\n", mic_engines_total, params[mic_engines_total].type, params[mic_engines_total].nodeid); } // initialize myo runtime on host myo_wrapper.LibInit(params, 0); // wait for the target init calls to finish COIRESULT res; res = COI::EventWait(mic_engines_total, events, -1, 1, 0, 0); if (res != COI_SUCCESS) { LIBOFFLOAD_ERROR(c_event_wait, res); exit(1); } myo_is_available = true; OFFLOAD_DEBUG_TRACE(2, "setting myo_is_available=%d\n", myo_is_available); OFFLOAD_DEBUG_TRACE(2, "Initializing MYO library ... done\n"); }
void Engine::init_process(void) { COIENGINE engine; COIRESULT res; const char **environ; char buf[4096]; // For exe path name // create environment for the target process environ = (const char**) mic_env_vars.create_environ_for_card(m_index); if (environ != 0) { for (const char **p = environ; *p != 0; p++) { OFFLOAD_DEBUG_TRACE(3, "Env Var for card %d: %s\n", m_index, *p); } } // Create execution context in the specified device OFFLOAD_DEBUG_TRACE(2, "Getting device %d (engine %d) handle\n", m_index, m_physical_index); res = COI::EngineGetHandle(COI_ISA_MIC, m_physical_index, &engine); check_result(res, c_get_engine_handle, m_index, res); // Get engine info on threads and cores. // The values of core number and thread number will be used later at stream // creation by call to _Offload_stream_create(device,number_of_cpus). COI_ENGINE_INFO engine_info; res = COI::EngineGetInfo(engine, sizeof(COI_ENGINE_INFO), &engine_info); check_result(res, c_get_engine_info, m_index, res); // m_cpus bitset has 1 for available thread. At the begining all threads // are available and m_cpus(i) is set to // 1 for i = [0...engine_info.NumThreads]. m_cpus.reset(); for (int i = 0; i < engine_info.NumThreads; i++) { m_cpus.set(i); } // The following values will be used at pipeline creation for streams m_num_cores = engine_info.NumCores; m_num_threads = engine_info.NumThreads; // Check if OFFLOAD_DMA_CHANNEL_COUNT is set to 2 // Only the value 2 is supported in 16.0 if (mic_dma_channel_count == 2) { if (COI::ProcessConfigureDMA) { // Set DMA channels using COI API COI::ProcessConfigureDMA(2, COI::DMA_MODE_READ_WRITE); } else { // Set environment variable COI_DMA_CHANNEL_COUNT // use putenv instead of setenv as Windows has no setenv. // Note: putenv requires its argument can't be freed or modified. // So no free after call to putenv or elsewhere. 
char * env_var = strdup("COI_DMA_CHANNEL_COUNT=2"); if (env_var == NULL) LIBOFFLOAD_ERROR(c_malloc); putenv(env_var); } } // Target executable is not available then use compiler provided offload_main if (__target_exe == 0) { if (mic_device_main == 0) LIBOFFLOAD_ERROR(c_report_no_host_exe); OFFLOAD_DEBUG_TRACE(2, "Loading target executable %s\n",mic_device_main); res = COI::ProcessCreateFromFile( engine, // in_Engine mic_device_main, // in_pBinaryName 0, // in_Argc 0, // in_ppArgv environ == 0, // in_DupEnv environ, // in_ppAdditionalEnv mic_proxy_io, // in_ProxyActive mic_proxy_fs_root, // in_ProxyfsRoot mic_buffer_size, // in_BufferSpace mic_library_path, // in_LibrarySearchPath &m_process // out_pProcess ); } else { // Target executable should be available by the time when we // attempt to initialize the device // Need the full path of the FAT exe for VTUNE { #ifndef TARGET_WINNT ssize_t len = readlink("/proc/self/exe", buf,1000); #else int len = GetModuleFileName(NULL, buf,1000); #endif // TARGET_WINNT if (len == -1) { LIBOFFLOAD_ERROR(c_report_no_host_exe); exit(1); } else if (len > 999) { LIBOFFLOAD_ERROR(c_report_path_buff_overflow); exit(1); } buf[len] = '\0'; } OFFLOAD_DEBUG_TRACE(2, "Loading target executable \"%s\" from %p, size %lld, host file %s\n", __target_exe->name, __target_exe->data, __target_exe->size, buf); res = COI::ProcessCreateFromMemory( engine, // in_Engine __target_exe->name, // in_pBinaryName __target_exe->data, // in_pBinaryBuffer __target_exe->size, // in_BinaryBufferLength, 0, // in_Argc 0, // in_ppArgv environ == 0, // in_DupEnv environ, // in_ppAdditionalEnv mic_proxy_io, // in_ProxyActive mic_proxy_fs_root, // in_ProxyfsRoot mic_buffer_size, // in_BufferSpace mic_library_path, // in_LibrarySearchPath buf, // in_FileOfOrigin -1, // in_FileOfOriginOffset use -1 to indicate to // COI that is is a FAT binary &m_process // out_pProcess ); } check_result(res, c_process_create, m_index, res); if ((mic_4k_buffer_size != 0) || 
(mic_2m_buffer_size !=0)) { // available only in MPSS 4.2 and greater if (COI::ProcessSetCacheSize != 0 ) { int flags; // Need compiler to use MPSS 3.2 or greater to get these // definition so currently hardcoding it // COI_CACHE_ACTION_GROW_NOW && COI_CACHE_MODE_ONDEMAND_SYNC; flags = 0x00020002; res = COI::ProcessSetCacheSize( m_process, // in_Process mic_2m_buffer_size, // in_HugePagePoolSize flags, // inHugeFlags mic_4k_buffer_size, // in_SmallPagePoolSize flags, // inSmallFlags 0, // in_NumDependencies 0, // in_pDependencies 0 // out_PCompletion ); OFFLOAD_DEBUG_TRACE(2, "Reserve target buffers 4K pages = %d 2M pages = %d\n", mic_4k_buffer_size, mic_2m_buffer_size); check_result(res, c_process_set_cache_size, m_index, res); } else { OFFLOAD_DEBUG_TRACE(2, "Reserve target buffers not supported in current MPSS\n"); } } // get function handles res = COI::ProcessGetFunctionHandles(m_process, c_funcs_total, m_func_names, m_funcs); check_result(res, c_process_get_func_handles, m_index, res); // initialize device side pid_t pid = init_device(); // For IDB if (__dbg_is_attached) { // TODO: we have in-memory executable now. // Check with IDB team what should we provide them now? if (strlen(__target_exe->name) < MAX_TARGET_NAME) { strcpy(__dbg_target_exe_name, __target_exe->name); } __dbg_target_so_pid = pid; __dbg_target_id = m_physical_index; __dbg_target_so_loaded(); } }
void Engine::init_process(void) { COIENGINE engine; COIRESULT res; const char **environ; // create environment for the target process environ = (const char**) mic_env_vars.create_environ_for_card(m_index); if (environ != 0) { for (const char **p = environ; *p != 0; p++) { OFFLOAD_DEBUG_TRACE(3, "Env Var for card %d: %s\n", m_index, *p); } } // Create execution context in the specified device OFFLOAD_DEBUG_TRACE(2, "Getting device %d (engine %d) handle\n", m_index, m_physical_index); res = COI::EngineGetHandle(COI_ISA_KNC, m_physical_index, &engine); check_result(res, c_get_engine_handle, m_index, res); // Target executable should be available by the time when we // attempt to initialize the device if (__target_exe == 0) { LIBOFFLOAD_ERROR(c_no_target_exe); exit(1); } OFFLOAD_DEBUG_TRACE(2, "Loading target executable \"%s\" from %p, size %lld\n", __target_exe->name, __target_exe->data, __target_exe->size); res = COI::ProcessCreateFromMemory( engine, // in_Engine __target_exe->name, // in_pBinaryName __target_exe->data, // in_pBinaryBuffer __target_exe->size, // in_BinaryBufferLength, 0, // in_Argc 0, // in_ppArgv environ == 0, // in_DupEnv environ, // in_ppAdditionalEnv mic_proxy_io, // in_ProxyActive mic_proxy_fs_root, // in_ProxyfsRoot mic_buffer_size, // in_BufferSpace mic_library_path, // in_LibrarySearchPath __target_exe->origin, // in_FileOfOrigin __target_exe->offset, // in_FileOfOriginOffset &m_process // out_pProcess ); check_result(res, c_process_create, m_index, res); // get function handles res = COI::ProcessGetFunctionHandles(m_process, c_funcs_total, m_func_names, m_funcs); check_result(res, c_process_get_func_handles, m_index, res); // initialize device side pid_t pid = init_device(); // For IDB if (__dbg_is_attached) { // TODO: we have in-memory executable now. // Check with IDB team what should we provide them now? 
if (strlen(__target_exe->name) < MAX_TARGET_NAME) { strcpy(__dbg_target_exe_name, __target_exe->name); } __dbg_target_so_pid = pid; __dbg_target_id = m_physical_index; __dbg_target_so_loaded(); } }
static void CheckResult(const char *func, MyoError error) { if (error != MYO_SUCCESS) { LIBOFFLOAD_ERROR(c_myotarget_checkresult, func, error); exit(1); } }
MicEnvVarKind MicEnvVar::get_env_var_kind( char *env_var_string, int *card_number, char **env_var_name, int *env_var_name_length, char **env_var_def ) { int len = strlen(prefix); char *c = env_var_string; int num = 0; bool card_is_set = false; if (strncmp(c, prefix, len) != 0 || c[len] != '_') { return c_no_mic; } c += len + 1; *card_number = any_card; if (isdigit(*c)) { while (isdigit (*c)) { num = (*c++ - '0') + (num * 10); } if (*c != '_') { return c_no_mic; } c++; *card_number = num; card_is_set = true; } if (!isalpha(*c)) { return c_no_mic; } *env_var_name = *env_var_def = c; if (strncmp(c, "ENV=", 4) == 0) { if (!card_is_set) { *env_var_name_length = 3; *env_var_name = *env_var_def = c; *env_var_def = strdup(*env_var_def); if (*env_var_def == NULL) LIBOFFLOAD_ERROR(c_malloc); return c_mic_var; } *env_var_def = c + strlen("ENV="); *env_var_def = strdup(*env_var_def); if (*env_var_def == NULL) LIBOFFLOAD_ERROR(c_malloc); return c_mic_card_env; } if (isalpha(*c)) { *env_var_name_length = 0; while (isalnum(*c) || *c == '_') { c++; (*env_var_name_length)++; } } if (*c != '=') { return c_no_mic; } *env_var_def = strdup(*env_var_def); if (*env_var_def == NULL) LIBOFFLOAD_ERROR(c_malloc); return card_is_set? c_mic_card_var : c_mic_var; }
void OffloadDescriptor::scatter_copyin_data() { OFFLOAD_TIMER_START(c_offload_target_scatter_inputs); OFFLOAD_DEBUG_TRACE(2, "IN buffer @ %p size %lld\n", m_in.get_buffer_start(), m_in.get_buffer_size()); OFFLOAD_DEBUG_DUMP_BYTES(2, m_in.get_buffer_start(), m_in.get_buffer_size()); // receive data for (int i = 0; i < m_vars_total; i++) { bool src_is_for_mic = (m_vars[i].direction.out || m_vars[i].into == NULL); void** ptr_addr = src_is_for_mic ? static_cast<void**>(m_vars[i].ptr) : static_cast<void**>(m_vars[i].into); int type = src_is_for_mic ? m_vars[i].type.src : m_vars[i].type.dst; bool is_static = src_is_for_mic ? m_vars[i].flags.is_static : m_vars[i].flags.is_static_dstn; void *ptr = NULL; if (m_vars[i].flags.alloc_disp) { int64_t offset = 0; m_in.receive_data(&offset, sizeof(offset)); m_vars[i].offset = -offset; } if (VAR_TYPE_IS_DV_DATA_SLICE(type) || VAR_TYPE_IS_DV_DATA(type)) { ArrDesc *dvp = (type == c_dv_data_slice || type == c_dv_data)? reinterpret_cast<ArrDesc*>(ptr_addr) : *reinterpret_cast<ArrDesc**>(ptr_addr); ptr_addr = reinterpret_cast<void**>(&dvp->Base); } // Set pointer values switch (type) { case c_data_ptr_array: { int j = m_vars[i].ptr_arr_offset; int max_el = j + m_vars[i].count; char *dst_arr_ptr = (src_is_for_mic)? 
*(reinterpret_cast<char**>(m_vars[i].ptr)) : reinterpret_cast<char*>(m_vars[i].into); for (; j < max_el; j++) { if (src_is_for_mic) { m_vars[j].ptr = dst_arr_ptr + m_vars[j].ptr_arr_offset; } else { m_vars[j].into = dst_arr_ptr + m_vars[j].ptr_arr_offset; } } } break; case c_data: case c_void_ptr: case c_cean_var: case c_dv: break; case c_string_ptr: case c_data_ptr: case c_cean_var_ptr: case c_dv_ptr: if (m_vars[i].alloc_if) { void *buf; if (m_vars[i].flags.sink_addr) { m_in.receive_data(&buf, sizeof(buf)); } else { buf = m_buffers.front(); m_buffers.pop_front(); } if (buf) { if (!is_static) { if (!m_vars[i].flags.sink_addr) { // increment buffer reference OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs); BufferAddRef(buf); OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs); } add_ref_count(buf, 0 == m_vars[i].flags.sink_addr); } ptr = static_cast<char*>(buf) + m_vars[i].mic_offset + (m_vars[i].flags.is_stack_buf ? 0 : m_vars[i].offset); } *ptr_addr = ptr; } else if (m_vars[i].flags.sink_addr) { void *buf; m_in.receive_data(&buf, sizeof(buf)); void *ptr = static_cast<char*>(buf) + m_vars[i].mic_offset + (m_vars[i].flags.is_stack_buf ? 
0 : m_vars[i].offset); *ptr_addr = ptr; } break; case c_func_ptr: break; case c_dv_data: case c_dv_ptr_data: case c_dv_data_slice: case c_dv_ptr_data_slice: if (m_vars[i].alloc_if) { void *buf; if (m_vars[i].flags.sink_addr) { m_in.receive_data(&buf, sizeof(buf)); } else { buf = m_buffers.front(); m_buffers.pop_front(); } if (buf) { if (!is_static) { if (!m_vars[i].flags.sink_addr) { // increment buffer reference OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs); BufferAddRef(buf); OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs); } add_ref_count(buf, 0 == m_vars[i].flags.sink_addr); } ptr = static_cast<char*>(buf) + m_vars[i].mic_offset + m_vars[i].offset; } *ptr_addr = ptr; } else if (m_vars[i].flags.sink_addr) { void *buf; m_in.receive_data(&buf, sizeof(buf)); ptr = static_cast<char*>(buf) + m_vars[i].mic_offset + m_vars[i].offset; *ptr_addr = ptr; } break; default: LIBOFFLOAD_ERROR(c_unknown_var_type, type); abort(); } // Release obsolete buffers for stack of persistent objects if (type = c_data_ptr && m_vars[i].flags.is_stack_buf && !m_vars[i].direction.bits && m_vars[i].alloc_if && m_vars[i].size != 0) { for (int j=0; j < m_vars[i].size; j++) { void *buf; m_in.receive_data(&buf, sizeof(buf)); BufferReleaseRef(buf); ref_data.erase(buf); } } // Do copyin switch (m_vars[i].type.dst) { case c_data_ptr_array: break; case c_data: case c_void_ptr: case c_cean_var: if (m_vars[i].direction.in && !m_vars[i].flags.is_static_dstn) { int64_t size; int64_t disp; char* ptr = m_vars[i].into ? static_cast<char*>(m_vars[i].into) : static_cast<char*>(m_vars[i].ptr); if (m_vars[i].type.dst == c_cean_var) { m_in.receive_data((&size), sizeof(int64_t)); m_in.receive_data((&disp), sizeof(int64_t)); } else { size = m_vars[i].size; disp = 0; } m_in.receive_data(ptr + disp, size); } break; case c_dv: if (m_vars[i].direction.bits || m_vars[i].alloc_if || m_vars[i].free_if) { char* ptr = m_vars[i].into ? 
static_cast<char*>(m_vars[i].into) : static_cast<char*>(m_vars[i].ptr); m_in.receive_data(ptr + sizeof(uint64_t), m_vars[i].size - sizeof(uint64_t)); } break; case c_string_ptr: case c_data_ptr: case c_cean_var_ptr: case c_dv_ptr: case c_dv_data: case c_dv_ptr_data: case c_dv_data_slice: case c_dv_ptr_data_slice: break; case c_func_ptr: if (m_vars[i].direction.in) { m_in.receive_func_ptr((const void**) m_vars[i].ptr); } break; default: LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst); abort(); } } OFFLOAD_TRACE(1, "Total copyin data received from host: [%lld] bytes\n", m_in.get_tfr_size()); OFFLOAD_TIMER_STOP(c_offload_target_scatter_inputs); OFFLOAD_TIMER_START(c_offload_target_compute); }
// Map a report/trace tag to its message string.
//
//   input_tag - one of the c_report_* tags handled below.
//
// Returns the string obtained from offload_get_message_str() for the
// matching msg_c_report_* identifier.  An unrecognized tag is reported via
// LIBOFFLOAD_ERROR(c_report_unknown_trace_node) and the process is aborted.
char const * report_get_message_str(error_types input_tag)
{
    switch (input_tag) {
        case c_report_title:
            return (offload_get_message_str(msg_c_report_title));
        case c_report_from_file:
            return (offload_get_message_str(msg_c_report_from_file));
        case c_report_offload:
            return (offload_get_message_str(msg_c_report_offload));
        case c_report_mic:
            return (offload_get_message_str(msg_c_report_mic));
        case c_report_file:
            return (offload_get_message_str(msg_c_report_file));
        case c_report_line:
            return (offload_get_message_str(msg_c_report_line));
        case c_report_host:
            return (offload_get_message_str(msg_c_report_host));
        case c_report_tag:
            return (offload_get_message_str(msg_c_report_tag));
        case c_report_cpu_time:
            return (offload_get_message_str(msg_c_report_cpu_time));
        case c_report_seconds:
            return (offload_get_message_str(msg_c_report_seconds));
        case c_report_cpu_to_mic_data:
            return (offload_get_message_str(msg_c_report_cpu_to_mic_data));
        case c_report_bytes:
            return (offload_get_message_str(msg_c_report_bytes));
        case c_report_mic_time:
            return (offload_get_message_str(msg_c_report_mic_time));
        case c_report_mic_to_cpu_data:
            return (offload_get_message_str(msg_c_report_mic_to_cpu_data));
        case c_report_compute:
            return (offload_get_message_str(msg_c_report_compute));
        case c_report_copyin_data:
            return (offload_get_message_str(msg_c_report_copyin_data));
        case c_report_copyout_data:
            return (offload_get_message_str(msg_c_report_copyout_data));
        case c_report_create_buf_host:
            // Look up the msg_* identifier like every other case; passing
            // the trace tag itself would select an unrelated message.
            return (offload_get_message_str(msg_c_report_create_buf_host));
        case c_report_create_buf_mic:
            return (offload_get_message_str(msg_c_report_create_buf_mic));
        case c_report_destroy:
            return (offload_get_message_str(msg_c_report_destroy));
        case c_report_gather_copyin_data:
            return (offload_get_message_str(msg_c_report_gather_copyin_data));
        case c_report_gather_copyout_data:
            return (offload_get_message_str(msg_c_report_gather_copyout_data));
        case c_report_state_signal:
            return (offload_get_message_str(msg_c_report_state_signal));
        case c_report_signal:
            return (offload_get_message_str(msg_c_report_signal));
        case c_report_wait:
            return (offload_get_message_str(msg_c_report_wait));
        case c_report_init:
            return (offload_get_message_str(msg_c_report_init));
        case c_report_init_func:
            return (offload_get_message_str(msg_c_report_init_func));
        case c_report_logical_card:
            return (offload_get_message_str(msg_c_report_logical_card));
        case c_report_mic_myo_fptr:
            return (offload_get_message_str(msg_c_report_mic_myo_fptr));
        case c_report_mic_myo_shared:
            return (offload_get_message_str(msg_c_report_mic_myo_shared));
        case c_report_myoacquire:
            return (offload_get_message_str(msg_c_report_myoacquire));
        case c_report_myofini:
            return (offload_get_message_str(msg_c_report_myofini));
        case c_report_myoinit:
            return (offload_get_message_str(msg_c_report_myoinit));
        case c_report_myoregister:
            return (offload_get_message_str(msg_c_report_myoregister));
        case c_report_myorelease:
            return (offload_get_message_str(msg_c_report_myorelease));
        case c_report_myosharedalignedfree:
            return (
                offload_get_message_str(msg_c_report_myosharedalignedfree));
        case c_report_myosharedalignedmalloc:
            return (
                offload_get_message_str(msg_c_report_myosharedalignedmalloc));
        case c_report_myosharedfree:
            return (offload_get_message_str(msg_c_report_myosharedfree));
        case c_report_myosharedmalloc:
            return (offload_get_message_str(msg_c_report_myosharedmalloc));
        case c_report_physical_card:
            return (offload_get_message_str(msg_c_report_physical_card));
        case c_report_receive_pointer_data:
            return (
                offload_get_message_str(msg_c_report_receive_pointer_data));
        case c_report_received_pointer_data:
            return (
                offload_get_message_str(msg_c_report_received_pointer_data));
        case c_report_register:
            return (offload_get_message_str(msg_c_report_register));
        case c_report_scatter_copyin_data:
            return (offload_get_message_str(msg_c_report_scatter_copyin_data));
        case c_report_scatter_copyout_data:
            return (
                offload_get_message_str(msg_c_report_scatter_copyout_data));
        case c_report_send_pointer_data:
            return (offload_get_message_str(msg_c_report_send_pointer_data));
        case c_report_sent_pointer_data:
            return (offload_get_message_str(msg_c_report_sent_pointer_data));
        case c_report_start:
            return (offload_get_message_str(msg_c_report_start));
        case c_report_start_target_func:
            return (offload_get_message_str(msg_c_report_start_target_func));
        case c_report_state:
            return (offload_get_message_str(msg_c_report_state));
        case c_report_unregister:
            return (offload_get_message_str(msg_c_report_unregister));
        case c_report_var:
            return (offload_get_message_str(msg_c_report_var));

        default:
            LIBOFFLOAD_ERROR(c_report_unknown_trace_node);
            abort();
    }
}
void OffloadDescriptor::offload( uint32_t buffer_count, void** buffers, void* misc_data, uint16_t misc_data_len, void* return_data, uint16_t return_data_len ) { FunctionDescriptor *func = (FunctionDescriptor*) misc_data; const char *name = func->data; OffloadDescriptor ofld; char *in_data = 0; char *out_data = 0; char *timer_data = 0; console_enabled = func->console_enabled; timer_enabled = func->timer_enabled; offload_report_level = func->offload_report_level; offload_number = func->offload_number; ofld.set_offload_number(func->offload_number); #ifdef SEP_SUPPORT if (sep_monitor) { if (__sync_fetch_and_add(&sep_counter, 1) == 0) { OFFLOAD_DEBUG_TRACE(2, "VTResumeSampling\n"); VTResumeSampling(); } } #endif // SEP_SUPPORT OFFLOAD_DEBUG_TRACE_1(2, ofld.get_offload_number(), c_offload_start_target_func, "Offload \"%s\" started\n", name); // initialize timer data OFFLOAD_TIMER_INIT(); OFFLOAD_TIMER_START(c_offload_target_total_time); OFFLOAD_TIMER_START(c_offload_target_descriptor_setup); // get input/output buffer addresses if (func->in_datalen > 0 || func->out_datalen > 0) { if (func->data_offset != 0) { in_data = (char*) misc_data + func->data_offset; out_data = (char*) return_data; } else { char *inout_buf = (char*) buffers[--buffer_count]; in_data = inout_buf; out_data = inout_buf; } } // assign variable descriptors ofld.m_vars_total = func->vars_num; if (ofld.m_vars_total > 0) { uint64_t var_data_len = ofld.m_vars_total * sizeof(VarDesc); ofld.m_vars = (VarDesc*) malloc(var_data_len); if (ofld.m_vars == NULL) LIBOFFLOAD_ERROR(c_malloc); memcpy(ofld.m_vars, in_data, var_data_len); in_data += var_data_len; func->in_datalen -= var_data_len; } // timer data if (func->timer_enabled) { uint64_t timer_data_len = OFFLOAD_TIMER_DATALEN(); timer_data = out_data; out_data += timer_data_len; func->out_datalen -= timer_data_len; } // init Marshallers ofld.m_in.init_buffer(in_data, func->in_datalen); ofld.m_out.init_buffer(out_data, func->out_datalen); // copy buffers to 
offload descriptor std::copy(buffers, buffers + buffer_count, std::back_inserter(ofld.m_buffers)); OFFLOAD_TIMER_STOP(c_offload_target_descriptor_setup); // find offload entry address OFFLOAD_TIMER_START(c_offload_target_func_lookup); offload_func_with_parms entry = (offload_func_with_parms) __offload_entries.find_addr(name); if (entry == NULL) { #if OFFLOAD_DEBUG > 0 if (console_enabled > 2) { __offload_entries.dump(); } #endif LIBOFFLOAD_ERROR(c_offload_descriptor_offload, name); exit(1); } OFFLOAD_TIMER_STOP(c_offload_target_func_lookup); OFFLOAD_TIMER_START(c_offload_target_func_time); // execute offload entry entry(&ofld); OFFLOAD_TIMER_STOP(c_offload_target_func_time); OFFLOAD_TIMER_STOP(c_offload_target_total_time); // copy timer data to the buffer OFFLOAD_TIMER_TARGET_DATA(timer_data); OFFLOAD_DEBUG_TRACE(2, "Offload \"%s\" finished\n", name); #ifdef SEP_SUPPORT if (sep_monitor) { if (__sync_sub_and_fetch(&sep_counter, 1) == 0) { OFFLOAD_DEBUG_TRACE(2, "VTPauseSampling\n"); VTPauseSampling(); } } #endif // SEP_SUPPORT }
// Acquire an offload descriptor for an OpenMP-style offload.
//
//   device_num - optional pointer to the requested device number; when it is
//                null, __omp_device_num is used instead.
//   file, line - source location handed to OFFLOAD_TIMER_INIT.
//
// Returns a newly allocated OffloadDescriptor.  The process exits with
// status 1 when the library cannot be initialized, the device number is
// negative, or the device cannot be reserved.
extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE1(
    const int*  device_num,
    const char* file,
    uint64_t    line
)
{
    // The library has to be initialized with at least one usable device
    if (!__offload_init_library()) {
        LIBOFFLOAD_ERROR(c_device_is_not_available);
        exit(1);
    }

    // OFFLOAD_TIMER_INIT must follow call to __offload_init_library
    OffloadHostTimerData * timer_data = OFFLOAD_TIMER_INIT(file, line);

    OFFLOAD_TIMER_START(timer_data, c_offload_host_total_offload);

    // Bring up every engine when the init policy asks for it
    OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
    if (__offload_init_type == c_init_on_offload_all) {
        for (int engine = 0; engine < mic_engines_total; engine++) {
            mic_engines[engine].init();
        }
    }
    OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);

    OFFLOAD_TIMER_START(timer_data, c_offload_host_target_acquire);

    // Start from the default device and override it when one was provided
    int dev_idx = __omp_device_num;
    if (device_num != 0) {
        dev_idx = *device_num;
    }

    // Negative device numbers are rejected
    if (dev_idx < 0) {
        LIBOFFLOAD_ERROR(c_omp_invalid_device_num);
        exit(1);
    }

    // should we do this for OpenMP?
    dev_idx %= mic_engines_total;

    // The device must be reserved in ORSL before it is used
    if (!ORSL::reserve(dev_idx)) {
        LIBOFFLOAD_ERROR(c_device_is_not_available);
        exit(1);
    }

    // Initialize just the selected device under the on-offload policy
    OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
    if (__offload_init_type == c_init_on_offload) {
        mic_engines[dev_idx].init();
    }
    OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);

    OFFLOAD ofld =
        new OffloadDescriptor(dev_idx, 0, true, true, timer_data);

    OFFLOAD_TIMER_HOST_MIC_NUM(timer_data, dev_idx);

    Offload_Report_Prolog(timer_data);

    OFFLOAD_DEBUG_TRACE_1(2, timer_data->offload_number, c_offload_start,
                          "Starting OpenMP offload, device = %d\n",
                          dev_idx);

    OFFLOAD_TIMER_STOP(timer_data, c_offload_host_target_acquire);

    return ofld;
}
// Consume the incoming data stream (m_in) for every variable descriptor.
//
// For each descriptor the loop does three things, in this order:
//   1. sets pointer values (switch on the source or destination type),
//   2. for descriptor #0 flagged as stack buffer, releases the buffers whose
//      handles follow in the stream,
//   3. copies in by-value data (switch on the destination type).
// The order of the m_in.receive_* calls defines how the stream is read, so
// statements must not be reordered.
//
// Starts the scatter-inputs timer on entry; on exit it stops that timer and
// starts the compute timer.  An unknown variable type is reported and the
// process is aborted.
void OffloadDescriptor::scatter_copyin_data()
{
    OFFLOAD_TIMER_START(c_offload_target_scatter_inputs);

    OFFLOAD_DEBUG_TRACE(2, "IN  buffer @ %p size %lld\n",
                        m_in.get_buffer_start(),
                        m_in.get_buffer_size());
    OFFLOAD_DEBUG_DUMP_BYTES(2, m_in.get_buffer_start(),
                             m_in.get_buffer_size());

    // receive data
    for (int i = 0; i < m_vars_total; i++) {
        // True when the descriptor has the "out" direction or no "into"
        // target; selects whether the ptr/source side or the
        // into/destination side of the descriptor is patched below.
        bool src_is_for_mic = (m_vars[i].direction.out ||
                               m_vars[i].into == NULL);
        void** ptr_addr = src_is_for_mic ?
                          static_cast<void**>(m_vars[i].ptr) :
                          static_cast<void**>(m_vars[i].into);
        int type = src_is_for_mic ? m_vars_extra[i].type_src :
                                    m_vars_extra[i].type_dst;
        bool is_static = src_is_for_mic ?
                         m_vars[i].flags.is_static :
                         m_vars[i].flags.is_static_dstn;
        void *ptr = NULL;

        // An 8-byte value is read from the stream here; it is not used
        // afterwards in this function.
        if (m_vars[i].flags.alloc_disp) {
            int64_t offset = 0;
            m_in.receive_data(&offset, sizeof(offset));
        }
        // For array-descriptor data types, redirect ptr_addr to the Base
        // field of the ArrDesc (reached directly or through one pointer,
        // depending on the type).
        if (VAR_TYPE_IS_DV_DATA_SLICE(type) ||
            VAR_TYPE_IS_DV_DATA(type)) {
            ArrDesc *dvp = (type == c_dv_data_slice || type == c_dv_data)?
                  reinterpret_cast<ArrDesc*>(ptr_addr) :
                  *reinterpret_cast<ArrDesc**>(ptr_addr);
            ptr_addr = reinterpret_cast<void**>(&dvp->Base);
        }
        // Set pointer values
        switch (type) {
            case c_data_ptr_array:
                {
                    // Descriptors [ptr_arr_offset, ptr_arr_offset + count)
                    // get their ptr/into set relative to dst_arr_ptr.
                    int j = m_vars[i].ptr_arr_offset;
                    int max_el = j + m_vars[i].count;
                    char *dst_arr_ptr = (src_is_for_mic)?
                        *(reinterpret_cast<char**>(m_vars[i].ptr)) :
                        reinterpret_cast<char*>(m_vars[i].into);

                    // if is_pointer is 1 it means that pointer array itself
                    // is defined either via pointer or as class member.
                    // i.e. arr_ptr[0:5] or this->ARR[0:5]
                    if (m_vars[i].flags.is_pointer) {
                        int64_t offset = 0;
                        m_in.receive_data(&offset, sizeof(offset));
                        dst_arr_ptr = *((char**)dst_arr_ptr) + offset;
                    }
                    for (; j < max_el; j++) {
                        if (src_is_for_mic) {
                            m_vars[j].ptr =
                                dst_arr_ptr + m_vars[j].ptr_arr_offset;
                        }
                        else {
                            m_vars[j].into =
                                dst_arr_ptr + m_vars[j].ptr_arr_offset;
                        }
                    }
                }
                break;
            case c_data:
            case c_void_ptr:
            case c_void_ptr_ptr:
            case c_cean_var:
            case c_dv:
                // No pointer to set for these types
                break;

            case c_string_ptr:
            case c_data_ptr:
            case c_string_ptr_ptr:
            case c_data_ptr_ptr:
            case c_cean_var_ptr:
            case c_cean_var_ptr_ptr:
            case c_dv_ptr:
                // Don't need ptr_addr value for variables from stack buffer.
                // Stack buffer address is set at var_desc with #0.
                if (i != 0 && m_vars[i].flags.is_stack_buf) {
                    break;
                }
                // For pointer-to-pointer types an offset is read from the
                // stream and applied to the pointee of ptr_addr.
                if (TYPE_IS_PTR_TO_PTR(m_vars_extra[i].type_src) ||
                    TYPE_IS_PTR_TO_PTR(m_vars_extra[i].type_dst)) {
                    int64_t offset;

                    m_in.receive_data(&offset, sizeof(offset));
                    ptr_addr = reinterpret_cast<void**>(
                                 reinterpret_cast<char*>(*ptr_addr) + offset);
                }

                if (m_vars[i].alloc_if && !m_vars[i].flags.preallocated) {
                    void *buf = NULL;
                    // The buffer address comes from the stream (sink_addr)
                    // or from the front of the buffer list.
                    if (m_vars[i].flags.sink_addr) {
                        m_in.receive_data(&buf, sizeof(buf));
                    }
                    else {
                        buf = m_buffers.front();
                        m_buffers.pop_front();
                    }
                    if (buf) {
                        if (!is_static) {
                            if (!m_vars[i].flags.sink_addr) {
                                // increment buffer reference
                                OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs);
                                BufferAddRef(buf);
                                OFFLOAD_TRACE(1, "Calling COIBufferAddRef %p\n", buf);
                                OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs);
                            }
                            add_ref_count(buf, 0 == m_vars[i].flags.sink_addr);
                            OFFLOAD_TRACE(1, " AddRef count = %d\n",
                                          ((RefInfo *) ref_data[buf])->count);
                        }
                        // The descriptor offset is not applied for stack
                        // buffers.
                        ptr = static_cast<char*>(buf) +
                                  m_vars[i].mic_offset +
                                  (m_vars[i].flags.is_stack_buf ?
                                   0 : m_vars[i].offset);
                    }
                    // ptr stays NULL when no buffer was obtained
                    *ptr_addr = ptr;
                }
                else if (m_vars[i].flags.sink_addr) {
                    void *buf;
                    m_in.receive_data(&buf, sizeof(buf));
                    // Note: this declaration shadows the outer `ptr`.
                    void *ptr = static_cast<char*>(buf) +
                                    m_vars[i].mic_offset +
                                    (m_vars[i].flags.is_stack_buf ?
                                     0 : m_vars[i].offset);
                    *ptr_addr = ptr;
                }
                break;

            case c_func_ptr:
            case c_func_ptr_ptr:
                // Function pointers are received in the copy-in switch below
                break;

            case c_dv_data:
            case c_dv_ptr_data:
            case c_dv_data_slice:
            case c_dv_ptr_data_slice:
                // Same buffer handling as the pointer case above, except
                // that the descriptor offset is always applied.
                if (m_vars[i].alloc_if) {
                    void *buf;
                    if (m_vars[i].flags.sink_addr) {
                        m_in.receive_data(&buf, sizeof(buf));
                    }
                    else {
                        buf = m_buffers.front();
                        m_buffers.pop_front();
                    }
                    if (buf) {
                        if (!is_static) {
                            if (!m_vars[i].flags.sink_addr) {
                                // increment buffer reference
                                OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs);
                                BufferAddRef(buf);
                                OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs);
                            }
                            add_ref_count(buf, 0 == m_vars[i].flags.sink_addr);
                        }
                        ptr = static_cast<char*>(buf) +
                            m_vars[i].mic_offset + m_vars[i].offset;
                    }
                    *ptr_addr = ptr;
                }
                else if (m_vars[i].flags.sink_addr) {
                    void *buf;
                    m_in.receive_data(&buf, sizeof(buf));
                    ptr = static_cast<char*>(buf) +
                          m_vars[i].mic_offset + m_vars[i].offset;
                    *ptr_addr = ptr;
                }
                break;

            default:
                LIBOFFLOAD_ERROR(c_unknown_var_type, type);
                abort();
        }
        // Release obsolete buffers for stack of persistent objects.
        // The vardesc with i==0 and flags.is_stack_buf==TRUE is always for
        // stack buffer pointer.
        // Here m_vars[i].size is used as the number of buffer handles to
        // read from the stream.
        if (i == 0 &&
            m_vars[i].flags.is_stack_buf &&
            !m_vars[i].direction.bits &&
            m_vars[i].alloc_if &&
            m_vars[i].size != 0) {
            for (int j=0; j < m_vars[i].size; j++) {
                void *buf;
                m_in.receive_data(&buf, sizeof(buf));
                OFFLOAD_TRACE(4, "Releasing stack buffer %p\n", buf);
                BufferReleaseRef(buf);
                ref_data.erase(buf);
            }
        }
        // Do copyin
        switch (m_vars_extra[i].type_dst) {
            case c_data_ptr_array:
                break;
            case c_data:
            case c_void_ptr:
            case c_void_ptr_ptr:
            case c_cean_var:
                if (m_vars[i].direction.in &&
                    !m_vars[i].flags.is_static_dstn) {
                    int64_t size;
                    int64_t disp;
                    // Data lands in "into" when present, otherwise in "ptr"
                    char* ptr = m_vars[i].into ?
                                 static_cast<char*>(m_vars[i].into) :
                                 static_cast<char*>(m_vars[i].ptr);
                    if (m_vars_extra[i].type_dst == c_cean_var) {
                        // Size and displacement precede the data in the
                        // stream for this type
                        m_in.receive_data((&size), sizeof(int64_t));
                        m_in.receive_data((&disp), sizeof(int64_t));
                    }
                    else {
                        size = m_vars[i].size;
                        disp = 0;
                    }
                    m_in.receive_data(ptr + disp, size);
                }
                break;

            case c_dv:
                if (m_vars[i].direction.bits ||
                    m_vars[i].alloc_if ||
                    m_vars[i].free_if) {
                    char* ptr = m_vars[i].into ?
                                 static_cast<char*>(m_vars[i].into) :
                                 static_cast<char*>(m_vars[i].ptr);
                    // The first sizeof(uint64_t) bytes of the destination
                    // are left untouched
                    m_in.receive_data(ptr + sizeof(uint64_t),
                                      m_vars[i].size - sizeof(uint64_t));
                }
                break;

            case c_string_ptr:
            case c_data_ptr:
            case c_string_ptr_ptr:
            case c_data_ptr_ptr:
            case c_cean_var_ptr:
            case c_cean_var_ptr_ptr:
            case c_dv_ptr:
            case c_dv_data:
            case c_dv_ptr_data:
            case c_dv_data_slice:
            case c_dv_ptr_data_slice:
                // Nothing is copied in for these types
                break;

            case c_func_ptr:
            case c_func_ptr_ptr:
                if (m_vars[i].direction.in) {
                    m_in.receive_func_ptr((const void**) m_vars[i].ptr);
                }
                break;

            default:
                LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars_extra[i].type_dst);
                abort();
        }
    }

    OFFLOAD_TRACE(1, "Total copyin data received from host: [%lld] bytes\n",
                  m_in.get_tfr_size());

    OFFLOAD_TIMER_STOP(c_offload_target_scatter_inputs);

    OFFLOAD_TIMER_START(c_offload_target_compute);
}
void MicEnvVar::mic_parse_env_var_list( int card_number, char *env_vars_def_list) { char *c = env_vars_def_list; char *env_var_name; int env_var_name_length; char *env_var_def; bool var_is_quoted; if (*c == '"') { c++; } while (*c != 0) { var_is_quoted = false; env_var_name = c; env_var_name_length = 0; if (isalpha(*c)) { while (isalnum(*c) || *c == '_') { c++; env_var_name_length++; } } else { LIBOFFLOAD_ERROR(c_mic_parse_env_var_list1); return; } if (*c != '=') { LIBOFFLOAD_ERROR(c_mic_parse_env_var_list2); return; } c++; if (*c == '"') { var_is_quoted = true; c++; } // Environment variable values that contain | will need to be escaped. while (*c != 0 && *c != '|' && (!var_is_quoted || *c != '"')) { // skip escaped symbol if (*c == '\\') { c++; } c++; } if (var_is_quoted) { c++; // for " while (*c != 0 && *c != '|') { c++; } } int sz = c - env_var_name; env_var_def = (char*)malloc(sz); if (env_var_def == NULL) LIBOFFLOAD_ERROR(c_malloc); memcpy(env_var_def, env_var_name, sz); env_var_def[sz] = 0; if (*c == '|') { c++; while (*c != 0 && *c == ' ') { c++; } } add_env_var(card_number, env_var_name, env_var_name_length, env_var_def); } }
void CheckResult(const char *func, MyoError error) const { if (error != MYO_SUCCESS) { LIBOFFLOAD_ERROR(c_myowrapper_checkresult, func, error); exit(1); } }
void OffloadDescriptor::gather_copyout_data() { OFFLOAD_TIMER_STOP(c_offload_target_compute); OFFLOAD_TIMER_START(c_offload_target_gather_outputs); for (int i = 0; i < m_vars_total; i++) { bool src_is_for_mic = (m_vars[i].direction.out || m_vars[i].into == NULL); switch (m_vars[i].type.src) { case c_data_ptr_array: break; case c_data: case c_void_ptr: case c_cean_var: if (m_vars[i].direction.out && !m_vars[i].flags.is_static) { m_out.send_data( static_cast<char*>(m_vars[i].ptr) + m_vars[i].disp, m_vars[i].size); } break; case c_dv: break; case c_string_ptr: case c_data_ptr: case c_cean_var_ptr: case c_dv_ptr: if (m_vars[i].free_if && src_is_for_mic && !m_vars[i].flags.is_static) { void *buf = *static_cast<char**>(m_vars[i].ptr) - m_vars[i].mic_offset - (m_vars[i].flags.is_stack_buf? 0 : m_vars[i].offset); if (buf == NULL) { break; } // decrement buffer reference count OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs); BufReleaseRef(buf); OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs); } break; case c_func_ptr: if (m_vars[i].direction.out) { m_out.send_func_ptr(*((void**) m_vars[i].ptr)); } break; case c_dv_data: case c_dv_ptr_data: case c_dv_data_slice: case c_dv_ptr_data_slice: if (src_is_for_mic && m_vars[i].free_if && !m_vars[i].flags.is_static) { ArrDesc *dvp = (m_vars[i].type.src == c_dv_data || m_vars[i].type.src == c_dv_data_slice) ? 
static_cast<ArrDesc*>(m_vars[i].ptr) : *static_cast<ArrDesc**>(m_vars[i].ptr); void *buf = reinterpret_cast<char*>(dvp->Base) - m_vars[i].mic_offset - m_vars[i].offset; if (buf == NULL) { break; } // decrement buffer reference count OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs); BufReleaseRef(buf); OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs); } break; default: LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst); abort(); } if (m_vars[i].into) { switch (m_vars[i].type.dst) { case c_data_ptr_array: break; case c_data: case c_void_ptr: case c_cean_var: case c_dv: break; case c_string_ptr: case c_data_ptr: case c_cean_var_ptr: case c_dv_ptr: if (m_vars[i].direction.in && m_vars[i].free_if && !m_vars[i].flags.is_static_dstn) { void *buf = *static_cast<char**>(m_vars[i].into) - m_vars[i].mic_offset - (m_vars[i].flags.is_stack_buf? 0 : m_vars[i].offset); if (buf == NULL) { break; } // decrement buffer reference count OFFLOAD_TIMER_START( c_offload_target_release_buffer_refs); BufReleaseRef(buf); OFFLOAD_TIMER_STOP( c_offload_target_release_buffer_refs); } break; case c_func_ptr: break; case c_dv_data: case c_dv_ptr_data: case c_dv_data_slice: case c_dv_ptr_data_slice: if (m_vars[i].free_if && m_vars[i].direction.in && !m_vars[i].flags.is_static_dstn) { ArrDesc *dvp = (m_vars[i].type.dst == c_dv_data_slice || m_vars[i].type.dst == c_dv_data) ? 
static_cast<ArrDesc*>(m_vars[i].into) : *static_cast<ArrDesc**>(m_vars[i].into); void *buf = reinterpret_cast<char*>(dvp->Base) - m_vars[i].mic_offset - m_vars[i].offset; if (buf == NULL) { break; } // decrement buffer reference count OFFLOAD_TIMER_START( c_offload_target_release_buffer_refs); BufReleaseRef(buf); OFFLOAD_TIMER_STOP( c_offload_target_release_buffer_refs); } break; default: LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst); abort(); } } } OFFLOAD_DEBUG_TRACE(2, "OUT buffer @ p %p size %lld\n", m_out.get_buffer_start(), m_out.get_buffer_size()); OFFLOAD_DEBUG_DUMP_BYTES(2, m_out.get_buffer_start(), m_out.get_buffer_size()); OFFLOAD_DEBUG_TRACE_1(1, get_offload_number(), c_offload_copyout_data, "Total copyout data sent to host: [%lld] bytes\n", m_out.get_tfr_size()); OFFLOAD_TIMER_STOP(c_offload_target_gather_outputs); }
void OffloadDescriptor::merge_var_descs( VarDesc *vars, VarDesc2 *vars2, int vars_total ) { // number of variable descriptors received from host and generated // locally should match if (m_vars_total < vars_total) { LIBOFFLOAD_ERROR(c_merge_var_descs1); exit(1); } for (int i = 0; i < m_vars_total; i++) { // instead of m_vars[i].type.src we will use m_vars_extra[i].type_src if (i < vars_total) { // variable type must match if (m_vars[i].type.bits != vars[i].type.bits) { OFFLOAD_TRACE(2, "m_vars[%d].type.bits=%08x, vars[%d].type.bits=%08x\n", i, m_vars[i].type.bits, i, vars[i].type.bits); LIBOFFLOAD_ERROR(c_merge_var_descs2); exit(1); } if (m_vars[i].type.src == c_extended_type) { VarDescExtendedType *etype = reinterpret_cast<VarDescExtendedType*>(vars[i].ptr); m_vars_extra[i].type_src = etype->extended_type; m_vars[i].ptr = etype->ptr; } else { m_vars_extra[i].type_src = m_vars[i].type.src; if (!(m_vars[i].flags.use_device_ptr && m_vars[i].type.src == c_dv)) { m_vars[i].ptr = vars[i].ptr; } } // instead of m_vars[i].type.dst we will use m_vars_extra[i].type_dst if (m_vars[i].type.dst == c_extended_type && i < vars_total) { VarDescExtendedType *etype = reinterpret_cast<VarDescExtendedType*>(vars[i].into); m_vars_extra[i].type_dst = etype->extended_type; m_vars[i].into = etype->ptr; } else { m_vars_extra[i].type_dst = m_vars[i].type.dst; m_vars[i].into = vars[i].into; } const char *var_sname = ""; if (vars2 != NULL) { if (vars2[i].sname != NULL) { var_sname = vars2[i].sname; } } OFFLOAD_DEBUG_TRACE_1(2, get_offload_number(), c_offload_var, " VarDesc %d, var=%s, %s, %s\n", i, var_sname, vardesc_direction_as_string[m_vars[i].direction.bits], vardesc_type_as_string[m_vars_extra[i].type_src]); if (vars2 != NULL && vars2[i].dname != NULL) { OFFLOAD_TRACE(2, " into=%s, %s\n", vars2[i].dname, vardesc_type_as_string[m_vars_extra[i].type_dst]); } } else { m_vars_extra[i].type_src = m_vars[i].type.src; m_vars_extra[i].type_dst = m_vars[i].type.dst; } OFFLOAD_TRACE(2, " 
type_src=%d, type_dstn=%d, direction=%d, " "alloc_if=%d, free_if=%d, align=%d, mic_offset=%d, flags=0x%x, " "offset=%lld, size=%lld, count/disp=%lld, ptr=%p into=%p\n", m_vars_extra[i].type_src, m_vars_extra[i].type_dst, m_vars[i].direction.bits, m_vars[i].alloc_if, m_vars[i].free_if, m_vars[i].align, m_vars[i].mic_offset, m_vars[i].flags.bits, m_vars[i].offset, m_vars[i].size, m_vars[i].count, m_vars[i].ptr, m_vars[i].into); } }
// Acquire an offload descriptor for the requested target.
//
//   target_type   - TARGET_HOST or TARGET_MIC.
//   target_number - device number for TARGET_MIC; -1 selects device 0,
//                   values below -1 are rejected.
//   is_optional   - non-zero when the offload may be skipped if the device
//                   is not available.
//   status        - optional status record; initialized to "unavailable"
//                   on entry.
//   file, line    - source location handed to OFFLOAD_TIMER_INIT.
//
// Returns a newly allocated OffloadDescriptor, or NULL when the target could
// not be acquired and the failure is tolerated.  The process exits with
// status 1 for an invalid device number, or when the device is unavailable,
// the offload is mandatory and no status record was supplied.
extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE(
    TARGET_TYPE      target_type,
    int              target_number,
    int              is_optional,
    _Offload_status* status,
    const char*      file,
    uint64_t         line
)
{
    // Start with a status that says nothing was offloaded
    if (status != 0) {
        status->result = OFFLOAD_UNAVAILABLE;
        status->device_number = -1;
        status->data_sent = 0;
        status->data_received = 0;
    }

    // The library has to be initialized first
    bool retval = __offload_init_library();

    // OFFLOAD_TIMER_INIT must follow call to __offload_init_library
    OffloadHostTimerData * timer_data = OFFLOAD_TIMER_INIT(file, line);

    OFFLOAD_TIMER_START(timer_data, c_offload_host_total_offload);

    OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);

    // Bring up every device when the init policy is on_offload_all
    if (retval && __offload_init_type == c_init_on_offload_all) {
        for (int engine = 0; engine < mic_engines_total; engine++) {
            mic_engines[engine].init();
        }
    }
    OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);

    OFFLOAD_TIMER_START(timer_data, c_offload_host_target_acquire);

    if (target_type == TARGET_HOST) {
        // Host always available
        retval = true;
    }
    else if (target_type == TARGET_MIC) {
        // Anything below -1 is not a valid request
        if (target_number < -1) {
            LIBOFFLOAD_ERROR(c_invalid_device_number);
            exit(1);
        }

        if (!retval) {
            // fallback to CPU
            target_number = -1;
        }
        else {
            // A user-provided number is wrapped to the available engines;
            // -1 means device 0
            if (target_number >= 0) {
                target_number = target_number % mic_engines_total;
            }
            else {
                target_number = 0;
            }

            // reserve device in ORSL
            bool reserved;
            if (is_optional) {
                reserved = ORSL::try_reserve(target_number);
            }
            else {
                reserved = ORSL::reserve(target_number);
            }
            if (!reserved) {
                target_number = -1;
            }

            // initialize device
            if (target_number >= 0 &&
                __offload_init_type == c_init_on_offload) {
                OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
                mic_engines[target_number].init();
                OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);
            }
        }

        if (target_number < 0 || !retval) {
            // A mandatory offload without a status record cannot fail
            // silently
            if (!is_optional && status == 0) {
                LIBOFFLOAD_ERROR(c_device_is_not_available);
                exit(1);
            }

            retval = false;
        }
    }

    if (!retval) {
        // Nothing acquired: close the timers and drop the report data
        OFFLOAD_TIMER_STOP(timer_data, c_offload_host_target_acquire);
        OFFLOAD_TIMER_STOP(timer_data, c_offload_host_total_offload);
        offload_report_free_data(timer_data);
        return NULL;
    }

    OFFLOAD ofld = new OffloadDescriptor(target_number, status,
                                         !is_optional, false, timer_data);

    OFFLOAD_TIMER_HOST_MIC_NUM(timer_data, target_number);

    Offload_Report_Prolog(timer_data);

    OFFLOAD_DEBUG_TRACE_1(2, timer_data->offload_number, c_offload_start,
                          "Starting offload: target_type = %d, "
                          "number = %d, is_optional = %d\n",
                          target_type, target_number, is_optional);

    OFFLOAD_TIMER_STOP(timer_data, c_offload_host_target_acquire);

    return ofld;
}