示例#1
0
// Merge the variable descriptors received from the host into the locally
// generated table m_vars and trace every local entry.
//
//   vars        descriptors received from the host; their ptr and into
//               fields are copied into the matching m_vars entries
//   vars2       optional (may be NULL) companion table with the variable
//               names (sname/dname) used only for tracing
//   vars_total  number of entries in vars
//
// The process exits if the host sent more descriptors than exist locally or
// if the type bits of a descriptor pair differ.
void OffloadDescriptor::merge_var_descs(
    VarDesc *vars,
    VarDesc2 *vars2,
    int vars_total
)
{
    // the host must not provide more descriptors than were generated locally
    if (vars_total > m_vars_total) {
        LIBOFFLOAD_ERROR(c_merge_var_descs1);
        exit(1);
    }

    for (int idx = 0; idx < m_vars_total; idx++) {
        const bool has_host_desc = (idx < vars_total);

        if (has_host_desc) {
            // both sides must agree on the variable type
            if (vars[idx].type.bits != m_vars[idx].type.bits) {
                LIBOFFLOAD_ERROR(c_merge_var_descs2);
                exit(1);
            }

            // take over the addresses supplied by the host
            m_vars[idx].ptr = vars[idx].ptr;
            m_vars[idx].into = vars[idx].into;

            // source name for the trace; empty when no name is available
            const char *var_sname =
                (vars2 != NULL && vars2[idx].sname != NULL) ?
                    vars2[idx].sname : "";

            OFFLOAD_DEBUG_TRACE_1(2, get_offload_number(), c_offload_var,
                "   VarDesc %d, var=%s, %s, %s\n",
                idx, var_sname,
                vardesc_direction_as_string[m_vars[idx].direction.bits],
                vardesc_type_as_string[m_vars[idx].type.src]);

            // destination name is traced only when one was provided
            if (vars2 != NULL && vars2[idx].dname != NULL) {
                OFFLOAD_TRACE(2, "              into=%s, %s\n",
                    vars2[idx].dname,
                    vardesc_type_as_string[m_vars[idx].type.dst]);
            }
        }

        // full dump of the (possibly updated) local descriptor
        OFFLOAD_TRACE(2,
            "              type_src=%d, type_dstn=%d, direction=%d, "
            "alloc_if=%d, free_if=%d, align=%d, mic_offset=%d, flags=0x%x, "
            "offset=%lld, size=%lld, count/disp=%lld, ptr=%p into=%p\n",
            m_vars[idx].type.src,
            m_vars[idx].type.dst,
            m_vars[idx].direction.bits,
            m_vars[idx].alloc_if,
            m_vars[idx].free_if,
            m_vars[idx].align,
            m_vars[idx].mic_offset,
            m_vars[idx].flags.bits,
            m_vars[idx].offset,
            m_vars[idx].size,
            m_vars[idx].count,
            m_vars[idx].ptr,
            m_vars[idx].into);
    }
}
示例#2
0
// Translate a target-side timer stage identifier into the report string
// returned by offload_get_message_str() for that stage.
// An unknown identifier is reported with c_report_unknown_timer_node and the
// process is aborted.
char const * report_get_target_stage_str(int i)
{
    switch (i) {
    case c_offload_target_total_time:
        return offload_get_message_str(msg_c_report_target_total_time);

    case c_offload_target_descriptor_setup:
        return offload_get_message_str(
            msg_c_report_target_descriptor_setup);

    case c_offload_target_func_lookup:
        return offload_get_message_str(msg_c_report_target_func_lookup);

    case c_offload_target_func_time:
        return offload_get_message_str(msg_c_report_target_func_time);

    case c_offload_target_scatter_inputs:
        return offload_get_message_str(msg_c_report_target_scatter_inputs);

    case c_offload_target_add_buffer_refs:
        return offload_get_message_str(msg_c_report_target_add_buffer_refs);

    case c_offload_target_compute:
        return offload_get_message_str(msg_c_report_target_compute);

    case c_offload_target_gather_outputs:
        return offload_get_message_str(msg_c_report_target_gather_outputs);

    case c_offload_target_release_buffer_refs:
        return offload_get_message_str(
            msg_c_report_target_release_buffer_refs);

    default:
        // not a known target timer node
        LIBOFFLOAD_ERROR(c_report_unknown_timer_node);
        abort();
    }
}
示例#3
0
char const * report_get_host_stage_str(int i)
{
    switch (i) {
        case c_offload_host_total_offload:
            return (
               offload_get_message_str(msg_c_report_host_total_offload_time));
        case c_offload_host_initialize:
            return (offload_get_message_str(msg_c_report_host_initialize));
        case c_offload_host_target_acquire:
            return (
                offload_get_message_str(msg_c_report_host_target_acquire));
        case c_offload_host_wait_deps:
            return (offload_get_message_str(msg_c_report_host_wait_deps));
        case c_offload_host_setup_buffers:
            return (offload_get_message_str(msg_c_report_host_setup_buffers));
        case c_offload_host_alloc_buffers:
            return (offload_get_message_str(msg_c_report_host_alloc_buffers));
        case c_offload_host_setup_misc_data:
            return (
                offload_get_message_str(msg_c_report_host_setup_misc_data));
        case c_offload_host_alloc_data_buffer:
            return (
                offload_get_message_str(msg_c_report_host_alloc_data_buffer));
        case c_offload_host_send_pointers:
            return (offload_get_message_str(msg_c_report_host_send_pointers));
        case c_offload_host_gather_inputs:
            return (offload_get_message_str(msg_c_report_host_gather_inputs));
        case c_offload_host_map_in_data_buffer:
            return (
                offload_get_message_str(msg_c_report_host_map_in_data_buffer));
        case c_offload_host_unmap_in_data_buffer:
            return (offload_get_message_str(
                msg_c_report_host_unmap_in_data_buffer));
        case c_offload_host_start_compute:
            return (offload_get_message_str(msg_c_report_host_start_compute));
        case c_offload_host_wait_compute:
            return (offload_get_message_str(msg_c_report_host_wait_compute));
        case c_offload_host_start_buffers_reads:
            return (offload_get_message_str(
                msg_c_report_host_start_buffers_reads));
        case c_offload_host_scatter_outputs:
            return (
                offload_get_message_str(msg_c_report_host_scatter_outputs));
        case c_offload_host_map_out_data_buffer:
            return (offload_get_message_str(
                msg_c_report_host_map_out_data_buffer));
        case c_offload_host_unmap_out_data_buffer:
            return (offload_get_message_str(
                msg_c_report_host_unmap_out_data_buffer));
        case c_offload_host_wait_buffers_reads:
            return (
                offload_get_message_str(msg_c_report_host_wait_buffers_reads));
        case c_offload_host_destroy_buffers:
            return (
                offload_get_message_str(msg_c_report_host_destroy_buffers));
        default:
            LIBOFFLOAD_ERROR(c_report_unknown_timer_node);
            abort();
    }
}
示例#4
0
// Return the COI pipeline that the calling host thread uses on this engine,
// creating it (and the per-thread bookkeeping object) on first use.
//
// Every newly created pipeline is counted in m_proc_number; exceeding
// COI_PIPELINE_MAX_PIPELINES is reported and aborts.
COIPIPELINE Engine::get_pipeline(void)
{
    // per-thread state is kept in thread-specific storage
    Thread* thread = (Thread*) thread_getspecific(mic_thread_key);
    if (thread == 0) {
        thread = new Thread(&m_proc_number);
        thread_setspecific(mic_thread_key, thread);
    }

    COIPIPELINE pipeline = thread->get_pipeline(m_index);
    if (pipeline == 0) {
        COIRESULT res;
        int proc_num;

        // Both branches must yield the counter value AFTER the increment:
        // _InterlockedIncrement returns the new value, so the GCC builtin
        // has to be __sync_add_and_fetch (not __sync_fetch_and_add, which
        // returns the old value and would let one extra pipeline through).
#ifndef TARGET_WINNT
        proc_num = __sync_add_and_fetch(&m_proc_number, 1);
#else // TARGET_WINNT
        proc_num = _InterlockedIncrement(&m_proc_number);
#endif // TARGET_WINNT

        if (proc_num > COI_PIPELINE_MAX_PIPELINES) {
            LIBOFFLOAD_ERROR(c_coipipe_max_number, COI_PIPELINE_MAX_PIPELINES);
            LIBOFFLOAD_ABORT;
        }
        // create pipeline for this thread
        res = COI::PipelineCreate(m_process, 0, mic_stack_size, &pipeline);
        check_result(res, c_pipeline_create, m_index, res);
        thread->set_pipeline(m_index, pipeline);
    }
    return pipeline;
}
示例#5
0
// Send function pointer
//
// The pointer is not written to the buffer as an address. It is translated
// into the name registered for it in __offload_funcs, and that name together
// with its terminating NUL is appended at buffer_ptr. A null pointer is
// encoded as an empty string. An address with no registered name is a fatal
// error.
void Marshaller::send_func_ptr(
    const void* data
)
{
    // encoding used for a null pointer: just the terminating NUL
    const char* func_name = "";
    size_t      nbytes = 1;

    if (data != 0) {
        func_name = __offload_funcs.find_name(data);
        if (func_name == 0) {
#if OFFLOAD_DEBUG > 0
            if (console_enabled > 2) {
                __offload_funcs.dump();
            }
#endif // OFFLOAD_DEBUG > 0

            LIBOFFLOAD_ERROR(c_send_func_ptr, data);
            exit(1);
        }
        // include the terminating NUL in the transfer
        nbytes = strlen(func_name) + 1;
    }

    memcpy(buffer_ptr, func_name, nbytes);
    buffer_ptr += nbytes;
    tfr_size += nbytes;
}
示例#6
0
// Receive function pointer
//
// Reads a NUL-terminated function name at buffer_ptr and stores the address
// registered for it in __offload_funcs into *data. An empty name yields a
// null pointer. A non-empty name with no registered address is a fatal
// error. buffer_ptr and tfr_size are advanced past the name and its NUL.
void Marshaller::receive_func_ptr(
    const void** data
)
{
    const char* func_name = (const char*) buffer_ptr;
    size_t      nbytes = 1;     // an empty name occupies only its NUL

    if (func_name[0] == '\0') {
        *data = 0;
    }
    else {
        *data = __offload_funcs.find_addr(func_name);
        if (*data == 0) {
#if OFFLOAD_DEBUG > 0
            if (console_enabled > 2) {
                __offload_funcs.dump();
            }
#endif // OFFLOAD_DEBUG > 0

            LIBOFFLOAD_ERROR(c_receive_func_ptr, func_name);
            exit(1);
        }
        nbytes = strlen(func_name) + 1;
    }

    buffer_ptr += nbytes;
    tfr_size += nbytes;
}
示例#7
0
// Decide which device a MYO offload should run on.
//
//   target_number >= 0   device requested by the user (taken modulo
//                        mic_engines_total)
//   target_number == -1  device 0 is tried; failure is fatal
//   target_number == -2  "default": device 0 is tried; failure is not fatal
//   target_number <  -2  invalid, fatal error
//
// Returns the reserved and initialized device number, or -1 when no device
// is used (only possible for the default request).
extern "C" int __offload_myoIsAvailable(int target_number)
{
    OFFLOAD_DEBUG_TRACE(3, "%s(%d)\n", __func__, target_number);

    if (target_number < -2) {
        LIBOFFLOAD_ERROR(c_invalid_device_number);
        exit(1);
    }

    const bool is_default_number = (target_number == -2);

    if (!__offload_myoInit()) {
        // fallback to CPU
        target_number = -1;
    }
    else {
        // user-provided device number, otherwise try device 0
        const int device =
            (target_number >= 0) ? target_number % mic_engines_total : 0;

        // reserve device in ORSL
        target_number = ORSL::reserve(device) ? device : -1;

        // make sure device is initialized
        if (target_number >= 0) {
            mic_engines[target_number].init();
        }
    }

    // only the default request may silently end up without a device
    if (!is_default_number && target_number < 0) {
        LIBOFFLOAD_ERROR(c_device_is_not_available);
        exit(1);
    }

    return target_number;
}
示例#8
0
// Run a 64-bit cilk_for loop either on a device (through a MYO remote call)
// or on the host.
//
//   size              size in bytes of the closure object
//   copy_constructor  optional function used to copy the closure into shared
//                     memory; when null the closure is copied with memcpy
//   target_number     requested device, resolved by __offload_myoIsAvailable
//   raddr             passed through as the first argument of the loop
//   closure_object    closure to copy to the device / pass to the host loop
//   iters, grain_size loop parameters passed through unchanged
//
// When no device is available the loop is executed on the host with
// __cilkrts_cilk_for_64.
extern "C" void __intel_cilk_for_64_offload(
    int size,
    void (*copy_constructor)(void*, void*),
    int target_number,
    void *raddr,
    void *closure_object,
    uint64_t iters,
    uint64_t grain_size)
{
    OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);

    target_number = __offload_myoIsAvailable(target_number);
    if (target_number >= 0) {
        // argument block placed in shared memory; the closure follows the
        // three fixed members
        struct S {
            void *M1;
            uint64_t M2;
            uint64_t M3;
            char closure[];
        } *args;

        args = (struct S*) _Offload_shared_malloc(sizeof(struct S) + size);
        if (args == NULL) {
            // the block is dereferenced right below, so an allocation
            // failure cannot be survived
            LIBOFFLOAD_ERROR(c_malloc);
            exit(1);
        }
        args->M1 = raddr;
        args->M2 = iters;
        args->M3 = grain_size;

        if (copy_constructor == 0) {
            memcpy(args->closure, closure_object, size);
        }
        else {
            copy_constructor(args->closure, closure_object);
        }

        // release shared memory ownership around the remote call
        myo_wrapper.Release();
        myo_wrapper.GetResult(
            myo_wrapper.RemoteCall("__intel_cilk_for_64_offload", args,
                                   target_number)
        );
        myo_wrapper.Acquire();

        _Offload_shared_free(args);

        ORSL::release(target_number);
    }
    else {
        __cilkrts_cilk_for_64(raddr,
                              closure_object,
                              iters,
                              grain_size);
    }
}
示例#9
0
void Engine::fini_process(bool verbose)
{
    if (m_process != 0) {
        uint32_t sig;
        int8_t ret;

        // destroy target process
        OFFLOAD_DEBUG_TRACE(2, "Destroying process on the device %d\n",
                            m_index);

        COIRESULT res = COI::ProcessDestroy(m_process, -1, 0, &ret, &sig);
        m_process = 0;

        if (res == COI_SUCCESS) {
            OFFLOAD_DEBUG_TRACE(3, "Device process: signal %d, exit code %d\n",
                                sig, ret);
            if (verbose) {
                if (sig != 0) {
                    LIBOFFLOAD_ERROR(
                        c_mic_process_exit_sig, m_index, sig,
                        c_signal_names[sig >= c_signal_max ? 0 : sig]);
                }
                else {
                    LIBOFFLOAD_ERROR(c_mic_process_exit_ret, m_index, ret);
                }
            }

            // for idb
            if (__dbg_is_attached) {
                __dbg_target_so_unloaded();
            }
        }
        else {
            if (verbose) {
                LIBOFFLOAD_ERROR(c_mic_process_exit, m_index);
            }
        }
    }
}
示例#10
0
// Destroy the stream identified by handle and hand the device threads that
// were assigned to it back to the engine's pool of available threads
// (m_cpus). An unknown handle is reported and aborts.
void Engine::stream_destroy(_Offload_stream handle)
{
    // get stream
    Stream * stream = Stream::find_stream(handle, true);

    if (!stream) {
        LIBOFFLOAD_ERROR(c_offload_no_stream, m_index);
        LIBOFFLOAD_ABORT;
    }
    else {
        // return cpus for future use
        for (int cpu = 0; cpu < m_num_threads; cpu++) {
            if (!stream->m_stream_cpus.test(cpu)) {
                continue;
            }
            m_cpus.set(cpu);
        }
        delete stream;
    }
}
示例#11
0
// Create CeanReadRanges data for reading contiguous ranges of
// noncontiguous array defined by the argument
//
//   dvp  array descriptor (rank, element length, base address and per
//        dimension extent/multiplier)
//
// Returns a malloc'ed CeanReadRanges, or NULL when the rank of the
// descriptor is 0. The process exits if the allocation fails.
CeanReadRanges * init_read_ranges_dv(const ArrDesc *dvp)
{
    int64_t         len;
    int             count;
    int             rank = dvp->Rank;
    CeanReadRanges *res = NULL;

    if (rank != 0) {
        int i = 0;
        len = dvp->Len;

        // Skip the leading dimensions whose multiplier equals the
        // accumulated length; i ends up as the first dimension that breaks
        // this chain (or rank when none does).
        if (dvp->Dim[0].Mult == len) {
            for (i = 1; i < rank; i++) {
                len *= dvp->Dim[i-1].Extent;
                if (dvp->Dim[i].Mult != len) {
                    break;
                }
            }
        }

        // one CeanReadDim entry for every remaining dimension
        res = (CeanReadRanges *)malloc(
            sizeof(CeanReadRanges) + (rank - i) * sizeof(CeanReadDim));
        if (res == NULL) {
            // res is filled in right below, so the failure is fatal
            LIBOFFLOAD_ERROR(c_malloc);
            exit(1);
        }
        res -> last_noncont_ind = rank - i - 1;
        count = 1;

        // remaining dimensions are stored in reverse order
        for (; i < rank; i++) {
            res->Dim[rank - i - 1].count = count;
            res->Dim[rank - i - 1].size = dvp->Dim[i].Mult;
            count *= dvp->Dim[i].Extent;
        }
        res -> range_max_number = count;
        res -> range_size = len;
        res -> ptr = (void*)dvp->Base;
        res -> current_number = 0;
        res -> init_offset = 0;
    }
    return res;
}
示例#12
0
// Shut down MYO: start the fini call on every device, finalize the MYO
// runtime on the host, then wait for all device calls to complete.
// Does nothing when MYO was never made available.
DLL_LOCAL void __offload_myoFini(void)
{
    if (!myo_is_available) {
        return;
    }

    OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);

    COIEVENT fini_events[MIC_ENGINES_MAX];

    // kick off myoiLibFini calls on all devices
    for (int dev = 0; dev < mic_engines_total; dev++) {
        mic_engines[dev].fini_myo(&fini_events[dev]);
    }

    // cleanup myo runtime on host
    myo_wrapper.LibFini();

    // wait for the target fini calls to finish
    COIRESULT res = COI::EventWait(mic_engines_total, fini_events,
                                   -1, 1, 0, 0);
    if (res != COI_SUCCESS) {
        LIBOFFLOAD_ERROR(c_event_wait, res);
        exit(1);
    }
}
示例#13
0
// Allocate size bytes aligned to at least align bytes.
//
//   size   number of bytes to allocate
//   align  requested alignment; raised to sizeof(void*) when smaller
//
// Returns the pointer obtained from _mm_malloc. The process exits when the
// allocation fails, so the result is never NULL.
extern void *OFFLOAD_MALLOC(
    size_t size,
    size_t align
)
{
    void *ptr;

    // size_t does not match %lld on every target, so convert explicitly
    OFFLOAD_DEBUG_TRACE(2, "%s(%lld, %lld)\n", __func__,
                        (long long) size, (long long) align);

    if (align < sizeof(void*)) {
        align = sizeof(void*);
    }

    ptr = _mm_malloc(size, align);
    if (ptr == NULL) {
        LIBOFFLOAD_ERROR(c_offload_malloc, size, align);
        exit(1);
    }

    OFFLOAD_DEBUG_TRACE(2, "%s returned %p\n", __func__, ptr);

    return ptr;
}
示例#14
0
// Return the COI pipeline bound to the stream identified by handle,
// creating it on first use.
//
// Creating the pipeline
//   - counts it in m_proc_number (limit COI_PIPELINE_MAX_PIPELINES),
//   - starts the target process if it is not running yet,
//   - takes the number of device threads requested by the stream out of
//     m_cpus (thread 0 is never handed out) and builds the matching COI CPU
//     mask,
//   - creates the pipeline with that mask, and
//   - runs c_func_set_stream_affinity in the new pipeline with the affinity
//     type ("compact" by default, or the value of OFFLOAD_STREAM_AFFINITY).
//
// An unknown stream handle, too many pipelines or too few free threads are
// reported and abort.
COIPIPELINE Engine::get_pipeline(_Offload_stream handle)
{
    Stream * stream = Stream::find_stream(handle, false);

    if (!stream) {
        LIBOFFLOAD_ERROR(c_offload_no_stream, m_index);
        LIBOFFLOAD_ABORT;
    }

    COIPIPELINE pipeline = stream->get_pipeline();

    if (pipeline == 0) {
        COIRESULT     res;
        int           proc_num;
        COI_CPU_MASK  in_Mask ;

        // Both branches must yield the counter value AFTER the increment:
        // _InterlockedIncrement returns the new value, so the GCC builtin
        // has to be __sync_add_and_fetch (not __sync_fetch_and_add, which
        // returns the old value and would let one extra pipeline through).
#ifndef TARGET_WINNT
        proc_num = __sync_add_and_fetch(&m_proc_number, 1);
#else // TARGET_WINNT
        proc_num = _InterlockedIncrement(&m_proc_number);
#endif // TARGET_WINNT

        if (proc_num > COI_PIPELINE_MAX_PIPELINES) {
            LIBOFFLOAD_ERROR(c_coipipe_max_number, COI_PIPELINE_MAX_PIPELINES);
            LIBOFFLOAD_ABORT;
        }

        m_stream_lock.lock();

        // start process if not done yet
        if (m_process == 0) {
            init_process();
        }

        // create CPUmask
        res = COI::PipelineClearCPUMask(in_Mask);
        check_result(res, c_clear_cpu_mask, m_index, res);

        int stream_cpu_num = stream->get_cpu_number();

        stream->m_stream_cpus.reset();

        int threads_per_core = m_num_threads / m_num_cores;

        // The "stream_cpu_num" available threads is set in mask.
        // Available threads are defined by examining of m_cpus bitset.
        // We skip thread 0 .
        for (int i = 1; i < m_num_threads; i++) {
            // for available thread i m_cpus[i] is equal to 1
            if (m_cpus[i]) {
                res = COI::PipelineSetCPUMask(m_process,
                    i / threads_per_core,
                    i % threads_per_core,
                    in_Mask);

                check_result(res, c_set_cpu_mask, res);
                // mark thread i as nonavailable
                m_cpus.set(i,0);
                // Mark thread i as given for the stream.
                // In case of stream destroying by call to
                // _Offload_stream_destroy we can mark the thread i as
                // available.
                stream->m_stream_cpus.set(i);
                if (--stream_cpu_num <= 0) {
                    break;
                }
            }
        }

        // if stream_cpu_num is greater than 0 there are not enough
        // available threads
        if (stream_cpu_num > 0) {
            LIBOFFLOAD_ERROR(c_create_pipeline_for_stream, m_num_threads);
            LIBOFFLOAD_ABORT;
        }
        // create pipeline for this thread
        OFFLOAD_DEBUG_TRACE(2, "COIPipelineCreate Mask\n"
                               "%016lx %016lx %016lx %016lx\n%016lx %016lx %016lx %016lx\n"
                               "%016lx %016lx %016lx %016lx\n%016lx %016lx %016lx %016lx\n",
                               in_Mask[0], in_Mask[1], in_Mask[2], in_Mask[3],
                               in_Mask[4], in_Mask[5], in_Mask[6], in_Mask[7],
                               in_Mask[8], in_Mask[9], in_Mask[10], in_Mask[11],
                               in_Mask[12], in_Mask[13], in_Mask[14], in_Mask[15]);
        res = COI::PipelineCreate(m_process, in_Mask,
                                  mic_stack_size, &pipeline);
        check_result(res, c_pipeline_create, m_index, res);

        // Set stream's affinities
        {
            struct affinity_spec affinity_spec;
            char* affinity_type;
            int i;

            // "compact" by default
            affinity_spec.affinity_type = affinity_compact;

            // Check if user has specified type of affinity
            if ((affinity_type = getenv("OFFLOAD_STREAM_AFFINITY")) !=
                                        NULL)
            {
                char affinity_str[16];
                int affinity_str_len;

                OFFLOAD_DEBUG_TRACE(2,
                    "User has specified OFFLOAD_STREAM_AFFINITY=%s\n",
                    affinity_type);

                // Set type of affinity requested: lower-case copy of at most
                // 15 characters of the user's value
                affinity_str_len = strlen(affinity_type);
                for (i=0; i<affinity_str_len && i<15; i++)
                {
                    // tolower() is only defined for values representable as
                    // unsigned char (or EOF); a plain char may be negative
                    affinity_str[i] =
                        (char) tolower((unsigned char) affinity_type[i]);
                }
                affinity_str[i] = '\0';
                if (strcmp(affinity_str, "compact") == 0) {
                    affinity_spec.affinity_type = affinity_compact;
                    OFFLOAD_DEBUG_TRACE(2, "Setting affinity=compact\n");
                } else if (strcmp(affinity_str, "scatter") == 0) {
                    affinity_spec.affinity_type = affinity_scatter;
                    OFFLOAD_DEBUG_TRACE(2, "Setting affinity=scatter\n");
                } else {
                    // unknown value: report it and keep the default
                    LIBOFFLOAD_ERROR(c_incorrect_affinity, affinity_str);
                    affinity_spec.affinity_type = affinity_compact;
                    OFFLOAD_DEBUG_TRACE(2, "Setting affinity=compact\n");
                }
            }
            // Make flat copy of sink mask because COI's mask is opaque
            for (i=0; i<16; i++) {
                affinity_spec.sink_mask[i] = in_Mask[i];
            }
            // Set number of cores and threads
            affinity_spec.num_cores = m_num_cores;
            affinity_spec.num_threads = m_num_threads;

            // run the affinity setter in the new pipeline and wait for it
            COIEVENT event;
            res = COI::PipelineRunFunction(pipeline,
                                   m_funcs[c_func_set_stream_affinity],
                                   0, 0, 0,
                                   0, 0,
                                   &affinity_spec, sizeof(affinity_spec),
                                   0, 0,
                                   &event);
            check_result(res, c_pipeline_run_func, m_index, res);

            res = COI::EventWait(1, &event, -1, 1, 0, 0);
            check_result(res, c_event_wait, res);
        }

        m_stream_lock.unlock();
        stream->set_pipeline(pipeline);
    }
    return pipeline;
}
示例#15
0
// One-time MYO initialization: load the MYO library, initialize every
// device, start the MYO init call on each device, initialize the MYO runtime
// on the host and wait for the device calls to finish. On success
// myo_is_available is set to true; if the library cannot be loaded the
// function returns without changing it.
static void __offload_myoInit_once(void)
{
    if (!__offload_myoLoadLibrary()) {
        return;
    }

    // initialize all devices
    for (int i = 0; i < mic_engines_total; i++) {
        mic_engines[i].init();
    }

    // load and initialize MYO library
    OFFLOAD_DEBUG_TRACE(2, "Initializing MYO library ...\n");

    COIEVENT events[MIC_ENGINES_MAX];

    // One entry per device +
    // A pair of entries for the Host postInit func +
    // A pair of entries for the MIC postInit func +
    // end marker
    MyoiUserParams params[MIC_ENGINES_MAX+5];

    // Load target library to all devices and
    // create libinit parameters for all devices
    for (int i = 0; i < mic_engines_total; i++) {
        mic_engines[i].init_myo(&events[i]);

        params[i].type = MYOI_USERPARAMS_DEVID;
        params[i].nodeid = mic_engines[i].get_physical_index() + 1;
        OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %d, %d }\n",
            i, params[i].type, params[i].nodeid);
    }

    // Check if V2 myoLibInit is available
    if (myo_wrapper.PostInitFuncSupported()) {
        // Set the host post libInit function indicator
        params[mic_engines_total].type =
            MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC;
        params[mic_engines_total].nodeid =
            MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC_HOST_NODE;
        OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %d, %d }\n",
            mic_engines_total,
            params[mic_engines_total].type, params[mic_engines_total].nodeid);

        // Set the host post libInit host function address
        ((MyoiUserParamsPostLibInit*)(&params[mic_engines_total+1]))->
            postLibInitHostFuncAddress =
                (void (*)())&__offload_propagate_shared_vars;
        OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %p }\n",
            mic_engines_total+1,
            ((MyoiUserParamsPostLibInit*)(&params[mic_engines_total+1]))->
                postLibInitHostFuncAddress);

        // Set the target post libInit function indicator
        params[mic_engines_total+2].type =
            MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC;
        params[mic_engines_total+2].nodeid =
            MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC_ALL_NODES;

        // Set the target post libInit target function name
        ((MyoiUserParamsPostLibInit*)(&params[mic_engines_total+3]))->
            postLibInitRemoveFuncName = "--vtable_initializer--";
        // trace the entry that was just written (index +3); entry +1 holds
        // the host function address and must not be printed as a string
        OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %s }\n",
            mic_engines_total+3,
            ((MyoiUserParamsPostLibInit*)(&params[mic_engines_total+3]))->
                postLibInitRemoveFuncName);

        params[mic_engines_total+4].type = MYOI_USERPARAMS_LAST_MSG;
        params[mic_engines_total+4].nodeid = 0;
        OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %d, %d }\n",
            mic_engines_total+4,
            params[mic_engines_total+4].type,
            params[mic_engines_total+4].nodeid);
    } else {
        // no post-init support: the end marker directly follows the devices
        params[mic_engines_total].type = MYOI_USERPARAMS_LAST_MSG;
        params[mic_engines_total].nodeid = 0;
        OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %d, %d }\n",
            mic_engines_total,
            params[mic_engines_total].type, params[mic_engines_total].nodeid);
    }

    // initialize myo runtime on host
    myo_wrapper.LibInit(params, 0);

    // wait for the target init calls to finish
    COIRESULT res;
    res = COI::EventWait(mic_engines_total, events, -1, 1, 0, 0);
    if (res != COI_SUCCESS) {
        LIBOFFLOAD_ERROR(c_event_wait, res);
        exit(1);
    }

    myo_is_available = true;
    OFFLOAD_DEBUG_TRACE(2, "setting myo_is_available=%d\n", myo_is_available);

    OFFLOAD_DEBUG_TRACE(2, "Initializing MYO library ... done\n");
}
示例#16
0
// Create and initialize the target process for this engine.
//
// Steps performed:
//   - build the environment for the card and obtain the engine handle,
//   - read the engine's core/thread counts and mark all threads available
//     in m_cpus,
//   - optionally request two DMA channels (mic_dma_channel_count == 2),
//   - create the process, either from the file mic_device_main (when no
//     target executable image is registered) or from the in-memory image
//     __target_exe,
//   - optionally reserve target buffer caches,
//   - look up the function handles and initialize the device side,
//   - notify an attached debugger.
//
// Unrecoverable problems are reported and terminate the process.
void Engine::init_process(void)
{
    COIENGINE engine;
    COIRESULT res;
    const char **environ;
    char buf[4096];  // For exe path name

    // create environment for the target process
    environ = (const char**) mic_env_vars.create_environ_for_card(m_index);
    if (environ != 0) {
        for (const char **p = environ; *p != 0; p++) {
            OFFLOAD_DEBUG_TRACE(3, "Env Var for card %d: %s\n", m_index, *p);
        }
    }

    // Create execution context in the specified device
    OFFLOAD_DEBUG_TRACE(2, "Getting device %d (engine %d) handle\n", m_index,
                        m_physical_index);
    res = COI::EngineGetHandle(COI_ISA_MIC, m_physical_index, &engine);
    check_result(res, c_get_engine_handle, m_index, res);

    // Get engine info on threads and cores.
    // The values of core number and thread number will be used later at stream
    // creation by call to _Offload_stream_create(device,number_of_cpus).

    COI_ENGINE_INFO engine_info;

    res = COI::EngineGetInfo(engine, sizeof(COI_ENGINE_INFO), &engine_info);
    check_result(res, c_get_engine_info, m_index, res);

    // m_cpus bitset has 1 for available thread. At the beginning all threads
    // are available and m_cpus(i) is set to
    // 1 for i = [0...engine_info.NumThreads).
    m_cpus.reset();
    for (int i = 0; i < engine_info.NumThreads; i++) {
         m_cpus.set(i);
    }

    // The following values will be used at pipeline creation for streams
    m_num_cores = engine_info.NumCores;
    m_num_threads = engine_info.NumThreads;

    // Check if OFFLOAD_DMA_CHANNEL_COUNT is set to 2
    // Only the value 2 is supported in 16.0
    if (mic_dma_channel_count == 2) {
        if (COI::ProcessConfigureDMA) {
            // Set DMA channels using COI API
            COI::ProcessConfigureDMA(2, COI::DMA_MODE_READ_WRITE);
        }
        else {
            // Set environment variable COI_DMA_CHANNEL_COUNT
            // use putenv instead of setenv as Windows has no setenv.
            // Note: putenv requires its argument can't be freed or modified.
            // So no free after call to putenv or elsewhere.
            char * env_var = strdup("COI_DMA_CHANNEL_COUNT=2");
            if (env_var == NULL) {
                // putenv must not be called with a null pointer
                LIBOFFLOAD_ERROR(c_malloc);
                exit(1);
            }
            putenv(env_var);
        }
    }

    // Target executable is not available then use compiler provided offload_main
    if (__target_exe == 0) {
       if (mic_device_main == 0) {
          // neither an in-memory image nor a file name: nothing to load
          LIBOFFLOAD_ERROR(c_report_no_host_exe);
          exit(1);
       }

       OFFLOAD_DEBUG_TRACE(2,
           "Loading target executable %s\n",mic_device_main);

       res = COI::ProcessCreateFromFile(
           engine,                 // in_Engine
           mic_device_main,        // in_pBinaryName
           0,                      // in_Argc
           0,                      // in_ppArgv
           environ == 0,           // in_DupEnv
           environ,                // in_ppAdditionalEnv
           mic_proxy_io,           // in_ProxyActive
           mic_proxy_fs_root,      // in_ProxyfsRoot
           mic_buffer_size,        // in_BufferSpace
           mic_library_path,       // in_LibrarySearchPath
           &m_process              // out_pProcess
       );
    }
    else {
    // Target executable should be available by the time when we
    // attempt to initialize the device

       //  Need the full path of the FAT exe for VTUNE
       {
          // NOTE(review): the -1 check below matches readlink's failure
          // value; confirm how GetModuleFileName failures should be detected.
#ifndef TARGET_WINNT
          ssize_t len = readlink("/proc/self/exe", buf,1000);
#else
          int len = GetModuleFileName(NULL, buf,1000);
#endif // TARGET_WINNT
          if  (len == -1) {
             LIBOFFLOAD_ERROR(c_report_no_host_exe);
             exit(1);
          }
          else if (len > 999) {
             // the path filled the whole 1000-byte window
             LIBOFFLOAD_ERROR(c_report_path_buff_overflow);
             exit(1);
          }
          // readlink does not terminate the string
          buf[len] = '\0';
       }

       OFFLOAD_DEBUG_TRACE(2,
           "Loading target executable \"%s\" from %p, size %lld, host file %s\n",
           __target_exe->name, __target_exe->data, __target_exe->size,
           buf);

       res = COI::ProcessCreateFromMemory(
           engine,                 // in_Engine
           __target_exe->name,     // in_pBinaryName
           __target_exe->data,     // in_pBinaryBuffer
           __target_exe->size,     // in_BinaryBufferLength,
           0,                      // in_Argc
           0,                      // in_ppArgv
           environ == 0,           // in_DupEnv
           environ,                // in_ppAdditionalEnv
           mic_proxy_io,           // in_ProxyActive
           mic_proxy_fs_root,      // in_ProxyfsRoot
           mic_buffer_size,        // in_BufferSpace
           mic_library_path,       // in_LibrarySearchPath
           buf,                    // in_FileOfOrigin
           -1,                     // in_FileOfOriginOffset use -1 to indicate to
                                   // COI that is is a FAT binary
           &m_process              // out_pProcess
       );
    }
    check_result(res, c_process_create, m_index, res);

    if ((mic_4k_buffer_size != 0) || (mic_2m_buffer_size !=0)) {
       // available only in MPSS 4.2 and greater
       if (COI::ProcessSetCacheSize != 0 ) {
          int flags;
          //  Need compiler to use MPSS 3.2 or greater to get these
          // definition so currently hardcoding it
          //  COI_CACHE_ACTION_GROW_NOW && COI_CACHE_MODE_ONDEMAND_SYNC;
          flags = 0x00020002;
          res = COI::ProcessSetCacheSize(
               m_process,             // in_Process
               mic_2m_buffer_size,    // in_HugePagePoolSize
               flags,                 // inHugeFlags
               mic_4k_buffer_size,    // in_SmallPagePoolSize
               flags,                 // inSmallFlags
               0,                     // in_NumDependencies
               0,                     // in_pDependencies
               0                      // out_PCompletion
          );
          OFFLOAD_DEBUG_TRACE(2,
              "Reserve target buffers 4K pages = %d  2M pages = %d\n",
                  mic_4k_buffer_size, mic_2m_buffer_size);
           check_result(res, c_process_set_cache_size, m_index, res);
       }
       else {
             OFFLOAD_DEBUG_TRACE(2,
                 "Reserve target buffers not supported in current MPSS\n");
       }
    }

    // get function handles
    res = COI::ProcessGetFunctionHandles(m_process, c_funcs_total,
                                         m_func_names, m_funcs);
    check_result(res, c_process_get_func_handles, m_index, res);

    // initialize device side
    pid_t pid = init_device();

    // For IDB
    if (__dbg_is_attached) {
        // TODO: we have in-memory executable now.
        // Check with IDB team what should we provide them now?
        // __target_exe is null when the process was created from
        // mic_device_main, so it must be checked before use.
        if (__target_exe != 0 &&
            strlen(__target_exe->name) < MAX_TARGET_NAME) {
            strcpy(__dbg_target_exe_name, __target_exe->name);
        }
        __dbg_target_so_pid = pid;
        __dbg_target_id = m_physical_index;
        __dbg_target_so_loaded();
    }
}
示例#17
0
// Creates the target process for this engine from the in-memory target
// executable, resolves the device function handles, runs the device-side
// initialization and, when __dbg_is_attached is set, publishes the target
// name, pid and physical index through the __dbg_* hooks.
void Engine::init_process(void)
{
    // Build the environment handed to the target process and trace each
    // entry of it when one was produced.
    const char **card_env =
        (const char**) mic_env_vars.create_environ_for_card(m_index);
    if (card_env != 0) {
        const char **entry = card_env;
        while (*entry != 0) {
            OFFLOAD_DEBUG_TRACE(3, "Env Var for card %d: %s\n", m_index, *entry);
            entry++;
        }
    }

    // Look up the handle of the physical engine behind this device
    OFFLOAD_DEBUG_TRACE(2, "Getting device %d (engine %d) handle\n", m_index,
                        m_physical_index);
    COIENGINE engine;
    COIRESULT res = COI::EngineGetHandle(COI_ISA_KNC, m_physical_index,
                                         &engine);
    check_result(res, c_get_engine_handle, m_index, res);

    // Without a registered target executable there is nothing to load,
    // so the device cannot be initialized.
    if (__target_exe == 0) {
        LIBOFFLOAD_ERROR(c_no_target_exe);
        exit(1);
    }

    OFFLOAD_DEBUG_TRACE(2,
        "Loading target executable \"%s\" from %p, size %lld\n",
        __target_exe->name, __target_exe->data, __target_exe->size);

    // The in_DupEnv argument is true only when no card-specific environment
    // was built above.
    const bool dup_env = (card_env == 0);
    res = COI::ProcessCreateFromMemory(
        engine,                 // in_Engine
        __target_exe->name,     // in_pBinaryName
        __target_exe->data,     // in_pBinaryBuffer
        __target_exe->size,     // in_BinaryBufferLength,
        0,                      // in_Argc
        0,                      // in_ppArgv
        dup_env,                // in_DupEnv
        card_env,               // in_ppAdditionalEnv
        mic_proxy_io,           // in_ProxyActive
        mic_proxy_fs_root,      // in_ProxyfsRoot
        mic_buffer_size,        // in_BufferSpace
        mic_library_path,       // in_LibrarySearchPath
        __target_exe->origin,   // in_FileOfOrigin
        __target_exe->offset,   // in_FileOfOriginOffset
        &m_process              // out_pProcess
    );
    check_result(res, c_process_create, m_index, res);

    // Resolve the handles of the device-side entry points
    res = COI::ProcessGetFunctionHandles(m_process, c_funcs_total,
                                         m_func_names, m_funcs);
    check_result(res, c_process_get_func_handles, m_index, res);

    // Run the device-side initialization; the pid it returns is only
    // needed by the debugger hook below.
    pid_t pid = init_device();

    // Everything that follows is for IDB only
    if (!__dbg_is_attached) {
        return;
    }

    // TODO: we have in-memory executable now.
    // Check with IDB team what should we provide them now?
    // The name is copied only when it fits, terminator included.
    if (strlen(__target_exe->name) < MAX_TARGET_NAME) {
        strcpy(__dbg_target_exe_name, __target_exe->name);
    }
    __dbg_target_so_pid = pid;
    __dbg_target_id = m_physical_index;
    __dbg_target_so_loaded();
}
示例#18
0
static void CheckResult(const char *func, MyoError error) {
    if (error != MYO_SUCCESS) {
       LIBOFFLOAD_ERROR(c_myotarget_checkresult, func, error);
        exit(1);
    }
}
示例#19
0
// Classifies one environment entry and splits it into its parts.
//
// Recognized shapes (where <prefix> is the value of `prefix`):
//   <prefix>_<NAME>=...          -> c_mic_var
//   <prefix>_<card>_<NAME>=...   -> c_mic_card_var
//   <prefix>_<card>_ENV=...      -> c_mic_card_env
// Everything else yields c_no_mic. "ENV" without a card number is treated
// as an ordinary variable called ENV (c_mic_var).
//
// Parameters:
//   env_var_string      - the entry to examine; it is not modified
//   card_number         - receives the parsed card number, or any_card when
//                         the entry carries none
//   env_var_name        - receives a pointer into env_var_string at the
//                         first character of the variable name
//   env_var_name_length - receives the length of the name; it is not written
//                         for c_mic_card_env
//   env_var_def         - receives a strdup() copy: the text from the name
//                         onwards for c_mic_var / c_mic_card_var, the text
//                         after "ENV=" for c_mic_card_env. It is NULL when
//                         the copy could not be allocated (the failure is
//                         reported through LIBOFFLOAD_ERROR).
//
// When c_no_mic is returned some outputs may already have been written;
// in that case they only point into env_var_string and own nothing.
MicEnvVarKind MicEnvVar::get_env_var_kind(
    char *env_var_string,
    int *card_number,
    char **env_var_name,
    int *env_var_name_length,
    char **env_var_def
)
{
    int len = strlen(prefix);
    char *c = env_var_string;
    int num = 0;
    bool card_is_set = false;

    // The entry has to start with "<prefix>_"
    if (strncmp(c, prefix, len) != 0 || c[len] != '_') {
        return c_no_mic;
    }
    c += len + 1;

    // Optional "<card>_" part.
    // The <cctype> classifiers are only defined for values representable as
    // unsigned char (or EOF), so each character is converted before it is
    // tested; a plain char may be negative for bytes above 0x7f.
    *card_number = any_card;
    if (isdigit(static_cast<unsigned char>(*c))) {
        while (isdigit(static_cast<unsigned char>(*c))) {
            num = (*c++ - '0') + (num * 10);
        }
        // the digits must be followed by the separating underscore
        if (*c != '_') {
            return c_no_mic;
        }
        c++;
        *card_number = num;
        card_is_set = true;
    }

    // A variable name has to start with a letter
    if (!isalpha(static_cast<unsigned char>(*c))) {
        return c_no_mic;
    }
    *env_var_name = *env_var_def = c;

    if (strncmp(c, "ENV=", 4) == 0) {
        if (!card_is_set) {
            // no card number: plain variable that happens to be named ENV
            *env_var_name_length = 3;
            *env_var_def = strdup(*env_var_def);
            if (*env_var_def == NULL)
                LIBOFFLOAD_ERROR(c_malloc);
            return c_mic_var;
        }
        // card-specific environment: the definition is what follows "ENV="
        *env_var_def = c + strlen("ENV=");
        *env_var_def = strdup(*env_var_def);
        if (*env_var_def == NULL)
            LIBOFFLOAD_ERROR(c_malloc);
        return c_mic_card_env;
    }

    // Measure the name: letters, digits and underscores. *c is known to be
    // a letter at this point, so the name is at least one character long.
    *env_var_name_length = 0;
    while (isalnum(static_cast<unsigned char>(*c)) || *c == '_') {
        c++;
        (*env_var_name_length)++;
    }
    if (*c != '=') {
        return c_no_mic;
    }

    // *env_var_def still points at the start of the name, so the copy holds
    // the whole "NAME=value" text.
    *env_var_def = strdup(*env_var_def);
    if (*env_var_def == NULL)
        LIBOFFLOAD_ERROR(c_malloc);
    return card_is_set ? c_mic_card_var : c_mic_var;
}
示例#20
0
void OffloadDescriptor::scatter_copyin_data()
{
    OFFLOAD_TIMER_START(c_offload_target_scatter_inputs);

    OFFLOAD_DEBUG_TRACE(2, "IN  buffer @ %p size %lld\n",
                        m_in.get_buffer_start(),
                        m_in.get_buffer_size());
    OFFLOAD_DEBUG_DUMP_BYTES(2, m_in.get_buffer_start(),
                             m_in.get_buffer_size());

    // receive data
    for (int i = 0; i < m_vars_total; i++) {
        bool src_is_for_mic = (m_vars[i].direction.out ||
                               m_vars[i].into == NULL);
        void** ptr_addr = src_is_for_mic ?
                          static_cast<void**>(m_vars[i].ptr) :
                          static_cast<void**>(m_vars[i].into);
        int type = src_is_for_mic ? m_vars[i].type.src :
                                    m_vars[i].type.dst;
        bool is_static = src_is_for_mic ?
                         m_vars[i].flags.is_static :
                         m_vars[i].flags.is_static_dstn;
        void *ptr = NULL;

        if (m_vars[i].flags.alloc_disp) {
            int64_t offset = 0;
            m_in.receive_data(&offset, sizeof(offset));
            m_vars[i].offset = -offset;
        }
        if (VAR_TYPE_IS_DV_DATA_SLICE(type) ||
            VAR_TYPE_IS_DV_DATA(type)) {
            ArrDesc *dvp = (type == c_dv_data_slice || type == c_dv_data)?
                  reinterpret_cast<ArrDesc*>(ptr_addr) :
                  *reinterpret_cast<ArrDesc**>(ptr_addr);
            ptr_addr = reinterpret_cast<void**>(&dvp->Base);
        }

        // Set pointer values
        switch (type) {
            case c_data_ptr_array:
                {
                    int j = m_vars[i].ptr_arr_offset;
                    int max_el = j + m_vars[i].count;
                    char *dst_arr_ptr = (src_is_for_mic)?
                        *(reinterpret_cast<char**>(m_vars[i].ptr)) :
                        reinterpret_cast<char*>(m_vars[i].into);

                    for (; j < max_el; j++) {
                        if (src_is_for_mic) {
                            m_vars[j].ptr =
                                dst_arr_ptr + m_vars[j].ptr_arr_offset;
                        }
                        else {
                            m_vars[j].into =
                                dst_arr_ptr + m_vars[j].ptr_arr_offset;
                        }
                    }
                }
                break;
            case c_data:
            case c_void_ptr:
            case c_cean_var:
            case c_dv:
                break;

            case c_string_ptr:
            case c_data_ptr:
            case c_cean_var_ptr:
            case c_dv_ptr:
                if (m_vars[i].alloc_if) {
                    void *buf;
                    if (m_vars[i].flags.sink_addr) {
                        m_in.receive_data(&buf, sizeof(buf));
                    }
                    else {
                        buf = m_buffers.front();
                        m_buffers.pop_front();
                    }
                    if (buf) {
                        if (!is_static) {
                            if (!m_vars[i].flags.sink_addr) {
                                // increment buffer reference
                                OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs);
                                BufferAddRef(buf);
                                OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs);
                            }
                            add_ref_count(buf, 0 == m_vars[i].flags.sink_addr);
                        }
                        ptr = static_cast<char*>(buf) +
                                  m_vars[i].mic_offset +
                                  (m_vars[i].flags.is_stack_buf ?
                                   0 : m_vars[i].offset);
                    }
                    *ptr_addr = ptr;
                }
                else if (m_vars[i].flags.sink_addr) {
                    void *buf;
                    m_in.receive_data(&buf, sizeof(buf));
                    void *ptr = static_cast<char*>(buf) +
                                    m_vars[i].mic_offset +
                                    (m_vars[i].flags.is_stack_buf ?
                                     0 : m_vars[i].offset);
                    *ptr_addr = ptr;
                }
                break;

            case c_func_ptr:
                break;

            case c_dv_data:
            case c_dv_ptr_data:
            case c_dv_data_slice:
            case c_dv_ptr_data_slice:
                if (m_vars[i].alloc_if) {
                    void *buf;
                    if (m_vars[i].flags.sink_addr) {
                        m_in.receive_data(&buf, sizeof(buf));
                    }
                    else {
                        buf = m_buffers.front();
                        m_buffers.pop_front();
                    }
                    if (buf) {
                        if (!is_static) {
                            if (!m_vars[i].flags.sink_addr) {
                                // increment buffer reference
                                OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs);
                                BufferAddRef(buf);
                                OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs);
                            }
                            add_ref_count(buf, 0 == m_vars[i].flags.sink_addr);
                        }
                        ptr = static_cast<char*>(buf) +
                            m_vars[i].mic_offset + m_vars[i].offset;
                    }
                    *ptr_addr = ptr;
                }
                else if (m_vars[i].flags.sink_addr) {
                    void *buf;
                    m_in.receive_data(&buf, sizeof(buf));
                    ptr = static_cast<char*>(buf) +
                          m_vars[i].mic_offset + m_vars[i].offset;
                    *ptr_addr = ptr;
                }
                break;

            default:
                LIBOFFLOAD_ERROR(c_unknown_var_type, type);
                abort();
        }
        // Release obsolete buffers for stack of persistent objects
        if (type = c_data_ptr &&
            m_vars[i].flags.is_stack_buf &&
            !m_vars[i].direction.bits &&
            m_vars[i].alloc_if &&
            m_vars[i].size != 0) {
                for (int j=0; j < m_vars[i].size; j++) {
                    void *buf;
                    m_in.receive_data(&buf, sizeof(buf));
                    BufferReleaseRef(buf);
                    ref_data.erase(buf);
                }
        }
        // Do copyin
        switch (m_vars[i].type.dst) {
            case c_data_ptr_array:
                break;
            case c_data:
            case c_void_ptr:
            case c_cean_var:
                if (m_vars[i].direction.in &&
                    !m_vars[i].flags.is_static_dstn) {
                    int64_t size;
                    int64_t disp;
                    char* ptr = m_vars[i].into ?
                                 static_cast<char*>(m_vars[i].into) :
                                 static_cast<char*>(m_vars[i].ptr);
                    if (m_vars[i].type.dst == c_cean_var) {
                        m_in.receive_data((&size), sizeof(int64_t));
                        m_in.receive_data((&disp), sizeof(int64_t));
                    }
                    else {
                        size = m_vars[i].size;
                        disp = 0;
                    }
                    m_in.receive_data(ptr + disp, size);
                }
                break;

            case c_dv:
                if (m_vars[i].direction.bits ||
                    m_vars[i].alloc_if ||
                    m_vars[i].free_if) {
                    char* ptr = m_vars[i].into ?
                                 static_cast<char*>(m_vars[i].into) :
                                 static_cast<char*>(m_vars[i].ptr);
                    m_in.receive_data(ptr + sizeof(uint64_t),
                                      m_vars[i].size - sizeof(uint64_t));
                }
                break;

            case c_string_ptr:
            case c_data_ptr:
            case c_cean_var_ptr:
            case c_dv_ptr:
            case c_dv_data:
            case c_dv_ptr_data:
            case c_dv_data_slice:
            case c_dv_ptr_data_slice:
                break;

            case c_func_ptr:
                if (m_vars[i].direction.in) {
                    m_in.receive_func_ptr((const void**) m_vars[i].ptr);
                }
                break;

            default:
                LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst);
                abort();
        }
    }

    OFFLOAD_TRACE(1, "Total copyin data received from host: [%lld] bytes\n",
                  m_in.get_tfr_size());

    OFFLOAD_TIMER_STOP(c_offload_target_scatter_inputs);

    OFFLOAD_TIMER_START(c_offload_target_compute);
}
示例#21
0
// Maps an offload-report tag to its message text.
//
// Each c_report_* tag is translated to the matching msg_c_report_* message
// id and looked up through offload_get_message_str(). A tag that has no
// entry here is reported as c_report_unknown_trace_node and aborts the
// process.
char const * report_get_message_str(error_types input_tag)
{
    switch (input_tag) {
        case c_report_title:
            return (offload_get_message_str(msg_c_report_title));
        case c_report_from_file:
            return (offload_get_message_str(msg_c_report_from_file));
        case c_report_offload:
            return (offload_get_message_str(msg_c_report_offload));
        case c_report_mic:
            return (offload_get_message_str(msg_c_report_mic));
        case c_report_file:
            return (offload_get_message_str(msg_c_report_file));
        case c_report_line:
            return (offload_get_message_str(msg_c_report_line));
        case c_report_host:
            return (offload_get_message_str(msg_c_report_host));
        case c_report_tag:
            return (offload_get_message_str(msg_c_report_tag));
        case c_report_cpu_time:
            return (offload_get_message_str(msg_c_report_cpu_time));
        case c_report_seconds:
            return (offload_get_message_str(msg_c_report_seconds));
        case c_report_cpu_to_mic_data:
            return (offload_get_message_str(msg_c_report_cpu_to_mic_data));
        case c_report_bytes:
            return (offload_get_message_str(msg_c_report_bytes));
        case c_report_mic_time:
            return (offload_get_message_str(msg_c_report_mic_time));
        case c_report_mic_to_cpu_data:
            return (offload_get_message_str(msg_c_report_mic_to_cpu_data));
        case c_report_compute:
            return (offload_get_message_str(msg_c_report_compute));
        case c_report_copyin_data:
            return (offload_get_message_str(msg_c_report_copyin_data));
        case c_report_copyout_data:
            return (offload_get_message_str(msg_c_report_copyout_data));
        case c_report_create_buf_host:
            // the lookup takes the msg_ id, not the report tag itself
            return (offload_get_message_str(msg_c_report_create_buf_host));
        case c_report_create_buf_mic:
            return (offload_get_message_str(msg_c_report_create_buf_mic));
        case c_report_destroy:
            return (offload_get_message_str(msg_c_report_destroy));
        case c_report_gather_copyin_data:
            return (offload_get_message_str(msg_c_report_gather_copyin_data));
        case c_report_gather_copyout_data:
            return (offload_get_message_str(msg_c_report_gather_copyout_data));
        case c_report_state_signal:
            return (offload_get_message_str(msg_c_report_state_signal));
        case c_report_signal:
            return (offload_get_message_str(msg_c_report_signal));
        case c_report_wait:
            return (offload_get_message_str(msg_c_report_wait));
        case c_report_init:
            return (offload_get_message_str(msg_c_report_init));
        case c_report_init_func:
            return (offload_get_message_str(msg_c_report_init_func));
        case c_report_logical_card:
            return (offload_get_message_str(msg_c_report_logical_card));
        case c_report_mic_myo_fptr:
            return (offload_get_message_str(msg_c_report_mic_myo_fptr));
        case c_report_mic_myo_shared:
            return (offload_get_message_str(msg_c_report_mic_myo_shared));
        case c_report_myoacquire:
            return (offload_get_message_str(msg_c_report_myoacquire));
        case c_report_myofini:
            return (offload_get_message_str(msg_c_report_myofini));
        case c_report_myoinit:
            return (offload_get_message_str(msg_c_report_myoinit));
        case c_report_myoregister:
            return (offload_get_message_str(msg_c_report_myoregister));
        case c_report_myorelease:
            return (offload_get_message_str(msg_c_report_myorelease));
        case c_report_myosharedalignedfree:
            return (
                offload_get_message_str(msg_c_report_myosharedalignedfree));
        case c_report_myosharedalignedmalloc:
            return (
                offload_get_message_str(msg_c_report_myosharedalignedmalloc));
        case c_report_myosharedfree:
            return (offload_get_message_str(msg_c_report_myosharedfree));
        case c_report_myosharedmalloc:
            return (offload_get_message_str(msg_c_report_myosharedmalloc));
        case c_report_physical_card:
            return (offload_get_message_str(msg_c_report_physical_card));
        case c_report_receive_pointer_data:
            return (
                offload_get_message_str(msg_c_report_receive_pointer_data));
        case c_report_received_pointer_data:
            return (
                offload_get_message_str(msg_c_report_received_pointer_data));
        case c_report_register:
            return (offload_get_message_str(msg_c_report_register));
        case c_report_scatter_copyin_data:
            return (offload_get_message_str(msg_c_report_scatter_copyin_data));
        case c_report_scatter_copyout_data:
            return (
                offload_get_message_str(msg_c_report_scatter_copyout_data));
        case c_report_send_pointer_data:
            return (offload_get_message_str(msg_c_report_send_pointer_data));
        case c_report_sent_pointer_data:
            return (offload_get_message_str(msg_c_report_sent_pointer_data));
        case c_report_start:
            return (offload_get_message_str(msg_c_report_start));
        case c_report_start_target_func:
            return (offload_get_message_str(msg_c_report_start_target_func));
        case c_report_state:
            return (offload_get_message_str(msg_c_report_state));
        case c_report_unregister:
            return (offload_get_message_str(msg_c_report_unregister));
        case c_report_var:
            return (offload_get_message_str(msg_c_report_var));

        default:
            LIBOFFLOAD_ERROR(c_report_unknown_trace_node);
            abort();
    }
}
示例#22
0
// Runs one offloaded function.
//
// Parameters:
//   buffer_count    - number of entries in `buffers`
//   buffers         - buffer addresses; when func->data_offset is 0 and
//                     there is input or output data, the last entry is the
//                     combined in/out data buffer and is not passed on to
//                     the descriptor
//   misc_data       - starts with a FunctionDescriptor; when data_offset is
//                     non-zero the input data follows at that offset
//   misc_data_len   - not used by this function
//   return_data     - output data area used when data_offset is non-zero
//   return_data_len - not used by this function
//
// The function copies the global settings out of the FunctionDescriptor,
// builds a local OffloadDescriptor (variable descriptors, in/out
// marshallers, buffers), looks up the entry point by name in
// __offload_entries and calls it. An unknown entry name or a failed
// allocation is reported and ends the process with exit(1).
//
// Note: func->in_datalen and func->out_datalen are reduced in place by the
// size of the variable descriptors and of the timer data respectively.
void OffloadDescriptor::offload(
    uint32_t  buffer_count,
    void**    buffers,
    void*     misc_data,
    uint16_t  misc_data_len,
    void*     return_data,
    uint16_t  return_data_len
)
{
    FunctionDescriptor *func = (FunctionDescriptor*) misc_data;
    const char *name = func->data;
    OffloadDescriptor ofld;
    char *in_data = 0;
    char *out_data = 0;
    char *timer_data = 0;

    console_enabled = func->console_enabled;
    timer_enabled = func->timer_enabled;
    offload_report_level = func->offload_report_level;
    offload_number = func->offload_number;
    ofld.set_offload_number(func->offload_number);

#ifdef SEP_SUPPORT
    // resume sampling when this is the first offload in flight
    if (sep_monitor) {
        if (__sync_fetch_and_add(&sep_counter, 1) == 0) {
            OFFLOAD_DEBUG_TRACE(2, "VTResumeSampling\n");
            VTResumeSampling();
        }
    }
#endif // SEP_SUPPORT

    OFFLOAD_DEBUG_TRACE_1(2, ofld.get_offload_number(),
                          c_offload_start_target_func,
                          "Offload \"%s\" started\n", name);

    // initialize timer data
    OFFLOAD_TIMER_INIT();

    OFFLOAD_TIMER_START(c_offload_target_total_time);

    OFFLOAD_TIMER_START(c_offload_target_descriptor_setup);

    // get input/output buffer addresses
    if (func->in_datalen > 0 || func->out_datalen > 0) {
        if (func->data_offset != 0) {
            // data travels inline: input inside misc_data, output in
            // return_data
            in_data = (char*) misc_data + func->data_offset;
            out_data = (char*) return_data;
        }
        else {
            // data travels in the last buffer, shared by input and output
            char *inout_buf = (char*) buffers[--buffer_count];
            in_data = inout_buf;
            out_data = inout_buf;
        }
    }

    // assign variable descriptors
    ofld.m_vars_total = func->vars_num;
    if (ofld.m_vars_total > 0) {
        uint64_t var_data_len = ofld.m_vars_total * sizeof(VarDesc);

        ofld.m_vars = (VarDesc*) malloc(var_data_len);
        if (ofld.m_vars == NULL) {
            // do not fall through to the memcpy below with a null
            // destination
            LIBOFFLOAD_ERROR(c_malloc);
            exit(1);
        }
        memcpy(ofld.m_vars, in_data, var_data_len);

        // the descriptors sit at the front of the input data
        in_data += var_data_len;
        func->in_datalen -= var_data_len;
    }

    // timer data
    if (func->timer_enabled) {
        uint64_t timer_data_len = OFFLOAD_TIMER_DATALEN();

        // the timer record sits at the front of the output data
        timer_data = out_data;
        out_data += timer_data_len;
        func->out_datalen -= timer_data_len;
    }

    // init Marshallers
    ofld.m_in.init_buffer(in_data, func->in_datalen);
    ofld.m_out.init_buffer(out_data, func->out_datalen);

    // copy buffers to offload descriptor
    std::copy(buffers, buffers + buffer_count,
              std::back_inserter(ofld.m_buffers));

    OFFLOAD_TIMER_STOP(c_offload_target_descriptor_setup);

    // find offload entry address
    OFFLOAD_TIMER_START(c_offload_target_func_lookup);

    offload_func_with_parms entry = (offload_func_with_parms)
        __offload_entries.find_addr(name);

    if (entry == NULL) {
#if OFFLOAD_DEBUG > 0
        if (console_enabled > 2) {
            __offload_entries.dump();
        }
#endif
        LIBOFFLOAD_ERROR(c_offload_descriptor_offload, name);
        exit(1);
    }

    OFFLOAD_TIMER_STOP(c_offload_target_func_lookup);

    OFFLOAD_TIMER_START(c_offload_target_func_time);

    // execute offload entry
    entry(&ofld);

    OFFLOAD_TIMER_STOP(c_offload_target_func_time);

    OFFLOAD_TIMER_STOP(c_offload_target_total_time);

    // copy timer data to the buffer
    OFFLOAD_TIMER_TARGET_DATA(timer_data);

    OFFLOAD_DEBUG_TRACE(2, "Offload \"%s\" finished\n", name);

#ifdef SEP_SUPPORT
    // pause sampling when the last offload in flight has finished
    if (sep_monitor) {
        if (__sync_sub_and_fetch(&sep_counter, 1) == 0) {
            OFFLOAD_DEBUG_TRACE(2, "VTPauseSampling\n");
            VTPauseSampling();
        }
    }
#endif // SEP_SUPPORT
}
示例#23
0
// Acquires a target device for an OpenMP offload and returns a freshly
// allocated offload descriptor for it.
//
//   device_num - requested device number, or 0 (null) to use
//                __omp_device_num
//   file, line - source position, passed on to OFFLOAD_TIMER_INIT
//
// The process exits with status 1 when the library cannot be initialized,
// the device number is negative, or the device cannot be reserved.
extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE1(
    const int*  device_num,
    const char* file,
    uint64_t    line
)
{
    // the library has to be initialized with at least one device available
    if (!__offload_init_library()) {
        LIBOFFLOAD_ERROR(c_device_is_not_available);
        exit(1);
    }

    // OFFLOAD_TIMER_INIT must come after the __offload_init_library call
    OffloadHostTimerData * timer_data = OFFLOAD_TIMER_INIT(file, line);

    OFFLOAD_TIMER_START(timer_data, c_offload_host_total_offload);

    OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);

    // with this init type every engine is brought up on the first offload
    if (__offload_init_type == c_init_on_offload_all) {
        int engine_idx = 0;
        while (engine_idx < mic_engines_total) {
            mic_engines[engine_idx].init();
            engine_idx++;
        }
    }

    OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);

    OFFLOAD_TIMER_START(timer_data, c_offload_host_target_acquire);

    // fall back to the default device number when none was supplied
    int target_number = (device_num != 0) ? *device_num : __omp_device_num;

    // only non-negative device numbers are accepted
    if (target_number < 0) {
        LIBOFFLOAD_ERROR(c_omp_invalid_device_num);
        exit(1);
    }

    // should we do this for OpenMP?
    target_number = target_number % mic_engines_total;

    // the device has to be reserved in ORSL before it is used
    if (!ORSL::reserve(target_number)) {
        LIBOFFLOAD_ERROR(c_device_is_not_available);
        exit(1);
    }

    // with this init type only the selected engine is brought up
    OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);

    if (__offload_init_type == c_init_on_offload) {
        mic_engines[target_number].init();
    }

    OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);

    OFFLOAD descriptor =
        new OffloadDescriptor(target_number, 0, true, true, timer_data);

    OFFLOAD_TIMER_HOST_MIC_NUM(timer_data, target_number);

    Offload_Report_Prolog(timer_data);

    OFFLOAD_DEBUG_TRACE_1(2, timer_data->offload_number, c_offload_start,
                          "Starting OpenMP offload, device = %d\n",
                          target_number);

    OFFLOAD_TIMER_STOP(timer_data, c_offload_host_target_acquire);

    return descriptor;
}
示例#24
0
void OffloadDescriptor::scatter_copyin_data()
{
    OFFLOAD_TIMER_START(c_offload_target_scatter_inputs);

    OFFLOAD_DEBUG_TRACE(2, "IN  buffer @ %p size %lld\n",
                        m_in.get_buffer_start(),
                        m_in.get_buffer_size());
    OFFLOAD_DEBUG_DUMP_BYTES(2, m_in.get_buffer_start(),
                             m_in.get_buffer_size());

    // receive data
    for (int i = 0; i < m_vars_total; i++) {
        bool src_is_for_mic = (m_vars[i].direction.out ||
                               m_vars[i].into == NULL);
        void** ptr_addr = src_is_for_mic ?
                          static_cast<void**>(m_vars[i].ptr) :
                          static_cast<void**>(m_vars[i].into);
        int type = src_is_for_mic ? m_vars_extra[i].type_src :
                                    m_vars_extra[i].type_dst;
        bool is_static = src_is_for_mic ?
                         m_vars[i].flags.is_static :
                         m_vars[i].flags.is_static_dstn;
        void *ptr = NULL;

        if (m_vars[i].flags.alloc_disp) {
            int64_t offset = 0;
            m_in.receive_data(&offset, sizeof(offset));
        }
        if (VAR_TYPE_IS_DV_DATA_SLICE(type) ||
            VAR_TYPE_IS_DV_DATA(type)) {
            ArrDesc *dvp = (type == c_dv_data_slice || type == c_dv_data)?
                  reinterpret_cast<ArrDesc*>(ptr_addr) :
                  *reinterpret_cast<ArrDesc**>(ptr_addr);
            ptr_addr = reinterpret_cast<void**>(&dvp->Base);
        }
        // Set pointer values
        switch (type) {
            case c_data_ptr_array:
                {
                    int j = m_vars[i].ptr_arr_offset;
                    int max_el = j + m_vars[i].count;
                    char *dst_arr_ptr = (src_is_for_mic)?
                        *(reinterpret_cast<char**>(m_vars[i].ptr)) :
                        reinterpret_cast<char*>(m_vars[i].into);

                    // if is_pointer is 1 it means that pointer array itself
                    // is defined either via pointer or as class member.
                    // i.e. arr_ptr[0:5] or this->ARR[0:5]
                    if (m_vars[i].flags.is_pointer) {
                        int64_t offset = 0;
                        m_in.receive_data(&offset, sizeof(offset));
                        dst_arr_ptr = *((char**)dst_arr_ptr) + offset;
                    }
                    for (; j < max_el; j++) {
                        if (src_is_for_mic) {
                            m_vars[j].ptr =
                                dst_arr_ptr + m_vars[j].ptr_arr_offset;
                        }
                        else {
                            m_vars[j].into =
                                dst_arr_ptr + m_vars[j].ptr_arr_offset;
                        }
                    }
                }
                break;
            case c_data:
            case c_void_ptr:
            case c_void_ptr_ptr:
            case c_cean_var:
            case c_dv:
                break;

            case c_string_ptr:
            case c_data_ptr:
            case c_string_ptr_ptr:
            case c_data_ptr_ptr:
            case c_cean_var_ptr:
            case c_cean_var_ptr_ptr:
            case c_dv_ptr:
                // Don't need ptr_addr value for variables from stack buffer.
                // Stack buffer address is set at var_desc with #0.
                if (i != 0 && m_vars[i].flags.is_stack_buf) {
                    break;
                }
                if (TYPE_IS_PTR_TO_PTR(m_vars_extra[i].type_src) ||
                    TYPE_IS_PTR_TO_PTR(m_vars_extra[i].type_dst)) {
                    int64_t offset;

                    m_in.receive_data(&offset, sizeof(offset));
                    ptr_addr = reinterpret_cast<void**>(
                                 reinterpret_cast<char*>(*ptr_addr) + offset);

                }

                if (m_vars[i].alloc_if && !m_vars[i].flags.preallocated) {
                    void *buf = NULL;
                    if (m_vars[i].flags.sink_addr) {
                        m_in.receive_data(&buf, sizeof(buf));
                    }
                    else {
                        buf = m_buffers.front();
                        m_buffers.pop_front();
                    }
                    if (buf) {
                        if (!is_static) {
                            if (!m_vars[i].flags.sink_addr) {
                                // increment buffer reference
                                OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs);
                                BufferAddRef(buf);
                                OFFLOAD_TRACE(1, "Calling COIBufferAddRef %p\n", buf);
                                OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs);
                            }
                            add_ref_count(buf, 0 == m_vars[i].flags.sink_addr);
                            OFFLOAD_TRACE(1, "    AddRef count = %d\n",
                                              ((RefInfo *) ref_data[buf])->count);
                        }
                        ptr = static_cast<char*>(buf) +
                                  m_vars[i].mic_offset +
                                  (m_vars[i].flags.is_stack_buf ?
                                   0 : m_vars[i].offset);

                    }
                    *ptr_addr = ptr;
                }
                else if (m_vars[i].flags.sink_addr) {
                    void *buf;
                    m_in.receive_data(&buf, sizeof(buf));
                    void *ptr = static_cast<char*>(buf) +
                                    m_vars[i].mic_offset +
                                    (m_vars[i].flags.is_stack_buf ?
                                     0 : m_vars[i].offset);
                    *ptr_addr = ptr;
                }
                break;

            case c_func_ptr:
            case c_func_ptr_ptr:
                break;

            case c_dv_data:
            case c_dv_ptr_data:
            case c_dv_data_slice:
            case c_dv_ptr_data_slice:
                if (m_vars[i].alloc_if) {
                    void *buf;
                    if (m_vars[i].flags.sink_addr) {
                        m_in.receive_data(&buf, sizeof(buf));
                    }
                    else {
                        buf = m_buffers.front();
                        m_buffers.pop_front();
                    }
                    if (buf) {
                        if (!is_static) {
                            if (!m_vars[i].flags.sink_addr) {
                                // increment buffer reference
                                OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs);
                                BufferAddRef(buf);
                                OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs);
                            }
                            add_ref_count(buf, 0 == m_vars[i].flags.sink_addr);
                        }
                        ptr = static_cast<char*>(buf) +
                            m_vars[i].mic_offset + m_vars[i].offset;
                    }
                    *ptr_addr = ptr;
                }
                else if (m_vars[i].flags.sink_addr) {
                    void *buf;
                    m_in.receive_data(&buf, sizeof(buf));
                    ptr = static_cast<char*>(buf) +
                          m_vars[i].mic_offset + m_vars[i].offset;
                    *ptr_addr = ptr;
                }
                break;

            default:
                LIBOFFLOAD_ERROR(c_unknown_var_type, type);
                abort();
        }
        // Release obsolete buffers for stack of persistent objects.
        // The vardesc with i==0 and flags.is_stack_buf==TRUE is always for
        // stack buffer pointer.
        if (i == 0 &&
            m_vars[i].flags.is_stack_buf &&
            !m_vars[i].direction.bits &&
            m_vars[i].alloc_if &&
            m_vars[i].size != 0) {
                for (int j=0; j < m_vars[i].size; j++) {
                    void *buf;
                    m_in.receive_data(&buf, sizeof(buf));
                    OFFLOAD_TRACE(4, "Releasing stack buffer %p\n", buf);
                    BufferReleaseRef(buf);
                    ref_data.erase(buf);
                }
        }
        // Do copyin
        switch (m_vars_extra[i].type_dst) {
            case c_data_ptr_array:
                break;
            case c_data:
            case c_void_ptr:
            case c_void_ptr_ptr:
            case c_cean_var:
                if (m_vars[i].direction.in &&
                    !m_vars[i].flags.is_static_dstn) {
                    int64_t size;
                    int64_t disp;
                    char* ptr = m_vars[i].into ?
                                 static_cast<char*>(m_vars[i].into) :
                                 static_cast<char*>(m_vars[i].ptr);
                    if (m_vars_extra[i].type_dst == c_cean_var) {
                        m_in.receive_data((&size), sizeof(int64_t));
                        m_in.receive_data((&disp), sizeof(int64_t));
                    }
                    else {
                        size = m_vars[i].size;
                        disp = 0;
                    }
                    m_in.receive_data(ptr + disp, size);
                }
                break;

            case c_dv:
                if (m_vars[i].direction.bits ||
                    m_vars[i].alloc_if ||
                    m_vars[i].free_if) {
                    char* ptr = m_vars[i].into ?
                                 static_cast<char*>(m_vars[i].into) :
                                 static_cast<char*>(m_vars[i].ptr);
                    m_in.receive_data(ptr + sizeof(uint64_t),
                                      m_vars[i].size - sizeof(uint64_t));
                }
                break;

            case c_string_ptr:
            case c_data_ptr:
            case c_string_ptr_ptr:
            case c_data_ptr_ptr:
            case c_cean_var_ptr:
            case c_cean_var_ptr_ptr:
            case c_dv_ptr:
            case c_dv_data:
            case c_dv_ptr_data:
            case c_dv_data_slice:
            case c_dv_ptr_data_slice:
                break;

            case c_func_ptr:
            case c_func_ptr_ptr:
                if (m_vars[i].direction.in) {
                    m_in.receive_func_ptr((const void**) m_vars[i].ptr);
                }
                break;

            default:
                LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars_extra[i].type_dst);
                abort();
        }
    }

    OFFLOAD_TRACE(1, "Total copyin data received from host: [%lld] bytes\n",
                  m_in.get_tfr_size());

    OFFLOAD_TIMER_STOP(c_offload_target_scatter_inputs);

    OFFLOAD_TIMER_START(c_offload_target_compute);
}
示例#25
0
// Parses a list of environment variable definitions of the form
//     NAME=VALUE|NAME=VALUE|...
// and hands every definition to add_env_var() for the given card.
//
//   card_number        - forwarded unchanged to add_env_var().
//   env_vars_def_list  - NUL-terminated definition list; a NULL pointer is
//                        treated as an empty list. A leading '"' is skipped.
//
// A name must start with a letter and continue with letters, digits or '_'.
// A value ends at the next unescaped '|' (a backslash escapes the following
// character) and may start with '"'. Parsing stops with an error message on
// the first malformed definition or on allocation failure; definitions that
// were already registered stay registered.
void MicEnvVar::mic_parse_env_var_list(
    int card_number, char *env_vars_def_list)
{
    char *c = env_vars_def_list;
    char *env_var_name;
    int  env_var_name_length;
    char *env_var_def;
    bool var_is_quoted;

    // Nothing to parse.
    if (c == NULL) {
        return;
    }

    if (*c == '"') {
        c++;
    }
    while (*c != 0) {
        var_is_quoted = false;
        env_var_name = c;
        env_var_name_length = 0;
        // <cctype> classifiers require a value representable as
        // unsigned char, so convert before calling them.
        if (isalpha(static_cast<unsigned char>(*c))) {
            while (isalnum(static_cast<unsigned char>(*c)) || *c == '_') {
                c++;
                env_var_name_length++;
            }
        }
        else {
            LIBOFFLOAD_ERROR(c_mic_parse_env_var_list1);
            return;
        }
        if (*c != '=') {
            LIBOFFLOAD_ERROR(c_mic_parse_env_var_list2);
            return;
        }
        c++;

        if (*c == '"') {
            var_is_quoted = true;
            c++;
        }
        // Environment variable values that contain | will need to be escaped.
        while (*c != 0 && *c != '|' &&
               (!var_is_quoted || *c != '"'))
        {
            // skip escaped symbol; a trailing backslash has nothing to
            // escape and must not carry the scan past the terminating NUL
            if (*c == '\\' && c[1] != 0) {
                c++;
            }
            c++;
        }
        if (var_is_quoted) {
            // Step over the character that ended the quoted scan (normally
            // the closing "), unless the input ended before it was found.
            if (*c != 0) {
                c++;
            }
            while (*c != 0 && *c != '|') {
                c++;
            }
        }

        // Copy "NAME=VALUE" into its own buffer; one extra byte is needed
        // for the terminating NUL.
        size_t sz = static_cast<size_t>(c - env_var_name);
        env_var_def = (char*)malloc(sz + 1);
        if (env_var_def == NULL) {
            LIBOFFLOAD_ERROR(c_malloc);
            return;
        }
        memcpy(env_var_def, env_var_name, sz);
        env_var_def[sz] = 0;

        // Move to the start of the next definition, ignoring blanks that
        // follow the separator.
        if (*c == '|') {
            c++;
            while (*c != 0 && *c == ' ') {
                c++;
            }
        }
        add_env_var(card_number,
                    env_var_name,
                    env_var_name_length,
                    env_var_def);
    }
}
示例#26
0
 void CheckResult(const char *func, MyoError error) const {
     if (error != MYO_SUCCESS) {
          LIBOFFLOAD_ERROR(c_myowrapper_checkresult, func, error);
         exit(1);
     }
 }
示例#27
0
// Collects the results of an offload after the compute phase.
//
// For every variable descriptor in m_vars this routine
//   * writes "out" data (plain data and function pointers) to m_out, and
//   * drops one buffer reference (BufReleaseRef) for pointer-like and
//     dope-vector data whose descriptor has free_if set and is not static.
// The source side (ptr / type.src) is always handled; the destination side
// (into / type.dst) is handled only when an "into" address is present.
// An unrecognized variable type is reported and the process is aborted.
void OffloadDescriptor::gather_copyout_data()
{
    OFFLOAD_TIMER_STOP(c_offload_target_compute);

    OFFLOAD_TIMER_START(c_offload_target_gather_outputs);

    for (int i = 0; i < m_vars_total; i++) {
        // The source buffer is released here only when the variable is
        // copied out or has no separate "into" destination.
        bool src_is_for_mic = (m_vars[i].direction.out ||
                               m_vars[i].into == NULL);

        // Source side of the descriptor.
        switch (m_vars[i].type.src) {
            case c_data_ptr_array:
                break;
            case c_data:
            case c_void_ptr:
            case c_cean_var:
                // Non-static "out" data is sent as-is.
                if (m_vars[i].direction.out &&
                    !m_vars[i].flags.is_static) {
                    m_out.send_data(
                        static_cast<char*>(m_vars[i].ptr) + m_vars[i].disp,
                        m_vars[i].size);
                }
                break;

            case c_dv:
                break;

            case c_string_ptr:
            case c_data_ptr:
            case c_cean_var_ptr:
            case c_dv_ptr:
                if (m_vars[i].free_if &&
                    src_is_for_mic &&
                    !m_vars[i].flags.is_static) {
                    // Recover the buffer start from the stored pointer by
                    // undoing the offsets; stack buffers carry no "offset".
                    void *buf = *static_cast<char**>(m_vars[i].ptr) -
                                    m_vars[i].mic_offset -
                                    (m_vars[i].flags.is_stack_buf?
                                     0 : m_vars[i].offset);
                    if (buf == NULL) {
                        break;
                    }
                    // decrement buffer reference count
                    OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs);
                    BufReleaseRef(buf);
                    OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs);
                }
                break;

            case c_func_ptr:
                if (m_vars[i].direction.out) {
                    m_out.send_func_ptr(*((void**) m_vars[i].ptr));
                }
                break;

            case c_dv_data:
            case c_dv_ptr_data:
            case c_dv_data_slice:
            case c_dv_ptr_data_slice:
                if (src_is_for_mic &&
                    m_vars[i].free_if &&
                    !m_vars[i].flags.is_static) {
                    // ptr is the descriptor itself for c_dv_data and
                    // c_dv_data_slice, and a pointer to it otherwise.
                    ArrDesc *dvp = (m_vars[i].type.src == c_dv_data ||
                                    m_vars[i].type.src == c_dv_data_slice) ?
                        static_cast<ArrDesc*>(m_vars[i].ptr) :
                        *static_cast<ArrDesc**>(m_vars[i].ptr);

                    void *buf = reinterpret_cast<char*>(dvp->Base) -
                                m_vars[i].mic_offset -
                                m_vars[i].offset;

                    if (buf == NULL) {
                        break;
                    }

                    // decrement buffer reference count
                    OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs);
                    BufReleaseRef(buf);
                    OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs);
                }
                break;

            default:
                // Report the value that was actually switched on.
                LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.src);
                abort();
        }

        // Destination side of the descriptor; only buffer references are
        // released here, no data is sent.
        if (m_vars[i].into) {
            switch (m_vars[i].type.dst) {
                case c_data_ptr_array:
                    break;
                case c_data:
                case c_void_ptr:
                case c_cean_var:
                case c_dv:
                    break;

                case c_string_ptr:
                case c_data_ptr:
                case c_cean_var_ptr:
                case c_dv_ptr:
                    if (m_vars[i].direction.in &&
                        m_vars[i].free_if &&
                        !m_vars[i].flags.is_static_dstn) {
                        void *buf = *static_cast<char**>(m_vars[i].into) -
                                    m_vars[i].mic_offset -
                                    (m_vars[i].flags.is_stack_buf?
                                     0 : m_vars[i].offset);

                        if (buf == NULL) {
                            break;
                        }
                        // decrement buffer reference count
                        OFFLOAD_TIMER_START(
                            c_offload_target_release_buffer_refs);
                        BufReleaseRef(buf);
                        OFFLOAD_TIMER_STOP(
                            c_offload_target_release_buffer_refs);
                    }
                    break;

                case c_func_ptr:
                    break;

                case c_dv_data:
                case c_dv_ptr_data:
                case c_dv_data_slice:
                case c_dv_ptr_data_slice:
                    if (m_vars[i].free_if &&
                        m_vars[i].direction.in &&
                        !m_vars[i].flags.is_static_dstn) {
                        ArrDesc *dvp =
                            (m_vars[i].type.dst == c_dv_data_slice ||
                             m_vars[i].type.dst == c_dv_data) ?
                            static_cast<ArrDesc*>(m_vars[i].into) :
                            *static_cast<ArrDesc**>(m_vars[i].into);
                        void *buf = reinterpret_cast<char*>(dvp->Base) -
                              m_vars[i].mic_offset -
                              m_vars[i].offset;

                        if (buf == NULL) {
                            break;
                        }
                        // decrement buffer reference count
                        OFFLOAD_TIMER_START(
                            c_offload_target_release_buffer_refs);
                        BufReleaseRef(buf);
                        OFFLOAD_TIMER_STOP(
                            c_offload_target_release_buffer_refs);
                    }
                    break;

                default:
                    LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst);
                    abort();
            }
        }
    }

    OFFLOAD_DEBUG_TRACE(2, "OUT buffer @ p %p size %lld\n",
                        m_out.get_buffer_start(),
                        m_out.get_buffer_size());

    OFFLOAD_DEBUG_DUMP_BYTES(2,
                             m_out.get_buffer_start(),
                             m_out.get_buffer_size());

    OFFLOAD_DEBUG_TRACE_1(1, get_offload_number(), c_offload_copyout_data,
                  "Total copyout data sent to host: [%lld] bytes\n",
                  m_out.get_tfr_size());

    OFFLOAD_TIMER_STOP(c_offload_target_gather_outputs);
}
示例#28
0
// Merges the variable descriptors received from the host (vars / vars2,
// vars_total entries) into the locally generated descriptors in m_vars and
// records the effective source/destination types in m_vars_extra.
//
// Exits the process if the host sent more descriptors than exist locally
// or if the type bits of a descriptor pair differ. Descriptors without a
// host counterpart keep their local pointers and types.
void OffloadDescriptor::merge_var_descs(
    VarDesc *vars,
    VarDesc2 *vars2,
    int vars_total
)
{
    // The host may not supply more descriptors than were generated locally.
    if (vars_total > m_vars_total) {
        LIBOFFLOAD_ERROR(c_merge_var_descs1);
        exit(1);
    }

    for (int i = 0; i < m_vars_total; i++) {
        // m_vars_extra[i].type_src / type_dst are used from here on in
        // place of m_vars[i].type.src / type.dst.
        if (i >= vars_total) {
            // No host descriptor for this entry: keep the local types.
            m_vars_extra[i].type_src = m_vars[i].type.src;
            m_vars_extra[i].type_dst = m_vars[i].type.dst;
        }
        else {
            // Host and local descriptors have to agree on the type.
            if (vars[i].type.bits != m_vars[i].type.bits) {
                OFFLOAD_TRACE(2,
                    "m_vars[%d].type.bits=%08x, vars[%d].type.bits=%08x\n",
                    i, m_vars[i].type.bits, i, vars[i].type.bits);
                LIBOFFLOAD_ERROR(c_merge_var_descs2);
                exit(1);
            }

            // Source side: an extended type carries the real type and
            // pointer in a VarDescExtendedType referenced by ptr.
            if (m_vars[i].type.src != c_extended_type) {
                m_vars_extra[i].type_src = m_vars[i].type.src;
                // The local ptr is kept for a c_dv with use_device_ptr.
                if (!m_vars[i].flags.use_device_ptr ||
                    m_vars[i].type.src != c_dv) {
                    m_vars[i].ptr = vars[i].ptr;
                }
            }
            else {
                VarDescExtendedType *src_ext =
                    reinterpret_cast<VarDescExtendedType*>(vars[i].ptr);
                m_vars_extra[i].type_src = src_ext->extended_type;
                m_vars[i].ptr            = src_ext->ptr;
            }

            // Destination side, same scheme applied to "into".
            if (m_vars[i].type.dst != c_extended_type) {
                m_vars_extra[i].type_dst = m_vars[i].type.dst;
                m_vars[i].into = vars[i].into;
            }
            else {
                VarDescExtendedType *dst_ext =
                    reinterpret_cast<VarDescExtendedType*>(vars[i].into);
                m_vars_extra[i].type_dst = dst_ext->extended_type;
                m_vars[i].into           = dst_ext->ptr;
            }

            // Names are optional; fall back to an empty source name.
            const char *src_name =
                (vars2 != NULL && vars2[i].sname != NULL) ?
                vars2[i].sname : "";
            OFFLOAD_DEBUG_TRACE_1(2, get_offload_number(), c_offload_var,
                "   VarDesc %d, var=%s, %s, %s\n",
                i, src_name,
                vardesc_direction_as_string[m_vars[i].direction.bits],
                vardesc_type_as_string[m_vars_extra[i].type_src]);
            if (vars2 != NULL && vars2[i].dname != NULL) {
                OFFLOAD_TRACE(2, "              into=%s, %s\n", vars2[i].dname,
                    vardesc_type_as_string[m_vars_extra[i].type_dst]);
            }
        }

        // Dump the merged descriptor.
        OFFLOAD_TRACE(2,
            "              type_src=%d, type_dstn=%d, direction=%d, "
            "alloc_if=%d, free_if=%d, align=%d, mic_offset=%d, flags=0x%x, "
            "offset=%lld, size=%lld, count/disp=%lld, ptr=%p into=%p\n",
            m_vars_extra[i].type_src,
            m_vars_extra[i].type_dst,
            m_vars[i].direction.bits,
            m_vars[i].alloc_if,
            m_vars[i].free_if,
            m_vars[i].align,
            m_vars[i].mic_offset,
            m_vars[i].flags.bits,
            m_vars[i].offset,
            m_vars[i].size,
            m_vars[i].count,
            m_vars[i].ptr,
            m_vars[i].into);
    }
}
示例#29
0
// Acquires a target for an offload and returns a descriptor for it, or NULL
// when no target is available.
//
//   target_type    - TARGET_HOST or TARGET_MIC.
//   target_number  - requested device; -1 selects device 0, values below -1
//                    are rejected with an error and exit(1).
//   is_optional    - non-zero if the offload may fall back when the device
//                    cannot be reserved.
//   status         - optional status block, pre-set to OFFLOAD_UNAVAILABLE.
//   file, line     - source location passed to the timer initialization.
//
// A mandatory offload (is_optional == 0) without a status block exits the
// process when the device is not available.
extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE(
    TARGET_TYPE      target_type,
    int              target_number,
    int              is_optional,
    _Offload_status* status,
    const char*      file,
    uint64_t         line
)
{
    // Start from "unavailable" in the caller's status block.
    if (status != 0) {
        status->result = OFFLOAD_UNAVAILABLE;
        status->device_number = -1;
        status->data_sent = 0;
        status->data_received = 0;
    }

    // Make sure the library is initialized.
    bool available = __offload_init_library();

    // OFFLOAD_TIMER_INIT must follow the call to __offload_init_library.
    OffloadHostTimerData * timer_data = OFFLOAD_TIMER_INIT(file, line);

    OFFLOAD_TIMER_START(timer_data, c_offload_host_total_offload);

    OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);

    // Initialize every device if the init type is on_offload_all.
    if (available && __offload_init_type == c_init_on_offload_all) {
        for (int engine = 0; engine < mic_engines_total; engine++) {
             mic_engines[engine].init();
        }
    }
    OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);

    OFFLOAD_TIMER_START(timer_data, c_offload_host_target_acquire);

    if (target_type == TARGET_MIC) {
        if (target_number < -1) {
            LIBOFFLOAD_ERROR(c_invalid_device_number);
            exit(1);
        }

        if (!available) {
            // fallback to CPU
            target_number = -1;
        }
        else {
            // A user-provided number wraps around the engine count;
            // -1 means "use device 0".
            target_number = (target_number >= 0) ?
                            target_number % mic_engines_total : 0;

            // Reserve the device in ORSL.
            const bool reserved = is_optional ?
                                  ORSL::try_reserve(target_number) :
                                  ORSL::reserve(target_number);
            if (!reserved) {
                target_number = -1;
            }

            // Initialize the device on demand.
            if (target_number >= 0 &&
                __offload_init_type == c_init_on_offload) {
                OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
                mic_engines[target_number].init();
                OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);
            }
        }

        if (!available || target_number < 0) {
            // A mandatory offload with nowhere to report failure is fatal.
            if (!is_optional && status == 0) {
                LIBOFFLOAD_ERROR(c_device_is_not_available);
                exit(1);
            }

            available = false;
        }
    }
    else if (target_type == TARGET_HOST) {
        // Host always available
        available = true;
    }

    if (!available) {
        OFFLOAD_TIMER_STOP(timer_data, c_offload_host_target_acquire);
        OFFLOAD_TIMER_STOP(timer_data, c_offload_host_total_offload);
        offload_report_free_data(timer_data);
        return NULL;
    }

    OFFLOAD ofld = new OffloadDescriptor(target_number, status,
                                         !is_optional, false, timer_data);
    OFFLOAD_TIMER_HOST_MIC_NUM(timer_data, target_number);
    Offload_Report_Prolog(timer_data);
    OFFLOAD_DEBUG_TRACE_1(2, timer_data->offload_number, c_offload_start,
                          "Starting offload: target_type = %d, "
                          "number = %d, is_optional = %d\n",
                          target_type, target_number, is_optional);

    OFFLOAD_TIMER_STOP(timer_data, c_offload_host_target_acquire);

    return ofld;
}