Ejemplo n.º 1
0
// Target-side gather step that runs after the offloaded computation.
//
// For every variable descriptor in m_vars this routine
//   * marshals the value of "out" data variables and function pointers
//     into m_out, and
//   * releases the buffer reference of pointer-like variables whose
//     descriptor has free_if set, both for the source and (when an "into"
//     destination is present) for the destination.
//
// Timers: stops c_offload_target_compute on entry and brackets its own work
// with c_offload_target_gather_outputs.  An unknown variable type is
// reported through LIBOFFLOAD_ERROR and the process is aborted.
void OffloadDescriptor::gather_copyout_data()
{
    OFFLOAD_TIMER_STOP(c_offload_target_compute);

    OFFLOAD_TIMER_START(c_offload_target_gather_outputs);

    for (int i = 0; i < m_vars_total; i++) {
        // The source descriptor is the one living on the target when the
        // variable is copied out or when there is no separate "into".
        bool src_is_for_mic = (m_vars[i].direction.out ||
                               m_vars[i].into == NULL);

        // Source side: send "out" values, release source buffers.
        switch (m_vars[i].type.src) {
            case c_data_ptr_array:
                break;
            case c_data:
            case c_void_ptr:
            case c_cean_var:
                // Plain data is sent by value, starting disp bytes into
                // the variable; static variables are skipped.
                if (m_vars[i].direction.out &&
                    !m_vars[i].flags.is_static) {
                    m_out.send_data(
                        static_cast<char*>(m_vars[i].ptr) + m_vars[i].disp,
                        m_vars[i].size);
                }
                break;

            case c_dv:
                break;

            case c_string_ptr:
            case c_data_ptr:
            case c_cean_var_ptr:
            case c_dv_ptr:
                if (m_vars[i].free_if &&
                    src_is_for_mic &&
                    !m_vars[i].flags.is_static) {
                    // Undo the offsets that were applied to the pointer to
                    // recover the buffer start; stack buffers carry no
                    // per-variable offset.
                    void *buf = *static_cast<char**>(m_vars[i].ptr) -
                                    m_vars[i].mic_offset -
                                    (m_vars[i].flags.is_stack_buf?
                                     0 : m_vars[i].offset);
                    if (buf == NULL) {
                        // nothing to release; leaves the switch only
                        break;
                    }
                    // decrement buffer reference count
                    OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs);
                    BufReleaseRef(buf);
                    OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs);
                }
                break;

            case c_func_ptr:
                if (m_vars[i].direction.out) {
                    m_out.send_func_ptr(*((void**) m_vars[i].ptr));
                }
                break;

            case c_dv_data:
            case c_dv_ptr_data:
            case c_dv_data_slice:
            case c_dv_ptr_data_slice:
                if (src_is_for_mic &&
                    m_vars[i].free_if &&
                    !m_vars[i].flags.is_static) {
                    // ptr is the descriptor itself for the non-pointer
                    // kinds and a pointer to the descriptor otherwise.
                    ArrDesc *dvp = (m_vars[i].type.src == c_dv_data ||
                                    m_vars[i].type.src == c_dv_data_slice) ?
                        static_cast<ArrDesc*>(m_vars[i].ptr) :
                        *static_cast<ArrDesc**>(m_vars[i].ptr);

                    void *buf = reinterpret_cast<char*>(dvp->Base) -
                                m_vars[i].mic_offset -
                                m_vars[i].offset;

                    if (buf == NULL) {
                        break;
                    }

                    // decrement buffer reference count
                    OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs);
                    BufReleaseRef(buf);
                    OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs);
                }
                break;

            default:
                // This switch dispatches on the source type, so report the
                // source type (the destination type was reported before).
                LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.src);
                abort();
        }

        // Destination side: only buffer releases are needed here.
        if (m_vars[i].into) {
            switch (m_vars[i].type.dst) {
                case c_data_ptr_array:
                    break;
                case c_data:
                case c_void_ptr:
                case c_cean_var:
                case c_dv:
                    break;

                case c_string_ptr:
                case c_data_ptr:
                case c_cean_var_ptr:
                case c_dv_ptr:
                    if (m_vars[i].direction.in &&
                        m_vars[i].free_if &&
                        !m_vars[i].flags.is_static_dstn) {
                        void *buf = *static_cast<char**>(m_vars[i].into) -
                                    m_vars[i].mic_offset -
                                    (m_vars[i].flags.is_stack_buf?
                                     0 : m_vars[i].offset);

                        if (buf == NULL) {
                            break;
                        }
                        // decrement buffer reference count
                        OFFLOAD_TIMER_START(
                            c_offload_target_release_buffer_refs);
                        BufReleaseRef(buf);
                        OFFLOAD_TIMER_STOP(
                            c_offload_target_release_buffer_refs);
                    }
                    break;

                case c_func_ptr:
                    break;

                case c_dv_data:
                case c_dv_ptr_data:
                case c_dv_data_slice:
                case c_dv_ptr_data_slice:
                    if (m_vars[i].free_if &&
                        m_vars[i].direction.in &&
                        !m_vars[i].flags.is_static_dstn) {
                        ArrDesc *dvp =
                            (m_vars[i].type.dst == c_dv_data_slice ||
                             m_vars[i].type.dst == c_dv_data) ?
                            static_cast<ArrDesc*>(m_vars[i].into) :
                            *static_cast<ArrDesc**>(m_vars[i].into);
                        void *buf = reinterpret_cast<char*>(dvp->Base) -
                              m_vars[i].mic_offset -
                              m_vars[i].offset;

                        if (buf == NULL) {
                            break;
                        }
                        // decrement buffer reference count
                        OFFLOAD_TIMER_START(
                            c_offload_target_release_buffer_refs);
                        BufReleaseRef(buf);
                        OFFLOAD_TIMER_STOP(
                            c_offload_target_release_buffer_refs);
                    }
                    break;

                default:
                    LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst);
                    abort();
            }
        }
    }

    OFFLOAD_DEBUG_TRACE(2, "OUT buffer @ p %p size %lld\n",
                        m_out.get_buffer_start(),
                        m_out.get_buffer_size());

    OFFLOAD_DEBUG_DUMP_BYTES(2,
                             m_out.get_buffer_start(),
                             m_out.get_buffer_size());

    OFFLOAD_DEBUG_TRACE_1(1, get_offload_number(), c_offload_copyout_data,
                  "Total copyout data sent to host: [%lld] bytes\n",
                  m_out.get_tfr_size());

    OFFLOAD_TIMER_STOP(c_offload_target_gather_outputs);
}
Ejemplo n.º 2
0
void OffloadDescriptor::scatter_copyin_data()
{
    OFFLOAD_TIMER_START(c_offload_target_scatter_inputs);

    OFFLOAD_DEBUG_TRACE(2, "IN  buffer @ %p size %lld\n",
                        m_in.get_buffer_start(),
                        m_in.get_buffer_size());
    OFFLOAD_DEBUG_DUMP_BYTES(2, m_in.get_buffer_start(),
                             m_in.get_buffer_size());

    // receive data
    for (int i = 0; i < m_vars_total; i++) {
        bool src_is_for_mic = (m_vars[i].direction.out ||
                               m_vars[i].into == NULL);
        void** ptr_addr = src_is_for_mic ?
                          static_cast<void**>(m_vars[i].ptr) :
                          static_cast<void**>(m_vars[i].into);
        int type = src_is_for_mic ? m_vars[i].type.src :
                                    m_vars[i].type.dst;
        bool is_static = src_is_for_mic ?
                         m_vars[i].flags.is_static :
                         m_vars[i].flags.is_static_dstn;
        void *ptr = NULL;

        if (m_vars[i].flags.alloc_disp) {
            int64_t offset = 0;
            m_in.receive_data(&offset, sizeof(offset));
            m_vars[i].offset = -offset;
        }
        if (VAR_TYPE_IS_DV_DATA_SLICE(type) ||
            VAR_TYPE_IS_DV_DATA(type)) {
            ArrDesc *dvp = (type == c_dv_data_slice || type == c_dv_data)?
                  reinterpret_cast<ArrDesc*>(ptr_addr) :
                  *reinterpret_cast<ArrDesc**>(ptr_addr);
            ptr_addr = reinterpret_cast<void**>(&dvp->Base);
        }

        // Set pointer values
        switch (type) {
            case c_data_ptr_array:
                {
                    int j = m_vars[i].ptr_arr_offset;
                    int max_el = j + m_vars[i].count;
                    char *dst_arr_ptr = (src_is_for_mic)?
                        *(reinterpret_cast<char**>(m_vars[i].ptr)) :
                        reinterpret_cast<char*>(m_vars[i].into);

                    for (; j < max_el; j++) {
                        if (src_is_for_mic) {
                            m_vars[j].ptr =
                                dst_arr_ptr + m_vars[j].ptr_arr_offset;
                        }
                        else {
                            m_vars[j].into =
                                dst_arr_ptr + m_vars[j].ptr_arr_offset;
                        }
                    }
                }
                break;
            case c_data:
            case c_void_ptr:
            case c_cean_var:
            case c_dv:
                break;

            case c_string_ptr:
            case c_data_ptr:
            case c_cean_var_ptr:
            case c_dv_ptr:
                if (m_vars[i].alloc_if) {
                    void *buf;
                    if (m_vars[i].flags.sink_addr) {
                        m_in.receive_data(&buf, sizeof(buf));
                    }
                    else {
                        buf = m_buffers.front();
                        m_buffers.pop_front();
                    }
                    if (buf) {
                        if (!is_static) {
                            if (!m_vars[i].flags.sink_addr) {
                                // increment buffer reference
                                OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs);
                                BufferAddRef(buf);
                                OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs);
                            }
                            add_ref_count(buf, 0 == m_vars[i].flags.sink_addr);
                        }
                        ptr = static_cast<char*>(buf) +
                                  m_vars[i].mic_offset +
                                  (m_vars[i].flags.is_stack_buf ?
                                   0 : m_vars[i].offset);
                    }
                    *ptr_addr = ptr;
                }
                else if (m_vars[i].flags.sink_addr) {
                    void *buf;
                    m_in.receive_data(&buf, sizeof(buf));
                    void *ptr = static_cast<char*>(buf) +
                                    m_vars[i].mic_offset +
                                    (m_vars[i].flags.is_stack_buf ?
                                     0 : m_vars[i].offset);
                    *ptr_addr = ptr;
                }
                break;

            case c_func_ptr:
                break;

            case c_dv_data:
            case c_dv_ptr_data:
            case c_dv_data_slice:
            case c_dv_ptr_data_slice:
                if (m_vars[i].alloc_if) {
                    void *buf;
                    if (m_vars[i].flags.sink_addr) {
                        m_in.receive_data(&buf, sizeof(buf));
                    }
                    else {
                        buf = m_buffers.front();
                        m_buffers.pop_front();
                    }
                    if (buf) {
                        if (!is_static) {
                            if (!m_vars[i].flags.sink_addr) {
                                // increment buffer reference
                                OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs);
                                BufferAddRef(buf);
                                OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs);
                            }
                            add_ref_count(buf, 0 == m_vars[i].flags.sink_addr);
                        }
                        ptr = static_cast<char*>(buf) +
                            m_vars[i].mic_offset + m_vars[i].offset;
                    }
                    *ptr_addr = ptr;
                }
                else if (m_vars[i].flags.sink_addr) {
                    void *buf;
                    m_in.receive_data(&buf, sizeof(buf));
                    ptr = static_cast<char*>(buf) +
                          m_vars[i].mic_offset + m_vars[i].offset;
                    *ptr_addr = ptr;
                }
                break;

            default:
                LIBOFFLOAD_ERROR(c_unknown_var_type, type);
                abort();
        }
        // Release obsolete buffers for stack of persistent objects
        if (type = c_data_ptr &&
            m_vars[i].flags.is_stack_buf &&
            !m_vars[i].direction.bits &&
            m_vars[i].alloc_if &&
            m_vars[i].size != 0) {
                for (int j=0; j < m_vars[i].size; j++) {
                    void *buf;
                    m_in.receive_data(&buf, sizeof(buf));
                    BufferReleaseRef(buf);
                    ref_data.erase(buf);
                }
        }
        // Do copyin
        switch (m_vars[i].type.dst) {
            case c_data_ptr_array:
                break;
            case c_data:
            case c_void_ptr:
            case c_cean_var:
                if (m_vars[i].direction.in &&
                    !m_vars[i].flags.is_static_dstn) {
                    int64_t size;
                    int64_t disp;
                    char* ptr = m_vars[i].into ?
                                 static_cast<char*>(m_vars[i].into) :
                                 static_cast<char*>(m_vars[i].ptr);
                    if (m_vars[i].type.dst == c_cean_var) {
                        m_in.receive_data((&size), sizeof(int64_t));
                        m_in.receive_data((&disp), sizeof(int64_t));
                    }
                    else {
                        size = m_vars[i].size;
                        disp = 0;
                    }
                    m_in.receive_data(ptr + disp, size);
                }
                break;

            case c_dv:
                if (m_vars[i].direction.bits ||
                    m_vars[i].alloc_if ||
                    m_vars[i].free_if) {
                    char* ptr = m_vars[i].into ?
                                 static_cast<char*>(m_vars[i].into) :
                                 static_cast<char*>(m_vars[i].ptr);
                    m_in.receive_data(ptr + sizeof(uint64_t),
                                      m_vars[i].size - sizeof(uint64_t));
                }
                break;

            case c_string_ptr:
            case c_data_ptr:
            case c_cean_var_ptr:
            case c_dv_ptr:
            case c_dv_data:
            case c_dv_ptr_data:
            case c_dv_data_slice:
            case c_dv_ptr_data_slice:
                break;

            case c_func_ptr:
                if (m_vars[i].direction.in) {
                    m_in.receive_func_ptr((const void**) m_vars[i].ptr);
                }
                break;

            default:
                LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst);
                abort();
        }
    }

    OFFLOAD_TRACE(1, "Total copyin data received from host: [%lld] bytes\n",
                  m_in.get_tfr_size());

    OFFLOAD_TIMER_STOP(c_offload_target_scatter_inputs);

    OFFLOAD_TIMER_START(c_offload_target_compute);
}
Ejemplo n.º 3
0
// Runs one offloaded function on the target.
//
// Builds a local OffloadDescriptor from the function descriptor found in
// misc_data, wires up its input/output marshallers and buffer list, looks
// up the entry point by name and calls it.
//
// Parameters:
//   buffer_count    number of entries in buffers; when the in/out data is
//                   not embedded in misc_data the last entry is consumed as
//                   the shared in/out data buffer.
//   buffers         buffer addresses copied into the descriptor's m_buffers.
//   misc_data       FunctionDescriptor, optionally followed (at
//                   data_offset) by the input data.
//   misc_data_len   not used by this routine.
//   return_data     output area used when the data is embedded
//                   (data_offset != 0).
//   return_data_len not used by this routine.
//
// The process exits with status 1 when the variable descriptor array cannot
// be allocated or when no entry with the requested name is registered.
void OffloadDescriptor::offload(
    uint32_t  buffer_count,
    void**    buffers,
    void*     misc_data,
    uint16_t  misc_data_len,
    void*     return_data,
    uint16_t  return_data_len
)
{
    FunctionDescriptor *func = (FunctionDescriptor*) misc_data;
    const char *name = func->data;
    OffloadDescriptor ofld;
    char *in_data = 0;
    char *out_data = 0;
    char *timer_data = 0;

    // Propagate the per-offload settings carried by the function descriptor.
    console_enabled = func->console_enabled;
    timer_enabled = func->timer_enabled;
    offload_report_level = func->offload_report_level;
    offload_number = func->offload_number;
    ofld.set_offload_number(func->offload_number);

#ifdef SEP_SUPPORT
    if (sep_monitor) {
        // Only the first concurrently running offload resumes sampling.
        if (__sync_fetch_and_add(&sep_counter, 1) == 0) {
            OFFLOAD_DEBUG_TRACE(2, "VTResumeSampling\n");
            VTResumeSampling();
        }
    }
#endif // SEP_SUPPORT

    OFFLOAD_DEBUG_TRACE_1(2, ofld.get_offload_number(),
                          c_offload_start_target_func,
                          "Offload \"%s\" started\n", name);

    // initialize timer data
    OFFLOAD_TIMER_INIT();

    OFFLOAD_TIMER_START(c_offload_target_total_time);

    OFFLOAD_TIMER_START(c_offload_target_descriptor_setup);

    // get input/output buffer addresses
    if (func->in_datalen > 0 || func->out_datalen > 0) {
        if (func->data_offset != 0) {
            // Data travels inline with the function descriptor.
            in_data = (char*) misc_data + func->data_offset;
            out_data = (char*) return_data;
        }
        else {
            // Data travels in the last buffer, shared by input and output;
            // that buffer is removed from the list handed to the function.
            char *inout_buf = (char*) buffers[--buffer_count];
            in_data = inout_buf;
            out_data = inout_buf;
        }
    }

    // assign variable descriptors
    ofld.m_vars_total = func->vars_num;
    if (ofld.m_vars_total > 0) {
        uint64_t var_data_len = ofld.m_vars_total * sizeof(VarDesc);

        ofld.m_vars = (VarDesc*) malloc(var_data_len);
        if (ofld.m_vars == NULL) {
            LIBOFFLOAD_ERROR(c_malloc);
            // Terminate like the other error path of this function does;
            // continuing would memcpy into a null destination.
            exit(1);
        }
        memcpy(ofld.m_vars, in_data, var_data_len);

        // The variable descriptors lead the input data; skip past them.
        in_data += var_data_len;
        func->in_datalen -= var_data_len;
    }

    // timer data
    if (func->timer_enabled) {
        uint64_t timer_data_len = OFFLOAD_TIMER_DATALEN();

        // Timer data occupies the head of the output area.
        timer_data = out_data;
        out_data += timer_data_len;
        func->out_datalen -= timer_data_len;
    }

    // init Marshallers
    ofld.m_in.init_buffer(in_data, func->in_datalen);
    ofld.m_out.init_buffer(out_data, func->out_datalen);

    // copy buffers to offload descriptor
    std::copy(buffers, buffers + buffer_count,
              std::back_inserter(ofld.m_buffers));

    OFFLOAD_TIMER_STOP(c_offload_target_descriptor_setup);

    // find offload entry address
    OFFLOAD_TIMER_START(c_offload_target_func_lookup);

    offload_func_with_parms entry = (offload_func_with_parms)
        __offload_entries.find_addr(name);

    if (entry == NULL) {
#if OFFLOAD_DEBUG > 0
        if (console_enabled > 2) {
            __offload_entries.dump();
        }
#endif
        LIBOFFLOAD_ERROR(c_offload_descriptor_offload, name);
        exit(1);
    }

    OFFLOAD_TIMER_STOP(c_offload_target_func_lookup);

    OFFLOAD_TIMER_START(c_offload_target_func_time);

    // execute offload entry
    entry(&ofld);

    OFFLOAD_TIMER_STOP(c_offload_target_func_time);

    OFFLOAD_TIMER_STOP(c_offload_target_total_time);

    // copy timer data to the buffer
    OFFLOAD_TIMER_TARGET_DATA(timer_data);

    OFFLOAD_DEBUG_TRACE(2, "Offload \"%s\" finished\n", name);

#ifdef SEP_SUPPORT
    if (sep_monitor) {
        // The last offload to finish pauses sampling again.
        if (__sync_sub_and_fetch(&sep_counter, 1) == 0) {
            OFFLOAD_DEBUG_TRACE(2, "VTPauseSampling\n");
            VTPauseSampling();
        }
    }
#endif // SEP_SUPPORT
}
Ejemplo n.º 4
0
// Acquires an offload target and creates the descriptor for one offload.
//
// Parameters:
//   target_type    TARGET_HOST or TARGET_MIC.
//   target_number  requested device; -1 selects device 0, values >= 0 are
//                  wrapped modulo the number of engines, values below -1
//                  are a fatal error.
//   is_optional    non-zero when the offload may fall back (the device is
//                  then reserved with try_reserve instead of reserve).
//   status         optional status block; pre-set to OFFLOAD_UNAVAILABLE.
//   file, line     source location handed to the host timer.
//
// Returns a new OffloadDescriptor, or NULL when no target could be acquired.
// A mandatory offload without a status block exits the process instead of
// returning NULL.
extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE(
    TARGET_TYPE      target_type,
    int              target_number,
    int              is_optional,
    _Offload_status* status,
    const char*      file,
    uint64_t         line
)
{
    // Start from "unavailable" so every failure path reports correctly.
    if (status != 0) {
        status->result = OFFLOAD_UNAVAILABLE;
        status->device_number = -1;
        status->data_sent = 0;
        status->data_received = 0;
    }

    // The library must be initialized before anything else.
    bool can_offload = __offload_init_library();

    // The timer may only be set up once the library has been initialized.
    OffloadHostTimerData * timer_data = OFFLOAD_TIMER_INIT(file, line);

    OFFLOAD_TIMER_START(timer_data, c_offload_host_total_offload);

    OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);

    // With init type "on_offload_all" every engine is brought up now.
    if (can_offload && __offload_init_type == c_init_on_offload_all) {
        int engine = 0;
        while (engine < mic_engines_total) {
            mic_engines[engine].init();
            ++engine;
        }
    }
    OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);

    OFFLOAD_TIMER_START(timer_data, c_offload_host_target_acquire);

    if (target_type == TARGET_HOST) {
        // The host can always be used.
        can_offload = true;
    }
    else if (target_type == TARGET_MIC) {
        // Anything below -1 is not a valid request.
        if (target_number < -1) {
            LIBOFFLOAD_ERROR(c_invalid_device_number);
            exit(1);
        }

        if (!can_offload) {
            // Library is not usable: fall back to the CPU.
            target_number = -1;
        }
        else {
            // Explicit device numbers wrap around; -1 means device 0.
            target_number = (target_number >= 0) ?
                            target_number % mic_engines_total : 0;

            // Reserve the device in ORSL.
            bool reserved;
            if (is_optional) {
                reserved = ORSL::try_reserve(target_number);
            }
            else {
                reserved = ORSL::reserve(target_number);
            }

            if (!reserved) {
                target_number = -1;
            }
            else if (__offload_init_type == c_init_on_offload) {
                // Lazy initialization of just the selected device.
                OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
                mic_engines[target_number].init();
                OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);
            }
        }

        if (!can_offload || target_number < 0) {
            // A mandatory offload with nowhere to report the failure is
            // fatal.
            if (status == 0 && !is_optional) {
                LIBOFFLOAD_ERROR(c_device_is_not_available);
                exit(1);
            }

            can_offload = false;
        }
    }

    if (!can_offload) {
        // Nothing was acquired: close the timers and drop the timer data.
        OFFLOAD_TIMER_STOP(timer_data, c_offload_host_target_acquire);
        OFFLOAD_TIMER_STOP(timer_data, c_offload_host_total_offload);
        offload_report_free_data(timer_data);
        return NULL;
    }

    OFFLOAD descriptor = new OffloadDescriptor(target_number, status,
                                               !is_optional, false,
                                               timer_data);
    OFFLOAD_TIMER_HOST_MIC_NUM(timer_data, target_number);
    Offload_Report_Prolog(timer_data);
    OFFLOAD_DEBUG_TRACE_1(2, timer_data->offload_number, c_offload_start,
                          "Starting offload: target_type = %d, "
                          "number = %d, is_optional = %d\n",
                          target_type, target_number, is_optional);

    OFFLOAD_TIMER_STOP(timer_data, c_offload_host_target_acquire);

    return descriptor;
}
Ejemplo n.º 5
0
// Acquires a device for an OpenMP offload and creates its descriptor.
//
// Parameters:
//   device_num  optional device number; when null, __omp_device_num is used.
//   file, line  source location handed to the host timer.
//
// Returns a new OffloadDescriptor.  Every failure (library not initialized,
// negative device number, device cannot be reserved) is reported and exits
// the process with status 1.
extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE1(
    const int*  device_num,
    const char* file,
    uint64_t    line
)
{
    // The library has to be initialized with at least one device available.
    if (!__offload_init_library()) {
        LIBOFFLOAD_ERROR(c_device_is_not_available);
        exit(1);
    }

    // The timer may only be set up once the library has been initialized.
    OffloadHostTimerData * timer_data = OFFLOAD_TIMER_INIT(file, line);

    OFFLOAD_TIMER_START(timer_data, c_offload_host_total_offload);

    OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);

    // With init type "on_offload_all" every engine is brought up now.
    if (__offload_init_type == c_init_on_offload_all) {
        int engine = 0;
        while (engine < mic_engines_total) {
            mic_engines[engine].init();
            ++engine;
        }
    }

    OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);

    OFFLOAD_TIMER_START(timer_data, c_offload_host_target_acquire);

    // Fall back to the default device when the caller passed none.
    int device_idx = (device_num != 0) ? *device_num : __omp_device_num;

    // Negative device numbers are rejected.
    if (device_idx < 0) {
        LIBOFFLOAD_ERROR(c_omp_invalid_device_num);
        exit(1);
    }

    // Wrap around the available engines (open question: should OpenMP
    // do this at all?).
    device_idx = device_idx % mic_engines_total;

    // Reserve the device in ORSL; failure is fatal here.
    if (!ORSL::reserve(device_idx)) {
        LIBOFFLOAD_ERROR(c_device_is_not_available);
        exit(1);
    }

    // Lazy initialization of just the selected device.
    OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);

    if (__offload_init_type == c_init_on_offload) {
        mic_engines[device_idx].init();
    }

    OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);

    OFFLOAD descriptor =
        new OffloadDescriptor(device_idx, 0, true, true, timer_data);

    OFFLOAD_TIMER_HOST_MIC_NUM(timer_data, device_idx);

    Offload_Report_Prolog(timer_data);

    OFFLOAD_DEBUG_TRACE_1(2, timer_data->offload_number, c_offload_start,
                          "Starting OpenMP offload, device = %d\n",
                          device_idx);

    OFFLOAD_TIMER_STOP(timer_data, c_offload_host_target_acquire);

    return descriptor;
}
Ejemplo n.º 6
0
// Decodes the input stream m_in for every variable descriptor in
// m_vars[0 .. m_vars_total).
//
// For each descriptor the work is done in three steps, in this order:
//   1. "Set pointer values": depending on the variable type, receive
//      offsets/buffer addresses from m_in (or take the next entry of
//      m_buffers) and store the resulting address through ptr_addr.
//   2. For descriptor #0 only, when it describes a stack buffer, receive and
//      release a list of obsolete buffers.
//   3. "Do copyin": depending on the destination type, receive the variable's
//      payload from m_in directly into its destination memory.
//
// The order and conditions of the m_in.receive_* calls define how the stream
// is consumed, so statements here must not be reordered.
//
// Timers: c_offload_target_scatter_inputs is started on entry and stopped
// before return; c_offload_target_compute is started last and is still
// running when this function returns.
//
// An unknown variable type in either switch reports c_unknown_var_type and
// calls abort().
void OffloadDescriptor::scatter_copyin_data()
{
    OFFLOAD_TIMER_START(c_offload_target_scatter_inputs);

    OFFLOAD_DEBUG_TRACE(2, "IN  buffer @ %p size %lld\n",
                        m_in.get_buffer_start(),
                        m_in.get_buffer_size());
    OFFLOAD_DEBUG_DUMP_BYTES(2, m_in.get_buffer_start(),
                             m_in.get_buffer_size());

    // receive data
    for (int i = 0; i < m_vars_total; i++) {
        // The "source" side of the descriptor is used when the variable has
        // the out direction set or when there is no separate "into"
        // destination. This choice selects which pointer slot, which type
        // and which is_static flag the pointer set-up below works with.
        bool src_is_for_mic = (m_vars[i].direction.out ||
                               m_vars[i].into == NULL);
        void** ptr_addr = src_is_for_mic ?
                          static_cast<void**>(m_vars[i].ptr) :
                          static_cast<void**>(m_vars[i].into);
        int type = src_is_for_mic ? m_vars_extra[i].type_src :
                                    m_vars_extra[i].type_dst;
        bool is_static = src_is_for_mic ?
                         m_vars[i].flags.is_static :
                         m_vars[i].flags.is_static_dstn;
        // Address to be stored through ptr_addr; stays NULL when no buffer
        // is available for the variable.
        void *ptr = NULL;

        // An int64 offset is present in the stream when alloc_disp is set.
        // It is consumed here to keep the stream in sync; the value itself
        // is not used afterwards in this function.
        // NOTE(review): confirm that discarding the offset is intentional.
        if (m_vars[i].flags.alloc_disp) {
            int64_t offset = 0;
            m_in.receive_data(&offset, sizeof(offset));
        }
        // For dope-vector data types the pointer to patch is the Base field
        // of the array descriptor. For c_dv_data / c_dv_data_slice ptr_addr
        // is itself the descriptor address; for the remaining matching types
        // ptr_addr holds a pointer to the descriptor and is dereferenced once.
        if (VAR_TYPE_IS_DV_DATA_SLICE(type) ||
            VAR_TYPE_IS_DV_DATA(type)) {
            ArrDesc *dvp = (type == c_dv_data_slice || type == c_dv_data)?
                  reinterpret_cast<ArrDesc*>(ptr_addr) :
                  *reinterpret_cast<ArrDesc**>(ptr_addr);
            ptr_addr = reinterpret_cast<void**>(&dvp->Base);
        }
        // Set pointer values
        switch (type) {
            case c_data_ptr_array:
                {
                    // Descriptors [ptr_arr_offset, ptr_arr_offset + count)
                    // describe the elements of this pointer array.
                    int j = m_vars[i].ptr_arr_offset;
                    int max_el = j + m_vars[i].count;
                    char *dst_arr_ptr = (src_is_for_mic)?
                        *(reinterpret_cast<char**>(m_vars[i].ptr)) :
                        reinterpret_cast<char*>(m_vars[i].into);

                    // if is_pointer is 1 it means that pointer array itself
                    // is defined either via pointer or as class member.
                    // i.e. arr_ptr[0:5] or this->ARR[0:5]
                    // In that case an extra int64 offset is received and the
                    // array address is obtained by one more dereference.
                    if (m_vars[i].flags.is_pointer) {
                        int64_t offset = 0;
                        m_in.receive_data(&offset, sizeof(offset));
                        dst_arr_ptr = *((char**)dst_arr_ptr) + offset;
                    }
                    // Point every element descriptor at its slot inside the
                    // array; each element descriptor's own ptr_arr_offset is
                    // used as the byte offset from the array start.
                    for (; j < max_el; j++) {
                        if (src_is_for_mic) {
                            m_vars[j].ptr =
                                dst_arr_ptr + m_vars[j].ptr_arr_offset;
                        }
                        else {
                            m_vars[j].into =
                                dst_arr_ptr + m_vars[j].ptr_arr_offset;
                        }
                    }
                }
                break;
            // No pointer set-up is done for these types.
            case c_data:
            case c_void_ptr:
            case c_void_ptr_ptr:
            case c_cean_var:
            case c_dv:
                break;

            case c_string_ptr:
            case c_data_ptr:
            case c_string_ptr_ptr:
            case c_data_ptr_ptr:
            case c_cean_var_ptr:
            case c_cean_var_ptr_ptr:
            case c_dv_ptr:
                // Don't need ptr_addr value for variables from stack buffer.
                // Stack buffer address is set at var_desc with #0.
                if (i != 0 && m_vars[i].flags.is_stack_buf) {
                    break;
                }
                // Pointer-to-pointer variables carry an extra int64 offset:
                // the slot to patch is located at (*ptr_addr + offset).
                if (TYPE_IS_PTR_TO_PTR(m_vars_extra[i].type_src) ||
                    TYPE_IS_PTR_TO_PTR(m_vars_extra[i].type_dst)) {
                    int64_t offset;

                    m_in.receive_data(&offset, sizeof(offset));
                    ptr_addr = reinterpret_cast<void**>(
                                 reinterpret_cast<char*>(*ptr_addr) + offset);

                }

                if (m_vars[i].alloc_if && !m_vars[i].flags.preallocated) {
                    // Buffer address comes from the stream when sink_addr is
                    // set, otherwise from the head of the m_buffers queue.
                    void *buf = NULL;
                    if (m_vars[i].flags.sink_addr) {
                        m_in.receive_data(&buf, sizeof(buf));
                    }
                    else {
                        buf = m_buffers.front();
                        m_buffers.pop_front();
                    }
                    if (buf) {
                        if (!is_static) {
                            // BufferAddRef is only called for buffers taken
                            // from m_buffers; add_ref_count is called in both
                            // cases, with its second argument telling which.
                            if (!m_vars[i].flags.sink_addr) {
                                // increment buffer reference
                                OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs);
                                BufferAddRef(buf);
                                OFFLOAD_TRACE(1, "Calling COIBufferAddRef %p\n", buf);
                                OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs);
                            }
                            add_ref_count(buf, 0 == m_vars[i].flags.sink_addr);
                            OFFLOAD_TRACE(1, "    AddRef count = %d\n",
                                              ((RefInfo *) ref_data[buf])->count);
                        }
                        // Variable address = buffer start + mic_offset, plus
                        // the descriptor offset unless it is a stack buffer.
                        ptr = static_cast<char*>(buf) +
                                  m_vars[i].mic_offset +
                                  (m_vars[i].flags.is_stack_buf ?
                                   0 : m_vars[i].offset);

                    }
                    // Stores NULL when buf was NULL.
                    *ptr_addr = ptr;
                }
                else if (m_vars[i].flags.sink_addr) {
                    // No allocation requested (or preallocated), but the
                    // buffer address is still sent in the stream.
                    void *buf;
                    m_in.receive_data(&buf, sizeof(buf));
                    // Note: this local 'ptr' shadows the outer 'ptr'.
                    void *ptr = static_cast<char*>(buf) +
                                    m_vars[i].mic_offset +
                                    (m_vars[i].flags.is_stack_buf ?
                                     0 : m_vars[i].offset);
                    *ptr_addr = ptr;
                }
                break;

            // No pointer set-up is done for function pointers.
            case c_func_ptr:
            case c_func_ptr_ptr:
                break;

            case c_dv_data:
            case c_dv_ptr_data:
            case c_dv_data_slice:
            case c_dv_ptr_data_slice:
                // Same scheme as for data pointers above, except that the
                // preallocated flag is not checked and the descriptor offset
                // is always added. ptr_addr refers to the descriptor's Base
                // field here (set before the switch).
                if (m_vars[i].alloc_if) {
                    void *buf;
                    if (m_vars[i].flags.sink_addr) {
                        m_in.receive_data(&buf, sizeof(buf));
                    }
                    else {
                        buf = m_buffers.front();
                        m_buffers.pop_front();
                    }
                    if (buf) {
                        if (!is_static) {
                            if (!m_vars[i].flags.sink_addr) {
                                // increment buffer reference
                                OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs);
                                BufferAddRef(buf);
                                OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs);
                            }
                            add_ref_count(buf, 0 == m_vars[i].flags.sink_addr);
                        }
                        ptr = static_cast<char*>(buf) +
                            m_vars[i].mic_offset + m_vars[i].offset;
                    }
                    // Stores NULL when buf was NULL.
                    *ptr_addr = ptr;
                }
                else if (m_vars[i].flags.sink_addr) {
                    void *buf;
                    m_in.receive_data(&buf, sizeof(buf));
                    ptr = static_cast<char*>(buf) +
                          m_vars[i].mic_offset + m_vars[i].offset;
                    *ptr_addr = ptr;
                }
                break;

            default:
                LIBOFFLOAD_ERROR(c_unknown_var_type, type);
                abort();
        }
        // Release obsolete buffers for stack of persistent objects.
        // The vardesc with i==0 and flags.is_stack_buf==TRUE is always for
        // stack buffer pointer.
        // In this case m_vars[0].size is used as the number of buffer
        // addresses that follow in the stream; each one is released and
        // removed from ref_data.
        if (i == 0 &&
            m_vars[i].flags.is_stack_buf &&
            !m_vars[i].direction.bits &&
            m_vars[i].alloc_if &&
            m_vars[i].size != 0) {
                for (int j=0; j < m_vars[i].size; j++) {
                    void *buf;
                    m_in.receive_data(&buf, sizeof(buf));
                    OFFLOAD_TRACE(4, "Releasing stack buffer %p\n", buf);
                    BufferReleaseRef(buf);
                    ref_data.erase(buf);
                }
        }
        // Do copyin
        // This switch is always keyed on the destination type, independent
        // of src_is_for_mic.
        switch (m_vars_extra[i].type_dst) {
            case c_data_ptr_array:
                break;
            case c_data:
            case c_void_ptr:
            case c_void_ptr_ptr:
            case c_cean_var:
                // Payload is only present for "in" variables whose
                // destination is not static.
                if (m_vars[i].direction.in &&
                    !m_vars[i].flags.is_static_dstn) {
                    int64_t size;
                    int64_t disp;
                    // Destination is "into" when provided, else "ptr".
                    char* ptr = m_vars[i].into ?
                                 static_cast<char*>(m_vars[i].into) :
                                 static_cast<char*>(m_vars[i].ptr);
                    // For c_cean_var the size and displacement precede the
                    // payload in the stream; otherwise the descriptor size
                    // is used with zero displacement.
                    if (m_vars_extra[i].type_dst == c_cean_var) {
                        m_in.receive_data((&size), sizeof(int64_t));
                        m_in.receive_data((&disp), sizeof(int64_t));
                    }
                    else {
                        size = m_vars[i].size;
                        disp = 0;
                    }
                    m_in.receive_data(ptr + disp, size);
                }
                break;

            case c_dv:
                // The first sizeof(uint64_t) bytes of the destination are
                // skipped and only the remaining size - sizeof(uint64_t)
                // bytes are received.
                // NOTE(review): presumably this keeps the descriptor's first
                // field untouched on this side - confirm against ArrDesc.
                if (m_vars[i].direction.bits ||
                    m_vars[i].alloc_if ||
                    m_vars[i].free_if) {
                    char* ptr = m_vars[i].into ?
                                 static_cast<char*>(m_vars[i].into) :
                                 static_cast<char*>(m_vars[i].ptr);
                    m_in.receive_data(ptr + sizeof(uint64_t),
                                      m_vars[i].size - sizeof(uint64_t));
                }
                break;

            // No payload is received here for pointer and dope-vector data
            // types.
            case c_string_ptr:
            case c_data_ptr:
            case c_string_ptr_ptr:
            case c_data_ptr_ptr:
            case c_cean_var_ptr:
            case c_cean_var_ptr_ptr:
            case c_dv_ptr:
            case c_dv_data:
            case c_dv_ptr_data:
            case c_dv_data_slice:
            case c_dv_ptr_data_slice:
                break;

            case c_func_ptr:
            case c_func_ptr_ptr:
                // Function pointers are received via receive_func_ptr and
                // are always stored through m_vars[i].ptr.
                if (m_vars[i].direction.in) {
                    m_in.receive_func_ptr((const void**) m_vars[i].ptr);
                }
                break;

            default:
                LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars_extra[i].type_dst);
                abort();
        }
    }

    OFFLOAD_TRACE(1, "Total copyin data received from host: [%lld] bytes\n",
                  m_in.get_tfr_size());

    OFFLOAD_TIMER_STOP(c_offload_target_scatter_inputs);

    // Left running on return.
    OFFLOAD_TIMER_START(c_offload_target_compute);
}