// Writes the dispatch timing associated with hsa_signal into *time, using the
// GPU agent identified by agent_handle to translate it.
//
// Checks run in this order: IS_OPEN(), IS_BAD_PTR(time), IS_VALID() on the
// converted agent, IS_VALID() on the converted signal. Those macros are
// defined elsewhere and decide what is returned when a check fails.
//
// Returns HSA_STATUS_ERROR_INVALID_AGENT when the agent's device type is not
// kAmdGpuDevice, and HSA_STATUS_SUCCESS once TranslateTime() has been called.
hsa_status_t HSA_API hsa_amd_profiling_get_dispatch_time(
    hsa_agent_t agent_handle, hsa_signal_t hsa_signal,
    hsa_amd_profiling_dispatch_time_t* time) {
  IS_OPEN();
  IS_BAD_PTR(time);

  core::Agent* agent_obj = core::Agent::Convert(agent_handle);
  IS_VALID(agent_obj);

  core::Signal* signal_obj = core::Signal::Convert(hsa_signal);
  IS_VALID(signal_obj);

  // Only GPU agents are downcast to GpuAgentInt; anything else is rejected.
  if (agent_obj->device_type() == core::Agent::kAmdGpuDevice) {
    static_cast<amd::GpuAgentInt*>(agent_obj)->TranslateTime(signal_obj, *time);
    return HSA_STATUS_SUCCESS;
  }

  return HSA_STATUS_ERROR_INVALID_AGENT;
}
// Forwards a compute-unit mask to the queue behind the given handle.
//
// Checks run in this order: IS_OPEN(), IS_BAD_PTR(cu_mask), IS_VALID() on the
// converted queue. Those macros are defined elsewhere and decide what is
// returned when a check fails.
//
// Returns whatever core::Queue::SetCUMasking() returns for
// (num_cu_mask_count, cu_mask).
hsa_status_t HSA_API hsa_amd_queue_cu_set_mask(const hsa_queue_t* queue,
                                               uint32_t num_cu_mask_count,
                                               const uint32_t* cu_mask) {
  IS_OPEN();
  IS_BAD_PTR(cu_mask);

  core::Queue* target_queue = core::Queue::Convert(queue);
  IS_VALID(target_queue);

  const hsa_status_t status =
      target_queue->SetCUMasking(num_cu_mask_count, cu_mask);
  return status;
}
// Registers handler (with its user argument arg) on hsa_signal for the given
// condition and comparison value by delegating to the runtime singleton.
//
// Checks run in this order: IS_OPEN(), IS_VALID() on the converted signal,
// IS_BAD_PTR(handler). Those macros are defined elsewhere and decide what is
// returned when a check fails.
//
// Returns the result of Runtime::SetAsyncSignalHandler().
hsa_status_t HSA_API hsa_amd_signal_async_handler(
    hsa_signal_t hsa_signal, hsa_signal_condition_t cond,
    hsa_signal_value_t value, hsa_amd_signal_handler handler, void* arg) {
  IS_OPEN();

  // The converted object is used for validation only; the runtime call below
  // receives the original handle.
  core::Signal* signal_obj = core::Signal::Convert(hsa_signal);
  IS_VALID(signal_obj);

  IS_BAD_PTR(handler);

  const hsa_status_t status =
      core::Runtime::runtime_singleton_->SetAsyncSignalHandler(
          hsa_signal, cond, value, handler, arg);
  return status;
}
// Reports the memory type of the GPU agent identified by agent_handle.
//
// Checks run in this order: IS_OPEN(), IS_VALID() on the converted agent,
// IS_BAD_PTR(type). Those macros are defined elsewhere and decide what is
// returned when a check fails.
//
// Returns HSA_STATUS_ERROR_INVALID_AGENT when the agent's device type is not
// kAmdGpuDevice; otherwise stores the agent's memory_type() in *type and
// returns HSA_STATUS_SUCCESS.
hsa_status_t HSA_API hsa_ext_get_memory_type(hsa_agent_t agent_handle,
                                             hsa_amd_memory_type_t* type) {
  // Same entry guard the other API entry points in this file begin with;
  // it was missing here, so the handle was converted without that check.
  IS_OPEN();

  const core::Agent* agent = core::Agent::Convert(agent_handle);

  IS_VALID(agent);

  IS_BAD_PTR(type);

  // Only GPU agents are downcast to GpuAgentInt; anything else is rejected.
  if (agent->device_type() != core::Agent::kAmdGpuDevice) {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  const amd::GpuAgentInt* gpu_agent =
      static_cast<const amd::GpuAgentInt*>(agent);

  *type = gpu_agent->memory_type();

  return HSA_STATUS_SUCCESS;
}
Exemple #5
0
/*
 * Start the read or write described by @ios: walk the iovec segments held in
 * the ccc_io and queue each one into a freshly obtained I/O group with
 * llu_queue_pio(), then fold the group's result into the I/O session.
 *
 * Exit values visible in this function:
 *   - PTR_ERR(iogroup), via RETURN(), when get_io_group() yields an error
 *     pointer;
 *   - the non-zero result of ccc_prep_size();
 *   - 0 for a read for which ccc_prep_size() set `exceed`;
 *   - -EFAULT when a segment fails the IS_BAD_PTR() checks;
 *   - -EFBIG when a write position is at or beyond lli->lli_maxbytes;
 *   - a negative return from llu_queue_pio();
 *   - 0 after the segment loop and session accounting complete.
 */
static int slp_io_start(const struct lu_env *env, const struct cl_io_slice *ios)
{
        struct ccc_io     *cio   = cl2ccc_io(env, ios);
        struct cl_io      *io    = ios->cis_io;
        struct cl_object  *obj   = io->ci_obj;
        struct inode      *inode = ccc_object_inode(obj);
        int    err, ret;
        loff_t pos;
        long   cnt;
        struct llu_io_group *iogroup;
        struct lustre_rw_params p = {0};
        int iovidx;
        struct intnl_stat *st = llu_i2stat(inode);
        struct llu_inode_info *lli = llu_i2info(inode);
        struct llu_io_session *session = cl2slp_io(env, ios)->sio_session;
        int write = io->ci_type == CIT_WRITE;
        int exceed = 0;

        CLOBINVRNT(env, obj, ccc_object_invariant(obj));

        /* Starting offset and byte count come from the read or the write
         * arm of the io union, depending on the I/O type. */
        if (write) {
                pos = io->u.ci_wr.wr.crw_pos;
                cnt = io->u.ci_wr.wr.crw_count;
        } else {
                pos = io->u.ci_rd.rd.crw_pos;
                cnt = io->u.ci_rd.rd.crw_count;
        }
        /* Append uses lock mode LCK_PW; everything else uses LCK_NL together
         * with the OBD_BRW_SRVLOCK flag.
         * NOTE(review): wr_append belongs to the write arm (ci_wr) but is
         * tested here for reads as well -- confirm it is reliably zero for
         * CIT_READ. */
        if (io->u.ci_wr.wr_append) {
                p.lrp_lock_mode = LCK_PW;
        } else {
                p.lrp_brw_flags = OBD_BRW_SRVLOCK;
                p.lrp_lock_mode = LCK_NL;
        }

        iogroup = get_io_group(inode, max_io_pages(cnt, cio->cui_nrsegs), &p);
        if (IS_ERR(iogroup))
                RETURN(PTR_ERR(iogroup));

        /* From here on every exit goes through `out`, which releases the
         * group with put_io_group(). */
        err = ccc_prep_size(env, obj, io, pos, cnt, &exceed);
        /* A read with `exceed` set leaves here with err still 0. */
        if (err != 0 || (write == 0 && exceed != 0))
                GOTO(out, err);

        CDEBUG(D_INODE,
               "%s ino %lu, %lu bytes, offset "LPU64", i_size "LPU64"\n",
               write ? "Write" : "Read", (unsigned long)st->st_ino,
               cnt, (__u64)pos, (__u64)st->st_size);

        /* Append writes start at the current st_size; the position stored in
         * the io is updated to match. */
        if (write && io->u.ci_wr.wr_append)
                pos = io->u.ci_wr.wr.crw_pos = st->st_size; /* XXX? Do we need to change io content too here? */
                /* XXX What about if one write syscall writes at 2 different offsets? */

        for (iovidx = 0; iovidx < cio->cui_nrsegs; iovidx++) {
                char *buf = (char *) cio->cui_iov[iovidx].iov_base;
                long count = cio->cui_iov[iovidx].iov_len;

                /* Skip empty segments. */
                if (!count)
                        continue;
                /* Never queue more than the bytes still outstanding. */
                if (cnt < count)
                        count = cnt;
                if (IS_BAD_PTR(buf) || IS_BAD_PTR(buf + count)) {
                        GOTO(out, err = -EFAULT);
                }

                if (io->ci_type == CIT_READ) {
                        /* Stop reading once the position reaches st_size. */
                        if (/* local_lock && */ pos >= st->st_size)
                                break;
                } else if (io->ci_type == CIT_WRITE) {
                        /* Writes at or past lli_maxbytes fail; a write that
                         * would reach or cross it is trimmed to end there. */
                        if (pos >= lli->lli_maxbytes) {
                                GOTO(out, err = -EFBIG);
                        }
                        if (pos + count >= lli->lli_maxbytes)
                                count = lli->lli_maxbytes - pos;
                } else {
                        /* Only CIT_READ and CIT_WRITE are handled here. */
                        LBUG();
                }

                ret = llu_queue_pio(env, io, iogroup, buf, count, pos);
                if (ret < 0) {
                        GOTO(out, err = ret);
                } else {
                        /* Advance by the amount llu_queue_pio() reported. */
                        io->ci_nob += ret;
                        pos += ret;
                        cnt -= ret;
                        if (io->ci_type == CIT_WRITE) {
//                                obd_adjust_kms(exp, lsm, pos, 0); // XXX
                                /* Grow the stat size when the write moved
                                 * past it. */
                                if (pos > st->st_size)
                                        st->st_size = pos;
                        }
                        if (!cnt)
                                break;
                }
        }
        LASSERT(cnt == 0 || io->ci_type == CIT_READ); /* libsysio should guarantee this */

        /* Session accounting: add the group's byte count when the group has
         * no error, otherwise record the group's error unless the session
         * already holds one. The GOTO(out, ...) exits above skip this. */
        if (!iogroup->lig_rc)
                session->lis_rwcount += iogroup->lig_rwcount;
        else if (!session->lis_rc)
                session->lis_rc = iogroup->lig_rc;
        err = 0;

out:
        put_io_group(iogroup);
        return err;
}