static int kfd_ioctl_get_clock_counters(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_clock_counters_args *args = data;
	struct kfd_dev *dev;
	struct timespec64 time;

	dev = kfd_device_by_id(args->gpu_id);
	if (dev == NULL)
		return -EINVAL;

	/* Reading GPU clock counter from KGD */
	args->gpu_clock_counter =
		dev->kfd2kgd->get_gpu_clock_counter(dev->kgd);

	/* No access to rdtsc. Using raw monotonic time */
	getrawmonotonic64(&time);
	args->cpu_clock_counter = (uint64_t)timespec64_to_ns(&time);

	get_monotonic_boottime64(&time);
	args->system_clock_counter = (uint64_t)timespec64_to_ns(&time);

	/* Since the counter is in nano-seconds we use 1GHz frequency */
	args->system_clock_freq = 1000000000;

	return 0;
}
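
/*
 * A minimal user-space sketch of driving the handler above; hedged, not the
 * canonical ROCr/Thunk code path. Assumes the uapi header linux/kfd_ioctl.h,
 * an open fd on /dev/kfd, and a gpu_id discovered from the KFD topology
 * (/sys/class/kfd/kfd/topology/nodes/). Later sketches reuse these includes.
 */
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

static int query_clock_counters(int kfd_fd, uint32_t gpu_id)
{
	struct kfd_ioctl_get_clock_counters_args args = { .gpu_id = gpu_id };

	if (ioctl(kfd_fd, AMDKFD_IOC_GET_CLOCK_COUNTERS, &args) < 0)
		return -1;

	/*
	 * The cpu/system counters are nanosecond timestamps, hence the 1 GHz
	 * system_clock_freq; gpu_clock_counter ticks at the GPU's own rate.
	 */
	printf("gpu %llu cpu %llu boot %llu freq %llu\n",
	       (unsigned long long)args.gpu_clock_counter,
	       (unsigned long long)args.cpu_clock_counter,
	       (unsigned long long)args.system_clock_counter,
	       (unsigned long long)args.system_clock_freq);
	return 0;
}
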
static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_scratch_backing_va_args *args = data;
	struct kfd_process_device *pdd;
	struct kfd_dev *dev;
	long err;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = PTR_ERR(pdd);
		goto bind_process_to_device_fail;
	}

	pdd->qpd.sh_hidden_private_base = args->va_addr;

	mutex_unlock(&p->mutex);

	if (sched_policy == KFD_SCHED_POLICY_NO_HWS && pdd->qpd.vmid != 0)
		dev->kfd2kgd->set_scratch_backing_va(
			dev->kgd, args->va_addr, pdd->qpd.vmid);

	return 0;

bind_process_to_device_fail:
	mutex_unlock(&p->mutex);
	return err;
}
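
/*
 * Matching user-space call, as a hedged sketch (includes as in the first
 * sketch above): va_addr is a process virtual address the caller has
 * reserved as scratch (private segment) backing memory for this gpu_id.
 */
static int set_scratch_va(int kfd_fd, uint32_t gpu_id, uint64_t va_addr)
{
	struct kfd_ioctl_set_scratch_backing_va_args args = {
		.va_addr = va_addr,
		.gpu_id = gpu_id,
	};

	return ioctl(kfd_fd, AMDKFD_IOC_SET_SCRATCH_BACKING_VA, &args);
}
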
static int kfd_ioctl_dbg_unregister(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_unregister_args *args = data;
	struct kfd_dev *dev;
	long status;

	dev = kfd_device_by_id(args->gpu_id);
	if (dev == NULL)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n");
		return -EINVAL;
	}

	mutex_lock(kfd_get_dbgmgr_mutex());

	status = kfd_dbgmgr_unregister(dev->dbgmgr, p);
	if (status == 0) {
		kfd_dbgmgr_destroy(dev->dbgmgr);
		dev->dbgmgr = NULL;
	}

	mutex_unlock(kfd_get_dbgmgr_mutex());

	return status;
}
static int kfd_ioctl_set_trap_handler(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_trap_handler_args *args = data;
	struct kfd_dev *dev;
	int err = 0;
	struct kfd_process_device *pdd;

	dev = kfd_device_by_id(args->gpu_id);
	if (dev == NULL)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto out;
	}

	if (dev->dqm->ops.set_trap_handler(dev->dqm,
					&pdd->qpd,
					args->tba_addr,
					args->tma_addr))
		err = -EINVAL;

out:
	mutex_unlock(&p->mutex);

	return err;
}
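
/*
 * Hedged user-space sketch for the trap-handler ioctl above: tba_addr is the
 * GPU virtual address of the trap-handler code (TBA), tma_addr of the
 * trap-handler memory area (TMA). How those buffers are allocated and mapped
 * for the GPU is outside this sketch; includes as in the first sketch.
 */
static int install_trap_handler(int kfd_fd, uint32_t gpu_id,
				uint64_t tba_gpuva, uint64_t tma_gpuva)
{
	struct kfd_ioctl_set_trap_handler_args args = {
		.tba_addr = tba_gpuva,
		.tma_addr = tma_gpuva,
		.gpu_id = gpu_id,
	};

	return ioctl(kfd_fd, AMDKFD_IOC_SET_TRAP_HANDLER, &args);
}
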
/*
 * Legacy variant of kfd_ioctl_get_clock_counters: argument marshalling done
 * per-handler with copy_from_user/copy_to_user, via the global kfd2kgd
 * interface, before the common ioctl entry point took over argument copying.
 */
static long kfd_ioctl_get_clock_counters(struct file *filep,
				struct kfd_process *p, void __user *arg)
{
	struct kfd_ioctl_get_clock_counters_args args;
	struct kfd_dev *dev;
	struct timespec time;

	if (copy_from_user(&args, arg, sizeof(args)))
		return -EFAULT;

	dev = kfd_device_by_id(args.gpu_id);
	if (dev == NULL)
		return -EINVAL;

	/* Reading GPU clock counter from KGD */
	args.gpu_clock_counter = kfd2kgd->get_gpu_clock_counter(dev->kgd);

	/* No access to rdtsc. Using raw monotonic time */
	getrawmonotonic(&time);
	args.cpu_clock_counter = (uint64_t)timespec_to_ns(&time);

	get_monotonic_boottime(&time);
	args.system_clock_counter = (uint64_t)timespec_to_ns(&time);

	/* Since the counter is in nano-seconds we use 1GHz frequency */
	args.system_clock_freq = 1000000000;

	if (copy_to_user(arg, &args, sizeof(args)))
		return -EFAULT;

	return 0;
}
/*
 * Legacy variant of kfd_ioctl_set_memory_policy, using explicit
 * copy_from_user marshalling and the pre-ops dqm interface.
 */
static long kfd_ioctl_set_memory_policy(struct file *filep,
				struct kfd_process *p, void __user *arg)
{
	struct kfd_ioctl_set_memory_policy_args args;
	struct kfd_dev *dev;
	int err = 0;
	struct kfd_process_device *pdd;
	enum cache_policy default_policy, alternate_policy;

	if (copy_from_user(&args, arg, sizeof(args)))
		return -EFAULT;

	if (args.default_policy != KFD_IOC_CACHE_POLICY_COHERENT
	    && args.default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT)
		return -EINVAL;

	if (args.alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT
	    && args.alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT)
		return -EINVAL;

	dev = kfd_device_by_id(args.gpu_id);
	if (dev == NULL)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = PTR_ERR(pdd);
		goto out;
	}

	default_policy = (args.default_policy == KFD_IOC_CACHE_POLICY_COHERENT)
			 ? cache_policy_coherent : cache_policy_noncoherent;

	alternate_policy =
		(args.alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
		   ? cache_policy_coherent : cache_policy_noncoherent;

	if (!dev->dqm->set_cache_memory_policy(dev->dqm,
				&pdd->qpd,
				default_policy,
				alternate_policy,
				(void __user *)args.alternate_aperture_base,
				args.alternate_aperture_size))
		err = -EINVAL;

out:
	mutex_unlock(&p->mutex);

	return err;
}
static int kfd_ioctl_dbg_register(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_register_args *args = data;
	struct kfd_dev *dev;
	struct kfd_dbgmgr *dbgmgr_ptr;
	struct kfd_process_device *pdd;
	bool create_ok;
	long status = 0;

	dev = kfd_device_by_id(args->gpu_id);
	if (dev == NULL)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
		return -EINVAL;
	}

	mutex_lock(kfd_get_dbgmgr_mutex());
	mutex_lock(&p->mutex);

	/*
	 * make sure that we have pdd, if this is the first queue created for
	 * this process
	 */
	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		mutex_unlock(&p->mutex);
		mutex_unlock(kfd_get_dbgmgr_mutex());
		return PTR_ERR(pdd);
	}

	if (dev->dbgmgr == NULL) {
		/* In case of a legal call, we have no dbgmgr yet */
		create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
		if (create_ok) {
			status = kfd_dbgmgr_register(dbgmgr_ptr, p);
			if (status != 0)
				kfd_dbgmgr_destroy(dbgmgr_ptr);
			else
				dev->dbgmgr = dbgmgr_ptr;
		}
	} else {
		pr_debug("debugger already registered\n");
		status = -EINVAL;
	}

	mutex_unlock(&p->mutex);
	mutex_unlock(kfd_get_dbgmgr_mutex());

	return status;
}
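
/*
 * Sketch of the user-space register/unregister pair; both ioctls take only a
 * gpu_id. Only one debugger may be attached to a device at a time, which is
 * why the handler above rejects a second registration with -EINVAL.
 */
static int dbg_attach(int kfd_fd, uint32_t gpu_id, int attach)
{
	struct kfd_ioctl_dbg_register_args reg = { .gpu_id = gpu_id };
	struct kfd_ioctl_dbg_unregister_args unreg = { .gpu_id = gpu_id };

	return attach ? ioctl(kfd_fd, AMDKFD_IOC_DBG_REGISTER, &reg)
		      : ioctl(kfd_fd, AMDKFD_IOC_DBG_UNREGISTER, &unreg);
}
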
static int kfd_ioctl_set_memory_policy(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_memory_policy_args *args = data;
	struct kfd_dev *dev;
	int err = 0;
	struct kfd_process_device *pdd;
	enum cache_policy default_policy, alternate_policy;

	if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT
	    && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT)
		return -EINVAL;

	if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT
	    && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT)
		return -EINVAL;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto out;
	}

	default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT)
			 ? cache_policy_coherent : cache_policy_noncoherent;

	alternate_policy =
		(args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
		   ? cache_policy_coherent : cache_policy_noncoherent;

	if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm,
				&pdd->qpd,
				default_policy,
				alternate_policy,
				(void __user *)args->alternate_aperture_base,
				args->alternate_aperture_size))
		err = -EINVAL;

out:
	mutex_unlock(&p->mutex);

	return err;
}
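
/*
 * Sketch of a legal call into the handler above: both policies must be one
 * of the two KFD_IOC_CACHE_POLICY_* values; a zero-sized alternate aperture
 * means no range gets the alternate policy. Includes as in the first sketch.
 */
static int set_coherent_default(int kfd_fd, uint32_t gpu_id)
{
	struct kfd_ioctl_set_memory_policy_args args = {
		.alternate_aperture_base = 0,
		.alternate_aperture_size = 0,
		.gpu_id = gpu_id,
		.default_policy = KFD_IOC_CACHE_POLICY_COHERENT,
		.alternate_policy = KFD_IOC_CACHE_POLICY_NONCOHERENT,
	};

	return ioctl(kfd_fd, AMDKFD_IOC_SET_MEMORY_POLICY, &args);
}
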
int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma)
{
	phys_addr_t address;
	struct kfd_dev *dev;

	/*
	 * For simplicity we only allow mapping of the entire doorbell
	 * allocation of a single device & process.
	 */
	if (vma->vm_end - vma->vm_start != doorbell_process_allocation())
		return -EINVAL;

	/* Find kfd device according to gpu id */
	dev = kfd_device_by_id(vma->vm_pgoff);
	if (dev == NULL)
		return -EINVAL;

	/* Find if pdd exists for combination of process and gpu id */
	if (!kfd_get_process_device_data(dev, process, 0))
		return -EINVAL;

	/* Calculate physical address of doorbell */
	address = kfd_get_process_doorbells(dev, process);

	vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
				VM_DONTDUMP | VM_PFNMAP;

	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

	pr_debug("kfd: mapping doorbell page in kfd_doorbell_mmap\n"
		 "     target user address == 0x%08llX\n"
		 "     physical address    == 0x%08llX\n"
		 "     vm_flags            == 0x%04lX\n"
		 "     size                == 0x%04lX\n",
		 (unsigned long long) vma->vm_start, address, vma->vm_flags,
		 doorbell_process_allocation());

	return io_remap_pfn_range(vma,
				vma->vm_start,
				address >> PAGE_SHIFT,
				doorbell_process_allocation(),
				vma->vm_page_prot);
}
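
/*
 * User-space counterpart of kfd_doorbell_mmap(), as a sketch: the
 * doorbell_offset returned by AMDKFD_IOC_CREATE_QUEUE is passed verbatim as
 * the mmap offset, and the length must equal the process doorbell
 * allocation. A single page is assumed here, matching typical
 * doorbell_process_allocation() values; a robust caller would query it.
 */
#include <sys/mman.h>
#include <unistd.h>

static volatile uint32_t *map_doorbells(int kfd_fd, uint64_t doorbell_offset)
{
	size_t len = (size_t)sysconf(_SC_PAGESIZE);
	void *db = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED,
			kfd_fd, (off_t)doorbell_offset);

	return db == MAP_FAILED ? NULL : (volatile uint32_t *)db;
}
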
static int kfd_ioctl_get_tile_config(struct file *filep,
		struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_tile_config_args *args = data;
	struct kfd_dev *dev;
	struct tile_config config;
	int err = 0;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	dev->kfd2kgd->get_tile_config(dev->kgd, &config);

	args->gb_addr_config = config.gb_addr_config;
	args->num_banks = config.num_banks;
	args->num_ranks = config.num_ranks;

	if (args->num_tile_configs > config.num_tile_configs)
		args->num_tile_configs = config.num_tile_configs;
	err = copy_to_user((void __user *)args->tile_config_ptr,
			config.tile_config_ptr,
			args->num_tile_configs * sizeof(uint32_t));
	if (err) {
		args->num_tile_configs = 0;
		return -EFAULT;
	}

	if (args->num_macro_tile_configs > config.num_macro_tile_configs)
		args->num_macro_tile_configs =
				config.num_macro_tile_configs;
	err = copy_to_user((void __user *)args->macro_tile_config_ptr,
			config.macro_tile_config_ptr,
			args->num_macro_tile_configs * sizeof(uint32_t));
	if (err) {
		args->num_macro_tile_configs = 0;
		return -EFAULT;
	}

	return 0;
}
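
/*
 * Caller's side of the clamping contract above, sketched: user space
 * advertises array capacities in num_*_configs, the kernel clamps them to
 * what the hardware has and copies back that many uint32_t entries. The
 * capacity of 64 is arbitrary; includes as in the first sketch.
 */
static int read_tile_config(int kfd_fd, uint32_t gpu_id)
{
	uint32_t tile[64], macro_tile[64];
	struct kfd_ioctl_get_tile_config_args args = {
		.tile_config_ptr = (uint64_t)(uintptr_t)tile,
		.macro_tile_config_ptr = (uint64_t)(uintptr_t)macro_tile,
		.num_tile_configs = 64,
		.num_macro_tile_configs = 64,
		.gpu_id = gpu_id,
	};

	if (ioctl(kfd_fd, AMDKFD_IOC_GET_TILE_CONFIG, &args) < 0)
		return -1;

	/* args.num_*_configs now hold the counts actually written. */
	return 0;
}
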
/* Parse and generate fixed size data structure for wave control */
static int kfd_ioctl_dbg_wave_control(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_wave_control_args *args = data;
	struct kfd_dev *dev;
	struct dbg_wave_control_info wac_info;
	unsigned char *args_buff;
	uint32_t computed_buff_size;
	long status;
	void __user *cmd_from_user;
	unsigned int args_idx = 0;

	memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info));

	/* we use compact form, independent of the packing attribute value */
	computed_buff_size = sizeof(*args) +
				sizeof(wac_info.mode) +
				sizeof(wac_info.operand) +
				sizeof(wac_info.dbgWave_msg.DbgWaveMsg) +
				sizeof(wac_info.dbgWave_msg.MemoryVA) +
				sizeof(wac_info.trapId);

	dev = kfd_device_by_id(args->gpu_id);
	if (dev == NULL)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
		return -EINVAL;
	}

	/* input size must match the computed "compact" size */
	if (args->buf_size_in_bytes != computed_buff_size) {
		pr_debug("size mismatch, computed : actual %u : %u\n",
				args->buf_size_in_bytes, computed_buff_size);
		return -EINVAL;
	}

	cmd_from_user = (void __user *) args->content_ptr;

	if (cmd_from_user == NULL)
		return -EINVAL;

	/* copy the entire buffer from user */
	args_buff = memdup_user(cmd_from_user,
				args->buf_size_in_bytes - sizeof(*args));
	if (IS_ERR(args_buff))
		return PTR_ERR(args_buff);

	/* move ptr to the start of the "pay-load" area */
	wac_info.process = p;

	wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
	args_idx += sizeof(wac_info.operand);

	wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
	args_idx += sizeof(wac_info.mode);

	wac_info.trapId = *((uint32_t *)(&args_buff[args_idx]));
	args_idx += sizeof(wac_info.trapId);

	wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value =
					*((uint32_t *)(&args_buff[args_idx]));
	wac_info.dbgWave_msg.MemoryVA = NULL;

	mutex_lock(kfd_get_dbgmgr_mutex());

	pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
			wac_info.process, wac_info.operand,
			wac_info.mode, wac_info.trapId,
			wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);

	status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);

	pr_debug("Returned status of dbg manager is %ld\n", status);

	mutex_unlock(kfd_get_dbgmgr_mutex());

	kfree(args_buff);

	return status;
}
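
/*
 * Sketch of building the "compact form" payload the handler above expects.
 * The layout must mirror the kernel's sizeof() chain exactly; this assumes
 * 4-byte HSA_DBG_WAVEOP/HSA_DBG_WAVEMODE enums and an 8-byte MemoryVA
 * pointer, i.e. a 64-bit build. The operand/mode/trap-id/message values are
 * caller-supplied; includes as in the first sketch, plus string.h.
 */
#include <string.h>

static int dbg_wave_control(int kfd_fd, uint32_t gpu_id, uint32_t operand,
			    uint32_t mode, uint32_t trap_id, uint32_t msg_value)
{
	/* operand, mode, trapId, DbgWaveMsg value, then room for MemoryVA */
	unsigned char buf[4 + 4 + 4 + 4 + 8] = {0};
	struct kfd_ioctl_dbg_wave_control_args args = {0};
	unsigned int idx = 0;

	memcpy(&buf[idx], &operand, 4); idx += 4;	/* parsed first */
	memcpy(&buf[idx], &mode, 4); idx += 4;
	memcpy(&buf[idx], &trap_id, 4); idx += 4;
	memcpy(&buf[idx], &msg_value, 4);

	args.gpu_id = gpu_id;
	args.content_ptr = (uint64_t)(uintptr_t)buf;
	args.buf_size_in_bytes = sizeof(args) + sizeof(buf);

	return ioctl(kfd_fd, AMDKFD_IOC_DBG_WAVE_CONTROL, &args);
}
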
/*
 * Parse and generate variable size data structure for address watch.
 * Total size of the buffer and # watch points is limited in order
 * to prevent kernel abuse. (no bearing to the much smaller HW limitation
 * which is enforced by the dbgdev module)
 * Please also note that the watch addresses themselves are not "copied from
 * user", since they are written into the HW as user-mode values.
 */
static int kfd_ioctl_dbg_address_watch(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_address_watch_args *args = data;
	struct kfd_dev *dev;
	struct dbg_address_watch_info aw_info;
	unsigned char *args_buff;
	long status;
	void __user *cmd_from_user;
	uint64_t watch_mask_value = 0;
	unsigned int args_idx = 0;

	memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));

	dev = kfd_device_by_id(args->gpu_id);
	if (dev == NULL)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_address_watch not supported on CZ\n");
		return -EINVAL;
	}

	cmd_from_user = (void __user *) args->content_ptr;

	/* Validate arguments */
	if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) ||
		(args->buf_size_in_bytes <= sizeof(*args) + sizeof(int) * 2) ||
		(cmd_from_user == NULL))
		return -EINVAL;

	/* this is the actual buffer to work with */
	args_buff = memdup_user(cmd_from_user,
				args->buf_size_in_bytes - sizeof(*args));
	if (IS_ERR(args_buff))
		return PTR_ERR(args_buff);

	aw_info.process = p;

	aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx]));
	args_idx += sizeof(aw_info.num_watch_points);

	aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
	args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points;

	/*
	 * set watch address base pointer to point on the array base
	 * within args_buff
	 */
	aw_info.watch_address = (uint64_t *) &args_buff[args_idx];

	/* skip over the addresses buffer */
	args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;

	if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
		kfree(args_buff);
		return -EINVAL;
	}

	watch_mask_value = (uint64_t) args_buff[args_idx];

	if (watch_mask_value > 0) {
		/*
		 * There is an array of masks.
		 * set watch mask base pointer to point on the array base
		 * within args_buff
		 */
		aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];

		/* skip over the masks buffer */
		args_idx += sizeof(aw_info.watch_mask) *
				aw_info.num_watch_points;
	} else {
		/* just the NULL mask, set to NULL and skip over it */
		aw_info.watch_mask = NULL;
		args_idx += sizeof(aw_info.watch_mask);
	}

	if (args_idx >= args->buf_size_in_bytes - sizeof(args)) {
		kfree(args_buff);
		return -EINVAL;
	}

	/* Currently HSA Event is not supported for DBG */
	aw_info.watch_event = NULL;

	mutex_lock(kfd_get_dbgmgr_mutex());

	status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);

	mutex_unlock(kfd_get_dbgmgr_mutex());

	kfree(args_buff);

	return status;
}
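
/*
 * Sketch of the variable-size payload for one watch point with no mask, laid
 * out the way the parser above walks it: a u32 count, count u32 watch modes,
 * count u64 addresses, then either count u64 masks or a single zero u64
 * meaning "no masks". Same 64-bit layout assumptions as the wave-control
 * sketch; the watch-mode/address values are caller-supplied.
 */
static int set_one_watch_point(int kfd_fd, uint32_t gpu_id,
			       uint32_t watch_mode, uint64_t watch_address)
{
	unsigned char buf[4 + 4 + 8 + 8] = {0};
	struct kfd_ioctl_dbg_address_watch_args args = {0};
	unsigned int idx = 0;
	uint32_t num_points = 1;
	uint64_t null_mask = 0;

	memcpy(&buf[idx], &num_points, 4); idx += 4;
	memcpy(&buf[idx], &watch_mode, 4); idx += 4;
	memcpy(&buf[idx], &watch_address, 8); idx += 8;
	memcpy(&buf[idx], &null_mask, 8);	/* leading zero byte => NULL mask */

	args.gpu_id = gpu_id;
	args.content_ptr = (uint64_t)(uintptr_t)buf;
	args.buf_size_in_bytes = sizeof(args) + sizeof(buf);

	return ioctl(kfd_fd, AMDKFD_IOC_DBG_ADDRESS_WATCH, &args);
}
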
static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
					void *data)
{
	struct kfd_ioctl_create_queue_args *args = data;
	struct kfd_dev *dev;
	int err = 0;
	unsigned int queue_id;
	struct kfd_process_device *pdd;
	struct queue_properties q_properties;

	memset(&q_properties, 0, sizeof(struct queue_properties));

	pr_debug("kfd: creating queue ioctl\n");

	err = set_queue_properties_from_user(&q_properties, args);
	if (err)
		return err;

	pr_debug("kfd: looking for gpu id 0x%x\n", args->gpu_id);
	dev = kfd_device_by_id(args->gpu_id);
	if (dev == NULL) {
		pr_debug("kfd: gpu id 0x%x was not found\n", args->gpu_id);
		return -EINVAL;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto err_bind_process;
	}

	pr_debug("kfd: creating queue for PASID %d on GPU 0x%x\n",
			p->pasid,
			dev->id);

	err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, 0,
				q_properties.type, &queue_id);
	if (err != 0)
		goto err_create_queue;

	args->queue_id = queue_id;

	/* Return gpu_id as doorbell offset for mmap usage */
	args->doorbell_offset = (KFD_MMAP_DOORBELL_MASK | args->gpu_id);
	args->doorbell_offset <<= PAGE_SHIFT;

	mutex_unlock(&p->mutex);

	pr_debug("kfd: queue id %d was created successfully\n", args->queue_id);

	pr_debug("ring buffer address == 0x%016llX\n",
			args->ring_base_address);

	pr_debug("read ptr address    == 0x%016llX\n",
			args->read_pointer_address);

	pr_debug("write ptr address   == 0x%016llX\n",
			args->write_pointer_address);

	return 0;

err_create_queue:
err_bind_process:
	mutex_unlock(&p->mutex);
	return err;
}
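
/*
 * User-space sketch feeding the handler above. The ring buffer and the
 * read/write pointers must be valid user memory already mapped for the GPU;
 * queue_percentage 100 and queue_priority 7 are conventional mid-range
 * values, not requirements of this handler. Includes as in the first sketch.
 */
static int create_compute_queue(int kfd_fd, uint32_t gpu_id,
				void *ring, uint32_t ring_bytes,
				uint64_t *rptr, uint64_t *wptr,
				struct kfd_ioctl_create_queue_args *out)
{
	struct kfd_ioctl_create_queue_args args = {
		.ring_base_address = (uint64_t)(uintptr_t)ring,
		.write_pointer_address = (uint64_t)(uintptr_t)wptr,
		.read_pointer_address = (uint64_t)(uintptr_t)rptr,
		.ring_size = ring_bytes,
		.gpu_id = gpu_id,
		.queue_type = KFD_IOC_QUEUE_TYPE_COMPUTE,
		.queue_percentage = 100,
		.queue_priority = 7,
	};

	if (ioctl(kfd_fd, AMDKFD_IOC_CREATE_QUEUE, &args) < 0)
		return -1;

	*out = args;	/* queue_id and doorbell_offset come back here */
	return 0;
}
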
/*
 * Legacy variant of kfd_ioctl_create_queue: each handler did its own
 * copy_from_user/copy_to_user marshalling before the common ioctl entry
 * point took over argument copying.
 */
static long kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
					void __user *arg)
{
	struct kfd_ioctl_create_queue_args args;
	struct kfd_dev *dev;
	int err = 0;
	unsigned int queue_id;
	struct kfd_process_device *pdd;
	struct queue_properties q_properties;

	memset(&q_properties, 0, sizeof(struct queue_properties));

	if (copy_from_user(&args, arg, sizeof(args)))
		return -EFAULT;

	pr_debug("kfd: creating queue ioctl\n");

	err = set_queue_properties_from_user(&q_properties, &args);
	if (err)
		return err;

	dev = kfd_device_by_id(args.gpu_id);
	if (dev == NULL)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = PTR_ERR(pdd);
		goto err_bind_process;
	}

	pr_debug("kfd: creating queue for PASID %d on GPU 0x%x\n",
			p->pasid,
			dev->id);

	err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, 0,
				KFD_QUEUE_TYPE_COMPUTE, &queue_id);
	if (err != 0)
		goto err_create_queue;

	args.queue_id = queue_id;

	/* Return gpu_id as doorbell offset for mmap usage */
	args.doorbell_offset = args.gpu_id << PAGE_SHIFT;

	if (copy_to_user(arg, &args, sizeof(args))) {
		err = -EFAULT;
		goto err_copy_args_out;
	}

	mutex_unlock(&p->mutex);

	pr_debug("kfd: queue id %d was created successfully\n", args.queue_id);

	pr_debug("ring buffer address == 0x%016llX\n",
			args.ring_base_address);

	pr_debug("read ptr address    == 0x%016llX\n",
			args.read_pointer_address);

	pr_debug("write ptr address   == 0x%016llX\n",
			args.write_pointer_address);

	return 0;

err_copy_args_out:
	pqm_destroy_queue(&p->pqm, queue_id);
err_create_queue:
err_bind_process:
	mutex_unlock(&p->mutex);
	return err;
}