/*
 * Reload the blade's kernel context into a GRU chiplet. Called holding
 * the bs_kgts_sema for READ. Will steal user contexts if necessary.
 */
static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id)
{
	struct gru_state *gru;
	struct gru_thread_state *kgts;
	void *vaddr;
	int ctxnum, ncpus;

	up_read(&bs->bs_kgts_sema);
	down_write(&bs->bs_kgts_sema);

	if (!bs->bs_kgts) {
		do {
			bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0, 0);
			if (!IS_ERR(bs->bs_kgts))
				break;
			msleep(1);
		} while (true);
		bs->bs_kgts->ts_user_blade_id = blade_id;
	}
	kgts = bs->bs_kgts;

	if (!kgts->ts_gru) {
		STAT(load_kernel_context);
		ncpus = uv_blade_nr_possible_cpus(blade_id);
		kgts->ts_cbr_au_count = GRU_CB_COUNT_TO_AU(
			GRU_NUM_KERNEL_CBR * ncpus + bs->bs_async_cbrs);
		kgts->ts_dsr_au_count = GRU_DS_BYTES_TO_AU(
			GRU_NUM_KERNEL_DSR_BYTES * ncpus +
			bs->bs_async_dsr_bytes);
		while (!gru_assign_gru_context(kgts)) {
			msleep(1);
			gru_steal_context(kgts);
		}
		gru_load_context(kgts);
		gru = bs->bs_kgts->ts_gru;
		vaddr = gru->gs_gru_base_vaddr;
		ctxnum = kgts->ts_ctxnum;
		bs->kernel_cb = get_gseg_base_address_cb(vaddr, ctxnum, 0);
		bs->kernel_dsr = get_gseg_base_address_ds(vaddr, ctxnum, 0);
	}
	downgrade_write(&bs->bs_kgts_sema);
}
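
The function above enters holding bs_kgts_sema for read, trades it for the write side to initialize the context, and finishes with downgrade_write() so it returns to its caller holding the semaphore for read again. The following is a minimal sketch of that idiom, not taken from the driver; the names my_state and my_ensure_ctx are hypothetical.

#include <linux/rwsem.h>
#include <linux/slab.h>

struct my_state {
	struct rw_semaphore sem;
	void *ctx;			/* lazily initialized shared context */
};

/*
 * Entered and exited holding s->sem for READ, mirroring the contract of
 * gru_load_kernel_context() above.
 */
static void my_ensure_ctx(struct my_state *s)
{
	up_read(&s->sem);		/* an rwsem cannot be upgraded in place... */
	down_write(&s->sem);		/* ...so drop it and reacquire for WRITE */

	if (!s->ctx)			/* recheck: another writer may have won the race */
		s->ctx = kzalloc(64, GFP_KERNEL);

	downgrade_write(&s->sem);	/* atomically become a reader again */
}

The recheck is the price of the upgrade: because the lock is lost between up_read() and down_write(), another thread may already have done the initialization, which is also why gru_load_kernel_context() retests bs->bs_kgts and kgts->ts_gru after reacquiring.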
void ib_umem_odp_release(struct ib_umem *umem)
{
	struct ib_ucontext *context = umem->context;

	/*
	 * Ensure that no more pages are mapped in the umem.
	 *
	 * It is the driver's responsibility to ensure, before calling us,
	 * that the hardware will not attempt to access the MR any more.
	 */
	ib_umem_odp_unmap_dma_pages(umem, ib_umem_start(umem),
				    ib_umem_end(umem));

	down_write(&context->umem_mutex);
	if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
		rbt_ib_umem_remove(&umem->odp_data->interval_tree,
				   &context->umem_tree);
	context->odp_mrs_count--;

	/*
	 * Downgrade the lock to a read lock. This ensures that the notifiers
	 * (who lock the mutex for reading) will be able to finish, and we
	 * will eventually be able to obtain the mmu notifiers SRCU. Note
	 * that since the downgrade is atomic, no other user could register
	 * or unregister while we do the check.
	 */
	downgrade_write(&context->umem_mutex);

	if (!context->odp_mrs_count) {
		struct task_struct *owning_process = NULL;
		struct mm_struct *owning_mm = NULL;

		owning_process = get_pid_task(context->tgid, PIDTYPE_PID);
		if (owning_process == NULL)
			/*
			 * The process is already dead; the notifiers were
			 * already removed.
			 */
			goto out;

		owning_mm = get_task_mm(owning_process);
		if (owning_mm == NULL)
			/*
			 * The process's mm is already dead; the notifiers
			 * were already removed.
			 */
			goto out_put_task;
		mmu_notifier_unregister(&context->mn, owning_mm);

		mmput(owning_mm);

out_put_task:
		put_task_struct(owning_process);
	}
out:
	up_read(&context->umem_mutex);

	vfree(umem->odp_data->dma_list);
	vfree(umem->odp_data->page_list);
	kfree(umem->odp_data);
	kfree(umem);
}
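
The release path above mutates the shared registry (the tree removal and odp_mrs_count--) under the write lock, then downgrades before the slow last-reference teardown, so the MMU notifiers, which take umem_mutex for reading, can keep running while the count check stays race-free. A condensed sketch of that shape follows; my_registry and my_slow_teardown are hypothetical names, not part of the code above.

#include <linux/rwsem.h>

struct my_registry {
	struct rw_semaphore lock;
	int nr_users;
};

static void my_slow_teardown(struct my_registry *r)
{
	/* hypothetical last-user cleanup; runs with only the read lock held */
}

static void my_unregister(struct my_registry *r)
{
	down_write(&r->lock);
	r->nr_users--;			/* mutating the registry needs the write side */

	/*
	 * Downgrade before the slow path: readers (the notifiers, in the
	 * code above) may proceed again, yet no writer can change nr_users
	 * between the decrement and the check below.
	 */
	downgrade_write(&r->lock);

	if (!r->nr_users)
		my_slow_teardown(r);

	up_read(&r->lock);
}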
int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
{
	int ret_val;
	struct pid *our_pid;
	struct mm_struct *mm = get_task_mm(current);

	BUG_ON(!mm);

	/* Prevent creating ODP MRs in child processes */
	rcu_read_lock();
	our_pid = get_task_pid(current->group_leader, PIDTYPE_PID);
	rcu_read_unlock();
	put_pid(our_pid);
	if (context->tgid != our_pid) {
		ret_val = -EINVAL;
		goto out_mm;
	}

	umem->hugetlb = 0;
	umem->odp_data = kzalloc(sizeof(*umem->odp_data), GFP_KERNEL);
	if (!umem->odp_data) {
		ret_val = -ENOMEM;
		goto out_mm;
	}
	umem->odp_data->umem = umem;

	mutex_init(&umem->odp_data->umem_mutex);
	init_completion(&umem->odp_data->notifier_completion);

	umem->odp_data->page_list = vzalloc(ib_umem_num_pages(umem) *
					    sizeof(*umem->odp_data->page_list));
	if (!umem->odp_data->page_list) {
		ret_val = -ENOMEM;
		goto out_odp_data;
	}

	umem->odp_data->dma_list = vzalloc(ib_umem_num_pages(umem) *
					   sizeof(*umem->odp_data->dma_list));
	if (!umem->odp_data->dma_list) {
		ret_val = -ENOMEM;
		goto out_page_list;
	}

	/*
	 * When using MMU notifiers, we will get a
	 * notification before the "current" task (and MM) is
	 * destroyed. We use the umem_mutex lock to synchronize.
	 */
	down_write(&context->umem_mutex);
	context->odp_mrs_count++;
	if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
		rbt_ib_umem_insert(&umem->odp_data->interval_tree,
				   &context->umem_tree);
	downgrade_write(&context->umem_mutex);

	if (context->odp_mrs_count == 1) {
		/*
		 * Note that at this point, no MMU notifier is running
		 * for this context!
		 */
		INIT_HLIST_NODE(&context->mn.hlist);
		context->mn.ops = &ib_umem_notifiers;
		/*
		 * Lockdep detects a false positive for mmap_sem vs.
		 * umem_mutex, due to not grasping downgrade_write correctly.
		 */
		lockdep_off();
		ret_val = mmu_notifier_register(&context->mn, mm);
		lockdep_on();
		if (ret_val) {
			pr_err("Failed to register mmu_notifier %d\n", ret_val);
			ret_val = -EBUSY;
			goto out_mutex;
		}
	}

	up_read(&context->umem_mutex);

	/*
	 * Note that doing an mmput can cause a notifier for the relevant mm.
	 * If the notifier is called while we hold the umem_mutex, this will
	 * cause a deadlock. Therefore, we release the reference only after we
	 * released the mutex.
	 */
	mmput(mm);
	return 0;

out_mutex:
	up_read(&context->umem_mutex);
	vfree(umem->odp_data->dma_list);
out_page_list:
	vfree(umem->odp_data->page_list);
out_odp_data:
	kfree(umem->odp_data);
out_mm:
	mmput(mm);
	return ret_val;
}
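
ib_umem_odp_get() is the dual of the release path: the count is incremented and the interval tree updated under the write lock, then the lock is downgraded so the first-user setup (mmu_notifier_register()) runs with readers admitted, while the odp_mrs_count == 1 check cannot race with another register or unregister. Below is a sketch of that first-user idiom, mirroring the registry sketch above; my_register and my_first_user_setup are hypothetical.

#include <linux/rwsem.h>

struct my_registry {
	struct rw_semaphore lock;
	int nr_users;
};

static int my_first_user_setup(struct my_registry *r)
{
	return 0;			/* hypothetical one-time initialization */
}

static int my_register(struct my_registry *r)
{
	int ret = 0;

	down_write(&r->lock);
	r->nr_users++;			/* mutation under the write side */
	downgrade_write(&r->lock);	/* readers resume; the count is now stable */

	/*
	 * Provably the first user: no other register or unregister can
	 * change nr_users until we drop the read lock.
	 */
	if (r->nr_users == 1)
		ret = my_first_user_setup(r);

	up_read(&r->lock);
	return ret;
}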
static ssize_t clear_refs_write(struct file *file, const char __user *buf,
				size_t count, loff_t *ppos)
{
	struct task_struct *task;
	char buffer[PROC_NUMBUF];
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	enum clear_refs_types type;
	int itype;
	int rv;

	memset(buffer, 0, sizeof(buffer));
	if (count > sizeof(buffer) - 1)
		count = sizeof(buffer) - 1;
	if (copy_from_user(buffer, buf, count))
		return -EFAULT;
	rv = kstrtoint(strstrip(buffer), 10, &itype);
	if (rv < 0)
		return rv;
	type = (enum clear_refs_types)itype;
	if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST)
		return -EINVAL;

	if (type == CLEAR_REFS_SOFT_DIRTY) {
		soft_dirty_cleared = true;
		pr_warn_once("The pagemap bits 55-60 have changed their meaning!"
			     " See the linux/Documentation/vm/pagemap.txt for "
			     "details.\n");
	}

	task = get_proc_task(file_inode(file));
	if (!task)
		return -ESRCH;
	mm = get_task_mm(task);
	if (mm) {
		struct clear_refs_private cp = {
			.type = type,
		};
		struct mm_walk clear_refs_walk = {
			.pmd_entry = clear_refs_pte_range,
			.test_walk = clear_refs_test_walk,
			.mm = mm,
			.private = &cp,
		};

		if (type == CLEAR_REFS_MM_HIWATER_RSS) {
			/*
			 * Writing 5 to /proc/pid/clear_refs resets the peak
			 * resident set size to this mm's current rss value.
			 */
			down_write(&mm->mmap_sem);
			reset_mm_hiwater_rss(mm);
			up_write(&mm->mmap_sem);
			goto out_mm;
		}

		down_read(&mm->mmap_sem);
		if (type == CLEAR_REFS_SOFT_DIRTY) {
			for (vma = mm->mmap; vma; vma = vma->vm_next) {
				if (!(vma->vm_flags & VM_SOFTDIRTY))
					continue;
				up_read(&mm->mmap_sem);
				down_write(&mm->mmap_sem);
				for (vma = mm->mmap; vma; vma = vma->vm_next) {
					vma->vm_flags &= ~VM_SOFTDIRTY;
					vma_set_page_prot(vma);
				}
				downgrade_write(&mm->mmap_sem);
				break;
			}
			mmu_notifier_invalidate_range_start(mm, 0, -1);
		}
		walk_page_range(0, ~0UL, &clear_refs_walk);
		if (type == CLEAR_REFS_SOFT_DIRTY)
			mmu_notifier_invalidate_range_end(mm, 0, -1);
		flush_tlb_mm(mm);
		up_read(&mm->mmap_sem);
out_mm:
		mmput(mm);
	}
	put_task_struct(task);
	return count;
}
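
clear_refs_write() starts its VMA scan under the read lock and upgrades only on demand: when it finds a VM_SOFTDIRTY vma it drops the read lock, takes the write lock, redoes the whole pass (the list may have changed in the window), and then downgrades for the remaining read-mostly work. The sketch below distills that on-demand upgrade; my_item and my_scan are hypothetical names.

#include <linux/rwsem.h>
#include <linux/list.h>

struct my_item {
	struct list_head node;
	bool dirty;
};

static void my_scan(struct rw_semaphore *sem, struct list_head *items)
{
	struct my_item *it;

	down_read(sem);
	list_for_each_entry(it, items, node) {
		if (!it->dirty)
			continue;
		/*
		 * Upgrade on demand. The lock is lost between up_read()
		 * and down_write(), so redo the whole pass from the head
		 * rather than trusting the now-stale iterator.
		 */
		up_read(sem);
		down_write(sem);
		list_for_each_entry(it, items, node)
			it->dirty = false;	/* write-side fixup */
		downgrade_write(sem);
		break;
	}
	/* ...the remaining read-side work, as in clear_refs_write()... */
	up_read(sem);
}

The break after downgrade_write() matters: the outer iterator is invalid once the read lock was dropped, so the inner pass handles every item and the outer loop is abandoned, exactly as the kernel function does with its two VMA loops.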