/*
 * Release an access permit for an XPMEM address segment.
 */
int
xpmem_release(xpmem_apid_t apid)
{
    struct xpmem_thread_group  *ap_tg;
    struct xpmem_access_permit *ap;

    if (apid <= 0)
        return -EINVAL;

    ap_tg = xpmem_tg_ref_by_apid(apid);
    if (IS_ERR(ap_tg))
        return PTR_ERR(ap_tg);

    /* Only the thread group that owns the access permit may release it */
    if (current->aspace->id != ap_tg->gid) {
        xpmem_tg_deref(ap_tg);
        return -EACCES;
    }

    ap = xpmem_ap_ref_by_apid(ap_tg, apid);
    if (IS_ERR(ap)) {
        xpmem_tg_deref(ap_tg);
        return PTR_ERR(ap);
    }
    BUG_ON(ap->tg != ap_tg);

    xpmem_release_ap(ap_tg, ap);

    xpmem_ap_deref(ap);
    xpmem_tg_deref(ap_tg);

    return 0;
}
static unsigned int
signal_poll(struct file              * filp,
            struct poll_table_struct * poll)
{
    struct xpmem_thread_group * seg_tg;
    struct xpmem_segment      * seg;
    xpmem_segid_t               segid;
    unsigned long               irqs;
    unsigned int                mask = 0;

    segid = (xpmem_segid_t)filp->private_data;

    seg_tg = xpmem_tg_ref_by_segid(segid);
    if (IS_ERR(seg_tg))
        return PTR_ERR(seg_tg);

    seg = xpmem_seg_ref_by_segid(seg_tg, segid);
    if (IS_ERR(seg)) {
        xpmem_tg_deref(seg_tg);
        return PTR_ERR(seg);
    }

    poll_wait(filp, &(seg->signalled_wq), poll);

    irqs = atomic_read(&(seg->irq_count));
    if (irqs > 0)
        mask = POLLIN | POLLRDNORM;

    xpmem_seg_deref(seg);
    xpmem_tg_deref(seg_tg);

    return mask;
}
/*
 * Send a signal to the segment associated with an access permit.
 */
int
xpmem_signal(xpmem_apid_t apid)
{
    struct xpmem_thread_group  * ap_tg, * seg_tg;
    struct xpmem_access_permit * ap;
    struct xpmem_segment       * seg;
    int ret;

    if (apid <= 0)
        return -EINVAL;

    ap_tg = xpmem_tg_ref_by_apid(apid);
    if (IS_ERR(ap_tg))
        return PTR_ERR(ap_tg);

    ap = xpmem_ap_ref_by_apid(ap_tg, apid);
    if (IS_ERR(ap)) {
        xpmem_tg_deref(ap_tg);
        return PTR_ERR(ap);
    }

    seg = ap->seg;
    xpmem_seg_ref(seg);

    seg_tg = seg->tg;
    xpmem_tg_ref(seg_tg);

    xpmem_seg_down(seg);

    if (!(seg->flags & XPMEM_FLAG_SIGNALLABLE)) {
        ret = -EACCES;
        goto out;
    }

    /* Send signal */
    if (seg->flags & XPMEM_FLAG_SHADOW) {
        /* Shadow segment: forward the signal to the owning enclave,
         * propagating any delivery error */
        ret = xpmem_irq_deliver(
            seg->segid,
            *((xpmem_sigid_t *)&(seg->sig)),
            seg->domid);
    } else {
        /* Local segment */
        xpmem_seg_signal(seg);
        ret = 0;
    }

out:
    xpmem_seg_up(seg);
    xpmem_seg_deref(seg);
    xpmem_tg_deref(seg_tg);
    xpmem_ap_deref(ap);
    xpmem_tg_deref(ap_tg);

    return ret;
}
/*
 * The following function gets called whenever a thread group that has opened
 * /dev/xpmem closes it.
 */
static int
xpmem_flush(struct file *file, fl_owner_t owner)
{
    struct xpmem_thread_group *tg;
    int index;

    tg = xpmem_tg_ref_by_tgid(current->tgid);
    if (IS_ERR(tg)) {
        /*
         * xpmem_flush() can get called twice for thread groups
         * which inherited /dev/xpmem: once for the inherited fd,
         * once for the first explicit use of /dev/xpmem. If we
         * don't find the tg via xpmem_tg_ref_by_tgid() we assume we
         * are in this type of scenario and return silently.
         */
        return 0;
    }

    spin_lock(&tg->lock);
    if (tg->flags & XPMEM_FLAG_DESTROYING) {
        spin_unlock(&tg->lock);
        xpmem_tg_deref(tg);
        return -EALREADY;
    }
    tg->flags |= XPMEM_FLAG_DESTROYING;
    spin_unlock(&tg->lock);

    xpmem_release_aps_of_tg(tg);
    xpmem_remove_segs_of_tg(tg);
    xpmem_mmu_notifier_unlink(tg);

    /*
     * At this point, XPMEM no longer needs to reference the thread group
     * leader's task_struct or mm_struct. Decrement its 'usage' and
     * 'mm->mm_users' to account for the extra increments previously done
     * in xpmem_open().
     */
    mmput(tg->mm);
    put_task_struct(tg->group_leader);

    /* Remove tg structure from its hash list */
    index = xpmem_tg_hashtable_index(tg->tgid);
    write_lock(&xpmem_my_part->tg_hashtable[index].lock);
    list_del_init(&tg->tg_hashlist);
    write_unlock(&xpmem_my_part->tg_hashtable[index].lock);

    xpmem_tg_destroyable(tg);
    xpmem_tg_deref(tg);

    return 0;
}
/*
 * Detach an attached XPMEM address segment.
 */
int
xpmem_detach(vaddr_t at_vaddr)
{
    struct xpmem_thread_group  *tg;
    struct xpmem_access_permit *ap;
    struct xpmem_attachment    *att;

    tg = xpmem_tg_ref_by_gid(current->aspace->id);
    if (IS_ERR(tg))
        return PTR_ERR(tg);

    att = xpmem_att_ref_by_vaddr(tg, at_vaddr);
    if (IS_ERR(att)) {
        xpmem_tg_deref(tg);
        return PTR_ERR(att);
    }

    mutex_lock(&att->mutex);

    if (att->flags & XPMEM_FLAG_DESTROYING) {
        mutex_unlock(&att->mutex);
        xpmem_att_deref(att);
        xpmem_tg_deref(tg);
        return 0;
    }
    att->flags |= XPMEM_FLAG_DESTROYING;

    ap = att->ap;
    xpmem_ap_ref(ap);

    /* Only the thread group that created the attachment may detach it */
    if (current->aspace->id != ap->tg->gid) {
        att->flags &= ~XPMEM_FLAG_DESTROYING;
        xpmem_ap_deref(ap);
        mutex_unlock(&att->mutex);
        xpmem_att_deref(att);
        xpmem_tg_deref(tg);     /* drop the tg reference taken above */
        return -EACCES;
    }

    __xpmem_detach_att(ap, att);

    mutex_unlock(&att->mutex);

    xpmem_att_destroyable(att);

    xpmem_ap_deref(ap);
    xpmem_att_deref(att);
    xpmem_tg_deref(tg);

    return 0;
}
static ssize_t
signal_write(struct file       * filp,
             const char __user * buffer,
             size_t              length,
             loff_t            * offset)
{
    struct xpmem_thread_group * seg_tg;
    struct xpmem_segment      * seg;
    xpmem_segid_t               segid;
    unsigned long               irqs;
    unsigned long               acks;

    if (length != sizeof(unsigned long))
        return -EINVAL;

    /* copy_from_user() returns the number of bytes not copied, not an errno */
    if (copy_from_user(&acks, buffer, sizeof(unsigned long)))
        return -EFAULT;

    segid = (xpmem_segid_t)filp->private_data;

    seg_tg = xpmem_tg_ref_by_segid(segid);
    if (IS_ERR(seg_tg))
        return PTR_ERR(seg_tg);

    seg = xpmem_seg_ref_by_segid(seg_tg, segid);
    if (IS_ERR(seg)) {
        xpmem_tg_deref(seg_tg);
        return PTR_ERR(seg);
    }

    /* Acknowledge at most the number of pending notifications */
    irqs = atomic_read(&(seg->irq_count));
    if (acks > irqs)
        acks = irqs;

    atomic_sub(acks, &(seg->irq_count));

    xpmem_seg_deref(seg);
    xpmem_tg_deref(seg_tg);

    return length;
}
static ssize_t
signal_read(struct file * filp,
            char __user * buffer,
            size_t        length,
            loff_t      * offset)
{
    struct xpmem_thread_group * seg_tg;
    struct xpmem_segment      * seg;
    xpmem_segid_t               segid;
    unsigned long               irqs;
    int                         err;

    if (length != sizeof(unsigned long))
        return -EINVAL;

    segid = (xpmem_segid_t)filp->private_data;

    seg_tg = xpmem_tg_ref_by_segid(segid);
    if (IS_ERR(seg_tg))
        return PTR_ERR(seg_tg);

    seg = xpmem_seg_ref_by_segid(seg_tg, segid);
    if (IS_ERR(seg)) {
        xpmem_tg_deref(seg_tg);
        return PTR_ERR(seg);
    }

    /* Only ack if there are pending notifications */
    err = (atomic_add_unless(&(seg->irq_count), -1, 0) == 0);
    irqs = atomic_read(&(seg->irq_count));

    xpmem_seg_deref(seg);
    xpmem_tg_deref(seg_tg);

    if (err)
        return -ENODEV;

    if (copy_to_user(buffer, &irqs, sizeof(unsigned long)))
        return -EFAULT;

    return length;
}
static ssize_t
signal_read(struct file * filp,
            char __user * buffer,
            size_t        length,
            loff_t      * offset)
{
    struct xpmem_thread_group * seg_tg;
    struct xpmem_segment      * seg;
    xpmem_segid_t               segid;
    unsigned long               irqs;

    if (length != sizeof(unsigned long))
        return -EINVAL;

    segid = (xpmem_segid_t)filp->private_data;

    seg_tg = xpmem_tg_ref_by_segid(segid);
    if (IS_ERR(seg_tg))
        return PTR_ERR(seg_tg);

    seg = xpmem_seg_ref_by_segid(seg_tg, segid);
    if (IS_ERR(seg)) {
        xpmem_tg_deref(seg_tg);
        return PTR_ERR(seg);
    }

    /* Block until at least one notification is pending */
    wait_event_interruptible(seg->signalled_wq,
        (atomic_read(&(seg->irq_count)) > 0)
    );

    /* Return the pending notification count and clear it */
    irqs = atomic_read(&(seg->irq_count));
    atomic_set(&(seg->irq_count), 0);

    xpmem_seg_deref(seg);
    xpmem_tg_deref(seg_tg);

    if (copy_to_user(buffer, &irqs, sizeof(unsigned long)))
        return -EFAULT;

    return length;
}
/*
 * Send a signal to the segment identified by segid.
 */
int
xpmem_segid_signal(xpmem_segid_t segid)
{
    struct xpmem_thread_group * seg_tg;
    struct xpmem_segment      * seg;

    seg_tg = xpmem_tg_ref_by_segid(segid);
    if (IS_ERR(seg_tg))
        return PTR_ERR(seg_tg);

    seg = xpmem_seg_ref_by_segid(seg_tg, segid);
    if (IS_ERR(seg)) {
        xpmem_tg_deref(seg_tg);
        return PTR_ERR(seg);
    }

    xpmem_seg_signal(seg);

    xpmem_seg_deref(seg);
    xpmem_tg_deref(seg_tg);

    return 0;
}
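/*
 * Hypothetical user-space sketch of how the per-segment signal fd backed by
 * signal_poll()/signal_read()/signal_write() above could be consumed.
 * signal_poll() reports POLLIN while the segment's irq_count is non-zero,
 * signal_read() returns the pending-notification count as a single
 * unsigned long, and signal_write() acknowledges up to the written number
 * of notifications.  How sig_fd is obtained is an assumption here (it is
 * simply passed in); this is an illustration only, not part of the driver.
 */
#if 0
#include <poll.h>
#include <unistd.h>

static int wait_for_segment_signal(int sig_fd)
{
    struct pollfd pfd = { .fd = sig_fd, .events = POLLIN };
    unsigned long pending;

    /* Block until the segment owner has signalled at least once */
    if (poll(&pfd, 1, -1) < 0)
        return -1;

    /* Reads and writes must be exactly sizeof(unsigned long) */
    if (read(sig_fd, &pending, sizeof(pending)) != sizeof(pending))
        return -1;

    /* Explicitly acknowledge the notifications we just observed
     * (a no-op if the blocking read variant already cleared the count) */
    if (write(sig_fd, &pending, sizeof(pending)) != sizeof(pending))
        return -1;

    return (int)pending;
}
#endif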
/*
 * Attach an XPMEM address segment.
 */
int
xpmem_attach(xpmem_apid_t apid, off_t offset, size_t size, int att_flags,
             vaddr_t *at_vaddr_p)
{
    int ret, index;
    vaddr_t seg_vaddr, at_vaddr;
    struct xpmem_thread_group *ap_tg, *seg_tg;
    struct xpmem_access_permit *ap;
    struct xpmem_segment *seg;
    struct xpmem_attachment *att;

    if (apid <= 0)
        return -EINVAL;

    /* If the size is not page aligned, fix it */
    if (offset_in_page(size) != 0)
        size += PAGE_SIZE - offset_in_page(size);

    ap_tg = xpmem_tg_ref_by_apid(apid);
    if (IS_ERR(ap_tg))
        return PTR_ERR(ap_tg);

    ap = xpmem_ap_ref_by_apid(ap_tg, apid);
    if (IS_ERR(ap)) {
        xpmem_tg_deref(ap_tg);
        return PTR_ERR(ap);
    }

    seg = ap->seg;
    xpmem_seg_ref(seg);

    seg_tg = seg->tg;
    xpmem_tg_ref(seg_tg);

    xpmem_seg_down(seg);

    ret = xpmem_validate_access(ap_tg, ap, offset, size, XPMEM_RDWR, &seg_vaddr);
    if (ret != 0)
        goto out_1;

    /* size needs to reflect page offset to start of segment */
    size += offset_in_page(seg_vaddr);

    if (seg->flags & XPMEM_FLAG_SHADOW) {
        BUG_ON(seg->remote_apid <= 0);

        /* remote - load pfns in now */
        ret = xpmem_try_attach_remote(seg->segid, seg->remote_apid,
                                      offset, size, &at_vaddr);
        if (ret != 0)
            goto out_1;
    } else {
        /* not remote - simply figure out where we are smartmapped to this process */
        at_vaddr = xpmem_make_smartmap_addr(seg_tg->aspace->id, seg_vaddr);
    }

    /* create new attach structure */
    att = kmem_alloc(sizeof(struct xpmem_attachment));
    if (att == NULL) {
        ret = -ENOMEM;
        goto out_1;
    }

    mutex_init(&att->mutex);
    att->vaddr    = seg_vaddr;
    att->at_vaddr = at_vaddr;
    att->at_size  = size;
    att->ap       = ap;
    att->flags    = 0;
    INIT_LIST_HEAD(&att->att_node);

    xpmem_att_not_destroyable(att);
    xpmem_att_ref(att);

    /*
     * The attach point where we mapped the portion of the segment the
     * user was interested in is page aligned. But the start of the portion
     * of the segment may not be, so we adjust the address returned to the
     * user by that page offset difference so that what they see is what
     * they expected to see.
     */
    *at_vaddr_p = at_vaddr + offset_in_page(att->vaddr);

    /* link attach structure to its access permit's att list */
    spin_lock(&ap->lock);
    if (ap->flags & XPMEM_FLAG_DESTROYING) {
        spin_unlock(&ap->lock);
        ret = -ENOENT;
        goto out_2;
    }
    list_add_tail(&att->att_node, &ap->att_list);

    /* add att to its ap_tg's hash list */
    index = xpmem_att_hashtable_index(att->at_vaddr);
    write_lock(&ap_tg->att_hashtable[index].lock);
    list_add_tail(&att->att_hashnode, &ap_tg->att_hashtable[index].list);
    write_unlock(&ap_tg->att_hashtable[index].lock);

    spin_unlock(&ap->lock);

    ret = 0;
out_2:
    if (ret != 0) {
        att->flags |= XPMEM_FLAG_DESTROYING;
        xpmem_att_destroyable(att);
    }
    xpmem_att_deref(att);
out_1:
    xpmem_seg_up(seg);
    xpmem_ap_deref(ap);
    xpmem_tg_deref(ap_tg);
    xpmem_seg_deref(seg);
    xpmem_tg_deref(seg_tg);

    return ret;
}
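/*
 * Worked example of the page-offset adjustment at the end of xpmem_attach()
 * above (the addresses are made up purely for illustration):
 *
 *   seg_vaddr                            = 0x400234
 *   offset_in_page(seg_vaddr)            = 0x234
 *   at_vaddr (page-aligned attach point) = 0x7f0000200000
 *   *at_vaddr_p returned to the user     = 0x7f0000200234
 *
 * The mapping itself is created on a page boundary, but the pointer handed
 * back to the user lands on the same byte offset within the page at which
 * the requested portion of the segment actually starts.
 */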
/*
 * Release an access permit and detach all associated attaches.
 */
void
xpmem_release_ap(struct xpmem_thread_group *ap_tg,
                 struct xpmem_access_permit *ap)
{
    int index;
    struct xpmem_thread_group *seg_tg;
    struct xpmem_attachment *att;
    struct xpmem_segment *seg;

    spin_lock(&ap->lock);
    if (ap->flags & XPMEM_FLAG_DESTROYING) {
        spin_unlock(&ap->lock);
        return;
    }
    ap->flags |= XPMEM_FLAG_DESTROYING;

    /* deal with all attaches first */
    while (!list_empty(&ap->att_list)) {
        att = list_entry((&ap->att_list)->next, struct xpmem_attachment,
                         att_node);
        xpmem_att_ref(att);
        spin_unlock(&ap->lock);

        xpmem_detach_att(ap, att);

        xpmem_att_deref(att);
        spin_lock(&ap->lock);
    }
    spin_unlock(&ap->lock);

    /*
     * Remove access structure from its hash list.
     * This is done after the xpmem_detach_att to prevent any racing
     * thread from looking up access permits for the owning thread group
     * and not finding anything, assuming everything is clean, and
     * freeing the mm before xpmem_detach_att has a chance to use it.
     */
    index = xpmem_ap_hashtable_index(ap->apid);
    write_lock(&ap_tg->ap_hashtable[index].lock);
    list_del_init(&ap->ap_hashnode);
    write_unlock(&ap_tg->ap_hashtable[index].lock);

    /* the ap's seg and the seg's tg were ref'd in xpmem_get() */
    seg = ap->seg;
    seg_tg = seg->tg;

    /* remove ap from its seg's access permit list */
    spin_lock(&seg->lock);
    list_del_init(&ap->ap_node);
    spin_unlock(&seg->lock);

    /* Try to teardown a shadow segment */
    if (seg->flags & XPMEM_FLAG_SHADOW)
        xpmem_remove_seg(seg_tg, seg);

    xpmem_seg_deref(seg);   /* deref of xpmem_get()'s ref */
    xpmem_tg_deref(seg_tg); /* deref of xpmem_get()'s ref */

    xpmem_ap_destroyable(ap);
}
/*
 * Get permission to access a specified segid.
 */
int
xpmem_get(xpmem_segid_t segid, int flags, int permit_type, void *permit_value,
          xpmem_apid_t *apid_p)
{
    xpmem_apid_t apid = 0;
    int status = 0;
    int shadow_seg = 0;
    struct xpmem_segment *seg = NULL;
    struct xpmem_thread_group *ap_tg, *seg_tg;

    if (segid <= 0)
        return -EINVAL;

    if ((flags & ~(XPMEM_RDONLY | XPMEM_RDWR)) ||
        (flags & (XPMEM_RDONLY | XPMEM_RDWR)) ==
        (XPMEM_RDONLY | XPMEM_RDWR))
        return -EINVAL;

    switch (permit_type) {
    case XPMEM_PERMIT_MODE:
    case XPMEM_GLOBAL_MODE:
        if (permit_value != NULL)
            return -EINVAL;
        break;
    default:
        return -EINVAL;
    }

    /* There are 2 cases that result in a remote lookup:
     *
     * (1) The thread group encoded in the segment does not exist locally.
     *
     * (2) The thread group exists locally, but the segment does not. The
     * ids for thread groups are not globally unique, so it's possible that
     * the same thread group id exists in two separate enclaves, but only
     * one will own the segment.
     */
    seg_tg = xpmem_tg_ref_by_segid(segid);
    if (IS_ERR(seg_tg)) {
        seg_tg = xpmem_tg_ref_by_gid(current->aspace->id);
        if (IS_ERR(seg_tg))
            return PTR_ERR(seg_tg);

        shadow_seg = 1;
    }

    if (!shadow_seg) {
        seg = xpmem_seg_ref_by_segid(seg_tg, segid);
        if (IS_ERR(seg))
            shadow_seg = 1;
    }

    if (shadow_seg) {
        /* No local segment found. Look for a remote one */
        /* NOTE: in either case, the tg has already been ref'd. We ref the
         * current process' tg if no tg is found for the segid */

        status = xpmem_try_get_remote(seg_tg, segid, flags, permit_type,
                                      permit_value);
        if (status != 0) {
            xpmem_tg_deref(seg_tg);
            return status;
        }

        /* Now, get the shadow segment */
        seg = xpmem_seg_ref_by_segid(seg_tg, segid);
        if (IS_ERR(seg)) {
            /* Error should be impossible here, but we'll
             * check anyway. The shadow segment was created in
             * xpmem_try_get_remote, so destroy it here */
            xpmem_remove(segid);
            xpmem_tg_deref(seg_tg);
            return PTR_ERR(seg);
        }
    }

    /* find accessor's thread group structure */
    ap_tg = xpmem_tg_ref_by_gid(current->aspace->id);
    if (IS_ERR(ap_tg)) {
        BUG_ON(PTR_ERR(ap_tg) != -ENOENT);
        status = -XPMEM_ERRNO_NOPROC;
        goto err_ap_tg;
    }

    apid = xpmem_make_apid(ap_tg);
    if (apid < 0) {
        status = apid;
        goto err_apid;
    }

    status = xpmem_get_segment(flags, permit_type, permit_value, apid, seg,
                               seg_tg, ap_tg);
    if (status != 0)
        goto err_get;

    *apid_p = apid;
    xpmem_tg_deref(ap_tg);

    /*
     * The following two derefs
     *
     *      xpmem_seg_deref(seg);
     *      xpmem_tg_deref(seg_tg);
     *
     * aren't being done at this time in order to prevent the seg
     * and seg_tg structures from being prematurely kmem_free'd as long as the
     * potential for them to be referenced via this ap structure exists.
     *
     * These two derefs will be done by xpmem_release_ap() at the time
     * this ap structure is destroyed.
     */

    return status;

err_get:
err_apid:
    xpmem_tg_deref(ap_tg);

err_ap_tg:
    /* If we created a shadow segment, destroy it on error. Else, just
     * deref it */
    if (shadow_seg)
        xpmem_remove(segid);
    else
        xpmem_seg_deref(seg);

    xpmem_tg_deref(seg_tg);
    return status;
}
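/*
 * For context, a minimal user-space sketch of the object lifecycle that
 * xpmem_get()/xpmem_attach()/xpmem_detach()/xpmem_release() above implement,
 * assuming the conventional libxpmem calling conventions.  The exact
 * user-level signatures for this port are not shown in this file, so treat
 * the snippet as an illustration rather than a reference.  Note that
 * xpmem_get() above rejects a non-NULL permit_value, hence the NULL below.
 */
#if 0
#include <stddef.h>
#include <xpmem.h>

static int import_region(xpmem_segid_t segid, size_t size)
{
    struct xpmem_addr addr;
    xpmem_apid_t      apid;
    void             *vaddr;

    /* Acquire an access permit for the exported segment (xpmem_get above) */
    apid = xpmem_get(segid, XPMEM_RDWR, XPMEM_PERMIT_MODE, NULL);
    if (apid < 0)
        return -1;

    /* Map the segment into our address space (xpmem_attach above) */
    addr.apid   = apid;
    addr.offset = 0;
    vaddr = xpmem_attach(addr, size, NULL);
    if (vaddr == (void *)-1) {
        xpmem_release(apid);
        return -1;
    }

    /* ... loads/stores through vaddr operate on the owner's pages ... */

    xpmem_detach(vaddr);    /* tears down the attachment (xpmem_detach above) */
    xpmem_release(apid);    /* drops the access permit (xpmem_release above) */
    return 0;
}
#endif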
/*
 * User open of the XPMEM driver. Called whenever /dev/xpmem is opened.
 * Create a struct xpmem_thread_group structure for the specified thread group.
 * And add the structure to the tg hash table.
 */
static int
xpmem_open(struct inode *inode, struct file *file)
{
    struct xpmem_thread_group *tg;
    int index;
    struct proc_dir_entry *unpin_entry;
    char tgid_string[XPMEM_TGID_STRING_LEN];

    /* if this has already been done, just return silently */
    tg = xpmem_tg_ref_by_tgid(current->tgid);
    if (!IS_ERR(tg)) {
        xpmem_tg_deref(tg);
        return 0;
    }

    /* create tg */
    tg = kzalloc(sizeof(struct xpmem_thread_group), GFP_KERNEL);
    if (tg == NULL) {
        return -ENOMEM;
    }

    tg->lock = SPIN_LOCK_UNLOCKED;
    tg->tgid = current->tgid;
    tg->uid = current->cred->uid;
    tg->gid = current->cred->gid;
    atomic_set(&tg->uniq_segid, 0);
    atomic_set(&tg->uniq_apid, 0);
    atomic_set(&tg->n_pinned, 0);
    tg->addr_limit = TASK_SIZE;
    tg->seg_list_lock = RW_LOCK_UNLOCKED;
    INIT_LIST_HEAD(&tg->seg_list);
    INIT_LIST_HEAD(&tg->tg_hashlist);
    atomic_set(&tg->n_recall_PFNs, 0);
    mutex_init(&tg->recall_PFNs_mutex);
    init_waitqueue_head(&tg->block_recall_PFNs_wq);
    init_waitqueue_head(&tg->allow_recall_PFNs_wq);
    tg->mmu_initialized = 0;
    tg->mmu_unregister_called = 0;
    tg->mm = current->mm;

    /* Register MMU notifier callbacks */
    if (xpmem_mmu_notifier_init(tg) != 0) {
        kfree(tg);
        return -EFAULT;
    }

    /* create and initialize struct xpmem_access_permit hashtable */
    tg->ap_hashtable = kzalloc(sizeof(struct xpmem_hashlist) *
                               XPMEM_AP_HASHTABLE_SIZE, GFP_KERNEL);
    if (tg->ap_hashtable == NULL) {
        xpmem_mmu_notifier_unlink(tg);
        kfree(tg);
        return -ENOMEM;
    }
    for (index = 0; index < XPMEM_AP_HASHTABLE_SIZE; index++) {
        tg->ap_hashtable[index].lock = RW_LOCK_UNLOCKED;
        INIT_LIST_HEAD(&tg->ap_hashtable[index].list);
    }

    snprintf(tgid_string, XPMEM_TGID_STRING_LEN, "%d", current->tgid);
    spin_lock(&xpmem_unpin_procfs_lock);
    unpin_entry = create_proc_entry(tgid_string, 0644, xpmem_unpin_procfs_dir);
    spin_unlock(&xpmem_unpin_procfs_lock);
    if (unpin_entry != NULL) {
        unpin_entry->data = (void *)(unsigned long)current->tgid;
        unpin_entry->write_proc = xpmem_unpin_procfs_write;
        unpin_entry->read_proc = xpmem_unpin_procfs_read;
        //unpin_entry->owner = THIS_MODULE;
        unpin_entry->uid = current->cred->uid;
        unpin_entry->gid = current->cred->gid;
    }

    xpmem_tg_not_destroyable(tg);

    /* add tg to its hash list */
    index = xpmem_tg_hashtable_index(tg->tgid);
    write_lock(&xpmem_my_part->tg_hashtable[index].lock);
    list_add_tail(&tg->tg_hashlist,
                  &xpmem_my_part->tg_hashtable[index].list);
    write_unlock(&xpmem_my_part->tg_hashtable[index].lock);

    /*
     * Increment 'usage' and 'mm->mm_users' for the current task's thread
     * group leader. This ensures that both its task_struct and mm_struct
     * will still be around when our thread group exits. (The Linux kernel
     * normally tears down the mm_struct prior to calling a module's
     * 'flush' function.) Since all XPMEM thread groups must go through
     * this path, this extra reference to mm_users also allows us to
     * directly inc/dec mm_users in xpmem_ensure_valid_PFNs() and avoid
     * mmput() which has a scaling issue with the mmlist_lock.
     */
    get_task_struct(current->group_leader);
    tg->group_leader = current->group_leader;
    BUG_ON(current->mm != current->group_leader->mm);

    atomic_inc(&current->group_leader->mm->mm_users);

    return 0;
}