int sys_execv(char *progname, char **args, int *err) { struct addrspace *as; struct vnode *v; vaddr_t entrypoint, stackptr; int result; /* Validations */ if (progname == NULL) { *err = EFAULT; return -1; } if (args == NULL || (int *)args == (int *)0x40000000 || (int *)args == (int *)0x80000000) { *err = EFAULT; return -1; } /* Count number of arguments and total size of input */ int args_count = 0; for (;args_count < ARG_MAX && args[args_count] != NULL; args_count++); if (args_count > ARG_MAX) { *err = E2BIG; return -1; } /* Copy File name */ char *progname_copy = (char *) kmalloc(sizeof(char) * NAME_MAX); size_t actual = 0; result = copyinstr((userptr_t)progname, progname_copy, NAME_MAX, &actual); if (result) { kfree(progname_copy); *err = result; return -1; } if (strlen(progname_copy) == 0) { kfree(progname_copy); *err = EINVAL; return -1; } /* Allocate Kernel Memory for arguments */ char **args_copy = (char **) kmalloc(sizeof(char *) * args_count); int c_args_count = 0, arg_size = 0, padded_size = 0; for (;c_args_count < args_count; c_args_count++) { if ((int *)args[c_args_count] == (int *)0x40000000 || (int *)args[c_args_count] == (int *)0x80000000) { kfree(progname_copy); *err = EFAULT; return -1; } } c_args_count = 0; /* Calculate total length accounting for padding */ for (;c_args_count < args_count; c_args_count++) { arg_size = strlen(args[c_args_count]) + 1; args_copy[c_args_count] = (char *) kmalloc(sizeof(char) * arg_size); copyinstr((userptr_t)args[c_args_count], args_copy[c_args_count], arg_size, &actual); padded_size += arg_size; if (padded_size % 4) { padded_size += (4 - (padded_size % 4)) % 4; } } /* Open the file. */ result = vfs_open(progname_copy, O_RDONLY, 0, &v); if (result) { kfree(progname_copy); *err = result; return -1; } /* Destroy the current process's address space to create a new one. */ as = curproc->p_addrspace; curproc->p_addrspace = NULL; as_destroy(as); /* We should be a new process. 
*/ KASSERT(proc_getas() == NULL); /* Create a new address space. */ as = as_create(); if (as == NULL) { kfree(progname_copy); vfs_close(v); *err = ENOMEM; return -1; } /* Switch to it and activate it. */ proc_setas(as); as_activate(); /* Load the executable. */ result = load_elf(v, &entrypoint); if (result) { /* p_addrspace will go away when curproc is destroyed */ kfree(progname_copy); vfs_close(v); *err = result; return -1; } /* Done with the file now. */ vfs_close(v); /* Define the user stack in the address space */ result = as_define_stack(as, &stackptr); if (result) { /* p_addrspace will go away when curproc is destroyed */ kfree(progname_copy); *err = result; return -1; } stackptr -= padded_size; char **arg_address = (char **) kmalloc(sizeof(char *) * args_count + 1); /* Copy arguments into user stack */ for(int i = 0; i < args_count; i++) { arg_size = strlen(args_copy[i]) + 1; if (arg_size % 4) { arg_size += (4 - arg_size % 4) % 4; } /* Store address of arguments */ arg_address[i] = (char *)stackptr; copyoutstr(args_copy[i], (userptr_t)stackptr, arg_size, &actual); stackptr += arg_size; } /* Add Null Pointer at the end */ arg_address[args_count] = 0; stackptr -= padded_size; stackptr -= (args_count + 1) * sizeof(char *); /* Copy address locations into user stack*/ for (int i = 0; i < args_count + 1; i++) { copyout((arg_address + i), (userptr_t)stackptr, sizeof(char *)); stackptr += sizeof(char *); } /* Reset pointer to start of the stack */ stackptr -= ((args_count + 1) * sizeof(char *)); kfree(progname_copy); c_args_count = 0; for (;c_args_count < args_count; c_args_count++) { kfree(args_copy[c_args_count]); } kfree(args_copy); kfree(arg_address); /* Warp to user mode. */ enter_new_process(args_count /*argc*/, (userptr_t) stackptr /*userspace addr of argv*/, (userptr_t) stackptr /*userspace addr of environment*/, stackptr, entrypoint); /* enter_new_process does not return. */ panic("enter_new_process returned\n"); *err = EINVAL; return -1; }
afs_omount(struct mount *mp, char *path, caddr_t data, struct nameidata *ndp, struct thread *p) #endif { /* ndp contains the mounted-from device. Just ignore it. * we also don't care about our thread struct. */ size_t size; if (mp->mnt_flag & MNT_UPDATE) return EINVAL; AFS_GLOCK(); AFS_STATCNT(afs_mount); if (afs_globalVFS) { /* Don't allow remounts. */ AFS_GUNLOCK(); return EBUSY; } afs_globalVFS = mp; mp->vfs_bsize = 8192; vfs_getnewfsid(mp); /* * This is kind of ugly, as the interlock has grown to encompass * more fields over time and there's not a good way to group the * code without duplication. */ #ifdef AFS_FBSD62_ENV MNT_ILOCK(mp); #endif mp->mnt_flag &= ~MNT_LOCAL; #if defined(AFS_FBSD61_ENV) && !defined(AFS_FBSD62_ENV) MNT_ILOCK(mp); #endif #if __FreeBSD_version < 1000021 mp->mnt_kern_flag |= MNTK_MPSAFE; /* solid steel */ #endif #ifndef AFS_FBSD61_ENV MNT_ILOCK(mp); #endif /* * XXX mnt_stat "is considered stable as long as a ref is held". * We should check that we hold the only ref. */ mp->mnt_stat.f_iosize = 8192; if (path != NULL) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); else bcopy("/afs", mp->mnt_stat.f_mntonname, size = 4); memset(mp->mnt_stat.f_mntonname + size, 0, MNAMELEN - size); memset(mp->mnt_stat.f_mntfromname, 0, MNAMELEN); strcpy(mp->mnt_stat.f_mntfromname, "AFS"); /* null terminated string "AFS" will fit, just leave it be. */ strcpy(mp->mnt_stat.f_fstypename, "afs"); MNT_IUNLOCK(mp); AFS_GUNLOCK(); #ifdef AFS_FBSD80_ENV afs_statfs(mp, &mp->mnt_stat); #else afs_statfs(mp, &mp->mnt_stat, p); #endif return 0; }
/*
 * static long rctlsys_get(char *name, rctl_opaque_t *old_rblk,
 *   rctl_opaque_t *new_rblk, int flags)
 *
 * Overview
 *   rctlsys_get() is the implementation of the core logic of getrctl(2), the
 *   public system call for fetching resource control values.  Three mutually
 *   exclusive flag values are supported: RCTL_USAGE, RCTL_FIRST and RCTL_NEXT.
 *   When RCTL_USAGE is presented, the current usage for the resource control
 *   is returned in new_blk if the resource control provides an implementation
 *   of the usage operation.  When RCTL_FIRST is presented, the value of
 *   old_rblk is ignored, and the first value in the resource control value
 *   sequence for the named control is transformed and placed in the user
 *   memory location at new_rblk.  In the RCTL_NEXT case, the value of old_rblk
 *   is examined, and the next value in the sequence is transformed and placed
 *   at new_rblk.
 */
static long
rctlsys_get(char *name, rctl_opaque_t *old_rblk, rctl_opaque_t *new_rblk,
    int flags)
{
	rctl_val_t *nval;		/* value to be returned to the user */
	rctl_opaque_t *nblk;		/* kernel staging copy of new_rblk */
	rctl_hndl_t hndl;		/* handle of the named control */
	char *kname;			/* kernel copy of the control name */
	size_t klen;
	rctl_dict_entry_t *krde;	/* global dictionary entry for name */
	int ret;
	/* Isolate the action bits (RCTL_USAGE / RCTL_FIRST / RCTL_NEXT). */
	int action = flags & (~RCTLSYS_ACTION_MASK);

	if (flags & (~RCTLSYS_MASK))
		return (set_errno(EINVAL));

	if (action != RCTL_FIRST && action != RCTL_NEXT &&
	    action != RCTL_USAGE)
		return (set_errno(EINVAL));

	if (new_rblk == NULL || name == NULL)
		return (set_errno(EFAULT));

	kname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
	krde = kmem_alloc(sizeof (rctl_dict_entry_t), KM_SLEEP);

	if (copyinstr(name, kname, MAXPATHLEN, &klen) != 0) {
		kmem_free(kname, MAXPATHLEN);
		kmem_free(krde, sizeof (rctl_dict_entry_t));
		return (set_errno(EFAULT));
	}

	/* Resolve the control name to a handle. */
	if ((hndl = rctl_hndl_lookup(kname)) == -1) {
		kmem_free(kname, MAXPATHLEN);
		kmem_free(krde, sizeof (rctl_dict_entry_t));
		return (set_errno(EINVAL));
	}

	/* Fetch the global attributes of the control. */
	if (rctl_global_get(kname, krde) == -1) {
		kmem_free(kname, MAXPATHLEN);
		kmem_free(krde, sizeof (rctl_dict_entry_t));
		return (set_errno(ESRCH));
	}

	kmem_free(kname, MAXPATHLEN);

	/* The USAGE path never consumes nval; only allocate it otherwise. */
	if (action != RCTL_USAGE)
		nval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);

	if (action == RCTL_USAGE) {
		rctl_set_t *rset;
		rctl_t *rctl;
		rctl_qty_t usage;

		/* p_lock protects the process's rctl set lookup. */
		mutex_enter(&curproc->p_lock);
		if ((rset = rctl_entity_obtain_rset(krde, curproc)) == NULL) {
			mutex_exit(&curproc->p_lock);
			kmem_free(krde, sizeof (rctl_dict_entry_t));
			return (set_errno(ESRCH));
		}
		mutex_enter(&rset->rcs_lock);
		if (rctl_set_find(rset, hndl, &rctl) == -1) {
			mutex_exit(&rset->rcs_lock);
			mutex_exit(&curproc->p_lock);
			kmem_free(krde, sizeof (rctl_dict_entry_t));
			return (set_errno(ESRCH));
		}
		/* Controls without a usage op cannot answer RCTL_USAGE. */
		if (RCTLOP_NO_USAGE(rctl)) {
			mutex_exit(&rset->rcs_lock);
			mutex_exit(&curproc->p_lock);
			kmem_free(krde, sizeof (rctl_dict_entry_t));
			return (set_errno(ENOTSUP));
		}
		usage = RCTLOP_GET_USAGE(rctl, curproc);
		mutex_exit(&rset->rcs_lock);
		mutex_exit(&curproc->p_lock);

		nblk = kmem_zalloc(sizeof (rctl_opaque_t), KM_SLEEP);
		nblk->rcq_value = usage;

		ret = copyout(nblk, new_rblk, sizeof (rctl_opaque_t));
		kmem_free(nblk, sizeof (rctl_opaque_t));
		kmem_free(krde, sizeof (rctl_dict_entry_t));
		return (ret == 0 ? 0 : set_errno(EFAULT));
	} else if (action == RCTL_FIRST) {
		mutex_enter(&curproc->p_lock);
		if (ret = rctl_local_get(hndl, NULL, nval, curproc)) {
			mutex_exit(&curproc->p_lock);
			kmem_cache_free(rctl_val_cache, nval);
			kmem_free(krde, sizeof (rctl_dict_entry_t));
			return (set_errno(ret));
		}
		mutex_exit(&curproc->p_lock);
	} else {
		/*
		 * RCTL_NEXT
		 */
		rctl_val_t *oval;
		rctl_opaque_t *oblk;

		oblk = kmem_alloc(sizeof (rctl_opaque_t), KM_SLEEP);
		/* copyin returns -1 on fault in this kernel. */
		if (copyin(old_rblk, oblk, sizeof (rctl_opaque_t)) == -1) {
			kmem_cache_free(rctl_val_cache, nval);
			kmem_free(oblk, sizeof (rctl_opaque_t));
			kmem_free(krde, sizeof (rctl_dict_entry_t));
			return (set_errno(EFAULT));
		}
		oval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);

		/* Convert the user's opaque block into a value to seek from. */
		rctlsys_rblk_xfrm(oblk, NULL, oval, RBX_FROM_BLK | RBX_VAL);
		mutex_enter(&curproc->p_lock);
		ret = rctl_local_get(hndl, oval, nval, curproc);
		mutex_exit(&curproc->p_lock);

		kmem_cache_free(rctl_val_cache, oval);
		kmem_free(oblk, sizeof (rctl_opaque_t));

		if (ret != 0) {
			kmem_cache_free(rctl_val_cache, nval);
			kmem_free(krde, sizeof (rctl_dict_entry_t));
			return (set_errno(ret));
		}
	}

	/* Transform the fetched value back into the user's opaque format. */
	nblk = kmem_alloc(sizeof (rctl_opaque_t), KM_SLEEP);

	rctlsys_rblk_xfrm(nblk, krde, nval, RBX_TO_BLK | RBX_VAL | RBX_CTL);

	kmem_free(krde, sizeof (rctl_dict_entry_t));
	kmem_cache_free(rctl_val_cache, nval);

	if (copyout(nblk, new_rblk, sizeof (rctl_opaque_t)) == -1) {
		kmem_free(nblk, sizeof (rctl_opaque_t));
		return (set_errno(EFAULT));
	}

	kmem_free(nblk, sizeof (rctl_opaque_t));
	return (0);
}
/*
 * hammer_vfs_mount - DragonFly VFS mount entry point for HAMMER.
 *
 * Handles three cases: root mounts at boot (mntpt == NULL, volume list
 * parsed from f_mntfromname), mount updates (MNT_UPDATE, ro<->rw
 * transitions), and fresh userland mounts (mount info copied in from
 * `data`).  On success, returns 0 with fs_token released; on failure,
 * tears the hammer_mount down via hammer_free_hmp().
 */
static int
hammer_vfs_mount(struct mount *mp, char *mntpt, caddr_t data,
		 struct ucred *cred)
{
	struct hammer_mount_info info;
	hammer_mount_t hmp;
	hammer_volume_t rootvol;
	struct vnode *rootvp;
	struct vnode *devvp = NULL;
	const char *upath;	/* volume name in userspace */
	char *path;		/* volume name in system space */
	int error;
	int i;
	int master_id;
	char *next_volume_ptr = NULL;

	/*
	 * Accept hammer_mount_info.  mntpt is NULL for root mounts at boot.
	 */
	if (mntpt == NULL) {
		bzero(&info, sizeof(info));
		info.asof = 0;
		info.hflags = 0;
		info.nvolumes = 1;

		next_volume_ptr = mp->mnt_stat.f_mntfromname;

		/* Count number of volumes separated by ':' */
		for (char *p = next_volume_ptr; *p != '\0'; ++p) {
			if (*p == ':') {
				++info.nvolumes;
			}
		}

		mp->mnt_flag &= ~MNT_RDONLY; /* mount R/W */
	} else {
		if ((error = copyin(data, &info, sizeof(info))) != 0)
			return (error);
	}

	/*
	 * updating or new mount
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		hmp = (void *)mp->mnt_data;
		KKASSERT(hmp != NULL);
	} else {
		/* Sanity-bound the volume count before allocating. */
		if (info.nvolumes <= 0 || info.nvolumes >= 32768)
			return (EINVAL);
		hmp = NULL;
	}

	/*
	 * master-id validation.  The master id may not be changed by a
	 * mount update.
	 */
	if (info.hflags & HMNT_MASTERID) {
		if (hmp && hmp->master_id != info.master_id) {
			kprintf("hammer: cannot change master id "
				"with mount update\n");
			return(EINVAL);
		}
		master_id = info.master_id;
		if (master_id < -1 || master_id >= HAMMER_MAX_MASTERS)
			return (EINVAL);
	} else {
		if (hmp)
			master_id = hmp->master_id;
		else
			master_id = 0;
	}

	/*
	 * Internal mount data structure — only built for fresh mounts.
	 */
	if (hmp == NULL) {
		hmp = kmalloc(sizeof(*hmp), M_HAMMER, M_WAITOK | M_ZERO);
		mp->mnt_data = (qaddr_t)hmp;
		hmp->mp = mp;
		/*TAILQ_INIT(&hmp->recycle_list);*/

		/*
		 * Make sure kmalloc type limits are set appropriately.
		 *
		 * Our inode kmalloc group is sized based on maxvnodes
		 * (controlled by the system, not us).
		 */
		kmalloc_create(&hmp->m_misc, "HAMMER-others");
		kmalloc_create(&hmp->m_inodes, "HAMMER-inodes");
		kmalloc_raise_limit(hmp->m_inodes, 0);	/* unlimited */

		/* Sentinel B-Tree keys bracketing the entire key space. */
		hmp->root_btree_beg.localization = 0x00000000U;
		hmp->root_btree_beg.obj_id = -0x8000000000000000LL;
		hmp->root_btree_beg.key = -0x8000000000000000LL;
		hmp->root_btree_beg.create_tid = 1;
		hmp->root_btree_beg.delete_tid = 1;
		hmp->root_btree_beg.rec_type = 0;
		hmp->root_btree_beg.obj_type = 0;

		hmp->root_btree_end.localization = 0xFFFFFFFFU;
		hmp->root_btree_end.obj_id = 0x7FFFFFFFFFFFFFFFLL;
		hmp->root_btree_end.key = 0x7FFFFFFFFFFFFFFFLL;
		hmp->root_btree_end.create_tid = 0xFFFFFFFFFFFFFFFFULL;
		hmp->root_btree_end.delete_tid = 0;   /* special case */
		hmp->root_btree_end.rec_type = 0xFFFFU;
		hmp->root_btree_end.obj_type = 0;

		hmp->krate.freq = 1;	/* maximum reporting rate (hz) */
		hmp->krate.count = -16;	/* initial burst */

		hmp->sync_lock.refs = 1;
		hmp->free_lock.refs = 1;
		hmp->undo_lock.refs = 1;
		hmp->blkmap_lock.refs = 1;
		hmp->snapshot_lock.refs = 1;
		hmp->volume_lock.refs = 1;

		TAILQ_INIT(&hmp->delay_list);
		TAILQ_INIT(&hmp->flush_group_list);
		TAILQ_INIT(&hmp->objid_cache_list);
		TAILQ_INIT(&hmp->undo_lru_list);
		TAILQ_INIT(&hmp->reclaim_list);

		RB_INIT(&hmp->rb_dedup_crc_root);
		RB_INIT(&hmp->rb_dedup_off_root);
		TAILQ_INIT(&hmp->dedup_lru_list);
	}
	/* Refresh the user-settable flags on both new and updated mounts. */
	hmp->hflags &= ~HMNT_USERFLAGS;
	hmp->hflags |= info.hflags & HMNT_USERFLAGS;

	hmp->master_id = master_id;

	/* An as-of (historical snapshot) mount is forced read-only. */
	if (info.asof) {
		mp->mnt_flag |= MNT_RDONLY;
		hmp->asof = info.asof;
	} else {
		hmp->asof = HAMMER_MAX_TID;
	}

	hmp->volume_to_remove = -1;

	/*
	 * Re-open read-write if originally read-only, or vise-versa.
	 *
	 * When going from read-only to read-write execute the stage2
	 * recovery if it has not already been run.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		lwkt_gettoken(&hmp->fs_token);
		error = 0;
		if (hmp->ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
			kprintf("HAMMER read-only -> read-write\n");
			hmp->ronly = 0;
			RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
				hammer_adjust_volume_mode, NULL);
			rootvol = hammer_get_root_volume(hmp, &error);
			if (rootvol) {
				hammer_recover_flush_buffers(hmp, rootvol, 1);
				error = hammer_recover_stage2(hmp, rootvol);
				bcopy(rootvol->ondisk->vol0_blockmap,
				      hmp->blockmap,
				      sizeof(hmp->blockmap));
				hammer_rel_volume(rootvol, 0);
			}
			RB_SCAN(hammer_ino_rb_tree, &hmp->rb_inos_root, NULL,
				hammer_reload_inode, NULL);
			/* kernel clears MNT_RDONLY */
		} else if (hmp->ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
			kprintf("HAMMER read-write -> read-only\n");
			hmp->ronly = 1;	/* messy */
			RB_SCAN(hammer_ino_rb_tree, &hmp->rb_inos_root, NULL,
				hammer_reload_inode, NULL);
			hmp->ronly = 0;
			/* Repeated syncs drain the flusher before going RO. */
			hammer_flusher_sync(hmp);
			hammer_flusher_sync(hmp);
			hammer_flusher_sync(hmp);
			hmp->ronly = 1;
			RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
				hammer_adjust_volume_mode, NULL);
		}
		lwkt_reltoken(&hmp->fs_token);
		return(error);
	}

	/* Fresh mount: initialize all in-memory index structures. */
	RB_INIT(&hmp->rb_vols_root);
	RB_INIT(&hmp->rb_inos_root);
	RB_INIT(&hmp->rb_redo_root);
	RB_INIT(&hmp->rb_nods_root);
	RB_INIT(&hmp->rb_undo_root);
	RB_INIT(&hmp->rb_resv_root);
	RB_INIT(&hmp->rb_bufs_root);
	RB_INIT(&hmp->rb_pfsm_root);

	hmp->ronly = ((mp->mnt_flag & MNT_RDONLY) != 0);

	RB_INIT(&hmp->volu_root);
	RB_INIT(&hmp->undo_root);
	RB_INIT(&hmp->data_root);
	RB_INIT(&hmp->meta_root);
	RB_INIT(&hmp->lose_root);
	TAILQ_INIT(&hmp->iorun_list);

	lwkt_token_init(&hmp->fs_token, "hammerfs");
	lwkt_token_init(&hmp->io_token, "hammerio");

	lwkt_gettoken(&hmp->fs_token);

	/*
	 * Load volumes
	 */
	path = objcache_get(namei_oc, M_WAITOK);
	hmp->nvolumes = -1;
	for (i = 0; i < info.nvolumes; ++i) {
		if (mntpt == NULL) {
			/*
			 * Root mount: pull the next ':'-separated volume
			 * name out of f_mntfromname, prefixing "/dev/" for
			 * relative names.
			 */
			KKASSERT(next_volume_ptr != NULL);
			strcpy(path, "");
			if (*next_volume_ptr != '/') {
				/* relative path */
				strcpy(path, "/dev/");
			}
			int k;
			for (k = strlen(path); k < MAXPATHLEN-1; ++k) {
				if (*next_volume_ptr == '\0') {
					break;
				} else if (*next_volume_ptr == ':') {
					++next_volume_ptr;
					break;
				} else {
					path[k] = *next_volume_ptr;
					++next_volume_ptr;
				}
			}
			path[k] = '\0';

			error = 0;
			cdev_t dev = kgetdiskbyname(path);
			error = bdevvp(dev, &devvp);
			if (error) {
				kprintf("hammer_mountroot: can't find devvp\n");
			}
		} else {
			/* Userland mount: fetch the i'th volume path. */
			error = copyin(&info.volumes[i], &upath,
				       sizeof(char *));
			if (error == 0)
				error = copyinstr(upath, path,
						  MAXPATHLEN, NULL);
		}
		if (error == 0)
			error = hammer_install_volume(hmp, path, devvp);
		if (error)
			break;
	}
	objcache_put(namei_oc, path);

	/*
	 * Make sure we found a root volume
	 */
	if (error == 0 && hmp->rootvol == NULL) {
		kprintf("hammer_mount: No root volume found!\n");
		error = EINVAL;
	}

	/*
	 * Check that all required volumes are available
	 */
	if (error == 0 && hammer_mountcheck_volumes(hmp)) {
		kprintf("hammer_mount: Missing volumes, cannot mount!\n");
		error = EINVAL;
	}

	if (error) {
		/* called with fs_token held */
		hammer_free_hmp(mp);
		return (error);
	}

	/*
	 * No errors, setup enough of the mount point so we can lookup the
	 * root vnode.
	 */
	mp->mnt_iosize_max = MAXPHYS;
	mp->mnt_kern_flag |= MNTK_FSMID;

	/*
	 * MPSAFE code.  Note that VOPs and VFSops which are not MPSAFE
	 * will acquire a per-mount token prior to entry and release it
	 * on return, so even if we do not specify it we no longer get
	 * the BGL regardlless of how we are flagged.
	 */
	mp->mnt_kern_flag |= MNTK_ALL_MPSAFE;
	/*MNTK_RD_MPSAFE | MNTK_GA_MPSAFE | MNTK_IN_MPSAFE;*/

	/*
	 * note: f_iosize is used by vnode_pager_haspage() when constructing
	 * its VOP_BMAP call.
	 */
	mp->mnt_stat.f_iosize = HAMMER_BUFSIZE;
	mp->mnt_stat.f_bsize = HAMMER_BUFSIZE;

	mp->mnt_vstat.f_frsize = HAMMER_BUFSIZE;
	mp->mnt_vstat.f_bsize = HAMMER_BUFSIZE;

	mp->mnt_maxsymlinklen = 255;
	mp->mnt_flag |= MNT_LOCAL;

	vfs_add_vnodeops(mp, &hammer_vnode_vops, &mp->mnt_vn_norm_ops);
	vfs_add_vnodeops(mp, &hammer_spec_vops, &mp->mnt_vn_spec_ops);
	vfs_add_vnodeops(mp, &hammer_fifo_vops, &mp->mnt_vn_fifo_ops);

	/*
	 * The root volume's ondisk pointer is only valid if we hold a
	 * reference to it.
	 */
	rootvol = hammer_get_root_volume(hmp, &error);
	if (error)
		goto failed;

	/*
	 * Perform any necessary UNDO operations.  The recovery code does
	 * call hammer_undo_lookup() so we have to pre-cache the blockmap,
	 * and then re-copy it again after recovery is complete.
	 *
	 * If this is a read-only mount the UNDO information is retained
	 * in memory in the form of dirty buffer cache buffers, and not
	 * written back to the media.
	 */
	bcopy(rootvol->ondisk->vol0_blockmap, hmp->blockmap,
	      sizeof(hmp->blockmap));

	/*
	 * Check filesystem version
	 */
	hmp->version = rootvol->ondisk->vol_version;
	if (hmp->version < HAMMER_VOL_VERSION_MIN ||
	    hmp->version > HAMMER_VOL_VERSION_MAX) {
		kprintf("HAMMER: mount unsupported fs version %d\n",
			hmp->version);
		error = ERANGE;
		goto done;
	}

	/*
	 * The undo_rec_limit limits the size of flush groups to avoid
	 * blowing out the UNDO FIFO.  This calculation is typically in
	 * the tens of thousands and is designed primarily when small
	 * HAMMER filesystems are created.
	 */
	hmp->undo_rec_limit = hammer_undo_max(hmp) / 8192 + 100;
	if (hammer_debug_general & 0x0001)
		kprintf("HAMMER: undo_rec_limit %d\n", hmp->undo_rec_limit);

	/*
	 * NOTE: Recover stage1 not only handles meta-data recovery, it
	 * 	 also sets hmp->undo_seqno for HAMMER VERSION 4+ filesystems.
	 */
	error = hammer_recover_stage1(hmp, rootvol);
	if (error) {
		kprintf("Failed to recover HAMMER filesystem on mount\n");
		goto done;
	}

	/*
	 * Finish setup now that we have a good root volume.
	 *
	 * The top 16 bits of fsid.val[1] is a pfs id.
	 */
	ksnprintf(mp->mnt_stat.f_mntfromname,
		  sizeof(mp->mnt_stat.f_mntfromname), "%s",
		  rootvol->ondisk->vol_name);
	mp->mnt_stat.f_fsid.val[0] =
		crc32((char *)&rootvol->ondisk->vol_fsid + 0, 8);
	mp->mnt_stat.f_fsid.val[1] =
		crc32((char *)&rootvol->ondisk->vol_fsid + 8, 8);
	mp->mnt_stat.f_fsid.val[1] &= 0x0000FFFF;

	mp->mnt_vstat.f_fsid_uuid = rootvol->ondisk->vol_fsid;
	mp->mnt_vstat.f_fsid = crc32(&mp->mnt_vstat.f_fsid_uuid,
				     sizeof(mp->mnt_vstat.f_fsid_uuid));

	/*
	 * Certain often-modified fields in the root volume are cached in
	 * the hammer_mount structure so we do not have to generate lots
	 * of little UNDO structures for them.
	 *
	 * Recopy after recovery.  This also has the side effect of
	 * setting our cached undo FIFO's first_offset, which serves to
	 * placemark the FIFO start for the NEXT flush cycle while the
	 * on-disk first_offset represents the LAST flush cycle.
	 */
	hmp->next_tid = rootvol->ondisk->vol0_next_tid;
	hmp->flush_tid1 = hmp->next_tid;
	hmp->flush_tid2 = hmp->next_tid;
	bcopy(rootvol->ondisk->vol0_blockmap, hmp->blockmap,
	      sizeof(hmp->blockmap));
	hmp->copy_stat_freebigblocks = rootvol->ondisk->vol0_stat_freebigblocks;

	hammer_flusher_create(hmp);

	/*
	 * Locate the root directory using the root cluster's B-Tree as a
	 * starting point.  The root directory uses an obj_id of 1.
	 *
	 * FUTURE: Leave the root directory cached referenced but unlocked
	 * in hmp->rootvp (need to flush it on unmount).
	 */
	error = hammer_vfs_vget(mp, NULL, 1, &rootvp);
	if (error)
		goto done;
	vput(rootvp);
	/*vn_unlock(hmp->rootvp);*/
	if (hmp->ronly == 0)
		error = hammer_recover_stage2(hmp, rootvol);

	/*
	 * If the stage2 recovery fails be sure to clean out all cached
	 * vnodes before throwing away the mount structure or bad things
	 * will happen.
	 */
	if (error)
		vflush(mp, 0, 0);

done:
	if ((mp->mnt_flag & MNT_UPDATE) == 0) {
		/* New mount */

		/* Populate info for mount point (NULL pad)*/
		bzero(mp->mnt_stat.f_mntonname, MNAMELEN);
		size_t size;
		if (mntpt) {
			copyinstr(mntpt, mp->mnt_stat.f_mntonname,
				  MNAMELEN - 1, &size);
		} else { /* Root mount */
			mp->mnt_stat.f_mntonname[0] = '/';
		}
	}
	(void)VFS_STATFS(mp, &mp->mnt_stat, cred);

	hammer_rel_volume(rootvol, 0);
failed:
	/*
	 * Cleanup and return.
	 */
	if (error) {
		/* called with fs_token held */
		hammer_free_hmp(mp);
	} else {
		lwkt_reltoken(&hmp->fs_token);
	}
	return (error);
}
int mac_ifnet_ioctl_set(struct ucred *cred, struct ifreq *ifr, struct ifnet *ifp) { struct label *intlabel; struct mac mac; char *buffer; int error; if (!(mac_labeled & MPC_OBJECT_IFNET)) return (EINVAL); error = copyin(ifr_data_get_ptr(ifr), &mac, sizeof(mac)); if (error) return (error); error = mac_check_structmac_consistent(&mac); if (error) return (error); buffer = malloc(mac.m_buflen, M_MACTEMP, M_WAITOK); error = copyinstr(mac.m_string, buffer, mac.m_buflen, NULL); if (error) { free(buffer, M_MACTEMP); return (error); } intlabel = mac_ifnet_label_alloc(); error = mac_ifnet_internalize_label(intlabel, buffer); free(buffer, M_MACTEMP); if (error) { mac_ifnet_label_free(intlabel); return (error); } /* * XXX: Note that this is a redundant privilege check, since policies * impose this check themselves if required by the policy * Eventually, this should go away. */ error = priv_check_cred(cred, PRIV_NET_SETIFMAC); if (error) { mac_ifnet_label_free(intlabel); return (error); } MAC_IFNET_LOCK(ifp); MAC_POLICY_CHECK_NOSLEEP(ifnet_check_relabel, cred, ifp, ifp->if_label, intlabel); if (error) { MAC_IFNET_UNLOCK(ifp); mac_ifnet_label_free(intlabel); return (error); } MAC_POLICY_PERFORM_NOSLEEP(ifnet_relabel, cred, ifp, ifp->if_label, intlabel); MAC_IFNET_UNLOCK(ifp); mac_ifnet_label_free(intlabel); return (0); }
/*
 * Search an alternate path before passing pathname arguments on
 * to system calls.  Useful for keeping a separate 'emulation tree'.
 *
 * If cflag is set, we check if an attempt can be made to create
 * the named file, i.e. we check if the directory it should
 * be in exists.
 *
 * On success *pbuf receives the translated path: the kernel buffer
 * itself when sgp == NULL (caller takes ownership), or a stackgap
 * copy in user space otherwise.  On failure *pbuf is left pointing
 * at the original path and the kernel buffer is freed.
 */
int
emul_find(struct proc *p, caddr_t *sgp, const char *prefix, char *path,
    char **pbuf, int cflag)
{
	struct nameidata nd;
	struct nameidata ndroot;
	struct vattr vat;
	struct vattr vatroot;
	int error;
	char *ptr, *buf, *cp;
	const char *pr;
	size_t sz, len;

	buf = (char *)malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
	*pbuf = path;

	/* Copy the emulation prefix into the front of buf. */
	for (ptr = buf, pr = prefix; (*ptr = *pr) != '\0'; ptr++, pr++)
		continue;

	sz = MAXPATHLEN - (ptr - buf);

	/*
	 * If sgp is not given then the path is already in kernel space
	 */
	if (sgp == NULL)
		error = copystr(path, ptr, sz, &len);
	else
		error = copyinstr(path, ptr, sz, &len);

	if (error)
		goto bad;

	/* Only absolute paths can be re-rooted under the prefix. */
	if (*ptr != '/') {
		error = EINVAL;
		goto bad;
	}

	/*
	 * We know that there is a / somewhere in this pathname.
	 * Search backwards for it, to find the file's parent dir
	 * to see if it exists in the alternate tree. If it does,
	 * and we want to create a file (cflag is set). We don't
	 * need to worry about the root comparison in this case.
	 */
	if (cflag) {
		/* Truncate at the last '/', look up the parent dir. */
		for (cp = &ptr[len] - 1; *cp != '/'; cp--)
			;
		*cp = '\0';

		NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, buf, p);

		if ((error = namei(&nd)) != 0)
			goto bad;

		*cp = '/';
	} else {
		NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, buf, p);

		if ((error = namei(&nd)) != 0)
			goto bad;

		/*
		 * We now compare the vnode of the emulation root to the one
		 * vnode asked. If they resolve to be the same, then we
		 * ignore the match so that the real root gets used.
		 * This avoids the problem of traversing "../.." to find the
		 * root directory and never finding it, because "/" resolves
		 * to the emulation root directory. This is expensive :-(
		 */
		/* XXX: prototype should have const here for NDINIT */
		NDINIT(&ndroot, LOOKUP, FOLLOW, UIO_SYSSPACE, prefix, p);

		if ((error = namei(&ndroot)) != 0)
			goto bad2;

		if ((error = VOP_GETATTR(nd.ni_vp, &vat, p->p_ucred, p)) != 0)
			goto bad3;

		if ((error = VOP_GETATTR(ndroot.ni_vp, &vatroot, p->p_ucred,
		    p)) != 0)
			goto bad3;

		/* Same fs + same fileid => this IS the emulation root. */
		if (vat.va_fsid == vatroot.va_fsid &&
		    vat.va_fileid == vatroot.va_fileid) {
			error = ENOENT;
			goto bad3;
		}
	}

	if (sgp == NULL)
		*pbuf = buf;	/* hand the kernel buffer to the caller */
	else {
		/* Copy the translated path out to the user stack gap. */
		sz = &ptr[len] - buf;
		*pbuf = stackgap_alloc(sgp, sz + 1);
		if (*pbuf == NULL) {
			error = ENAMETOOLONG;
			goto bad;
		}
		if ((error = copyout(buf, *pbuf, sz)) != 0) {
			*pbuf = path;
			goto bad;
		}
		free(buf, M_TEMP);
	}

	vrele(nd.ni_vp);
	if (!cflag)
		vrele(ndroot.ni_vp);
	return error;

bad3:
	vrele(ndroot.ni_vp);
bad2:
	vrele(nd.ni_vp);
bad:
	free(buf, M_TEMP);
	return error;
}
int udf_mount(struct mount *mp, const char *path, void *data, struct nameidata *ndp, struct proc *p) { struct vnode *devvp; /* vnode of the mount device */ struct udf_args args; size_t len; int error; if ((mp->mnt_flag & MNT_RDONLY) == 0) { mp->mnt_flag |= MNT_RDONLY; printf("udf_mount: enforcing read-only mode\n"); } /* * No root filesystem support. Probably not a big deal, since the * bootloader doesn't understand UDF. */ if (mp->mnt_flag & MNT_ROOTFS) return (EOPNOTSUPP); error = copyin(data, &args, sizeof(struct udf_args)); if (error) return (error); if (args.fspec == NULL) return (EINVAL); NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); if ((error = namei(ndp))) return (error); devvp = ndp->ni_vp; if (devvp->v_type != VBLK) { vrele(devvp); return (ENOTBLK); } if (major(devvp->v_rdev) >= nblkdev) { vrele(devvp); return (ENXIO); } /* Check the access rights on the mount device */ if (p->p_ucred->cr_uid) { vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_ACCESS(devvp, VREAD, p->p_ucred, p); VOP_UNLOCK(devvp, 0, p); if (error) { vrele(devvp); return (error); } } if ((error = udf_mountfs(devvp, mp, args.lastblock, p))) { vrele(devvp); return (error); } /* * Keep a copy of the mount information. */ copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &len); bzero(mp->mnt_stat.f_mntonname + len, MNAMELEN - len); copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &len); bzero(mp->mnt_stat.f_mntfromname + len, MNAMELEN - len); return (0); };
static int virtif_ioctl(struct ifnet *ifp, u_long cmd, void *data) { struct virtif_sc *sc = ifp->if_softc; int rv; switch (cmd) { #ifdef RUMP_VIF_LINKSTR struct ifdrv *ifd; size_t linkstrlen; #ifndef RUMP_VIF_LINKSTRMAX #define RUMP_VIF_LINKSTRMAX 4096 #endif case SIOCGLINKSTR: ifd = data; if (!sc->sc_linkstr) { rv = ENOENT; break; } linkstrlen = strlen(sc->sc_linkstr)+1; if (ifd->ifd_cmd == IFLINKSTR_QUERYLEN) { ifd->ifd_len = linkstrlen; rv = 0; break; } if (ifd->ifd_cmd != 0) { rv = ENOTTY; break; } rv = copyoutstr(sc->sc_linkstr, ifd->ifd_data, MIN(ifd->ifd_len,linkstrlen), NULL); break; case SIOCSLINKSTR: if (ifp->if_flags & IFF_UP) { rv = EBUSY; break; } ifd = data; if (ifd->ifd_cmd == IFLINKSTR_UNSET) { panic("unset linkstr not implemented"); } else if (ifd->ifd_cmd != 0) { rv = ENOTTY; break; } else if (sc->sc_linkstr) { rv = EBUSY; break; } if (ifd->ifd_len > RUMP_VIF_LINKSTRMAX) { rv = E2BIG; break; } else if (ifd->ifd_len < 1) { rv = EINVAL; break; } sc->sc_linkstr = kmem_alloc(ifd->ifd_len, KM_SLEEP); rv = copyinstr(ifd->ifd_data, sc->sc_linkstr, ifd->ifd_len, NULL); if (rv) { kmem_free(sc->sc_linkstr, ifd->ifd_len); break; } rv = virtif_create(ifp); if (rv) { kmem_free(sc->sc_linkstr, ifd->ifd_len); } break; #endif /* RUMP_VIF_LINKSTR */ default: if (!sc->sc_linkstr) rv = ENXIO; else rv = ether_ioctl(ifp, cmd, data); if (rv == ENETRESET) rv = 0; break; } return rv; }
/*
 * This is a special interface that will be utilized by ZFS to cause
 * a share to be added/removed
 *
 * arg is either a smb_share_t or share_name from userspace.
 * It will need to be copied into the kernel. It is smb_share_t
 * for add operations and share_name for delete operations.
 *
 * The request is door-encoded and sent to the SMB daemon via a door
 * upcall; the daemon's status and result code are decoded from the
 * reply.  Returns 0 on success, an errno value on copyin failure, or
 * an NERR_* code for protocol-level failures.
 */
int
smb_kshare_upcall(door_handle_t dhdl, void *arg, boolean_t add_share)
{
	door_arg_t	doorarg = { 0 };
	char		*buf = NULL;
	char		*str = NULL;
	int		error;
	int		rc;
	unsigned int	used;
	smb_dr_ctx_t	*dec_ctx;
	smb_dr_ctx_t	*enc_ctx;
	smb_share_t	*lmshare = NULL;	/* add path only; NULL for delete */
	int		opcode;

	opcode = (add_share) ? SMB_SHROP_ADD : SMB_SHROP_DELETE;

	buf = kmem_alloc(SMB_SHARE_DSIZE, KM_SLEEP);

	/* Encode opcode + payload into the door request buffer. */
	enc_ctx = smb_dr_encode_start(buf, SMB_SHARE_DSIZE);
	smb_dr_put_uint32(enc_ctx, opcode);

	switch (opcode) {
	case SMB_SHROP_ADD:
		lmshare = kmem_alloc(sizeof (smb_share_t), KM_SLEEP);
		error = xcopyin(arg, lmshare, sizeof (smb_share_t));
		if (error != 0) {
			kmem_free(lmshare, sizeof (smb_share_t));
			kmem_free(buf, SMB_SHARE_DSIZE);
			return (error);
		}
		smb_dr_put_share(enc_ctx, lmshare);
		break;

	case SMB_SHROP_DELETE:
		/* Delete passes only the share name string. */
		str = kmem_alloc(MAXPATHLEN, KM_SLEEP);
		error = copyinstr(arg, str, MAXPATHLEN, NULL);
		if (error != 0) {
			kmem_free(str, MAXPATHLEN);
			kmem_free(buf, SMB_SHARE_DSIZE);
			return (error);
		}
		smb_dr_put_string(enc_ctx, str);
		kmem_free(str, MAXPATHLEN);
		break;
	}

	if ((error = smb_dr_encode_finish(enc_ctx, &used)) != 0) {
		kmem_free(buf, SMB_SHARE_DSIZE);
		if (lmshare)
			kmem_free(lmshare, sizeof (smb_share_t));
		return (NERR_InternalError);
	}

	/* buf doubles as the reply buffer for the door call. */
	doorarg.data_ptr = buf;
	doorarg.data_size = used;
	doorarg.rbuf = buf;
	doorarg.rsize = SMB_SHARE_DSIZE;

	error = door_ki_upcall_limited(dhdl, &doorarg, NULL, SIZE_MAX, 0);

	if (error) {
		kmem_free(buf, SMB_SHARE_DSIZE);
		if (lmshare)
			kmem_free(lmshare, sizeof (smb_share_t));
		return (error);
	}

	/* Decode the daemon's reply. */
	dec_ctx = smb_dr_decode_start(doorarg.data_ptr, doorarg.data_size);
	if (smb_kshare_chk_dsrv_status(opcode, dec_ctx) != 0) {
		kmem_free(buf, SMB_SHARE_DSIZE);
		if (lmshare)
			kmem_free(lmshare, sizeof (smb_share_t));
		return (NERR_InternalError);
	}

	rc = smb_dr_get_uint32(dec_ctx);
	if (opcode == SMB_SHROP_ADD)
		smb_dr_get_share(dec_ctx, lmshare);

	if (smb_dr_decode_finish(dec_ctx))
		rc = NERR_InternalError;

	kmem_free(buf, SMB_SHARE_DSIZE);
	if (lmshare)
		kmem_free(lmshare, sizeof (smb_share_t));

	/* Adding a share that already exists is treated as success. */
	return ((rc == NERR_DuplicateShare && add_share) ? 0 : rc);
}
int sys_execv(const char *progname, char**args) { kprintf("hello world"); struct vnode *v; vaddr_t entrypoint, stackptr; int result; int argc = getargc(args); /* Copy the arguments into the kernel buffer */ char** kbuffer = kmalloc(argc*sizeof(char*)); int i; for (i=0; i<argc; i++) { kbuffer[i] = kmalloc(strlen(args[i])*sizeof(char*)+1); copyinstr(args[i], kbuffer[i], strlen(args[i])*sizeof(char*)+1, NULL); } args = kbuffer; /* Open the file. */ result = vfs_open(progname, O_RDONLY, &v); if (result) { return result; } /* This is not the first process, we have to make sure the previous process address space is not NULL. */ assert(curthread->t_vmspace != NULL); /* Destroy the old process address space */ as_destroy(curthread->t_vmspace); /* Create a new address space. */ curthread->t_vmspace = as_create(); if (curthread->t_vmspace == NULL) { vfs_close(v); return ENOMEM; } /* Activate it. */ as_activate(curthread->t_vmspace); /* Load the executable. */ result = load_elf(v, &entrypoint); if (result) { /* thread_exit destroys curthread->t_vmspace */ vfs_close(v); return result; } /* Done with the file now. */ vfs_close(v); /* Free the current process user stack */ kfree(curthread->t_stack); /* Define the user stack in the address space */ result = as_define_stack(curthread->t_vmspace, &stackptr); if (result) { /* thread_exit destroys curthread->t_vmspace */ return result; } /* copy arguments to user space */ char **args_u; /* user space arguments.(array of ptrs to string arguments.*/ /* string length of the arguments */ size_t argstrlen = getlen(argc, args); /* address space for the string arguments value. */ char* arg_str = (char *) stackptr - argstrlen; /* address space for the pointers to string arguments. 
*/ args_u = (char **) arg_str - (argc + 1) * sizeof (char**); /* adjust the address so that its divisable by 4 */ args_u = (int) args_u - (int) args_u % 4; /* copy the arguments to the user address space */ int len; for (i = 0; i < argc; i++) { /* copy a single argument to the user address space */ copyoutstr(args[i], (userptr_t) arg_str, strlen(args[i]) + 1, &len); /* set the user argument to the current argument string pointer */ args_u[i] = arg_str; /* increment the argument pointer to the next argument */ arg_str += (strlen(args[i]) + 1) * sizeof (char); } /* set the n+1th argument to be NULL */ args_u[argc] = NULL; /* set the stackptr to the starting point of args_u and adjust the stack pointer */ stackptr = args_u - sizeof (char**) - ((int)args_u%8); /* Warp to user mode. */ md_usermode(argc, (userptr_t) args_u, stackptr, entrypoint); /*********************** end of A2 stuff *****************/ /* md_usermode does not return */ panic("md_usermode returned\n"); return EINVAL; }
/*
 * Mount (or update the mount of) a memory-based filesystem.  The
 * backing store is a region of the mounting process's address space
 * (args.base/args.size); a fake VBLK vnode is fabricated to stand in
 * for the "device".  Returns 0 or an errno.
 */
/* ARGSUSED */
int
mfs_mount(struct mount *mp, const char *path, void *data,
    struct nameidata *ndp, struct proc *p)
{
	struct vnode *devvp;
	struct mfs_args args;
	struct ufsmount *ump;
	struct fs *fs;
	struct mfsnode *mfsp;
	char fspec[MNAMELEN];
	int flags, error;

	error = copyin(data, &args, sizeof(struct mfs_args));
	if (error)
		return (error);

	/*
	 * If updating, check whether changing from read-only to
	 * read/write; if there is no device name, that's all we do.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		ump = VFSTOUFS(mp);
		fs = ump->um_fs;
		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
			/* Downgrading to read-only: flush dirty files. */
			flags = WRITECLOSE;
			if (mp->mnt_flag & MNT_FORCE)
				flags |= FORCECLOSE;
			error = ffs_flushfiles(mp, flags, p);
			if (error)
				return (error);
		}
		if (fs->fs_ronly && (mp->mnt_flag & MNT_WANTRDWR))
			fs->fs_ronly = 0;
#ifdef EXPORTMFS
		/* No device name: this update is an export request. */
		if (args.fspec == NULL)
			return (vfs_export(mp, &ump->um_export,
			    &args.export_info));
#endif
		return (0);
	}
	error = copyinstr(args.fspec, fspec, sizeof(fspec), NULL);
	if (error)
		return (error);
	/* Fabricate an anonymous block-device vnode for the region. */
	error = getnewvnode(VT_MFS, NULL, &mfs_vops, &devvp);
	if (error)
		return (error);
	devvp->v_type = VBLK;
	if (checkalias(devvp, makedev(255, mfs_minor), (struct mount *)0))
		panic("mfs_mount: dup dev");
	mfs_minor++;
	mfsp = malloc(sizeof *mfsp, M_MFSNODE, M_WAITOK | M_ZERO);
	devvp->v_data = mfsp;
	mfsp->mfs_baseoff = args.base;
	mfsp->mfs_size = args.size;
	mfsp->mfs_vnode = devvp;
	/* Remember the server process; it will service the I/O. */
	mfsp->mfs_pid = p->p_pid;
	bufq_init(&mfsp->mfs_bufq, BUFQ_FIFO);
	if ((error = ffs_mountfs(devvp, mp, p)) != 0) {
		mfsp->mfs_shutdown = 1;
		vrele(devvp);
		return (error);
	}
	ump = VFSTOUFS(mp);
	fs = ump->um_fs;
	/* Record the mount-point and mounted-from names in the stats. */
	memset(fs->fs_fsmnt, 0, sizeof(fs->fs_fsmnt));
	strlcpy(fs->fs_fsmnt, path, sizeof(fs->fs_fsmnt));
	memcpy(mp->mnt_stat.f_mntonname, fs->fs_fsmnt, MNAMELEN);
	memset(mp->mnt_stat.f_mntfromname, 0, MNAMELEN);
	strlcpy(mp->mnt_stat.f_mntfromname, fspec, MNAMELEN);
	memset(mp->mnt_stat.f_mntfromspec, 0, MNAMELEN);
	strlcpy(mp->mnt_stat.f_mntfromspec, fspec, MNAMELEN);
	memcpy(&mp->mnt_stat.mount_info.mfs_args, &args, sizeof(args));
	return (0);
}
/*
 * normal open() function - called by user
 *
 * Validates the filename pointer and flags, allocates a descriptor
 * slot, copies the name into the kernel, opens the vnode, and installs
 * a new file handle in the per-thread fd table.  On success stores the
 * descriptor in *retval and returns 0; otherwise returns an errno.
 */
int sys_open(const_userptr_t fileName, int flags, int * retval) {
	char fileNameFromUser[BUF_SIZE];	// kernel copy of the file name
	bool append_flag = false;	// set when O_APPEND is among the flags
	int err;

	// Step 1: first check if the filename pointer is valid
	if (fileName == NULL) {
		return EFAULT;
	}

	// Step 2: whitelist the flag combinations this kernel supports
	switch (flags) {
	case O_RDONLY:
	case O_WRONLY:
	case O_RDWR:
	case O_RDONLY|O_CREAT:
	case O_WRONLY|O_CREAT:
	case O_RDWR|O_CREAT:
	case O_RDWR|O_CREAT|O_TRUNC:
		break;
	case O_WRONLY|O_APPEND:
	case O_RDWR|O_APPEND:
		append_flag = true;
		break;
	default:
		return EINVAL;
	}

	// Step 3: find an empty slot in the fd table (0-2 are reserved)
	int slot = -1;
	for (int i = 3; i < OPEN_MAX; i++) {
		if (curthread->t_fdtable[i] == NULL) {
			slot = i;
			break;
		}
	}
	/* in case the file table is full and there are no empty slots */
	if (slot == -1) {
		return EMFILE;
	}

	// Step 4: copy the valid filename into the kernel buffer
	size_t actual;
	if ((err = copyinstr(fileName, fileNameFromUser, BUF_SIZE,
	    &actual)) != 0) {
		return err;
	}

	// Step 5: call vfs_open, with correct arguments
	struct vnode * node;
	err = vfs_open(fileNameFromUser, flags, 0, &node);
	if (err) {
		// vnode not successful; return appropriate error to user
		return err;
	}

	/*
	 * Step 6: for O_APPEND, start the offset at the current file
	 * size.  The VOP_STAT result was previously ignored, which could
	 * leave the offset set from uninitialized stack garbage.  Use
	 * off_t rather than int so large files are not truncated.
	 */
	off_t offset = 0;
	if (append_flag) {
		struct stat buffer;
		err = VOP_STAT(node, &buffer);
		if (err) {
			vfs_close(node);
			return err;
		}
		offset = buffer.st_size;
	}

	curthread->t_fdtable[slot] = fhandle_create(fileNameFromUser,
	    flags, offset, node);
	KASSERT(curthread->t_fdtable[slot] != NULL);
	*retval = slot;

	/* on success, return 0 */
	return 0;
}
/*
 * exec_script_makecmds(): Check if it's an executable shell script.
 *
 * Given a proc pointer and an exec package pointer, see if the referent
 * of the epp is in shell script. If it is, then set things up so that
 * the script can be run. This involves preparing the address space
 * and arguments for the shell which will run the script.
 *
 * This function is ultimately responsible for creating a set of vmcmds
 * which can be used to build the process's vm space and inserting them
 * into the exec package.
 */
int
exec_script_makecmds(struct proc *p, struct exec_package *epp)
{
	int error, hdrlinelen, shellnamelen, shellarglen;
	char *hdrstr = epp->ep_hdr;
	char *cp, *shellname, *shellarg, *oldpnbuf;
	char **shellargp = NULL, **tmpsap;
	struct vnode *scriptvp;
	uid_t script_uid = -1;
	gid_t script_gid = -1;
	u_short script_sbits;

	/*
	 * remember the old vp and pnbuf for later, so we can restore
	 * them if check_exec() fails.
	 */
	scriptvp = epp->ep_vp;
	oldpnbuf = epp->ep_ndp->ni_cnd.cn_pnbuf;

	/*
	 * if the magic isn't that of a shell script, or we've already
	 * done shell script processing for this exec, punt on it.
	 */
	if ((epp->ep_flags & EXEC_INDIR) != 0 ||
	    epp->ep_hdrvalid < EXEC_SCRIPT_MAGICLEN ||
	    strncmp(hdrstr, EXEC_SCRIPT_MAGIC, EXEC_SCRIPT_MAGICLEN))
		return ENOEXEC;

	/*
	 * check that the shell spec is terminated by a newline,
	 * and that it isn't too large.  Don't modify the
	 * buffer unless we're ready to commit to handling it.
	 * (The latter requirement means that we have to check
	 * for both spaces and tabs later on.)
	 */
	hdrlinelen = min(epp->ep_hdrvalid, MAXINTERP);
	for (cp = hdrstr + EXEC_SCRIPT_MAGICLEN; cp < hdrstr + hdrlinelen;
	    cp++) {
		if (*cp == '\n') {
			*cp = '\0';
			break;
		}
	}
	/* No newline inside the limit: not a script we can handle. */
	if (cp >= hdrstr + hdrlinelen)
		return ENOEXEC;

	shellname = NULL;
	shellarg = NULL;
	shellarglen = 0;

	/* strip spaces before the shell name */
	for (cp = hdrstr + EXEC_SCRIPT_MAGICLEN; *cp == ' ' || *cp == '\t';
	    cp++)
		;

	/* collect the shell name; remember its length for later */
	shellname = cp;
	shellnamelen = 0;
	if (*cp == '\0')
		goto check_shell;
	for ( /* cp = cp */ ; *cp != '\0' && *cp != ' ' && *cp != '\t'; cp++)
		shellnamelen++;
	if (*cp == '\0')
		goto check_shell;
	*cp++ = '\0';

	/* skip spaces before any argument */
	for ( /* cp = cp */ ; *cp == ' ' || *cp == '\t'; cp++)
		;
	if (*cp == '\0')
		goto check_shell;

	/*
	 * collect the shell argument.  everything after the shell name
	 * is passed as ONE argument; that's the correct (historical)
	 * behaviour.
	 */
	shellarg = cp;
	for ( /* cp = cp */ ; *cp != '\0'; cp++)
		shellarglen++;
	*cp++ = '\0';

check_shell:
	/*
	 * MNT_NOSUID and STRC are already taken care of by check_exec,
	 * so we don't need to worry about them now or later.
	 */
	script_sbits = epp->ep_vap->va_mode & (VSUID | VSGID);
	if (script_sbits != 0) {
		script_uid = epp->ep_vap->va_uid;
		script_gid = epp->ep_vap->va_gid;
	}
	/*
	 * if the script isn't readable, or it's set-id, then we've
	 * gotta supply a "/dev/fd/..." for the shell to read.
	 * Note that stupid shells (csh) do the wrong thing, and
	 * close all open fd's when they start.  That kills this
	 * method of implementing "safe" set-id and x-only scripts.
	 */
	vn_lock(scriptvp, LK_EXCLUSIVE|LK_RETRY, p);
	error = VOP_ACCESS(scriptvp, VREAD, p->p_ucred, p);
	VOP_UNLOCK(scriptvp, 0, p);
	if (error == EACCES || script_sbits) {
		struct file *fp;

#ifdef DIAGNOSTIC
		if (epp->ep_flags & EXEC_HASFD)
			panic("exec_script_makecmds: epp already has a fd");
#endif
		fdplock(p->p_fd);
		error = falloc(p, &fp, &epp->ep_fd);
		fdpunlock(p->p_fd);
		if (error)
			goto fail;

		epp->ep_flags |= EXEC_HASFD;
		fp->f_type = DTYPE_VNODE;
		fp->f_ops = &vnops;
		fp->f_data = (caddr_t) scriptvp;
		fp->f_flag = FREAD;
		FILE_SET_MATURE(fp, p);
	}

	/* set up the parameters for the recursive check_exec() call */
	epp->ep_ndp->ni_dirfd = AT_FDCWD;
	epp->ep_ndp->ni_dirp = shellname;
	epp->ep_ndp->ni_segflg = UIO_SYSSPACE;
	epp->ep_flags |= EXEC_INDIR;

	/* and set up the fake args list, for later */
	/* 4 slots: shell name, optional arg, script path, NULL. */
	shellargp = mallocarray(4, sizeof(char *), M_EXEC, M_WAITOK);
	tmpsap = shellargp;
	*tmpsap = malloc(shellnamelen + 1, M_EXEC, M_WAITOK);
	strlcpy(*tmpsap++, shellname, shellnamelen + 1);
	if (shellarg != NULL) {
		*tmpsap = malloc(shellarglen + 1, M_EXEC, M_WAITOK);
		strlcpy(*tmpsap++, shellarg, shellarglen + 1);
	}
	*tmpsap = malloc(MAXPATHLEN, M_EXEC, M_WAITOK);
	if ((epp->ep_flags & EXEC_HASFD) == 0) {
#if NSYSTRACE > 0
		if (ISSET(p->p_flag, P_SYSTRACE)) {
			error = systrace_scriptname(p, *tmpsap);
			if (error == 0)
				tmpsap++;
			else
				/*
				 * Since systrace_scriptname() provides a
				 * convenience, not a security issue, we are
				 * safe to do this.
				 */
				error = copystr(epp->ep_name, *tmpsap++,
				    MAXPATHLEN, NULL);
		} else
#endif
			error = copyinstr(epp->ep_name, *tmpsap++,
			    MAXPATHLEN, NULL);
		if (error != 0)
			goto fail;
	} else
		snprintf(*tmpsap++, MAXPATHLEN, "/dev/fd/%d", epp->ep_fd);
	*tmpsap = NULL;

	/*
	 * mark the header we have as invalid; check_exec will read
	 * the header from the new executable
	 */
	epp->ep_hdrvalid = 0;

	if ((error = check_exec(p, epp)) == 0) {
		/* note that we've clobbered the header */
		epp->ep_flags |= EXEC_DESTR;

		/*
		 * It succeeded.  Unlock the script and
		 * close it if we aren't using it any more.
		 * Also, set things up so that the fake args
		 * list will be used.
		 */
		if ((epp->ep_flags & EXEC_HASFD) == 0)
			vn_close(scriptvp, FREAD, p->p_ucred, p);

		/* free the old pathname buffer */
		pool_put(&namei_pool, oldpnbuf);

		epp->ep_flags |= (EXEC_HASARGL | EXEC_SKIPARG);
		epp->ep_fa = shellargp;
		/*
		 * set things up so that set-id scripts will be
		 * handled appropriately
		 */
		epp->ep_vap->va_mode |= script_sbits;
		if (script_sbits & VSUID)
			epp->ep_vap->va_uid = script_uid;
		if (script_sbits & VSGID)
			epp->ep_vap->va_gid = script_gid;
		return (0);
	}

	/* XXX oldpnbuf not set for "goto fail" path */
	epp->ep_ndp->ni_cnd.cn_pnbuf = oldpnbuf;
fail:
	/* note that we've clobbered the header */
	epp->ep_flags |= EXEC_DESTR;

	/* kill the opened file descriptor, else close the file */
	if (epp->ep_flags & EXEC_HASFD) {
		epp->ep_flags &= ~EXEC_HASFD;
		fdplock(p->p_fd);
		(void) fdrelease(p, epp->ep_fd);
		fdpunlock(p->p_fd);
	} else
		vn_close(scriptvp, FREAD, p->p_ucred, p);

	pool_put(&namei_pool, epp->ep_ndp->ni_cnd.cn_pnbuf);

	/* free the fake arg list, because we're not returning it */
	if ((tmpsap = shellargp) != NULL) {
		while (*tmpsap != NULL) {
			free(*tmpsap, M_EXEC, 0);
			tmpsap++;
		}
		free(shellargp, M_EXEC, 0);
	}

	/*
	 * free any vmspace-creation commands,
	 * and release their references
	 */
	kill_vmcmds(&epp->ep_vmcmds);
	return error;
}
/*
 * Convert a pathname into a pointer to a vnode.
 *
 * The FOLLOW flag is set when symbolic links are to be followed
 * when they occur at the end of the name translation process.
 * Symbolic links are always followed for all other pathname
 * components other than the last.
 *
 * If the LOCKLEAF flag is set, a locked vnode is returned.
 *
 * The segflg defines whether the name is to be copied from user
 * space or kernel space.
 *
 * Overall outline of namei:
 *
 *	copy in name
 *	get starting directory
 *	while (!done && !error) {
 *		call lookup to search path.
 *		if symbolic link, massage name in buffer and continue
 *	}
 */
int
namei(struct nameidata *ndp)
{
	struct filedesc *fdp;	/* pointer to file descriptor state */
	char *cp;		/* pointer into pathname argument */
	struct vnode *dp;	/* the directory we are searching */
	struct iovec aiov;	/* uio for reading symbolic links */
	struct uio auio;
	int error, linklen;
	struct componentname *cnp = &ndp->ni_cnd;
	struct proc *p = cnp->cn_proc;

	ndp->ni_cnd.cn_cred = ndp->ni_cnd.cn_proc->p_ucred;
#ifdef DIAGNOSTIC
	if (!cnp->cn_cred || !cnp->cn_proc)
		panic ("namei: bad cred/proc");
	if (cnp->cn_nameiop & (~OPMASK))
		panic ("namei: nameiop contaminated with flags");
	if (cnp->cn_flags & OPMASK)
		panic ("namei: flags contaminated with nameiops");
#endif
	fdp = cnp->cn_proc->p_fd;

	/*
	 * Get a buffer for the name to be translated, and copy the
	 * name into the buffer.
	 */
	if ((cnp->cn_flags & HASBUF) == 0)
		cnp->cn_pnbuf = pool_get(&namei_pool, PR_WAITOK);
	if (ndp->ni_segflg == UIO_SYSSPACE)
		error = copystr(ndp->ni_dirp, cnp->cn_pnbuf,
		    MAXPATHLEN, &ndp->ni_pathlen);
	else
		error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf,
		    MAXPATHLEN, &ndp->ni_pathlen);

	/*
	 * Fail on null pathnames
	 */
	if (error == 0 && ndp->ni_pathlen == 1)
		error = ENOENT;

	if (error) {
fail:
		pool_put(&namei_pool, cnp->cn_pnbuf);
		ndp->ni_vp = NULL;
		return (error);
	}

#ifdef KTRACE
	if (KTRPOINT(cnp->cn_proc, KTR_NAMEI))
		ktrnamei(cnp->cn_proc, cnp->cn_pnbuf);
#endif
#if NSYSTRACE > 0
	if (ISSET(cnp->cn_proc->p_flag, P_SYSTRACE))
		systrace_namei(ndp);
#endif

	/*
	 * Strip trailing slashes, as requested
	 */
	if (cnp->cn_flags & STRIPSLASHES) {
		char *end = cnp->cn_pnbuf + ndp->ni_pathlen - 2;

		cp = end;
		while (cp >= cnp->cn_pnbuf && (*cp == '/'))
			cp--;

		/* Still some remaining characters in the buffer */
		if (cp >= cnp->cn_pnbuf) {
			ndp->ni_pathlen -= (end - cp);
			*(cp + 1) = '\0';
		}
	}

	ndp->ni_loopcnt = 0;

	/*
	 * Get starting point for the translation.
	 */
	if ((ndp->ni_rootdir = fdp->fd_rdir) == NULL)
		ndp->ni_rootdir = rootvnode;

	if ((p->p_p->ps_flags & PS_TAMED)) {
		error = tame_namei(p, cnp->cn_pnbuf);
		if (error)
			goto fail;
	}

	/*
	 * Check if starting from root directory or current directory.
	 * An explicit descriptor (for *at() calls) must be a directory.
	 */
	if (cnp->cn_pnbuf[0] == '/') {
		dp = ndp->ni_rootdir;
		vref(dp);
	} else if (ndp->ni_dirfd == AT_FDCWD) {
		dp = fdp->fd_cdir;
		vref(dp);
	} else {
		struct file *fp = fd_getfile(fdp, ndp->ni_dirfd);
		if (fp == NULL) {
			pool_put(&namei_pool, cnp->cn_pnbuf);
			return (EBADF);
		}
		dp = (struct vnode *)fp->f_data;
		if (fp->f_type != DTYPE_VNODE || dp->v_type != VDIR) {
			pool_put(&namei_pool, cnp->cn_pnbuf);
			return (ENOTDIR);
		}
		vref(dp);
	}
	/* Translation loop: one iteration per symlink followed. */
	for (;;) {
		if (!dp->v_mount) {
			/* Give up if the directory is no longer mounted */
			pool_put(&namei_pool, cnp->cn_pnbuf);
			return (ENOENT);
		}
		cnp->cn_nameptr = cnp->cn_pnbuf;
		ndp->ni_startdir = dp;
		if ((error = vfs_lookup(ndp)) != 0) {
			pool_put(&namei_pool, cnp->cn_pnbuf);
			return (error);
		}

		/*
		 * If not a symbolic link, return search result.
		 */
		if ((cnp->cn_flags & ISSYMLINK) == 0) {
			if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0)
				pool_put(&namei_pool, cnp->cn_pnbuf);
			else
				cnp->cn_flags |= HASBUF;
			return (0);
		}
		if ((cnp->cn_flags & LOCKPARENT) && (cnp->cn_flags & ISLASTCN))
			VOP_UNLOCK(ndp->ni_dvp, 0, p);
		if (ndp->ni_loopcnt++ >= SYMLOOP_MAX) {
			error = ELOOP;
			break;
		}
		/*
		 * Read the link target into a fresh buffer (or reuse the
		 * path buffer when this was the last component).
		 */
		if (ndp->ni_pathlen > 1)
			cp = pool_get(&namei_pool, PR_WAITOK);
		else
			cp = cnp->cn_pnbuf;
		aiov.iov_base = cp;
		aiov.iov_len = MAXPATHLEN;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = 0;
		auio.uio_rw = UIO_READ;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_procp = cnp->cn_proc;
		auio.uio_resid = MAXPATHLEN;
		error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
		if (error) {
badlink:
			if (ndp->ni_pathlen > 1)
				pool_put(&namei_pool, cp);
			break;
		}
		linklen = MAXPATHLEN - auio.uio_resid;
		if (linklen == 0) {
			error = ENOENT;
			goto badlink;
		}
		if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
			error = ENAMETOOLONG;
			goto badlink;
		}
		/* Splice the link target in front of the unconsumed path. */
		if (ndp->ni_pathlen > 1) {
			memcpy(cp + linklen, ndp->ni_next, ndp->ni_pathlen);
			pool_put(&namei_pool, cnp->cn_pnbuf);
			cnp->cn_pnbuf = cp;
		} else
			cnp->cn_pnbuf[linklen] = '\0';
		ndp->ni_pathlen += linklen;
		vput(ndp->ni_vp);
		dp = ndp->ni_dvp;
		/*
		 * Check if root directory should replace current directory.
		 */
		if (cnp->cn_pnbuf[0] == '/') {
			vrele(dp);
			dp = ndp->ni_rootdir;
			vref(dp);
		}
	}
	pool_put(&namei_pool, cnp->cn_pnbuf);
	vrele(ndp->ni_dvp);
	vput(ndp->ni_vp);
	ndp->ni_vp = NULL;
	return (error);
}
/*
 * execve(2): replace the calling process's image with a new program.
 * Copies in the argument and environment vectors, runs check_exec()
 * to select an exec format, rebuilds the VM space from the resulting
 * vmcmds, and finally sets up registers for the new image.  Once the
 * old address space has been torn down (uvmspace_exec), any failure
 * aborts via exit1() rather than returning.
 */
/* ARGSUSED */
int
sys_execve(struct proc *p, void *v, register_t *retval)
{
	struct sys_execve_args /* {
		syscallarg(const char *) path;
		syscallarg(char *const *) argp;
		syscallarg(char *const *) envp;
	} */ *uap = v;
	int error;
	struct exec_package pack;
	struct nameidata nid;
	struct vattr attr;
	struct ucred *cred = p->p_ucred;
	char *argp;
	char * const *cpp, *dp, *sp;
	struct process *pr = p->p_p;
	long argc, envc;
	size_t len, sgap;
#ifdef MACHINE_STACK_GROWS_UP
	size_t slen;
#endif
	char *stack;
	struct ps_strings arginfo;
	struct vmspace *vm = p->p_vmspace;
	char **tmpfap;
	extern struct emul emul_native;
#if NSYSTRACE > 0
	int wassugid = ISSET(pr->ps_flags, PS_SUGID | PS_SUGIDEXEC);
	size_t pathbuflen;
#endif
	char *pathbuf = NULL;

	/*
	 * Cheap solution to complicated problems.
	 * Mark this process as "leave me alone, I'm execing".
	 */
	atomic_setbits_int(&p->p_flag, P_INEXEC);

#if NSYSTRACE > 0
	if (ISSET(p->p_flag, P_SYSTRACE)) {
		systrace_execve0(p);
		pathbuf = pool_get(&namei_pool, PR_WAITOK);
		error = copyinstr(SCARG(uap, path), pathbuf, MAXPATHLEN,
		    &pathbuflen);
		if (error != 0)
			goto clrflag;
	}
#endif
	if (pathbuf != NULL) {
		NDINIT(&nid, LOOKUP, NOFOLLOW, UIO_SYSSPACE, pathbuf, p);
	} else {
		NDINIT(&nid, LOOKUP, NOFOLLOW, UIO_USERSPACE,
		    SCARG(uap, path), p);
	}

	/*
	 * initialize the fields of the exec package.
	 */
	if (pathbuf != NULL)
		pack.ep_name = pathbuf;
	else
		pack.ep_name = (char *)SCARG(uap, path);
	pack.ep_hdr = malloc(exec_maxhdrsz, M_EXEC, M_WAITOK);
	pack.ep_hdrlen = exec_maxhdrsz;
	pack.ep_hdrvalid = 0;
	pack.ep_ndp = &nid;
	pack.ep_interp = NULL;
	pack.ep_emul_arg = NULL;
	VMCMDSET_INIT(&pack.ep_vmcmds);
	pack.ep_vap = &attr;
	pack.ep_emul = &emul_native;
	pack.ep_flags = 0;

	/* see if we can run it. */
	if ((error = check_exec(p, &pack)) != 0) {
		goto freehdr;
	}

	/* XXX -- THE FOLLOWING SECTION NEEDS MAJOR CLEANUP */

	/* allocate an argument buffer */
	argp = (char *) uvm_km_valloc_wait(exec_map, NCARGS);
#ifdef DIAGNOSTIC
	if (argp == NULL)
		panic("execve: argp == NULL");
#endif
	dp = argp;
	argc = 0;

	/* copy the fake args list, if there's one, freeing it as we go */
	if (pack.ep_flags & EXEC_HASARGL) {
		tmpfap = pack.ep_fa;
		while (*tmpfap != NULL) {
			char *cp;

			cp = *tmpfap;
			while (*cp)
				*dp++ = *cp++;
			*dp++ = '\0';

			free(*tmpfap, M_EXEC);
			tmpfap++;
			argc++;
		}
		free(pack.ep_fa, M_EXEC);
		pack.ep_flags &= ~EXEC_HASARGL;
	}

	/* Now get argv & environment */
	if (!(cpp = SCARG(uap, argp))) {
		error = EFAULT;
		goto bad;
	}

	if (pack.ep_flags & EXEC_SKIPARG)
		cpp++;

	/* Copy each argv string into the contiguous buffer at dp. */
	while (1) {
		len = argp + ARG_MAX - dp;	/* space left in buffer */
		if ((error = copyin(cpp, &sp, sizeof(sp))) != 0)
			goto bad;
		if (!sp)
			break;
		if ((error = copyinstr(sp, dp, len, &len)) != 0) {
			if (error == ENAMETOOLONG)
				error = E2BIG;
			goto bad;
		}
		dp += len;
		cpp++;
		argc++;
	}

	envc = 0;
	/* environment does not need to be there */
	if ((cpp = SCARG(uap, envp)) != NULL ) {
		while (1) {
			len = argp + ARG_MAX - dp;
			if ((error = copyin(cpp, &sp, sizeof(sp))) != 0)
				goto bad;
			if (!sp)
				break;
			if ((error = copyinstr(sp, dp, len, &len)) != 0) {
				if (error == ENAMETOOLONG)
					error = E2BIG;
				goto bad;
			}
			dp += len;
			cpp++;
			envc++;
		}
	}

	dp = (char *)ALIGN(dp);

	/* Randomized gap placed between the strings and the stack top. */
	sgap = STACKGAPLEN;
	if (stackgap_random != 0)
		sgap += (arc4random() * ALIGNBYTES) & (stackgap_random - 1);
#ifdef MACHINE_STACK_GROWS_UP
	sgap = ALIGN(sgap);
#endif
	/* Now check if args & environ fit into new stack */
	len = ((argc + envc + 2 + pack.ep_emul->e_arglen) * sizeof(char *) +
	    sizeof(long) + dp + sgap + sizeof(struct ps_strings)) - argp;

	len = ALIGN(len);	/* make the stack "safely" aligned */

	if (len > pack.ep_ssize) { /* in effect, compare to initial limit */
		error = ENOMEM;
		goto bad;
	}

	/* adjust "active stack depth" for process VSZ */
	pack.ep_ssize = len;	/* maybe should go elsewhere, but... */

	/*
	 * Prepare vmspace for remapping. Note that uvmspace_exec can replace
	 * p_vmspace!
	 */
	uvmspace_exec(p, VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS);
	vm = p->p_vmspace;

	/* Now map address space */
	vm->vm_taddr = (char *)pack.ep_taddr;
	vm->vm_tsize = atop(round_page(pack.ep_tsize));
	vm->vm_daddr = (char *)pack.ep_daddr;
	vm->vm_dsize = atop(round_page(pack.ep_dsize));
	vm->vm_dused = 0;
	vm->vm_ssize = atop(round_page(pack.ep_ssize));
	vm->vm_maxsaddr = (char *)pack.ep_maxsaddr;
	vm->vm_minsaddr = (char *)pack.ep_minsaddr;

	/* create the new process's VM space by running the vmcmds */
#ifdef DIAGNOSTIC
	if (pack.ep_vmcmds.evs_used == 0)
		panic("execve: no vmcmds");
#endif
	error = exec_process_vmcmds(p, &pack);

	/* if an error happened, deallocate and punt */
	if (error)
		goto exec_abort;

	/* remember information about the process */
	arginfo.ps_nargvstr = argc;
	arginfo.ps_nenvstr = envc;

#ifdef MACHINE_STACK_GROWS_UP
	stack = (char *)USRSTACK + sizeof(arginfo) + sgap;
	slen = len - sizeof(arginfo) - sgap;
#else
	stack = (char *)(USRSTACK - len);
#endif

	/* Now copy argc, args & environ to new stack */
	if (!(*pack.ep_emul->e_copyargs)(&pack, &arginfo, stack, argp))
		goto exec_abort;

	/* copy out the process's ps_strings structure */
	if (copyout(&arginfo, (char *)PS_STRINGS, sizeof(arginfo)))
		goto exec_abort;

	stopprofclock(p);	/* stop profiling */
	fdcloseexec(p);		/* handle close on exec */
	execsigs(p);		/* reset caught signals */
	TCB_SET(p, NULL);	/* reset the TCB address */

	/* set command name & other accounting info */
	len = min(nid.ni_cnd.cn_namelen, MAXCOMLEN);
	bcopy(nid.ni_cnd.cn_nameptr, p->p_comm, len);
	p->p_comm[len] = 0;
	p->p_acflag &= ~AFORK;

	/* record proc's vnode, for use by procfs and others */
	if (p->p_textvp)
		vrele(p->p_textvp);
	vref(pack.ep_vp);
	p->p_textvp = pack.ep_vp;

	atomic_setbits_int(&pr->ps_flags, PS_EXEC);
	/* Release a vfork()ing parent that is waiting on us. */
	if (pr->ps_flags & PS_PPWAIT) {
		atomic_clearbits_int(&pr->ps_flags, PS_PPWAIT);
		atomic_clearbits_int(&pr->ps_pptr->ps_flags, PS_ISPWAIT);
		wakeup(pr->ps_pptr);
	}

	/*
	 * If process does execve() while it has a mismatched real,
	 * effective, or saved uid/gid, we set PS_SUGIDEXEC.
	 */
	if (p->p_ucred->cr_uid != p->p_cred->p_ruid ||
	    p->p_ucred->cr_uid != p->p_cred->p_svuid ||
	    p->p_ucred->cr_gid != p->p_cred->p_rgid ||
	    p->p_ucred->cr_gid != p->p_cred->p_svgid)
		atomic_setbits_int(&pr->ps_flags, PS_SUGIDEXEC);
	else
		atomic_clearbits_int(&pr->ps_flags, PS_SUGIDEXEC);

	/*
	 * deal with set[ug]id.
	 * MNT_NOEXEC has already been used to disable s[ug]id.
	 */
	if ((attr.va_mode & (VSUID | VSGID)) && proc_cansugid(p)) {
		int i;

		atomic_setbits_int(&pr->ps_flags, PS_SUGID|PS_SUGIDEXEC);

#ifdef KTRACE
		/*
		 * If process is being ktraced, turn off - unless
		 * root set it.
		 */
		if (p->p_tracep && !(p->p_traceflag & KTRFAC_ROOT)) {
			p->p_traceflag = 0;
			ktrsettracevnode(p, NULL);
		}
#endif
		p->p_ucred = crcopy(cred);
		if (attr.va_mode & VSUID)
			p->p_ucred->cr_uid = attr.va_uid;
		if (attr.va_mode & VSGID)
			p->p_ucred->cr_gid = attr.va_gid;

		/*
		 * For set[ug]id processes, a few caveats apply to
		 * stdin, stdout, and stderr.
		 */
		for (i = 0; i < 3; i++) {
			struct file *fp = NULL;

			/*
			 * NOTE - This will never return NULL because of
			 * immature fds. The file descriptor table is not
			 * shared because we're suid.
			 */
			fp = fd_getfile(p->p_fd, i);
#ifdef PROCFS
			/*
			 * Close descriptors that are writing to procfs.
			 */
			if (fp && fp->f_type == DTYPE_VNODE &&
			    ((struct vnode *)(fp->f_data))->v_tag == VT_PROCFS &&
			    (fp->f_flag & FWRITE)) {
				fdrelease(p, i);
				fp = NULL;
			}
#endif
			/*
			 * Ensure that stdin, stdout, and stderr are already
			 * allocated.  We do not want userland to accidentally
			 * allocate descriptors in this range which has implied
			 * meaning to libc.
			 */
			if (fp == NULL) {
				short flags = FREAD | (i == 0 ? 0 : FWRITE);
				struct vnode *vp;
				int indx;

				if ((error = falloc(p, &fp, &indx)) != 0)
					goto exec_abort;
#ifdef DIAGNOSTIC
				if (indx != i)
					panic("sys_execve: falloc indx != i");
#endif
				/* Point the slot at the null device. */
				if ((error = cdevvp(getnulldev(), &vp)) != 0) {
					fdremove(p->p_fd, indx);
					closef(fp, p);
					goto exec_abort;
				}
				if ((error = VOP_OPEN(vp, flags, p->p_ucred, p)) != 0) {
					fdremove(p->p_fd, indx);
					closef(fp, p);
					vrele(vp);
					goto exec_abort;
				}
				if (flags & FWRITE)
					vp->v_writecount++;
				fp->f_flag = flags;
				fp->f_type = DTYPE_VNODE;
				fp->f_ops = &vnops;
				fp->f_data = (caddr_t)vp;
				FILE_SET_MATURE(fp);
			}
		}
	} else
		atomic_clearbits_int(&pr->ps_flags, PS_SUGID);
	p->p_cred->p_svuid = p->p_ucred->cr_uid;
	p->p_cred->p_svgid = p->p_ucred->cr_gid;

	/* Clear interval timers if we changed privilege. */
	if (pr->ps_flags & PS_SUGIDEXEC) {
		int i, s = splclock();

		timeout_del(&p->p_realit_to);
		timerclear(&p->p_realtimer.it_interval);
		timerclear(&p->p_realtimer.it_value);
		for (i = 0; i < sizeof(p->p_stats->p_timer) /
		    sizeof(p->p_stats->p_timer[0]); i++) {
			timerclear(&p->p_stats->p_timer[i].it_interval);
			timerclear(&p->p_stats->p_timer[i].it_value);
		}
		splx(s);
	}

	uvm_km_free_wakeup(exec_map, (vaddr_t) argp, NCARGS);

	pool_put(&namei_pool, nid.ni_cnd.cn_pnbuf);
	vn_close(pack.ep_vp, FREAD, cred, p);

	/*
	 * notify others that we exec'd
	 */
	KNOTE(&pr->ps_klist, NOTE_EXEC);

	/* setup new registers and do misc. setup. */
	if (pack.ep_emul->e_fixup != NULL) {
		if ((*pack.ep_emul->e_fixup)(p, &pack) != 0)
			goto free_pack_abort;
	}
#ifdef MACHINE_STACK_GROWS_UP
	(*pack.ep_emul->e_setregs)(p, &pack, (u_long)stack + slen, retval);
#else
	(*pack.ep_emul->e_setregs)(p, &pack, (u_long)stack, retval);
#endif

	/* map the process's signal trampoline code */
	if (exec_sigcode_map(p, pack.ep_emul))
		goto free_pack_abort;

#ifdef __HAVE_EXEC_MD_MAP
	/* perform md specific mappings that process might need */
	if (exec_md_map(p, &pack))
		goto free_pack_abort;
#endif

	if (p->p_flag & P_TRACED)
		psignal(p, SIGTRAP);

	free(pack.ep_hdr, M_EXEC);

	/*
	 * Call emulation specific exec hook. This can setup per-process
	 * p->p_emuldata or do any other per-process stuff an emulation needs.
	 *
	 * If we are executing process of different emulation than the
	 * original forked process, call e_proc_exit() of the old emulation
	 * first, then e_proc_exec() of new emulation. If the emulation is
	 * same, the exec hook code should deallocate any old emulation
	 * resources held previously by this process.
	 */
	if (p->p_emul && p->p_emul->e_proc_exit &&
	    p->p_emul != pack.ep_emul)
		(*p->p_emul->e_proc_exit)(p);

	p->p_descfd = 255;
	if ((pack.ep_flags & EXEC_HASFD) && pack.ep_fd < 255)
		p->p_descfd = pack.ep_fd;

	/*
	 * Call exec hook. Emulation code may NOT store reference to anything
	 * from &pack.
	 */
	if (pack.ep_emul->e_proc_exec)
		(*pack.ep_emul->e_proc_exec)(p, &pack);

	/* update p_emul, the old value is no longer needed */
	p->p_emul = pack.ep_emul;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_EMUL))
		ktremul(p, p->p_emul->e_name);
#endif

	atomic_clearbits_int(&p->p_flag, P_INEXEC);
#if NSYSTRACE > 0
	if (ISSET(p->p_flag, P_SYSTRACE) && wassugid &&
	    !ISSET(pr->ps_flags, PS_SUGID | PS_SUGIDEXEC))
		systrace_execve1(pathbuf, p);
#endif
	if (pathbuf != NULL)
		pool_put(&namei_pool, pathbuf);

	return (0);

bad:
	/* free the vmspace-creation commands, and release their references */
	kill_vmcmds(&pack.ep_vmcmds);
	/* kill any opened file descriptor, if necessary */
	if (pack.ep_flags & EXEC_HASFD) {
		pack.ep_flags &= ~EXEC_HASFD;
		(void) fdrelease(p, pack.ep_fd);
	}
	if (pack.ep_interp != NULL)
		pool_put(&namei_pool, pack.ep_interp);
	if (pack.ep_emul_arg != NULL)
		free(pack.ep_emul_arg, M_TEMP);
	/* close and put the exec'd file */
	vn_close(pack.ep_vp, FREAD, cred, p);
	pool_put(&namei_pool, nid.ni_cnd.cn_pnbuf);
	uvm_km_free_wakeup(exec_map, (vaddr_t) argp, NCARGS);

freehdr:
	free(pack.ep_hdr, M_EXEC);
#if NSYSTRACE > 0
clrflag:
#endif
	atomic_clearbits_int(&p->p_flag, P_INEXEC);
	if (pathbuf != NULL)
		pool_put(&namei_pool, pathbuf);

	return (error);

exec_abort:
	/*
	 * the old process doesn't exist anymore.  exit gracefully.
	 * get rid of the (new) address space we have created, if any, get rid
	 * of our namei data and vnode, and exit noting failure
	 */
	uvm_deallocate(&vm->vm_map, VM_MIN_ADDRESS,
	    VM_MAXUSER_ADDRESS - VM_MIN_ADDRESS);
	if (pack.ep_interp != NULL)
		pool_put(&namei_pool, pack.ep_interp);
	if (pack.ep_emul_arg != NULL)
		free(pack.ep_emul_arg, M_TEMP);
	pool_put(&namei_pool, nid.ni_cnd.cn_pnbuf);
	vn_close(pack.ep_vp, FREAD, cred, p);
	uvm_km_free_wakeup(exec_map, (vaddr_t) argp, NCARGS);

free_pack_abort:
	free(pack.ep_hdr, M_EXEC);
	exit1(p, W_EXITCODE(0, SIGABRT), EXIT_NORMAL);

	/* NOTREACHED */
	atomic_clearbits_int(&p->p_flag, P_INEXEC);
	if (pathbuf != NULL)
		pool_put(&namei_pool, pathbuf);

	return (0);
}
/*
 * VFS Operations.
 *
 * mount system call
 *
 * Handles both fresh mounts and MNT_UPDATE requests (ro/rw changes,
 * reload, export).  Returns 0 on success or an errno.
 */
int
ext2fs_mount(struct mount *mp, const char *path, void *data,
    struct nameidata *ndp, struct proc *p)
{
	struct vnode *devvp;
	struct ufs_args args;
	struct ufsmount *ump = NULL;
	struct m_ext2fs *fs;
	char fname[MNAMELEN];
	char fspec[MNAMELEN];
	int error, flags;
	mode_t accessmode;

	error = copyin(data, &args, sizeof(struct ufs_args));
	if (error)
		return (error);

	/*
	 * If updating, check whether changing from read-only to
	 * read/write; if there is no device name, that's all we do.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		ump = VFSTOUFS(mp);
		fs = ump->um_e2fs;
		if (fs->e2fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
			/* Going read-only: flush and mark clean if possible. */
			flags = WRITECLOSE;
			if (mp->mnt_flag & MNT_FORCE)
				flags |= FORCECLOSE;
			error = ext2fs_flushfiles(mp, flags, p);
			if (error == 0 &&
			    ext2fs_cgupdate(ump, MNT_WAIT) == 0 &&
			    (fs->e2fs.e2fs_state & E2FS_ERRORS) == 0) {
				fs->e2fs.e2fs_state = E2FS_ISCLEAN;
				(void)ext2fs_sbupdate(ump, MNT_WAIT);
			}
			if (error)
				return (error);
			fs->e2fs_ronly = 1;
		}
		if (mp->mnt_flag & MNT_RELOAD) {
			error = ext2fs_reload(mp, ndp->ni_cnd.cn_cred, p);
			if (error)
				return (error);
		}
		if (fs->e2fs_ronly && (mp->mnt_flag & MNT_WANTRDWR)) {
			/*
			 * If upgrade to read-write by non-root, then verify
			 * that user has necessary permissions on the device.
			 */
			if (suser(p, 0) != 0) {
				devvp = ump->um_devvp;
				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
				error = VOP_ACCESS(devvp, VREAD | VWRITE,
				    p->p_ucred, p);
				VOP_UNLOCK(devvp, p);
				if (error)
					return (error);
			}
			fs->e2fs_ronly = 0;
			if (fs->e2fs.e2fs_state == E2FS_ISCLEAN)
				fs->e2fs.e2fs_state = 0;
			else
				fs->e2fs.e2fs_state = E2FS_ERRORS;
			fs->e2fs_fmod = 1;
		}
		if (args.fspec == NULL) {
			/*
			 * Process export requests.
			 */
			return (vfs_export(mp, &ump->um_export,
			    &args.export_info));
		}
	}
	/*
	 * Not an update, or updating the name: look up the name
	 * and verify that it refers to a sensible block device.
	 */
	error = copyinstr(args.fspec, fspec, sizeof(fspec), NULL);
	if (error)
		goto error;

	/* Resolve duid-style names; fall back to the literal spec. */
	if (disk_map(fspec, fname, MNAMELEN, DM_OPENBLCK) == -1)
		memcpy(fname, fspec, sizeof(fname));

	NDINIT(ndp, LOOKUP, FOLLOW, UIO_SYSSPACE, fname, p);
	if ((error = namei(ndp)) != 0)
		goto error;
	devvp = ndp->ni_vp;

	if (devvp->v_type != VBLK) {
		error = ENOTBLK;
		goto error_devvp;
	}
	if (major(devvp->v_rdev) >= nblkdev) {
		error = ENXIO;
		goto error_devvp;
	}
	/*
	 * If mount by non-root, then verify that user has necessary
	 * permissions on the device.
	 */
	if (suser(p, 0) != 0) {
		accessmode = VREAD;
		if ((mp->mnt_flag & MNT_RDONLY) == 0)
			accessmode |= VWRITE;
		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
		error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p);
		VOP_UNLOCK(devvp, p);
		if (error)
			goto error_devvp;
	}
	if ((mp->mnt_flag & MNT_UPDATE) == 0)
		error = ext2fs_mountfs(devvp, mp, p);
	else {
		/* An update may not switch to a different device. */
		if (devvp != ump->um_devvp)
			error = EINVAL;	/* XXX needs translation */
		else
			vrele(devvp);
	}
	if (error)
		goto error_devvp;
	ump = VFSTOUFS(mp);
	fs = ump->um_e2fs;

	/* Record mount-point and mounted-from names in the stats. */
	memset(fs->e2fs_fsmnt, 0, sizeof(fs->e2fs_fsmnt));
	strlcpy(fs->e2fs_fsmnt, path, sizeof(fs->e2fs_fsmnt));
	if (fs->e2fs.e2fs_rev > E2FS_REV0) {
		memset(fs->e2fs.e2fs_fsmnt, 0, sizeof(fs->e2fs.e2fs_fsmnt));
		strlcpy(fs->e2fs.e2fs_fsmnt, mp->mnt_stat.f_mntonname,
		    sizeof(fs->e2fs.e2fs_fsmnt));
	}
	memcpy(mp->mnt_stat.f_mntonname, fs->e2fs_fsmnt, MNAMELEN);
	memset(mp->mnt_stat.f_mntfromname, 0, MNAMELEN);
	strlcpy(mp->mnt_stat.f_mntfromname, fname, MNAMELEN);
	memset(mp->mnt_stat.f_mntfromspec, 0, MNAMELEN);
	strlcpy(mp->mnt_stat.f_mntfromspec, fspec, MNAMELEN);
	memcpy(&mp->mnt_stat.mount_info.ufs_args, &args, sizeof(args));

	if (fs->e2fs_fmod != 0) {	/* XXX */
		fs->e2fs_fmod = 0;
		if (fs->e2fs.e2fs_state == 0)
			fs->e2fs.e2fs_wtime = time_second;
		else
			printf("%s: file system not clean; please fsck(8)\n",
			    mp->mnt_stat.f_mntfromname);
		ext2fs_cgupdate(ump, MNT_WAIT);
	}
	goto success;

error_devvp:
	/* Error with devvp held. */
	vrele(devvp);

error:
	/* Error with no state to backout. */

success:
	return (error);
}
/*
 * SVR4 (32-bit compat) systeminfo(2) emulation.
 *
 * "Get" requests point `str` at the answer string and copy it out,
 * truncated to the caller's buffer length but always NUL-terminated.
 * "Set" requests copy the new value in and push it through sysctl.
 * Returns 0 or an errno; *retval is the string length (incl. NUL).
 */
int
svr4_32_sys_systeminfo(struct lwp *l,
    const struct svr4_32_sys_systeminfo_args *uap, register_t *retval)
{
	const char *str = NULL;		/* non-NULL => this is a "get" request */
	int name[2];			/* sysctl MIB for the "set" requests */
	int error;
	size_t len;
	char buf[256];			/* scratch: hw-serial formatting / copyin */
	u_int rlen = SCARG(uap, len);	/* caller-supplied buffer length */

	switch (SCARG(uap, what)) {
	case SVR4_SI_SYSNAME:
		str = ostype;
		break;

	case SVR4_SI_HOSTNAME:
		str = hostname;
		break;

	case SVR4_SI_RELEASE:
		str = osrelease;
		break;

	case SVR4_SI_VERSION:
		str = version;
		break;

	case SVR4_SI_MACHINE:
		str = "sun4m";		/* Lie, pretend we are 4m */
		break;

	case SVR4_SI_ARCHITECTURE:
#if defined(__sparc__)
		str = "sparc";
#else
		str = machine_arch;
#endif
		break;

	case SVR4_SI_ISALIST:
#if defined(__sparc__)
		str = "sparcv8 sparcv8-fsmuld sparcv7 sparc";
#elif defined(__i386__)
		str = "i386";
#else
		str = "unknown";
#endif
		break;

	case SVR4_SI_HW_SERIAL:
		/* Format the host id into the scratch buffer. */
		snprintf(buf, sizeof(buf), "%lu", hostid);
		str = buf;
		break;

	case SVR4_SI_HW_PROVIDER:
		str = ostype;
		break;

	case SVR4_SI_SRPC_DOMAIN:
		str = domainname;
		break;

	case SVR4_SI_PLATFORM:
#if defined(__i386__)
		str = "i86pc";
#elif defined(__sparc__)
		{
			extern char machine_model[];

			str = machine_model;
		}
#else
		str = "unknown";
#endif
		break;

	case SVR4_SI_KERB_REALM:
		str = "unsupported";
		break;

	/* The "set" requests fall through to the sysctl path below. */
	case SVR4_SI_SET_HOSTNAME:
		name[1] = KERN_HOSTNAME;
		break;

	case SVR4_SI_SET_SRPC_DOMAIN:
		name[1] = KERN_DOMAINNAME;
		break;

	case SVR4_SI_SET_KERB_REALM:
		return 0;		/* silently accepted, nothing to do */

	default:
		DPRINTF(("Bad systeminfo command %d\n", SCARG(uap, what)));
		return ENOSYS;
	}

	if (str) {
		/* "Get": copy out at most rlen bytes of str. */
		len = strlen(str) + 1;
		if (len < rlen)
			rlen = len;

		if (SCARG_P32(uap, buf)) {
			error = copyout(str, SCARG_P32(uap, buf), rlen);
			if (error)
				return error;
			/* make sure we are NULL terminated */
			if (rlen > 0) {
				buf[0] = '\0';
				error = copyout(buf,
				    &(((char *) SCARG_P32(uap, buf))[rlen - 1]),
				    1);
			}
		}
		else
			error = 0;
	}
	else {
		/* "Set": copy the new value in and hand it to sysctl. */
		error = copyinstr(SCARG_P32(uap, buf), buf, sizeof(buf), &len);
		if (error)
			return error;
		name[0] = CTL_KERN;
		/*
		 * NOTE(review): name[1] was set above (KERN_HOSTNAME or
		 * KERN_DOMAINNAME) but only one MIB level is passed here —
		 * confirm old_sysctl() really expects namelen 1 and not 2.
		 */
		error = old_sysctl(&name[0], 1, 0, 0, buf, len, NULL);
	}

	*retval = len;
	return error;
}
int afs_mount(struct mount *mp, char *path, caddr_t data, struct nameidata *ndp, CTX_TYPE ctx) #endif { /* ndp contains the mounted-from device. Just ignore it. * we also don't care about our proc struct. */ size_t size; int error; #ifdef AFS_DARWIN80_ENV struct vfsioattr ioattr; /* vfs_statfs advertised as RO, but isn't */ /* new api will be needed to initialize this information (nfs needs to set mntfromname too) */ #endif STATFS_TYPE *mnt_stat = vfs_statfs(mp); if (vfs_isupdate(mp)) return EINVAL; AFS_GLOCK(); AFS_STATCNT(afs_mount); if (data == 0 && afs_globalVFS) { /* Don't allow remounts. */ AFS_GUNLOCK(); return (EBUSY); } afs_globalVFS = mp; #ifdef AFS_DARWIN80_ENV vfs_ioattr(mp, &ioattr); ioattr.io_devblocksize = (16 * 32768); vfs_setioattr(mp, &ioattr); /* f_iosize is handled in VFS_GETATTR */ #else mp->vfs_bsize = 8192; mp->mnt_stat.f_iosize = 8192; #endif vfs_getnewfsid(mp); #ifndef AFS_DARWIN80_ENV (void)copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); memset(mp->mnt_stat.f_mntonname + size, 0, MNAMELEN - size); #endif memset(mnt_stat->f_mntfromname, 0, MNAMELEN); if (data == 0) { strcpy(mnt_stat->f_mntfromname, "AFS"); /* null terminated string "AFS" will fit, just leave it be. */ vfs_setfsprivate(mp, NULL); } else { struct VenusFid *rootFid = NULL; struct volume *tvp; char volName[MNAMELEN]; (void)copyinstr(data, volName, MNAMELEN - 1, &size); memset(volName + size, 0, MNAMELEN - size); if (volName[0] == 0) { strcpy(mnt_stat->f_mntfromname, "AFS"); vfs_setfsprivate(mp, &afs_rootFid); } else { struct cell *localcell = afs_GetPrimaryCell(READ_LOCK); if (localcell == NULL) { AFS_GUNLOCK(); return ENODEV; } /* Set the volume identifier to "AFS:volume.name" */ snprintf(mnt_stat->f_mntfromname, MNAMELEN - 1, "AFS:%s", volName); tvp = afs_GetVolumeByName(volName, localcell->cellNum, 1, (struct vrequest *)0, READ_LOCK); if (tvp) { int volid = (tvp->roVol ? 
tvp->roVol : tvp->volume); MALLOC(rootFid, struct VenusFid *, sizeof(*rootFid), M_UFSMNT, M_WAITOK); rootFid->Cell = localcell->cellNum; rootFid->Fid.Volume = volid; rootFid->Fid.Vnode = 1; rootFid->Fid.Unique = 1; } else { AFS_GUNLOCK(); return ENODEV; } vfs_setfsprivate(mp, &rootFid); } }
/* System calls. */

/*
 * shm_open(2): open (or create) a POSIX shared memory object and return
 * a file descriptor for it.  The special SHM_ANON path pointer creates
 * an anonymous object that is never entered in the name dictionary.
 */
int
sys_shm_open(struct thread *td, struct shm_open_args *uap)
{
	struct filedesc *fdp;
	struct shmfd *shmfd;
	struct file *fp;
	char *path;
	Fnv32_t fnv;		/* FNV-1 hash of the path; dictionary key */
	mode_t cmode;
	int fd, error;

#ifdef CAPABILITY_MODE
	/*
	 * shm_open(2) is only allowed for anonymous objects.
	 */
	if (IN_CAPABILITY_MODE(td) && (uap->path != SHM_ANON))
		return (ECAPMODE);
#endif

	/* Only O_RDONLY/O_RDWR plus creation/cloexec flags are legal. */
	if ((uap->flags & O_ACCMODE) != O_RDONLY &&
	    (uap->flags & O_ACCMODE) != O_RDWR)
		return (EINVAL);

	if ((uap->flags & ~(O_ACCMODE | O_CREAT | O_EXCL | O_TRUNC |
	    O_CLOEXEC)) != 0)
		return (EINVAL);

	fdp = td->td_proc->p_fd;
	/* Requested mode, filtered through the process umask. */
	cmode = (uap->mode & ~fdp->fd_cmask) & ACCESSPERMS;

	error = falloc(td, &fp, &fd, O_CLOEXEC);
	if (error)
		return (error);

	/* A SHM_ANON path pointer creates an anonymous object. */
	if (uap->path == SHM_ANON) {
		/* A read-only anonymous object is pointless. */
		if ((uap->flags & O_ACCMODE) == O_RDONLY) {
			fdclose(fdp, fp, fd, td);
			fdrop(fp, td);
			return (EINVAL);
		}
		shmfd = shm_alloc(td->td_ucred, cmode);
	} else {
		path = malloc(MAXPATHLEN, M_SHMFD, M_WAITOK);
		error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
#ifdef KTRACE
		if (error == 0 && KTRPOINT(curthread, KTR_NAMEI))
			ktrnamei(path);
#endif
		/* Require paths to start with a '/' character. */
		if (error == 0 && path[0] != '/')
			error = EINVAL;
		if (error) {
			fdclose(fdp, fp, fd, td);
			fdrop(fp, td);
			free(path, M_SHMFD);
			return (error);
		}

		fnv = fnv_32_str(path, FNV1_32_INIT);
		sx_xlock(&shm_dict_lock);
		shmfd = shm_lookup(path, fnv);
		if (shmfd == NULL) {
			/* Object does not yet exist, create it if requested. */
			if (uap->flags & O_CREAT) {
#ifdef MAC
				error = mac_posixshm_check_create(td->td_ucred,
				    path);
				if (error == 0) {
#endif
					/*
					 * NOTE(review): shm_insert is presumed
					 * to take over `path` (it is not freed
					 * here) — confirm; also, if the MAC
					 * check above fails, `path` appears to
					 * leak on this branch.
					 */
					shmfd = shm_alloc(td->td_ucred, cmode);
					shm_insert(path, fnv, shmfd);
#ifdef MAC
				}
#endif
			} else {
				free(path, M_SHMFD);
				error = ENOENT;
			}
		} else {
			/*
			 * Object already exists, obtain a new
			 * reference if requested and permitted.
			 */
			free(path, M_SHMFD);
			if ((uap->flags & (O_CREAT | O_EXCL)) ==
			    (O_CREAT | O_EXCL))
				error = EEXIST;
			else {
#ifdef MAC
				error = mac_posixshm_check_open(td->td_ucred,
				    shmfd, FFLAGS(uap->flags & O_ACCMODE));
				if (error == 0)
#endif
				error = shm_access(shmfd, td->td_ucred,
				    FFLAGS(uap->flags & O_ACCMODE));
			}

			/*
			 * Truncate the file back to zero length if
			 * O_TRUNC was specified and the object was
			 * opened with read/write.
			 */
			if (error == 0 &&
			    (uap->flags & (O_ACCMODE | O_TRUNC)) ==
			    (O_RDWR | O_TRUNC)) {
#ifdef MAC
				error = mac_posixshm_check_truncate(
					td->td_ucred, fp->f_cred, shmfd);
				if (error == 0)
#endif
					shm_dotruncate(shmfd, 0);
			}
			if (error == 0)
				shm_hold(shmfd);
		}
		sx_xunlock(&shm_dict_lock);

		if (error) {
			fdclose(fdp, fp, fd, td);
			fdrop(fp, td);
			return (error);
		}
	}

	/* Attach the object to the new descriptor and return it. */
	finit(fp, FFLAGS(uap->flags & O_ACCMODE), DTYPE_SHM, shmfd, &shm_ops);

	td->td_retval[0] = fd;
	fdrop(fp, td);

	return (0);
}
int sys_execv(const char *prog, char **userArgs) { struct vnode *v; vaddr_t entrypoint, stackptr; int res = 0; int length = 0; int index = 0; lock_acquire(allProcLock); char *progname; size_t size; progname = (char *) kmalloc(sizeof(char) * PATH_MAX); res = copyinstr((const_userptr_t) prog, progname, PATH_MAX, &size); char **args = (char **) kmalloc(sizeof(char **)); res = copyin((const_userptr_t)userArgs, args, sizeof(char **)); while (userArgs[index] != NULL) { args[index] = (char *) kmalloc(sizeof(char) * PATH_MAX); res = copyinstr((const_userptr_t) userArgs[index], args[index], PATH_MAX, &size); index++; } args[index] = NULL; index = 0; res = vfs_open(progname, O_RDONLY, 0, &v); struct addrspace *temp; temp = curproc->p_addrspace; if(curproc->p_addrspace != NULL){ as_destroy(curproc->p_addrspace); curproc->p_addrspace = NULL; } KASSERT(curproc->p_addrspace == NULL); if((curproc->p_addrspace = as_create()) == NULL){ kfree(progname); kfree(args); vfs_close(v); return ENOMEM; } as_activate(); res = load_elf(v, &entrypoint); vfs_close(v); res = as_define_stack(curproc->p_addrspace, &stackptr); while(args[index] != NULL) { char * arg; length = strlen(args[index]) + 1; int orignalLength = length; if(length % 4 != 0) { length = length + (4 - length % 4); } arg = kmalloc(sizeof(length)); arg = kstrdup(args[index]); for(int i = 0; i < length; i++){ if(i >= orignalLength){ arg[i] = '\0'; } else{ arg[i] = args[index][i]; } } stackptr -= length; res = copyout((const void *) arg, (userptr_t) stackptr, (size_t) length); kfree(arg); args[index] = (char *) stackptr; index++; } if(args[index] == NULL){ stackptr -= 4 * sizeof(char); } for(int i = (index - 1); i >= 0; i--) { stackptr = stackptr - sizeof(char*); res = copyout((const void *) (args + i), (userptr_t) stackptr, sizeof(char*)); } kfree(progname); kfree(args); lock_release(allProcLock); enter_new_process(index, (userptr_t) stackptr, stackptr, entrypoint); return EINVAL; }
/*
 * Crash-dump device ioctl handler: query/set the dump configuration,
 * query/set the dump device path, or trigger a dump.  Returns 0 or an
 * errno value.
 */
/*ARGSUSED*/
int
dump_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred, int *rvalp)
{
	uint64_t size;
	uint64_t dumpsize_in_pages;
	int error = 0;
	/* Scratch path buffer, always freed at the end. */
	char *pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	vnode_t *vp;

	switch (cmd) {
	case DIOCGETDUMPSIZE:
		/* Estimate the dump size and copy it out to the caller. */
		if (dump_conflags & DUMP_ALL)
			size = ptob((uint64_t)physmem) / DUMP_COMPRESS_RATIO;
		else {
			/*
			 * We can't give a good answer for the DUMP_CURPROC
			 * because we won't know which process to use until it
			 * causes a panic.  We'll therefore punt and give the
			 * caller the size for the kernel.
			 *
			 * This kernel size equation takes care of the
			 * boot time kernel footprint and also accounts
			 * for availrmem changes due to user explicit locking.
			 * Refer to common/vm/vm_page.c for an explanation
			 * of these counters.
			 */
			dumpsize_in_pages = (physinstalled - obp_pages -
			    availrmem -
			    anon_segkp_pages_locked -
			    k_anoninfo.ani_mem_resv -
			    pages_locked -
			    pages_claimed -
			    pages_useclaim);

			/*
			 * Protect against vm vagaries.
			 */
			if (dumpsize_in_pages > (uint64_t)physmem)
				dumpsize_in_pages = (uint64_t)physmem;

			size = ptob(dumpsize_in_pages) / DUMP_COMPRESS_RATIO;
		}
		if (copyout(&size, (void *)arg, sizeof (size)) < 0)
			error = EFAULT;
		break;

	case DIOCGETCONF:
		/* Report the current dump flags via *rvalp. */
		mutex_enter(&dump_lock);
		*rvalp = dump_conflags;
		if (dumpvp && !(dumpvp->v_flag & VISSWAP))
			*rvalp |= DUMP_EXCL;
		mutex_exit(&dump_lock);
		break;

	case DIOCSETCONF:
		mutex_enter(&dump_lock);
		if (arg == DUMP_KERNEL || arg == DUMP_ALL ||
		    arg == DUMP_CURPROC)
			dump_conflags = arg;
		else
			error = EINVAL;
		mutex_exit(&dump_lock);
		break;

	case DIOCGETDEV:
		/* Copy the configured dump device path out to userland. */
		mutex_enter(&dump_lock);
		if (dumppath == NULL) {
			mutex_exit(&dump_lock);
			error = ENODEV;
			break;
		}
		(void) strcpy(pathbuf, dumppath);
		mutex_exit(&dump_lock);
		error = copyoutstr(pathbuf, (void *)arg, MAXPATHLEN, NULL);
		break;

	case DIOCSETDEV:
	case DIOCTRYDEV:
		/*
		 * Look up the user-supplied path; it must resolve to a
		 * block device before dumpinit() will accept it.
		 */
		if ((error = copyinstr((char *)arg, pathbuf, MAXPATHLEN,
		    NULL)) != 0 || (error = lookupname(pathbuf, UIO_SYSSPACE,
		    FOLLOW, NULLVPP, &vp)) != 0)
			break;
		mutex_enter(&dump_lock);
		if (vp->v_type == VBLK)
			error = dumpinit(vp, pathbuf, cmd == DIOCTRYDEV);
		else
			error = ENOTBLK;
		mutex_exit(&dump_lock);
		VN_RELE(vp);
		break;

	case DIOCDUMP:
		/* Force a dump now; refuse if the device doubles as swap. */
		mutex_enter(&dump_lock);
		if (dumpvp == NULL)
			error = ENODEV;
		else if (dumpvp->v_flag & VISSWAP)
			error = EBUSY;
		else
			dumpsys();
		mutex_exit(&dump_lock);
		break;

	default:
		error = ENXIO;
	}

	kmem_free(pathbuf, MAXPATHLEN);
	return (error);
}
int execv(const char *prog_name, char **argv) { if( prog_name == NULL || argv == NULL ) return EFAULT; int argc = 0; size_t actual; char progname[__NAME_MAX]; int err = copyinstr((const_userptr_t) prog_name, progname, __NAME_MAX, &actual); if(err != 0){ return EFAULT; } /*if ( progname == (const char *)0x40000000 || progname == (const char *)0x80000000 || argv == (char **)0x40000000 || argv == (char **)0x80000000) return EFAULT; if( strcmp(progname,"") ) return EFAULT; if( strcmp((const char*)*argv,"") ) return EINVAL; if( strcmp(progname,"\0") ) return EINVAL; if( strcmp((const char*)*argv,"\0") ) return EINVAL; if( strlen(progname) == 0 ) return EINVAL; if( strlen((const char*)*argv) == 0 ) return EINVAL;*/ int i; for(i=0 ; argv[i] != NULL ; i++){ if(argv == (char **)0x40000000 || argv == (char **)0x80000000) return EFAULT; } argc = i; struct vnode *v; vaddr_t entrypoint, stackptr; int result; //Copy arguments into Temporary Kernel buffer char *ktemp[argc]; int argvlen[argc]; for( int m=0; m <argc; m++) { int len = strlen(argv[m])+1; argvlen[m] = len; int len_padding = len + (4 - (len % 4)); ktemp[m] = kmalloc(len_padding); size_t *p; err = copyinstr((const_userptr_t)argv[m], ktemp[m], len, p); } /* Open the file. */ result = vfs_open((char*)progname, O_RDONLY, 0, &v); if (result) { return result; } /* We should be a new thread. */ //KASSERT(curthread->t_addrspace == NULL); /* Create a new address space. */ curthread->t_addrspace = as_create(); if (curthread->t_addrspace==NULL) { vfs_close(v); return ENOMEM; } /* Activate it. */ as_activate(curthread->t_addrspace); /* Load the executable. */ result = load_elf(v, &entrypoint); if (result) { /* thread_exit destroys curthread->t_addrspace */ vfs_close(v); return result; } /* Done with the file now. 
*/ vfs_close(v); /* Define the user stack in the address space */ result = as_define_stack(curthread->t_addrspace, &stackptr); if (result) { /* thread_exit destroys curthread->t_addrspace */ return result; } // Load the arguments in user stack vaddr_t kargv[argc]; size_t len_from_top = 0; int arglen = 0, arglen_pad=0; if(argc > 0) { //kargv[argc]=0; for(int i=0 ; i < argc ; i++){ arglen = argvlen[i]; arglen_pad =arglen + (4- ((arglen)%4)); len_from_top = len_from_top + arglen_pad ; kargv[i] = stackptr - len_from_top; copyout(ktemp[i], (userptr_t) kargv[i], arglen_pad); kfree((void*)ktemp[i]); } stackptr = stackptr - len_from_top -(argc)*sizeof(vaddr_t); for(int i=0 ; i <argc ; i++){ copyout( &kargv[i], (userptr_t) stackptr, sizeof(vaddr_t)); stackptr = stackptr + sizeof(vaddr_t); } stackptr = stackptr - (argc)*sizeof(vaddr_t); /* Warp to user mode. */ enter_new_process( argc /*argc*/, (userptr_t) stackptr /*userspace addr of argv*/, stackptr, entrypoint); } /* enter_new_process does not return. */ panic("enter_new_process returned\n"); return EINVAL; }
/*
 * vnd(4) control ioctls: configure a vnode disk from a backing file
 * (VNDIOCSET), tear it down (VNDIOCCLR), query it (VNDIOCGET), and the
 * standard disklabel ioctls.  Returns 0 or an errno value.
 */
/* ARGSUSED */
int
vndioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
{
	int unit = DISKUNIT(dev);
	struct disklabel *lp;
	struct vnd_softc *sc;
	struct vnd_ioctl *vio;
	struct vnd_user *vnu;
	struct vattr vattr;
	struct nameidata nd;
	int error, part, pmask;

	DNPRINTF(VDB_FOLLOW, "vndioctl(%x, %lx, %p, %x, %p): unit %d\n",
	    dev, cmd, addr, flag, p, unit);

	/* All vnd ioctls are privileged. */
	error = suser(p, 0);
	if (error)
		return (error);
	if (unit >= numvnd)
		return (ENXIO);

	sc = &vnd_softc[unit];
	vio = (struct vnd_ioctl *)addr;

	switch (cmd) {

	case VNDIOCSET:
		if (sc->sc_flags & VNF_INITED)
			return (EBUSY);

		/* Geometry eventually has to fit into label fields */
		if (vio->vnd_secsize > UINT_MAX ||
		    vio->vnd_ntracks > UINT_MAX ||
		    vio->vnd_nsectors > UINT_MAX)
			return (EINVAL);

		if ((error = disk_lock(&sc->sc_dk)) != 0)
			return (error);

		if ((error = copyinstr(vio->vnd_file, sc->sc_file,
		    sizeof(sc->sc_file), NULL))) {
			disk_unlock(&sc->sc_dk);
			return (error);
		}

		/* Set geometry for device. */
		sc->sc_secsize = vio->vnd_secsize;
		sc->sc_ntracks = vio->vnd_ntracks;
		sc->sc_nsectors = vio->vnd_nsectors;

		/*
		 * Open for read and write first. This lets vn_open() weed out
		 * directories, sockets, etc. so we don't have to worry about
		 * them.
		 */
		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file, p);
		sc->sc_flags &= ~VNF_READONLY;
		error = vn_open(&nd, FREAD|FWRITE, 0);
		if (error == EROFS) {
			/* Fall back to a read-only configuration. */
			sc->sc_flags |= VNF_READONLY;
			error = vn_open(&nd, FREAD, 0);
		}
		if (error) {
			disk_unlock(&sc->sc_dk);
			return (error);
		}

		/* Size in sectors: from the device, or from file size. */
		if (nd.ni_vp->v_type == VBLK)
			sc->sc_size = vndbdevsize(nd.ni_vp, p);
		else {
			error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p);
			if (error) {
				VOP_UNLOCK(nd.ni_vp, 0, p);
				vn_close(nd.ni_vp, VNDRW(sc), p->p_ucred, p);
				disk_unlock(&sc->sc_dk);
				return (error);
			}
			sc->sc_size = vattr.va_size / sc->sc_secsize;
		}
		VOP_UNLOCK(nd.ni_vp, 0, p);
		sc->sc_vp = nd.ni_vp;
		if ((error = vndsetcred(sc, p->p_ucred)) != 0) {
			(void) vn_close(nd.ni_vp, VNDRW(sc), p->p_ucred, p);
			disk_unlock(&sc->sc_dk);
			return (error);
		}

		/* Optional blowfish encryption of the backing store. */
		if (vio->vnd_keylen > 0) {
			char key[BLF_MAXUTILIZED];

			if (vio->vnd_keylen > sizeof(key))
				vio->vnd_keylen = sizeof(key);

			if ((error = copyin(vio->vnd_key, key,
			    vio->vnd_keylen)) != 0) {
				(void) vn_close(nd.ni_vp, VNDRW(sc),
				    p->p_ucred, p);
				disk_unlock(&sc->sc_dk);
				return (error);
			}

			sc->sc_keyctx = malloc(sizeof(*sc->sc_keyctx), M_DEVBUF,
			    M_WAITOK);
			blf_key(sc->sc_keyctx, key, vio->vnd_keylen);
			/* Wipe the plaintext key from the stack. */
			explicit_bzero(key, vio->vnd_keylen);
		} else
			sc->sc_keyctx = NULL;

		vio->vnd_size = sc->sc_size * sc->sc_secsize;
		sc->sc_flags |= VNF_INITED;

		DNPRINTF(VDB_INIT, "vndioctl: SET vp %p size %llx\n",
		    sc->sc_vp, (unsigned long long)sc->sc_size);

		/* Attach the disk. */
		sc->sc_dk.dk_name = sc->sc_dev.dv_xname;
		disk_attach(&sc->sc_dev, &sc->sc_dk);

		disk_unlock(&sc->sc_dk);

		break;

	case VNDIOCCLR:
		if ((sc->sc_flags & VNF_INITED) == 0)
			return (ENXIO);

		if ((error = disk_lock(&sc->sc_dk)) != 0)
			return (error);

		/*
		 * Don't unconfigure if any other partitions are open
		 * or if both the character and block flavors of this
		 * partition are open.
		 */
		part = DISKPART(dev);
		pmask = (1 << part);
		if ((sc->sc_dk.dk_openmask & ~pmask) ||
		    ((sc->sc_dk.dk_bopenmask & pmask) &&
		    (sc->sc_dk.dk_copenmask & pmask))) {
			disk_unlock(&sc->sc_dk);
			return (EBUSY);
		}

		vndclear(sc);
		DNPRINTF(VDB_INIT, "vndioctl: CLRed\n");

		/* Free crypto key */
		if (sc->sc_keyctx) {
			explicit_bzero(sc->sc_keyctx, sizeof(*sc->sc_keyctx));
			free(sc->sc_keyctx, M_DEVBUF, sizeof(*sc->sc_keyctx));
		}

		/* Detach the disk. */
		disk_detach(&sc->sc_dk);
		disk_unlock(&sc->sc_dk);
		break;

	case VNDIOCGET:
		vnu = (struct vnd_user *)addr;

		/* -1 means "this unit"; otherwise validate the index. */
		if (vnu->vnu_unit == -1)
			vnu->vnu_unit = unit;
		if (vnu->vnu_unit >= numvnd)
			return (ENXIO);
		if (vnu->vnu_unit < 0)
			return (EINVAL);

		sc = &vnd_softc[vnu->vnu_unit];

		if (sc->sc_flags & VNF_INITED) {
			error = VOP_GETATTR(sc->sc_vp, &vattr, p->p_ucred, p);
			if (error)
				return (error);

			strlcpy(vnu->vnu_file, sc->sc_file,
			    sizeof(vnu->vnu_file));
			vnu->vnu_dev = vattr.va_fsid;
			vnu->vnu_ino = vattr.va_fileid;
		} else {
			vnu->vnu_dev = 0;
			vnu->vnu_ino = 0;
		}

		break;

	case DIOCRLDINFO:
		if ((sc->sc_flags & VNF_HAVELABEL) == 0)
			return (ENOTTY);
		lp = malloc(sizeof(*lp), M_TEMP, M_WAITOK);
		vndgetdisklabel(dev, sc, lp, 0);
		*(sc->sc_dk.dk_label) = *lp;
		free(lp, M_TEMP, sizeof(*lp));
		return (0);

	case DIOCGPDINFO:
		if ((sc->sc_flags & VNF_HAVELABEL) == 0)
			return (ENOTTY);
		vndgetdisklabel(dev, sc, (struct disklabel *)addr, 1);
		return (0);

	case DIOCGDINFO:
		if ((sc->sc_flags & VNF_HAVELABEL) == 0)
			return (ENOTTY);
		*(struct disklabel *)addr = *(sc->sc_dk.dk_label);
		return (0);

	case DIOCGPART:
		if ((sc->sc_flags & VNF_HAVELABEL) == 0)
			return (ENOTTY);
		((struct partinfo *)addr)->disklab = sc->sc_dk.dk_label;
		((struct partinfo *)addr)->part =
		    &sc->sc_dk.dk_label->d_partitions[DISKPART(dev)];
		return (0);

	case DIOCWDINFO:
	case DIOCSDINFO:
		if ((sc->sc_flags & VNF_HAVELABEL) == 0)
			return (ENOTTY);
		if ((flag & FWRITE) == 0)
			return (EBADF);

		if ((error = disk_lock(&sc->sc_dk)) != 0)
			return (error);

		error = setdisklabel(sc->sc_dk.dk_label,
		    (struct disklabel *)addr, /* sc->sc_dk.dk_openmask */ 0);
		if (error == 0) {
			if (cmd == DIOCWDINFO)
				error = writedisklabel(DISKLABELDEV(dev),
				    vndstrategy, sc->sc_dk.dk_label);
		}

		disk_unlock(&sc->sc_dk);
		return (error);

	default:
		return (ENOTTY);
	}

	return (0);
}
/* Other helper routines. */

/*
 * Common worker for the POSIX semaphore creation syscalls: allocate a
 * file descriptor and a ksem, either anonymous (name == NULL) or looked
 * up / inserted in the name dictionary.  The semaphore id is copied out
 * to *semidp early so error handling stays simple.  Returns 0 or errno.
 */
static int
ksem_create(struct thread *td, const char *name, semid_t *semidp, mode_t mode,
    unsigned int value, int flags, int compat32)
{
	struct filedesc *fdp;
	struct ksem *ks;
	struct file *fp;
	char *path;
	Fnv32_t fnv;		/* FNV-1 hash of the path; dictionary key */
	int error, fd;

	if (value > SEM_VALUE_MAX)
		return (EINVAL);

	fdp = td->td_proc->p_fd;
	/* Requested mode, filtered through the process umask. */
	mode = (mode & ~fdp->fd_cmask) & ACCESSPERMS;
	error = falloc(td, &fp, &fd, O_CLOEXEC);
	if (error) {
		/* Anonymous semaphores report ENOSPC on descriptor failure. */
		if (name == NULL)
			error = ENOSPC;
		return (error);
	}

	/*
	 * Go ahead and copyout the file descriptor now.  This is a bit
	 * premature, but it is a lot easier to handle errors as opposed
	 * to later when we've possibly created a new semaphore, etc.
	 */
	error = ksem_create_copyout_semid(td, semidp, fd, compat32);
	if (error) {
		fdclose(fdp, fp, fd, td);
		fdrop(fp, td);
		return (error);
	}

	if (name == NULL) {
		/* Create an anonymous semaphore. */
		ks = ksem_alloc(td->td_ucred, mode, value);
		if (ks == NULL)
			error = ENOSPC;
		else
			ks->ks_flags |= KS_ANONYMOUS;
	} else {
		path = malloc(MAXPATHLEN, M_KSEM, M_WAITOK);
		error = copyinstr(name, path, MAXPATHLEN, NULL);

		/* Require paths to start with a '/' character. */
		if (error == 0 && path[0] != '/')
			error = EINVAL;
		if (error) {
			fdclose(fdp, fp, fd, td);
			fdrop(fp, td);
			free(path, M_KSEM);
			return (error);
		}

		fnv = fnv_32_str(path, FNV1_32_INIT);
		sx_xlock(&ksem_dict_lock);
		ks = ksem_lookup(path, fnv);
		if (ks == NULL) {
			/* Object does not exist, create it if requested. */
			if (flags & O_CREAT) {
				ks = ksem_alloc(td->td_ucred, mode, value);
				if (ks == NULL)
					error = ENFILE;
				else {
					/*
					 * The dictionary takes ownership of
					 * `path`; NULL it so it is not freed
					 * below.
					 */
					ksem_insert(path, fnv, ks);
					path = NULL;
				}
			} else
				error = ENOENT;
		} else {
			/*
			 * Object already exists, obtain a new
			 * reference if requested and permitted.
			 */
			if ((flags & (O_CREAT | O_EXCL)) ==
			    (O_CREAT | O_EXCL))
				error = EEXIST;
			else {
#ifdef MAC
				error = mac_posixsem_check_open(td->td_ucred,
				    ks);
				if (error == 0)
#endif
				error = ksem_access(ks, td->td_ucred);
			}
			if (error == 0)
				ksem_hold(ks);
#ifdef INVARIANTS
			else
				ks = NULL;
#endif
		}
		sx_xunlock(&ksem_dict_lock);
		if (path)
			free(path, M_KSEM);
	}

	if (error) {
		KASSERT(ks == NULL, ("ksem_create error with a ksem"));
		fdclose(fdp, fp, fd, td);
		fdrop(fp, td);
		return (error);
	}
	KASSERT(ks != NULL, ("ksem_create w/o a ksem"));

	finit(fp, FREAD | FWRITE, DTYPE_SEM, ks, &ksem_ops);

	fdrop(fp, td);

	return (0);
}
/*
 * nfs_mount: VFS mount entry point.  Builds a struct nfs_args either
 * from a legacy "nfs_args" blob (old mount_nfs) or from nmount(2)
 * string options, validates it, and hands off to mountnfs().
 */
/* ARGSUSED */
static int
nfs_mount(struct mount *mp)
{
	struct nfs_args args = {
	    .version = NFS_ARGSVERSION,
	    .addr = NULL,
	    .addrlen = sizeof (struct sockaddr_in),
	    .sotype = SOCK_STREAM,
	    .proto = 0,
	    .fh = NULL,
	    .fhsize = 0,
	    .flags = NFSMNT_RESVPORT,
	    .wsize = NFS_WSIZE,
	    .rsize = NFS_RSIZE,
	    .readdirsize = NFS_READDIRSIZE,
	    .timeo = 10,
	    .retrans = NFS_RETRANS,
	    .maxgrouplist = NFS_MAXGRPS,
	    .readahead = NFS_DEFRAHEAD,
	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
	    .deadthresh = NFS_MAXDEADTHRESH,	/* was: NQ_DEADTHRESH */
	    .hostname = NULL,
	    /* args version 4 */
	    .acregmin = NFS_MINATTRTIMO,
	    .acregmax = NFS_MAXATTRTIMO,
	    .acdirmin = NFS_MINDIRATTRTIMO,
	    .acdirmax = NFS_MAXDIRATTRTIMO,
	};
	int error, ret, has_nfs_args_opt;
	int has_addr_opt, has_fh_opt, has_hostname_opt;
	struct sockaddr *nam;
	struct vnode *vp;
	char hst[MNAMELEN];
	size_t len;
	u_char nfh[NFSX_V3FHMAX];
	char *opt;
	int nametimeo = NFS_DEFAULT_NAMETIMEO;
	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;

	has_nfs_args_opt = 0;
	has_addr_opt = 0;
	has_fh_opt = 0;
	has_hostname_opt = 0;

	/* Reject unknown mount options outright. */
	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
		error = EINVAL;
		goto out;
	}

	/* Mounting root is a different code path entirely. */
	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) {
		error = nfs_mountroot(mp);
		goto out;
	}

	/*
	 * The old mount_nfs program passed the struct nfs_args
	 * from userspace to kernel.  The new mount_nfs program
	 * passes string options via nmount() from userspace to kernel
	 * and we populate the struct nfs_args in the kernel.
	 */
	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
		    sizeof args);
		if (error)
			goto out;

		if (args.version != NFS_ARGSVERSION) {
			error = EPROGMISMATCH;
			goto out;
		}
		has_nfs_args_opt = 1;
	}

	/* Boolean flag options. */
	if (vfs_getopt(mp->mnt_optnew, "dumbtimer", NULL, NULL) == 0)
		args.flags |= NFSMNT_DUMBTIMR;
	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
		args.flags |= NFSMNT_NOCONN;
	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
		args.flags |= NFSMNT_NOCONN;
	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
		args.flags |= NFSMNT_NOLOCKD;
	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
		args.flags &= ~NFSMNT_NOLOCKD;
	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
		args.flags |= NFSMNT_INT;
	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
		args.flags |= NFSMNT_RDIRPLUS;
	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
		args.flags |= NFSMNT_RESVPORT;
	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
		args.flags &= ~NFSMNT_RESVPORT;
	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
		args.flags |= NFSMNT_SOFT;
	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
		args.flags &= ~NFSMNT_SOFT;
	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
		args.sotype = SOCK_DGRAM;
	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
		args.sotype = SOCK_DGRAM;
	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
		args.sotype = SOCK_STREAM;
	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
		args.flags |= NFSMNT_NFSV3;
	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
		args.flags |= NFSMNT_NOCTO;

	/* Numeric options, each parsed with sscanf and range-checked. */
	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
		if (opt == NULL) {
			vfs_mount_error(mp, "illegal readdirsize");
			error = EINVAL;
			goto out;
		}
		ret = sscanf(opt, "%d", &args.readdirsize);
		if (ret != 1 || args.readdirsize <= 0) {
			vfs_mount_error(mp, "illegal readdirsize: %s",
			    opt);
			error = EINVAL;
			goto out;
		}
		args.flags |= NFSMNT_READDIRSIZE;
	}
	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
		if (opt == NULL) {
			vfs_mount_error(mp, "illegal readahead");
			error = EINVAL;
			goto out;
		}
		ret = sscanf(opt, "%d", &args.readahead);
		if (ret != 1 || args.readahead <= 0) {
			vfs_mount_error(mp, "illegal readahead: %s",
			    opt);
			error = EINVAL;
			goto out;
		}
		args.flags |= NFSMNT_READAHEAD;
	}
	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
		if (opt == NULL) {
			vfs_mount_error(mp, "illegal wsize");
			error = EINVAL;
			goto out;
		}
		ret = sscanf(opt, "%d", &args.wsize);
		if (ret != 1 || args.wsize <= 0) {
			vfs_mount_error(mp, "illegal wsize: %s",
			    opt);
			error = EINVAL;
			goto out;
		}
		args.flags |= NFSMNT_WSIZE;
	}
	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
		if (opt == NULL) {
			vfs_mount_error(mp, "illegal rsize");
			error = EINVAL;
			goto out;
		}
		ret = sscanf(opt, "%d", &args.rsize);
		if (ret != 1 || args.rsize <= 0) {
			/*
			 * NOTE(review): message says "wsize" but this is the
			 * rsize branch — looks like a copy-paste slip in the
			 * diagnostic text (behavior otherwise correct).
			 */
			vfs_mount_error(mp, "illegal wsize: %s",
			    opt);
			error = EINVAL;
			goto out;
		}
		args.flags |= NFSMNT_RSIZE;
	}
	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
		if (opt == NULL) {
			vfs_mount_error(mp, "illegal retrans");
			error = EINVAL;
			goto out;
		}
		ret = sscanf(opt, "%d", &args.retrans);
		if (ret != 1 || args.retrans <= 0) {
			vfs_mount_error(mp, "illegal retrans: %s",
			    opt);
			error = EINVAL;
			goto out;
		}
		args.flags |= NFSMNT_RETRANS;
	}
	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
		ret = sscanf(opt, "%d", &args.acregmin);
		if (ret != 1 || args.acregmin < 0) {
			vfs_mount_error(mp, "illegal acregmin: %s",
			    opt);
			error = EINVAL;
			goto out;
		}
		args.flags |= NFSMNT_ACREGMIN;
	}
	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
		ret = sscanf(opt, "%d", &args.acregmax);
		if (ret != 1 || args.acregmax < 0) {
			vfs_mount_error(mp, "illegal acregmax: %s",
			    opt);
			error = EINVAL;
			goto out;
		}
		args.flags |= NFSMNT_ACREGMAX;
	}
	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
		ret = sscanf(opt, "%d", &args.acdirmin);
		if (ret != 1 || args.acdirmin < 0) {
			vfs_mount_error(mp, "illegal acdirmin: %s",
			    opt);
			error = EINVAL;
			goto out;
		}
		args.flags |= NFSMNT_ACDIRMIN;
	}
	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
		ret = sscanf(opt, "%d", &args.acdirmax);
		if (ret != 1 || args.acdirmax < 0) {
			vfs_mount_error(mp, "illegal acdirmax: %s",
			    opt);
			error = EINVAL;
			goto out;
		}
		args.flags |= NFSMNT_ACDIRMAX;
	}
	if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
		ret = sscanf(opt, "%d", &args.wcommitsize);
		if (ret != 1 || args.wcommitsize < 0) {
			vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
			error = EINVAL;
			goto out;
		}
		args.flags |= NFSMNT_WCOMMITSIZE;
	}
	if (vfs_getopt(mp->mnt_optnew, "deadthresh", (void **)&opt, NULL) == 0) {
		ret = sscanf(opt, "%d", &args.deadthresh);
		if (ret != 1 || args.deadthresh <= 0) {
			vfs_mount_error(mp, "illegal deadthresh: %s",
			    opt);
			error = EINVAL;
			goto out;
		}
		args.flags |= NFSMNT_DEADTHRESH;
	}
	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
		ret = sscanf(opt, "%d", &args.timeo);
		if (ret != 1 || args.timeo <= 0) {
			vfs_mount_error(mp, "illegal timeout: %s",
			    opt);
			error = EINVAL;
			goto out;
		}
		args.flags |= NFSMNT_TIMEO;
	}
	if (vfs_getopt(mp->mnt_optnew, "maxgroups", (void **)&opt, NULL) == 0) {
		ret = sscanf(opt, "%d", &args.maxgrouplist);
		if (ret != 1 || args.maxgrouplist <= 0) {
			vfs_mount_error(mp, "illegal maxgroups: %s",
			    opt);
			error = EINVAL;
			goto out;
		}
		args.flags |= NFSMNT_MAXGRPS;
	}
	if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
		ret = sscanf(opt, "%d", &nametimeo);
		if (ret != 1 || nametimeo < 0) {
			vfs_mount_error(mp, "illegal nametimeo: %s", opt);
			error = EINVAL;
			goto out;
		}
	}
	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
	    == 0) {
		ret = sscanf(opt, "%d", &negnametimeo);
		if (ret != 1 || negnametimeo < 0) {
			vfs_mount_error(mp, "illegal negnametimeo: %s",
			    opt);
			error = EINVAL;
			goto out;
		}
	}

	/* Server address, file handle and hostname options. */
	if (vfs_getopt(mp->mnt_optnew, "addr", (void **)&args.addr,
	    &args.addrlen) == 0) {
		has_addr_opt = 1;
		if (args.addrlen > SOCK_MAXADDRLEN) {
			error = ENAMETOOLONG;
			goto out;
		}
		nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
		bcopy(args.addr, nam, args.addrlen);
		nam->sa_len = args.addrlen;
	}
	if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
	    &args.fhsize) == 0) {
		has_fh_opt = 1;
	}
	if (vfs_getopt(mp->mnt_optnew, "hostname", (void **)&args.hostname,
	    NULL) == 0) {
		has_hostname_opt = 1;
	}

	if (args.hostname == NULL) {
		vfs_mount_error(mp, "Invalid hostname");
		error = EINVAL;
		goto out;
	}
	if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
		vfs_mount_error(mp, "Bad file handle");
		error = EINVAL;
		goto out;
	}

	if (mp->mnt_flag & MNT_UPDATE) {
		struct nfsmount *nmp = VFSTONFS(mp);

		if (nmp == NULL) {
			error = EIO;
			goto out;
		}

		/*
		 * If a change from TCP->UDP is done and there are thread(s)
		 * that have I/O RPC(s) in progress with a tranfer size
		 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
		 * hung, retrying the RPC(s) forever. Usually these threads
		 * will be seen doing an uninterruptible sleep on wait channel
		 * "newnfsreq" (truncated to "newnfsre" by procstat).
		 */
		if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
			tprintf(curthread->td_proc, LOG_WARNING,
	"Warning: mount -u that changes TCP->UDP can result in hung threads\n");

		/*
		 * When doing an update, we can't change from or to
		 * v3, switch lockd strategies or change cookie translation
		 */
		args.flags = (args.flags &
		    ~(NFSMNT_NFSV3 | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
		    (nmp->nm_flag &
			(NFSMNT_NFSV3 | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
		nfs_decode_args(mp, nmp, &args, NULL);
		goto out;
	}

	/*
	 * Make the nfs_ip_paranoia sysctl serve as the default connection
	 * or no-connection mode for those protocols that support
	 * no-connection mode (the flag will be cleared later for protocols
	 * that do not support no-connection mode). This will allow a client
	 * to receive replies from a different IP then the request was
	 * sent to. Note: default value for nfs_ip_paranoia is 1 (paranoid),
	 * not 0.
	 */
	if (nfs_ip_paranoia == 0)
		args.flags |= NFSMNT_NOCONN;

	if (has_nfs_args_opt) {
		/*
		 * In the 'nfs_args' case, the pointers in the args
		 * structure are in userland - we copy them in here.
		 */
		if (!has_fh_opt) {
			error = copyin((caddr_t)args.fh, (caddr_t)nfh,
			    args.fhsize);
			if (error) {
				goto out;
			}
			args.fh = nfh;
		}
		if (!has_hostname_opt) {
			error = copyinstr(args.hostname, hst, MNAMELEN-1, &len);
			if (error) {
				goto out;
			}
			bzero(&hst[len], MNAMELEN - len);
			args.hostname = hst;
		}
		if (!has_addr_opt) {
			/* sockargs() call must be after above copyin() calls */
			error = getsockaddr(&nam, (caddr_t)args.addr,
			    args.addrlen);
			if (error) {
				goto out;
			}
		}
	} else if (has_addr_opt == 0) {
		vfs_mount_error(mp, "No server address");
		error = EINVAL;
		goto out;
	}
	error = mountnfs(&args, mp, nam, args.hostname, &vp,
	    curthread->td_ucred, nametimeo, negnametimeo);
out:
	if (!error) {
		MNT_ILOCK(mp);
		mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED);
		MNT_IUNLOCK(mp);
	}
	return (error);
}

/*
 * VFS Operations.
 *
 * mount system call
 * It seems a bit dumb to copyinstr() the host and path here and then
 * bcopy() them in mountnfs(), but I wanted to detect errors before
 * doing the sockargs() call because sockargs() allocates an mbuf and
 * an error after that means that I have to release the mbuf.
 */
/* ARGSUSED */
static int
nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
{
	int error;
	struct nfs_args args;

	/* Pull the legacy args blob in and re-mount via nmount machinery. */
	error = copyin(data, &args, sizeof (struct nfs_args));
	if (error)
		return error;

	ma = mount_arg(ma, "nfs_args", &args, sizeof args);

	error = kernel_mount(ma, flags);

	return (error);
}
/*
 * dtrace_copystr: copy a NUL-terminated string from the address 'uaddr'
 * into the buffer at 'kaddr', copying at most 'size' bytes.
 *
 * The copy is deliberately best-effort: following this file's convention
 * for intentionally ignored copy results (cf. the copyinstr() calls in
 * mfs_mount()), the return value of copyinstr() is explicitly discarded
 * with a (void) cast rather than silently dropped.
 *
 * NOTE(review): copyinstr() is invoked here with only three arguments
 * (no "bytes copied" out-parameter) — confirm this matches the local
 * copyinstr() prototype, since several kernels declare a fourth
 * (size_t *done) argument.
 */
void
dtrace_copystr(uintptr_t uaddr, uintptr_t kaddr, size_t size)
{
	(void) copyinstr((void *)uaddr, (void *)kaddr, size);
}
/*
 * procfs_docmdline: produce the "cmdline" text for a process and copy it
 * out through 'uio'.  Returns 0 on success or an errno.
 *
 *	curp - the requesting (current) process, used for access checks
 *	lp   - target lwp; its owning process supplies the command line
 *	pfs  - procfs node (apparently unused here — verify against callers)
 *	uio  - destination of the generated text; must be a read request
 */
int
procfs_docmdline(struct proc *curp, struct lwp *lp, struct pfsnode *pfs,
    struct uio *uio)
{
	struct proc *p = lp->lwp_proc;
	char *ps;
	int error;
	char *buf, *bp;
	struct ps_strings pstr;
	char **ps_argvstr;
	int i;
	size_t bytes_left, done;
	size_t buflen;

	/* Only reads make sense on this node. */
	if (uio->uio_rw != UIO_READ)
		return (EOPNOTSUPP);

	/*
	 * If we are using the ps/cmdline caching, use that.  Otherwise
	 * revert back to the old way which only implements full cmdline
	 * for the current process and just p->p_comm for all other
	 * processes.
	 * Note that if the argv is no longer available, we deliberately
	 * don't fall back on p->p_comm or return an error: the authentic
	 * Linux behaviour is to return zero-length in this case.
	 */
	if (p->p_args &&
	    (ps_argsopen || (CHECKIO(curp, p) &&
	    (p->p_flags & P_INEXEC) == 0 &&
	    !p_trespass(curp->p_ucred, p->p_ucred)))
	    ) {
		/* Cached argument string exists and caller may see it. */
		bp = p->p_args->ar_args;
		buflen = p->p_args->ar_length;
		buf = NULL;
	} else if (p != curp) {
		/* Other process, no cached args: fall back to short name. */
		bp = p->p_comm;
		buflen = MAXCOMLEN;
		buf = NULL;
	} else {
		/*
		 * Current process: rebuild the command line by copying the
		 * argv strings from our own user address space.
		 */
		buflen = 256;
		buf = kmalloc(buflen + 1, M_TEMP, M_WAITOK);
		bp = buf;
		ps = buf;
		error = copyin((void*)PS_STRINGS, &pstr, sizeof(pstr));
		if (error) {
			kfree(buf, M_TEMP);
			return (error);
		}
		/* Sanity-check the user-controlled argument count. */
		if (pstr.ps_nargvstr < 0) {
			kfree(buf, M_TEMP);
			return (EINVAL);
		}
		if (pstr.ps_nargvstr > ARG_MAX) {
			kfree(buf, M_TEMP);
			return (E2BIG);
		}
		ps_argvstr = kmalloc(pstr.ps_nargvstr * sizeof(char *),
		    M_TEMP, M_WAITOK);
		error = copyin((void *)pstr.ps_argvstr, ps_argvstr,
		    pstr.ps_nargvstr * sizeof(char *));
		if (error) {
			kfree(ps_argvstr, M_TEMP);
			kfree(buf, M_TEMP);
			return (error);
		}
		/*
		 * Concatenate the argv strings; copyinstr's 'done' count
		 * includes the NUL terminator, so the strings stay
		 * NUL-separated in the output buffer.
		 */
		bytes_left = buflen;
		for (i = 0; bytes_left && (i < pstr.ps_nargvstr); i++) {
			error = copyinstr(ps_argvstr[i], ps, bytes_left,
			    &done);
			/* If too long or malformed, just truncate */
			if (error) {
				error = 0;
				break;
			}
			ps += done;
			bytes_left -= done;
		}
		buflen = ps - buf;
		kfree(ps_argvstr, M_TEMP);
	}

	/* Ship the assembled text to the caller. */
	error = uiomove_frombuf(bp, buflen, uio);
	if (buf)
		kfree(buf, M_TEMP);
	return (error);
}
/*
 * Called by nfssvc() for nfsds. Just loops around servicing rpc requests
 * until it is killed by a signal.
 *
 *	td   - calling thread
 *	args - thread-count and principal arguments from userland
 *
 * Returns 0 or an errno (also logged via NFSEXITCODE).
 */
int
nfsrvd_nfsd(struct thread *td, struct nfsd_nfsd_args *args)
{
	char principal[MAXHOSTNAMELEN + 5];
	struct proc *p;
	int error = 0;
	bool_t ret2, ret3, ret4;

	/* Pull the RPCSEC_GSS principal name in from userland. */
	error = copyinstr(args->principal, principal, sizeof (principal),
	    NULL);
	if (error)
		goto out;

	/*
	 * Only the first nfsd actually does any work. The RPC code
	 * adds threads to it as needed. Any extra processes offered
	 * by nfsd just exit. If nfsd is new enough, it will call us
	 * once with a structure that specifies how many threads to
	 * use.
	 */
	NFSD_LOCK();
	if (newnfs_numnfsd == 0) {
		/* Flag this process (P2_AST_SU) while it runs as the nfsd. */
		p = td->td_proc;
		PROC_LOCK(p);
		p->p_flag2 |= P2_AST_SU;
		PROC_UNLOCK(p);
		newnfs_numnfsd++;
		NFSD_UNLOCK();

		/* An empty string implies AUTH_SYS only. */
		if (principal[0] != '\0') {
			/* Register the kerberos service name for v2/v3/v4. */
			ret2 = rpc_gss_set_svc_name_call(principal,
			    "kerberosv5", GSS_C_INDEFINITE, NFS_PROG,
			    NFS_VER2);
			ret3 = rpc_gss_set_svc_name_call(principal,
			    "kerberosv5", GSS_C_INDEFINITE, NFS_PROG,
			    NFS_VER3);
			ret4 = rpc_gss_set_svc_name_call(principal,
			    "kerberosv5", GSS_C_INDEFINITE, NFS_PROG,
			    NFS_VER4);
			if (!ret2 || !ret3 || !ret4)
				printf("nfsd: can't register svc name\n");
		}

		nfsrvd_pool->sp_minthreads = args->minthreads;
		nfsrvd_pool->sp_maxthreads = args->maxthreads;

		/* Blocks here servicing RPCs until the pool is shut down. */
		svc_run(nfsrvd_pool);

		/* Undo the GSS service-name registrations, if any. */
		if (principal[0] != '\0') {
			rpc_gss_clear_svc_name_call(NFS_PROG, NFS_VER2);
			rpc_gss_clear_svc_name_call(NFS_PROG, NFS_VER3);
			rpc_gss_clear_svc_name_call(NFS_PROG, NFS_VER4);
		}

		NFSD_LOCK();
		newnfs_numnfsd--;
		/* Re-initialize server state now that the nfsd is gone. */
		nfsrvd_init(1);
		PROC_LOCK(p);
		p->p_flag2 &= ~P2_AST_SU;
		PROC_UNLOCK(p);
	}
	NFSD_UNLOCK();

out:
	NFSEXITCODE(error);
	return (error);
}
/*
 * static long rctlsys_set(char *name, rctl_opaque_t *old_rblk,
 *	rctl_opaque_t *new_rblk, int flags)
 *
 * Overview
 *   rctlsys_set() is the implementation of the core logic of setrctl(2),
 *   which allows the establishment of resource control values.  Flags may
 *   take on any of three exclusive values: RCTL_INSERT, RCTL_DELETE, and
 *   RCTL_REPLACE.  RCTL_INSERT ignores old_rblk and inserts the value in
 *   the appropriate position in the ordered sequence of resource control
 *   values.  RCTL_DELETE ignores old_rblk and deletes the first resource
 *   control value matching (value, priority) in the given resource block.
 *   If no matching value is found, -1 is returned and errno is set to
 *   ENOENT.  Finally, in the case of RCTL_REPLACE, old_rblk is used to
 *   match (value, priority); the matching resource control value in the
 *   sequence is replaced with the contents of new_rblk.  Again, if no
 *   match is found, -1 is returned and errno is set to ENOENT.
 *
 *   rctlsys_set() causes a cursor test, which can reactivate resource
 *   controls that have previously fired.
 */
static long
rctlsys_set(char *name, rctl_opaque_t *old_rblk, rctl_opaque_t *new_rblk,
    int flags)
{
	rctl_val_t *nval;
	rctl_dict_entry_t *rde;
	rctl_opaque_t *nblk;
	rctl_hndl_t hndl;
	char *kname;
	size_t klen;
	long ret = 0;
	proc_t *pp = NULL;	/* non-NULL => we hold pp->p_lock */
	pid_t pid;
	int action = flags & (~RCTLSYS_ACTION_MASK);
	rctl_val_t *oval;
	rctl_val_t *rval1;
	rctl_val_t *rval2;
	rctl_val_t *tval;
	rctl_opaque_t *oblk;

	/* Reject unknown flag bits and anything but the three actions. */
	if (flags & (~RCTLSYS_MASK))
		return (set_errno(EINVAL));

	if (action != RCTL_INSERT &&
	    action != RCTL_DELETE &&
	    action != RCTL_REPLACE)
		return (set_errno(EINVAL));

	if (new_rblk == NULL || name == NULL)
		return (set_errno(EFAULT));

	/* Copy in the control name and resolve it to a handle. */
	kname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
	if (copyinstr(name, kname, MAXPATHLEN, &klen) != 0) {
		kmem_free(kname, MAXPATHLEN);
		return (set_errno(EFAULT));
	}

	if ((hndl = rctl_hndl_lookup(kname)) == -1) {
		kmem_free(kname, MAXPATHLEN);
		return (set_errno(EINVAL));
	}

	kmem_free(kname, MAXPATHLEN);

	rde = rctl_dict_lookup_hndl(hndl);

	/* Copy in and validate the new value block from userland. */
	nblk = kmem_alloc(sizeof (rctl_opaque_t), KM_SLEEP);

	if (copyin(new_rblk, nblk, sizeof (rctl_opaque_t)) == -1) {
		kmem_free(nblk, sizeof (rctl_opaque_t));
		return (set_errno(EFAULT));
	}

	nval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);

	rctlsys_rblk_xfrm(nblk, NULL, nval, RBX_FROM_BLK | RBX_VAL);

	if (rctl_invalid_value(rde, nval)) {
		kmem_free(nblk, sizeof (rctl_opaque_t));
		kmem_cache_free(rctl_val_cache, nval);
		return (set_errno(EINVAL));
	}

	/* allocate what we might need before potentially grabbing p_lock */
	oblk = kmem_alloc(sizeof (rctl_opaque_t), KM_SLEEP);
	oval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
	rval1 = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
	rval2 = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);

	if (nval->rcv_privilege == RCPRIV_BASIC) {
		if (flags & RCTL_USE_RECIPIENT_PID) {
			pid = nval->rcv_action_recip_pid;

			/* case for manipulating rctl values on other procs */
			if (pid != curproc->p_pid) {
				/* cannot be other pid on process rctls */
				if (rde->rcd_entity == RCENTITY_PROCESS) {
					ret = set_errno(EINVAL);
					goto rctlsys_out;
				}
				/*
				 * must have privilege to manipulate controls
				 * on other processes
				 */
				if (secpolicy_rctlsys(CRED(), B_FALSE) != 0) {
					ret = set_errno(EACCES);
					goto rctlsys_out;
				}

				pid = nval->rcv_action_recip_pid;
				mutex_enter(&pidlock);
				pp = prfind(pid);
				if (!pp) {
					mutex_exit(&pidlock);
					ret = set_errno(ESRCH);
					goto rctlsys_out;
				}

				/*
				 * idle or zombie procs have either not yet
				 * set up their rctls or have already done
				 * their rctl_set_tearoff's.
				 */
				if (pp->p_stat == SZOMB ||
				    pp->p_stat == SIDL) {
					mutex_exit(&pidlock);
					ret = set_errno(ESRCH);
					goto rctlsys_out;
				}

				/*
				 * hold this pp's p_lock to ensure that
				 * it does not do it's rctl_set_tearoff
				 * If we did not do this, we could
				 * potentially add rctls to the entity
				 * with a recipient that is a process
				 * that has exited.
				 */
				mutex_enter(&pp->p_lock);
				mutex_exit(&pidlock);

				/*
				 * We know that curproc's task,
				 * project, and zone pointers will
				 * not change because functions that
				 * change them call holdlwps(SHOLDFORK1)
				 * first.
				 */

				/*
				 * verify that the found pp is in the
				 * current task.  If it is, then it
				 * is also within the current project
				 * and zone.
				 */
				if (rde->rcd_entity == RCENTITY_TASK &&
				    pp->p_task != curproc->p_task) {
					ret = set_errno(ESRCH);
					goto rctlsys_out;
				}

				ASSERT(pp->p_task->tk_proj ==
				    curproc->p_task->tk_proj);
				ASSERT(pp->p_zone == curproc->p_zone);

				nval->rcv_action_recipient = pp;
				nval->rcv_action_recip_pid = pid;
			} else {
				/* for manipulating rctl values on this proc */
				mutex_enter(&curproc->p_lock);
				pp = curproc;
				nval->rcv_action_recipient = curproc;
				nval->rcv_action_recip_pid = curproc->p_pid;
			}
		} else {
			/* RCTL_USE_RECIPIENT_PID not set, use this proc */
			mutex_enter(&curproc->p_lock);
			pp = curproc;
			nval->rcv_action_recipient = curproc;
			nval->rcv_action_recip_pid = curproc->p_pid;
		}
	} else {
		/* privileged controls have no recipient pid */
		mutex_enter(&curproc->p_lock);
		pp = curproc;
		nval->rcv_action_recipient = NULL;
		nval->rcv_action_recip_pid = -1;
	}

	nval->rcv_firing_time = 0;

	if (action == RCTL_REPLACE) {
		/* Copy in and validate the value block to be replaced. */
		if (copyin(old_rblk, oblk, sizeof (rctl_opaque_t)) == -1) {
			ret = set_errno(EFAULT);
			goto rctlsys_out;
		}

		rctlsys_rblk_xfrm(oblk, NULL, oval, RBX_FROM_BLK | RBX_VAL);

		if (rctl_invalid_value(rde, oval)) {
			ret = set_errno(EINVAL);
			goto rctlsys_out;
		}

		/* Fill in the recipient on the old value the same way. */
		if (oval->rcv_privilege == RCPRIV_BASIC) {
			if (!(flags & RCTL_USE_RECIPIENT_PID)) {
				oval->rcv_action_recipient = curproc;
				oval->rcv_action_recip_pid = curproc->p_pid;
			}
		} else {
			oval->rcv_action_recipient = NULL;
			oval->rcv_action_recip_pid = -1;
		}

		/*
		 * Find the real value we're attempting to replace on the
		 * sequence, rather than trusting the one delivered from
		 * userland.
		 */
		if (ret = rctl_local_get(hndl, NULL, rval1, pp)) {
			(void) set_errno(ret);
			goto rctlsys_out;
		}

		/*
		 * Walk the sequence (ping-ponging rval1/rval2 as
		 * cursor/result) until we hit the system value or a match.
		 */
		do {
			if (rval1->rcv_privilege == RCPRIV_SYSTEM ||
			    rctl_val_cmp(oval, rval1, 0) == 0)
				break;

			tval = rval1;
			rval1 = rval2;
			rval2 = tval;
		} while (rctl_local_get(hndl, rval2, rval1, pp) == 0);

		if (rval1->rcv_privilege == RCPRIV_SYSTEM) {
			/* No match before the system value terminated us. */
			if (rctl_val_cmp(oval, rval1, 1) == 0)
				ret = set_errno(EPERM);
			else
				ret = set_errno(ESRCH);

			goto rctlsys_out;
		}

		bcopy(rval1, oval, sizeof (rctl_val_t));

		/*
		 * System controls are immutable.
		 */
		if (nval->rcv_privilege == RCPRIV_SYSTEM) {
			ret = set_errno(EPERM);
			goto rctlsys_out;
		}

		/*
		 * Only privileged processes in the global zone can modify
		 * privileged rctls of type RCENTITY_ZONE; replacing privileged
		 * controls with basic ones are not allowed either.  Lowering a
		 * lowerable one might be OK for privileged processes in a
		 * non-global zone, but lowerable rctls probably don't make
		 * sense for zones (hence, not modifiable from within a zone).
		 */
		if (rde->rcd_entity == RCENTITY_ZONE &&
		    (nval->rcv_privilege == RCPRIV_PRIVILEGED ||
		    oval->rcv_privilege == RCPRIV_PRIVILEGED) &&
		    secpolicy_rctlsys(CRED(), B_TRUE) != 0) {
			ret = set_errno(EACCES);
			goto rctlsys_out;
		}

		/*
		 * Must be privileged to replace a privileged control with
		 * a basic one.
		 */
		if (oval->rcv_privilege == RCPRIV_PRIVILEGED &&
		    nval->rcv_privilege != RCPRIV_PRIVILEGED &&
		    secpolicy_rctlsys(CRED(), B_FALSE) != 0) {
			ret = set_errno(EACCES);
			goto rctlsys_out;
		}

		/*
		 * Must have lowerable global property for non-privileged
		 * to lower the value of a privileged control; otherwise must
		 * have sufficient privileges to modify privileged controls
		 * at all.
		 */
		if (oval->rcv_privilege == RCPRIV_PRIVILEGED &&
		    nval->rcv_privilege == RCPRIV_PRIVILEGED &&
		    ((((rde->rcd_flagaction & RCTL_GLOBAL_LOWERABLE) == 0) ||
		    oval->rcv_flagaction != nval->rcv_flagaction ||
		    oval->rcv_action_signal != nval->rcv_action_signal ||
		    oval->rcv_value < nval->rcv_value)) &&
		    secpolicy_rctlsys(CRED(), B_FALSE) != 0) {
			ret = set_errno(EACCES);
			goto rctlsys_out;
		}

		if (ret = rctl_local_replace(hndl, oval, nval, pp)) {
			(void) set_errno(ret);
			goto rctlsys_out;
		}

		/* ensure that nval is not freed */
		nval = NULL;
	} else if (action == RCTL_INSERT) {
		/*
		 * System controls are immutable.
		 */
		if (nval->rcv_privilege == RCPRIV_SYSTEM) {
			ret = set_errno(EPERM);
			goto rctlsys_out;
		}

		/*
		 * Only privileged processes in the global zone may add
		 * privileged zone.* rctls.  Only privileged processes
		 * may add other privileged rctls.
		 */
		if (nval->rcv_privilege == RCPRIV_PRIVILEGED) {
			if ((rde->rcd_entity == RCENTITY_ZONE &&
			    secpolicy_rctlsys(CRED(), B_TRUE) != 0) ||
			    (rde->rcd_entity != RCENTITY_ZONE &&
			    secpolicy_rctlsys(CRED(), B_FALSE) != 0)) {
				ret = set_errno(EACCES);
				goto rctlsys_out;
			}
		}

		/*
		 * Only one basic control is allowed per rctl.
		 * If a basic control is being inserted, delete
		 * any other basic control.
		 */
		if ((nval->rcv_privilege == RCPRIV_BASIC) &&
		    (rctl_local_get(hndl, NULL, rval1, pp) == 0)) {
			do {
				if (rval1->rcv_privilege == RCPRIV_BASIC &&
				    rval1->rcv_action_recipient == curproc) {
					(void) rctl_local_delete(hndl, rval1,
					    pp);
					if (rctl_local_get(hndl, NULL, rval1,
					    pp) != 0)
						break;
				}

				tval = rval1;
				rval1 = rval2;
				rval2 = tval;
			} while (rctl_local_get(hndl, rval2, rval1, pp) == 0);
		}

		if (ret = rctl_local_insert(hndl, nval, pp)) {
			(void) set_errno(ret);
			goto rctlsys_out;
		}

		/* ensure that nval is not freed */
		nval = NULL;
	} else {
		/*
		 * RCTL_DELETE
		 */
		if (nval->rcv_privilege == RCPRIV_SYSTEM) {
			ret = set_errno(EPERM);
			goto rctlsys_out;
		}

		if (nval->rcv_privilege == RCPRIV_PRIVILEGED) {
			if ((rde->rcd_entity == RCENTITY_ZONE &&
			    secpolicy_rctlsys(CRED(), B_TRUE) != 0) ||
			    (rde->rcd_entity != RCENTITY_ZONE &&
			    secpolicy_rctlsys(CRED(), B_FALSE) != 0)) {
				ret = set_errno(EACCES);
				goto rctlsys_out;
			}
		}

		if (ret = rctl_local_delete(hndl, nval, pp)) {
			(void) set_errno(ret);
			goto rctlsys_out;
		}
	}

rctlsys_out:
	/* Common exit: drop p_lock if held, then free scratch buffers. */
	if (pp)
		mutex_exit(&pp->p_lock);

	kmem_free(nblk, sizeof (rctl_opaque_t));
	kmem_free(oblk, sizeof (rctl_opaque_t));

	/* only free nval if we did not rctl_local_insert it */
	if (nval)
		kmem_cache_free(rctl_val_cache, nval);

	kmem_cache_free(rctl_val_cache, oval);
	kmem_cache_free(rctl_val_cache, rval1);
	kmem_cache_free(rctl_val_cache, rval2);

	return (ret);
}
/* ARGSUSED */
/*
 * mfs_mount: VFS mount entry point for the memory file system.
 *
 * Copies the mfs_args from userland, handles the MNT_UPDATE case
 * (ro/rw transitions and optional export updates), and otherwise
 * fabricates a block device vnode backed by the caller's memory
 * region and mounts an FFS file system on it.  Returns 0 or an errno.
 */
int
mfs_mount(struct mount *mp, const char *path, void *data,
    struct nameidata *ndp, struct proc *p)
{
	struct vnode *devvp;
	struct mfs_args args;
	struct ufsmount *ump;
	struct fs *fs;
	struct mfsnode *mfsp;
	size_t size;
	int flags, error;

	error = copyin(data, (caddr_t)&args, sizeof (struct mfs_args));
	if (error)
		return (error);

	/*
	 * If updating, check whether changing from read-only to
	 * read/write; if there is no device name, that's all we do.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		ump = VFSTOUFS(mp);
		fs = ump->um_fs;
		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
			/* Downgrading to read-only: flush writable files. */
			flags = WRITECLOSE;
			if (mp->mnt_flag & MNT_FORCE)
				flags |= FORCECLOSE;
			error = ffs_flushfiles(mp, flags, p);
			if (error)
				return (error);
		}
		if (fs->fs_ronly && (mp->mnt_flag & MNT_WANTRDWR))
			fs->fs_ronly = 0;
#ifdef EXPORTMFS
		/* No device name given: this is purely an export update. */
		if (args.fspec == 0)
			return (vfs_export(mp, &ump->um_export,
			    &args.export_info));
#endif
		return (0);
	}
	/* Fresh mount: create an anonymous block-device vnode for mfs. */
	error = getnewvnode(VT_MFS, (struct mount *)0, mfs_vnodeop_p, &devvp);
	if (error)
		return (error);
	devvp->v_type = VBLK;
	/* Each mfs gets a unique minor; an alias would be a logic error. */
	if (checkalias(devvp, makedev(255, mfs_minor), (struct mount *)0))
		panic("mfs_mount: dup dev");
	mfs_minor++;
	mfsp = malloc(sizeof *mfsp, M_MFSNODE, M_WAITOK);
	devvp->v_data = mfsp;
	mfsp->mfs_baseoff = args.base;
	mfsp->mfs_size = args.size;
	mfsp->mfs_vnode = devvp;
	mfsp->mfs_pid = p->p_pid;
	/* Empty buffer list; set to -1 below to mark the device dead. */
	mfsp->mfs_buflist = (struct buf *)0;
	if ((error = ffs_mountfs(devvp, mp, p)) != 0) {
		mfsp->mfs_buflist = (struct buf *)-1;
		vrele(devvp);
		return (error);
	}
	ump = VFSTOUFS(mp);
	fs = ump->um_fs;
	/*
	 * Record the mount point and "device" names; copy failures are
	 * deliberately ignored (names are cosmetic) and the tails are
	 * zeroed so the fixed-size name buffers stay NUL-padded.
	 */
	(void) copyinstr(path, fs->fs_fsmnt, sizeof(fs->fs_fsmnt) - 1, &size);
	bzero(fs->fs_fsmnt + size, sizeof(fs->fs_fsmnt) - size);
	bcopy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MNAMELEN);
	(void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
	    &size);
	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
	bcopy(&args, &mp->mnt_stat.mount_info.mfs_args, sizeof(args));
	return (0);
}