// for creating, opening, or stating files, verify that the caller is permitted according to our ACLs // return 0 on success // return -EPERM if denied // return other -errno on error static int vdevfs_access_check( struct vdevfs* vdev, struct fskit_fuse_state* fs_state, char const* method_name, char const* path ) { int rc = 0; pid_t pid = 0; uid_t uid = 0; gid_t gid = 0; struct stat sb; struct pstat ps; memset( &sb, 0, sizeof(struct stat) ); sb.st_mode = 0777; memset( &ps, 0, sizeof(struct pstat) ); // stat the calling process pid = fskit_fuse_get_pid(); uid = fskit_fuse_get_uid( fs_state ); gid = fskit_fuse_get_gid( fs_state ); vdev_debug("%s('%s') from user %d group %d task %d\n", method_name, path, uid, gid, pid ); // see who's asking rc = pstat( pid, &ps, 0 ); if( rc != 0 ) { vdev_error("pstat(%d) rc = %d\n", pid, rc ); return -EIO; } // apply the ACLs on the stat buffer rc = vdev_acl_apply_all( vdev->config, vdev->acls, vdev->num_acls, path, &ps, uid, gid, &sb ); if( rc < 0 ) { vdev_error("vdev_acl_apply_all(%s, uid=%d, gid=%d, pid=%d) rc = %d\n", path, uid, gid, pid, rc ); return -EIO; } // omit entirely? if( rc == 0 || (sb.st_mode & 0777) == 0 ) { // filter vdev_debug("DENY '%s'\n", path ); return -EPERM; } else { // accept! return 0; } }
// print a uevent, either with debugging or error loglevels static int vdev_linux_log_uevent( char const* uevent_buf, size_t uevent_buf_len, bool debug ) { for( unsigned int i = 0; i < uevent_buf_len; ) { if( debug ) { vdev_debug("uevent '%s'\n", uevent_buf + i ); } else { vdev_error("uevent '%s'\n", uevent_buf + i ); } i += strlen(uevent_buf + i) + 1; } return 0; }
// initialize the filesystem front-end // call after vdev_init // return 0 on success // return -ENOMEM on OOM // return negative on error int vdevfs_init( struct vdevfs* vdev, int argc, char** argv ) { int rc = 0; int rh = 0; struct fskit_core* core = NULL; int fuse_argc = 0; char** fuse_argv = NULL; int dirfd = 0; // library setup vdev_setup_global(); struct fskit_fuse_state* fs = VDEV_CALLOC( struct fskit_fuse_state, 1 ); if( fs == NULL ) { return -ENOMEM; } fuse_argv = VDEV_CALLOC( char*, argc + 5 ); if( fuse_argv == NULL ) { free( fs ); return -ENOMEM; } // load config vdev->config = VDEV_CALLOC( struct vdev_config, 1 ); if( vdev->config == NULL ) { free( fs ); free( fuse_argv ); return -ENOMEM; } // init config rc = vdev_config_init( vdev->config ); if( rc != 0 ) { vdev_error("vdev_config_init rc = %d\n", rc ); vdevfs_shutdown( vdev ); free( fs ); free( fuse_argv ); return rc; } // parse opts rc = vdev_config_load_from_args( vdev->config, argc, argv, &fuse_argc, fuse_argv ); if( rc != 0 ) { vdev_error("vdev_opts_parse rc = %d\n", rc ); vdev_config_usage( argv[0] ); free( fs ); free( fuse_argv ); vdevfs_shutdown( vdev ); return rc; } // get the mountpoint, but from FUSE if( vdev->config->mountpoint != NULL ) { free( vdev->config->mountpoint ); } rc = vdev_get_mountpoint( fuse_argc, fuse_argv, &vdev->config->mountpoint ); if( rc != 0 ) { vdev_error("vdev_get_mountpoint rc = %d\n", rc ); vdev_config_usage( argv[0] ); free( fs ); free( fuse_argv ); return rc; } vdev_set_debug_level( vdev->config->debug_level ); vdev_set_error_level( vdev->config->error_level ); vdev_debug("Config file: %s\n", vdev->config->config_path ); rc = vdev_config_load( vdev->config->config_path, vdev->config ); if( rc != 0 ) { vdev_error("vdev_config_load('%s') rc = %d\n", vdev->config->config_path, rc ); vdevfs_shutdown( vdev ); free( fs ); free( fuse_argv ); return rc; } vdev_debug("vdev ACLs dir: %s\n", vdev->config->acls_dir ); // force -odev, since we'll create device nodes fuse_argv[fuse_argc] = (char*)vdev_fuse_odev; fuse_argc++; // force -oallow_other, since we'll want to expose this to everyone fuse_argv[fuse_argc] = (char*)vdev_fuse_allow_other; fuse_argc++; // force -ononempty, since we'll want to import the underlying filesystem fuse_argv[fuse_argc] = (char*)vdev_fuse_ononempty; fuse_argc++; vdev->mountpoint = vdev_strdup_or_null( vdev->config->mountpoint ); if( vdev->mountpoint == NULL ) { vdev_error("Failed to set mountpoint, config.mountpount = '%s'\n", vdev->config->mountpoint ); vdevfs_shutdown( vdev ); free( fuse_argv ); free( fs ); return -EINVAL; } else { vdev_debug("mountpoint: %s\n", vdev->mountpoint ); } vdev->argc = argc; vdev->argv = argv; vdev->fuse_argc = fuse_argc; vdev->fuse_argv = fuse_argv; fskit_set_debug_level( vdev->config->debug_level ); fskit_set_error_level( vdev->config->error_level ); // get mountpoint directory dirfd = open( vdev->mountpoint, O_DIRECTORY ); if( dirfd < 0 ) { rc = -errno; vdev_error("open('%s') rc = %d\n", vdev->mountpoint, rc ); free( fs ); vdevfs_shutdown( vdev ); return rc; } vdev->mountpoint_dirfd = dirfd; // set up fskit rc = fskit_fuse_init( fs, vdev ); if( rc != 0 ) { vdev_error("fskit_fuse_init rc = %d\n", rc ); free( fs ); vdevfs_shutdown( vdev ); return rc; } // load ACLs rc = vdev_acl_load_all( vdev->config->acls_dir, &vdev->acls, &vdev->num_acls ); if( rc != 0 ) { vdev_error("vdev_acl_load_all('%s') rc = %d\n", vdev->config->acls_dir, rc ); fskit_fuse_shutdown( fs, NULL ); free( fs ); vdevfs_shutdown( vdev ); return rc; } // make sure the fs can access its methods through the VFS fskit_fuse_setting_enable( fs, FSKIT_FUSE_SET_FS_ACCESS ); core = fskit_fuse_get_core( fs ); // add handlers. rh = fskit_route_readdir( core, FSKIT_ROUTE_ANY, vdevfs_readdir, FSKIT_CONCURRENT ); if( rh < 0 ) { vdev_error("fskit_route_readdir(%s) rc = %d\n", FSKIT_ROUTE_ANY, rh ); goto vdev_route_fail; } rh = fskit_route_stat( core, FSKIT_ROUTE_ANY, vdevfs_stat, FSKIT_CONCURRENT ); if( rh < 0 ) { vdev_error("fskit_route_stat(%s) rc = %d\n", FSKIT_ROUTE_ANY, rh ); goto vdev_route_fail; } rh = fskit_route_mknod( core, FSKIT_ROUTE_ANY, vdevfs_mknod, FSKIT_CONCURRENT ); if( rc < 0 ) { vdev_error("fskit_route_mknod(%s) rc = %d\n", FSKIT_ROUTE_ANY, rh ); goto vdev_route_fail; } rh = fskit_route_mkdir( core, FSKIT_ROUTE_ANY, vdevfs_mkdir, FSKIT_CONCURRENT ); if( rh < 0 ) { vdev_error("fskit_route_mkdir(%s) rc = %d\n", FSKIT_ROUTE_ANY, rh ); goto vdev_route_fail; } rh = fskit_route_create( core, FSKIT_ROUTE_ANY, vdevfs_create, FSKIT_CONCURRENT ); if( rh < 0 ) { vdev_error("fskit_route_create(%s) rc = %d\n", FSKIT_ROUTE_ANY, rh ); goto vdev_route_fail; } rh = fskit_route_open( core, FSKIT_ROUTE_ANY, vdevfs_open, FSKIT_CONCURRENT ); if( rh < 0 ) { vdev_error("fskit_route_open(%s) rc = %d\n", FSKIT_ROUTE_ANY, rh ); goto vdev_route_fail; } rh = fskit_route_read( core, FSKIT_ROUTE_ANY, vdevfs_read, FSKIT_CONCURRENT ); if( rh < 0 ) { vdev_error("fskit_route_read(%s) rc = %d\n", FSKIT_ROUTE_ANY, rh ); goto vdev_route_fail; } rh = fskit_route_write( core, FSKIT_ROUTE_ANY, vdevfs_write, FSKIT_CONCURRENT ); if( rh < 0 ) { vdev_error("fskit_route_write(%s) rc = %d\n", FSKIT_ROUTE_ANY, rh ); goto vdev_route_fail; } rh = fskit_route_close( core, FSKIT_ROUTE_ANY, vdevfs_close, FSKIT_CONCURRENT ); if( rh < 0 ) { vdev_error("fskit_route_close(%s) rc = %d\n", FSKIT_ROUTE_ANY, rh ); goto vdev_route_fail; } rh = fskit_route_sync( core, FSKIT_ROUTE_ANY, vdevfs_sync, FSKIT_CONCURRENT ); if( rh < 0 ) { vdev_error("fskit_route_sync(%s) rc = %d\n", FSKIT_ROUTE_ANY, rh ); goto vdev_route_fail; } rh = fskit_route_detach( core, FSKIT_ROUTE_ANY, vdevfs_detach, FSKIT_CONCURRENT ); if( rh < 0 ) { vdev_error("fskit_route_detach(%s) rc = %d\n", FSKIT_ROUTE_ANY, rh ); goto vdev_route_fail; } vdev->fs = fs; vdev->close_rh = rh; // set the root to be owned by the effective UID and GID of user fskit_chown( core, "/", 0, 0, geteuid(), getegid() ); // import the underlying filesystem once we're mounted, but before taking requests. rc = fskit_fuse_postmount_callback( fs, vdevfs_dev_import, vdev ); if( rc != 0 ) { vdev_error("fskit_fuse_postmount_callback() rc = %d\n", rc ); vdev->fs = NULL; goto vdev_route_fail; } return 0; vdev_route_fail: fskit_fuse_shutdown( fs, NULL ); free( fs ); vdevfs_shutdown( vdev ); return rh; }
// readdir: equivocate about which devices exist, depending on who's asking // omit entries if the ACLs forbid them int vdevfs_readdir( struct fskit_core* core, struct fskit_match_group* grp, struct fskit_entry* fent, struct fskit_dir_entry** dirents, size_t num_dirents ) { int rc = 0; struct fskit_entry* child = NULL; // entries to omit in the listing vector<int> omitted_idx; pid_t pid = 0; uid_t uid = 0; gid_t gid = 0; struct vdevfs* vdev = (struct vdevfs*)fskit_core_get_user_data( core ); struct fskit_fuse_state* fs_state = fskit_fuse_get_state(); struct stat sb; struct pstat ps; char* child_path = NULL; pid = fskit_fuse_get_pid(); uid = fskit_fuse_get_uid( fs_state ); gid = fskit_fuse_get_gid( fs_state ); vdev_debug("vdevfs_readdir(%s, %zu) from user %d group %d task %d\n", grp->path, num_dirents, uid, gid, pid ); // see who's asking rc = pstat( pid, &ps, 0 ); if( rc != 0 ) { vdev_error("pstat(%d) rc = %d\n", pid, rc ); return -EIO; } for( unsigned int i = 0; i < num_dirents; i++ ) { // skip . and .. if( strcmp(dirents[i]->name, ".") == 0 || strcmp(dirents[i]->name, "..") == 0 ) { continue; } // find the associated fskit_entry child = fskit_dir_find_by_name( fent, dirents[i]->name ); if( child == NULL ) { // strange, shouldn't happen... continue; } fskit_entry_rlock( child ); // construct a stat buffer from what we actually need memset( &sb, 0, sizeof(struct stat) ); sb.st_uid = child->owner; sb.st_gid = child->group; sb.st_mode = fskit_fullmode( child->type, child->mode ); child_path = fskit_fullpath( grp->path, child->name, NULL ); if( child_path == NULL ) { // can't continue; OOM fskit_entry_unlock( child ); rc = -ENOMEM; break; } // filter it rc = vdev_acl_apply_all( vdev->config, vdev->acls, vdev->num_acls, child_path, &ps, uid, gid, &sb ); if( rc < 0 ) { vdev_error("vdev_acl_apply_all('%s', uid=%d, gid=%d, pid=%d) rc = %d\n", child_path, uid, gid, pid, rc ); rc = -EIO; } else if( rc == 0 || (sb.st_mode & 0777) == 0 ) { // omit this one vdev_debug("Filter '%s'\n", child->name ); omitted_idx.push_back( i ); rc = 0; } else { // success; matched rc = 0; } fskit_entry_unlock( child ); free( child_path ); // error? if( rc != 0 ) { break; } } // skip ACL'ed entries for( unsigned int i = 0; i < omitted_idx.size(); i++ ) { fskit_readdir_omit( dirents, omitted_idx[i] ); } return rc; }
// yield new devices int vdev_os_main( struct vdev_os_context* vos ) { int rc = 0; while( vos->running ) { // make a device request struct vdev_device_request* vreq = VDEV_CALLOC( struct vdev_device_request, 1 ); if( vreq == NULL ) { // OOM break; } // next device request rc = vdev_device_request_init( vreq, vos->state, VDEV_DEVICE_INVALID, NULL ); if( rc != 0 ) { if( rc == -EAGAIN ) { continue; } free( vreq ); vdev_error("vdev_device_request_init rc = %d\n", rc ); break; } // yield the next device rc = vdev_os_next_device( vreq, vos->os_cls ); if( rc != 0 ) { vdev_device_request_free( vreq ); free( vreq ); if( rc < 0 ) { vdev_error("vdev_os_next_device rc = %d\n", rc ); if( rc == -EAGAIN ) { // OS backend says try again continue; } else { // fatal error break; } } else { // exit on success rc = 0; break; } } vdev_debug("Next device: %p, type=%d path=%s major=%u minor=%u mode=%o\n", vreq, vreq->type, vreq->path, major(vreq->dev), minor(vreq->dev), vreq->mode ); /* struct sglib_vdev_params_iterator itr2; struct vdev_param_t* dp2 = NULL; printf("vreq %p: params:\n", vreq); for( dp2 = sglib_vdev_params_it_init_inorder( &itr2, vreq->params ); dp2 != NULL; dp2 = sglib_vdev_params_it_next( &itr2 ) ) { printf(" '%s' == '%s'\n", dp2->key, dp2->value ); } */ // post the event to the device work queue rc = vdev_device_request_enqueue( &vos->state->device_wq, vreq ); if( rc != 0 ) { vdev_device_request_free( vreq ); free( vreq ); vdev_error("vdev_device_request_add rc = %d\n", rc ); continue; } } return rc; }
// yield the next device event // return 0 on success // return 1 if there are no more devices // return -EAGAIN if vdev should try to get this device again // return -errno on failure to poll for devices or read the next device packet. int vdev_os_next_device( struct vdev_device_request* vreq, void* cls ) { int rc = 0; struct vdev_linux_context* ctx = (struct vdev_linux_context*)cls; char buf[VDEV_LINUX_NETLINK_BUF_MAX]; ssize_t len = 0; char cbuf[CMSG_SPACE(sizeof(struct ucred))]; struct cmsghdr *chdr = NULL; struct ucred *cred = NULL; struct msghdr hdr; struct iovec iov; struct sockaddr_nl cnls; pthread_mutex_lock( &ctx->initial_requests_lock ); // do we have initial requests? if( ctx->initial_requests != NULL ) { // next request struct vdev_device_request* req = ctx->initial_requests; // consume ctx->initial_requests = ctx->initial_requests->next; memcpy( vreq, req, sizeof(struct vdev_device_request) ); free( req ); pthread_mutex_unlock( &ctx->initial_requests_lock ); // was that the last of them? if( ctx->initial_requests == NULL ) { // tell vdevd that we've flushed all pending requests vdev_os_context_signal_flushed( ctx->os_ctx ); } return 0; } else if( ctx->os_ctx->state->once ) { // out of requests; die pthread_mutex_unlock( &ctx->initial_requests_lock ); return 1; } else { pthread_mutex_unlock( &ctx->initial_requests_lock ); } memset(&hdr, 0, sizeof(struct msghdr)); // next event (wait forever) // NOTE: this is a cancellation point! rc = poll( &ctx->pfd, 1, -1 ); if( rc < 0 ) { rc = -errno; if( rc == -EINTR ) { // try again return -EAGAIN; } vdev_error("FATAL: poll(%d) rc = %d\n", ctx->pfd.fd, rc ); return rc; } // get the event iov.iov_base = buf; iov.iov_len = VDEV_LINUX_NETLINK_BUF_MAX; hdr.msg_iov = &iov; hdr.msg_iovlen = 1; // get control-plane messages hdr.msg_control = cbuf; hdr.msg_controllen = sizeof(cbuf); hdr.msg_name = &cnls; hdr.msg_namelen = sizeof(cnls); // get the event len = recvmsg( ctx->pfd.fd, &hdr, 0 ); if( len < 0 ) { rc = -errno; vdev_error("FATAL: recvmsg(%d) rc = %d\n", ctx->pfd.fd, rc ); return rc; } // big enough? if( len < 32 || len >= VDEV_LINUX_NETLINK_BUF_MAX ) { vdev_error("Netlink message is %zd bytes; ignoring...\n", len ); return -EAGAIN; } // control message, for credentials chdr = CMSG_FIRSTHDR( &hdr ); if( chdr == NULL || chdr->cmsg_type != SCM_CREDENTIALS ) { vdev_error("%s", "Netlink message has no credentials\n"); return -EAGAIN; } // get the credentials cred = (struct ucred *)CMSG_DATA(chdr); // if not root, ignore if( cred->uid != 0 ) { vdev_error("Ignoring message from non-root ID %d\n", cred->uid ); return -EAGAIN; } // if udev, ignore if( memcmp( buf, VDEV_LINUX_NETLINK_UDEV_HEADER, VDEV_LINUX_NETLINK_UDEV_HEADER_LEN ) == 0 ) { // message from udev; ignore vdev_warn("%s", "Ignoring libudev message\n"); return -EAGAIN; } // kernel messages don't come from userspace if( cnls.nl_pid > 0 ) { // from userspace??? vdev_warn("Ignoring message from PID %d\n", (int)cnls.nl_pid ); return -EAGAIN; } // parse the event buffer vdev_debug("%p from netlink\n", vreq ); rc = vdev_linux_parse_request( ctx, vreq, buf, len ); if( rc != 0 ) { vdev_error("vdev_linux_parse_request rc = %d\n", rc ); return -EAGAIN; } return 0; }
// parse a uevent, and use the information to fill in a device request. // nlbuf must be a contiguous concatenation of null-terminated KEY=VALUE strings. // return 0 on success static int vdev_linux_parse_request( struct vdev_linux_context* ctx, struct vdev_device_request* vreq, char* nlbuf, ssize_t buflen ) { char* buf = nlbuf; char* key = NULL; char* value = NULL; int offset = 0; int rc = 0; unsigned int major = 0; unsigned int minor = 0; bool have_major = false; bool have_minor = false; mode_t dev_mode = 0; int line_count = 0; bool not_param = false; // if set to true, add as an OS-specific parameter to the vreq char* devpath = NULL; // sysfs devpath char* subsystem = NULL; // sysfs subsystem char* devname = (char*)VDEV_DEVICE_PATH_UNKNOWN; // DEVNAME from uevent vdev_device_request_t reqtype = VDEV_DEVICE_INVALID; vdev_debug("%p: uevent buffer\n", vreq ); vdev_linux_debug_uevent( nlbuf, buflen ); // sanity check: if the first line is $action@$devpath, then skip it (since the information // contained in the uevent will encode the very same bits of information) if( strchr(buf, '@') != NULL ) { // advance to the next line offset += strlen(buf) + 1; } // get key/value pairs while( offset < buflen ) { line_count++; not_param = false; rc = vdev_keyvalue_next( buf + offset, &key, &value ); if( rc < 0 ) { vdev_error("Invalid line %d (byte %d): '%s'\n", line_count, offset, buf + offset ); vdev_linux_error_uevent( nlbuf, buflen ); return -EINVAL; } offset += rc + 1; // count the \0 at the end rc = 0; // is this the action to take? if( strcmp(key, "ACTION") == 0 ) { reqtype = vdev_linux_parse_device_request_type( value ); if( reqtype == VDEV_DEVICE_INVALID ) { vdev_error("Invalid ACTION '%s'\n", value ); vdev_linux_error_uevent( nlbuf, buflen ); return -EINVAL; } vdev_device_request_set_type( vreq, reqtype ); not_param = true; } // is this the sysfs device path? else if( strcmp(key, "DEVPATH") == 0 ) { devpath = value; } // is this the devname? else if( strcmp(key, "DEVNAME") == 0 ) { devname = value; } // subsystem given? else if( strcmp(key, "SUBSYSTEM") == 0 ) { subsystem = vdev_strdup_or_null( value ); } // is this the major device number? else if( strcmp(key, "MAJOR") == 0 && !have_major ) { char* tmp = NULL; major = (int)strtol( value, &tmp, 10 ); if( *tmp != '\0' ) { vdev_error("Invalid 'MAJOR' value '%s'\n", value); vdev_linux_error_uevent( nlbuf, buflen ); return -EINVAL; } have_major = true; not_param = true; } // is this the minor device number? else if( strcmp(key, "MINOR") == 0 && !have_minor ) { char* tmp = NULL; minor = (int)strtol( value, &tmp, 10 ) ; if( *tmp != '\0' ) { vdev_error("Invalid 'MINOR' value '%s'\n", value ); vdev_linux_error_uevent( nlbuf, buflen ); return -EINVAL; } have_minor = true; not_param = true; } if( !not_param ) { // add to OS params rc = vdev_device_request_add_param( vreq, key, value ); if( rc != 0 ) { // could be OOM if( subsystem != NULL ) { free( subsystem ); } return rc; } } } if( reqtype == VDEV_DEVICE_INVALID ) { vdev_error("%s", "No ACTION given\n"); vdev_linux_error_uevent( nlbuf, buflen ); if( subsystem != NULL ) { free( subsystem ); } return -EINVAL; } if( (!have_major && have_minor) || (have_major && !have_minor) ) { vdev_error("Missing device information: major=%d, minor=%d\n", have_major, have_minor ); vdev_linux_error_uevent( nlbuf, buflen ); if( subsystem != NULL ) { free( subsystem ); } return -EINVAL; } if( have_major && have_minor ) { // explicit major and minor device numbers given vdev_device_request_set_dev( vreq, makedev(major, minor) ); } if( devname != NULL ) { // use this as the device's path vdev_device_request_set_path( vreq, devname ); } if( devpath != NULL ) { // get any remaining information from sysfs // check major/minor? if( !have_major || !have_minor ) { // see if we have major/minor device numbers for this device... rc = vdev_linux_sysfs_read_dev_nums( ctx, devpath, &major, &minor ); if( rc == 0 ) { // yup! vdev_device_request_set_dev( vreq, makedev(major, minor) ); have_major = true; have_minor = true; } else { // it's okay to not have dev numbers rc = 0; } } // subsystem? if( subsystem == NULL ) { // see if we have a subsystem rc = vdev_linux_sysfs_read_subsystem( ctx, devpath, &subsystem ); if( rc == 0 ) { // yup! rc = vdev_device_request_add_param( vreq, "SUBSYSTEM", subsystem ); if( rc != 0 ) { // OOM free( subsystem ); return rc; } } else if( rc != -ENOMEM ) { // this is weird... vdev_warn("no subsystem found for '%s'\n", devpath ); rc = 0; } } } if( have_major && have_minor ) { if( subsystem != NULL && strcasecmp(subsystem, "block") == 0 ) { // this is a block dev_mode = S_IFBLK; } else { // this is a character device--we have major/minor numbers dev_mode = S_IFCHR; } vdev_device_request_set_mode( vreq, dev_mode ); } vdev_debug("subsystem = '%s', have_major=%d, major = %u, have_minor=%d, minor = %u, mode = %o\n", subsystem, have_major, major, have_minor, minor, dev_mode ); if( subsystem != NULL ) { free( subsystem ); } // tell helpers where /sys is mounted rc = vdev_device_request_add_param( vreq, "SYSFS_MOUNTPOINT", ctx->sysfs_mountpoint ); if( rc != 0 ) { // OOM return rc; } return rc; }