/*
 * __cr_getname
 *
 * Read a saved filename from the context file into a buffer obtained
 * from __getname().
 *
 * Returns:
 *   - the buffer holding the name (release it with __putname()),
 *   - NULL if the saved string was NULL and null_ok is non-zero,
 *   - ERR_PTR(-ENOMEM) if no buffer could be allocated,
 *   - ERR_PTR(-EIO) if the saved string was NULL and null_ok is zero,
 *   - ERR_PTR(err) for any error reported by cr_fgets().
 */
const char *__cr_getname(cr_errbuf_t *eb, struct file *filp, int null_ok)
{
    char *path_buf = __getname();
    int len;

    if (path_buf == NULL) {
        CR_ERR_EB(eb, "Couldn't allocate buffer for file name.");
        return ERR_PTR(-ENOMEM);
    }

    /* cr_fgets() yields >0 for a real string, 0 for a saved NULL, <0 on error */
    len = cr_fgets(eb, path_buf, PATH_MAX, filp);
    if (len > 0) {
        return path_buf;
    }

    if (len < 0) {
        CR_ERR_EB(eb, "Bad read of filename.");
    } else if (!null_ok) {
        len = -EIO;
    }

    /* Nothing to hand back to the caller, so the buffer goes back too */
    __putname(path_buf);
    return (len < 0) ? ERR_PTR(len) : NULL;
}
/*
 * cr_mkunlinked
 *
 * Recreate a regular file with cr_filp_mknod() and populate it with 'size'
 * bytes taken from cr_filp via cr_sendfile().
 * If the original file location doesn't work, try "/tmp/".
 *
 * The file is first opened O_LARGEFILE|O_RDWR with S_IRUSR|S_IWUSR forced on
 * so it can be written; afterwards it is reopened with the caller's 'flags'
 * (if they differ) and chmod'ed back to 'mode' (if the forced bits changed it).
 *
 * In CRI_DEBUG builds a zero unlinked_id, or a mode whose type bits are set
 * to anything but S_IFREG, is rejected with -EINVAL.
 *
 * Returns the open file on success, or an ERR_PTR() on failure.  On every
 * failure after the file has been created, the file is closed before return.
 */
struct file *
cr_mkunlinked(cr_errbuf_t *eb, struct file *cr_filp, const char *name, int mode, int flags, loff_t size, unsigned long unlinked_id)
{
    const char *tmpdir = "/tmp/"; /* XXX: Ick.  Note trailing '/' is required */
    struct file *filp;
    loff_t w;
    const int xmodes = (S_IRUSR | S_IWUSR);

#if CRI_DEBUG
    filp = ERR_PTR(-EINVAL);
    if (!unlinked_id) {
        CR_ERR_EB(eb, "Zero 'unlinked_id' passed to %s", __FUNCTION__);
        goto out;
    }
    if ((mode&S_IFMT) && ((mode&S_IFMT) != S_IFREG)) {
        CR_ERR_EB(eb, "Bad mode %d passed to %s", mode, __FUNCTION__);
        goto out;
    }
#endif

    /* make a new unlinked file, suitable for writing in the saved data */
    filp = cr_filp_mknod(eb, name, mode|xmodes|S_IFREG, O_LARGEFILE|O_RDWR, unlinked_id);
    if (IS_ERR(filp)) {
        filp = cr_filp_mknod(eb, tmpdir, mode|xmodes|S_IFREG, O_LARGEFILE|O_RDWR, unlinked_id);
    }
    if (IS_ERR(filp)) {
        CR_ERR_EB(eb, "Failed to recreate unlinked file %s, err=%d.", name, (int)PTR_ERR(filp));
        goto out;
    }

    /* and populate it with the saved data */
    /* XXX: If/when we split the create from the populate, then we'll need to either
     * do an ftruncate() here, or else defer the llseek() until post-populate.
     */
    w = cr_sendfile(eb, filp, cr_filp, NULL, size);
    if (w != size) {
        /* Close the partially populated file rather than leaking it.
         * A short (non-negative) transfer is reported as -EIO.
         */
        (void)filp_close(filp, current->files);
        filp = ERR_PTR((w < 0) ? (long)w : -EIO);
        goto out;
    }

    /* Now reopen with caller-requested flags, if different */
    if (filp->f_flags != flags) {
        struct file *tmp = cr_filp_reopen(filp, flags);
        (void)filp_close(filp, current->files);
        filp = tmp; /* Even on error. */
    }
    if (IS_ERR(filp)) {
        goto out;
    }

    /* If we forced S_I[RW]USR, then fix it so fstat() doesn't reveal any change. */
    if ((mode & xmodes) != xmodes) {
        int err = cr_filp_chmod(filp, mode);
        if (err < 0) {
            filp_close(filp, current->files);
            filp = ERR_PTR(err);
        }
    }

out:
    return filp;
}
/*
 * cr_link
 *
 * Make 'name' a new hard link to the object referenced by old_path.
 * If 'name' already exists and refers to the same inode, its dentry is
 * returned; if it exists but refers to something else, the result is
 * ERR_PTR(-EEXIST).
 *
 * The link is made with sys_link() on the path string produced by
 * cr_getpath(), which is why an unlinked target cannot be linked to.
 *
 * NOTE: We once tried to do this via vfs_link(), rather than sys_link().
 * That was "better" in the sense that it was able to link to unlinked
 * targets (which this cannot).  However, that was not working over NFS
 * for reasons I never did figure out. -PHH
 *
 * Returns a dentry carrying a reference taken with dget() (the caller
 * must dput() it), or an ERR_PTR() on failure.
 */
struct dentry *
cr_link(cr_errbuf_t *eb, struct path *old_path, const char *name)
{
    struct nameidata nd;
    struct dentry *result = NULL;
    mm_segment_t saved_fs;
    char *src_name;
    int rc;
    char *scratch = __getname();

    if (!scratch) {
        rc = -ENOMEM;
        goto done;
    }

    /* Turn the "old" path back into a string for sys_link(). */
    src_name = cr_getpath(old_path, scratch, PATH_MAX);
    if (IS_ERR(src_name)) {
        rc = PTR_ERR(src_name);
        goto release;
    }

    /* sys_link() expects user pointers; widen the address limit around it. */
    saved_fs = get_fs();
    set_fs(KERNEL_DS);
    rc = sys_link(src_name, name);
    set_fs(saved_fs);

    /* -EEXIST is tolerated here: the existing entry may be the one we want. */
    if ((rc < 0) && (rc != -EEXIST)) {
        CR_ERR_EB(eb, "cr_link: sys_link(%s,%s) returned %d", src_name, name, rc);
        goto release;
    }

    /* Fetch the dentry behind 'name'.  This races with other users of the
     * name, which the inode comparison below accounts for.
     */
    rc = path_lookup(name, LOOKUP_FOLLOW, &nd);
    if (rc < 0) {
        CR_ERR_EB(eb, "cr_link: path_lookup(%s) returned %d", name, rc);
        goto release;
    }
    result = dget(nd.nd_dentry);
    cr_path_release(&nd);

    /* Covers both sys_link() == -EEXIST and the link-to-lookup race. */
    if (result->d_inode != old_path->dentry->d_inode) {
        dput(result);
        rc = -EEXIST;
    }

release:
    __putname(scratch);
done:
    return (rc < 0) ? ERR_PTR(rc) : result;
}
/*
 * cr_sendfile_buffered
 *
 * Move 'count' bytes from src_filp to dst_filp through a bounce buffer of
 * at most four pages, using plain vfs_read()/vfs_write().
 * This is the fallback for sources that lack a readpage method
 * (e.g. restart from pipes or sockets).
 * XXX: Could we use non-blocking writes and double buffering?
 *
 * Reads advance *src_ppos; writes advance dst_filp->f_pos.
 *
 * Returns the number of bytes actually written (which is less than 'count'
 * if either side reports zero bytes), -ENOMEM if the buffer cannot be
 * allocated, or the negative value returned by vfs_read()/vfs_write().
 *
 * Note: Caller is responsible for checking count==0 or src_ppos==NULL.
 */
static loff_t
cr_sendfile_buffered(cr_errbuf_t *eb, struct file *dst_filp, struct file *src_filp, loff_t *src_ppos, loff_t count)
{
    const size_t chunk_max = (4 << PAGE_SHIFT);
    loff_t remaining = count;
    loff_t result;
    mm_segment_t saved_fs;
    char *bounce;

    bounce = vmalloc((count < chunk_max) ? count : chunk_max);
    if (!bounce) {
        return -ENOMEM;
    }

    /* The buffer is kernel memory, so widen the address limit for vfs_*() */
    saved_fs = get_fs();
    set_fs(KERNEL_DS);

    while (remaining) {
        char *cursor = bounce;
        /* Pull in as much as a single call will give us */
        ssize_t pending = vfs_read(src_filp, cursor, (remaining < chunk_max) ? remaining : chunk_max, src_ppos);

        if (pending == 0) {
            break;	/* source is exhausted */
        }
        if (pending < 0) {
            CR_ERR_EB(eb, "vfs_read returned %ld", (long int)pending);
            result = pending;
            goto restore;
        }

        /* Drain everything we just read, tolerating short writes */
        do {
            ssize_t put = vfs_write(dst_filp, cursor, pending, &dst_filp->f_pos);

            if (put == 0) {
                goto tally;	/* destination accepts no more */
            }
            if (put < 0) {
                CR_ERR_EB(eb, "vfs_write returned %ld", (long int)put);
                result = put;
                goto restore;
            }
            remaining -= put;
            pending -= put;
            cursor += put;
        } while (pending);
    }

tally:
    result = count - remaining;
restore:
    set_fs(saved_fs);
    vfree(bounce);
    return result;
}
/* * cr_anonymous_rename * * Rewrite a filename to an anonymous value * * len is strlen and size is buffer size */ static char * cr_anonymous_rename(cr_errbuf_t *eb, const char *in_buf, unsigned long id) { size_t len; char *out_buf, *p; len = strlen(in_buf); if (len >= PATH_MAX) { /* XXX: probably not what we want to do */ len = PATH_MAX - 1; } /* strdup() */ out_buf = __getname(); if (out_buf == NULL) { goto out; } memcpy(out_buf, in_buf, len+1); /* dirname() */ p = out_buf + len; while ((p != out_buf) && (*p != '/')) { --p; } ++p; len = p - out_buf; #if BITS_PER_LONG == 32 if (len > PATH_MAX - 20) { CR_ERR_EB(eb, "cr_anonymous_rename - unlinked name too long for renaming"); goto out_free; } sprintf(p, ".blcr_%04x.%08lx", (unsigned int)current->pid, id); #elif BITS_PER_LONG == 64 if (len > PATH_MAX - 24) { CR_ERR_EB(eb, "cr_anonymous_rename - unlinked name too long for renaming"); goto out_free; } sprintf(p, ".blcr_%04x.%016lx", (unsigned int)current->pid, id); #else #error "No value for BITS_PER_LONG" #endif return out_buf; out_free: __putname(out_buf); out: return NULL; }
/*
 * cr_uread
 *
 * Read exactly 'count' bytes from 'file' (at file->f_pos) into 'buf',
 * looping on short reads.
 *
 * Returns 'count' on success.  If vfs_read() fails its negative value is
 * returned, and a zero-byte read is reported as -EIO.
 * In CRI_DEBUG builds a non-zero cr_read_fault_rate makes an otherwise
 * successful call randomly return -EFAULT.
 */
ssize_t cr_uread(cr_errbuf_t *eb, struct file *file, void *buf, size_t count)
{
    char *cursor = buf;
    ssize_t remaining;
    ssize_t result;

    for (remaining = count; remaining != 0; ) {
        const ssize_t got = vfs_read(file, cursor, CR_TRIM_XFER(remaining), &file->f_pos);

        if (got <= 0) {
            CR_ERR_EB(eb, "vfs_read returned %ld", (long int)got);
            return got ? got : -EIO;	/* Map zero -> EIO */
        }
        cursor += got;
        remaining -= got;
    }
    result = count;

#if CRI_DEBUG
    if (cr_read_fault_rate) {
        unsigned int roll;

        get_random_bytes(&roll, sizeof(roll));
        if ((roll % cr_read_fault_rate) == 0) {
            CR_INFO("injecting READ fault");
            result = -EFAULT;
        }
    }
#endif

    return result;
}
/*
 * cr_uwrite
 *
 * Write exactly 'count' bytes from 'buf' to 'file' (at file->f_pos),
 * looping on short writes.
 *
 * Returns 'count' on success.  If vfs_write() fails its negative value is
 * returned, and a zero-byte write is reported as -EIO.
 * In CRI_DEBUG builds a non-zero cr_write_fault_rate makes an otherwise
 * successful call randomly return -EFAULT.
 */
ssize_t cr_uwrite(cr_errbuf_t *eb, struct file *file, const void *buf, size_t count)
{
    const char *cursor = buf;
    ssize_t remaining;
    ssize_t result;

    for (remaining = count; remaining != 0; ) {
        const ssize_t put = vfs_write(file, cursor, CR_TRIM_XFER(remaining), &file->f_pos);

        if (put <= 0) {
            CR_ERR_EB(eb, "vfs_write returned %ld", (long int)put);
            return put ? put : -EIO;	/* Map zero -> EIO */
        }
        cursor += put;
        remaining -= put;
    }
    result = count;

#if CRI_DEBUG
    if (cr_write_fault_rate) {
        unsigned int roll;

        get_random_bytes(&roll, sizeof(roll));
        if ((roll % cr_write_fault_rate) == 0) {
            CR_INFO("injecting WRITE fault");
            result = -EFAULT;
        }
    }
#endif

    return result;
}
/*
 * cr_fputs
 *
 * Slightly different than fputs, since it writes the '\0' also, and refuses to
 * write anything larger than a PATH_MAX.
 *
 * On-disk layout: an int length, followed by that many bytes of string
 * (terminator included).  A NULL buf is recorded as a length of 0 with no
 * payload, so that it can be told apart from an empty string (length 1).
 *
 * Returns the total number of bytes written on success, -EINVAL if the
 * string (with terminator) exceeds PATH_MAX, the negative value from
 * cr_kwrite() if it fails, or -EIO if cr_kwrite() reports a short write.
 */
int cr_fputs(cr_errbuf_t *eb, const char *buf, struct file *filp)
{
    int ret;
    int wrote = 0;
    int len;

    /* we actually want to know when someone wrote NULL to an area, so we
     * can restore the NULL on restart.
     */
    if (!buf) {
        /* magic length of 0 for NULL buf */
        len = 0;
    } else {
        /* we write out the length WITH the '\0' termination. */
        len = strlen(buf) + 1;
    }

    if ((len < 0) || (len > PATH_MAX)) {
        CR_ERR_EB(eb, "cr_fputs: String length (%d) out of bounds.", len);
        ret = -EINVAL;
        goto out;
    }

    ret = cr_kwrite(eb, filp, &len, sizeof(len));
    if (ret != (int)sizeof(len)) {
        CR_ERR_EB(eb, "cr_fputs: write len returned %d", ret);
        /* Callers only test for <0, so a short write must not look like success */
        if (ret >= 0) {
            ret = -EIO;
        }
        goto out;
    }
    wrote += ret;

    if (len) {
        ret = cr_kwrite(eb, filp, buf, len);
        if (ret != len) {
            CR_ERR_EB(eb, "cr_fputs: write buf returned %d", ret);
            if (ret >= 0) {
                ret = -EIO;
            }
            goto out;
        }
        wrote += ret;
    }

    ret = wrote;

out:
    return ret;
}
/*
 * cr_save_pathname
 *
 * Saves the pathname for 'path' to cr_filp with cr_fputs(), returning the
 * bytes written (or <0 on error).
 * A NULL path or NULL dentry yields a saved string value of NULL (distinct
 * from the empty string).
 * Uses the supplied orig_buf of 'size' bytes, if any; otherwise a buffer is
 * taken from __getname() for the duration of the call and released before
 * returning.
 *
 * Errors: -ENOMEM if a buffer is needed and cannot be allocated; the error
 * encoded in an ERR_PTR() returned by cr_getpath(), or -EBADF if it returns
 * NULL; otherwise whatever cr_fputs() returns.
 */
int cr_save_pathname(cr_errbuf_t *eb, struct file *cr_filp, struct path *path, char *orig_buf, int size)
{
    int retval;
    const char *name = NULL;
    char *buf = orig_buf;

    /* With a NULL path or dentry there is nothing to look up: save a NULL */
    if (path && path->dentry) {
        /* Allocate buf if none was supplied */
        if (!buf) {
            buf = __getname();
            if (!buf) {
                retval = -ENOMEM;
                goto out;
            }
            size = PATH_MAX;
        }

        /* find the file name */
        name = cr_getpath(path, buf, size);
        if (!name || IS_ERR(name)) {
            /* Neither NULL nor an ERR_PTR() may reach cr_fputs() as a string */
            CR_ERR_EB(eb, "Bad or non/existant name!");
            retval = name ? (int)PTR_ERR(name) : -EBADF;
            goto out_bad;
        }
        CR_INFO("file = %s\n", name);
    }

    /* now write out the name */
    retval = cr_fputs(eb, name, cr_filp);
    if (retval < 0) {
        CR_ERR_EB(eb, "cr_save_pathname - Bad file write! (cr_fputs returned %d)", retval);
    }

out_bad:
    /* Only release the buffer if it was allocated here */
    if (buf && !orig_buf) {
        __putname(buf);
    }
out:
    return retval;
}
// cr_loc_init(loc, fd, from) // // Validate and record data about the requested destination of a checkpoint // // Returns 0 on success, negative error code on failure. // // XXX: need to document return cases? // XXX: need to be sure we make all the right checks. int cr_loc_init(cr_errbuf_t *eb, cr_location_t *loc, int fd, struct file *from, int is_write) { struct file *filp; memset(loc, 0, sizeof(*loc)); loc->is_write = is_write; if (fd != CR_DEST_CWD) { filp = fget(fd); } else { // XXX: do we want to shortcut and just copy current->fs? filp = filp_open(".", O_RDONLY|O_NDELAY|O_DIRECTORY, 0); } if (!filp) { CR_ERR_EB(eb, "invalid file descriptor %d received", fd); return -EINVAL; } else if (IS_ERR(filp)) { return PTR_ERR(filp); } else if (filp == from) { CR_ERR_EB(eb, "file descriptor %d is the ctrl descriptor", fd); fput(filp); return -EINVAL; } switch (filp->f_dentry->d_inode->i_mode & S_IFMT) { case S_IFREG: case S_IFCHR: case S_IFIFO: case S_IFSOCK: CR_KTRACE_LOW_LVL("Calling do_init_reg on fd %d", fd); return do_init_reg(loc, filp); break; case S_IFDIR: return do_init_dir(loc, filp); break; case S_IFBLK: // We don't deal with this case yet: // fall through... default: CR_ERR_EB(eb, "unsupported file type"); fput(filp); return -EINVAL; } }
/*
 * cr_fgets
 *
 * Sort of like fgets: reads back a string stored as an int length followed
 * by that many bytes.
 *
 * Returns the length (including '\0') on success, or <0 on error.
 * A 0 return means the saved string was NULL, 1 means the empty string.
 *
 * Errors: -EINVAL for a NULL buf, a non-positive size, or a stored length
 * that is negative or larger than 'size'; the negative value from
 * cr_kread() if it fails; -EIO if cr_kread() returns fewer bytes than
 * requested.
 *
 * NOTE(review): the payload is not checked for a terminating '\0'; confirm
 * that callers do not rely on one being present for corrupt input.
 */
int cr_fgets(cr_errbuf_t *eb, char *buf, int size, struct file *filp)
{
    int ret;
    int len = 0;

    /* Argument checking */
    ret = -EINVAL;
    if (!buf) {
        CR_ERR_EB(eb, "cr_fgets: NULL buffer pass as argument!");
        goto out;
    }
    if (size <= 0) {
        CR_ERR_EB(eb, "cr_fgets: Bad buffer size %d.", size);
        goto out;
    }

    ret = cr_kread(eb, filp, &len, sizeof(len));
    if (ret != (int)sizeof(len)) {
        CR_ERR_EB(eb, "cr_fgets: read len returned %d", ret);
        /* After a short read 'len' is incomplete and must not be returned */
        if (ret >= 0) {
            ret = -EIO;
        }
        goto out;
    }

    if ((len < 0) || (len > size)) {
        ret = -EINVAL;
        CR_ERR_EB(eb, "cr_fgets: Bad string length %d in file.", len);
        goto out;
    }

    if (len == 0) {
        /* old string was NULL.  i.e. never initialized rather than a
         * pointer to an empty string
         */
        goto out;
    }

    ret = cr_kread(eb, filp, buf, len);
    if (ret != len) {
        CR_ERR_EB(eb, "cr_fgets: read buf returned %d", ret);
        /* A truncated string is an error, not a success of 'len' bytes */
        if (ret >= 0) {
            ret = -EIO;
        }
        goto out;
    }

out:
    return (ret < 0) ? ret : len;
}
/*
 * cr_filp_mknod (struct nameidata variant)
 *
 * Calls cr_mknod and then opens the result with the given flags, returning
 * a (struct file *).
 *
 * Returns the open file on success.  On failure returns an ERR_PTR():
 * the error from cr_mknod(), the error from cr_dentry_open(), or -EEXIST
 * if the opened object's type does not match the type bits of 'mode'.
 * The dentry and nameidata references obtained from cr_mknod() are dropped
 * here before returning.
 */
struct file *
cr_filp_mknod(cr_errbuf_t *eb, const char *name, int mode, int flags, unsigned long unlinked_id)
{
    struct nameidata nd;
    struct dentry * dentry;
    struct file *filp;

    /* mknod */
    dentry = cr_mknod(eb, &nd, name, mode, unlinked_id);
    if (IS_ERR(dentry)) {
        CR_KTRACE_UNEXPECTED("Failed to recreate %sfilesystem object %s, err=%d.",
                             unlinked_id?"unlinked ":"", name, (int)PTR_ERR(dentry));
        filp = (struct file *)dentry;
        goto out;
    }

    /* now open it, on extra references so ours can be dropped below */
    filp = cr_dentry_open(dget(dentry), mntget(nd.nd_mnt), flags);
    if (IS_ERR(filp)) {
        /* Report the open error itself (dentry is a valid pointer here) */
        CR_ERR_EB(eb, "Failed to reopen %sfilesystem object %s, err=%d.",
                  unlinked_id?"unlinked ":"", name, (int)PTR_ERR(filp));
        goto out_dput;
    }

    /* check that we actually got the expected type */
    if ((mode ^ filp->f_dentry->d_inode->i_mode) & S_IFMT) {
        CR_ERR_EB(eb, "Type conflict when recreating %sfilesystem object %s.",
                  unlinked_id?"unlinked ":"", name);
        fput(filp);
        filp = ERR_PTR(-EEXIST);
        goto out_dput;
    }

out_dput:
    dput(dentry);
    cr_path_release(&nd);
out:
    return filp;
}
/*
 * cr_filp_mknod (struct path variant)
 *
 * Create (or find) the filesystem object with cr_mknod() and open it with
 * the given flags, returning a (struct file *).
 *
 * Returns the open file on success.  On failure returns an ERR_PTR():
 * the error from cr_mknod(), the error from cr_dentry_open_perm(), or
 * -EEXIST if the opened object's type does not match the type bits of
 * 'mode'.  The path reference filled in by cr_mknod() is dropped here
 * before returning.
 */
struct file *
cr_filp_mknod(cr_errbuf_t *eb, const char *name, int mode, int flags, unsigned long unlinked_id)
{
    struct path path;
    struct file *opened;
    int rc;

    /* mknod */
    rc = cr_mknod(eb, &path, name, mode, unlinked_id);
    if (rc != 0) {
        CR_KTRACE_UNEXPECTED("Failed to recreate %sfilesystem object %s, err=%d.",
                             unlinked_id?"unlinked ":"", name, rc);
        return ERR_PTR(rc);
    }

    /* Take an extra reference before opening; ours is put below either way */
    path_get(&path);
    opened = cr_dentry_open_perm(&path, flags);

    if (IS_ERR(opened)) {
        CR_ERR_EB(eb, "Failed to reopen %sfilesystem object %s, err=%d.",
                  unlinked_id?"unlinked ":"", name, (int)PTR_ERR(opened));
    } else if ((opened->f_dentry->d_inode->i_mode ^ mode) & S_IFMT) {
        /* What we opened is not the kind of object that was asked for */
        CR_ERR_EB(eb, "Type conflict when recreating %sfilesystem object %s.",
                  unlinked_id?"unlinked ":"", name);
        fput(opened);
        opened = ERR_PTR(-EEXIST);
    }

    path_put(&path);
    return opened;
}
/*
 * cr_sendfile_hugedst
 *
 * Like cr_sendfile_buffered(), but for HUGETLBFS destination file.
 * Uses temporary mmap()s of a chunk of len HPAGE_SIZE at a time: each chunk
 * of dst_filp is mapped MAP_SHARED into the current process and filled
 * directly by cr_uread() from src_filp.  After the first chunk the same
 * address is reused with MAP_FIXED, so one munmap at the end suffices.
 *
 * Data is read at src_filp->f_pos (cr_uread() ignores src_ppos), hence the
 * assertion that src_ppos refers to it.  'count' must be a multiple of
 * HPAGE_SIZE and dst_filp must be positioned at 0.
 *
 * Returns 'count' on success (and sets dst_filp->f_pos to 'count'), or a
 * negative error from do_mmap_pgoff() or cr_uread().
 *
 * Note: Caller is responsible for checking count==0 or {dst,src}_ppos==NULL.
 */
static loff_t
cr_sendfile_hugedst(cr_errbuf_t *eb, struct file *dst_filp, struct file *src_filp, loff_t *src_ppos, loff_t count)
{
    loff_t bytes_left;
    loff_t retval;
    struct mm_struct *mm = current->mm;
    unsigned long map_addr = 0;
    unsigned long map_pgoff = 0;
    unsigned long map_flags = MAP_SHARED;

    CRI_ASSERT((count & (HPAGE_SIZE-1)) == 0);
    CRI_ASSERT(dst_filp->f_pos == 0);
    CRI_ASSERT(src_ppos == &src_filp->f_pos);

    for (bytes_left = count; bytes_left; bytes_left -= HPAGE_SIZE) {
        unsigned long tmp;

        down_write(&mm->mmap_sem);
        tmp = do_mmap_pgoff(dst_filp, map_addr, HPAGE_SIZE, PROT_READ|PROT_WRITE, map_flags, map_pgoff);
        up_write(&mm->mmap_sem);
        if (IS_ERR((void*)tmp)) {
            CR_ERR_EB(eb, "do_mmap(HUGE dst file) returned %ld", (long)tmp);
            /* Go through long so the error stays negative where long is
             * narrower than loff_t.
             */
            retval = (long)tmp;
            /* A window from an earlier iteration may still be mapped */
            goto out_unmap;
        }
        map_addr = tmp;
        map_pgoff += (HPAGE_SIZE >> PAGE_SHIFT);
        map_flags |= MAP_FIXED;	/* later chunks replace this window in place */

        retval = cr_uread(eb, src_filp, (void *)map_addr, HPAGE_SIZE);
        if (retval < 0) {
            goto out_unmap;
        }
    }
    retval = count;
    dst_filp->f_pos = count;

out_unmap:
    if (map_addr) {
        (void)sys_munmap(map_addr, HPAGE_SIZE); // XXX: check for error (unless on error path already)?
    }
    return retval;
}
/* cr_mknod - based on linux/fs/namei.c:sys_mknod
 *
 * Creates regular files or fifos (no devices) making them anonymous (unlinked)
 * if desired.  When unlinked_id is non-zero the object is created under a
 * name generated by cr_anonymous_rename() and unlinked right after creation.
 *
 * Returns a dentry for the resulting filesystem object, and the corresponding
 * vfsmnt can be obtained in nd->nd_mnt.  Together these two can be passed
 * to dentry_open() or cr_dentry_open(), even for an unlinked inode.
 * In the event of an error, no dput() or cr_path_release() is required,
 * otherwise they are.
 *
 * In the event that an object exists with the given name, it will be
 * checked for the proper mode prior to return, yielding -EEXIST on conflict.
 *
 * Other errors: -ENOMEM if the anonymous name cannot be generated, -EINVAL
 * for a type other than S_IFREG/S_IFIFO, or whatever the lookup/create
 * calls return.
 */
struct dentry *
cr_mknod(cr_errbuf_t *eb, struct nameidata *nd, const char *name, int mode, unsigned long unlinked_id)
{
    struct dentry * dentry;
    int err;

    if (unlinked_id) {
        /* Generate a replacement name which we will use instead of the original one. */
        name = cr_anonymous_rename(eb, name, unlinked_id);
        if (!name) {
            CR_ERR_EB(eb, "cr_mknod - failed to rename unlinked object");
            err = -ENOMEM;
            goto out;
        }
    }

    /* Prior to 2.6.26, lookup_create() would return an existing dentry.
     * Since 2.6.26, it returns -EEXIST if the dentry exists.  So, we first
     * check for an existing dentry.  For older kernels this is not required,
     * but is still correct.
     */
    err = path_lookup(name, LOOKUP_FOLLOW, nd);
    if (!err) {
        dentry = dget(nd->nd_dentry);
        err = -EEXIST; /* Forces mode validation below */
        goto have_it;
    }

    err = path_lookup(name, LOOKUP_PARENT, nd);
    if (err) {
        CR_KTRACE_UNEXPECTED("Couldn't path_lookup for mknod %s. err=%d.", name, err);
        goto out_free;
    }

    dentry = cr_lookup_create(nd, 0);
    if (IS_ERR(dentry)) {
        err = PTR_ERR(dentry);
        CR_KTRACE_UNEXPECTED("Couldn't lookup_create for mknod %s. err=%d.", name, err);
        goto out_release;
    }

    switch (mode & S_IFMT) {
    case S_IFREG:
        err = vfs_create(nd->nd_dentry->d_inode, dentry, mode, nd);
        break;
    case S_IFIFO:
        err = cr_vfs_mknod(nd->nd_dentry->d_inode, dentry, nd->nd_mnt, mode, 0 /* ignored */);
        break;
    default:
        CR_ERR_EB(eb, "Unknown/invalid type %d passed to cr_mknod %s.", (mode&S_IFMT), name);
        err = -EINVAL;
        break;
    }

    if (unlinked_id && !err) { /* Note that we don't unlink if we failed to create */
        dget(dentry);	/* ensure unlink doesn't destroy the dentry */
        /* Note possibility of silent failure here: */
        (void)cr_vfs_unlink(nd->nd_dentry->d_inode, dentry, nd->nd_mnt);
        dput(dentry);
    }
    cr_inode_unlock(nd->nd_dentry->d_inode);

have_it:
    if ((err == -EEXIST) && !((dentry->d_inode->i_mode ^ mode) & S_IFMT)) {
        /* An object of the right type already exists:
         * we fall through and return the dentry
         */
    } else if (err) {
        CR_KTRACE_UNEXPECTED("Couldn't cr_mknod %s. err=%d.", name, err);
        goto out_put;
    }

    /* The generated name is no longer needed once the dentry is in hand */
    if (unlinked_id) {
        __putname(name);
    }
    return dentry;

out_put:
    dput(dentry);
out_release:
    cr_path_release(nd);
out_free:
    if (unlinked_id) {
        __putname(name);
    }
out:
    return (struct dentry *)ERR_PTR(err);
}
/* cr_mknod (struct path variant)
 *
 * Creates regular files or fifos (no devices) via sys_mknod(), making them
 * anonymous (unlinked) if desired, and fills in the caller's struct path
 * for the resulting object.  When unlinked_id is non-zero the object is
 * created under a name generated by cr_anonymous_rename() and removed again
 * with sys_unlink() once the path has been obtained.
 *
 * In the event of an error, no dput() or path_put() is required,
 * otherwise they are.
 *
 * In the event that an object exists with the given name, it will be
 * checked for the proper type prior to return, yielding -EEXIST on conflict.
 *
 * Returns the (non-negative) result of cr_kern_path() on success, or a
 * negative error: -EINVAL for a type other than S_IFREG/S_IFIFO (checked in
 * CRI_DEBUG builds only), -ENOMEM if the anonymous name cannot be generated,
 * -EEXIST on a type conflict, or the error from sys_mknod()/cr_kern_path().
 */
int
cr_mknod(cr_errbuf_t *eb, struct path *path, const char *name, int mode, unsigned long unlinked_id)
{
    mm_segment_t saved_fs;
    int rc;

#if CRI_DEBUG
    /* first validate mode */
    if (((mode & S_IFMT) != S_IFREG) && ((mode & S_IFMT) != S_IFIFO)) {
        CR_ERR_EB(eb, "Unknown/invalid type %d passed to cr_mknod %s.", (mode&S_IFMT), name);
        return -EINVAL;
    }
#endif

    if (unlinked_id) {
        /* From here on 'name' is the generated replacement, which we own. */
        name = cr_anonymous_rename(eb, name, unlinked_id);
        if (!name) {
            CR_ERR_EB(eb, "cr_mknod - failed to rename unlinked object");
            return -ENOMEM;
        }
    }

    /* sys_mknod() expects a user pointer; widen the address limit around it */
    saved_fs = get_fs();
    set_fs(KERNEL_DS);
    rc = sys_mknod(name, mode, 0);
    set_fs(saved_fs);

    /* -EEXIST is tolerated: the existing object may be the one we want */
    if ((rc < 0) && (rc != -EEXIST)) {
        goto release_name;
    }

    /* Now get the (struct path) for the newly-created object.
     * YES, there is a potential race, but the type check below confirms
     * that we have the right kind of object.
     */
    rc = cr_kern_path(name, LOOKUP_FOLLOW, path);
    if (rc < 0) {
        CR_ERR_EB(eb, "cr_mknod: cr_kern_path(%s) returned %d after sys_mknod()", name, rc);
        goto release_name;
    }

    /* Needed for sys_mknod() == -EEXIST and for the mknod-to-lookup race. */
    if ((path->dentry->d_inode->i_mode ^ mode) & S_IFMT) {
        CR_ERR_EB(eb, "cr_mknod: cr_kern_path(%s) found conflicting object", name);
        rc = -EEXIST;
        path_put(path);
        goto release_name;
    }

    /* unlink if required */
    if (unlinked_id) {
        /* Note possibility of silent failure here: */
        saved_fs = get_fs();
        set_fs(KERNEL_DS);
        (void) sys_unlink(name);
        set_fs(saved_fs);
    }

release_name:
    if (unlinked_id) {
        __putname(name);
    }
    return rc;
}