/* * Merge changes to an ASCII file into a quotause list. */ int readprivs(struct quotause *quplist, int infd) { struct quotause *qup; FILE *fp; int cnt; char *cp; struct dqblk dqblk; char *fsp, line1[BUFSIZ], line2[BUFSIZ]; lseek(infd, 0, SEEK_SET); fp = fdopen(dup(infd), "r"); if (fp == NULL) { warnx("can't re-read temp file!!"); return(0); } /* * Discard title line, then read pairs of lines to process. */ (void)fgets(line1, sizeof (line1), fp); while (fgets(line1, sizeof (line1), fp) != NULL && fgets(line2, sizeof (line2), fp) != NULL) { if ((fsp = strtok(line1, " \t:")) == NULL) { warnx("%s: bad format", line1); return(0); } if ((cp = strtok(NULL, "\n")) == NULL) { warnx("%s: %s: bad format", fsp, &fsp[strlen(fsp) + 1]); return(0); } cnt = sscanf(cp, " KBytes in use: %d, limits (soft = %d, hard = %d)", &dqblk.dqb_curblocks, &dqblk.dqb_bsoftlimit, &dqblk.dqb_bhardlimit); if (cnt != 3) { warnx("%s:%s: bad format", fsp, cp); return(0); } dqblk.dqb_curblocks = btodb((u_quad_t) dqblk.dqb_curblocks * 1024); dqblk.dqb_bsoftlimit = btodb((u_quad_t) dqblk.dqb_bsoftlimit * 1024); dqblk.dqb_bhardlimit = btodb((u_quad_t) dqblk.dqb_bhardlimit * 1024); if ((cp = strtok(line2, "\n")) == NULL) { warnx("%s: %s: bad format", fsp, line2); return(0); } cnt = sscanf(cp, "\tinodes in use: %d, limits (soft = %d, hard = %d)", &dqblk.dqb_curinodes, &dqblk.dqb_isoftlimit, &dqblk.dqb_ihardlimit); if (cnt != 3) { warnx("%s: %s: bad format", fsp, line2); return(0); } for (qup = quplist; qup; qup = qup->next) { if (strcmp(fsp, qup->fsname)) continue; /* * Cause time limit to be reset when the quota * is next used if previously had no soft limit * or were under it, but now have a soft limit * and are over it. 
*/ if (dqblk.dqb_bsoftlimit && qup->dqblk.dqb_curblocks >= dqblk.dqb_bsoftlimit && (qup->dqblk.dqb_bsoftlimit == 0 || qup->dqblk.dqb_curblocks < qup->dqblk.dqb_bsoftlimit)) qup->dqblk.dqb_btime = 0; if (dqblk.dqb_isoftlimit && qup->dqblk.dqb_curinodes >= dqblk.dqb_isoftlimit && (qup->dqblk.dqb_isoftlimit == 0 || qup->dqblk.dqb_curinodes < qup->dqblk.dqb_isoftlimit)) qup->dqblk.dqb_itime = 0; qup->dqblk.dqb_bsoftlimit = dqblk.dqb_bsoftlimit; qup->dqblk.dqb_bhardlimit = dqblk.dqb_bhardlimit; qup->dqblk.dqb_isoftlimit = dqblk.dqb_isoftlimit; qup->dqblk.dqb_ihardlimit = dqblk.dqb_ihardlimit; qup->flags |= FOUND; if (dqblk.dqb_curblocks == qup->dqblk.dqb_curblocks && dqblk.dqb_curinodes == qup->dqblk.dqb_curinodes) break; warnx("%s: cannot change current allocation", fsp); break; } } fclose(fp); /* * Disable quotas for any filesystems that have not been found. */ for (qup = quplist; qup; qup = qup->next) { if (qup->flags & FOUND) { qup->flags &= ~FOUND; continue; } qup->dqblk.dqb_bsoftlimit = 0; qup->dqblk.dqb_bhardlimit = 0; qup->dqblk.dqb_isoftlimit = 0; qup->dqblk.dqb_ihardlimit = 0; } return(1); }
/*
 * Handles the read/write request given in 'bp' using the vnode's VOP_BMAP
 * and VOP_STRATEGY operations.
 *
 * 'obp' is a pointer to the original request fed to the vnd device.
 *
 * The request is split into bsize-sized pieces; each piece is mapped with
 * VOP_BMAP and then issued through a nested buffer (nestiobuf).  If BMAP
 * fails or reports a hole, the remaining bytes are accounted in 'skipped'
 * and the loop stops; completion of all pieces is signalled through
 * nestiobuf_done().
 */
static void
handle_with_strategy(struct vnd_softc *vnd, const struct buf *obp,
    struct buf *bp)
{
	int bsize, error, flags, skipped;
	size_t resid, sz;
	off_t bn, offset;
	struct vnode *vp;

	flags = obp->b_flags;

	/* Writes must bump v_numoutput under the vnode interlock. */
	if (!(flags & B_READ)) {
		vp = bp->b_vp;
		mutex_enter(vp->v_interlock);
		vp->v_numoutput++;
		mutex_exit(vp->v_interlock);
	}

	/* convert to a byte offset within the file. */
	bn = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize;

	bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize;
	skipped = 0;

	/*
	 * Break the request into bsize pieces and feed them
	 * sequentially using VOP_BMAP/VOP_STRATEGY.
	 * We do it this way to keep from flooding NFS servers if we
	 * are connected to an NFS file.  This places the burden on
	 * the client rather than the server.
	 */
	error = 0;
	bp->b_resid = bp->b_bcount;
	for (offset = 0, resid = bp->b_resid; resid;
	    resid -= sz, offset += sz) {
		struct buf *nbp;
		daddr_t nbn;
		int off, nra;

		nra = 0;
		vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra);
		VOP_UNLOCK(vnd->sc_vp);

		/* BMAP returning -1 means a hole in the file. */
		if (error == 0 && (long)nbn == -1)
			error = EIO;

		/*
		 * If there was an error or a hole in the file...punt.
		 * Note that we may have to wait for any operations
		 * that we have already fired off before releasing
		 * the buffer.
		 *
		 * XXX we could deal with holes here but it would be
		 * a hassle (in the write case).
		 */
		if (error) {
			skipped += resid;
			break;
		}

#ifdef DEBUG
		if (!dovndcluster)
			nra = 0;
#endif

		/* Size of this piece: up to (1 + read-ahead) blocks. */
		off = bn % bsize;
		sz = MIN(((off_t)1 + nra) * bsize - off, resid);
#ifdef DEBUG
		if (vnddebug & VDB_IO)
			printf("vndstrategy: vp %p/%p bn 0x%qx/0x%" PRIx64
			    " sz 0x%zx\n", vnd->sc_vp, vp, (long long)bn,
			    nbn, sz);
#endif

		/* Issue the piece as a nested buffer of 'bp'. */
		nbp = getiobuf(vp, true);
		nestiobuf_setup(bp, nbp, offset, sz);
		nbp->b_blkno = nbn + btodb(off);

#if 0 /* XXX #ifdef DEBUG */
		if (vnddebug & VDB_IO)
			printf("vndstart(%ld): bp %p vp %p blkno "
			    "0x%" PRIx64 " flags %x addr %p cnt 0x%x\n",
			    (long) (vnd-vnd_softc), &nbp->vb_buf,
			    nbp->vb_buf.b_vp, nbp->vb_buf.b_blkno,
			    nbp->vb_buf.b_flags, nbp->vb_buf.b_data,
			    nbp->vb_buf.b_bcount);
#endif

		VOP_STRATEGY(vp, nbp);
		bn += sz;
	}
	/* Account skipped bytes and the final error to the master buf. */
	nestiobuf_done(bp, skipped, error);
}
/*
 * If blocks are contiguous on disk, use this to provide clustered
 * read ahead.  We will read as many blocks as possible sequentially
 * and then parcel them up into logical blocks in the buffer hash table.
 *
 * Returns either the first buffer alone (when clustering is not
 * possible) or a synthetic pbuf whose pages aggregate up to 'run'
 * consecutive logical blocks; the caller issues the I/O and
 * cluster_callback() distributes completion to the member bufs.
 */
static struct buf *
cluster_rbuild(struct vnode *vp, u_quad_t filesize, daddr_t lbn,
    daddr_t blkno, long size, int run, int gbflags, struct buf *fbp)
{
	struct buf *bp, *tbp;
	daddr_t bn;
	off_t off;
	long tinc, tsize;
	int i, inc, j, k, toff;

	KASSERT(size == vp->v_mount->mnt_stat.f_iosize,
	    ("cluster_rbuild: size %ld != f_iosize %jd\n",
	    size, (intmax_t)vp->v_mount->mnt_stat.f_iosize));

	/*
	 * avoid a division
	 */
	/* Trim the run so it does not extend past end of file. */
	while ((u_quad_t) size * (lbn + run) > filesize) {
		--run;
	}

	if (fbp) {
		/* Caller supplied the first buffer; reuse it. */
		tbp = fbp;
		tbp->b_iocmd = BIO_READ;
	} else {
		tbp = getblk(vp, lbn, size, 0, 0, gbflags);
		if (tbp->b_flags & B_CACHE)
			return tbp;
		tbp->b_flags |= B_ASYNC | B_RAM;
		tbp->b_iocmd = BIO_READ;
	}
	tbp->b_blkno = blkno;
	/* Clustering only works for VMIO-backed, multi-block runs. */
	if( (tbp->b_flags & B_MALLOC) ||
		((tbp->b_flags & B_VMIO) == 0) || (run <= 1) )
		return tbp;
	bp = trypbuf(&cluster_pbuf_freecnt);
	if (bp == NULL)
		return tbp;

	/*
	 * We are synthesizing a buffer out of vm_page_t's, but
	 * if the block size is not page aligned then the starting
	 * address may not be either.  Inherit the b_data offset
	 * from the original buffer.
	 */
	bp->b_flags = B_ASYNC | B_CLUSTER | B_VMIO;
	if ((gbflags & GB_UNMAPPED) != 0) {
		bp->b_data = unmapped_buf;
	} else {
		bp->b_data = (char *)((vm_offset_t)bp->b_data |
		    ((vm_offset_t)tbp->b_data & PAGE_MASK));
	}
	bp->b_iocmd = BIO_READ;
	bp->b_iodone = cluster_callback;
	bp->b_blkno = blkno;
	bp->b_lblkno = lbn;
	bp->b_offset = tbp->b_offset;
	KASSERT(bp->b_offset != NOOFFSET,
	    ("cluster_rbuild: no buffer offset"));
	pbgetvp(vp, bp);

	TAILQ_INIT(&bp->b_cluster.cluster_head);

	bp->b_bcount = 0;
	bp->b_bufsize = 0;
	bp->b_npages = 0;

	inc = btodb(size);
	/* Gather up to 'run' member buffers into the cluster. */
	for (bn = blkno, i = 0; i < run; ++i, bn += inc) {
		if (i == 0) {
			/* First block: shared-busy its pages for the I/O. */
			VM_OBJECT_WLOCK(tbp->b_bufobj->bo_object);
			vfs_drain_busy_pages(tbp);
			vm_object_pip_add(tbp->b_bufobj->bo_object,
			    tbp->b_npages);
			for (k = 0; k < tbp->b_npages; k++)
				vm_page_sbusy(tbp->b_pages[k]);
			VM_OBJECT_WUNLOCK(tbp->b_bufobj->bo_object);
		} else {
			/* Respect the mount's maximum I/O size. */
			if ((bp->b_npages * PAGE_SIZE) +
			    round_page(size) > vp->v_mount->mnt_iosize_max) {
				break;
			}

			tbp = getblk(vp, lbn + i, size, 0, 0, GB_LOCK_NOWAIT |
			    (gbflags & GB_UNMAPPED));

			/* Don't wait around for locked bufs. */
			if (tbp == NULL)
				break;

			/*
			 * Stop scanning if the buffer is fully valid
			 * (marked B_CACHE), or locked (may be doing a
			 * background write), or if the buffer is not
			 * VMIO backed.  The clustering code can only deal
			 * with VMIO-backed buffers.  The bo lock is not
			 * required for the BKGRDINPROG check since it
			 * can not be set without the buf lock.
			 */
			if ((tbp->b_vflags & BV_BKGRDINPROG) ||
			    (tbp->b_flags & B_CACHE) ||
			    (tbp->b_flags & B_VMIO) == 0) {
				bqrelse(tbp);
				break;
			}

			/*
			 * The buffer must be completely invalid in order to
			 * take part in the cluster.  If it is partially valid
			 * then we stop.
			 */
			off = tbp->b_offset;
			tsize = size;
			VM_OBJECT_WLOCK(tbp->b_bufobj->bo_object);
			for (j = 0; tsize > 0; j++) {
				toff = off & PAGE_MASK;
				tinc = tsize;
				if (toff + tinc > PAGE_SIZE)
					tinc = PAGE_SIZE - toff;
				VM_OBJECT_ASSERT_WLOCKED(tbp->b_pages[j]->object);
				if ((tbp->b_pages[j]->valid &
				    vm_page_bits(toff, tinc)) != 0)
					break;
				if (vm_page_xbusied(tbp->b_pages[j]))
					break;
				vm_object_pip_add(tbp->b_bufobj->bo_object, 1);
				vm_page_sbusy(tbp->b_pages[j]);
				off += tinc;
				tsize -= tinc;
			}
			if (tsize > 0) {
				/* Partially valid: undo busying and stop. */
clean_sbusy:
				vm_object_pip_add(tbp->b_bufobj->bo_object, -j);
				for (k = 0; k < j; k++)
					vm_page_sunbusy(tbp->b_pages[k]);
				VM_OBJECT_WUNLOCK(tbp->b_bufobj->bo_object);
				bqrelse(tbp);
				break;
			}
			VM_OBJECT_WUNLOCK(tbp->b_bufobj->bo_object);

			/*
			 * Set a read-ahead mark as appropriate
			 */
			if ((fbp && (i == 1)) || (i == (run - 1)))
				tbp->b_flags |= B_RAM;

			/*
			 * Set the buffer up for an async read (XXX should
			 * we do this only if we do not wind up brelse()ing?).
			 * Set the block number if it isn't set, otherwise
			 * if it is make sure it matches the block number we
			 * expect.
			 */
			tbp->b_flags |= B_ASYNC;
			tbp->b_iocmd = BIO_READ;
			if (tbp->b_blkno == tbp->b_lblkno) {
				tbp->b_blkno = bn;
			} else if (tbp->b_blkno != bn) {
				VM_OBJECT_WLOCK(tbp->b_bufobj->bo_object);
				goto clean_sbusy;
			}
		}
		/*
		 * XXX fbp from caller may not be B_ASYNC, but we are going
		 * to biodone() it in cluster_callback() anyway
		 */
		BUF_KERNPROC(tbp);
		TAILQ_INSERT_TAIL(&bp->b_cluster.cluster_head,
		    tbp, b_cluster.cluster_entry);
		/* Fold the member's pages into the synthetic buffer. */
		VM_OBJECT_WLOCK(tbp->b_bufobj->bo_object);
		for (j = 0; j < tbp->b_npages; j += 1) {
			vm_page_t m;
			m = tbp->b_pages[j];
			if ((bp->b_npages == 0) ||
			    (bp->b_pages[bp->b_npages-1] != m)) {
				bp->b_pages[bp->b_npages] = m;
				bp->b_npages++;
			}
			if (m->valid == VM_PAGE_BITS_ALL)
				tbp->b_pages[j] = bogus_page;
		}
		VM_OBJECT_WUNLOCK(tbp->b_bufobj->bo_object);
		/*
		 * Don't inherit tbp->b_bufsize as it may be larger due to
		 * a non-page-aligned size.  Instead just aggregate using
		 * 'size'.
		 */
		if (tbp->b_bcount != size)
			printf("warning: tbp->b_bcount wrong %ld vs %ld\n",
			    tbp->b_bcount, size);
		if (tbp->b_bufsize != size)
			printf("warning: tbp->b_bufsize wrong %ld vs %ld\n",
			    tbp->b_bufsize, size);
		bp->b_bcount += size;
		bp->b_bufsize += size;
	}

	/*
	 * Fully valid pages in the cluster are already good and do not need
	 * to be re-read from disk.  Replace the page with bogus_page
	 */
	VM_OBJECT_WLOCK(bp->b_bufobj->bo_object);
	for (j = 0; j < bp->b_npages; j++) {
		VM_OBJECT_ASSERT_WLOCKED(bp->b_pages[j]->object);
		if (bp->b_pages[j]->valid == VM_PAGE_BITS_ALL)
			bp->b_pages[j] = bogus_page;
	}
	VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object);
	if (bp->b_bufsize > bp->b_kvasize)
		panic("cluster_rbuild: b_bufsize(%ld) > b_kvasize(%d)\n",
		    bp->b_bufsize, bp->b_kvasize);

	if (buf_mapped(bp)) {
		pmap_qenter(trunc_page((vm_offset_t) bp->b_data),
		    (vm_page_t *)bp->b_pages, bp->b_npages);
	}
	return (bp);
}
/* * Doadump comes here after turning off memory management and * getting on the dump stack, either when called above, or by * the auto-restart code. */ void dumpsys() { int maj; int psize; daddr_t blkno; /* current block to write */ /* dump routine */ int (*dump)(dev_t, daddr_t, caddr_t, size_t); int pg; /* page being dumped */ paddr_t maddr; /* PA being dumped */ int error; /* error code from (*dump)() */ kcore_seg_t *kseg_p; cpu_kcore_hdr_t *chdr_p; char dump_hdr[dbtob(1)]; /* XXX assume hdr fits in 1 block */ extern int msgbufmapped; msgbufmapped = 0; /* Make sure dump device is valid. */ if (dumpdev == NODEV) return; if (dumpsize == 0) { dumpconf(); if (dumpsize == 0) return; } maj = major(dumpdev); if (dumplo < 0) { printf("\ndump to dev %u,%u not possible\n", maj, minor(dumpdev)); return; } dump = bdevsw[maj].d_dump; blkno = dumplo; printf("\ndumping to dev %u,%u offset %ld\n", maj, minor(dumpdev), dumplo); #ifdef UVM_SWAP_ENCRYPT uvm_swap_finicrypt_all(); #endif /* Setup the dump header */ kseg_p = (kcore_seg_t *)dump_hdr; chdr_p = (cpu_kcore_hdr_t *)&dump_hdr[ALIGN(sizeof(*kseg_p))]; bzero(dump_hdr, sizeof(dump_hdr)); CORE_SETMAGIC(*kseg_p, KCORE_MAGIC, MID_MACHINE, CORE_CPU); kseg_p->c_size = dbtob(1) - ALIGN(sizeof(*kseg_p)); *chdr_p = cpu_kcore_hdr; printf("dump "); psize = (*bdevsw[maj].d_psize)(dumpdev); if (psize == -1) { printf("area unavailable\n"); return; } /* Dump the header. 
*/ error = (*dump)(dumpdev, blkno++, (caddr_t)dump_hdr, dbtob(1)); if (error != 0) goto abort; maddr = (paddr_t)0; for (pg = 0; pg < dumpsize; pg++) { #define NPGMB (1024 * 1024 / PAGE_SIZE) /* print out how many MBs we have dumped */ if (pg != 0 && (pg % NPGMB) == 0) printf("%d ", pg / NPGMB); #undef NPGMB error = (*dump)(dumpdev, blkno, (caddr_t)maddr, PAGE_SIZE); if (error == 0) { maddr += PAGE_SIZE; blkno += btodb(PAGE_SIZE); } else break; } abort: switch (error) { case 0: printf("succeeded\n"); break; case ENXIO: printf("device bad\n"); break; case EFAULT: printf("device not ready\n"); break; case EINVAL: printf("area improper\n"); break; case EIO: printf("i/o error\n"); break; case EINTR: printf("aborted from console\n"); break; default: printf("error %d\n", error); break; } }
/*
 * vnd(4) ioctl handler.
 *
 * Configures (VNDIOCSET*) or unconfigures (VNDIOCCLR*) a vnode disk,
 * answers status queries (VNDIOCGET*), and services the standard
 * disklabel/wedge/cache-sync disk ioctls.  VNDIOCSET opens the backing
 * file, optionally initializes compressed-image state, derives or
 * validates the disk geometry, starts the worker kthread, and attaches
 * the disk; on failure it unwinds through close_and_exit /
 * unlock_and_exit.
 */
/* ARGSUSED */
static int
vndioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	bool force;
	int unit = vndunit(dev);
	struct vnd_softc *vnd;
	struct vnd_ioctl *vio;
	struct vattr vattr;
	struct pathbuf *pb;
	struct nameidata nd;
	int error, part, pmask;
	uint64_t geomsize;
	int fflags;
#ifdef __HAVE_OLD_DISKLABEL
	struct disklabel newlabel;
#endif
	struct dkwedge_info *dkw;
	struct dkwedge_list *dkwl;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndioctl(0x%"PRIx64", 0x%lx, %p, 0x%x, %p): unit %d\n",
		    dev, cmd, data, flag, l->l_proc, unit);
#endif
	/* GET-style queries may target an unconfigured unit. */
	vnd = device_lookup_private(&vnd_cd, unit);
	if (vnd == NULL &&
#ifdef COMPAT_30
	    cmd != VNDIOCGET30 &&
#endif
#ifdef COMPAT_50
	    cmd != VNDIOCGET50 &&
#endif
	    cmd != VNDIOCGET)
		return ENXIO;
	vio = (struct vnd_ioctl *)data;

	/* Must be open for writes for these commands... */
	switch (cmd) {
	case VNDIOCSET:
	case VNDIOCCLR:
#ifdef COMPAT_50
	case VNDIOCSET50:
	case VNDIOCCLR50:
#endif
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCSDINFO:
	case ODIOCWDINFO:
#endif
	case DIOCKLABEL:
	case DIOCWLABEL:
		if ((flag & FWRITE) == 0)
			return EBADF;
	}

	/* Must be initialized for these... */
	switch (cmd) {
	case VNDIOCCLR:
#ifdef VNDIOCCLR50
	case VNDIOCCLR50:
#endif
	case DIOCGDINFO:
	case DIOCSDINFO:
	case DIOCWDINFO:
	case DIOCGPART:
	case DIOCKLABEL:
	case DIOCWLABEL:
	case DIOCGDEFLABEL:
	case DIOCCACHESYNC:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
	case ODIOCSDINFO:
	case ODIOCWDINFO:
	case ODIOCGDEFLABEL:
#endif
		if ((vnd->sc_flags & VNF_INITED) == 0)
			return ENXIO;
	}

	switch (cmd) {
#ifdef VNDIOCSET50
	case VNDIOCSET50:
#endif
	case VNDIOCSET:
		if (vnd->sc_flags & VNF_INITED)
			return EBUSY;

		if ((error = vndlock(vnd)) != 0)
			return error;

		/* Open the backing file, read-only unless requested RW. */
		fflags = FREAD;
		if ((vio->vnd_flags & VNDIOF_READONLY) == 0)
			fflags |= FWRITE;
		error = pathbuf_copyin(vio->vnd_file, &pb);
		if (error) {
			goto unlock_and_exit;
		}
		NDINIT(&nd, LOOKUP, FOLLOW, pb);
		if ((error = vn_open(&nd, fflags, 0)) != 0) {
			pathbuf_destroy(pb);
			goto unlock_and_exit;
		}
		KASSERT(l);
		error = VOP_GETATTR(nd.ni_vp, &vattr, l->l_cred);
		if (!error && nd.ni_vp->v_type != VREG)
			error = EOPNOTSUPP;
		if (!error && vattr.va_bytes < vattr.va_size)
			/* File is definitely sparse, use vn_rdwr() */
			vnd->sc_flags |= VNF_USE_VN_RDWR;
		if (error) {
			VOP_UNLOCK(nd.ni_vp);
			goto close_and_exit;
		}

		/* If using a compressed file, initialize its info */
		/* (or abort with an error if kernel has no compression) */
		if (vio->vnd_flags & VNF_COMP) {
#ifdef VND_COMPRESSION
			struct vnd_comp_header *ch;
			int i;
			u_int32_t comp_size;
			u_int32_t comp_maxsize;

			/* allocate space for compresed file header */
			ch = malloc(sizeof(struct vnd_comp_header),
			    M_TEMP, M_WAITOK);

			/* read compressed file header */
			error = vn_rdwr(UIO_READ, nd.ni_vp, (void *)ch,
			    sizeof(struct vnd_comp_header), 0, UIO_SYSSPACE,
			    IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL);
			if (error) {
				free(ch, M_TEMP);
				VOP_UNLOCK(nd.ni_vp);
				goto close_and_exit;
			}

			/* save some header info */
			vnd->sc_comp_blksz = ntohl(ch->block_size);
			/* note last offset is the file byte size */
			vnd->sc_comp_numoffs = ntohl(ch->num_blocks)+1;
			free(ch, M_TEMP);
			/* Block size must be a positive DEV_BSIZE multiple. */
			if (vnd->sc_comp_blksz == 0 ||
			    vnd->sc_comp_blksz % DEV_BSIZE !=0) {
				VOP_UNLOCK(nd.ni_vp);
				error = EINVAL;
				goto close_and_exit;
			}
			/* Offset table must fit inside the file. */
			if (sizeof(struct vnd_comp_header) +
			    sizeof(u_int64_t) * vnd->sc_comp_numoffs >
			    vattr.va_size) {
				VOP_UNLOCK(nd.ni_vp);
				error = EINVAL;
				goto close_and_exit;
			}

			/* set decompressed file size */
			vattr.va_size =
			    ((u_quad_t)vnd->sc_comp_numoffs - 1) *
			    (u_quad_t)vnd->sc_comp_blksz;

			/* allocate space for all the compressed offsets */
			vnd->sc_comp_offsets =
			    malloc(sizeof(u_int64_t) * vnd->sc_comp_numoffs,
			    M_DEVBUF, M_WAITOK);

			/* read in the offsets */
			error = vn_rdwr(UIO_READ, nd.ni_vp,
			    (void *)vnd->sc_comp_offsets,
			    sizeof(u_int64_t) * vnd->sc_comp_numoffs,
			    sizeof(struct vnd_comp_header), UIO_SYSSPACE,
			    IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL);
			if (error) {
				VOP_UNLOCK(nd.ni_vp);
				goto close_and_exit;
			}
			/*
			 * find largest block size (used for allocation limit).
			 * Also convert offset to native byte order.
			 */
			comp_maxsize = 0;
			for (i = 0; i < vnd->sc_comp_numoffs - 1; i++) {
				vnd->sc_comp_offsets[i] =
				    be64toh(vnd->sc_comp_offsets[i]);
				comp_size =
				    be64toh(vnd->sc_comp_offsets[i + 1])
				    - vnd->sc_comp_offsets[i];
				if (comp_size > comp_maxsize)
					comp_maxsize = comp_size;
			}
			vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1] =
			    be64toh(vnd->sc_comp_offsets[vnd->sc_comp_numoffs
			    - 1]);

			/* create compressed data buffer */
			vnd->sc_comp_buff = malloc(comp_maxsize,
			    M_DEVBUF, M_WAITOK);

			/* create decompressed buffer */
			vnd->sc_comp_decombuf = malloc(vnd->sc_comp_blksz,
			    M_DEVBUF, M_WAITOK);
			vnd->sc_comp_buffblk = -1;

			/* Initialize decompress stream */
			memset(&vnd->sc_comp_stream, 0, sizeof(z_stream));
			vnd->sc_comp_stream.zalloc = vnd_alloc;
			vnd->sc_comp_stream.zfree = vnd_free;
			error = inflateInit2(&vnd->sc_comp_stream, MAX_WBITS);
			if (error) {
				if (vnd->sc_comp_stream.msg)
					printf("vnd%d: compressed file, %s\n",
					    unit, vnd->sc_comp_stream.msg);
				VOP_UNLOCK(nd.ni_vp);
				error = EINVAL;
				goto close_and_exit;
			}

			/* Compressed images are always read-only. */
			vnd->sc_flags |= VNF_COMP | VNF_READONLY;
#else /* !VND_COMPRESSION */
			VOP_UNLOCK(nd.ni_vp);
			error = EOPNOTSUPP;
			goto close_and_exit;
#endif /* VND_COMPRESSION */
		}

		VOP_UNLOCK(nd.ni_vp);
		vnd->sc_vp = nd.ni_vp;
		vnd->sc_size = btodb(vattr.va_size);	/* note truncation */

		/*
		 * Use pseudo-geometry specified.  If none was provided,
		 * use "standard" Adaptec fictitious geometry.
		 */
		if (vio->vnd_flags & VNDIOF_HASGEOM) {

			memcpy(&vnd->sc_geom, &vio->vnd_geom,
			    sizeof(vio->vnd_geom));

			/*
			 * Sanity-check the sector size.
			 * XXX Don't allow secsize < DEV_BSIZE.	 Should
			 * XXX we?
			 */
			if (vnd->sc_geom.vng_secsize < DEV_BSIZE ||
			    (vnd->sc_geom.vng_secsize % DEV_BSIZE) != 0 ||
			    vnd->sc_geom.vng_ncylinders == 0 ||
			    (vnd->sc_geom.vng_ntracks *
			    vnd->sc_geom.vng_nsectors) == 0) {
				error = EINVAL;
				goto close_and_exit;
			}

			/*
			 * Compute the size (in DEV_BSIZE blocks) specified
			 * by the geometry.
			 */
			geomsize = (vnd->sc_geom.vng_nsectors *
			    vnd->sc_geom.vng_ntracks *
			    vnd->sc_geom.vng_ncylinders) *
			    (vnd->sc_geom.vng_secsize / DEV_BSIZE);

			/*
			 * Sanity-check the size against the specified
			 * geometry.
			 */
			if (vnd->sc_size < geomsize) {
				error = EINVAL;
				goto close_and_exit;
			}
		} else if (vnd->sc_size >= (32 * 64)) {
			/*
			 * Size must be at least 2048 DEV_BSIZE blocks
			 * (1M) in order to use this geometry.
			 */
			vnd->sc_geom.vng_secsize = DEV_BSIZE;
			vnd->sc_geom.vng_nsectors = 32;
			vnd->sc_geom.vng_ntracks = 64;
			vnd->sc_geom.vng_ncylinders = vnd->sc_size / (64 * 32);
		} else {
			/* Tiny image: one sector per track/cylinder. */
			vnd->sc_geom.vng_secsize = DEV_BSIZE;
			vnd->sc_geom.vng_nsectors = 1;
			vnd->sc_geom.vng_ntracks = 1;
			vnd->sc_geom.vng_ncylinders = vnd->sc_size;
		}

		vnd_set_geometry(vnd);

		if (vio->vnd_flags & VNDIOF_READONLY) {
			vnd->sc_flags |= VNF_READONLY;
		}

		if ((error = vndsetcred(vnd, l->l_cred)) != 0)
			goto close_and_exit;

		vndthrottle(vnd, vnd->sc_vp);
		vio->vnd_osize = dbtob(vnd->sc_size);
#ifdef VNDIOCSET50
		if (cmd != VNDIOCSET50)
#endif
			vio->vnd_size = dbtob(vnd->sc_size);
		vnd->sc_flags |= VNF_INITED;

		/* create the kernel thread, wait for it to be up */
		error = kthread_create(PRI_NONE, 0, NULL, vndthread, vnd,
		    &vnd->sc_kthread, "%s", device_xname(vnd->sc_dev));
		if (error)
			goto close_and_exit;
		while ((vnd->sc_flags & VNF_KTHREAD) == 0) {
			tsleep(&vnd->sc_kthread, PRIBIO, "vndthr", 0);
		}
#ifdef DEBUG
		if (vnddebug & VDB_INIT)
			printf("vndioctl: SET vp %p size 0x%lx %d/%d/%d/%d\n",
			    vnd->sc_vp, (unsigned long) vnd->sc_size,
			    vnd->sc_geom.vng_secsize,
			    vnd->sc_geom.vng_nsectors,
			    vnd->sc_geom.vng_ntracks,
			    vnd->sc_geom.vng_ncylinders);
#endif

		/* Attach the disk. */
		disk_attach(&vnd->sc_dkdev);
		disk_blocksize(&vnd->sc_dkdev, vnd->sc_geom.vng_secsize);

		/* Initialize the xfer and buffer pools. */
		pool_init(&vnd->sc_vxpool, sizeof(struct vndxfer), 0,
		    0, 0, "vndxpl", NULL, IPL_BIO);

		vndunlock(vnd);

		pathbuf_destroy(pb);

		/* Discover wedges on this disk */
		dkwedge_discover(&vnd->sc_dkdev);

		break;

		/* Error unwind for VNDIOCSET: close file, then unlock. */
close_and_exit:
		(void) vn_close(nd.ni_vp, fflags, l->l_cred);
		pathbuf_destroy(pb);
unlock_and_exit:
#ifdef VND_COMPRESSION
		/* free any allocated memory (for compressed file) */
		if (vnd->sc_comp_offsets) {
			free(vnd->sc_comp_offsets, M_DEVBUF);
			vnd->sc_comp_offsets = NULL;
		}
		if (vnd->sc_comp_buff) {
			free(vnd->sc_comp_buff, M_DEVBUF);
			vnd->sc_comp_buff = NULL;
		}
		if (vnd->sc_comp_decombuf) {
			free(vnd->sc_comp_decombuf, M_DEVBUF);
			vnd->sc_comp_decombuf = NULL;
		}
#endif /* VND_COMPRESSION */
		vndunlock(vnd);
		return error;

#ifdef VNDIOCCLR50
	case VNDIOCCLR50:
#endif
	case VNDIOCCLR:
		part = DISKPART(dev);
		pmask = (1 << part);
		force = (vio->vnd_flags & VNDIOF_FORCE) != 0;

		if ((error = vnddoclear(vnd, pmask, minor(dev), force)) != 0)
			return error;

		break;

#ifdef COMPAT_30
	case VNDIOCGET30: {
		struct vnd_user30 *vnu;
		struct vattr va;
		vnu = (struct vnd_user30 *)data;
		KASSERT(l);
		switch (error = vnd_cget(l, unit, &vnu->vnu_unit, &va)) {
		case 0:
			vnu->vnu_dev = va.va_fsid;
			vnu->vnu_ino = va.va_fileid;
			break;
		case -1:
			/* unused is not an error */
			vnu->vnu_dev = 0;
			vnu->vnu_ino = 0;
			break;
		default:
			return error;
		}
		break;
	}
#endif

#ifdef COMPAT_50
	case VNDIOCGET50: {
		struct vnd_user50 *vnu;
		struct vattr va;
		vnu = (struct vnd_user50 *)data;
		KASSERT(l);
		switch (error = vnd_cget(l, unit, &vnu->vnu_unit, &va)) {
		case 0:
			vnu->vnu_dev = va.va_fsid;
			vnu->vnu_ino = va.va_fileid;
			break;
		case -1:
			/* unused is not an error */
			vnu->vnu_dev = 0;
			vnu->vnu_ino = 0;
			break;
		default:
			return error;
		}
		break;
	}
#endif

	case VNDIOCGET: {
		struct vnd_user *vnu;
		struct vattr va;
		vnu = (struct vnd_user *)data;
		KASSERT(l);
		switch (error = vnd_cget(l, unit, &vnu->vnu_unit, &va)) {
		case 0:
			vnu->vnu_dev = va.va_fsid;
			vnu->vnu_ino = va.va_fileid;
			break;
		case -1:
			/* unused is not an error */
			vnu->vnu_dev = 0;
			vnu->vnu_ino = 0;
			break;
		default:
			return error;
		}
		break;
	}

	case DIOCGDINFO:
		*(struct disklabel *)data = *(vnd->sc_dkdev.dk_label);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
		newlabel = *(vnd->sc_dkdev.dk_label);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCGPART:
		((struct partinfo *)data)->disklab = vnd->sc_dkdev.dk_label;
		((struct partinfo *)data)->part =
		    &vnd->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
		break;

	case DIOCWDINFO:
	case DIOCSDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	{
		struct disklabel *lp;

		if ((error = vndlock(vnd)) != 0)
			return error;

		vnd->sc_flags |= VNF_LABELLING;

#ifdef __HAVE_OLD_DISKLABEL
		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
			memset(&newlabel, 0, sizeof newlabel);
			memcpy(&newlabel, data, sizeof (struct olddisklabel));
			lp = &newlabel;
		} else
#endif
		lp = (struct disklabel *)data;

		error = setdisklabel(vnd->sc_dkdev.dk_label,
		    lp, 0, vnd->sc_dkdev.dk_cpulabel);
		if (error == 0) {
			/* WDINFO additionally writes the label to disk. */
			if (cmd == DIOCWDINFO
#ifdef __HAVE_OLD_DISKLABEL
			    || cmd == ODIOCWDINFO
#endif
			   )
				error = writedisklabel(VNDLABELDEV(dev),
				    vndstrategy, vnd->sc_dkdev.dk_label,
				    vnd->sc_dkdev.dk_cpulabel);
		}

		vnd->sc_flags &= ~VNF_LABELLING;

		vndunlock(vnd);

		if (error)
			return error;
		break;
	}

	case DIOCKLABEL:
		if (*(int *)data != 0)
			vnd->sc_flags |= VNF_KLABEL;
		else
			vnd->sc_flags &= ~VNF_KLABEL;
		break;

	case DIOCWLABEL:
		if (*(int *)data != 0)
			vnd->sc_flags |= VNF_WLABEL;
		else
			vnd->sc_flags &= ~VNF_WLABEL;
		break;

	case DIOCGDEFLABEL:
		vndgetdefaultlabel(vnd, (struct disklabel *)data);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDEFLABEL:
		vndgetdefaultlabel(vnd, &newlabel);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCCACHESYNC:
		/* Flush the backing vnode's dirty data to stable storage. */
		vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_FSYNC(vnd->sc_vp, vnd->sc_cred,
		    FSYNC_WAIT | FSYNC_DATAONLY | FSYNC_CACHE, 0, 0);
		VOP_UNLOCK(vnd->sc_vp);
		return error;

	case DIOCAWEDGE:
		dkw = (void *) data;

		if ((flag & FWRITE) == 0)
			return EBADF;

		/* If the ioctl happens here, the parent is us. */
		strlcpy(dkw->dkw_parent, device_xname(vnd->sc_dev),
		    sizeof(dkw->dkw_parent));
		return dkwedge_add(dkw);

	case DIOCDWEDGE:
		dkw = (void *) data;

		if ((flag & FWRITE) == 0)
			return EBADF;

		/* If the ioctl happens here, the parent is us. */
		strlcpy(dkw->dkw_parent, device_xname(vnd->sc_dev),
		    sizeof(dkw->dkw_parent));
		return dkwedge_del(dkw);

	case DIOCLWEDGES:
		dkwl = (void *) data;

		return dkwedge_list(&vnd->sc_dkdev, dkwl, l);

	default:
		return ENOTTY;
	}

	return 0;
}
/*
 * Truncate the inode oip to at most length size, freeing the
 * disk blocks.
 *
 * Handles short symlinks and the no-op case first, then either extends
 * the file (allocating the last byte) or shrinks it: the new block
 * pointers are written to disk before any blocks are freed so a crash
 * mid-truncate leaks no blocks, indirect chains are released via
 * ext2fs_indirtrunc(), and finally i_e2fs_nblock is adjusted.
 *
 * Returns 0 or an errno; on shrink, the first error seen is carried in
 * 'allerror'.
 */
int
ext2fs_truncate(struct inode *oip, off_t length, int flags,
    struct ucred *cred)
{
	struct vnode *ovp = ITOV(oip);
	int32_t lastblock;
	int32_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR];
	int32_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR];
	struct m_ext2fs *fs;
	struct buf *bp;
	int offset, size, level;
	long count, nblocks, vflags, blocksreleased = 0;
	int i;
	int aflags, error, allerror;
	off_t osize;

	if (length < 0)
		return (EINVAL);

	/* Only regular files, directories and symlinks carry data. */
	if (ovp->v_type != VREG &&
	    ovp->v_type != VDIR &&
	    ovp->v_type != VLNK)
		return (0);

	/* Short symlinks live inside the inode itself. */
	if (ovp->v_type == VLNK && ext2fs_size(oip) < EXT2_MAXSYMLINKLEN) {
#ifdef DIAGNOSTIC
		if (length != 0)
			panic("ext2fs_truncate: partial truncate of symlink");
#endif
		memset(&oip->i_e2din->e2di_shortlink, 0, ext2fs_size(oip));
		(void)ext2fs_setsize(oip, 0);
		oip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (ext2fs_update(oip, 1));
	}
	if (ext2fs_size(oip) == length) {
		oip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (ext2fs_update(oip, 0));
	}
	fs = oip->i_e2fs;
	osize = ext2fs_size(oip);
	/*
	 * Lengthen the size of the file. We must ensure that the
	 * last byte of the file is allocated. Since the smallest
	 * value of osize is 0, length will be at least 1.
	 */
	if (osize < length) {
#if 0 /* XXX */
		if (length > fs->fs_maxfilesize)
			return (EFBIG);
#endif
		offset = blkoff(fs, length - 1);
		lbn = lblkno(fs, length - 1);
		aflags = B_CLRBUF;
		if (flags & IO_SYNC)
			aflags |= B_SYNC;
		error = ext2fs_buf_alloc(oip, lbn, offset + 1, cred, &bp,
		    aflags);
		if (error)
			return (error);
		(void)ext2fs_setsize(oip, length);
		uvm_vnp_setsize(ovp, length);
		uvm_vnp_uncache(ovp);
		if (aflags & B_SYNC)
			bwrite(bp);
		else
			bawrite(bp);
		oip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (ext2fs_update(oip, 1));
	}
	/*
	 * Shorten the size of the file. If the file is not being
	 * truncated to a block boundry, the contents of the
	 * partial block following the end of the file must be
	 * zero'ed in case it ever become accessible again because
	 * of subsequent file growth.
	 */
	offset = blkoff(fs, length);
	if (offset == 0) {
		(void)ext2fs_setsize(oip, length);
	} else {
		lbn = lblkno(fs, length);
		aflags = B_CLRBUF;
		if (flags & IO_SYNC)
			aflags |= B_SYNC;
		error = ext2fs_buf_alloc(oip, lbn, offset, cred, &bp,
		    aflags);
		if (error)
			return (error);
		(void)ext2fs_setsize(oip, length);
		size = fs->e2fs_bsize;
		uvm_vnp_setsize(ovp, length);
		uvm_vnp_uncache(ovp);
		/* Zero the tail of the now-partial last block. */
		memset(bp->b_data + offset, 0, size - offset);
		bp->b_bcount = size;
		if (aflags & B_SYNC)
			bwrite(bp);
		else
			bawrite(bp);
	}
	/*
	 * Calculate index into inode's block list of
	 * last direct and indirect blocks (if any)
	 * which we want to keep.  Lastblock is -1 when
	 * the file is truncated to 0.
	 */
	lastblock = lblkno(fs, length + fs->e2fs_bsize - 1) - 1;
	lastiblock[SINGLE] = lastblock - NDADDR;
	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
	nblocks = btodb(fs->e2fs_bsize);
	/*
	 * Update file and block pointers on disk before we start freeing
	 * blocks.  If we crash before free'ing blocks below, the blocks
	 * will be returned to the free list.  lastiblock values are also
	 * normalized to -1 for calls to ext2fs_indirtrunc below.
	 */
	memcpy(oldblks, &oip->i_e2fs_blocks[0], sizeof(oldblks));
	for (level = TRIPLE; level >= SINGLE; level--)
		if (lastiblock[level] < 0) {
			oip->i_e2fs_blocks[NDADDR + level] = 0;
			lastiblock[level] = -1;
		}
	for (i = NDADDR - 1; i > lastblock; i--)
		oip->i_e2fs_blocks[i] = 0;
	oip->i_flag |= IN_CHANGE | IN_UPDATE;
	/*
	 * NOTE(review): this assignment to 'allerror' is overwritten by
	 * the vinvalbuf() result below, so an ext2fs_update() failure
	 * here is effectively discarded — looks intentional/historical,
	 * but confirm against upstream before relying on it.
	 */
	if ((error = ext2fs_update(oip, 1)) != 0)
		allerror = error;
	/*
	 * Having written the new inode to disk, save its new configuration
	 * and put back the old block pointers long enough to process them.
	 * Note that we save the new block configuration so we can check it
	 * when we are done.
	 */
	memcpy(newblks, &oip->i_e2fs_blocks[0], sizeof(newblks));
	memcpy(&oip->i_e2fs_blocks[0], oldblks, sizeof(oldblks));
	(void)ext2fs_setsize(oip, osize);
	vflags = ((length > 0) ? V_SAVE : 0) | V_SAVEMETA;
	allerror = vinvalbuf(ovp, vflags, cred, curproc, 0, 0);

	/*
	 * Indirect blocks first.
	 */
	indir_lbn[SINGLE] = -NDADDR;
	indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) -1;
	indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1;
	for (level = TRIPLE; level >= SINGLE; level--) {
		bn = letoh32(oip->i_e2fs_blocks[NDADDR + level]);
		if (bn != 0) {
			error = ext2fs_indirtrunc(oip, indir_lbn[level],
			    fsbtodb(fs, bn), lastiblock[level], level, &count);
			if (error)
				allerror = error;
			blocksreleased += count;
			if (lastiblock[level] < 0) {
				oip->i_e2fs_blocks[NDADDR + level] = 0;
				ext2fs_blkfree(oip, bn);
				blocksreleased += nblocks;
			}
		}
		if (lastiblock[level] >= 0)
			goto done;
	}

	/*
	 * All whole direct blocks or frags.
	 */
	for (i = NDADDR - 1; i > lastblock; i--) {
		bn = letoh32(oip->i_e2fs_blocks[i]);
		if (bn == 0)
			continue;
		oip->i_e2fs_blocks[i] = 0;
		ext2fs_blkfree(oip, bn);
		blocksreleased += btodb(fs->e2fs_bsize);
	}

done:
#ifdef DIAGNOSTIC
	for (level = SINGLE; level <= TRIPLE; level++)
		if (newblks[NDADDR + level] !=
		    oip->i_e2fs_blocks[NDADDR + level])
			panic("ext2fs_truncate1");
	for (i = 0; i < NDADDR; i++)
		if (newblks[i] != oip->i_e2fs_blocks[i])
			panic("ext2fs_truncate2");
	if (length == 0 &&
	    (!LIST_EMPTY(&ovp->v_cleanblkhd) ||
	     !LIST_EMPTY(&ovp->v_dirtyblkhd)))
		panic("ext2fs_truncate3");
#endif /* DIAGNOSTIC */
	/*
	 * Put back the real size.
	 */
	(void)ext2fs_setsize(oip, length);
	/* Clamp the block count so it never goes negative. */
	if (blocksreleased >= oip->i_e2fs_nblock)
		oip->i_e2fs_nblock = 0;
	else
		oip->i_e2fs_nblock -= blocksreleased;
	oip->i_flag |= IN_CHANGE;
	return (allerror);
}
/*
 * Release blocks associated with the inode ip and stored in the indirect
 * block bn.  Blocks are free'd in LIFO order up to (but not including)
 * lastbn.  If level is greater than SINGLE, the block is an indirect block
 * and recursive calls to indirtrunc must be used to cleanse other indirect
 * blocks.
 *
 * NB: triple indirect blocks are untested.
 *
 * On return, *countp holds the number of device blocks released.
 * Returns 0 or the first errno encountered (I/O errors from reading
 * the indirect block abort immediately; later errors are accumulated).
 */
static int
ext2fs_indirtrunc(struct inode *ip, int32_t lbn, int32_t dbn,
    int32_t lastbn, int level, long *countp)
{
	int i;
	struct buf *bp;
	struct m_ext2fs *fs = ip->i_e2fs;
	int32_t *bap;
	struct vnode *vp;
	int32_t *copy = NULL, nb, nlbn, last;
	long blkcount, factor;
	int nblocks, blocksreleased = 0;
	int error = 0, allerror = 0;

	/*
	 * Calculate index in current block of last
	 * block to be kept.  -1 indicates the entire
	 * block so we need not calculate the index.
	 */
	factor = 1;
	for (i = SINGLE; i < level; i++)
		factor *= NINDIR(fs);
	last = lastbn;
	if (lastbn > 0)
		last /= factor;
	nblocks = btodb(fs->e2fs_bsize);
	/*
	 * Get buffer of block pointers, zero those entries corresponding
	 * to blocks to be free'd, and update on disk copy first.  Since
	 * double(triple) indirect before single(double) indirect, calls
	 * to bmap on these blocks will fail.  However, we already have
	 * the on disk address, so we have to set the b_blkno field
	 * explicitly instead of letting bread do everything for us.
	 */
	vp = ITOV(ip);
	bp = getblk(vp, lbn, (int)fs->e2fs_bsize, 0, 0);
	if (!(bp->b_flags & (B_DONE | B_DELWRI))) {
		/* Not cached: issue the read ourselves with the known dbn. */
		curproc->p_ru.ru_inblock++;	/* pay for read */
		bcstats.pendingreads++;
		bcstats.numreads++;
		bp->b_flags |= B_READ;
		if (bp->b_bcount > bp->b_bufsize)
			panic("ext2fs_indirtrunc: bad buffer size");
		bp->b_blkno = dbn;
		VOP_STRATEGY(bp);
		error = biowait(bp);
	}
	if (error) {
		brelse(bp);
		*countp = 0;
		return (error);
	}

	bap = (int32_t *)bp->b_data;
	if (lastbn >= 0) {
		/*
		 * Partial truncate: work from a private copy of the
		 * pointer block while the zeroed original goes to disk.
		 */
		copy = malloc(fs->e2fs_bsize, M_TEMP, M_WAITOK);
		memcpy(copy, bap, fs->e2fs_bsize);
		memset(&bap[last + 1], 0,
		    (NINDIR(fs) - (last + 1)) * sizeof(u_int32_t));
		error = bwrite(bp);
		if (error)
			allerror = error;
		bap = copy;
	}

	/*
	 * Recursively free totally unused blocks.
	 */
	for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last;
	    i--, nlbn += factor) {
		nb = letoh32(bap[i]);
		if (nb == 0)
			continue;
		if (level > SINGLE) {
			error = ext2fs_indirtrunc(ip, nlbn,
			    fsbtodb(fs, nb), (int32_t)-1, level - 1,
			    &blkcount);
			if (error)
				allerror = error;
			blocksreleased += blkcount;
		}
		ext2fs_blkfree(ip, nb);
		blocksreleased += nblocks;
	}

	/*
	 * Recursively free last partial block.
	 */
	if (level > SINGLE && lastbn >= 0) {
		last = lastbn % factor;
		nb = letoh32(bap[i]);
		if (nb != 0) {
			error = ext2fs_indirtrunc(ip, nlbn, fsbtodb(fs, nb),
			    last, level - 1, &blkcount);
			if (error)
				allerror = error;
			blocksreleased += blkcount;
		}
	}

	if (copy != NULL) {
		free(copy, M_TEMP, fs->e2fs_bsize);
	} else {
		/* Whole block freed: the cached copy is now stale. */
		bp->b_flags |= B_INVAL;
		brelse(bp);
	}

	*countp = blocksreleased;
	return (allerror);
}
/*
 * Truncate the inode oip to at most length size, freeing the
 * disk blocks.
 *
 * vp/ovp   - vnode of the inode being truncated
 * length   - new file size in bytes (must be >= 0)
 * flags    - ioflags; IO_SYNC requests synchronous metadata/data writes
 * cred     - credentials used for any block allocation
 *
 * Returns 0 on success or an errno.  On error while lengthening, the
 * pager size is rolled back to the old size.
 */
int
ext2_truncate(struct vnode *vp, off_t length, int flags, struct ucred *cred)
{
	struct vnode *ovp = vp;
	daddr_t lastblock;
	struct inode *oip;
	daddr_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR];
	daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR];
	struct ext2_sb_info *fs;
	struct buf *bp;
	int offset, size, level;
	long count, nblocks, blocksreleased = 0;
	int i;
	int aflags, error, allerror;
	off_t osize;

	/*
	 * negative file sizes will totally break the code below and
	 * are not meaningful anyways.
	 */
	if (length < 0)
		return EFBIG;

	oip = VTOI(ovp);
	/* Short symlinks live inside the inode itself; just wipe them. */
	if (ovp->v_type == VLNK &&
	    oip->i_size < ovp->v_mount->mnt_maxsymlinklen) {
#if DIAGNOSTIC
		if (length != 0)
			panic("ext2_truncate: partial truncate of symlink");
#endif
		bzero((char *)&oip->i_shortlink, (u_int)oip->i_size);
		oip->i_size = 0;
		oip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (EXT2_UPDATE(ovp, 1));
	}
	if (oip->i_size == length) {
		/* No size change: just refresh timestamps. */
		oip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (EXT2_UPDATE(ovp, 0));
	}
#if QUOTA
	if ((error = ext2_getinoquota(oip)) != 0)
		return (error);
#endif
	fs = oip->i_e2fs;
	osize = oip->i_size;
	ext2_discard_prealloc(oip);
	/*
	 * Lengthen the size of the file.  We must ensure that the
	 * last byte of the file is allocated.  Since the smallest
	 * value of osize is 0, length will be at least 1.
	 */
	if (osize < length) {
		offset = blkoff(fs, length - 1);
		lbn = lblkno(fs, length - 1);
		aflags = B_CLRBUF;
		if (flags & IO_SYNC)
			aflags |= B_SYNC;
		vnode_pager_setsize(ovp, length);
		error = ext2_balloc(oip, lbn, offset + 1, cred, &bp, aflags);
		if (error) {
			vnode_pager_setsize(ovp, osize);
			return (error);
		}
		oip->i_size = length;
		/*
		 * BUGFIX: this used to test (aflags & IO_SYNC).  aflags
		 * holds buffer flags (B_CLRBUF/B_SYNC), never the ioflag
		 * IO_SYNC, so synchronous writes were silently demoted to
		 * bawrite().  Test the caller's ioflag, as ffs_truncate does.
		 */
		if (flags & IO_SYNC)
			bwrite(bp);
		else
			bawrite(bp);
		oip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (EXT2_UPDATE(ovp, 1));
	}
	/*
	 * Shorten the size of the file.  If the file is not being
	 * truncated to a block boundry, the contents of the
	 * partial block following the end of the file must be
	 * zero'ed in case it ever become accessable again because
	 * of subsequent file growth.
	 */
	offset = blkoff(fs, length);
	if (offset == 0) {
		oip->i_size = length;
	} else {
		lbn = lblkno(fs, length);
		aflags = B_CLRBUF;
		if (flags & IO_SYNC)
			aflags |= B_SYNC;
		error = ext2_balloc(oip, lbn, offset, cred, &bp, aflags);
		if (error)
			return (error);
		oip->i_size = length;
		size = blksize(fs, oip, lbn);
		bzero((char *)bp->b_data + offset, (u_int)(size - offset));
		allocbuf(bp, size);
		/* BUGFIX: was (aflags & IO_SYNC); see note above. */
		if (flags & IO_SYNC)
			bwrite(bp);
		else
			bawrite(bp);
	}
	/*
	 * Calculate index into inode's block list of
	 * last direct and indirect blocks (if any)
	 * which we want to keep.  Lastblock is -1 when
	 * the file is truncated to 0.
	 */
	lastblock = lblkno(fs, length + fs->s_blocksize - 1) - 1;
	lastiblock[SINGLE] = lastblock - NDADDR;
	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
	nblocks = btodb(fs->s_blocksize);
	/*
	 * Update file and block pointers on disk before we start freeing
	 * blocks.  If we crash before free'ing blocks below, the blocks
	 * will be returned to the free list.  lastiblock values are also
	 * normalized to -1 for calls to ext2_indirtrunc below.
	 */
	bcopy((caddr_t)&oip->i_db[0], (caddr_t)oldblks, sizeof oldblks);
	for (level = TRIPLE; level >= SINGLE; level--)
		if (lastiblock[level] < 0) {
			oip->i_ib[level] = 0;
			lastiblock[level] = -1;
		}
	for (i = NDADDR - 1; i > lastblock; i--)
		oip->i_db[i] = 0;
	oip->i_flag |= IN_CHANGE | IN_UPDATE;
	allerror = EXT2_UPDATE(ovp, 1);
	/*
	 * Having written the new inode to disk, save its new configuration
	 * and put back the old block pointers long enough to process them.
	 * Note that we save the new block configuration so we can check it
	 * when we are done.
	 */
	bcopy((caddr_t)&oip->i_db[0], (caddr_t)newblks, sizeof newblks);
	bcopy((caddr_t)oldblks, (caddr_t)&oip->i_db[0], sizeof oldblks);
	oip->i_size = osize;
	error = vtruncbuf(ovp, length, (int)fs->s_blocksize);
	if (error && (allerror == 0))
		allerror = error;
	/*
	 * Indirect blocks first.
	 */
	indir_lbn[SINGLE] = -NDADDR;
	indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1;
	indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1;
	for (level = TRIPLE; level >= SINGLE; level--) {
		bn = oip->i_ib[level];
		if (bn != 0) {
			error = ext2_indirtrunc(oip, indir_lbn[level],
			    fsbtodoff(fs, bn), lastiblock[level], level,
			    &count);
			if (error)
				allerror = error;
			blocksreleased += count;
			if (lastiblock[level] < 0) {
				oip->i_ib[level] = 0;
				ext2_blkfree(oip, bn, fs->s_frag_size);
				blocksreleased += nblocks;
			}
		}
		if (lastiblock[level] >= 0)
			goto done;
	}
	/*
	 * All whole direct blocks or frags.
	 */
	for (i = NDADDR - 1; i > lastblock; i--) {
		long bsize;

		bn = oip->i_db[i];
		if (bn == 0)
			continue;
		oip->i_db[i] = 0;
		bsize = blksize(fs, oip, i);
		ext2_blkfree(oip, bn, bsize);
		blocksreleased += btodb(bsize);
	}
	if (lastblock < 0)
		goto done;
	/*
	 * Finally, look for a change in size of the
	 * last direct block; release any frags.
	 */
	bn = oip->i_db[lastblock];
	if (bn != 0) {
		long oldspace, newspace;

		/*
		 * Calculate amount of space we're giving
		 * back as old block size minus new block size.
		 */
		oldspace = blksize(fs, oip, lastblock);
		oip->i_size = length;
		newspace = blksize(fs, oip, lastblock);
		if (newspace == 0)
			panic("itrunc: newspace");
		if (oldspace - newspace > 0) {
			/*
			 * Block number of space to be free'd is
			 * the old block # plus the number of frags
			 * required for the storage we're keeping.
			 */
			bn += numfrags(fs, newspace);
			ext2_blkfree(oip, bn, oldspace - newspace);
			blocksreleased += btodb(oldspace - newspace);
		}
	}
done:
#if DIAGNOSTIC
	for (level = SINGLE; level <= TRIPLE; level++)
		if (newblks[NDADDR + level] != oip->i_ib[level])
			panic("itrunc1");
	for (i = 0; i < NDADDR; i++)
		if (newblks[i] != oip->i_db[i])
			panic("itrunc2");
	if (length == 0 && (!RB_EMPTY(&ovp->v_rbdirty_tree) ||
	    !RB_EMPTY(&ovp->v_rbclean_tree)))
		panic("itrunc3");
#endif /* DIAGNOSTIC */
	/*
	 * Put back the real size.
	 */
	oip->i_size = length;
	oip->i_blocks -= blocksreleased;
	if (oip->i_blocks < 0)			/* sanity */
		oip->i_blocks = 0;
	oip->i_flag |= IN_CHANGE;
	vnode_pager_setsize(ovp, length);
#if QUOTA
	ext2_chkdq(oip, -blocksreleased, NOCRED, 0);
#endif
	return (allerror);
}
/*
 * Release blocks stored in the indirect block at logical block lbn
 * (device offset doffset) of inode ip.  Entries are freed in LIFO order
 * down to (but not including) lastbn; lastbn == -1 means the entire
 * block.  For level > SINGLE, recurse into child indirect blocks.
 *
 * On return *countp holds the number of device blocks released.
 * Returns 0 or the first error seen (processing continues after
 * per-entry errors).
 */
static int
ext2_indirtrunc(struct inode *ip, daddr_t lbn, off_t doffset, daddr_t lastbn,
    int level, long *countp)
{
	int i;
	struct buf *bp;
	struct ext2_sb_info *fs = ip->i_e2fs;
	daddr_t *bap;
	struct vnode *vp;
	daddr_t *copy, nb, nlbn, last;
	long blkcount, factor;
	int nblocks, blocksreleased = 0;
	int error = 0, allerror = 0;

	/*
	 * Calculate index in current block of last
	 * block to be kept.  -1 indicates the entire
	 * block so we need not calculate the index.
	 * "factor" is the number of file blocks spanned by one entry
	 * at this indirection level.
	 */
	factor = 1;
	for (i = SINGLE; i < level; i++)
		factor *= NINDIR(fs);
	last = lastbn;
	if (lastbn > 0)
		last /= factor;
	nblocks = btodb(fs->s_blocksize);
	/*
	 * Get buffer of block pointers, zero those entries corresponding
	 * to blocks to be free'd, and update on disk copy first.  Since
	 * double(triple) indirect before single(double) indirect, calls
	 * to bmap on these blocks will fail.  However, we already have
	 * the on disk address, so we have to set the bio_offset field
	 * explicitly instead of letting bread do everything for us.
	 */
	vp = ITOV(ip);
	bp = getblk(vp, lblktodoff(fs, lbn), (int)fs->s_blocksize, 0, 0);
	if ((bp->b_flags & B_CACHE) == 0) {
		/* Buffer not cached: hand-roll a synchronous read. */
		bp->b_flags &= ~(B_ERROR | B_INVAL);
		bp->b_cmd = BUF_CMD_READ;
		if (bp->b_bcount > bp->b_bufsize)
			panic("ext2_indirtrunc: bad buffer size");
		bp->b_bio2.bio_offset = doffset;
		bp->b_bio1.bio_done = biodone_sync;
		bp->b_bio1.bio_flags |= BIO_SYNC;
		vfs_busy_pages(bp->b_vp, bp);
		vn_strategy(vp, &bp->b_bio1);
		error = biowait(&bp->b_bio1, "biord");
	}
	if (error) {
		brelse(bp);
		*countp = 0;
		return (error);
	}

	bap = (daddr_t *)bp->b_data;
	/*
	 * Work from a private copy of the pointer block while the zeroed
	 * on-disk version is written first, so a crash cannot leave
	 * pointers to freed blocks.
	 */
	copy = kmalloc(fs->s_blocksize, M_TEMP, M_WAITOK);
	bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->s_blocksize);
	bzero((caddr_t)&bap[last + 1],
	    (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t));
	if (last == -1)
		bp->b_flags |= B_INVAL;	/* whole block going away */
	error = bwrite(bp);
	if (error)
		allerror = error;
	bap = copy;

	/*
	 * Recursively free totally unused blocks, highest entry first.
	 * nlbn is the file-relative lbn addressed by entry i.
	 */
	for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last;
	    i--, nlbn += factor) {
		nb = bap[i];
		if (nb == 0)
			continue;
		if (level > SINGLE) {
			if ((error = ext2_indirtrunc(ip, nlbn,
			    fsbtodoff(fs, nb), (daddr_t)-1, level - 1,
			    &blkcount)) != 0)
				allerror = error;
			blocksreleased += blkcount;
		}
		ext2_blkfree(ip, nb, fs->s_blocksize);
		blocksreleased += nblocks;
	}

	/*
	 * Recursively free last partial block: entry "last" stays, but
	 * its sub-tree may need partial truncation.
	 */
	if (level > SINGLE && lastbn >= 0) {
		last = lastbn % factor;
		nb = bap[i];
		if (nb != 0) {
			error = ext2_indirtrunc(ip, nlbn, fsbtodoff(fs, nb),
			    last, level - 1, &blkcount);
			if (error)
				allerror = error;
			blocksreleased += blkcount;
		}
	}
	kfree(copy, M_TEMP);
	*countp = blocksreleased;
	return (allerror);
}
/*
 * Balloc defines the structure of file system storage
 * by allocating the physical blocks on a device given
 * the inode and the logical block number in a file.
 *
 * startoffset/size select the byte range to back with storage; flags
 * carries B_CLRBUF (zero new buffers) and B_SYNC (synchronous metadata
 * writes).  If bpp is non-NULL, *bpp receives a buffer for the data
 * block on success.  Returns 0 or an errno; on partial failure all
 * newly allocated blocks are unwound before returning.
 */
int
ffs1_balloc(struct inode *ip, off_t startoffset, int size, struct ucred *cred,
    int flags, struct buf **bpp)
{
	daddr_t lbn, nb, newb, pref;
	struct fs *fs;
	struct buf *bp, *nbp;
	struct vnode *vp;
	struct proc *p;
	struct indir indirs[NIADDR + 2];
	int32_t *bap;
	int deallocated, osize, nsize, num, i, error;
	int32_t *allocib, *blkp, *allocblk, allociblk[NIADDR+1];
	int unwindidx = -1;

	vp = ITOV(ip);
	fs = ip->i_fs;
	p = curproc;
	lbn = lblkno(fs, startoffset);
	size = blkoff(fs, startoffset) + size;
	if (size > fs->fs_bsize)
		panic("ffs1_balloc: blk too big");
	if (bpp != NULL)
		*bpp = NULL;
	if (lbn < 0)
		return (EFBIG);

	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */
	nb = lblkno(fs, ip->i_ffs1_size);
	if (nb < NDADDR && nb < lbn) {
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			error = ffs_realloccg(ip, nb,
			    ffs1_blkpref(ip, nb, (int)nb, &ip->i_ffs1_db[0]),
			    osize, (int)fs->fs_bsize, cred, bpp, &newb);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb, newb,
				    ip->i_ffs1_db[nb], fs->fs_bsize, osize,
				    bpp ? *bpp : NULL);

			ip->i_ffs1_size = lblktosize(fs, nb + 1);
			uvm_vnp_setsize(vp, ip->i_ffs1_size);
			ip->i_ffs1_db[nb] = newb;
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (bpp != NULL) {
				if (flags & B_SYNC)
					bwrite(*bpp);
				else
					bawrite(*bpp);
			}
		}
	}
	/*
	 * The first NDADDR blocks are direct blocks
	 */
	if (lbn < NDADDR) {
		nb = ip->i_ffs1_db[lbn];
		if (nb != 0 && ip->i_ffs1_size >= lblktosize(fs, lbn + 1)) {
			/*
			 * The block is an already-allocated direct block
			 * and the file already extends past this block,
			 * thus this must be a whole block.
			 * Just read the block (if requested).
			 */
			if (bpp != NULL) {
				error = bread(vp, lbn, fs->fs_bsize, bpp);
				if (error) {
					brelse(*bpp);
					return (error);
				}
			}
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_ffs1_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				/*
				 * The existing block is already
				 * at least as big as we want.
				 * Just read the block (if requested).
				 */
				if (bpp != NULL) {
					error = bread(vp, lbn, fs->fs_bsize,
					    bpp);
					if (error) {
						brelse(*bpp);
						return (error);
					}
					(*bpp)->b_bcount = osize;
				}
				return (0);
			} else {
				/*
				 * The existing block is smaller than we
				 * want, grow it.
				 */
				error = ffs_realloccg(ip, lbn,
				    ffs1_blkpref(ip, lbn, (int)lbn,
					&ip->i_ffs1_db[0]),
				    osize, nsize, cred, bpp, &newb);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    newb, nb, nsize, osize,
					    bpp ? *bpp : NULL);
			}
		} else {
			/*
			 * The block was not previously allocated,
			 * allocate a new block or fragment.
			 */
			if (ip->i_ffs1_size < lblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			error = ffs_alloc(ip, lbn,
			    ffs1_blkpref(ip, lbn, (int)lbn,
				&ip->i_ffs1_db[0]),
			    nsize, cred, &newb);
			if (error)
				return (error);
			if (bpp != NULL) {
				*bpp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
				if (nsize < fs->fs_bsize)
					(*bpp)->b_bcount = nsize;
				(*bpp)->b_blkno = fsbtodb(fs, newb);
				if (flags & B_CLRBUF)
					clrbuf(*bpp);
			}
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bpp ? *bpp : NULL);
		}
		ip->i_ffs1_db[lbn] = newb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (0);
	}

	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return(error);
#ifdef DIAGNOSTIC
	if (num < 1)
		panic ("ffs1_balloc: ufs_bmaparray returned indirect block");
#endif
	/*
	 * Fetch the first indirect block allocating if necessary.
	 */
	--num;
	nb = ip->i_ffs1_ib[indirs[0].in_off];
	allocib = NULL;
	allocblk = allociblk;
	if (nb == 0) {
		pref = ffs1_blkpref(ip, lbn, -indirs[0].in_off - 1, NULL);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error)
			goto fail;
		nb = newb;
		*allocblk++ = nb;	/* record for unwind on failure */
		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
		bp->b_blkno = fsbtodb(fs, nb);
		clrbuf(bp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip,
			    NDADDR + indirs[0].in_off, newb, 0, fs->fs_bsize,
			    0, bp);
			bdwrite(bp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		allocib = &ip->i_ffs1_ib[indirs[0].in_off];
		*allocib = nb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */
	for (i = 1;;) {
		error = bread(vp, indirs[i].in_lbn, (int)fs->fs_bsize, &bp);
		if (error) {
			brelse(bp);
			goto fail;
		}
		bap = (int32_t *)bp->b_data;
		nb = bap[indirs[i].in_off];
		if (i == num)
			break;		/* bp now holds the last indirect */
		i++;
		if (nb != 0) {
			brelse(bp);
			continue;
		}
		if (pref == 0)
			pref = ffs1_blkpref(ip, lbn, i - num - 1, NULL);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		clrbuf(nbp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if ((error = bwrite(nbp)) != 0) {
				brelse(bp);
				goto fail;
			}
		}
		bap[indirs[i - 1].in_off] = nb;
		if (allocib == NULL && unwindidx < 0)
			unwindidx = i - 1;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
	}
	/*
	 * Get the data block, allocating if necessary.
	 */
	if (nb == 0) {
		pref = ffs1_blkpref(ip, lbn, indirs[i].in_off, &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		if (bpp != NULL) {
			nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
			nbp->b_blkno = fsbtodb(fs, nb);
			if (flags & B_CLRBUF)
				clrbuf(nbp);
			*bpp = nbp;
		}
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[i].in_off, nb, 0, bpp ? *bpp : NULL);
		bap[indirs[i].in_off] = nb;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
		return (0);
	}
	brelse(bp);
	if (bpp != NULL) {
		if (flags & B_CLRBUF) {
			error = bread(vp, lbn, (int)fs->fs_bsize, &nbp);
			if (error) {
				brelse(nbp);
				goto fail;
			}
		} else {
			nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
			nbp->b_blkno = fsbtodb(fs, nb);
		}
		*bpp = nbp;
	}
	return (0);

fail:
	/*
	 * If we have failed to allocate any blocks, simply return the error.
	 * This is the usual case and avoids the need to fsync the file.
	 */
	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
		return (error);
	/*
	 * If we have failed part way through block allocation, we have to
	 * deallocate any indirect blocks that we have allocated.  We have to
	 * fsync the file before we start to get rid of all of its
	 * dependencies so that we do not leave them dangling.  We have to sync
	 * it at the end so that the softdep code does not find any untracked
	 * changes.  Although this is really slow, running out of disk space is
	 * not expected to be a common occurrence.  The error return from fsync
	 * is ignored as we already have an error to return to the user.
	 */
	VOP_FSYNC(vp, p->p_ucred, MNT_WAIT, p);
	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(ip, *blkp, fs->fs_bsize);
		deallocated += fs->fs_bsize;
	}
	if (allocib != NULL) {
		*allocib = 0;
	} else if (unwindidx >= 0) {
		int r;

		r = bread(vp, indirs[unwindidx].in_lbn, (int)fs->fs_bsize,
		    &bp);
		if (r)
			panic("Could not unwind indirect block, error %d", r);
		bap = (int32_t *)bp->b_data;
		bap[indirs[unwindidx].in_off] = 0;
		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
	}
	if (deallocated) {
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void)ufs_quota_free_blocks(ip, btodb(deallocated), cred);

		ip->i_ffs1_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	VOP_FSYNC(vp, p->p_ucred, MNT_WAIT, p);
	return (error);
}
/*
 * fsck pass 1 per-extent callback: validate one run of fragments claimed
 * by inode idesc->id_number, record them in the in-core busy bitmap, and
 * flag out-of-range and duplicate claims.
 *
 * Relies on file-scope fsck state: sblock, statemap side effects via
 * blkerror(), and the badblk/dupblk counters.  Returns KEEPON, SKIP, or
 * SKIP|STOP for the inode traversal machinery.
 */
int
pass1check(struct inodesc *idesc)
{
	int res = KEEPON;
	int anyout;
	int nfrags;
	daddr32_t lbn;
	daddr32_t fragno = idesc->id_blkno;
	struct dinode *dp;

	/*
	 * If this is a fallocate'd file, block numbers may be stored
	 * as negative.  In that case negate the negative numbers.
	 */
	dp = ginode(idesc->id_number);
	if (dp->di_cflags & IFALLOCATE && fragno < 0)
		fragno = -fragno;

	if ((anyout = chkrange(fragno, idesc->id_numfrags)) != 0) {
		/*
		 * Note that blkerror() exits when preening.
		 */
		blkerror(idesc->id_number, "OUT OF RANGE", fragno,
		    idesc->id_lbn * sblock.fs_frag);
		/* re-fetch: blkerror() may have invalidated dp's cache slot */
		dp = ginode(idesc->id_number);
		if ((((dp->di_mode & IFMT) == IFDIR) ||
		    ((dp->di_mode & IFMT) == IFATTRDIR)) &&
		    (idesc->id_firsthole < 0)) {
			idesc->id_firsthole = idesc->id_lbn;
		}
		if (++badblk >= MAXBAD) {
			pwarn("EXCESSIVE BAD FRAGMENTS I=%u",
			    idesc->id_number);
			if (reply("CONTINUE") == 0)
				errexit("Program terminated.");
			/*
			 * See discussion below as to why we don't
			 * want to short-circuit the processing of
			 * this inode.  However, we know that this
			 * particular block is bad, so we don't need
			 * to go through the dup check loop.
			 */
			return (SKIP | STOP);
		}
	}

	/*
	 * For each fragment, verify that it is a legal one (either
	 * by having already found the entire run to be legal, or by
	 * individual inspection), and if it is legal, see if we've
	 * seen it before or not.  If we haven't, note that we've seen
	 * it and continue on.  If we have (our in-core bitmap shows
	 * it as already being busy), then this must be a duplicate
	 * allocation.  Whine and moan accordingly.
	 *
	 * Note that for full-block allocations, this will produce
	 * a complaint for each fragment making up the block (i.e.,
	 * fs_frags' worth).  Among other things, this could be
	 * considered artificially inflating the dup-block count.
	 * However, since it is possible that one file has a full
	 * fs block allocated, but another is only claiming a frag
	 * or two out of the middle, we'll just live it.
	 */
	for (nfrags = 0; nfrags < idesc->id_numfrags; fragno++, nfrags++) {
		if (anyout && chkrange(fragno, 1)) {
			/* bad fragment number */
			res = SKIP;
		} else if (!testbmap(fragno)) {
			/* no other claims seen as yet */
			note_used(fragno);
		} else {
			/*
			 * We have a duplicate claim for the same fragment.
			 *
			 * blkerror() exits when preening.
			 *
			 * We want to report all the dups up until
			 * hitting MAXDUP.  Fortunately, blkerror()'s
			 * side-effects on statemap[] are idempotent,
			 * so the ``extra'' calls are harmless.
			 */
			lbn = idesc->id_lbn * sblock.fs_frag + nfrags;
			if (dupblk < MAXDUP)
				blkerror(idesc->id_number, "DUP", fragno,
				    lbn);
			/*
			 * Use ==, so we only complain once, no matter
			 * how far over the limit we end up going.
			 */
			if (++dupblk == MAXDUP) {
				pwarn("EXCESSIVE DUPLICATE FRAGMENTS I=%u",
				    idesc->id_number);
				if (reply("CONTINUE") == 0)
					errexit("Program terminated.");
				/*
				 * If we stop the traversal here, then
				 * there may be more dups in the
				 * inode's block list that don't get
				 * flagged.  Later, if we're told to
				 * clear one of the files claiming
				 * these blocks, but not the other, we
				 * will release blocks that are
				 * actually still in use.  An additional
				 * fsck run would be necessary to undo
				 * the damage.  So, instead of the
				 * traditional return (STOP) when told
				 * to continue, we really do just continue.
				 */
			}
			(void) find_dup_ref(fragno, idesc->id_number, lbn,
			    DB_CREATE | DB_INCR);
		}
		/*
		 * id_entryno counts the number of disk blocks found.
		 */
		idesc->id_entryno += btodb(sblock.fs_fsize);
	}
	return (res);
}
/*
 * FFS2 (64-bit block pointer) variant of balloc: allocate the physical
 * blocks backing the byte range [off, off+size) of inode ip, walking and
 * allocating indirect blocks as needed.
 *
 * flags carries B_CLRBUF (zero new buffers) and B_SYNC (synchronous
 * metadata writes).  If bpp is non-NULL, *bpp receives a buffer for the
 * data block on success.  Returns 0 or an errno; on partial failure the
 * fail: path unwinds every block allocated during this call.
 */
int
ffs2_balloc(struct inode *ip, off_t off, int size, struct ucred *cred,
    int flags, struct buf **bpp)
{
	daddr_t lbn, lastlbn, nb, newb, *blkp;
	daddr_t pref, *allocblk, allociblk[NIADDR + 1];
	daddr_t *bap, *allocib;
	int deallocated, osize, nsize, num, i, error, unwindidx, r;
	struct buf *bp, *nbp;
	struct indir indirs[NIADDR + 2];
	struct fs *fs;
	struct vnode *vp;
	struct proc *p;

	vp = ITOV(ip);
	fs = ip->i_fs;
	p = curproc;
	unwindidx = -1;

	lbn = lblkno(fs, off);
	size = blkoff(fs, off) + size;

	if (size > fs->fs_bsize)
		panic("ffs2_balloc: block too big");

	if (bpp != NULL)
		*bpp = NULL;

	if (lbn < 0)
		return (EFBIG);

	/*
	 * If the next write will extend the file into a new block, and the
	 * file is currently composed of a fragment, this fragment has to be
	 * extended to be a full block.
	 */
	lastlbn = lblkno(fs, ip->i_ffs2_size);
	if (lastlbn < NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			error = ffs_realloccg(ip, nb, ffs2_blkpref(ip,
			    lastlbn, nb, &ip->i_ffs2_db[0]), osize,
			    (int) fs->fs_bsize, cred, bpp, &newb);
			if (error)
				return (error);

			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb, newb,
				    ip->i_ffs2_db[nb], fs->fs_bsize, osize,
				    bpp ? *bpp : NULL);

			ip->i_ffs2_size = lblktosize(fs, nb + 1);
			uvm_vnp_setsize(vp, ip->i_ffs2_size);
			ip->i_ffs2_db[nb] = newb;
			ip->i_flag |= IN_CHANGE | IN_UPDATE;

			if (bpp) {
				if (flags & B_SYNC)
					bwrite(*bpp);
				else
					bawrite(*bpp);
			}
		}
	}

	/*
	 * The first NDADDR blocks are direct.
	 */
	if (lbn < NDADDR) {
		nb = ip->i_ffs2_db[lbn];
		if (nb != 0 && ip->i_ffs2_size >= lblktosize(fs, lbn + 1)) {
			/*
			 * The direct block is already allocated and the file
			 * extends past this block, thus this must be a whole
			 * block.  Just read it, if requested.
			 */
			if (bpp != NULL) {
				error = bread(vp, lbn, fs->fs_bsize, bpp);
				if (error) {
					brelse(*bpp);
					return (error);
				}
			}
			return (0);
		}

		if (nb != 0) {
			/*
			 * Consider the need to allocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_ffs2_size));
			nsize = fragroundup(fs, size);

			if (nsize <= osize) {
				/*
				 * The existing block is already at least as
				 * big as we want.  Just read it, if requested.
				 */
				if (bpp != NULL) {
					error = bread(vp, lbn, fs->fs_bsize,
					    bpp);
					if (error) {
						brelse(*bpp);
						return (error);
					}
					(*bpp)->b_bcount = osize;
				}
				return (0);
			} else {
				/*
				 * The existing block is smaller than we want,
				 * grow it.
				 */
				error = ffs_realloccg(ip, lbn,
				    ffs2_blkpref(ip, lbn, (int) lbn,
				    &ip->i_ffs2_db[0]), osize, nsize, cred,
				    bpp, &newb);
				if (error)
					return (error);

				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    newb, nb, nsize, osize,
					    bpp ? *bpp : NULL);
			}
		} else {
			/*
			 * The block was not previously allocated, allocate a
			 * new block or fragment.
			 */
			if (ip->i_ffs2_size < lblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;

			error = ffs_alloc(ip, lbn, ffs2_blkpref(ip, lbn,
			    (int) lbn, &ip->i_ffs2_db[0]), nsize, cred,
			    &newb);
			if (error)
				return (error);

			if (bpp != NULL) {
				bp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
				if (nsize < fs->fs_bsize)
					bp->b_bcount = nsize;
				bp->b_blkno = fsbtodb(fs, newb);
				if (flags & B_CLRBUF)
					clrbuf(bp);
				*bpp = bp;
			}

			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bpp ? *bpp : NULL);
		}

		ip->i_ffs2_db[lbn] = newb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;

		return (0);
	}

	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	error = ufs_getlbns(vp, lbn, indirs, &num);
	if (error)
		return (error);

#ifdef DIAGNOSTIC
	if (num < 1)
		panic("ffs2_balloc: ufs_bmaparray returned indirect block");
#endif

	/*
	 * Fetch the first indirect block allocating it necessary.
	 */
	--num;
	nb = ip->i_ffs2_ib[indirs[0].in_off];
	allocib = NULL;
	allocblk = allociblk;

	if (nb == 0) {
		pref = ffs2_blkpref(ip, lbn, -indirs[0].in_off - 1, NULL);
		error = ffs_alloc(ip, lbn, pref, (int) fs->fs_bsize, cred,
		    &newb);
		if (error)
			goto fail;

		nb = newb;
		*allocblk++ = nb;	/* record for unwind on failure */
		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
		bp->b_blkno = fsbtodb(fs, nb);
		clrbuf(bp);

		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip,
			    NDADDR + indirs[0].in_off, newb, 0, fs->fs_bsize,
			    0, bp);
			bdwrite(bp);
		} else {
			/*
			 * Write synchronously so that indirect blocks never
			 * point at garbage.
			 */
			error = bwrite(bp);
			if (error)
				goto fail;
		}

		unwindidx = 0;
		allocib = &ip->i_ffs2_ib[indirs[0].in_off];
		*allocib = nb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */
	for (i = 1;;) {
		error = bread(vp, indirs[i].in_lbn, (int)fs->fs_bsize, &bp);
		if (error) {
			brelse(bp);
			goto fail;
		}

		bap = (int64_t *) bp->b_data;
		nb = bap[indirs[i].in_off];

		if (i == num)
			break;	/* bp now holds the last indirect block */

		i++;

		if (nb != 0) {
			brelse(bp);
			continue;
		}

		if (pref == 0)
			pref = ffs2_blkpref(ip, lbn, i - num - 1, NULL);

		error = ffs_alloc(ip, lbn, pref, (int) fs->fs_bsize, cred,
		    &newb);
		if (error) {
			brelse(bp);
			goto fail;
		}

		nb = newb;
		*allocblk++ = nb;
		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		clrbuf(nbp);

		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {
			/*
			 * Write synchronously so that indirect blocks never
			 * point at garbage.
			 */
			error = bwrite(nbp);
			if (error) {
				brelse(bp);
				goto fail;
			}
		}

		if (unwindidx < 0)
			unwindidx = i - 1;

		bap[indirs[i - 1].in_off] = nb;

		/*
		 * If required, write synchronously, otherwise use delayed
		 * write.
		 */
		if (flags & B_SYNC)
			bwrite(bp);
		else
			bdwrite(bp);
	}

	/*
	 * Get the data block, allocating if necessary.
	 */
	if (nb == 0) {
		pref = ffs2_blkpref(ip, lbn, indirs[num].in_off, &bap[0]);

		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error) {
			brelse(bp);
			goto fail;
		}

		nb = newb;
		*allocblk++ = nb;

		if (bpp != NULL) {
			nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
			nbp->b_blkno = fsbtodb(fs, nb);
			if (flags & B_CLRBUF)
				clrbuf(nbp);
			*bpp = nbp;
		}

		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);

		bap[indirs[num].in_off] = nb;

		if (allocib == NULL && unwindidx < 0)
			unwindidx = i - 1;

		/*
		 * If required, write synchronously, otherwise use delayed
		 * write.
		 */
		if (flags & B_SYNC)
			bwrite(bp);
		else
			bdwrite(bp);

		return (0);
	}

	brelse(bp);

	if (bpp != NULL) {
		if (flags & B_CLRBUF) {
			error = bread(vp, lbn, (int)fs->fs_bsize, &nbp);
			if (error) {
				brelse(nbp);
				goto fail;
			}
		} else {
			nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
			nbp->b_blkno = fsbtodb(fs, nb);
			clrbuf(nbp);
		}

		*bpp = nbp;
	}

	return (0);

fail:
	/*
	 * If we have failed to allocate any blocks, simply return the error.
	 * This is the usual case and avoids the need to fsync the file.
	 */
	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
		return (error);

	/*
	 * If we have failed part way through block allocation, we have to
	 * deallocate any indirect blocks that we have allocated.  We have to
	 * fsync the file before we start to get rid of all of its
	 * dependencies so that we do not leave them dangling.  We have to sync
	 * it at the end so that the softdep code does not find any untracked
	 * changes.  Although this is really slow, running out of disk space is
	 * not expected to be a common occurrence.  The error return from fsync
	 * is ignored as we already have an error to return to the user.
	 */
	VOP_FSYNC(vp, p->p_ucred, MNT_WAIT, p);

	if (unwindidx >= 0) {
		/*
		 * First write out any buffers we've created to resolve their
		 * softdeps.  This must be done in reverse order of creation so
		 * that we resolve the dependencies in one pass.
		 * Write the cylinder group buffers for these buffers too.
		 */
		for (i = num; i >= unwindidx; i--) {
			if (i == 0)
				break;

			bp = getblk(vp, indirs[i].in_lbn, (int) fs->fs_bsize,
			    0, 0);
			if (bp->b_flags & B_DELWRI) {
				nb = fsbtodb(fs, cgtod(fs, dtog(fs,
				    dbtofsb(fs, bp->b_blkno))));
				bwrite(bp);
				bp = getblk(ip->i_devvp, nb,
				    (int) fs->fs_cgsize, 0, 0);
				if (bp->b_flags & B_DELWRI)
					bwrite(bp);
				else {
					bp->b_flags |= B_INVAL;
					brelse(bp);
				}
			} else {
				bp->b_flags |= B_INVAL;
				brelse(bp);
			}
		}

		if (DOINGSOFTDEP(vp) && unwindidx == 0) {
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			ffs_update(ip, 1);
		}

		/*
		 * Now that any dependencies that we created have been
		 * resolved, we can undo the partial allocation.
		 */
		if (unwindidx == 0) {
			*allocib = 0;
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (DOINGSOFTDEP(vp))
				ffs_update(ip, 1);
		} else {
			r = bread(vp, indirs[unwindidx].in_lbn,
			    (int)fs->fs_bsize, &bp);
			if (r)
				panic("ffs2_balloc: unwind failed");

			bap = (int64_t *) bp->b_data;
			bap[indirs[unwindidx].in_off] = 0;
			bwrite(bp);
		}

		for (i = unwindidx + 1; i <= num; i++) {
			bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize,
			    0, 0);
			bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}

	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(ip, *blkp, fs->fs_bsize);
		deallocated += fs->fs_bsize;
	}

	if (deallocated) {
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void) ufs_quota_free_blocks(ip, btodb(deallocated), cred);

		ip->i_ffs2_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	VOP_FSYNC(vp, p->p_ucred, MNT_WAIT, p);

	return (error);
}
static int udf_mountfs(struct vnode *devvp, struct mount *mp) { struct buf *bp = NULL; struct cdev *dev; struct anchor_vdp avdp; struct udf_mnt *udfmp = NULL; struct part_desc *pd; struct logvol_desc *lvd; struct fileset_desc *fsd; struct file_entry *root_fentry; uint32_t sector, size, mvds_start, mvds_end; uint32_t logical_secsize; uint32_t fsd_offset = 0; uint16_t part_num = 0, fsd_part = 0; int error = EINVAL; int logvol_found = 0, part_found = 0, fsd_found = 0; int bsize; struct g_consumer *cp; struct bufobj *bo; dev = devvp->v_rdev; dev_ref(dev); DROP_GIANT(); g_topology_lock(); error = g_vfs_open(devvp, &cp, "udf", 0); g_topology_unlock(); PICKUP_GIANT(); VOP_UNLOCK(devvp, 0); if (error) goto bail; bo = &devvp->v_bufobj; if (devvp->v_rdev->si_iosize_max != 0) mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max; if (mp->mnt_iosize_max > MAXPHYS) mp->mnt_iosize_max = MAXPHYS; /* XXX: should be M_WAITOK */ udfmp = malloc(sizeof(struct udf_mnt), M_UDFMOUNT, M_NOWAIT | M_ZERO); if (udfmp == NULL) { printf("Cannot allocate UDF mount struct\n"); error = ENOMEM; goto bail; } mp->mnt_data = udfmp; mp->mnt_stat.f_fsid.val[0] = dev2udev(devvp->v_rdev); mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; MNT_ILOCK(mp); mp->mnt_flag |= MNT_LOCAL; mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED; MNT_IUNLOCK(mp); udfmp->im_mountp = mp; udfmp->im_dev = dev; udfmp->im_devvp = devvp; udfmp->im_d2l = NULL; udfmp->im_cp = cp; udfmp->im_bo = bo; #if 0 udfmp->im_l2d = NULL; #endif /* * The UDF specification defines a logical sectorsize of 2048 * for DVD media. */ logical_secsize = 2048; if (((logical_secsize % cp->provider->sectorsize) != 0) || (logical_secsize < cp->provider->sectorsize)) { error = EINVAL; goto bail; } bsize = cp->provider->sectorsize; /* * Get the Anchor Volume Descriptor Pointer from sector 256. * XXX Should also check sector n - 256, n, and 512. 
*/ sector = 256; if ((error = bread(devvp, sector * btodb(logical_secsize), bsize, NOCRED, &bp)) != 0) goto bail; if ((error = udf_checktag((struct desc_tag *)bp->b_data, TAGID_ANCHOR))) goto bail; bcopy(bp->b_data, &avdp, sizeof(struct anchor_vdp)); brelse(bp); bp = NULL; /* * Extract the Partition Descriptor and Logical Volume Descriptor * from the Volume Descriptor Sequence. * XXX Should we care about the partition type right now? * XXX What about multiple partitions? */ mvds_start = le32toh(avdp.main_vds_ex.loc); mvds_end = mvds_start + (le32toh(avdp.main_vds_ex.len) - 1) / bsize; for (sector = mvds_start; sector < mvds_end; sector++) { if ((error = bread(devvp, sector * btodb(logical_secsize), bsize, NOCRED, &bp)) != 0) { printf("Can't read sector %d of VDS\n", sector); goto bail; } lvd = (struct logvol_desc *)bp->b_data; if (!udf_checktag(&lvd->tag, TAGID_LOGVOL)) { udfmp->bsize = le32toh(lvd->lb_size); udfmp->bmask = udfmp->bsize - 1; udfmp->bshift = ffs(udfmp->bsize) - 1; fsd_part = le16toh(lvd->_lvd_use.fsd_loc.loc.part_num); fsd_offset = le32toh(lvd->_lvd_use.fsd_loc.loc.lb_num); if (udf_find_partmaps(udfmp, lvd)) break; logvol_found = 1; } pd = (struct part_desc *)bp->b_data; if (!udf_checktag(&pd->tag, TAGID_PARTITION)) { part_found = 1; part_num = le16toh(pd->part_num); udfmp->part_len = le32toh(pd->part_len); udfmp->part_start = le32toh(pd->start_loc); } brelse(bp); bp = NULL; if ((part_found) && (logvol_found)) break; } if (!part_found || !logvol_found) { error = EINVAL; goto bail; } if (fsd_part != part_num) { printf("FSD does not lie within the partition!\n"); error = EINVAL; goto bail; } /* * Grab the Fileset Descriptor * Thanks to Chuck McCrobie <*****@*****.**> for pointing * me in the right direction here. 
*/ sector = udfmp->part_start + fsd_offset; if ((error = RDSECTOR(devvp, sector, udfmp->bsize, &bp)) != 0) { printf("Cannot read sector %d of FSD\n", sector); goto bail; } fsd = (struct fileset_desc *)bp->b_data; if (!udf_checktag(&fsd->tag, TAGID_FSD)) { fsd_found = 1; bcopy(&fsd->rootdir_icb, &udfmp->root_icb, sizeof(struct long_ad)); } brelse(bp); bp = NULL; if (!fsd_found) { printf("Couldn't find the fsd\n"); error = EINVAL; goto bail; } /* * Find the file entry for the root directory. */ sector = le32toh(udfmp->root_icb.loc.lb_num) + udfmp->part_start; size = le32toh(udfmp->root_icb.len); if ((error = udf_readdevblks(udfmp, sector, size, &bp)) != 0) { printf("Cannot read sector %d\n", sector); goto bail; } root_fentry = (struct file_entry *)bp->b_data; if ((error = udf_checktag(&root_fentry->tag, TAGID_FENTRY))) { printf("Invalid root file entry!\n"); goto bail; } brelse(bp); bp = NULL; return 0; bail: if (udfmp != NULL) free(udfmp, M_UDFMOUNT); if (bp != NULL) brelse(bp); if (cp != NULL) { DROP_GIANT(); g_topology_lock(); g_vfs_close(cp); g_topology_unlock(); PICKUP_GIANT(); } dev_rel(dev); return error; };
/*
 * Vnode op for write on a special file (character or block device).
 *
 * VCHR: drops the vnode lock and hands the uio straight to the character
 * device driver's d_write entry point, then relocks.
 * VBLK: performs buffered read-modify-write through the buffer cache in
 * bsize-aligned chunks.
 * Returns 0 on success or an errno from the driver / buffer cache / uiomove.
 */
int
spec_write(void *v)
{
	struct vop_write_args *ap = v;
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	struct buf *bp;
	daddr64_t bn, bscale;
	int bsize;
	struct partinfo dpart;
	int n, on, majordev;
	int (*ioctl)(dev_t, u_long, caddr_t, int, struct proc *);
	int error = 0;

#ifdef DIAGNOSTIC
	/* Sanity: must be a write request issued by the current process. */
	if (uio->uio_rw != UIO_WRITE)
		panic("spec_write mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("spec_write proc");
#endif

	switch (vp->v_type) {

	case VCHR:
		/*
		 * Unlock around the driver call so a slow/blocking device
		 * write does not hold the vnode lock.
		 */
		VOP_UNLOCK(vp, 0, p);
		error = (*cdevsw[major(vp->v_rdev)].d_write)
			(vp->v_rdev, uio, ap->a_ioflag);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
		return (error);

	case VBLK:
		if (uio->uio_resid == 0)
			return (0);
		if (uio->uio_offset < 0)
			return (EINVAL);
		/*
		 * Default transfer size; if the partition is FFS with a
		 * valid frag/fsize geometry, use its fragment size instead.
		 */
		bsize = BLKDEV_IOSIZE;
		if ((majordev = major(vp->v_rdev)) < nblkdev &&
		    (ioctl = bdevsw[majordev].d_ioctl) != NULL &&
		    (*ioctl)(vp->v_rdev, DIOCGPART, (caddr_t)&dpart,
		    FREAD, p) == 0) {
			u_int32_t frag =
			    DISKLABELV1_FFS_FRAG(dpart.part->p_fragblock);
			u_int32_t fsize =
			    DISKLABELV1_FFS_FSIZE(dpart.part->p_fragblock);
			if (dpart.part->p_fstype == FS_BSDFFS &&
			    frag != 0 && fsize != 0)
				bsize = frag * fsize;
		}
		/* bsize expressed in DEV_BSIZE sectors, for block rounding. */
		bscale = btodb(bsize);
		do {
			/* Block-aligned sector and offset within the block. */
			bn = btodb(uio->uio_offset) & ~(bscale - 1);
			on = uio->uio_offset % bsize;
			n = min((bsize - on), uio->uio_resid);
			/*
			 * Read-modify-write: fetch the existing block, copy
			 * the user data over the affected span, write back.
			 */
			error = bread(vp, bn, bsize, NOCRED, &bp);
			/*
			 * NOTE(review): b_resid is consulted before the
			 * bread() error check -- presumably safe because
			 * bread returns a buffer even on error (it is
			 * brelse'd below), but confirm against the buffer
			 * cache contract.
			 */
			n = min(n, bsize - bp->b_resid);
			if (error) {
				brelse(bp);
				return (error);
			}
			error = uiomove((char *)bp->b_data + on, n, uio);
			/*
			 * Full-block writes go out asynchronously now;
			 * partial blocks are delayed-written so later
			 * writes can coalesce.
			 */
			if (n + on == bsize)
				bawrite(bp);
			else
				bdwrite(bp);
		} while (error == 0 && uio->uio_resid > 0 && n != 0);
		return (error);

	default:
		panic("spec_write type");
	}
	/* NOTREACHED */
}
/*
 * Read and check superblock.
 * If it is an LFS, save information from the superblock.
 *
 * Probes the current partition for an LFS (Log-structured File System).
 * On success fills in the file-scope fsi / fsi_lfs descriptors, caches the
 * ifile inode, and returns 0.  Returns 1 if the partition is not an LFS or
 * the ifile inode cannot be found.
 */
int
try_lfs(void)
{
	/*
	 * NOTE(review): ufsinfo is not referenced in the visible body --
	 * possibly consumed by the RAW_READ() macro; confirm before removing.
	 */
	struct ufs_info *ufsinfo = &ufs_info;
	struct dlfs sblk, sblk2;
	struct dlfs *s = &sblk;	/* points at whichever superblock we keep */
	daddr_t sbpos;
	int fsbshift;

#ifdef DEBUG_WITH_STDIO
	printf("trying LFS\n");
#endif

	/* Primary superblock lives just past the label area. */
	sbpos = btodb(LFS_LABELPAD);

	/* read primary superblock */
	for (;;) {
#ifdef DEBUG_WITH_STDIO
		printf("LFS: reading primary sblk at: 0x%x\n",
		    (unsigned)sbpos);
#endif
		RAW_READ(&sblk, sbpos, sizeof sblk);

#ifdef DEBUG_WITH_STDIO
		printf("LFS: sblk: magic: 0x%x, version: %d\n",
		    sblk.dlfs_magic, sblk.dlfs_version);
#endif
		if (sblk.dlfs_magic != LFS_MAGIC)
			return 1;	/* not an LFS */

#ifdef DEBUG_WITH_STDIO
		printf("LFS: bsize %d, fsize %d, bshift %d, blktodb %d, fsbtodb %d, inopf %d, inopb %d\n",
		    sblk.dlfs_bsize, sblk.dlfs_fsize, sblk.dlfs_bshift,
		    sblk.dlfs_blktodb, sblk.dlfs_fsbtodb,
		    sblk.dlfs_inopf, sblk.dlfs_inopb);
#endif
		if ((fsi_lfs.version = sblk.dlfs_version) == 1) {
			/* v1: disk addresses are already in sectors. */
			fsbshift = 0;
			break;
		} else {
			daddr_t sbpos1;
#if 0
			fsbshift = sblk.dlfs_bshift - sblk.dlfs_blktodb +
			    sblk.dlfs_fsbtodb - DEV_BSHIFT;
#endif
			/* v2: shift converts fragment addr -> sector addr. */
			fsbshift = sblk.dlfs_fsbtodb;
			sbpos1 = sblk.dlfs_sboffs[0] << fsbshift;
			/*
			 * If the superblock's own record of its location
			 * matches where we read it, we are done; otherwise
			 * re-read from the recorded location.
			 */
			if (sbpos == sbpos1)
				break;
#ifdef DEBUG_WITH_STDIO
			printf("LFS: correcting primary sblk location\n");
#endif
			sbpos = sbpos1;
		}
	}

#ifdef DEBUG_WITH_STDIO
	printf("fsbshift: %d\n", fsbshift);
	printf("sboff[1]: %d\n", sblk.dlfs_sboffs[1]);
#endif

	/*
	 * If a secondary superblock exists, read it and prefer whichever
	 * copy is older (v1: smaller inopf; v2: smaller serial), since the
	 * other may be mid-update.
	 */
	if (sblk.dlfs_sboffs[1] > 0) {
#ifdef DEBUG_WITH_STDIO
		printf("LFS: reading secondary sblk at: 0x%x\n",
		    sblk.dlfs_sboffs[1] << fsbshift);
#endif
		/* read secondary superblock */
		RAW_READ(&sblk2, (daddr_t) sblk.dlfs_sboffs[1] << fsbshift,
		    sizeof sblk2);

#ifdef DEBUG_WITH_STDIO
		printf("LFS: sblk2: magic: 0x%x, version: %d\n",
		    sblk2.dlfs_magic, sblk2.dlfs_version);
#endif
		if (sblk2.dlfs_magic == LFS_MAGIC) {
			if (fsi_lfs.version == 1) {
				if (sblk.dlfs_inopf > sblk2.dlfs_inopf)
					s = &sblk2;
			} else {
				if (sblk.dlfs_serial > sblk2.dlfs_serial)
					s = &sblk2;
			}
		}
	}

	/* This partition looks like an LFS.
	 */
	fsi.get_inode = get_lfs_inode;
	/*
	 * version 1: disk addr is in disk sector --- no shifting
	 * version 2: disk addr is in fragment
	 */
	fsi.fsbtodb = fsbshift;

	/* Get information from the superblock. */
	fsi.bsize = s->dlfs_bsize;
	fsi.nindir = s->dlfs_nindir;
	fsi_lfs.idaddr = s->dlfs_idaddr;
#if 0
	fsi_lfs.ibsize = (fsi_lfs.version == 1) ?
	    s->dlfs_bsize : s->dlfs_fsize;
#else
	/* simplify calculation to reduce code size */
	/* use fsi.bsize (larger than needed for v2, but probably no harm) */
#endif
	/*
	 * version 1: number of inode per block
	 * version 2: number of inode per fragment (but in dlfs_inopb)
	 */
	fsi_lfs.inopb = s->dlfs_inopb;
	fsi_lfs.ifpb = s->dlfs_ifpb;
	fsi_lfs.ioffset = s->dlfs_cleansz + s->dlfs_segtabsz;

	/* ifile is always used to look-up other inodes, so keep its inode. */
	if (get_lfs_inode(LFS_IFILE_INUM, (union ufs_dinode *)&ifile_dinode))
		return 1;	/* OOPS, failed to find inode of ifile! */

	fsi.fstype = UFSTYPE_LFS;

	return 0;
}
/*
 * Read an HP LIF (Logical Interchange Format) volume header and derive an
 * OpenBSD disklabel from it.
 *
 * Looks through the LIF directory for either a LIF_DIR_FS entry (an OpenBSD
 * filesystem, whose offset becomes the label location) or a LIF_DIR_HPLBL
 * entry (an HP-UX label, which is translated into spoofed partitions).
 * If partoffp is non-NULL the caller only wants the filesystem offset;
 * the on-disk label itself is not read in that case or in spoofonly mode.
 * Returns 0 on success or an errno (EINVAL on bad magic).
 */
int
readliflabel(struct buf *bp, void (*strat)(struct buf *),
    struct disklabel *lp, daddr_t *partoffp, int spoofonly)
{
	struct lifdir *p;
	struct lifvol *lvp;
	int error = 0;
	daddr_t fsoff = 0, openbsdstart = MAXLIFSPACE;
	int i;

	/* read LIF volume header */
	error = readdisksector(bp, strat, lp, DL_BLKTOSEC(lp,
	    btodb(LIF_VOLSTART)));
	if (error)
		return (error);

	lvp = (struct lifvol *)bp->b_data;
	if (lvp->vol_id != LIF_VOL_ID) {
		error = EINVAL;		/* no LIF volume header */
		goto done;
	}

	/* read LIF directory */
	error = readdisksector(bp, strat, lp, DL_BLKTOSEC(lp,
	    lifstodb(lvp->vol_addr)));
	if (error)
		goto done;

	/* scan for LIF_DIR_FS dir entry */
	for (i=0, p=(struct lifdir *)bp->b_data; i < LIF_NUMDIR; p++, i++) {
		if (p->dir_type == LIF_DIR_FS || p->dir_type == LIF_DIR_HPLBL)
			break;
	}

	/* A filesystem entry: the OpenBSD area starts at its address. */
	if (p->dir_type == LIF_DIR_FS) {
		fsoff = lifstodb(p->dir_addr);
		openbsdstart = 0;
		goto finished;
	}

	/* Only came here to find the offset... */
	if (partoffp)
		goto finished;

	if (p->dir_type == LIF_DIR_HPLBL) {
		struct hpux_label *hl;
		struct partition *pp;
		u_int8_t fstype;
		/*
		 * NOTE(review): this 'i' shadows the function-scope 'i'
		 * above; harmless here, but rename if the code is touched.
		 */
		int i;

		/* read LIF directory */
		error = readdisksector(bp, strat, lp, DL_BLKTOSEC(lp,
		    lifstodb(p->dir_addr)));
		if (error)
			goto done;

		hl = (struct hpux_label *)bp->b_data;
		if (hl->hl_magic1 != hl->hl_magic2 ||
		    hl->hl_magic != HPUX_MAGIC || hl->hl_version != 1) {
			error = EINVAL;	/* HPUX label magic mismatch */
			goto done;
		}

		lp->d_bbsize = 8192;
		lp->d_sbsize = 8192;
		/* Start from a clean partition table. */
		for (i = 0; i < MAXPARTITIONS; i++) {
			DL_SETPSIZE(&lp->d_partitions[i], 0);
			DL_SETPOFFSET(&lp->d_partitions[i], 0);
			lp->d_partitions[i].p_fstype = 0;
		}

		/*
		 * Map HP-UX root/swap/boot partitions onto OpenBSD slots
		 * 'a', 'b', and the one past the raw partition.
		 */
		for (i = 0; i < HPUX_MAXPART; i++) {
			if (!hl->hl_flags[i])
				continue;

			if (hl->hl_flags[i] == HPUX_PART_ROOT) {
				pp = &lp->d_partitions[0];
				fstype = FS_BSDFFS;
			} else if (hl->hl_flags[i] == HPUX_PART_SWAP) {
				pp = &lp->d_partitions[1];
				fstype = FS_SWAP;
			} else if (hl->hl_flags[i] == HPUX_PART_BOOT) {
				pp = &lp->d_partitions[RAW_PART + 1];
				fstype = FS_BSDFFS;
			} else
				continue;

			/*
			 * HP-UX sizes/starts appear to be in 1KB units;
			 * the '* 2' converts to 512-byte sectors -- confirm.
			 */
			DL_SETPSIZE(pp, hl->hl_parts[i].hlp_length * 2);
			DL_SETPOFFSET(pp, hl->hl_parts[i].hlp_start * 2);
			pp->p_fstype = fstype;
		}

		/* Raw partition covers the whole disk, as usual. */
		DL_SETPSIZE(&lp->d_partitions[RAW_PART], DL_GETDSIZE(lp));
		DL_SETPOFFSET(&lp->d_partitions[RAW_PART], 0);
		lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
		lp->d_npartitions = MAXPARTITIONS;
		lp->d_magic = DISKMAGIC;
		lp->d_magic2 = DISKMAGIC;
		lp->d_version = 1;
		/* Checksum must be computed with the field zeroed. */
		lp->d_checksum = 0;
		lp->d_checksum = dkcksum(lp);
		/* drop through */
	}

finished:
	/* record the OpenBSD partition's placement for the caller */
	if (partoffp)
		*partoffp = fsoff;
	else {
		DL_SETBSTART(lp, DL_BLKTOSEC(lp, openbsdstart));
		DL_SETBEND(lp, DL_GETDSIZE(lp));	/* XXX */
	}

	/* don't read the on-disk label if we are in spoofed-only mode */
	if (spoofonly)
		goto done;

	error = readdisksector(bp, strat, lp, DL_BLKTOSEC(lp,
	    fsoff + LABELSECTOR));
	if (error)
		goto done;

	/*
	 * Do OpenBSD disklabel validation/adjustment.
	 *
	 * N.B: No matter what the bits are on the disk, we now have the
	 * OpenBSD disklabel for this lif disk. DO NOT proceed to
	 * readdoslabel(), iso_spooflabel(), etc.
	 */
	checkdisklabel(bp->b_data, lp, openbsdstart, DL_GETDSIZE(lp));
	error = 0;

done:
	return (error);
}