static int txp_attach(device_t dev) { struct txp_softc *sc; struct ifnet *ifp; uint16_t p1; uint32_t p2; uint8_t enaddr[ETHER_ADDR_LEN]; int error = 0, rid; sc = device_get_softc(dev); callout_init(&sc->txp_stat_timer); ifp = &sc->sc_arpcom.ac_if; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); pci_enable_busmaster(dev); rid = TXP_RID; sc->sc_res = bus_alloc_resource_any(dev, TXP_RES, &rid, RF_ACTIVE); if (sc->sc_res == NULL) { device_printf(dev, "couldn't map ports/memory\n"); return(ENXIO); } sc->sc_bt = rman_get_bustag(sc->sc_res); sc->sc_bh = rman_get_bushandle(sc->sc_res); /* Allocate interrupt */ rid = 0; sc->sc_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (sc->sc_irq == NULL) { device_printf(dev, "couldn't map interrupt\n"); error = ENXIO; goto fail; } if (txp_chip_init(sc)) { error = ENXIO; goto fail; } sc->sc_fwbuf = contigmalloc(32768, M_DEVBUF, M_WAITOK, 0, 0xffffffff, PAGE_SIZE, 0); error = txp_download_fw(sc); contigfree(sc->sc_fwbuf, 32768, M_DEVBUF); sc->sc_fwbuf = NULL; if (error) goto fail; sc->sc_ldata = contigmalloc(sizeof(struct txp_ldata), M_DEVBUF, M_WAITOK | M_ZERO, 0, 0xffffffff, PAGE_SIZE, 0); if (txp_alloc_rings(sc)) { error = ENXIO; goto fail; } if (txp_command(sc, TXP_CMD_MAX_PKT_SIZE_WRITE, TXP_MAX_PKTLEN, 0, 0, NULL, NULL, NULL, 1)) { error = ENXIO; goto fail; } if (txp_command(sc, TXP_CMD_STATION_ADDRESS_READ, 0, 0, 0, &p1, &p2, NULL, 1)) { error = ENXIO; goto fail; } txp_set_filter(sc); enaddr[0] = ((uint8_t *)&p1)[1]; enaddr[1] = ((uint8_t *)&p1)[0]; enaddr[2] = ((uint8_t *)&p2)[3]; enaddr[3] = ((uint8_t *)&p2)[2]; enaddr[4] = ((uint8_t *)&p2)[1]; enaddr[5] = ((uint8_t *)&p2)[0]; ifmedia_init(&sc->sc_ifmedia, 0, txp_ifmedia_upd, txp_ifmedia_sts); ifmedia_add(&sc->sc_ifmedia, IFM_ETHER|IFM_10_T, 0, NULL); ifmedia_add(&sc->sc_ifmedia, IFM_ETHER|IFM_10_T|IFM_HDX, 0, NULL); ifmedia_add(&sc->sc_ifmedia, IFM_ETHER|IFM_10_T|IFM_FDX, 0, NULL); ifmedia_add(&sc->sc_ifmedia, IFM_ETHER|IFM_100_TX, 0, NULL); ifmedia_add(&sc->sc_ifmedia, IFM_ETHER|IFM_100_TX|IFM_HDX, 0, NULL); ifmedia_add(&sc->sc_ifmedia, IFM_ETHER|IFM_100_TX|IFM_FDX, 0, NULL); ifmedia_add(&sc->sc_ifmedia, IFM_ETHER|IFM_AUTO, 0, NULL); sc->sc_xcvr = TXP_XCVR_AUTO; txp_command(sc, TXP_CMD_XCVR_SELECT, TXP_XCVR_AUTO, 0, 0, NULL, NULL, NULL, 0); ifmedia_set(&sc->sc_ifmedia, IFM_ETHER|IFM_AUTO); ifp->if_softc = sc; ifp->if_mtu = ETHERMTU; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = txp_ioctl; ifp->if_start = txp_start; ifp->if_watchdog = txp_watchdog; ifp->if_init = txp_init; ifp->if_baudrate = 100000000; ifq_set_maxlen(&ifp->if_snd, TX_ENTRIES); ifq_set_ready(&ifp->if_snd); ifp->if_hwassist = 0; txp_capabilities(sc); ether_ifattach(ifp, enaddr, NULL); error = bus_setup_intr(dev, sc->sc_irq, INTR_MPSAFE, txp_intr, sc, &sc->sc_intrhand, ifp->if_serializer); if (error) { device_printf(dev, "couldn't set up irq\n"); ether_ifdetach(ifp); goto fail; } ifp->if_cpuid = ithread_cpuid(rman_get_start(sc->sc_irq)); KKASSERT(ifp->if_cpuid >= 0 && ifp->if_cpuid < ncpus); return(0); fail: txp_release_resources(dev); return(error); }
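/*
 * Illustrative sketch (userland, not driver code): how txp_attach() above
 * assembles the 6-byte Ethernet station address from the two words returned
 * by TXP_CMD_STATION_ADDRESS_READ -- the 16-bit word supplies the first two
 * bytes and the 32-bit word the remaining four, each taken in reverse byte
 * order.  The sample values are made up, and the printed result depends on
 * the host's byte layout of p1/p2.
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint16_t p1 = 0x0102;		/* hypothetical response words */
	uint32_t p2 = 0x03040506;
	uint8_t enaddr[6];

	enaddr[0] = ((uint8_t *)&p1)[1];
	enaddr[1] = ((uint8_t *)&p1)[0];
	enaddr[2] = ((uint8_t *)&p2)[3];
	enaddr[3] = ((uint8_t *)&p2)[2];
	enaddr[4] = ((uint8_t *)&p2)[1];
	enaddr[5] = ((uint8_t *)&p2)[0];

	printf("%02x:%02x:%02x:%02x:%02x:%02x\n",
	    enaddr[0], enaddr[1], enaddr[2], enaddr[3], enaddr[4], enaddr[5]);
	return 0;
}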
/* * Create a snapshot of the specified {parent, ochain} with the specified * label. The originating hammer2_inode must be exclusively locked for * safety. * * The ioctl code has already synced the filesystem. */ int hammer2_cluster_snapshot(hammer2_trans_t *trans, hammer2_cluster_t *ocluster, hammer2_ioc_pfs_t *pfs) { hammer2_mount_t *hmp; hammer2_cluster_t *ncluster; const hammer2_inode_data_t *ipdata; hammer2_inode_data_t *wipdata; hammer2_inode_t *nip; size_t name_len; hammer2_key_t lhc; struct vattr vat; uuid_t opfs_clid; int error; kprintf("snapshot %s\n", pfs->name); name_len = strlen(pfs->name); lhc = hammer2_dirhash(pfs->name, name_len); ipdata = &hammer2_cluster_data(ocluster)->ipdata; opfs_clid = ipdata->pfs_clid; hmp = ocluster->focus->hmp; /* * Create the snapshot directory under the super-root * * Set PFS type, generate a unique filesystem id, and generate * a cluster id. Use the same clid when snapshotting a PFS root, * which theoretically allows the snapshot to be used as part of * the same cluster (perhaps as a cache). * * Copy the (flushed) blockref array. Theoretically we could use * chain_duplicate() but it becomes difficult to disentangle * the shared core so for now just brute-force it. */ VATTR_NULL(&vat); vat.va_type = VDIR; vat.va_mode = 0755; ncluster = NULL; nip = hammer2_inode_create(trans, hmp->spmp->iroot, &vat, proc0.p_ucred, pfs->name, name_len, &ncluster, &error); if (nip) { wipdata = hammer2_cluster_modify_ip(trans, nip, ncluster, 0); wipdata->pfs_type = HAMMER2_PFSTYPE_SNAPSHOT; kern_uuidgen(&wipdata->pfs_fsid, 1); if (ocluster->focus->flags & HAMMER2_CHAIN_PFSROOT) wipdata->pfs_clid = opfs_clid; else kern_uuidgen(&wipdata->pfs_clid, 1); hammer2_cluster_set_chainflags(ncluster, HAMMER2_CHAIN_PFSROOT); /* XXX hack blockset copy */ /* XXX doesn't work with real cluster */ KKASSERT(ocluster->nchains == 1); wipdata->u.blockset = ocluster->focus->data->ipdata.u.blockset; hammer2_cluster_modsync(ncluster); hammer2_inode_unlock_ex(nip, ncluster); } return (error); }
/* * Locate first match or overlap under parent, return a new cluster */ hammer2_cluster_t * hammer2_cluster_lookup(hammer2_cluster_t *cparent, hammer2_key_t *key_nextp, hammer2_key_t key_beg, hammer2_key_t key_end, int flags, int *ddflagp) { hammer2_pfsmount_t *pmp; hammer2_cluster_t *cluster; hammer2_chain_t *chain; hammer2_key_t key_accum; hammer2_key_t key_next; hammer2_key_t bref_key; int bref_keybits; int null_count; int ddflag; int i; uint8_t bref_type; u_int bytes; pmp = cparent->pmp; /* can be NULL */ key_accum = *key_nextp; null_count = 0; bref_type = 0; bref_key = 0; bref_keybits = 0; bytes = 0; cluster = kmalloc(sizeof(*cluster), M_HAMMER2, M_WAITOK | M_ZERO); cluster->pmp = pmp; /* can be NULL */ cluster->refs = 1; /* cluster->focus = NULL; already null */ cparent->focus = NULL; *ddflagp = 0; for (i = 0; i < cparent->nchains; ++i) { key_next = *key_nextp; if (cparent->array[i] == NULL) { ++null_count; continue; } chain = hammer2_chain_lookup(&cparent->array[i], &key_next, key_beg, key_end, &cparent->cache_index[i], flags, &ddflag); if (cparent->focus == NULL) cparent->focus = cparent->array[i]; cluster->array[i] = chain; if (chain == NULL) { ++null_count; } else { if (cluster->focus == NULL) { bref_type = chain->bref.type; bref_key = chain->bref.key; bref_keybits = chain->bref.keybits; bytes = chain->bytes; *ddflagp = ddflag; cluster->focus = chain; } KKASSERT(bref_type == chain->bref.type); KKASSERT(bref_key == chain->bref.key); KKASSERT(bref_keybits == chain->bref.keybits); KKASSERT(bytes == chain->bytes); KKASSERT(*ddflagp == ddflag); } if (key_accum > key_next) key_accum = key_next; } *key_nextp = key_accum; cluster->nchains = i; if (null_count == i) { hammer2_cluster_drop(cluster); cluster = NULL; } return (cluster); }
/* * Flush waiting shared locks. The lock's prior state is passed in and must * be adjusted atomically only if it matches and LINKSPIN is not set. * * IMPORTANT! The caller has left one active count on the lock for us to * consume. We will apply this to the first link, but must add * additional counts for any other links. */ static int mtx_chain_link_sh(mtx_t *mtx, u_int olock) { thread_t td = curthread; mtx_link_t *link; u_int addcount; u_int nlock; olock &= ~MTX_LINKSPIN; nlock = olock | MTX_LINKSPIN; nlock &= ~MTX_EXCLUSIVE; crit_enter_raw(td); if (atomic_cmpset_int(&mtx->mtx_lock, olock, nlock)) { /* * It should not be possible for SHWANTED to be set without * any links pending. */ KKASSERT(mtx->mtx_shlink != NULL); /* * We have to process the count for all shared locks before * we process any of the links. Count the additional shared * locks beyond the first link (which is already accounted * for) and associate the full count with the lock * immediately. */ addcount = 0; for (link = mtx->mtx_shlink->next; link != mtx->mtx_shlink; link = link->next) { ++addcount; } if (addcount > 0) atomic_add_int(&mtx->mtx_lock, addcount); /* * We can wakeup all waiting shared locks. */ while ((link = mtx->mtx_shlink) != NULL) { KKASSERT(link->state == MTX_LINK_LINKED_SH); if (link->next == link) { mtx->mtx_shlink = NULL; } else { mtx->mtx_shlink = link->next; link->next->prev = link->prev; link->prev->next = link->next; } link->next = NULL; link->prev = NULL; cpu_sfence(); if (link->callback) { link->state = MTX_LINK_CALLEDBACK; link->callback(link, link->arg, 0); } else { cpu_sfence(); link->state = MTX_LINK_ACQUIRED; wakeup(link); } } atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN | MTX_SHWANTED); crit_exit_raw(td); return 1; } /* retry */ crit_exit_raw(td); return 0; }
/* * Strategy routine called from dm_strategy. */ static int dm_target_stripe_strategy(dm_table_entry_t *table_en, struct buf *bp) { dm_target_stripe_config_t *tsc; struct bio *bio = &bp->b_bio1; struct buf *nestbuf; uint64_t blkno, blkoff; uint64_t stripe, blknr; uint32_t stripe_off, stripe_rest, num_blks, issue_blks; int devnr; tsc = table_en->target_config; if (tsc == NULL) return 0; /* calculate extent of request */ KKASSERT(bp->b_resid % DEV_BSIZE == 0); switch(bp->b_cmd) { case BUF_CMD_READ: case BUF_CMD_WRITE: case BUF_CMD_FREEBLKS: /* * Loop through to individual operations */ blkno = bp->b_bio1.bio_offset / DEV_BSIZE; blkoff = 0; num_blks = bp->b_resid / DEV_BSIZE; nestiobuf_init(bio); while (num_blks > 0) { /* block number to stripe piece number */ stripe = blkno / tsc->stripe_chunksize; stripe_off = blkno % tsc->stripe_chunksize; /* where we are inside the stripe */ devnr = stripe % tsc->stripe_num; blknr = stripe / tsc->stripe_num; /* how much is left before we hit a boundary */ stripe_rest = tsc->stripe_chunksize - stripe_off; /* issue this piece on stripe `stripe' */ issue_blks = MIN(stripe_rest, num_blks); nestbuf = getpbuf(NULL); nestbuf->b_flags |= bio->bio_buf->b_flags & B_HASBOGUS; nestiobuf_add(bio, nestbuf, blkoff, issue_blks * DEV_BSIZE, NULL); /* I need the number of bytes. */ nestbuf->b_bio1.bio_offset = blknr * tsc->stripe_chunksize + stripe_off; nestbuf->b_bio1.bio_offset += tsc->stripe_devs[devnr].offset; nestbuf->b_bio1.bio_offset *= DEV_BSIZE; vn_strategy(tsc->stripe_devs[devnr].pdev->pdev_vnode, &nestbuf->b_bio1); blkno += issue_blks; blkoff += issue_blks * DEV_BSIZE; num_blks -= issue_blks; } nestiobuf_start(bio); break; case BUF_CMD_FLUSH: nestiobuf_init(bio); for (devnr = 0; devnr < tsc->stripe_num; ++devnr) { nestbuf = getpbuf(NULL); nestbuf->b_flags |= bio->bio_buf->b_flags & B_HASBOGUS; nestiobuf_add(bio, nestbuf, 0, 0, NULL); nestbuf->b_bio1.bio_offset = 0; vn_strategy(tsc->stripe_devs[devnr].pdev->pdev_vnode, &nestbuf->b_bio1); } nestiobuf_start(bio); break; default: bp->b_flags |= B_ERROR; bp->b_error = EIO; biodone(bio); break; } return 0; }
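/*
 * Illustrative sketch (userland, not dm code): the stripe arithmetic used in
 * dm_target_stripe_strategy() above.  Given a logical block number, a chunk
 * size and a stripe width (all invented values here), compute which device
 * the block lands on, the chunk index on that device, the offset within the
 * chunk, and how many blocks remain before the next chunk boundary.
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t chunksize = 64;	/* blocks per stripe chunk (example) */
	uint64_t ndevs = 3;		/* stripe width (example) */
	uint64_t blkno = 1000;		/* logical block number (example) */

	uint64_t stripe = blkno / chunksize;	  /* global chunk index */
	uint64_t stripe_off = blkno % chunksize;  /* offset inside the chunk */
	uint64_t devnr = stripe % ndevs;	  /* device the chunk lives on */
	uint64_t blknr = stripe / ndevs;	  /* chunk index on that device */
	uint64_t rest = chunksize - stripe_off;	  /* blocks to chunk boundary */
	uint64_t devblk = blknr * chunksize + stripe_off; /* block on device */

	printf("blk %ju -> dev %ju blk %ju (chunk %ju +%ju), %ju to boundary\n",
	    (uintmax_t)blkno, (uintmax_t)devnr, (uintmax_t)devblk,
	    (uintmax_t)blknr, (uintmax_t)stripe_off, (uintmax_t)rest);
	return 0;
}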
/* * Vnode op for VM putpages. * possible bug: all IO done in sync mode * Note that vop_close always invalidates pages before close, so it is * not necessary to open the vnode. * * nwfs_putpages(struct vnode *a_vp, vm_page_t *a_m, int a_count, * int a_sync, int *a_rtvals, vm_ooffset_t a_offset) */ int nwfs_putpages(struct vop_putpages_args *ap) { int error; struct thread *td = curthread; /* XXX */ struct vnode *vp = ap->a_vp; struct ucred *cred; #ifndef NWFS_RWCACHE KKASSERT(td->td_proc); cred = td->td_proc->p_ucred; /* XXX */ VOP_OPEN(vp, FWRITE, cred, NULL); error = vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync, ap->a_rtvals); VOP_CLOSE(vp, FWRITE, cred); return error; #else struct uio uio; struct iovec iov; vm_offset_t kva; struct buf *bp; int i, npages, count; int *rtvals; struct nwmount *nmp; struct nwnode *np; vm_page_t *pages; KKASSERT(td->td_proc); cred = td->td_proc->p_ucred; /* XXX */ /* VOP_OPEN(vp, FWRITE, cred, NULL);*/ np = VTONW(vp); nmp = VFSTONWFS(vp->v_mount); pages = ap->a_m; count = ap->a_count; rtvals = ap->a_rtvals; npages = btoc(count); for (i = 0; i < npages; i++) { rtvals[i] = VM_PAGER_AGAIN; } bp = getpbuf_kva(&nwfs_pbuf_freecnt); kva = (vm_offset_t) bp->b_data; pmap_qenter(kva, pages, npages); iov.iov_base = (caddr_t) kva; iov.iov_len = count; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = IDX_TO_OFF(pages[0]->pindex); uio.uio_resid = count; uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_WRITE; uio.uio_td = td; NCPVNDEBUG("ofs=%d,resid=%d\n",(int)uio.uio_offset, uio.uio_resid); error = ncp_write(NWFSTOCONN(nmp), &np->n_fh, &uio, cred); /* VOP_CLOSE(vp, FWRITE, cred);*/ NCPVNDEBUG("paged write done: %d\n", error); pmap_qremove(kva, npages); relpbuf(bp, &nwfs_pbuf_freecnt); if (!error) { int nwritten = round_page(count - uio.uio_resid) / PAGE_SIZE; for (i = 0; i < nwritten; i++) { rtvals[i] = VM_PAGER_OK; vm_page_undirty(pages[i]); } } return rtvals[0]; #endif /* NWFS_RWCACHE */ }
/* * Remove a directory entry. At this point the file represented by the * directory entry to be removed is still full length until no one has it * open. When the file is no longer being used msdosfs_inactive() is called * and will truncate the file to 0 length. When the vnode containing the * denode is needed for some other purpose by VFS it will call * msdosfs_reclaim() which will remove the denode from the denode cache. */ int removede(struct denode *pdep, /* directory where the entry is removed */ struct denode *dep) /* file to be removed */ { int error; struct direntry *ep; struct buf *bp; daddr_t bn; int blsize; struct msdosfsmount *pmp = pdep->de_pmp; u_long offset = pdep->de_fndoffset; #ifdef MSDOSFS_DEBUG kprintf("removede(): filename %s, dep %p, offset %08lx\n", dep->de_Name, dep, offset); #endif KKASSERT(dep->de_refcnt > 0); dep->de_refcnt--; offset += sizeof(struct direntry); do { offset -= sizeof(struct direntry); error = pcbmap(pdep, de_cluster(pmp, offset), &bn, NULL, &blsize); if (error) return error; error = bread(pmp->pm_devvp, de_bntodoff(pmp, bn), blsize, &bp); if (error) { brelse(bp); return error; } ep = bptoep(pmp, bp, offset); /* * If we came here a second time, i.e. when underflowing * into the previous block, check whether the last entry * in that block is a long filename entry, too. */ if (ep->deAttributes != ATTR_WIN95 && offset != pdep->de_fndoffset) { brelse(bp); break; } offset += sizeof(struct direntry); while (1) { /* * We are a bit aggressive here in that we delete any Win95 * entries preceding this entry, not just the ones we "own". * Since these presumably aren't valid anyway, * there should be no harm. */ offset -= sizeof(struct direntry); ep--->deName[0] = SLOT_DELETED; if ((pmp->pm_flags & MSDOSFSMNT_NOWIN95) || !(offset & pmp->pm_crbomask) || ep->deAttributes != ATTR_WIN95) break; } if ((error = bwrite(bp)) != 0) return error; } while (!(pmp->pm_flags & MSDOSFSMNT_NOWIN95) && !(offset & pmp->pm_crbomask) && offset); return 0; }
static int tmpfs_nrmdir(struct vop_nrmdir_args *v) { struct vnode *dvp = v->a_dvp; struct namecache *ncp = v->a_nch->ncp; struct vnode *vp; struct tmpfs_dirent *de; struct tmpfs_mount *tmp; struct tmpfs_node *dnode; struct tmpfs_node *node; int error; /* * We have to acquire the vp from v->a_nch because we will likely * unresolve the namecache entry, and a vrele/vput is needed to * trigger the tmpfs_inactive/tmpfs_reclaim sequence. * * We have to use vget to clear any inactive state on the vnode, * otherwise the vnode may remain inactive and thus tmpfs_inactive * will not get called when we release it. */ error = cache_vget(v->a_nch, v->a_cred, LK_SHARED, &vp); KKASSERT(error == 0); vn_unlock(vp); /* * Prevalidate so we don't hit an assertion later */ if (vp->v_type != VDIR) { error = ENOTDIR; goto out; } tmp = VFS_TO_TMPFS(dvp->v_mount); dnode = VP_TO_TMPFS_DIR(dvp); node = VP_TO_TMPFS_DIR(vp); /* Directories with more than two entries ('.' and '..') cannot be * removed. */ if (node->tn_size > 0) { error = ENOTEMPTY; goto out; } if ((dnode->tn_flags & APPEND) || (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))) { error = EPERM; goto out; } /* This invariant holds only if we are not trying to remove "..". * We checked for that above so this is safe now. */ KKASSERT(node->tn_dir.tn_parent == dnode); /* Get the directory entry associated with node (vp). This was * filled by tmpfs_lookup while looking up the entry. */ de = tmpfs_dir_lookup(dnode, node, ncp); KKASSERT(TMPFS_DIRENT_MATCHES(de, ncp->nc_name, ncp->nc_nlen)); /* Check flags to see if we are allowed to remove the directory. */ if ((dnode->tn_flags & APPEND) || node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) { error = EPERM; goto out; } /* Detach the directory entry from the directory (dnode). */ tmpfs_dir_detach(dnode, de); /* No vnode should be allocated for this entry from this point */ TMPFS_NODE_LOCK(node); TMPFS_ASSERT_ELOCKED(node); TMPFS_NODE_LOCK(dnode); TMPFS_ASSERT_ELOCKED(dnode); #if 0 /* handled by tmpfs_free_node */ KKASSERT(node->tn_links > 0); node->tn_links--; node->tn_dir.tn_parent = NULL; #endif node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \ TMPFS_NODE_MODIFIED; #if 0 /* handled by tmpfs_free_node */ KKASSERT(dnode->tn_links > 0); dnode->tn_links--; #endif dnode->tn_status |= TMPFS_NODE_ACCESSED | \ TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED; TMPFS_NODE_UNLOCK(dnode); TMPFS_NODE_UNLOCK(node); /* Free the directory entry we just deleted. Note that the node * referred by it will not be removed until the vnode is really * reclaimed. */ tmpfs_free_dirent(tmp, de); /* Release the deleted vnode (will destroy the node, notify * interested parties and clean it from the cache). */ TMPFS_NODE_LOCK(dnode); dnode->tn_status |= TMPFS_NODE_CHANGED; TMPFS_NODE_UNLOCK(dnode); tmpfs_update(dvp); cache_setunresolved(v->a_nch); cache_setvp(v->a_nch, NULL); /*cache_inval_vp(vp, CINV_DESTROY);*/ tmpfs_knote(dvp, NOTE_WRITE | NOTE_LINK); error = 0; out: vrele(vp); return error; }
static int tmpfs_readdir(struct vop_readdir_args *v) { struct vnode *vp = v->a_vp; struct uio *uio = v->a_uio; int *eofflag = v->a_eofflag; off_t **cookies = v->a_cookies; int *ncookies = v->a_ncookies; struct tmpfs_mount *tmp; int error; off_t startoff; off_t cnt = 0; struct tmpfs_node *node; /* This operation only makes sense on directory nodes. */ if (vp->v_type != VDIR) return ENOTDIR; tmp = VFS_TO_TMPFS(vp->v_mount); node = VP_TO_TMPFS_DIR(vp); startoff = uio->uio_offset; if (uio->uio_offset == TMPFS_DIRCOOKIE_DOT) { error = tmpfs_dir_getdotdent(node, uio); if (error != 0) goto outok; cnt++; } if (uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT) { error = tmpfs_dir_getdotdotdent(tmp, node, uio); if (error != 0) goto outok; cnt++; } error = tmpfs_dir_getdents(node, uio, &cnt); outok: KKASSERT(error >= -1); if (error == -1) error = 0; if (eofflag != NULL) *eofflag = (error == 0 && uio->uio_offset == TMPFS_DIRCOOKIE_EOF); /* Update NFS-related variables. */ if (error == 0 && cookies != NULL && ncookies != NULL) { off_t i; off_t off = startoff; struct tmpfs_dirent *de = NULL; *ncookies = cnt; *cookies = kmalloc(cnt * sizeof(off_t), M_TEMP, M_WAITOK); for (i = 0; i < cnt; i++) { KKASSERT(off != TMPFS_DIRCOOKIE_EOF); if (off == TMPFS_DIRCOOKIE_DOT) { off = TMPFS_DIRCOOKIE_DOTDOT; } else { if (off == TMPFS_DIRCOOKIE_DOTDOT) { de = TAILQ_FIRST(&node->tn_dir.tn_dirhead); } else if (de != NULL) { de = TAILQ_NEXT(de, td_entries); } else { de = tmpfs_dir_lookupbycookie(node, off); KKASSERT(de != NULL); de = TAILQ_NEXT(de, td_entries); } if (de == NULL) off = TMPFS_DIRCOOKIE_EOF; else off = tmpfs_dircookie(de); } (*cookies)[i] = off; } KKASSERT(uio->uio_offset == off); } return error; }
/* * Remote IPI for callout_reset_bycpu(). The operation is performed only * on the 1->0 transition of the counter, otherwise there are callout_stop()s * pending after us. * * The IPI counter and PENDING flags must be set atomically with the * 1->0 transition. The ACTIVE flag was set prior to the ipi being * sent and we do not want to race a caller on the original cpu trying * to deactivate() the flag concurrent with our installation of the * callout. */ static void callout_reset_ipi(void *arg) { struct callout *c = arg; globaldata_t gd = mycpu; globaldata_t tgd; int flags; int nflags; for (;;) { flags = c->c_flags; cpu_ccfence(); KKASSERT((flags & CALLOUT_IPI_MASK) > 0); /* * We should already be armed for our cpu, if armed to another * cpu, chain the IPI. If for some reason we are not armed, * we can arm ourselves. */ if (flags & CALLOUT_ARMED) { if (CALLOUT_FLAGS_TO_CPU(flags) != gd->gd_cpuid) { tgd = globaldata_find( CALLOUT_FLAGS_TO_CPU(flags)); lwkt_send_ipiq(tgd, callout_reset_ipi, c); return; } nflags = (flags & ~CALLOUT_EXECUTED); } else { nflags = (flags & ~(CALLOUT_CPU_MASK | CALLOUT_EXECUTED)) | CALLOUT_ARMED | CALLOUT_CPU_TO_FLAGS(gd->gd_cpuid); } /* * Decrement the IPI count, retain and clear the WAITING * status, clear EXECUTED. * * NOTE: It is possible for the callout to already have been * marked pending due to SMP races. */ nflags = nflags - 1; if ((flags & CALLOUT_IPI_MASK) == 1) { nflags &= ~(CALLOUT_WAITING | CALLOUT_EXECUTED); nflags |= CALLOUT_PENDING; } if (atomic_cmpset_int(&c->c_flags, flags, nflags)) { /* * Only install the callout on the 1->0 transition * of the IPI count, and only if PENDING was not * already set. The latter situation should never * occur but we check anyway. */ if ((flags & (CALLOUT_PENDING|CALLOUT_IPI_MASK)) == 1) { softclock_pcpu_t sc; sc = &softclock_pcpu_ary[gd->gd_cpuid]; c->c_time = sc->curticks + c->c_load; TAILQ_INSERT_TAIL( &sc->callwheel[c->c_time & cwheelmask], c, c_links.tqe); } break; } /* retry */ cpu_pause(); } /* * Issue wakeup if requested. */ if (flags & CALLOUT_WAITING) wakeup(c); }
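/*
 * Illustrative sketch (not the kernel macros): packing a cpu id into a flags
 * word and recovering it again, in the spirit of CALLOUT_CPU_TO_FLAGS() and
 * CALLOUT_FLAGS_TO_CPU() used above.  The shift and mask below are invented
 * for the example; the real layout lives in the callout headers.
 */
#include <stdio.h>

#define EX_CPU_SHIFT	8
#define EX_CPU_MASK	(0xffu << EX_CPU_SHIFT)
#define EX_CPU_TO_FLAGS(cpu)	(((unsigned)(cpu) << EX_CPU_SHIFT) & EX_CPU_MASK)
#define EX_FLAGS_TO_CPU(flags)	(((flags) & EX_CPU_MASK) >> EX_CPU_SHIFT)

int
main(void)
{
	unsigned flags = 0;

	/* re-target the (hypothetical) callout to cpu 5 */
	flags = (flags & ~EX_CPU_MASK) | EX_CPU_TO_FLAGS(5);
	printf("cpu %u\n", EX_FLAGS_TO_CPU(flags));
	return 0;
}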
/* * Stop a running timer and ensure that any running callout completes before * returning. If the timer is running on another cpu this function may block * to interlock against the callout. If the callout is currently executing * or blocked in another thread this function may also block to interlock * against the callout. * * The caller must be careful to avoid deadlocks, either by using * callout_init_lk() (which uses the lockmgr lock cancelation feature), * by using tokens and dealing with breaks in the serialization, or using * the lockmgr lock cancelation feature yourself in the callout callback * function. * * callout_stop() returns non-zero if the callout was pending. */ static int _callout_stop(struct callout *c, int issync) { globaldata_t gd = mycpu; globaldata_t tgd; softclock_pcpu_t sc; int flags; int nflags; int rc; int cpuid; #ifdef INVARIANTS if ((c->c_flags & CALLOUT_DID_INIT) == 0) { callout_init(c); kprintf( "callout_stop(%p) from %p: callout was not initialized\n", c, ((int **)&c)[-1]); print_backtrace(-1); } #endif crit_enter_gd(gd); /* * Fast path operations: * * If ARMED and owned by our cpu, or not ARMED, and other simple * conditions are met, we can just clear ACTIVE and EXECUTED * and we are done. */ for (;;) { flags = c->c_flags; cpu_ccfence(); cpuid = CALLOUT_FLAGS_TO_CPU(flags); /* * Can't handle an armed callout in the fast path if it is * not on the current cpu. We must atomically increment the * IPI count for the IPI we intend to send and break out of * the fast path to enter the slow path. */ if (flags & CALLOUT_ARMED) { if (gd->gd_cpuid != cpuid) { nflags = flags + 1; if (atomic_cmpset_int(&c->c_flags, flags, nflags)) { /* break to slow path */ break; } continue; /* retry */ } } else { cpuid = gd->gd_cpuid; KKASSERT((flags & CALLOUT_IPI_MASK) == 0); KKASSERT((flags & CALLOUT_PENDING) == 0); } /* * Process pending IPIs and retry (only if not called from * an IPI). */ if (flags & CALLOUT_IPI_MASK) { lwkt_process_ipiq(); continue; /* retry */ } /* * Transition to the stopped state, recover the EXECUTED * status. If pending we cannot clear ARMED until after * we have removed (c) from the callwheel. * * NOTE: The callout might already not be armed but in this * case it should also not be pending. */ nflags = flags & ~(CALLOUT_ACTIVE | CALLOUT_EXECUTED | CALLOUT_WAITING | CALLOUT_PENDING); /* NOTE: IPI_MASK already tested */ if ((flags & CALLOUT_PENDING) == 0) nflags &= ~CALLOUT_ARMED; if (atomic_cmpset_int(&c->c_flags, flags, nflags)) { /* * Can only remove from callwheel if currently * pending. */ if (flags & CALLOUT_PENDING) { sc = &softclock_pcpu_ary[gd->gd_cpuid]; if (sc->next == c) sc->next = TAILQ_NEXT(c, c_links.tqe); TAILQ_REMOVE( &sc->callwheel[c->c_time & cwheelmask], c, c_links.tqe); c->c_func = NULL; /* * NOTE: Can't clear ARMED until we have * physically removed (c) from the * callwheel. * * NOTE: WAITING bit race exists when doing * unconditional bit clears. */ callout_maybe_clear_armed(c); if (c->c_flags & CALLOUT_WAITING) flags |= CALLOUT_WAITING; } /* * ARMED has been cleared at this point and (c) * might now be stale. Only good for wakeup()s. */ if (flags & CALLOUT_WAITING) wakeup(c); goto skip_slow; } /* retry */ } /* * Slow path (and not called via an IPI). * * When ARMED to a different cpu the stop must be processed on that * cpu. Issue the IPI and wait for completion. We have already * incremented the IPI count. 
*/ tgd = globaldata_find(cpuid); lwkt_send_ipiq3(tgd, callout_stop_ipi, c, issync); for (;;) { int flags; int nflags; flags = c->c_flags; cpu_ccfence(); if ((flags & CALLOUT_IPI_MASK) == 0) /* fast path */ break; nflags = flags | CALLOUT_WAITING; tsleep_interlock(c, 0); if (atomic_cmpset_int(&c->c_flags, flags, nflags)) { tsleep(c, PINTERLOCKED, "cstp1", 0); } } skip_slow: /* * If (issync) we must also wait for any in-progress callbacks to * complete, unless the stop is being executed from the callback * itself. The EXECUTED flag is set prior to the callback * being made so our existing flags status already has it. * * If auto-lock mode is being used, this is where we cancel any * blocked lock that is potentially preventing the target cpu * from completing the callback. */ while (issync) { intptr_t *runp; intptr_t runco; sc = &softclock_pcpu_ary[cpuid]; if (gd->gd_curthread == &sc->thread) /* stop from cb */ break; runp = &sc->running; runco = *runp; cpu_ccfence(); if ((runco & ~(intptr_t)1) != (intptr_t)c) break; if (c->c_flags & CALLOUT_AUTOLOCK) lockmgr(c->c_lk, LK_CANCEL_BEG); tsleep_interlock(c, 0); if (atomic_cmpset_long(runp, runco, runco | 1)) tsleep(c, PINTERLOCKED, "cstp3", 0); if (c->c_flags & CALLOUT_AUTOLOCK) lockmgr(c->c_lk, LK_CANCEL_END); } crit_exit_gd(gd); rc = (flags & CALLOUT_EXECUTED) != 0; return rc; }
/* * This procedure is the main loop of our per-cpu helper thread. The * sc->isrunning flag prevents us from racing hardclock_softtick() and * a critical section is sufficient to interlock sc->curticks and protect * us from remote IPI's / list removal. * * The thread starts with the MP lock released and not in a critical * section. The loop itself is MP safe while individual callbacks * may or may not be, so we obtain or release the MP lock as appropriate. */ static void softclock_handler(void *arg) { softclock_pcpu_t sc; struct callout *c; struct callout_tailq *bucket; struct callout slotimer; int mpsafe = 1; int flags; /* * Setup pcpu slow clocks which we want to run from the callout * thread. */ callout_init_mp(&slotimer); callout_reset(&slotimer, hz * 10, slotimer_callback, &slotimer); /* * Run the callout thread at the same priority as other kernel * threads so it can be round-robined. */ /*lwkt_setpri_self(TDPRI_SOFT_NORM);*/ /* * Loop critical section against ipi operations to this cpu. */ sc = arg; crit_enter(); loop: while (sc->softticks != (int)(sc->curticks + 1)) { bucket = &sc->callwheel[sc->softticks & cwheelmask]; for (c = TAILQ_FIRST(bucket); c; c = sc->next) { if (c->c_time != sc->softticks) { sc->next = TAILQ_NEXT(c, c_links.tqe); continue; } flags = c->c_flags; if (flags & CALLOUT_MPSAFE) { if (mpsafe == 0) { mpsafe = 1; rel_mplock(); } } else { /* * The request might be removed while we * are waiting to get the MP lock. If it * was removed sc->next will point to the * next valid request or NULL, loop up. */ if (mpsafe) { mpsafe = 0; sc->next = c; get_mplock(); if (c != sc->next) continue; } } /* * Queue protection only exists while we hold the * critical section uninterrupted. * * Adjust sc->next when removing (c) from the queue, * note that an IPI on this cpu may make further * adjustments to sc->next. */ sc->next = TAILQ_NEXT(c, c_links.tqe); TAILQ_REMOVE(bucket, c, c_links.tqe); KASSERT((c->c_flags & CALLOUT_ARMED) && (c->c_flags & CALLOUT_PENDING) && CALLOUT_FLAGS_TO_CPU(c->c_flags) == mycpu->gd_cpuid, ("callout %p: bad flags %08x", c, c->c_flags)); /* * Once CALLOUT_PENDING is cleared, sc->running * protects the callout structure's existance but * only until we call c_func(). A callout_stop() * or callout_reset() issued from within c_func() * will not block. The callout can also be kfree()d * by c_func(). * * We set EXECUTED before calling c_func() so a * callout_stop() issued from within c_func() returns * the correct status. */ if ((flags & (CALLOUT_AUTOLOCK | CALLOUT_ACTIVE)) == (CALLOUT_AUTOLOCK | CALLOUT_ACTIVE)) { void (*c_func)(void *); void *c_arg; struct lock *c_lk; int error; /* * NOTE: sc->running must be set prior to * CALLOUT_PENDING being cleared to * avoid missed CANCELs and *_stop() * races. */ sc->running = (intptr_t)c; c_func = c->c_func; c_arg = c->c_arg; c_lk = c->c_lk; c->c_func = NULL; KKASSERT(c->c_flags & CALLOUT_DID_INIT); flags = callout_unpend_disarm(c); error = lockmgr(c_lk, LK_EXCLUSIVE | LK_CANCELABLE); if (error == 0) { atomic_set_int(&c->c_flags, CALLOUT_EXECUTED); crit_exit(); c_func(c_arg); crit_enter(); lockmgr(c_lk, LK_RELEASE); } } else if (flags & CALLOUT_ACTIVE) { void (*c_func)(void *); void *c_arg; sc->running = (intptr_t)c; c_func = c->c_func; c_arg = c->c_arg; c->c_func = NULL; KKASSERT(c->c_flags & CALLOUT_DID_INIT); flags = callout_unpend_disarm(c); atomic_set_int(&c->c_flags, CALLOUT_EXECUTED); crit_exit(); c_func(c_arg); crit_enter(); } else { flags = callout_unpend_disarm(c); } /* * Read and clear sc->running. 
If bit 0 was set, * a callout_stop() is likely blocked waiting for * the callback to complete. * * The sigclear above also cleared CALLOUT_WAITING * and returns the contents of flags prior to clearing * any bits. * * Interlock wakeup any _stop's waiting on us. Note * that once c_func() was called, the callout * structure (c) pointer may no longer be valid. It * can only be used for the wakeup. */ if ((atomic_readandclear_ptr(&sc->running) & 1) || (flags & CALLOUT_WAITING)) { wakeup(c); } /* NOTE: list may have changed */ } ++sc->softticks; } /* * Don't leave us holding the MP lock when we deschedule ourselves. */ if (mpsafe == 0) { mpsafe = 1; rel_mplock(); } sc->isrunning = 0; lwkt_deschedule_self(&sc->thread); /* == curthread */ lwkt_switch(); goto loop; /* NOT REACHED */ }
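/*
 * Illustrative sketch: the hashed timer wheel indexing used by the softclock
 * thread above.  A callout due at absolute tick c_time is queued on bucket
 * (c_time & cwheelmask), where the wheel size is a power of two; an entry
 * whose c_time does not match the current softticks is simply skipped and
 * revisited on a later revolution.  All values below are made up.
 */
#include <stdio.h>

int
main(void)
{
	unsigned nbuckets = 256;		/* must be a power of two */
	unsigned cwheelmask = nbuckets - 1;
	unsigned curticks = 1000;		/* current tick (example) */
	unsigned timeout = 700;			/* ticks from now (example) */
	unsigned c_time = curticks + timeout;	/* absolute expiry tick */

	printf("expiry tick %u -> bucket %u\n", c_time, c_time & cwheelmask);
	return 0;
}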
/* * Attempt to acquire a shared or exclusive token. Returns TRUE on success, * FALSE on failure. * * If TOK_EXCLUSIVE is set in mode we are attempting to get an exclusive * token, otherwise are attempting to get a shared token. * * If TOK_EXCLREQ is set in mode this is a blocking operation, otherwise * it is a non-blocking operation (for both exclusive or shared acquisions). */ static __inline int _lwkt_trytokref(lwkt_tokref_t ref, thread_t td, long mode) { lwkt_token_t tok; lwkt_tokref_t oref; long count; tok = ref->tr_tok; KASSERT(((mode & TOK_EXCLREQ) == 0 || /* non blocking */ td->td_gd->gd_intr_nesting_level == 0 || panic_cpu_gd == mycpu), ("Attempt to acquire token %p not already " "held in hard code section", tok)); if (mode & TOK_EXCLUSIVE) { /* * Attempt to get an exclusive token */ for (;;) { count = tok->t_count; oref = tok->t_ref; /* can be NULL */ cpu_ccfence(); if ((count & ~TOK_EXCLREQ) == 0) { /* * It is possible to get the exclusive bit. * We must clear TOK_EXCLREQ on successful * acquisition. */ if (atomic_cmpset_long(&tok->t_count, count, (count & ~TOK_EXCLREQ) | TOK_EXCLUSIVE)) { KKASSERT(tok->t_ref == NULL); tok->t_ref = ref; return TRUE; } /* retry */ } else if ((count & TOK_EXCLUSIVE) && oref >= &td->td_toks_base && oref < td->td_toks_stop) { /* * Our thread already holds the exclusive * bit, we treat this tokref as a shared * token (sorta) to make the token release * code easier. * * NOTE: oref cannot race above if it * happens to be ours, so we're good. * But we must still have a stable * variable for both parts of the * comparison. * * NOTE: Since we already have an exclusive * lock and don't need to check EXCLREQ * we can just use an atomic_add here */ atomic_add_long(&tok->t_count, TOK_INCR); ref->tr_count &= ~TOK_EXCLUSIVE; return TRUE; } else if ((mode & TOK_EXCLREQ) && (count & TOK_EXCLREQ) == 0) { /* * Unable to get the exclusive bit but being * asked to set the exclusive-request bit. * Since we are going to retry anyway just * set the bit unconditionally. */ atomic_set_long(&tok->t_count, TOK_EXCLREQ); return FALSE; } else { /* * Unable to get the exclusive bit and not * being asked to set the exclusive-request * (aka lwkt_trytoken()), or EXCLREQ was * already set. */ cpu_pause(); return FALSE; } /* retry */ } } else { /* * Attempt to get a shared token. Note that TOK_EXCLREQ * for shared tokens simply means the caller intends to * block. We never actually set the bit in tok->t_count. */ for (;;) { count = tok->t_count; oref = tok->t_ref; /* can be NULL */ cpu_ccfence(); if ((count & (TOK_EXCLUSIVE/*|TOK_EXCLREQ*/)) == 0) { /* XXX EXCLREQ should work */ /* * It is possible to get the token shared. */ if (atomic_cmpset_long(&tok->t_count, count, count + TOK_INCR)) { return TRUE; } /* retry */ } else if ((count & TOK_EXCLUSIVE) && oref >= &td->td_toks_base && oref < td->td_toks_stop) { /* * We own the exclusive bit on the token so * we can in fact also get it shared. */ atomic_add_long(&tok->t_count, TOK_INCR); return TRUE; } else { /* * We failed to get the token shared */ return FALSE; } /* retry */ } } }
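/*
 * Illustrative sketch (userland, C11 atomics): the kind of count word
 * _lwkt_trytokref() manipulates.  Two low bits stand in for EXCLUSIVE and
 * EXCLREQ and the rest of the word counts shared holds in units of INCR.
 * The bit layout and names here are assumptions for the example, not the
 * kernel's definitions.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define T_EXCLUSIVE	0x1ul
#define T_EXCLREQ	0x2ul
#define T_INCR		0x4ul		/* one shared hold */

static atomic_ulong t_count;

/* Try to take the token shared: only possible with no exclusive holder. */
static bool
try_shared(void)
{
	unsigned long count = atomic_load(&t_count);

	while ((count & T_EXCLUSIVE) == 0) {
		if (atomic_compare_exchange_weak(&t_count, &count,
		    count + T_INCR))
			return true;
		/* count was reloaded by the failed CAS; retry */
	}
	return false;
}

int
main(void)
{
	printf("shared acquire: %d, count now %#lx\n",
	    try_shared(), atomic_load(&t_count));
	return 0;
}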
/* * Do all IO operations on dm logical devices. */ static int dmstrategy(struct dev_strategy_args *ap) { cdev_t dev = ap->a_head.a_dev; struct bio *bio = ap->a_bio; struct buf *bp = bio->bio_buf; int bypass; dm_dev_t *dmv; dm_table_t *tbl; dm_table_entry_t *table_en; struct buf *nestbuf; uint32_t dev_type; uint64_t buf_start, buf_len, issued_len; uint64_t table_start, table_end; uint64_t start, end; buf_start = bio->bio_offset; buf_len = bp->b_bcount; tbl = NULL; table_end = 0; dev_type = 0; issued_len = 0; dmv = dev->si_drv1; switch(bp->b_cmd) { case BUF_CMD_READ: case BUF_CMD_WRITE: case BUF_CMD_FREEBLKS: bypass = 0; break; case BUF_CMD_FLUSH: bypass = 1; KKASSERT(buf_len == 0); break; default: bp->b_error = EIO; bp->b_resid = bp->b_bcount; biodone(bio); return 0; } if (bypass == 0 && bounds_check_with_mediasize(bio, DEV_BSIZE, dm_table_size(&dmv->table_head)) <= 0) { bp->b_resid = bp->b_bcount; biodone(bio); return 0; } /* Select active table */ tbl = dm_table_get_entry(&dmv->table_head, DM_TABLE_ACTIVE); nestiobuf_init(bio); devstat_start_transaction(&dmv->stats); /* * Find out what tables I want to select. */ SLIST_FOREACH(table_en, tbl, next) { /* * I need the number of bytes, not blocks. */ table_start = table_en->start * DEV_BSIZE; table_end = table_start + (table_en->length) * DEV_BSIZE; /* * Calculate the start and end */ start = MAX(table_start, buf_start); end = MIN(table_end, buf_start + buf_len); aprint_debug("----------------------------------------\n"); aprint_debug("table_start %010" PRIu64", table_end %010" PRIu64 "\n", table_start, table_end); aprint_debug("buf_start %010" PRIu64", buf_len %010" PRIu64"\n", buf_start, buf_len); aprint_debug("start-buf_start %010"PRIu64", end %010" PRIu64"\n", start - buf_start, end); aprint_debug("start %010" PRIu64" , end %010" PRIu64"\n", start, end); aprint_debug("\n----------------------------------------\n"); if (bypass) { nestbuf = getpbuf(NULL); nestbuf->b_flags |= bio->bio_buf->b_flags & B_HASBOGUS; nestiobuf_add(bio, nestbuf, 0, 0, &dmv->stats); nestbuf->b_bio1.bio_offset = 0; table_en->target->strategy(table_en, nestbuf); } else if (start < end) { nestbuf = getpbuf(NULL); nestbuf->b_flags |= bio->bio_buf->b_flags & B_HASBOGUS; nestiobuf_add(bio, nestbuf, start - buf_start, (end - start), &dmv->stats); issued_len += end - start; nestbuf->b_bio1.bio_offset = (start - table_start); table_en->target->strategy(table_en, nestbuf); } }
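/*
 * Illustrative sketch: the interval clipping dmstrategy() performs per table
 * entry.  The request [buf_start, buf_start + buf_len) is intersected with
 * the entry's extent [table_start, table_end); only a non-empty overlap is
 * pushed down to the target.  All byte values below are invented.
 */
#include <stdint.h>
#include <stdio.h>

#define U64MAX(a, b)	((a) > (b) ? (a) : (b))
#define U64MIN(a, b)	((a) < (b) ? (a) : (b))

int
main(void)
{
	uint64_t buf_start = 4096, buf_len = 65536;	   /* request */
	uint64_t table_start = 32768, table_end = 131072;  /* table entry */

	uint64_t start = U64MAX(table_start, buf_start);
	uint64_t end = U64MIN(table_end, buf_start + buf_len);

	if (start < end)
		printf("issue %ju bytes at request offset %ju\n",
		    (uintmax_t)(end - start), (uintmax_t)(start - buf_start));
	else
		printf("no overlap with this table entry\n");
	return 0;
}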
/* * Do an I/O operation to/from a cache block. */ int nwfs_doio(struct vnode *vp, struct bio *bio, struct ucred *cr, struct thread *td) { struct buf *bp = bio->bio_buf; struct uio *uiop; struct nwnode *np; struct nwmount *nmp; int error = 0; struct uio uio; struct iovec io; np = VTONW(vp); nmp = VFSTONWFS(vp->v_mount); uiop = &uio; uiop->uio_iov = &io; uiop->uio_iovcnt = 1; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_td = td; if (bp->b_cmd == BUF_CMD_READ) { io.iov_len = uiop->uio_resid = (size_t)bp->b_bcount; io.iov_base = bp->b_data; uiop->uio_rw = UIO_READ; switch (vp->v_type) { case VREG: uiop->uio_offset = bio->bio_offset; error = ncp_read(NWFSTOCONN(nmp), &np->n_fh, uiop, cr); if (error) break; if (uiop->uio_resid) { size_t left = uiop->uio_resid; size_t nread = bp->b_bcount - left; if (left > 0) bzero((char *)bp->b_data + nread, left); } break; /* case VDIR: nfsstats.readdir_bios++; uiop->uio_offset = bio->bio_offset; if (nmp->nm_flag & NFSMNT_RDIRPLUS) { error = nfs_readdirplusrpc(vp, uiop, cr); if (error == NFSERR_NOTSUPP) nmp->nm_flag &= ~NFSMNT_RDIRPLUS; } if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0) error = nfs_readdirrpc(vp, uiop, cr); if (error == 0 && uiop->uio_resid == (size_t)bp->b_bcount) bp->b_flags |= B_INVAL; break; */ default: kprintf("nwfs_doio: type %x unexpected\n",vp->v_type); break; } if (error) { bp->b_flags |= B_ERROR; bp->b_error = error; } } else { /* write */ KKASSERT(bp->b_cmd == BUF_CMD_WRITE); if (bio->bio_offset + bp->b_dirtyend > np->n_size) bp->b_dirtyend = np->n_size - bio->bio_offset; if (bp->b_dirtyend > bp->b_dirtyoff) { io.iov_len = uiop->uio_resid = (size_t)(bp->b_dirtyend - bp->b_dirtyoff); uiop->uio_offset = bio->bio_offset + bp->b_dirtyoff; io.iov_base = (char *)bp->b_data + bp->b_dirtyoff; uiop->uio_rw = UIO_WRITE; error = ncp_write(NWFSTOCONN(nmp), &np->n_fh, uiop, cr); /* * For an interrupted write, the buffer is still valid * and the write hasn't been pushed to the server yet, * so we can't set B_ERROR and report the interruption * by setting B_EINTR. For the async case, B_EINTR * is not relevant, so the rpc attempt is essentially * a noop. For the case of a V3 write rpc not being * committed to stable storage, the block is still * dirty and requires either a commit rpc or another * write rpc with iomode == NFSV3WRITE_FILESYNC before * the block is reused. This is indicated by setting * the B_DELWRI and B_NEEDCOMMIT flags. */ if (error == EINTR || (!error && (bp->b_flags & B_NEEDCOMMIT))) { crit_enter(); bp->b_flags &= ~(B_INVAL|B_NOCACHE); if ((bp->b_flags & B_PAGING) == 0) bdirty(bp); bp->b_flags |= B_EINTR; crit_exit(); } else { if (error) { bp->b_flags |= B_ERROR; bp->b_error /*= np->n_error */= error; /* np->n_flag |= NWRITEERR;*/ } bp->b_dirtyoff = bp->b_dirtyend = 0; } } else { bp->b_resid = 0; biodone(bio); return (0); } } bp->b_resid = (int)uiop->uio_resid; biodone(bio); return (error); }
static int tmpfs_nremove(struct vop_nremove_args *v) { struct vnode *dvp = v->a_dvp; struct namecache *ncp = v->a_nch->ncp; struct vnode *vp; int error; struct tmpfs_dirent *de; struct tmpfs_mount *tmp; struct tmpfs_node *dnode; struct tmpfs_node *node; /* * We have to acquire the vp from v->a_nch because we will likely * unresolve the namecache entry, and a vrele/vput is needed to * trigger the tmpfs_inactive/tmpfs_reclaim sequence. * * We have to use vget to clear any inactive state on the vnode, * otherwise the vnode may remain inactive and thus tmpfs_inactive * will not get called when we release it. */ error = cache_vget(v->a_nch, v->a_cred, LK_SHARED, &vp); KKASSERT(error == 0); vn_unlock(vp); if (vp->v_type == VDIR) { error = EISDIR; goto out; } dnode = VP_TO_TMPFS_DIR(dvp); node = VP_TO_TMPFS_NODE(vp); tmp = VFS_TO_TMPFS(vp->v_mount); de = tmpfs_dir_lookup(dnode, node, ncp); if (de == NULL) { error = ENOENT; goto out; } /* Files marked as immutable or append-only cannot be deleted. */ if ((node->tn_flags & (IMMUTABLE | APPEND | NOUNLINK)) || (dnode->tn_flags & APPEND)) { error = EPERM; goto out; } /* Remove the entry from the directory; as it is a file, we do not * have to change the number of hard links of the directory. */ tmpfs_dir_detach(dnode, de); /* Free the directory entry we just deleted. Note that the node * referred by it will not be removed until the vnode is really * reclaimed. */ tmpfs_free_dirent(tmp, de); if (node->tn_links > 0) { TMPFS_NODE_LOCK(node); node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \ TMPFS_NODE_MODIFIED; TMPFS_NODE_UNLOCK(node); } cache_setunresolved(v->a_nch); cache_setvp(v->a_nch, NULL); tmpfs_knote(vp, NOTE_DELETE); /*cache_inval_vp(vp, CINV_DESTROY);*/ tmpfs_knote(dvp, NOTE_WRITE); error = 0; out: vrele(vp); return error; }
/* * Vnode op for VM getpages. * Wish list .... get rid of the multiple IO routines * * nwfs_getpages(struct vnode *a_vp, vm_page_t *a_m, int a_count, * int a_reqpage, vm_ooffset_t a_offset) */ int nwfs_getpages(struct vop_getpages_args *ap) { #ifndef NWFS_RWCACHE return vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_reqpage, ap->a_seqaccess); #else int i, error, npages; size_t nextoff, toff; size_t count; size_t size; struct uio uio; struct iovec iov; vm_offset_t kva; struct buf *bp; struct vnode *vp; struct thread *td = curthread; /* XXX */ struct ucred *cred; struct nwmount *nmp; struct nwnode *np; vm_page_t *pages; KKASSERT(td->td_proc); cred = td->td_proc->p_ucred; vp = ap->a_vp; np = VTONW(vp); nmp = VFSTONWFS(vp->v_mount); pages = ap->a_m; count = (size_t)ap->a_count; if (vp->v_object == NULL) { kprintf("nwfs_getpages: called with non-merged cache vnode??\n"); return VM_PAGER_ERROR; } bp = getpbuf_kva(&nwfs_pbuf_freecnt); npages = btoc(count); kva = (vm_offset_t) bp->b_data; pmap_qenter(kva, pages, npages); iov.iov_base = (caddr_t) kva; iov.iov_len = count; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = IDX_TO_OFF(pages[0]->pindex); uio.uio_resid = count; uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_READ; uio.uio_td = td; error = ncp_read(NWFSTOCONN(nmp), &np->n_fh, &uio,cred); pmap_qremove(kva, npages); relpbuf(bp, &nwfs_pbuf_freecnt); if (error && (uio.uio_resid == count)) { kprintf("nwfs_getpages: error %d\n",error); for (i = 0; i < npages; i++) { if (ap->a_reqpage != i) vnode_pager_freepage(pages[i]); } return VM_PAGER_ERROR; } size = count - uio.uio_resid; for (i = 0, toff = 0; i < npages; i++, toff = nextoff) { vm_page_t m; nextoff = toff + PAGE_SIZE; m = pages[i]; m->flags &= ~PG_ZERO; /* * NOTE: pmap dirty bit should have already been cleared. * We do not clear it here. */ if (nextoff <= size) { m->valid = VM_PAGE_BITS_ALL; m->dirty = 0; } else { int nvalid = ((size + DEV_BSIZE - 1) - toff) & ~(DEV_BSIZE - 1); vm_page_set_validclean(m, 0, nvalid); } if (i != ap->a_reqpage) { /* * Whether or not to leave the page activated is up in * the air, but we should put the page on a page queue * somewhere (it already is in the object). Result: * It appears that empirical results show that * deactivating pages is best. */ /* * Just in case someone was asking for this page we * now tell them that it is ok to use. */ if (!error) { if (m->flags & PG_REFERENCED) vm_page_activate(m); else vm_page_deactivate(m); vm_page_wakeup(m); } else { vnode_pager_freepage(m); } } } return 0; #endif /* NWFS_RWCACHE */ }
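/*
 * Illustrative sketch: the partial-page validity math in the tail loop of
 * nwfs_getpages().  For the page that straddles the end of the transfer,
 * the number of valid bytes is the remainder rounded up to a DEV_BSIZE
 * boundary.  The sizes below are invented.
 */
#include <stddef.h>
#include <stdio.h>

#define DEV_BSIZE	512
#define PAGE_SIZE	4096

int
main(void)
{
	size_t size = 10000;	/* bytes actually read (example) */
	size_t toff = 8192;	/* offset of this page in the transfer */

	if (toff + PAGE_SIZE <= size) {
		printf("page fully valid\n");
	} else {
		int nvalid = (int)(((size + DEV_BSIZE - 1) - toff) &
		    ~(size_t)(DEV_BSIZE - 1));
		printf("valid bytes in last page: %d\n", nvalid);	/* 2048 */
	}
	return 0;
}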
static int tmpfs_nlink(struct vop_nlink_args *v) { struct vnode *dvp = v->a_dvp; struct vnode *vp = v->a_vp; struct namecache *ncp = v->a_nch->ncp; struct tmpfs_dirent *de; struct tmpfs_node *node; struct tmpfs_node *dnode; int error; KKASSERT(dvp != vp); /* XXX When can this be false? */ node = VP_TO_TMPFS_NODE(vp); dnode = VP_TO_TMPFS_NODE(dvp); /* XXX: Why aren't the following two tests done by the caller? */ /* Hard links of directories are forbidden. */ if (vp->v_type == VDIR) { error = EPERM; goto out; } /* Cannot create cross-device links. */ if (dvp->v_mount != vp->v_mount) { error = EXDEV; goto out; } /* Ensure that we do not overflow the maximum number of links imposed * by the system. */ KKASSERT(node->tn_links <= LINK_MAX); if (node->tn_links == LINK_MAX) { error = EMLINK; goto out; } /* We cannot create links of files marked immutable or append-only. */ if (node->tn_flags & (IMMUTABLE | APPEND)) { error = EPERM; goto out; } /* Allocate a new directory entry to represent the node. */ error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), node, ncp->nc_name, ncp->nc_nlen, &de); if (error != 0) goto out; /* Insert the new directory entry into the appropriate directory. */ tmpfs_dir_attach(dnode, de); /* vp link count has changed, so update node times. */ TMPFS_NODE_LOCK(node); node->tn_status |= TMPFS_NODE_CHANGED; TMPFS_NODE_UNLOCK(node); tmpfs_update(vp); tmpfs_knote(vp, NOTE_LINK); cache_setunresolved(v->a_nch); cache_setvp(v->a_nch, vp); tmpfs_knote(dvp, NOTE_WRITE); error = 0; out: return error; }
/* * Do a send by putting data in output queue and updating urgent * marker if URG set. Possibly send more data. Unlike the other * pru_*() routines, the mbuf chains are our responsibility. We * must either enqueue them or free them. The other pru_* routines * generally are caller-frees. */ static void tcp_usr_send(netmsg_t msg) { struct socket *so = msg->send.base.nm_so; int flags = msg->send.nm_flags; struct mbuf *m = msg->send.nm_m; int error = 0; struct inpcb *inp; struct tcpcb *tp; TCPDEBUG0; KKASSERT(msg->send.nm_control == NULL); KKASSERT(msg->send.nm_addr == NULL); KKASSERT((flags & PRUS_FREEADDR) == 0); inp = so->so_pcb; if (inp == NULL) { /* * OOPS! we lost a race, the TCP session got reset after * we checked SS_CANTSENDMORE, eg: while doing uiomove or a * network interrupt in the non-critical section of sosend(). */ m_freem(m); error = ECONNRESET; /* XXX EPIPE? */ tp = NULL; TCPDEBUG1(); goto out; } tp = intotcpcb(inp); TCPDEBUG1(); #ifdef foo /* * This is no longer necessary, since: * - sosendtcp() has already checked it for us * - It does not work with asynchronized send */ /* * Don't let too much OOB data build up */ if (flags & PRUS_OOB) { if (ssb_space(&so->so_snd) < -512) { m_freem(m); error = ENOBUFS; goto out; } } #endif /* * Pump the data into the socket. */ if (m) { ssb_appendstream(&so->so_snd, m); sowwakeup(so); } if (flags & PRUS_OOB) { /* * According to RFC961 (Assigned Protocols), * the urgent pointer points to the last octet * of urgent data. We continue, however, * to consider it to indicate the first octet * of data past the urgent section. * Otherwise, snd_up should be one lower. */ tp->snd_up = tp->snd_una + so->so_snd.ssb_cc; tp->t_flags |= TF_FORCE; error = tcp_output(tp); tp->t_flags &= ~TF_FORCE; } else { if (flags & PRUS_EOF) { /* * Close the send side of the connection after * the data is sent. */ socantsendmore(so); tp = tcp_usrclosed(tp); } if (tp != NULL && !tcp_output_pending(tp)) { if (flags & PRUS_MORETOCOME) tp->t_flags |= TF_MORETOCOME; error = tcp_output_fair(tp); if (flags & PRUS_MORETOCOME) tp->t_flags &= ~TF_MORETOCOME; } } COMMON_END1((flags & PRUS_OOB) ? PRU_SENDOOB : ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND), (flags & PRUS_NOREPLY)); }
static int tmpfs_nrename(struct vop_nrename_args *v) { struct vnode *fdvp = v->a_fdvp; struct namecache *fncp = v->a_fnch->ncp; struct vnode *fvp = fncp->nc_vp; struct vnode *tdvp = v->a_tdvp; struct namecache *tncp = v->a_tnch->ncp; struct vnode *tvp; struct tmpfs_dirent *de; struct tmpfs_mount *tmp; struct tmpfs_node *fdnode; struct tmpfs_node *fnode; struct tmpfs_node *tnode; struct tmpfs_node *tdnode; char *newname; char *oldname; int error; /* * Because tvp can get overwritten we have to vget it instead of * just vref or use it, otherwise its VINACTIVE flag may not get * cleared and the node won't get destroyed. */ error = cache_vget(v->a_tnch, v->a_cred, LK_SHARED, &tvp); if (error == 0) { tnode = VP_TO_TMPFS_NODE(tvp); vn_unlock(tvp); } else { tnode = NULL; } /* Disallow cross-device renames. * XXX Why isn't this done by the caller? */ if (fvp->v_mount != tdvp->v_mount || (tvp != NULL && fvp->v_mount != tvp->v_mount)) { error = EXDEV; goto out; } tmp = VFS_TO_TMPFS(tdvp->v_mount); tdnode = VP_TO_TMPFS_DIR(tdvp); /* If source and target are the same file, there is nothing to do. */ if (fvp == tvp) { error = 0; goto out; } fdnode = VP_TO_TMPFS_DIR(fdvp); fnode = VP_TO_TMPFS_NODE(fvp); de = tmpfs_dir_lookup(fdnode, fnode, fncp); /* Avoid manipulating '.' and '..' entries. */ if (de == NULL) { error = ENOENT; goto out_locked; } KKASSERT(de->td_node == fnode); /* * If replacing an entry in the target directory and that entry * is a directory, it must be empty. * * Kern_rename guarantees the destination to be a directory * if the source is one (it does?). */ if (tvp != NULL) { KKASSERT(tnode != NULL); if ((tnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (tdnode->tn_flags & (APPEND | IMMUTABLE))) { error = EPERM; goto out_locked; } if (fnode->tn_type == VDIR && tnode->tn_type == VDIR) { if (tnode->tn_size > 0) { error = ENOTEMPTY; goto out_locked; } } else if (fnode->tn_type == VDIR && tnode->tn_type != VDIR) { error = ENOTDIR; goto out_locked; } else if (fnode->tn_type != VDIR && tnode->tn_type == VDIR) { error = EISDIR; goto out_locked; } else { KKASSERT(fnode->tn_type != VDIR && tnode->tn_type != VDIR); } } if ((fnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (fdnode->tn_flags & (APPEND | IMMUTABLE))) { error = EPERM; goto out_locked; } /* * Ensure that we have enough memory to hold the new name, if it * has to be changed. */ if (fncp->nc_nlen != tncp->nc_nlen || bcmp(fncp->nc_name, tncp->nc_name, fncp->nc_nlen) != 0) { newname = kmalloc(tncp->nc_nlen + 1, tmp->tm_name_zone, M_WAITOK | M_NULLOK); if (newname == NULL) { error = ENOSPC; goto out_locked; } bcopy(tncp->nc_name, newname, tncp->nc_nlen); newname[tncp->nc_nlen] = '\0'; } else { newname = NULL; } /* * Unlink entry from source directory. Note that the kernel has * already checked for illegal recursion cases (renaming a directory * into a subdirectory of itself). */ if (fdnode != tdnode) tmpfs_dir_detach(fdnode, de); /* * Handle any name change. Swap with newname, we will * deallocate it at the end. */ if (newname != NULL) { #if 0 TMPFS_NODE_LOCK(fnode); fnode->tn_status |= TMPFS_NODE_CHANGED; TMPFS_NODE_UNLOCK(fnode); #endif oldname = de->td_name; de->td_name = newname; de->td_namelen = (uint16_t)tncp->nc_nlen; newname = oldname; } /* * Link entry to target directory. If the entry * represents a directory move the parent linkage * as well.
*/ if (fdnode != tdnode) { if (de->td_node->tn_type == VDIR) { TMPFS_VALIDATE_DIR(fnode); TMPFS_NODE_LOCK(tdnode); tdnode->tn_links++; tdnode->tn_status |= TMPFS_NODE_MODIFIED; TMPFS_NODE_UNLOCK(tdnode); TMPFS_NODE_LOCK(fnode); fnode->tn_dir.tn_parent = tdnode; fnode->tn_status |= TMPFS_NODE_CHANGED; TMPFS_NODE_UNLOCK(fnode); TMPFS_NODE_LOCK(fdnode); fdnode->tn_links--; fdnode->tn_status |= TMPFS_NODE_MODIFIED; TMPFS_NODE_UNLOCK(fdnode); } tmpfs_dir_attach(tdnode, de); } else { TMPFS_NODE_LOCK(tdnode); tdnode->tn_status |= TMPFS_NODE_MODIFIED; TMPFS_NODE_UNLOCK(tdnode); } /* * If we are overwriting an entry, we have to remove the old one * from the target directory. */ if (tvp != NULL) { /* Remove the old entry from the target directory. */ de = tmpfs_dir_lookup(tdnode, tnode, tncp); tmpfs_dir_detach(tdnode, de); tmpfs_knote(tdnode->tn_vnode, NOTE_DELETE); /* * Free the directory entry we just deleted. Note that the * node referred by it will not be removed until the vnode is * really reclaimed. */ tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), de); /*cache_inval_vp(tvp, CINV_DESTROY);*/ } /* * Finish up */ if (newname) { kfree(newname, tmp->tm_name_zone); newname = NULL; } cache_rename(v->a_fnch, v->a_tnch); tmpfs_knote(v->a_fdvp, NOTE_WRITE); tmpfs_knote(v->a_tdvp, NOTE_WRITE); if (fnode->tn_vnode) tmpfs_knote(fnode->tn_vnode, NOTE_RENAME); error = 0; out_locked: ; out: if (tvp) vrele(tvp); return error; }
/* * Unlock a lock. The caller must hold the lock either shared or exclusive. * * On the last release we handle any pending chains. */ void _mtx_unlock(mtx_t *mtx) { thread_t td __debugvar = curthread; u_int lock; u_int nlock; for (;;) { lock = mtx->mtx_lock; cpu_ccfence(); switch(lock) { case MTX_EXCLUSIVE | 1: /* * Last release, exclusive lock. * No exclusive or shared requests pending. */ KKASSERT(mtx->mtx_owner == td || mtx->mtx_owner == NULL); mtx->mtx_owner = NULL; nlock = 0; if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) goto done; break; case MTX_EXCLUSIVE | MTX_EXWANTED | 1: case MTX_EXCLUSIVE | MTX_EXWANTED | MTX_SHWANTED | 1: /* * Last release, exclusive lock. * Exclusive requests pending. * Exclusive requests have priority over shared reqs. */ KKASSERT(mtx->mtx_owner == td || mtx->mtx_owner == NULL); mtx->mtx_owner = NULL; if (mtx_chain_link_ex(mtx, lock)) goto done; break; case MTX_EXCLUSIVE | MTX_SHWANTED | 1: /* * Last release, exclusive lock. * * Shared requests are pending. Transfer our count (1) * to the first shared request, wakeup all shared reqs. */ KKASSERT(mtx->mtx_owner == td || mtx->mtx_owner == NULL); mtx->mtx_owner = NULL; if (mtx_chain_link_sh(mtx, lock)) goto done; break; case 1: /* * Last release, shared lock. * No exclusive or shared requests pending. */ nlock = 0; if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) goto done; break; case MTX_EXWANTED | 1: case MTX_EXWANTED | MTX_SHWANTED | 1: /* * Last release, shared lock. * * Exclusive requests are pending. Upgrade this * final shared lock to exclusive and transfer our * count (1) to the next exclusive request. * * Exclusive requests have priority over shared reqs. */ if (mtx_chain_link_ex(mtx, lock)) goto done; break; case MTX_SHWANTED | 1: /* * Last release, shared lock. * Shared requests pending. */ if (mtx_chain_link_sh(mtx, lock)) goto done; break; default: /* * We have to loop if this is the last release but * someone is fiddling with LINKSPIN. */ if ((lock & MTX_MASK) == 1) { KKASSERT(lock & MTX_LINKSPIN); break; } /* * Not the last release (shared or exclusive) */ nlock = lock - 1; KKASSERT((nlock & MTX_MASK) != MTX_MASK); if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) goto done; break; } /* loop try again */ cpu_pause(); } done: ; }
int hammer_ioc_volume_add(hammer_transaction_t trans, hammer_inode_t ip, struct hammer_ioc_volume *ioc) { struct hammer_mount *hmp = trans->hmp; struct mount *mp = hmp->mp; struct hammer_volume_ondisk ondisk; struct bigblock_stat stat; hammer_volume_t volume; int free_vol_no = 0; int error; if (mp->mnt_flag & MNT_RDONLY) { hmkprintf(hmp, "Cannot add volume to read-only HAMMER filesystem\n"); return (EINVAL); } if (hmp->nvolumes >= HAMMER_MAX_VOLUMES) { hmkprintf(hmp, "Max number of HAMMER volumes exceeded\n"); return (EINVAL); } if (hammer_lock_ex_try(&hmp->volume_lock) != 0) { hmkprintf(hmp, "Another volume operation is in progress!\n"); return (EAGAIN); } /* * Find an unused volume number. */ while (free_vol_no < HAMMER_MAX_VOLUMES && HAMMER_VOLUME_NUMBER_IS_SET(hmp, free_vol_no)) { ++free_vol_no; } if (free_vol_no >= HAMMER_MAX_VOLUMES) { hmkprintf(hmp, "Max number of HAMMER volumes exceeded\n"); error = EINVAL; goto end; } error = hammer_format_volume_header( hmp, &ondisk, hmp->rootvol->ondisk->vol_name, free_vol_no, hmp->nvolumes+1, ioc->vol_size, ioc->boot_area_size, ioc->mem_area_size); if (error) goto end; error = hammer_install_volume(hmp, ioc->device_name, NULL, &ondisk); if (error) goto end; hammer_sync_lock_sh(trans); hammer_lock_ex(&hmp->blkmap_lock); volume = hammer_get_volume(hmp, free_vol_no, &error); KKASSERT(volume != NULL && error == 0); error = hammer_format_freemap(trans, volume, &stat); KKASSERT(error == 0); hammer_rel_volume(volume, 0); ++hmp->nvolumes; error = hammer_update_volumes_header(trans, &stat); KKASSERT(error == 0); hammer_unlock(&hmp->blkmap_lock); hammer_sync_unlock(trans); KKASSERT(error == 0); end: hammer_unlock(&hmp->volume_lock); if (error) hmkprintf(hmp, "An error occurred: %d\n", error); return (error); }
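/*
 * Illustrative sketch: the linear scan hammer_ioc_volume_add() uses to find
 * an unused volume number, with a plain bitmask standing in for
 * HAMMER_VOLUME_NUMBER_IS_SET().  The in-use mask below is made up.
 */
#include <stdio.h>

#define MAX_VOLUMES	32

int
main(void)
{
	unsigned long inuse = 0x2fUL;	/* volumes 0-3 and 5 in use (example) */
	int vol_no = 0;

	while (vol_no < MAX_VOLUMES && (inuse & (1UL << vol_no)))
		++vol_no;
	if (vol_no >= MAX_VOLUMES)
		printf("no free volume number\n");
	else
		printf("first free volume number: %d\n", vol_no);	/* 4 */
	return 0;
}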
/* * Exclusive-lock a mutex, block until acquired unless link is async. * Recursion is allowed. * * Returns 0 on success, the tsleep() return code on failure, EINPROGRESS * if async. If immediately successful an async exclusive lock will return 0 * and not issue the async callback or link the link structure. The caller * must handle this case (typically this is an optimal code path). * * A tsleep() error can only be returned if PCATCH is specified in the flags. */ static __inline int __mtx_lock_ex(mtx_t *mtx, mtx_link_t *link, int flags, int to) { thread_t td; u_int lock; u_int nlock; int error; int isasync; for (;;) { lock = mtx->mtx_lock; cpu_ccfence(); if (lock == 0) { nlock = MTX_EXCLUSIVE | 1; if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) { mtx->mtx_owner = curthread; cpu_sfence(); link->state = MTX_LINK_ACQUIRED; error = 0; break; } continue; } if ((lock & MTX_EXCLUSIVE) && mtx->mtx_owner == curthread) { KKASSERT((lock & MTX_MASK) != MTX_MASK); nlock = lock + 1; if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) { cpu_sfence(); link->state = MTX_LINK_ACQUIRED; error = 0; break; } continue; } /* * We need MTX_LINKSPIN to manipulate exlink or * shlink. * * We must set MTX_EXWANTED with MTX_LINKSPIN to indicate * pending exclusive requests. It cannot be set as a separate * operation prior to acquiring MTX_LINKSPIN. * * To avoid unnecessary cpu cache traffic we poll * for collisions. It is also possible that EXWANTED * state failing the above test was spurious, so all the * tests must be repeated if we cannot obtain LINKSPIN * with the prior state tests intact (i.e. don't reload * the (lock) variable here, for heaven's sake!). */ if (lock & MTX_LINKSPIN) { cpu_pause(); continue; } td = curthread; nlock = lock | MTX_EXWANTED | MTX_LINKSPIN; crit_enter_raw(td); if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock) == 0) { crit_exit_raw(td); continue; } /* * Check for early abort. */ if (link->state == MTX_LINK_ABORTED) { if (mtx->mtx_exlink == NULL) { atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN | MTX_EXWANTED); } else { atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN); } crit_exit_raw(td); link->state = MTX_LINK_IDLE; error = ENOLCK; break; } /* * Add our link to the exlink list and release LINKSPIN. */ link->owner = td; link->state = MTX_LINK_LINKED_EX; if (mtx->mtx_exlink) { link->next = mtx->mtx_exlink; link->prev = link->next->prev; link->next->prev = link; link->prev->next = link; } else { link->next = link; link->prev = link; mtx->mtx_exlink = link; } isasync = (link->callback != NULL); atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN); crit_exit_raw(td); /* * If asynchronous lock request return without * blocking, leave link structure linked. */ if (isasync) { error = EINPROGRESS; break; } /* * Wait for lock */ error = mtx_wait_link(mtx, link, flags, to); break; } return (error); }
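/*
 * Illustrative sketch (userland, invented types): the circular doubly-linked
 * waiter list __mtx_lock_ex() builds while holding LINKSPIN.  A new link
 * either becomes the sole element (next/prev pointing at itself) or is
 * spliced in ahead of the current head, i.e. at the tail of the ring.
 */
#include <stddef.h>
#include <stdio.h>

struct waitlink {
	struct waitlink *next;
	struct waitlink *prev;
	int id;
};

static struct waitlink *head;

static void
link_tail(struct waitlink *link)
{
	if (head != NULL) {
		link->next = head;
		link->prev = head->prev;
		link->next->prev = link;
		link->prev->next = link;
	} else {
		link->next = link;
		link->prev = link;
		head = link;
	}
}

int
main(void)
{
	struct waitlink a = { .id = 1 }, b = { .id = 2 }, c = { .id = 3 };
	struct waitlink *w;

	link_tail(&a);
	link_tail(&b);
	link_tail(&c);
	w = head;
	do {
		printf("waiter %d\n", w->id);	/* prints 1, 2, 3 */
		w = w->next;
	} while (w != head);
	return 0;
}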
/* * mmap_args(void *addr, size_t len, int prot, int flags, int fd, * long pad, off_t pos) * * Memory Map (mmap) system call. Note that the file offset * and address are allowed to be NOT page aligned, though if * the MAP_FIXED flag is set, both must have the same remainder * modulo the PAGE_SIZE (POSIX 1003.1b). If the address is not * page-aligned, the actual mapping starts at trunc_page(addr) * and the return value is adjusted up by the page offset. * * Generally speaking, only character devices which are themselves * memory-based, such as a video framebuffer, can be mmap'd. Otherwise * there would be no cache coherency between a descriptor and a VM mapping * both to the same character device. * * Block devices can be mmap'd no matter what they represent. Cache coherency * is maintained as long as you do not write directly to the underlying * character device. * * No requirements; sys_mmap path holds the vm_token */ int kern_mmap(struct vmspace *vms, caddr_t uaddr, size_t ulen, int uprot, int uflags, int fd, off_t upos, void **res) { struct thread *td = curthread; struct proc *p = td->td_proc; struct file *fp = NULL; struct vnode *vp; vm_offset_t addr; vm_offset_t tmpaddr; vm_size_t size, pageoff; vm_prot_t prot, maxprot; void *handle; int flags, error; off_t pos; vm_object_t obj; KKASSERT(p); addr = (vm_offset_t) uaddr; size = ulen; prot = uprot & VM_PROT_ALL; flags = uflags; pos = upos; /* * Make sure mapping fits into numeric range etc. * * NOTE: We support the full unsigned range for size now. */ if (((flags & MAP_ANON) && (fd != -1 || pos != 0))) return (EINVAL); if (flags & MAP_STACK) { if ((fd != -1) || ((prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE))) return (EINVAL); flags |= MAP_ANON; pos = 0; } /* * Virtual page tables cannot be used with MAP_STACK. Apart from * it not making any sense, the aux union is used by both * types. * * Because the virtual page table is stored in the backing object * and might be updated by the kernel, the mapping must be R+W. */ if (flags & MAP_VPAGETABLE) { if (vkernel_enable == 0) return (EOPNOTSUPP); if (flags & MAP_STACK) return (EINVAL); if ((prot & (PROT_READ|PROT_WRITE)) != (PROT_READ|PROT_WRITE)) return (EINVAL); } /* * Align the file position to a page boundary, * and save its page offset component. */ pageoff = (pos & PAGE_MASK); pos -= pageoff; /* Adjust size for rounding (on both ends). */ size += pageoff; /* low end... */ size = (vm_size_t) round_page(size); /* hi end */ if (size < ulen) /* wrap */ return(EINVAL); /* * Check for illegal addresses. Watch out for address wrap... Note * that VM_*_ADDRESS are not constants due to casts (argh). */ if (flags & (MAP_FIXED | MAP_TRYFIXED)) { /* * The specified address must have the same remainder * as the file offset taken modulo PAGE_SIZE, so it * should be aligned after adjustment by pageoff. */ addr -= pageoff; if (addr & PAGE_MASK) return (EINVAL); /* * Address range must be all in user VM space and not wrap. */ tmpaddr = addr + size; if (tmpaddr < addr) return (EINVAL); if (VM_MAX_USER_ADDRESS > 0 && tmpaddr > VM_MAX_USER_ADDRESS) return (EINVAL); if (VM_MIN_USER_ADDRESS > 0 && addr < VM_MIN_USER_ADDRESS) return (EINVAL); } else { /* * Get a hint of where to map. It also provides mmap offset * randomization if enabled. */ addr = vm_map_hint(p, addr, prot); } if (flags & MAP_ANON) { /* * Mapping blank space is trivial. */ handle = NULL; maxprot = VM_PROT_ALL; } else { /* * Mapping file, get fp for validation. Obtain vnode and make * sure it is of appropriate type. 
*/ fp = holdfp(p->p_fd, fd, -1); if (fp == NULL) return (EBADF); if (fp->f_type != DTYPE_VNODE) { error = EINVAL; goto done; } /* * POSIX shared-memory objects are defined to have * kernel persistence, and are not defined to support * read(2)/write(2) -- or even open(2). Thus, we can * use MAP_ASYNC to trade on-disk coherence for speed. * The shm_open(3) library routine turns on the FPOSIXSHM * flag to request this behavior. */ if (fp->f_flag & FPOSIXSHM) flags |= MAP_NOSYNC; vp = (struct vnode *) fp->f_data; /* * Validate the vnode for the operation. */ switch(vp->v_type) { case VREG: /* * Get the proper underlying object */ if ((obj = vp->v_object) == NULL) { error = EINVAL; goto done; } KKASSERT((struct vnode *)obj->handle == vp); break; case VCHR: /* * Make sure a device has not been revoked. * Mappability is handled by the device layer. */ if (vp->v_rdev == NULL) { error = EBADF; goto done; } break; default: /* * Nothing else is mappable. */ error = EINVAL; goto done; } /* * XXX hack to handle use of /dev/zero to map anon memory (ala * SunOS). */ if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) { handle = NULL; maxprot = VM_PROT_ALL; flags |= MAP_ANON; pos = 0; } else { /* * cdevs do not provide private mappings of any kind. */ if (vp->v_type == VCHR && (flags & (MAP_PRIVATE|MAP_COPY))) { error = EINVAL; goto done; } /* * Ensure that file and memory protections are * compatible. Note that we only worry about * writability if mapping is shared; in this case, * current and max prot are dictated by the open file. * XXX use the vnode instead? Problem is: what * credentials do we use for determination? What if * proc does a setuid? */ maxprot = VM_PROT_EXECUTE; /* ??? */ if (fp->f_flag & FREAD) { maxprot |= VM_PROT_READ; } else if (prot & PROT_READ) { error = EACCES; goto done; } /* * If we are sharing potential changes (either via * MAP_SHARED or via the implicit sharing of character * device mappings), and we are trying to get write * permission although we opened it without asking * for it, bail out. Check for superuser, only if * we're at securelevel < 1, to allow the XIG X server * to continue to work. */ if ((flags & MAP_SHARED) != 0 || vp->v_type == VCHR) { if ((fp->f_flag & FWRITE) != 0) { struct vattr va; if ((error = VOP_GETATTR(vp, &va))) { goto done; } if ((va.va_flags & (IMMUTABLE|APPEND)) == 0) { maxprot |= VM_PROT_WRITE; } else if (prot & PROT_WRITE) { error = EPERM; goto done; } } else if ((prot & PROT_WRITE) != 0) { error = EACCES; goto done; } } else { maxprot |= VM_PROT_WRITE; } handle = (void *)vp; } } /* Token serializes access to vm_map.nentries against vm_mmap */ lwkt_gettoken(&vm_token); /* * Do not allow more than a certain number of vm_map_entry structures * per process. Scale with the number of rforks sharing the map * to make the limit reasonable for threads. */ if (max_proc_mmap && vms->vm_map.nentries >= max_proc_mmap * vms->vm_sysref.refcnt) { error = ENOMEM; lwkt_reltoken(&vm_token); goto done; } error = vm_mmap(&vms->vm_map, &addr, size, prot, maxprot, flags, handle, pos); if (error == 0) *res = (void *)(addr + pageoff); lwkt_reltoken(&vm_token); done: if (fp) fdrop(fp); return (error); }
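/*
 * Illustrative sketch only: the page-rounding arithmetic kern_mmap()
 * applies to the file offset and length, worked with concrete numbers.
 * A 4 KiB PAGE_SIZE is assumed; the kernel uses round_page() rather than
 * the open-coded rounding below.
 */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE	4096ULL
#define PAGE_MASK	(PAGE_SIZE - 1)

int
main(void)
{
	uint64_t pos = 10000;		/* requested file offset */
	uint64_t size = 3000;		/* requested length */
	uint64_t pageoff;

	pageoff = pos & PAGE_MASK;	/* 10000 % 4096 = 1808 */
	pos -= pageoff;			/* mapping starts at offset 8192 */
	size += pageoff;		/* cover the leading partial page */
	size = (size + PAGE_MASK) & ~PAGE_MASK;		/* round_page() */

	/*
	 * pos = 8192 and size = 8192 (two pages).  The address eventually
	 * returned to the caller is adjusted up by pageoff, so user code
	 * still sees a pointer corresponding to its original offset.
	 */
	printf("pos=%llu size=%llu pageoff=%llu\n",
	    (unsigned long long)pos, (unsigned long long)size,
	    (unsigned long long)pageoff);
	return (0);
}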
static int dm_target_stripe_dump(dm_table_entry_t *table_en, void *data, size_t length, off_t offset) { dm_target_stripe_config_t *tsc; uint64_t blkno, blkoff; uint64_t stripe, blknr; uint32_t stripe_off, stripe_rest, num_blks, issue_blks; uint64_t off2, len2; int devnr; tsc = table_en->target_config; if (tsc == NULL) return 0; /* calculate extent of request */ KKASSERT(length % DEV_BSIZE == 0); blkno = offset / DEV_BSIZE; blkoff = 0; num_blks = length / DEV_BSIZE; /* * 0 length means flush buffers and return */ if (length == 0) { for (devnr = 0; devnr < tsc->stripe_num; ++devnr) { if (tsc->stripe_devs[devnr].pdev->pdev_vnode->v_rdev == NULL) return ENXIO; dev_ddump(tsc->stripe_devs[devnr].pdev->pdev_vnode->v_rdev, data, 0, offset, 0); } return 0; } while (num_blks > 0) { /* block number to stripe piece number */ stripe = blkno / tsc->stripe_chunksize; stripe_off = blkno % tsc->stripe_chunksize; /* where we are inside the stripe */ devnr = stripe % tsc->stripe_num; blknr = stripe / tsc->stripe_num; /* how much is left before we hit a boundary */ stripe_rest = tsc->stripe_chunksize - stripe_off; /* issue this piece on stripe `stripe' */ issue_blks = MIN(stripe_rest, num_blks); #if 0 nestiobuf_add(bio, nestbuf, blkoff, issue_blks * DEV_BSIZE); #endif len2 = issue_blks * DEV_BSIZE; /* length in bytes */ off2 = blknr * tsc->stripe_chunksize + stripe_off; off2 += tsc->stripe_devs[devnr].offset; off2 *= DEV_BSIZE; off2 = dm_pdev_correct_dump_offset(tsc->stripe_devs[devnr].pdev, off2); if (tsc->stripe_devs[devnr].pdev->pdev_vnode->v_rdev == NULL) return ENXIO; dev_ddump(tsc->stripe_devs[devnr].pdev->pdev_vnode->v_rdev, (char *)data + blkoff, 0, off2, len2); blkno += issue_blks; blkoff += issue_blks * DEV_BSIZE; num_blks -= issue_blks; } return 0; }
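/*
 * Illustrative sketch only: the block-to-stripe mapping used by the dump
 * loop above, worked for a hypothetical 2-disk stripe with an 8-block chunk
 * size.  Per-device offsets, DEV_BSIZE scaling and the dump-offset
 * correction are left out; the kernel applies those afterwards.
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t chunksize = 8;		/* blocks per stripe chunk */
	int ndisks = 2;
	uint64_t blkno = 21;		/* logical block in the striped volume */

	uint64_t stripe = blkno / chunksize;		/* 2: third chunk */
	uint64_t stripe_off = blkno % chunksize;	/* 5: offset in chunk */
	int devnr = stripe % ndisks;			/* 0: lands on disk 0 */
	uint64_t blknr = stripe / ndisks;		/* 1: chunk # on disk */
	uint64_t rest = chunksize - stripe_off;		/* 3 blocks to boundary */

	printf("disk %d, chunk %llu, offset %llu, %llu blocks to boundary\n",
	    devnr, (unsigned long long)blknr, (unsigned long long)stripe_off,
	    (unsigned long long)rest);
	return (0);
}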
static int cbq_add_queue_locked(struct pf_altq *a, cbq_state_t *cbqp) { struct rm_class *borrow, *parent; struct rm_class *cl; struct cbq_opts *opts; int i; KKASSERT(a->qid != 0); /* * Find a free slot in the class table. If the slot matching * the lower bits of qid is free, use this slot. Otherwise, * use the first free slot. */ i = a->qid % CBQ_MAX_CLASSES; if (cbqp->cbq_class_tbl[i] != NULL) { for (i = 0; i < CBQ_MAX_CLASSES; i++) if (cbqp->cbq_class_tbl[i] == NULL) break; if (i == CBQ_MAX_CLASSES) return (EINVAL); } opts = &a->pq_u.cbq_opts; /* check parameters */ if (a->priority >= CBQ_MAXPRI) return (EINVAL); /* Get pointers to parent and borrow classes. */ parent = clh_to_clp(cbqp, a->parent_qid); if (opts->flags & CBQCLF_BORROW) borrow = parent; else borrow = NULL; /* * A class must borrow from its parent or it cannot * borrow at all. Hence, borrow can be NULL. */ if (parent == NULL && (opts->flags & CBQCLF_ROOTCLASS) == 0) { kprintf("cbq_add_queue: no parent class!\n"); return (EINVAL); } if ((borrow != parent) && (borrow != NULL)) { kprintf("cbq_add_class: borrow class != parent\n"); return (EINVAL); } /* * check parameters */ switch (opts->flags & CBQCLF_CLASSMASK) { case CBQCLF_ROOTCLASS: if (parent != NULL) return (EINVAL); if (cbqp->ifnp.root_) return (EINVAL); break; case CBQCLF_DEFCLASS: if (cbqp->ifnp.default_) return (EINVAL); break; case 0: if (a->qid == 0) return (EINVAL); break; default: /* invalid combination of class flag bits */ return (EINVAL); } /* * Create a class. If this is a root class, initialize the * interface. */ if ((opts->flags & CBQCLF_CLASSMASK) == CBQCLF_ROOTCLASS) { rmc_init(cbqp->ifnp.ifq_, &cbqp->ifnp, opts->ns_per_byte, cbqrestart, a->qlimit, RM_MAXQUEUED, opts->maxidle, opts->minidle, opts->offtime, opts->flags); cl = cbqp->ifnp.root_; } else { cl = rmc_newclass(a->priority, &cbqp->ifnp, opts->ns_per_byte, rmc_delay_action, a->qlimit, parent, borrow, opts->maxidle, opts->minidle, opts->offtime, opts->pktsize, opts->flags); } if (cl == NULL) return (ENOMEM); /* return handle to user space. */ cl->stats_.handle = a->qid; cl->stats_.depth = cl->depth_; /* save the allocated class */ cbqp->cbq_class_tbl[i] = cl; if ((opts->flags & CBQCLF_CLASSMASK) == CBQCLF_DEFCLASS) cbqp->ifnp.default_ = cl; return (0); }
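/*
 * Illustrative sketch only: the class-table slot selection used above.
 * TBL_SIZE is a stand-in for CBQ_MAX_CLASSES; the policy is "prefer the
 * slot addressed by the low bits of qid, else the first free slot".
 */
#include <stddef.h>

#define TBL_SIZE 64

/* Return the chosen slot index, or -1 when the table is full. */
static int
pick_slot(void *tbl[TBL_SIZE], unsigned int qid)
{
	int i = qid % TBL_SIZE;

	if (tbl[i] != NULL) {
		for (i = 0; i < TBL_SIZE; i++) {
			if (tbl[i] == NULL)
				break;
		}
		if (i == TBL_SIZE)
			return (-1);
	}
	return (i);
}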
/* * Allocates a cluster and its underlying chain structures. The underlying * chains will be locked. The cluster and underlying chains will have one * ref. */ hammer2_cluster_t * hammer2_cluster_alloc(hammer2_pfsmount_t *pmp, hammer2_trans_t *trans, hammer2_blockref_t *bref) { hammer2_cluster_t *cluster; hammer2_cluster_t *rcluster; hammer2_chain_t *chain; u_int bytes = 1U << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX); int i; KKASSERT(pmp != NULL); /* * Construct the appropriate system structure. */ switch(bref->type) { case HAMMER2_BREF_TYPE_INODE: case HAMMER2_BREF_TYPE_INDIRECT: case HAMMER2_BREF_TYPE_FREEMAP_NODE: case HAMMER2_BREF_TYPE_DATA: case HAMMER2_BREF_TYPE_FREEMAP_LEAF: /* * Chains are really only associated with the hmp but we * maintain a pmp association for per-mount memory tracking * purposes. The pmp can be NULL. */ break; case HAMMER2_BREF_TYPE_VOLUME: case HAMMER2_BREF_TYPE_FREEMAP: chain = NULL; panic("hammer2_cluster_alloc volume type illegal for op"); default: chain = NULL; panic("hammer2_cluster_alloc: unrecognized blockref type: %d", bref->type); } cluster = kmalloc(sizeof(*cluster), M_HAMMER2, M_WAITOK | M_ZERO); cluster->refs = 1; rcluster = &pmp->iroot->cluster; for (i = 0; i < rcluster->nchains; ++i) { chain = hammer2_chain_alloc(rcluster->array[i]->hmp, pmp, trans, bref); chain->hmp = rcluster->array[i]->hmp; chain->bref = *bref; chain->bytes = bytes; chain->refs = 1; chain->flags = HAMMER2_CHAIN_ALLOCATED; chain->delete_xid = HAMMER2_XID_MAX; /* * Set modify_xid if a transaction is creating the inode. * Enforce update_xlo = 0 so nearby transactions do not think * it has been flushed when it hasn't. * * NOTE: When loading a chain from backing store or creating a * snapshot, trans will be NULL and the caller is * responsible for setting these fields. */ if (trans) { chain->modify_xid = trans->sync_xid; chain->update_xlo = 0; } cluster->array[i] = chain; } cluster->nchains = i; cluster->pmp = pmp; cluster->focus = cluster->array[0]; return (cluster); }
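/*
 * Illustrative sketch only: how the chain size is recovered from a
 * blockref's data_off, whose low 6 bits carry the size as a power-of-two
 * radix (the HAMMER2_OFF_MASK_RADIX convention used above).  The sample
 * offset and radix are hypothetical.
 */
#include <stdint.h>
#include <stdio.h>

#define OFF_MASK_RADIX	0x3fULL

int
main(void)
{
	/* A 16KiB block at device offset 0x100000: radix 14 in the low bits. */
	uint64_t data_off = 0x100000ULL | 14;
	unsigned int bytes = 1U << (int)(data_off & OFF_MASK_RADIX);
	uint64_t devoff = data_off & ~OFF_MASK_RADIX;

	printf("device offset 0x%llx, %u bytes\n",
	    (unsigned long long)devoff, bytes);
	return (0);
}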
static int le_pci_attach(device_t dev) { struct le_pci_softc *lesc; struct lance_softc *sc; int error, i; lesc = device_get_softc(dev); sc = &lesc->sc_am79900.lsc; pci_enable_busmaster(dev); pci_enable_io(dev, PCIM_CMD_PORTEN); lesc->sc_rrid = PCIR_BAR(0); lesc->sc_rres = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &lesc->sc_rrid, RF_ACTIVE); if (lesc->sc_rres == NULL) { device_printf(dev, "cannot allocate registers\n"); error = ENXIO; goto fail_mtx; } lesc->sc_regt = rman_get_bustag(lesc->sc_rres); lesc->sc_regh = rman_get_bushandle(lesc->sc_rres); lesc->sc_irid = 0; if ((lesc->sc_ires = bus_alloc_resource_any(dev, SYS_RES_IRQ, &lesc->sc_irid, RF_SHAREABLE | RF_ACTIVE)) == NULL) { device_printf(dev, "cannot allocate interrupt\n"); error = ENXIO; goto fail_rres; } error = bus_dma_tag_create( NULL, /* parent */ 1, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ BUS_SPACE_MAXSIZE_32BIT, /* maxsize */ 0, /* nsegments */ BUS_SPACE_MAXSIZE_32BIT, /* maxsegsize */ BUS_DMA_WAITOK, /* flags */ &lesc->sc_pdmat); if (error != 0) { device_printf(dev, "cannot allocate parent DMA tag\n"); goto fail_ires; } sc->sc_memsize = PCNET_MEMSIZE; /* * For Am79C970A, Am79C971 and Am79C978 the init block must be 2-byte * aligned and the ring descriptors must be 16-byte aligned when using * a 32-bit software style. */ error = bus_dma_tag_create( lesc->sc_pdmat, /* parent */ 16, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ sc->sc_memsize, /* maxsize */ 1, /* nsegments */ sc->sc_memsize, /* maxsegsize */ BUS_DMA_WAITOK, /* flags */ &lesc->sc_dmat); if (error != 0) { device_printf(dev, "cannot allocate buffer DMA tag\n"); goto fail_pdtag; } error = bus_dmamem_alloc(lesc->sc_dmat, (void **)&sc->sc_mem, BUS_DMA_WAITOK | BUS_DMA_COHERENT, &lesc->sc_dmam); if (error != 0) { device_printf(dev, "cannot allocate DMA buffer memory\n"); goto fail_dtag; } sc->sc_addr = 0; error = bus_dmamap_load(lesc->sc_dmat, lesc->sc_dmam, sc->sc_mem, sc->sc_memsize, le_pci_dma_callback, sc, 0); if (error != 0 || sc->sc_addr == 0) { device_printf(dev, "cannot load DMA buffer map\n"); goto fail_dmem; } sc->sc_flags = LE_BSWAP; sc->sc_conf3 = 0; sc->sc_mediastatus = NULL; switch (pci_get_device(dev)) { case AMD_PCNET_HOME: sc->sc_mediachange = le_pci_mediachange; sc->sc_supmedia = le_home_supmedia; sc->sc_nsupmedia = sizeof(le_home_supmedia) / sizeof(int); sc->sc_defaultmedia = le_home_supmedia[0]; break; default: sc->sc_mediachange = le_pci_mediachange; sc->sc_supmedia = le_pci_supmedia; sc->sc_nsupmedia = sizeof(le_pci_supmedia) / sizeof(int); sc->sc_defaultmedia = le_pci_supmedia[0]; } /* * Extract the physical MAC address from the ROM. 
*/ for (i = 0; i < sizeof(sc->sc_enaddr); i++) sc->sc_enaddr[i] = bus_space_read_1(lesc->sc_regt, lesc->sc_regh, i); sc->sc_copytodesc = lance_copytobuf_contig; sc->sc_copyfromdesc = lance_copyfrombuf_contig; sc->sc_copytobuf = lance_copytobuf_contig; sc->sc_copyfrombuf = lance_copyfrombuf_contig; sc->sc_zerobuf = lance_zerobuf_contig; sc->sc_rdcsr = le_pci_rdcsr; sc->sc_wrcsr = le_pci_wrcsr; sc->sc_hwreset = le_pci_hwreset; sc->sc_hwinit = NULL; sc->sc_hwintr = NULL; sc->sc_nocarrier = NULL; error = am79900_config(&lesc->sc_am79900, device_get_name(dev), device_get_unit(dev)); if (error != 0) { device_printf(dev, "cannot attach Am79900\n"); goto fail_dmap; } error = bus_setup_intr(dev, lesc->sc_ires, INTR_MPSAFE, am79900_intr, sc, &lesc->sc_ih, sc->ifp->if_serializer); if (error != 0) { device_printf(dev, "cannot set up interrupt\n"); goto fail_am79900; } sc->ifp->if_cpuid = rman_get_cpuid(lesc->sc_ires); KKASSERT(sc->ifp->if_cpuid >= 0 && sc->ifp->if_cpuid < ncpus); return (0); fail_am79900: am79900_detach(&lesc->sc_am79900); fail_dmap: bus_dmamap_unload(lesc->sc_dmat, lesc->sc_dmam); fail_dmem: bus_dmamem_free(lesc->sc_dmat, sc->sc_mem, lesc->sc_dmam); fail_dtag: bus_dma_tag_destroy(lesc->sc_dmat); fail_pdtag: bus_dma_tag_destroy(lesc->sc_pdmat); fail_ires: bus_release_resource(dev, SYS_RES_IRQ, lesc->sc_irid, lesc->sc_ires); fail_rres: bus_release_resource(dev, SYS_RES_IOPORT, lesc->sc_rrid, lesc->sc_rres); fail_mtx: return (error); }
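/*
 * Illustrative sketch only: le_pci_dma_callback() is not shown in this
 * excerpt.  A typical callback for a single-segment, synchronous
 * bus_dmamap_load() like the one above simply records the segment's bus
 * address so the caller can verify sc->sc_addr afterwards; the version
 * below is an assumption about its shape, not the driver's actual code.
 */
static void
example_dma_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	struct lance_softc *sc = arg;

	if (error != 0)
		return;			/* sc_addr stays 0; attach bails out */
	KKASSERT(nseg == 1);
	sc->sc_addr = segs[0].ds_addr;
}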
/* * vop_compat_nmknod { struct nchandle *a_nch, XXX STOPGAP FUNCTION * struct vnode *a_dvp, * struct vnode **a_vpp, * struct ucred *a_cred, * struct vattr *a_vap } * * Create a device or fifo node as specified by a_vap. Compatibility requires * us to issue the appropriate VOP_OLD_LOOKUP before we issue VOP_OLD_MKNOD * in order to set up the directory inode's i_offset and i_count (e.g. in UFS). */ int vop_compat_nmknod(struct vop_nmknod_args *ap) { struct thread *td = curthread; struct componentname cnp; struct nchandle *nch; struct namecache *ncp; struct vnode *dvp; int error; /* * Sanity checks, get a locked directory vnode. */ nch = ap->a_nch; /* locked namecache node */ ncp = nch->ncp; dvp = ap->a_dvp; if ((error = vget(dvp, LK_EXCLUSIVE)) != 0) { kprintf("[diagnostic] vop_compat_nmknod: EAGAIN on ncp %p %s\n", ncp, ncp->nc_name); return(EAGAIN); } /* * Set up the cnp for a traditional vop_old_lookup() call. The lookup * caches all information required to create the entry in the * directory inode. We expect a return code of EJUSTRETURN for * the CREATE case. The cnp must simulate a saved-name situation. */ bzero(&cnp, sizeof(cnp)); cnp.cn_nameiop = NAMEI_CREATE; cnp.cn_flags = CNP_LOCKPARENT; cnp.cn_nameptr = ncp->nc_name; cnp.cn_namelen = ncp->nc_nlen; cnp.cn_cred = ap->a_cred; cnp.cn_td = td; *ap->a_vpp = NULL; error = vop_old_lookup(ap->a_head.a_ops, dvp, ap->a_vpp, &cnp); /* * EJUSTRETURN should be returned for this case, which means that * the VFS has set up the directory inode for the create. The dvp we * passed in is expected to remain in a locked state. * * If the VOP_OLD_MKNOD is successful we are responsible for updating * the cache state of the locked ncp that was passed to us. */ if (error == EJUSTRETURN) { KKASSERT((cnp.cn_flags & CNP_PDIRUNLOCK) == 0); error = VOP_OLD_MKNOD(dvp, ap->a_vpp, &cnp, ap->a_vap); if (error == 0) { cache_setunresolved(nch); cache_setvp(nch, *ap->a_vpp); } } else { if (error == 0) { vput(*ap->a_vpp); *ap->a_vpp = NULL; error = EEXIST; } KKASSERT(*ap->a_vpp == NULL); } if ((cnp.cn_flags & CNP_PDIRUNLOCK) == 0) vn_unlock(dvp); vrele(dvp); return (error); }
/* * System startup; initialize the world, create process 0, mount root * filesystem, and fork to create init and pagedaemon. Most of the * hard work is done in the lower-level initialization routines including * startup(), which does memory initialization and autoconfiguration. * * This allows simple addition of new kernel subsystems that require * boot time initialization. It also allows substitution of a subsystem * (for instance, a scheduler, kernel profiler, or VM system) by an object * module. Finally, it allows for optional "kernel threads". */ void mi_startup(void) { struct sysinit *sip; /* system initialization*/ struct sysinit **sipp; /* system initialization*/ struct sysinit **xipp; /* interior loop of sort*/ struct sysinit *save; /* bubble*/ if (sysinit == NULL) { sysinit = SET_BEGIN(sysinit_set); #if defined(__amd64__) && defined(_KERNEL_VIRTUAL) /* * XXX For whatever reason, on 64-bit vkernels * the value of sysinit obtained from the * linker set is wrong. */ if ((long)sysinit % 8 != 0) { kprintf("Fixing sysinit value...\n"); sysinit = (void *)((long)(intptr_t)sysinit + 4); } #endif sysinit_end = SET_LIMIT(sysinit_set); } #if defined(__amd64__) && defined(_KERNEL_VIRTUAL) KKASSERT((long)sysinit % 8 == 0); #endif restart: /* * Perform a bubble sort of the system initialization objects by * their subsystem (primary key) and order (secondary key). */ for (sipp = sysinit; sipp < sysinit_end; sipp++) { for (xipp = sipp + 1; xipp < sysinit_end; xipp++) { if ((*sipp)->subsystem < (*xipp)->subsystem || ((*sipp)->subsystem == (*xipp)->subsystem && (*sipp)->order <= (*xipp)->order)) continue; /* skip*/ save = *sipp; *sipp = *xipp; *xipp = save; } } /* * Traverse the (now) ordered list of system initialization tasks. * Perform each task, and continue on to the next task. * * The last item on the list is expected to be the scheduler, * which will not return. */ for (sipp = sysinit; sipp < sysinit_end; sipp++) { sip = *sipp; if (sip->subsystem == SI_SPECIAL_DUMMY) continue; /* skip dummy task(s)*/ if (sip->subsystem == SI_SPECIAL_DONE) continue; /* Call function */ (*(sip->func))(sip->udata); /* Check off the one we've just done */ sip->subsystem = SI_SPECIAL_DONE; /* Check if we've installed more sysinit items via KLD */ if (newsysinit != NULL) { if (sysinit != SET_BEGIN(sysinit_set)) kfree(sysinit, M_TEMP); sysinit = newsysinit; sysinit_end = newsysinit_end; newsysinit = NULL; newsysinit_end = NULL; goto restart; } } panic("Shouldn't get here!"); /* NOTREACHED*/ }
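/*
 * Illustrative sketch only: the two-key ordering mi_startup() enforces on
 * the sysinit array, extracted into a standalone bubble sort over a
 * hypothetical struct carrying the same (subsystem, order) keys.
 */
#include <stdio.h>

struct item {
	int subsystem;		/* primary key */
	int order;		/* secondary key */
};

static void
bubble_sort(struct item **base, int count)
{
	struct item *save;
	int i, j;

	for (i = 0; i < count; i++) {
		for (j = i + 1; j < count; j++) {
			/* Skip pairs already ordered by (subsystem, order). */
			if (base[i]->subsystem < base[j]->subsystem ||
			    (base[i]->subsystem == base[j]->subsystem &&
			     base[i]->order <= base[j]->order))
				continue;
			save = base[i];
			base[i] = base[j];
			base[j] = save;
		}
	}
}

int
main(void)
{
	struct item a = { 2, 1 }, b = { 1, 5 }, c = { 1, 2 };
	struct item *v[3] = { &a, &b, &c };
	int i;

	bubble_sort(v, 3);
	for (i = 0; i < 3; i++)
		printf("(%d,%d) ", v[i]->subsystem, v[i]->order);
	printf("\n");		/* prints: (1,2) (1,5) (2,1) */
	return (0);
}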