Example #1
0
static int
txp_attach(device_t dev)
{
	struct txp_softc *sc;
	struct ifnet *ifp;
	uint16_t p1;
	uint32_t p2;
	uint8_t enaddr[ETHER_ADDR_LEN];
	int error = 0, rid;

	sc = device_get_softc(dev);
	callout_init(&sc->txp_stat_timer);

	ifp = &sc->sc_arpcom.ac_if;
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));

	pci_enable_busmaster(dev);

	rid = TXP_RID;
	sc->sc_res = bus_alloc_resource_any(dev, TXP_RES, &rid, RF_ACTIVE);

	if (sc->sc_res == NULL) {
		device_printf(dev, "couldn't map ports/memory\n");
		return(ENXIO);
	}

	sc->sc_bt = rman_get_bustag(sc->sc_res);
	sc->sc_bh = rman_get_bushandle(sc->sc_res);

	/* Allocate interrupt */
	rid = 0;
	sc->sc_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
	    RF_SHAREABLE | RF_ACTIVE);

	if (sc->sc_irq == NULL) {
		device_printf(dev, "couldn't map interrupt\n");
		error = ENXIO;
		goto fail;
	}

	if (txp_chip_init(sc)) {
		error = ENXIO;
		goto fail;
	}

	sc->sc_fwbuf = contigmalloc(32768, M_DEVBUF,
	    M_WAITOK, 0, 0xffffffff, PAGE_SIZE, 0);
	error = txp_download_fw(sc);
	contigfree(sc->sc_fwbuf, 32768, M_DEVBUF);
	sc->sc_fwbuf = NULL;

	if (error)
		goto fail;

	sc->sc_ldata = contigmalloc(sizeof(struct txp_ldata), M_DEVBUF,
	    M_WAITOK | M_ZERO, 0, 0xffffffff, PAGE_SIZE, 0);

	if (txp_alloc_rings(sc)) {
		error = ENXIO;
		goto fail;
	}

	if (txp_command(sc, TXP_CMD_MAX_PKT_SIZE_WRITE, TXP_MAX_PKTLEN, 0, 0,
	    NULL, NULL, NULL, 1)) {
		error = ENXIO;
		goto fail;
	}

	if (txp_command(sc, TXP_CMD_STATION_ADDRESS_READ, 0, 0, 0,
	    &p1, &p2, NULL, 1)) {
		error = ENXIO;
		goto fail;
	}

	txp_set_filter(sc);

	enaddr[0] = ((uint8_t *)&p1)[1];
	enaddr[1] = ((uint8_t *)&p1)[0];
	enaddr[2] = ((uint8_t *)&p2)[3];
	enaddr[3] = ((uint8_t *)&p2)[2];
	enaddr[4] = ((uint8_t *)&p2)[1];
	enaddr[5] = ((uint8_t *)&p2)[0];

	ifmedia_init(&sc->sc_ifmedia, 0, txp_ifmedia_upd, txp_ifmedia_sts);
	ifmedia_add(&sc->sc_ifmedia, IFM_ETHER|IFM_10_T, 0, NULL);
	ifmedia_add(&sc->sc_ifmedia, IFM_ETHER|IFM_10_T|IFM_HDX, 0, NULL);
	ifmedia_add(&sc->sc_ifmedia, IFM_ETHER|IFM_10_T|IFM_FDX, 0, NULL);
	ifmedia_add(&sc->sc_ifmedia, IFM_ETHER|IFM_100_TX, 0, NULL);
	ifmedia_add(&sc->sc_ifmedia, IFM_ETHER|IFM_100_TX|IFM_HDX, 0, NULL);
	ifmedia_add(&sc->sc_ifmedia, IFM_ETHER|IFM_100_TX|IFM_FDX, 0, NULL);
	ifmedia_add(&sc->sc_ifmedia, IFM_ETHER|IFM_AUTO, 0, NULL);

	sc->sc_xcvr = TXP_XCVR_AUTO;
	txp_command(sc, TXP_CMD_XCVR_SELECT, TXP_XCVR_AUTO, 0, 0,
	    NULL, NULL, NULL, 0);
	ifmedia_set(&sc->sc_ifmedia, IFM_ETHER|IFM_AUTO);

	ifp->if_softc = sc;
	ifp->if_mtu = ETHERMTU;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = txp_ioctl;
	ifp->if_start = txp_start;
	ifp->if_watchdog = txp_watchdog;
	ifp->if_init = txp_init;
	ifp->if_baudrate = 100000000;
	ifq_set_maxlen(&ifp->if_snd, TX_ENTRIES);
	ifq_set_ready(&ifp->if_snd);
	ifp->if_hwassist = 0;
	txp_capabilities(sc);

	ether_ifattach(ifp, enaddr, NULL);

	error = bus_setup_intr(dev, sc->sc_irq, INTR_MPSAFE,
			       txp_intr, sc, &sc->sc_intrhand, 
			       ifp->if_serializer);
	if (error) {
		device_printf(dev, "couldn't set up irq\n");
		ether_ifdetach(ifp);
		goto fail;
	}

	ifp->if_cpuid = ithread_cpuid(rman_get_start(sc->sc_irq));
	KKASSERT(ifp->if_cpuid >= 0 && ifp->if_cpuid < ncpus);

	return(0);

fail:
	txp_release_resources(dev);
	return(error);
}
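
A quick way to see what the station-address swizzle above does is to run it in user space.  The sketch below is not driver code: it assumes a little-endian host and uses made-up response words in place of p1/p2.

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint16_t p1 = 0x0100;		/* made-up 16-bit response word */
	uint32_t p2 = 0x05040302;	/* made-up 32-bit response word */
	uint8_t enaddr[6];

	/* Same byte swizzle as in txp_attach() above. */
	enaddr[0] = ((uint8_t *)&p1)[1];
	enaddr[1] = ((uint8_t *)&p1)[0];
	enaddr[2] = ((uint8_t *)&p2)[3];
	enaddr[3] = ((uint8_t *)&p2)[2];
	enaddr[4] = ((uint8_t *)&p2)[1];
	enaddr[5] = ((uint8_t *)&p2)[0];

	/* On a little-endian host this prints 01:00:05:04:03:02. */
	printf("%02x:%02x:%02x:%02x:%02x:%02x\n",
	    enaddr[0], enaddr[1], enaddr[2],
	    enaddr[3], enaddr[4], enaddr[5]);
	return (0);
}
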
Example #2
0
/*
 * Create a snapshot of the specified {parent, ocluster} with the specified
 * label.  The originating hammer2_inode must be exclusively locked for
 * safety.
 *
 * The ioctl code has already synced the filesystem.
 */
int
hammer2_cluster_snapshot(hammer2_trans_t *trans, hammer2_cluster_t *ocluster,
		       hammer2_ioc_pfs_t *pfs)
{
	hammer2_mount_t *hmp;
	hammer2_cluster_t *ncluster;
	const hammer2_inode_data_t *ipdata;
	hammer2_inode_data_t *wipdata;
	hammer2_inode_t *nip;
	size_t name_len;
	hammer2_key_t lhc;
	struct vattr vat;
	uuid_t opfs_clid;
	int error;

	kprintf("snapshot %s\n", pfs->name);

	name_len = strlen(pfs->name);
	lhc = hammer2_dirhash(pfs->name, name_len);

	ipdata = &hammer2_cluster_data(ocluster)->ipdata;
	opfs_clid = ipdata->pfs_clid;
	hmp = ocluster->focus->hmp;

	/*
	 * Create the snapshot directory under the super-root
	 *
	 * Set PFS type, generate a unique filesystem id, and generate
	 * a cluster id.  Use the same clid when snapshotting a PFS root,
	 * which theoretically allows the snapshot to be used as part of
	 * the same cluster (perhaps as a cache).
	 *
	 * Copy the (flushed) blockref array.  Theoretically we could use
	 * chain_duplicate() but it becomes difficult to disentangle
	 * the shared core so for now just brute-force it.
	 */
	VATTR_NULL(&vat);
	vat.va_type = VDIR;
	vat.va_mode = 0755;
	ncluster = NULL;
	nip = hammer2_inode_create(trans, hmp->spmp->iroot, &vat,
				   proc0.p_ucred, pfs->name, name_len,
				   &ncluster, &error);

	if (nip) {
		wipdata = hammer2_cluster_modify_ip(trans, nip, ncluster, 0);
		wipdata->pfs_type = HAMMER2_PFSTYPE_SNAPSHOT;
		kern_uuidgen(&wipdata->pfs_fsid, 1);
		if (ocluster->focus->flags & HAMMER2_CHAIN_PFSROOT)
			wipdata->pfs_clid = opfs_clid;
		else
			kern_uuidgen(&wipdata->pfs_clid, 1);
		hammer2_cluster_set_chainflags(ncluster, HAMMER2_CHAIN_PFSROOT);

		/* XXX hack blockset copy */
		/* XXX doesn't work with real cluster */
		KKASSERT(ocluster->nchains == 1);
		wipdata->u.blockset = ocluster->focus->data->ipdata.u.blockset;
		hammer2_cluster_modsync(ncluster);
		hammer2_inode_unlock_ex(nip, ncluster);
	}
	return (error);
}
Example #3
0
/*
 * Locate first match or overlap under parent, return a new cluster
 */
hammer2_cluster_t *
hammer2_cluster_lookup(hammer2_cluster_t *cparent, hammer2_key_t *key_nextp,
		     hammer2_key_t key_beg, hammer2_key_t key_end,
		     int flags, int *ddflagp)
{
	hammer2_pfsmount_t *pmp;
	hammer2_cluster_t *cluster;
	hammer2_chain_t *chain;
	hammer2_key_t key_accum;
	hammer2_key_t key_next;
	hammer2_key_t bref_key;
	int bref_keybits;
	int null_count;
	int ddflag;
	int i;
	uint8_t bref_type;
	u_int bytes;

	pmp = cparent->pmp;				/* can be NULL */
	key_accum = *key_nextp;
	null_count = 0;
	bref_type = 0;
	bref_key = 0;
	bref_keybits = 0;
	bytes = 0;

	cluster = kmalloc(sizeof(*cluster), M_HAMMER2, M_WAITOK | M_ZERO);
	cluster->pmp = pmp;				/* can be NULL */
	cluster->refs = 1;
	/* cluster->focus = NULL; already null */
	cparent->focus = NULL;
	*ddflagp = 0;

	for (i = 0; i < cparent->nchains; ++i) {
		key_next = *key_nextp;
		if (cparent->array[i] == NULL) {
			++null_count;
			continue;
		}
		chain = hammer2_chain_lookup(&cparent->array[i], &key_next,
					     key_beg, key_end,
					     &cparent->cache_index[i],
					     flags, &ddflag);
		if (cparent->focus == NULL)
			cparent->focus = cparent->array[i];
		cluster->array[i] = chain;
		if (chain == NULL) {
			++null_count;
		} else {
			if (cluster->focus == NULL) {
				bref_type = chain->bref.type;
				bref_key = chain->bref.key;
				bref_keybits = chain->bref.keybits;
				bytes = chain->bytes;
				*ddflagp = ddflag;
				cluster->focus = chain;
			}
			KKASSERT(bref_type == chain->bref.type);
			KKASSERT(bref_key == chain->bref.key);
			KKASSERT(bref_keybits == chain->bref.keybits);
			KKASSERT(bytes == chain->bytes);
			KKASSERT(*ddflagp == ddflag);
		}
		if (key_accum > key_next)
			key_accum = key_next;
	}
	*key_nextp = key_accum;
	cluster->nchains = i;

	if (null_count == i) {
		hammer2_cluster_drop(cluster);
		cluster = NULL;
	}

	return (cluster);
}
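
The key_accum clamping above is what keeps a multi-chain cluster consistent: the cluster may only advance its resume key to the smallest per-chain next key, so no member skips an element.  A minimal user-space sketch of that reduction, with invented key values:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	/* per-chain next keys returned by three hypothetical members */
	uint64_t key_next[3] = { 0x4000, 0x3000, 0x5000 };
	uint64_t key_accum = UINT64_MAX;	/* stands in for *key_nextp */
	int i;

	for (i = 0; i < 3; ++i) {
		if (key_accum > key_next[i])
			key_accum = key_next[i];
	}
	/* prints 0x3000: the cluster resumes at the smallest next key */
	printf("cluster advances to %#llx\n", (unsigned long long)key_accum);
	return (0);
}
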
Example #4
0
/*
 * Flush waiting shared locks.  The lock's prior state is passed in and must
 * be adjusted atomically only if it matches and LINKSPIN is not set.
 *
 * IMPORTANT! The caller has left one active count on the lock for us to
 *	      consume.  We will apply this to the first link, but must add
 *	      additional counts for any other links.
 */
static int
mtx_chain_link_sh(mtx_t *mtx, u_int olock)
{
	thread_t td = curthread;
	mtx_link_t *link;
	u_int	addcount;
	u_int	nlock;

	olock &= ~MTX_LINKSPIN;
	nlock = olock | MTX_LINKSPIN;
	nlock &= ~MTX_EXCLUSIVE;
	crit_enter_raw(td);
	if (atomic_cmpset_int(&mtx->mtx_lock, olock, nlock)) {
		/*
		 * It should not be possible for SHWANTED to be set without
		 * any links pending.
		 */
		KKASSERT(mtx->mtx_shlink != NULL);

		/*
		 * We have to process the count for all shared locks before
		 * we process any of the links.  Count the additional shared
		 * locks beyond the first link (which is already accounted
		 * for) and associate the full count with the lock
		 * immediately.
		 */
		addcount = 0;
		for (link = mtx->mtx_shlink->next; link != mtx->mtx_shlink;
		     link = link->next) {
			++addcount;
		}
		if (addcount > 0)
			atomic_add_int(&mtx->mtx_lock, addcount);

		/*
		 * We can wakeup all waiting shared locks.
		 */
		while ((link = mtx->mtx_shlink) != NULL) {
			KKASSERT(link->state == MTX_LINK_LINKED_SH);
			if (link->next == link) {
				mtx->mtx_shlink = NULL;
			} else {
				mtx->mtx_shlink = link->next;
				link->next->prev = link->prev;
				link->prev->next = link->next;
			}
			link->next = NULL;
			link->prev = NULL;
			cpu_sfence();
			if (link->callback) {
				link->state = MTX_LINK_CALLEDBACK;
				link->callback(link, link->arg, 0);
			} else {
				cpu_sfence();
				link->state = MTX_LINK_ACQUIRED;
				wakeup(link);
			}
		}
		atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN |
						 MTX_SHWANTED);
		crit_exit_raw(td);
		return 1;
	}
	/* retry */
	crit_exit_raw(td);

	return 0;
}
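
The LINKSPIN acquisition above follows the usual compare-and-set transition pattern: compute the desired word from the observed word and only commit if nobody else changed it in between.  A minimal user-space sketch of that pattern using C11 atomics; the flag value is an invented stand-in, not the real MTX_LINKSPIN.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define LINKSPIN	0x80000000u	/* illustrative stand-in flag bit */

static bool
try_set_linkspin(_Atomic uint32_t *lockp, uint32_t olock)
{
	uint32_t nlock;

	olock &= ~LINKSPIN;		/* required prior state */
	nlock = olock | LINKSPIN;	/* desired state */

	/* succeeds only if *lockp still equals olock */
	return (atomic_compare_exchange_strong(lockp, &olock, nlock));
}
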
Example #5
0
/*
 * Strategy routine called from dm_strategy.
 */
static int
dm_target_stripe_strategy(dm_table_entry_t *table_en, struct buf *bp)
{
	dm_target_stripe_config_t *tsc;
	struct bio *bio = &bp->b_bio1;
	struct buf *nestbuf;
	uint64_t blkno, blkoff;
	uint64_t stripe, blknr;
	uint32_t stripe_off, stripe_rest, num_blks, issue_blks;
	int devnr;

	tsc = table_en->target_config;
	if (tsc == NULL)
		return 0;

	/* calculate extent of request */
	KKASSERT(bp->b_resid % DEV_BSIZE == 0);

	switch(bp->b_cmd) {
	case BUF_CMD_READ:
	case BUF_CMD_WRITE:
	case BUF_CMD_FREEBLKS:
		/*
		 * Loop through to individual operations
		 */
		blkno = bp->b_bio1.bio_offset / DEV_BSIZE;
		blkoff = 0;
		num_blks = bp->b_resid / DEV_BSIZE;
		nestiobuf_init(bio);

		while (num_blks > 0) {
			/* blockno to strip piece nr */
			stripe = blkno / tsc->stripe_chunksize;
			stripe_off = blkno % tsc->stripe_chunksize;

			/* where we are inside the strip */
			devnr = stripe % tsc->stripe_num;
			blknr = stripe / tsc->stripe_num;

			/* how much is left before we hit a boundary */
			stripe_rest = tsc->stripe_chunksize - stripe_off;

			/* issue this piece on stripe `stripe' */
			issue_blks = MIN(stripe_rest, num_blks);
			nestbuf = getpbuf(NULL);
			nestbuf->b_flags |= bio->bio_buf->b_flags & B_HASBOGUS;

			nestiobuf_add(bio, nestbuf, blkoff,
					issue_blks * DEV_BSIZE, NULL);

			/* I need number of bytes. */
			nestbuf->b_bio1.bio_offset =
				blknr * tsc->stripe_chunksize + stripe_off;
			nestbuf->b_bio1.bio_offset +=
				tsc->stripe_devs[devnr].offset;
			nestbuf->b_bio1.bio_offset *= DEV_BSIZE;

			vn_strategy(tsc->stripe_devs[devnr].pdev->pdev_vnode,
				    &nestbuf->b_bio1);

			blkno += issue_blks;
			blkoff += issue_blks * DEV_BSIZE;
			num_blks -= issue_blks;
		}
		nestiobuf_start(bio);
		break;
	case BUF_CMD_FLUSH:
		nestiobuf_init(bio);
		for (devnr = 0; devnr < tsc->stripe_num; ++devnr) {
			nestbuf = getpbuf(NULL);
			nestbuf->b_flags |= bio->bio_buf->b_flags & B_HASBOGUS;

			nestiobuf_add(bio, nestbuf, 0, 0, NULL);
			nestbuf->b_bio1.bio_offset = 0;
			vn_strategy(tsc->stripe_devs[devnr].pdev->pdev_vnode,
				    &nestbuf->b_bio1);
		}
		nestiobuf_start(bio);
		break;
	default:
		bp->b_flags |= B_ERROR;
		bp->b_error = EIO;
		biodone(bio);
		break;
	}
	return 0;
}
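
The stripe arithmetic in the loop above is easier to follow with concrete numbers.  The standalone sketch below uses an invented chunk size and device count (the real values live in the dm_target_stripe_config_t) and maps one logical block:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t chunksize = 8;		/* blocks per stripe chunk */
	int ndevs = 2;			/* devices in the stripe set */
	uint64_t blkno = 21;		/* logical block to map */

	uint64_t stripe = blkno / chunksize;		/* chunk 2 */
	uint64_t stripe_off = blkno % chunksize;	/* 5 blocks into it */
	int devnr = stripe % ndevs;			/* lands on device 0 */
	uint64_t blknr = stripe / ndevs;		/* chunk index 1 there */
	uint64_t devblk = blknr * chunksize + stripe_off;

	/* prints: blkno 21 -> dev 0, block 13 */
	printf("blkno %llu -> dev %d, block %llu\n",
	    (unsigned long long)blkno, devnr, (unsigned long long)devblk);
	return (0);
}
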
Example #6
0
/*
 * Vnode op for VM putpages.
 * Possible bug: all I/O is done in sync mode.
 * Note that vop_close always invalidates pages before close, so it is
 * not necessary to open the vnode.
 *
 * nwfs_putpages(struct vnode *a_vp, vm_page_t *a_m, int a_count,
 *		 int a_sync, int *a_rtvals, vm_ooffset_t a_offset)
 */
int
nwfs_putpages(struct vop_putpages_args *ap)
{
	int error;
	struct thread *td = curthread;	/* XXX */
	struct vnode *vp = ap->a_vp;
	struct ucred *cred;

#ifndef NWFS_RWCACHE
	KKASSERT(td->td_proc);
	cred = td->td_proc->p_ucred;		/* XXX */
	VOP_OPEN(vp, FWRITE, cred, NULL);
	error = vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count,
		ap->a_sync, ap->a_rtvals);
	VOP_CLOSE(vp, FWRITE, cred);
	return error;
#else
	struct uio uio;
	struct iovec iov;
	vm_offset_t kva;
	struct buf *bp;
	int i, npages, count;
	int *rtvals;
	struct nwmount *nmp;
	struct nwnode *np;
	vm_page_t *pages;

	KKASSERT(td->td_proc);
	cred = td->td_proc->p_ucred;		/* XXX */

/*	VOP_OPEN(vp, FWRITE, cred, NULL);*/
	np = VTONW(vp);
	nmp = VFSTONWFS(vp->v_mount);
	pages = ap->a_m;
	count = ap->a_count;
	rtvals = ap->a_rtvals;
	npages = btoc(count);

	for (i = 0; i < npages; i++) {
		rtvals[i] = VM_PAGER_AGAIN;
	}

	bp = getpbuf_kva(&nwfs_pbuf_freecnt);
	kva = (vm_offset_t) bp->b_data;
	pmap_qenter(kva, pages, npages);

	iov.iov_base = (caddr_t) kva;
	iov.iov_len = count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = IDX_TO_OFF(pages[0]->pindex);
	uio.uio_resid = count;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_WRITE;
	uio.uio_td = td;
	NCPVNDEBUG("ofs=%d,resid=%d\n",(int)uio.uio_offset, uio.uio_resid);

	error = ncp_write(NWFSTOCONN(nmp), &np->n_fh, &uio, cred);
/*	VOP_CLOSE(vp, FWRITE, cred);*/
	NCPVNDEBUG("paged write done: %d\n", error);

	pmap_qremove(kva, npages);
	relpbuf(bp, &nwfs_pbuf_freecnt);

	if (!error) {
		int nwritten = round_page(count - uio.uio_resid) / PAGE_SIZE;
		for (i = 0; i < nwritten; i++) {
			rtvals[i] = VM_PAGER_OK;
			vm_page_undirty(pages[i]);
		}
	}
	return rtvals[0];
#endif /* NWFS_RWCACHE */
}
Example #7
0
/*
 * Remove a directory entry.  At this point the file represented by the
 * directory entry being removed remains at full length until no one has
 * it open.  When the file is no longer being used, msdosfs_inactive() is
 * called and truncates the file to 0 length.  When the vnode containing
 * the denode is needed for some other purpose by VFS, it will call
 * msdosfs_reclaim(), which removes the denode from the denode cache.
 */
int
removede(struct denode *pdep,	/* directory where the entry is removed */
	 struct denode *dep)	/* file to be removed */
{
	int error;
	struct direntry *ep;
	struct buf *bp;
	daddr_t bn;
	int blsize;
	struct msdosfsmount *pmp = pdep->de_pmp;
	u_long offset = pdep->de_fndoffset;

#ifdef MSDOSFS_DEBUG
	kprintf("removede(): filename %s, dep %p, offset %08lx\n",
	    dep->de_Name, dep, offset);
#endif

	KKASSERT(dep->de_refcnt > 0);
	dep->de_refcnt--;
	offset += sizeof(struct direntry);
	do {
		offset -= sizeof(struct direntry);
		error = pcbmap(pdep, de_cluster(pmp, offset),
			       &bn, NULL, &blsize);
		if (error)
			return error;
		error = bread(pmp->pm_devvp, de_bntodoff(pmp, bn), blsize, &bp);
		if (error) {
			brelse(bp);
			return error;
		}
		ep = bptoep(pmp, bp, offset);
		/*
		 * Check whether, if we came here the second time, i.e.
		 * when underflowing into the previous block, the last
		 * entry in this block is a longfilename entry, too.
		 */
		if (ep->deAttributes != ATTR_WIN95
		    && offset != pdep->de_fndoffset) {
			brelse(bp);
			break;
		}
		offset += sizeof(struct direntry);
		while (1) {
			/*
			 * We are a bit aggressive here in that we delete any Win95
			 * entries preceding this entry, not just the ones we "own".
			 * Since these presumably aren't valid anyway,
			 * there should be no harm.
			 */
			offset -= sizeof(struct direntry);
			(ep--)->deName[0] = SLOT_DELETED;
			if ((pmp->pm_flags & MSDOSFSMNT_NOWIN95)
			    || !(offset & pmp->pm_crbomask)
			    || ep->deAttributes != ATTR_WIN95)
				break;
		}
		if ((error = bwrite(bp)) != 0)
			return error;
	} while (!(pmp->pm_flags & MSDOSFSMNT_NOWIN95)
	    && !(offset & pmp->pm_crbomask)
	    && offset);
	return 0;
}
Example #8
0
static int
tmpfs_nrmdir(struct vop_nrmdir_args *v)
{
	struct vnode *dvp = v->a_dvp;
	struct namecache *ncp = v->a_nch->ncp;
	struct vnode *vp;
	struct tmpfs_dirent *de;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *dnode;
	struct tmpfs_node *node;
	int error;

	/*
	 * We have to acquire the vp from v->a_nch because we will likely
	 * unresolve the namecache entry, and a vrele/vput is needed to
	 * trigger the tmpfs_inactive/tmpfs_reclaim sequence.
	 *
	 * We have to use vget to clear any inactive state on the vnode,
	 * otherwise the vnode may remain inactive and thus tmpfs_inactive
	 * will not get called when we release it.
	 */
	error = cache_vget(v->a_nch, v->a_cred, LK_SHARED, &vp);
	KKASSERT(error == 0);
	vn_unlock(vp);

	/*
	 * Prevalidate so we don't hit an assertion later
	 */
	if (vp->v_type != VDIR) {
		error = ENOTDIR;
		goto out;
	}

	tmp = VFS_TO_TMPFS(dvp->v_mount);
	dnode = VP_TO_TMPFS_DIR(dvp);
	node = VP_TO_TMPFS_DIR(vp);

	/* Directories with more than two entries ('.' and '..') cannot be
	 * removed. */
	if (node->tn_size > 0) {
		error = ENOTEMPTY;
		goto out;
	}

	if ((dnode->tn_flags & APPEND)
	    || (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
		error = EPERM;
		goto out;
	}

	/* This invariant holds only if we are not trying to remove "..".
	 * We checked for that above so this is safe now. */
	KKASSERT(node->tn_dir.tn_parent == dnode);

	/* Get the directory entry associated with node (vp).  This was
	 * filled by tmpfs_lookup while looking up the entry. */
	de = tmpfs_dir_lookup(dnode, node, ncp);
	KKASSERT(TMPFS_DIRENT_MATCHES(de,
	    ncp->nc_name,
	    ncp->nc_nlen));

	/* Check flags to see if we are allowed to remove the directory. */
	if ((dnode->tn_flags & APPEND) ||
	    node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) {
		error = EPERM;
		goto out;
	}


	/* Detach the directory entry from the directory (dnode). */
	tmpfs_dir_detach(dnode, de);

	/* No vnode should be allocated for this entry from this point */
	TMPFS_NODE_LOCK(node);
	TMPFS_ASSERT_ELOCKED(node);
	TMPFS_NODE_LOCK(dnode);
	TMPFS_ASSERT_ELOCKED(dnode);

#if 0
	/* handled by tmpfs_free_node */
	KKASSERT(node->tn_links > 0);
	node->tn_links--;
	node->tn_dir.tn_parent = NULL;
#endif
	node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED |
	    TMPFS_NODE_MODIFIED;

#if 0
	/* handled by tmpfs_free_node */
	KKASSERT(dnode->tn_links > 0);
	dnode->tn_links--;
#endif
	dnode->tn_status |= TMPFS_NODE_ACCESSED |
	    TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;

	TMPFS_NODE_UNLOCK(dnode);
	TMPFS_NODE_UNLOCK(node);

	/* Free the directory entry we just deleted.  Note that the node
	 * referred by it will not be removed until the vnode is really
	 * reclaimed. */
	tmpfs_free_dirent(tmp, de);

	/* Release the deleted vnode (will destroy the node, notify
	 * interested parties and clean it from the cache). */

	TMPFS_NODE_LOCK(dnode);
	dnode->tn_status |= TMPFS_NODE_CHANGED;
	TMPFS_NODE_UNLOCK(dnode);
	tmpfs_update(dvp);

	cache_setunresolved(v->a_nch);
	cache_setvp(v->a_nch, NULL);
	/*cache_inval_vp(vp, CINV_DESTROY);*/
	tmpfs_knote(dvp, NOTE_WRITE | NOTE_LINK);
	error = 0;

out:
	vrele(vp);

	return error;
}
Example #9
0
static int
tmpfs_readdir(struct vop_readdir_args *v)
{
	struct vnode *vp = v->a_vp;
	struct uio *uio = v->a_uio;
	int *eofflag = v->a_eofflag;
	off_t **cookies = v->a_cookies;
	int *ncookies = v->a_ncookies;
	struct tmpfs_mount *tmp;
	int error;
	off_t startoff;
	off_t cnt = 0;
	struct tmpfs_node *node;

	/* This operation only makes sense on directory nodes. */
	if (vp->v_type != VDIR)
		return ENOTDIR;

	tmp = VFS_TO_TMPFS(vp->v_mount);
	node = VP_TO_TMPFS_DIR(vp);
	startoff = uio->uio_offset;

	if (uio->uio_offset == TMPFS_DIRCOOKIE_DOT) {
		error = tmpfs_dir_getdotdent(node, uio);
		if (error != 0)
			goto outok;
		cnt++;
	}

	if (uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT) {
		error = tmpfs_dir_getdotdotdent(tmp, node, uio);
		if (error != 0)
			goto outok;
		cnt++;
	}

	error = tmpfs_dir_getdents(node, uio, &cnt);

outok:
	KKASSERT(error >= -1);

	if (error == -1)
		error = 0;

	if (eofflag != NULL)
		*eofflag =
		    (error == 0 && uio->uio_offset == TMPFS_DIRCOOKIE_EOF);

	/* Update NFS-related variables. */
	if (error == 0 && cookies != NULL && ncookies != NULL) {
		off_t i;
		off_t off = startoff;
		struct tmpfs_dirent *de = NULL;

		*ncookies = cnt;
		*cookies = kmalloc(cnt * sizeof(off_t), M_TEMP, M_WAITOK);

		for (i = 0; i < cnt; i++) {
			KKASSERT(off != TMPFS_DIRCOOKIE_EOF);
			if (off == TMPFS_DIRCOOKIE_DOT) {
				off = TMPFS_DIRCOOKIE_DOTDOT;
			} else {
				if (off == TMPFS_DIRCOOKIE_DOTDOT) {
					de = TAILQ_FIRST(&node->tn_dir.tn_dirhead);
				} else if (de != NULL) {
					de = TAILQ_NEXT(de, td_entries);
				} else {
					de = tmpfs_dir_lookupbycookie(node,
					    off);
					KKASSERT(de != NULL);
					de = TAILQ_NEXT(de, td_entries);
				}
				if (de == NULL)
					off = TMPFS_DIRCOOKIE_EOF;
				else
					off = tmpfs_dircookie(de);
			}

			(*cookies)[i] = off;
		}
		KKASSERT(uio->uio_offset == off);
	}

	return error;
}
Example #10
0
/*
 * Remote IPI for callout_reset_bycpu().  The operation is performed only
 * on the 1->0 transition of the counter, otherwise there are callout_stop()s
 * pending after us.
 *
 * The IPI counter and PENDING flags must be set atomically with the
 * 1->0 transition.  The ACTIVE flag was set prior to the ipi being
 * sent and we do not want to race a caller on the original cpu trying
 * to deactivate() the flag concurrent with our installation of the
 * callout.
 */
static void
callout_reset_ipi(void *arg)
{
	struct callout *c = arg;
	globaldata_t gd = mycpu;
	globaldata_t tgd;
	int flags;
	int nflags;

	for (;;) {
		flags = c->c_flags;
		cpu_ccfence();
		KKASSERT((flags & CALLOUT_IPI_MASK) > 0);

		/*
		 * We should already be armed for our cpu, if armed to another
		 * cpu, chain the IPI.  If for some reason we are not armed,
		 * we can arm ourselves.
		 */
		if (flags & CALLOUT_ARMED) {
			if (CALLOUT_FLAGS_TO_CPU(flags) != gd->gd_cpuid) {
				tgd = globaldata_find(
						CALLOUT_FLAGS_TO_CPU(flags));
				lwkt_send_ipiq(tgd, callout_reset_ipi, c);
				return;
			}
			nflags = (flags & ~CALLOUT_EXECUTED);
		} else {
			nflags = (flags & ~(CALLOUT_CPU_MASK |
					    CALLOUT_EXECUTED)) |
				 CALLOUT_ARMED |
				 CALLOUT_CPU_TO_FLAGS(gd->gd_cpuid);
		}

		/*
		 * Decrement the IPI count, retain and clear the WAITING
		 * status, clear EXECUTED.
		 *
		 * NOTE: It is possible for the callout to already have been
		 *	 marked pending due to SMP races.
		 */
		nflags = nflags - 1;
		if ((flags & CALLOUT_IPI_MASK) == 1) {
			nflags &= ~(CALLOUT_WAITING | CALLOUT_EXECUTED);
			nflags |= CALLOUT_PENDING;
		}

		if (atomic_cmpset_int(&c->c_flags, flags, nflags)) {
			/*
			 * Only install the callout on the 1->0 transition
			 * of the IPI count, and only if PENDING was not
			 * already set.  The latter situation should never
			 * occur but we check anyway.
			 */
			if ((flags & (CALLOUT_PENDING|CALLOUT_IPI_MASK)) == 1) {
				softclock_pcpu_t sc;

				sc = &softclock_pcpu_ary[gd->gd_cpuid];
				c->c_time = sc->curticks + c->c_load;
				TAILQ_INSERT_TAIL(
					&sc->callwheel[c->c_time & cwheelmask],
					c, c_links.tqe);
			}
			break;
		}
		/* retry */
		cpu_pause();
	}

	/*
	 * Issue wakeup if requested.
	 */
	if (flags & CALLOUT_WAITING)
		wakeup(c);
}
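
For context, a typical consumer of this machinery only sees the callout_*() entry points.  A hedged usage sketch in kernel style, built only from calls already visible in these examples; the softc structure, callback and timeout length are invented:

struct mydev_softc {
	struct callout	my_timer;	/* invented example structure */
};

static void	my_timeout(void *arg);	/* runs from the softclock thread */

static void
mydev_start(struct mydev_softc *sc)
{
	callout_init_mp(&sc->my_timer);			  /* MPSAFE variant */
	callout_reset(&sc->my_timer, hz, my_timeout, sc); /* fire in ~1s */
}

static void
mydev_stop(struct mydev_softc *sc)
{
	/* returns non-zero if the callout was still pending */
	callout_stop(&sc->my_timer);
}
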
Example #11
0
/*
 * Stop a running timer and ensure that any running callout completes before
 * returning.  If the timer is running on another cpu this function may block
 * to interlock against the callout.  If the callout is currently executing
 * or blocked in another thread this function may also block to interlock
 * against the callout.
 *
 * The caller must be careful to avoid deadlocks, either by using
 * callout_init_lk() (which uses the lockmgr lock cancelation feature),
 * by using tokens and dealing with breaks in the serialization, or using
 * the lockmgr lock cancelation feature yourself in the callout callback
 * function.
 *
 * callout_stop() returns non-zero if the callout was pending.
 */
static int
_callout_stop(struct callout *c, int issync)
{
	globaldata_t gd = mycpu;
	globaldata_t tgd;
	softclock_pcpu_t sc;
	int flags;
	int nflags;
	int rc;
	int cpuid;

#ifdef INVARIANTS
	if ((c->c_flags & CALLOUT_DID_INIT) == 0) {
		callout_init(c);
		kprintf(
		    "callout_stop(%p) from %p: callout was not initialized\n",
		    c, ((int **)&c)[-1]);
		print_backtrace(-1);
	}
#endif
	crit_enter_gd(gd);

	/*
	 * Fast path operations:
	 *
	 * If ARMED and owned by our cpu, or not ARMED, and other simple
	 * conditions are met, we can just clear ACTIVE and EXECUTED
	 * and we are done.
	 */
	for (;;) {
		flags = c->c_flags;
		cpu_ccfence();

		cpuid = CALLOUT_FLAGS_TO_CPU(flags);

		/*
		 * Can't handle an armed callout in the fast path if it is
		 * not on the current cpu.  We must atomically increment the
		 * IPI count for the IPI we intend to send and break out of
		 * the fast path to enter the slow path.
		 */
		if (flags & CALLOUT_ARMED) {
			if (gd->gd_cpuid != cpuid) {
				nflags = flags + 1;
				if (atomic_cmpset_int(&c->c_flags,
						      flags, nflags)) {
					/* break to slow path */
					break;
				}
				continue;	/* retry */
			}
		} else {
			cpuid = gd->gd_cpuid;
			KKASSERT((flags & CALLOUT_IPI_MASK) == 0);
			KKASSERT((flags & CALLOUT_PENDING) == 0);
		}

		/*
		 * Process pending IPIs and retry (only if not called from
		 * an IPI).
		 */
		if (flags & CALLOUT_IPI_MASK) {
			lwkt_process_ipiq();
			continue;	/* retry */
		}

		/*
		 * Transition to the stopped state, recover the EXECUTED
		 * status.  If pending we cannot clear ARMED until after
		 * we have removed (c) from the callwheel.
		 *
		 * NOTE: The callout might already not be armed but in this
		 *	 case it should also not be pending.
		 */
		nflags = flags & ~(CALLOUT_ACTIVE |
				   CALLOUT_EXECUTED |
				   CALLOUT_WAITING |
				   CALLOUT_PENDING);

		/* NOTE: IPI_MASK already tested */
		if ((flags & CALLOUT_PENDING) == 0)
			nflags &= ~CALLOUT_ARMED;
		if (atomic_cmpset_int(&c->c_flags, flags, nflags)) {
			/*
			 * Can only remove from callwheel if currently
			 * pending.
			 */
			if (flags & CALLOUT_PENDING) {
				sc = &softclock_pcpu_ary[gd->gd_cpuid];
				if (sc->next == c)
					sc->next = TAILQ_NEXT(c, c_links.tqe);
				TAILQ_REMOVE(
					&sc->callwheel[c->c_time & cwheelmask],
					c,
					c_links.tqe);
				c->c_func = NULL;

				/*
				 * NOTE: Can't clear ARMED until we have
				 *	 physically removed (c) from the
				 *	 callwheel.
				 *
				 * NOTE: WAITING bit race exists when doing
				 *	 unconditional bit clears.
				 */
				callout_maybe_clear_armed(c);
				if (c->c_flags & CALLOUT_WAITING)
					flags |= CALLOUT_WAITING;
			}

			/*
			 * ARMED has been cleared at this point and (c)
			 * might now be stale.  Only good for wakeup()s.
			 */
			if (flags & CALLOUT_WAITING)
				wakeup(c);

			goto skip_slow;
		}
		/* retry */
	}

	/*
	 * Slow path (and not called via an IPI).
	 *
	 * When ARMED to a different cpu the stop must be processed on that
	 * cpu.  Issue the IPI and wait for completion.  We have already
	 * incremented the IPI count.
	 */
	tgd = globaldata_find(cpuid);
	lwkt_send_ipiq3(tgd, callout_stop_ipi, c, issync);

	for (;;) {
		int flags;
		int nflags;

		flags = c->c_flags;
		cpu_ccfence();
		if ((flags & CALLOUT_IPI_MASK) == 0)	/* fast path */
			break;
		nflags = flags | CALLOUT_WAITING;
		tsleep_interlock(c, 0);
		if (atomic_cmpset_int(&c->c_flags, flags, nflags)) {
			tsleep(c, PINTERLOCKED, "cstp1", 0);
		}
	}

skip_slow:

	/*
	 * If (issync) we must also wait for any in-progress callbacks to
	 * complete, unless the stop is being executed from the callback
	 * itself.  The EXECUTED flag is set prior to the callback
	 * being made so our existing flags status already has it.
	 *
	 * If auto-lock mode is being used, this is where we cancel any
	 * blocked lock that is potentially preventing the target cpu
	 * from completing the callback.
	 */
	while (issync) {
		intptr_t *runp;
		intptr_t runco;

		sc = &softclock_pcpu_ary[cpuid];
		if (gd->gd_curthread == &sc->thread)	/* stop from cb */
			break;
		runp = &sc->running;
		runco = *runp;
		cpu_ccfence();
		if ((runco & ~(intptr_t)1) != (intptr_t)c)
			break;
		if (c->c_flags & CALLOUT_AUTOLOCK)
			lockmgr(c->c_lk, LK_CANCEL_BEG);
		tsleep_interlock(c, 0);
		if (atomic_cmpset_long(runp, runco, runco | 1))
			tsleep(c, PINTERLOCKED, "cstp3", 0);
		if (c->c_flags & CALLOUT_AUTOLOCK)
			lockmgr(c->c_lk, LK_CANCEL_END);
	}

	crit_exit_gd(gd);
	rc = (flags & CALLOUT_EXECUTED) != 0;

	return rc;
}
Example #12
0
/*
 * This procedure is the main loop of our per-cpu helper thread.  The
 * sc->isrunning flag prevents us from racing hardclock_softtick() and
 * a critical section is sufficient to interlock sc->curticks and protect
 * us from remote IPIs / list removal.
 *
 * The thread starts with the MP lock released and not in a critical
 * section.  The loop itself is MP safe while individual callbacks
 * may or may not be, so we obtain or release the MP lock as appropriate.
 */
static void
softclock_handler(void *arg)
{
	softclock_pcpu_t sc;
	struct callout *c;
	struct callout_tailq *bucket;
	struct callout slotimer;
	int mpsafe = 1;
	int flags;

	/*
	 * Setup pcpu slow clocks which we want to run from the callout
	 * thread.
	 */
	callout_init_mp(&slotimer);
	callout_reset(&slotimer, hz * 10, slotimer_callback, &slotimer);

	/*
	 * Run the callout thread at the same priority as other kernel
	 * threads so it can be round-robined.
	 */
	/*lwkt_setpri_self(TDPRI_SOFT_NORM);*/

	/*
	 * Loop critical section against ipi operations to this cpu.
	 */
	sc = arg;
	crit_enter();
loop:
	while (sc->softticks != (int)(sc->curticks + 1)) {
		bucket = &sc->callwheel[sc->softticks & cwheelmask];

		for (c = TAILQ_FIRST(bucket); c; c = sc->next) {
			if (c->c_time != sc->softticks) {
				sc->next = TAILQ_NEXT(c, c_links.tqe);
				continue;
			}

			flags = c->c_flags;
			if (flags & CALLOUT_MPSAFE) {
				if (mpsafe == 0) {
					mpsafe = 1;
					rel_mplock();
				}
			} else {
				/*
				 * The request might be removed while we 
				 * are waiting to get the MP lock.  If it
				 * was removed sc->next will point to the
				 * next valid request or NULL, loop up.
				 */
				if (mpsafe) {
					mpsafe = 0;
					sc->next = c;
					get_mplock();
					if (c != sc->next)
						continue;
				}
			}

			/*
			 * Queue protection only exists while we hold the
			 * critical section uninterrupted.
			 *
			 * Adjust sc->next when removing (c) from the queue,
			 * note that an IPI on this cpu may make further
			 * adjustments to sc->next.
			 */
			sc->next = TAILQ_NEXT(c, c_links.tqe);
			TAILQ_REMOVE(bucket, c, c_links.tqe);

			KASSERT((c->c_flags & CALLOUT_ARMED) &&
				(c->c_flags & CALLOUT_PENDING) &&
				CALLOUT_FLAGS_TO_CPU(c->c_flags) ==
				mycpu->gd_cpuid,
				("callout %p: bad flags %08x", c, c->c_flags));

			/*
			 * Once CALLOUT_PENDING is cleared, sc->running
			 * protects the callout structure's existence but
			 * only until we call c_func().  A callout_stop()
			 * or callout_reset() issued from within c_func()
			 * will not block.  The callout can also be kfree()d
			 * by c_func().
			 *
			 * We set EXECUTED before calling c_func() so a
			 * callout_stop() issued from within c_func() returns
			 * the correct status.
			 */
			if ((flags & (CALLOUT_AUTOLOCK | CALLOUT_ACTIVE)) ==
			    (CALLOUT_AUTOLOCK | CALLOUT_ACTIVE)) {
				void (*c_func)(void *);
				void *c_arg;
				struct lock *c_lk;
				int error;

				/*
				 * NOTE: sc->running must be set prior to
				 *	 CALLOUT_PENDING being cleared to
				 *	 avoid missed CANCELs and *_stop()
				 *	 races.
				 */
				sc->running = (intptr_t)c;
				c_func = c->c_func;
				c_arg = c->c_arg;
				c_lk = c->c_lk;
				c->c_func = NULL;
				KKASSERT(c->c_flags & CALLOUT_DID_INIT);
				flags = callout_unpend_disarm(c);
				error = lockmgr(c_lk, LK_EXCLUSIVE |
						      LK_CANCELABLE);
				if (error == 0) {
					atomic_set_int(&c->c_flags,
						       CALLOUT_EXECUTED);
					crit_exit();
					c_func(c_arg);
					crit_enter();
					lockmgr(c_lk, LK_RELEASE);
				}
			} else if (flags & CALLOUT_ACTIVE) {
				void (*c_func)(void *);
				void *c_arg;

				sc->running = (intptr_t)c;
				c_func = c->c_func;
				c_arg = c->c_arg;
				c->c_func = NULL;
				KKASSERT(c->c_flags & CALLOUT_DID_INIT);
				flags = callout_unpend_disarm(c);
				atomic_set_int(&c->c_flags, CALLOUT_EXECUTED);
				crit_exit();
				c_func(c_arg);
				crit_enter();
			} else {
				flags = callout_unpend_disarm(c);
			}

			/*
			 * Read and clear sc->running.  If bit 0 was set,
			 * a callout_stop() is likely blocked waiting for
			 * the callback to complete.
			 *
			 * The callout_unpend_disarm() above also cleared
			 * CALLOUT_WAITING and returned the contents of flags
			 * prior to clearing any bits.
			 *
			 * Interlock wakeup any _stop's waiting on us.  Note
			 * that once c_func() was called, the callout
			 * structure (c) pointer may no longer be valid.  It
			 * can only be used for the wakeup.
			 */
			if ((atomic_readandclear_ptr(&sc->running) & 1) ||
			    (flags & CALLOUT_WAITING)) {
				wakeup(c);
			}
			/* NOTE: list may have changed */
		}
		++sc->softticks;
	}

	/*
	 * Don't leave us holding the MP lock when we deschedule ourselves.
	 */
	if (mpsafe == 0) {
		mpsafe = 1;
		rel_mplock();
	}
	sc->isrunning = 0;
	lwkt_deschedule_self(&sc->thread);	/* == curthread */
	lwkt_switch();
	goto loop;
	/* NOT REACHED */
}
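
The bucket selection used throughout these routines, callwheel[time & cwheelmask], relies on the wheel size being a power of two.  A standalone sketch with an invented wheel size (the kernel derives the real one from ncallout):

#include <stdio.h>

int
main(void)
{
	int cwheelsize = 256;			/* must be a power of two */
	int cwheelmask = cwheelsize - 1;
	int curticks = 100000;
	int c_time = curticks + 30;		/* callout due in 30 ticks */

	/* the bucket the softclock handler scans when it reaches c_time */
	printf("bucket %d of %d\n", c_time & cwheelmask, cwheelsize);
	return (0);
}
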
Example #13
0
/*
 * Attempt to acquire a shared or exclusive token.  Returns TRUE on success,
 * FALSE on failure.
 *
 * If TOK_EXCLUSIVE is set in mode we are attempting to get an exclusive
 * token, otherwise are attempting to get a shared token.
 *
 * If TOK_EXCLREQ is set in mode this is a blocking operation, otherwise
 * it is a non-blocking operation (for both exclusive and shared acquisitions).
 */
static __inline
int
_lwkt_trytokref(lwkt_tokref_t ref, thread_t td, long mode)
{
	lwkt_token_t tok;
	lwkt_tokref_t oref;
	long count;

	tok = ref->tr_tok;
	KASSERT(((mode & TOK_EXCLREQ) == 0 ||	/* non blocking */
		td->td_gd->gd_intr_nesting_level == 0 ||
		panic_cpu_gd == mycpu),
		("Attempt to acquire token %p not already "
		"held in hard code section", tok));

	if (mode & TOK_EXCLUSIVE) {
		/*
		 * Attempt to get an exclusive token
		 */
		for (;;) {
			count = tok->t_count;
			oref = tok->t_ref;	/* can be NULL */
			cpu_ccfence();
			if ((count & ~TOK_EXCLREQ) == 0) {
				/*
				 * It is possible to get the exclusive bit.
				 * We must clear TOK_EXCLREQ on successful
				 * acquisition.
				 */
				if (atomic_cmpset_long(&tok->t_count, count,
						       (count & ~TOK_EXCLREQ) |
						       TOK_EXCLUSIVE)) {
					KKASSERT(tok->t_ref == NULL);
					tok->t_ref = ref;
					return TRUE;
				}
				/* retry */
			} else if ((count & TOK_EXCLUSIVE) &&
				   oref >= &td->td_toks_base &&
				   oref < td->td_toks_stop) {
				/*
				 * Our thread already holds the exclusive
				 * bit, we treat this tokref as a shared
				 * token (sorta) to make the token release
				 * code easier.
				 *
				 * NOTE: oref cannot race above if it
				 *	 happens to be ours, so we're good.
				 *	 But we must still have a stable
				 *	 variable for both parts of the
				 *	 comparison.
				 *
				 * NOTE: Since we already have an exclusive
				 *	 lock and don't need to check EXCLREQ
				 *	 we can just use an atomic_add here
				 */
				atomic_add_long(&tok->t_count, TOK_INCR);
				ref->tr_count &= ~TOK_EXCLUSIVE;
				return TRUE;
			} else if ((mode & TOK_EXCLREQ) &&
				   (count & TOK_EXCLREQ) == 0) {
				/*
				 * Unable to get the exclusive bit but being
				 * asked to set the exclusive-request bit.
				 * Since we are going to retry anyway just
				 * set the bit unconditionally.
				 */
				atomic_set_long(&tok->t_count, TOK_EXCLREQ);
				return FALSE;
			} else {
				/*
				 * Unable to get the exclusive bit and not
				 * being asked to set the exclusive-request
				 * (aka lwkt_trytoken()), or EXCLREQ was
				 * already set.
				 */
				cpu_pause();
				return FALSE;
			}
			/* retry */
		}
	} else {
		/*
		 * Attempt to get a shared token.  Note that TOK_EXCLREQ
		 * for shared tokens simply means the caller intends to
		 * block.  We never actually set the bit in tok->t_count.
		 */
		for (;;) {
			count = tok->t_count;
			oref = tok->t_ref;	/* can be NULL */
			cpu_ccfence();
			if ((count & (TOK_EXCLUSIVE/*|TOK_EXCLREQ*/)) == 0) {
				/* XXX EXCLREQ should work */
				/*
				 * It is possible to get the token shared.
				 */
				if (atomic_cmpset_long(&tok->t_count, count,
						       count + TOK_INCR)) {
					return TRUE;
				}
				/* retry */
			} else if ((count & TOK_EXCLUSIVE) &&
				   oref >= &td->td_toks_base &&
				   oref < td->td_toks_stop) {
				/*
				 * We own the exclusive bit on the token so
				 * we can in fact also get it shared.
				 */
				atomic_add_long(&tok->t_count, TOK_INCR);
				return TRUE;
			} else {
				/*
				 * We failed to get the token shared
				 */
				return FALSE;
			}
			/* retry */
		}
	}
}
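
Callers never invoke _lwkt_trytokref() directly; they go through the gettoken/reltoken wrappers, and unlike a lock, a token is transparently released while its holder blocks and reacquired when it resumes.  A hedged sketch of the usual pattern: only lwkt_gettoken()/lwkt_reltoken() and LWKT_TOKEN_INITIALIZER are assumed here; my_token, struct my_item and my_list_insert() are invented for the example.

static struct lwkt_token my_token = LWKT_TOKEN_INITIALIZER(my_token);

static void
my_insert(struct my_item *item)
{
	lwkt_gettoken(&my_token);	/* blocking exclusive acquisition */
	my_list_insert(item);		/* serialized against other holders */
	lwkt_reltoken(&my_token);
}
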
Example #14
0
/*
 * Do all IO operations on dm logical devices.
 */
static int
dmstrategy(struct dev_strategy_args *ap)
{
	cdev_t dev = ap->a_head.a_dev;
	struct bio *bio = ap->a_bio;
	struct buf *bp = bio->bio_buf;
	int bypass;

	dm_dev_t *dmv;
	dm_table_t  *tbl;
	dm_table_entry_t *table_en;
	struct buf *nestbuf;

	uint32_t dev_type;

	uint64_t buf_start, buf_len, issued_len;
	uint64_t table_start, table_end;
	uint64_t start, end;

	buf_start = bio->bio_offset;
	buf_len = bp->b_bcount;

	tbl = NULL;

	table_end = 0;
	dev_type = 0;
	issued_len = 0;

	dmv = dev->si_drv1;

	switch(bp->b_cmd) {
	case BUF_CMD_READ:
	case BUF_CMD_WRITE:
	case BUF_CMD_FREEBLKS:
		bypass = 0;
		break;
	case BUF_CMD_FLUSH:
		bypass = 1;
		KKASSERT(buf_len == 0);
		break;
	default:
		bp->b_error = EIO;
		bp->b_resid = bp->b_bcount;
		biodone(bio);
		return 0;
	}

	if (bypass == 0 &&
	    bounds_check_with_mediasize(bio, DEV_BSIZE,
					dm_table_size(&dmv->table_head)) <= 0) {
		bp->b_resid = bp->b_bcount;
		biodone(bio);
		return 0;
	}

	/* Select active table */
	tbl = dm_table_get_entry(&dmv->table_head, DM_TABLE_ACTIVE);

	nestiobuf_init(bio);
	devstat_start_transaction(&dmv->stats);

	/*
	 * Find out what tables I want to select.
	 */
	SLIST_FOREACH(table_en, tbl, next) {
		/*
		 * We need the number of bytes, not blocks.
		 */
		table_start = table_en->start * DEV_BSIZE;
		table_end = table_start + (table_en->length) * DEV_BSIZE;

		/*
		 * Calculate the start and end
		 */
		start = MAX(table_start, buf_start);
		end = MIN(table_end, buf_start + buf_len);

		aprint_debug("----------------------------------------\n");
		aprint_debug("table_start %010" PRIu64 ", table_end %010"
		    PRIu64 "\n", table_start, table_end);
		aprint_debug("buf_start %010" PRIu64 ", buf_len %010"
		    PRIu64 "\n", buf_start, buf_len);
		aprint_debug("start-buf_start %010" PRIu64 ", end %010"
		    PRIu64 "\n", start - buf_start, end);
		aprint_debug("start %010" PRIu64 ", end %010"
		    PRIu64 "\n", start, end);
		aprint_debug("\n----------------------------------------\n");

		if (bypass) {
			nestbuf = getpbuf(NULL);
			nestbuf->b_flags |= bio->bio_buf->b_flags & B_HASBOGUS;

			nestiobuf_add(bio, nestbuf, 0, 0, &dmv->stats);
			nestbuf->b_bio1.bio_offset = 0;
			table_en->target->strategy(table_en, nestbuf);
		} else if (start < end) {
			nestbuf = getpbuf(NULL);
			nestbuf->b_flags |= bio->bio_buf->b_flags & B_HASBOGUS;

			nestiobuf_add(bio, nestbuf,
				      start - buf_start, (end - start),
				      &dmv->stats);
			issued_len += end - start;

			nestbuf->b_bio1.bio_offset = (start - table_start);
			table_en->target->strategy(table_en, nestbuf);
		}
	}

	if (issued_len < buf_len)
		nestiobuf_done(bio, buf_len - issued_len, EINVAL, &dmv->stats);
	nestiobuf_start(bio);

	dm_table_release(&dmv->table_head, DM_TABLE_ACTIVE);

	return 0;
}
Example #15
0
/*
 * Do an I/O operation to/from a cache block.
 */
int
nwfs_doio(struct vnode *vp, struct bio *bio, struct ucred *cr, struct thread *td)
{
	struct buf *bp = bio->bio_buf;
	struct uio *uiop;
	struct nwnode *np;
	struct nwmount *nmp;
	int error = 0;
	struct uio uio;
	struct iovec io;

	np = VTONW(vp);
	nmp = VFSTONWFS(vp->v_mount);
	uiop = &uio;
	uiop->uio_iov = &io;
	uiop->uio_iovcnt = 1;
	uiop->uio_segflg = UIO_SYSSPACE;
	uiop->uio_td = td;

	if (bp->b_cmd == BUF_CMD_READ) {
	    io.iov_len = uiop->uio_resid = (size_t)bp->b_bcount;
	    io.iov_base = bp->b_data;
	    uiop->uio_rw = UIO_READ;
	    switch (vp->v_type) {
	      case VREG:
		uiop->uio_offset = bio->bio_offset;
		error = ncp_read(NWFSTOCONN(nmp), &np->n_fh, uiop, cr);
		if (error)
			break;
		if (uiop->uio_resid) {
			size_t left = uiop->uio_resid;
			size_t nread = bp->b_bcount - left;
			if (left > 0)
				bzero((char *)bp->b_data + nread, left);
		}
		break;
/*	    case VDIR:
		nfsstats.readdir_bios++;
		uiop->uio_offset = bio->bio_offset;
		if (nmp->nm_flag & NFSMNT_RDIRPLUS) {
			error = nfs_readdirplusrpc(vp, uiop, cr);
			if (error == NFSERR_NOTSUPP)
				nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
		}
		if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0)
			error = nfs_readdirrpc(vp, uiop, cr);
		if (error == 0 && uiop->uio_resid == (size_t)bp->b_bcount)
			bp->b_flags |= B_INVAL;
		break;
*/
	    default:
		kprintf("nwfs_doio:  type %x unexpected\n",vp->v_type);
		break;
	    }
	    if (error) {
		bp->b_flags |= B_ERROR;
		bp->b_error = error;
	    }
	} else { /* write */
	    KKASSERT(bp->b_cmd == BUF_CMD_WRITE);
	    if (bio->bio_offset + bp->b_dirtyend > np->n_size)
		bp->b_dirtyend = np->n_size - bio->bio_offset;

	    if (bp->b_dirtyend > bp->b_dirtyoff) {
		io.iov_len = uiop->uio_resid =
			(size_t)(bp->b_dirtyend - bp->b_dirtyoff);
		uiop->uio_offset = bio->bio_offset + bp->b_dirtyoff;
		io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
		uiop->uio_rw = UIO_WRITE;
		error = ncp_write(NWFSTOCONN(nmp), &np->n_fh, uiop, cr);

		/*
		 * For an interrupted write, the buffer is still valid
		 * and the write hasn't been pushed to the server yet,
		 * so we can't set B_ERROR and report the interruption
		 * by setting B_EINTR. For the async case, B_EINTR
		 * is not relevant, so the rpc attempt is essentially
		 * a noop.  For the case of a V3 write rpc not being
		 * committed to stable storage, the block is still
		 * dirty and requires either a commit rpc or another
		 * write rpc with iomode == NFSV3WRITE_FILESYNC before
		 * the block is reused. This is indicated by setting
		 * the B_DELWRI and B_NEEDCOMMIT flags.
		 */
    		if (error == EINTR
		    || (!error && (bp->b_flags & B_NEEDCOMMIT))) {

			crit_enter();
			bp->b_flags &= ~(B_INVAL|B_NOCACHE);
			if ((bp->b_flags & B_PAGING) == 0)
			    bdirty(bp);
			bp->b_flags |= B_EINTR;
			crit_exit();
	    	} else {
			if (error) {
				bp->b_flags |= B_ERROR;
				bp->b_error /*= np->n_error */= error;
/*				np->n_flag |= NWRITEERR;*/
			}
			bp->b_dirtyoff = bp->b_dirtyend = 0;
		}
	    } else {
		bp->b_resid = 0;
		biodone(bio);
		return (0);
	    }
	}
	bp->b_resid = (int)uiop->uio_resid;
	biodone(bio);
	return (error);
}
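
The dirty-range handling above only pushes the bytes between b_dirtyoff and b_dirtyend to the server.  A standalone sketch of that arithmetic with invented numbers:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	int64_t bio_offset = 65536;	/* buffer's base offset in the file */
	int dirtyoff = 1024;		/* first dirty byte within the buffer */
	int dirtyend = 3072;		/* one past the last dirty byte */

	/* prints: write 2048 bytes at file offset 66560 */
	printf("write %d bytes at file offset %lld\n",
	    dirtyend - dirtyoff, (long long)(bio_offset + dirtyoff));
	return (0);
}
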
Example #16
0
static int
tmpfs_nremove(struct vop_nremove_args *v)
{
	struct vnode *dvp = v->a_dvp;
	struct namecache *ncp = v->a_nch->ncp;
	struct vnode *vp;
	int error;
	struct tmpfs_dirent *de;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *dnode;
	struct tmpfs_node *node;

	/*
	 * We have to acquire the vp from v->a_nch because we will likely
	 * unresolve the namecache entry, and a vrele/vput is needed to
	 * trigger the tmpfs_inactive/tmpfs_reclaim sequence.
	 *
	 * We have to use vget to clear any inactive state on the vnode,
	 * otherwise the vnode may remain inactive and thus tmpfs_inactive
	 * will not get called when we release it.
	 */
	error = cache_vget(v->a_nch, v->a_cred, LK_SHARED, &vp);
	KKASSERT(error == 0);
	vn_unlock(vp);

	if (vp->v_type == VDIR) {
		error = EISDIR;
		goto out;
	}

	dnode = VP_TO_TMPFS_DIR(dvp);
	node = VP_TO_TMPFS_NODE(vp);
	tmp = VFS_TO_TMPFS(vp->v_mount);
	de = tmpfs_dir_lookup(dnode, node, ncp);
	if (de == NULL) {
		error = ENOENT;
		goto out;
	}

	/* Files marked as immutable or append-only cannot be deleted. */
	if ((node->tn_flags & (IMMUTABLE | APPEND | NOUNLINK)) ||
	    (dnode->tn_flags & APPEND)) {
		error = EPERM;
		goto out;
	}

	/* Remove the entry from the directory; as it is a file, we do not
	 * have to change the number of hard links of the directory. */
	tmpfs_dir_detach(dnode, de);

	/* Free the directory entry we just deleted.  Note that the node
	 * referred by it will not be removed until the vnode is really
	 * reclaimed. */
	tmpfs_free_dirent(tmp, de);

	if (node->tn_links > 0) {
		TMPFS_NODE_LOCK(node);
		node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED |
		    TMPFS_NODE_MODIFIED;
		TMPFS_NODE_UNLOCK(node);
	}

	cache_setunresolved(v->a_nch);
	cache_setvp(v->a_nch, NULL);
	tmpfs_knote(vp, NOTE_DELETE);
	/*cache_inval_vp(vp, CINV_DESTROY);*/
	tmpfs_knote(dvp, NOTE_WRITE);
	error = 0;

out:
	vrele(vp);

	return error;
}
Example #17
0
/*
 * Vnode op for VM getpages.
 * Wish we could get rid of the multiple I/O routines here.
 *
 * nwfs_getpages(struct vnode *a_vp, vm_page_t *a_m, int a_count,
 *		 int a_reqpage, vm_ooffset_t a_offset)
 */
int
nwfs_getpages(struct vop_getpages_args *ap)
{
#ifndef NWFS_RWCACHE
	return vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count,
					    ap->a_reqpage, ap->a_seqaccess);
#else
	int i, error, npages;
	size_t nextoff, toff;
	size_t count;
	size_t size;
	struct uio uio;
	struct iovec iov;
	vm_offset_t kva;
	struct buf *bp;
	struct vnode *vp;
	struct thread *td = curthread;	/* XXX */
	struct ucred *cred;
	struct nwmount *nmp;
	struct nwnode *np;
	vm_page_t *pages;

	KKASSERT(td->td_proc);
	cred = td->td_proc->p_ucred;

	vp = ap->a_vp;
	np = VTONW(vp);
	nmp = VFSTONWFS(vp->v_mount);
	pages = ap->a_m;
	count = (size_t)ap->a_count;

	if (vp->v_object == NULL) {
		kprintf("nwfs_getpages: called with non-merged cache vnode??\n");
		return VM_PAGER_ERROR;
	}

	bp = getpbuf_kva(&nwfs_pbuf_freecnt);
	npages = btoc(count);
	kva = (vm_offset_t) bp->b_data;
	pmap_qenter(kva, pages, npages);

	iov.iov_base = (caddr_t) kva;
	iov.iov_len = count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = IDX_TO_OFF(pages[0]->pindex);
	uio.uio_resid = count;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_td = td;

	error = ncp_read(NWFSTOCONN(nmp), &np->n_fh, &uio,cred);
	pmap_qremove(kva, npages);

	relpbuf(bp, &nwfs_pbuf_freecnt);

	if (error && (uio.uio_resid == count)) {
		kprintf("nwfs_getpages: error %d\n",error);
		for (i = 0; i < npages; i++) {
			if (ap->a_reqpage != i)
				vnode_pager_freepage(pages[i]);
		}
		return VM_PAGER_ERROR;
	}

	size = count - uio.uio_resid;

	for (i = 0, toff = 0; i < npages; i++, toff = nextoff) {
		vm_page_t m;
		nextoff = toff + PAGE_SIZE;
		m = pages[i];

		m->flags &= ~PG_ZERO;

		/*
		 * NOTE: pmap dirty bit should have already been cleared.
		 *	 We do not clear it here.
		 */
		if (nextoff <= size) {
			m->valid = VM_PAGE_BITS_ALL;
			m->dirty = 0;
		} else {
			int nvalid = ((size + DEV_BSIZE - 1) - toff) &
				      ~(DEV_BSIZE - 1);
			vm_page_set_validclean(m, 0, nvalid);
		}
		
		if (i != ap->a_reqpage) {
			/*
			 * Whether or not to leave the page activated is up in
			 * the air, but we should put the page on a page queue
			 * somewhere (it already is in the object).
			 * Empirical results suggest that deactivating
			 * pages is best.
			 */

			/*
			 * Just in case someone was asking for this page we
			 * now tell them that it is ok to use.
			 */
			if (!error) {
				if (m->flags & PG_REFERENCED)
					vm_page_activate(m);
				else
					vm_page_deactivate(m);
				vm_page_wakeup(m);
			} else {
				vnode_pager_freepage(m);
			}
		}
	}
	return 0;
#endif /* NWFS_RWCACHE */
}
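
The partial-page case above rounds the valid byte count up to a DEV_BSIZE boundary.  A standalone sketch of that computation with invented numbers:

#include <stdio.h>

#define DEV_BSIZE	512
#define PAGE_SIZE	4096

int
main(void)
{
	int size = 6000;	/* bytes actually returned by the read */
	int toff = PAGE_SIZE;	/* this is the second page of the transfer */
	int nvalid = ((size + DEV_BSIZE - 1) - toff) & ~(DEV_BSIZE - 1);

	/* 6000 - 4096 = 1904 valid bytes, rounded up to 2048 */
	printf("nvalid = %d\n", nvalid);
	return (0);
}
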
Example #18
0
static int
tmpfs_nlink(struct vop_nlink_args *v)
{
	struct vnode *dvp = v->a_dvp;
	struct vnode *vp = v->a_vp;
	struct namecache *ncp = v->a_nch->ncp;
	struct tmpfs_dirent *de;
	struct tmpfs_node *node;
	struct tmpfs_node *dnode;
	int error;

	KKASSERT(dvp != vp); /* XXX When can this be false? */

	node = VP_TO_TMPFS_NODE(vp);
	dnode = VP_TO_TMPFS_NODE(dvp);

	/* XXX: Why aren't the following two tests done by the caller? */

	/* Hard links of directories are forbidden. */
	if (vp->v_type == VDIR) {
		error = EPERM;
		goto out;
	}

	/* Cannot create cross-device links. */
	if (dvp->v_mount != vp->v_mount) {
		error = EXDEV;
		goto out;
	}

	/* Ensure that we do not overflow the maximum number of links imposed
	 * by the system. */
	KKASSERT(node->tn_links <= LINK_MAX);
	if (node->tn_links == LINK_MAX) {
		error = EMLINK;
		goto out;
	}

	/* We cannot create links of files marked immutable or append-only. */
	if (node->tn_flags & (IMMUTABLE | APPEND)) {
		error = EPERM;
		goto out;
	}

	/* Allocate a new directory entry to represent the node. */
	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), node,
	    ncp->nc_name, ncp->nc_nlen, &de);
	if (error != 0)
		goto out;

	/* Insert the new directory entry into the appropriate directory. */
	tmpfs_dir_attach(dnode, de);

	/* vp link count has changed, so update node times. */

	TMPFS_NODE_LOCK(node);
	node->tn_status |= TMPFS_NODE_CHANGED;
	TMPFS_NODE_UNLOCK(node);
	tmpfs_update(vp);

	tmpfs_knote(vp, NOTE_LINK);
	cache_setunresolved(v->a_nch);
	cache_setvp(v->a_nch, vp);
	tmpfs_knote(dvp, NOTE_WRITE);
	error = 0;

out:
	return error;
}
Example #19
0
/*
 * Do a send by putting data in output queue and updating urgent
 * marker if URG set.  Possibly send more data.  Unlike the other
 * pru_*() routines, the mbuf chains are our responsibility.  We
 * must either enqueue them or free them.  The other pru_* routines
 * generally are caller-frees.
 */
static void
tcp_usr_send(netmsg_t msg)
{
	struct socket *so = msg->send.base.nm_so;
	int flags = msg->send.nm_flags;
	struct mbuf *m = msg->send.nm_m;
	int error = 0;
	struct inpcb *inp;
	struct tcpcb *tp;
	TCPDEBUG0;

	KKASSERT(msg->send.nm_control == NULL);
	KKASSERT(msg->send.nm_addr == NULL);
	KKASSERT((flags & PRUS_FREEADDR) == 0);

	inp = so->so_pcb;

	if (inp == NULL) {
		/*
		 * OOPS! we lost a race, the TCP session got reset after
		 * we checked SS_CANTSENDMORE, eg: while doing uiomove or a
		 * network interrupt in the non-critical section of sosend().
		 */
		m_freem(m);
		error = ECONNRESET;	/* XXX EPIPE? */
		tp = NULL;
		TCPDEBUG1();
		goto out;
	}
	tp = intotcpcb(inp);
	TCPDEBUG1();

#ifdef foo
	/*
	 * This is no longer necessary, since:
	 * - sosendtcp() has already checked it for us
	 * - It does not work with asynchronized send
	 */

	/*
	 * Don't let too much OOB data build up
	 */
	if (flags & PRUS_OOB) {
		if (ssb_space(&so->so_snd) < -512) {
			m_freem(m);
			error = ENOBUFS;
			goto out;
		}
	}
#endif

	/*
	 * Pump the data into the socket.
	 */
	if (m) {
		ssb_appendstream(&so->so_snd, m);
		sowwakeup(so);
	}
	if (flags & PRUS_OOB) {
		/*
		 * According to RFC961 (Assigned Protocols),
		 * the urgent pointer points to the last octet
		 * of urgent data.  We continue, however,
		 * to consider it to indicate the first octet
		 * of data past the urgent section.
		 * Otherwise, snd_up should be one lower.
		 */
		tp->snd_up = tp->snd_una + so->so_snd.ssb_cc;
		tp->t_flags |= TF_FORCE;
		error = tcp_output(tp);
		tp->t_flags &= ~TF_FORCE;
	} else {
		if (flags & PRUS_EOF) {
			/*
			 * Close the send side of the connection after
			 * the data is sent.
			 */
			socantsendmore(so);
			tp = tcp_usrclosed(tp);
		}
		if (tp != NULL && !tcp_output_pending(tp)) {
			if (flags & PRUS_MORETOCOME)
				tp->t_flags |= TF_MORETOCOME;
			error = tcp_output_fair(tp);
			if (flags & PRUS_MORETOCOME)
				tp->t_flags &= ~TF_MORETOCOME;
		}
	}
	COMMON_END1((flags & PRUS_OOB) ? PRU_SENDOOB :
		   ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND),
		   (flags & PRUS_NOREPLY));
}
Example #20
0
static int
tmpfs_nrename(struct vop_nrename_args *v)
{
	struct vnode *fdvp = v->a_fdvp;
	struct namecache *fncp = v->a_fnch->ncp;
	struct vnode *fvp = fncp->nc_vp;
	struct vnode *tdvp = v->a_tdvp;
	struct namecache *tncp = v->a_tnch->ncp;
	struct vnode *tvp;
	struct tmpfs_dirent *de;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *fdnode;
	struct tmpfs_node *fnode;
	struct tmpfs_node *tnode;
	struct tmpfs_node *tdnode;
	char *newname;
	char *oldname;
	int error;

	/*
	 * Because tvp can get overwritten we have to vget it instead of
	 * just vref()ing it, otherwise its VINACTIVE flag may not get
	 * cleared and the node won't get destroyed.
	 */
	error = cache_vget(v->a_tnch, v->a_cred, LK_SHARED, &tvp);
	if (error == 0) {
		tnode = VP_TO_TMPFS_NODE(tvp);
		vn_unlock(tvp);
	} else {
		tnode = NULL;
	}

	/* Disallow cross-device renames.
	 * XXX Why isn't this done by the caller? */
	if (fvp->v_mount != tdvp->v_mount ||
	    (tvp != NULL && fvp->v_mount != tvp->v_mount)) {
		error = EXDEV;
		goto out;
	}

	tmp = VFS_TO_TMPFS(tdvp->v_mount);
	tdnode = VP_TO_TMPFS_DIR(tdvp);

	/* If source and target are the same file, there is nothing to do. */
	if (fvp == tvp) {
		error = 0;
		goto out;
	}

	fdnode = VP_TO_TMPFS_DIR(fdvp);
	fnode = VP_TO_TMPFS_NODE(fvp);
	de = tmpfs_dir_lookup(fdnode, fnode, fncp);

	/* Avoid manipulating '.' and '..' entries. */
	if (de == NULL) {
		error = ENOENT;
		goto out_locked;
	}
	KKASSERT(de->td_node == fnode);

	/*
	 * If replacing an entry in the target directory and that entry
	 * is a directory, it must be empty.
	 *
	 * Kern_rename guarantees that the destination is a directory
	 * if the source is one (it does?).
	 */
	if (tvp != NULL) {
		KKASSERT(tnode != NULL);

		if ((tnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
		    (tdnode->tn_flags & (APPEND | IMMUTABLE))) {
			error = EPERM;
			goto out_locked;
		}

		if (fnode->tn_type == VDIR && tnode->tn_type == VDIR) {
			if (tnode->tn_size > 0) {
				error = ENOTEMPTY;
				goto out_locked;
			}
		} else if (fnode->tn_type == VDIR && tnode->tn_type != VDIR) {
			error = ENOTDIR;
			goto out_locked;
		} else if (fnode->tn_type != VDIR && tnode->tn_type == VDIR) {
			error = EISDIR;
			goto out_locked;
		} else {
			KKASSERT(fnode->tn_type != VDIR &&
				tnode->tn_type != VDIR);
		}
	}

	if ((fnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
	    (fdnode->tn_flags & (APPEND | IMMUTABLE))) {
		error = EPERM;
		goto out_locked;
	}

	/*
	 * Ensure that we have enough memory to hold the new name, if it
	 * has to be changed.
	 */
	if (fncp->nc_nlen != tncp->nc_nlen ||
	    bcmp(fncp->nc_name, tncp->nc_name, fncp->nc_nlen) != 0) {
		newname = kmalloc(tncp->nc_nlen + 1, tmp->tm_name_zone, 
				  M_WAITOK | M_NULLOK);
		if (newname == NULL) {
			error = ENOSPC;
			goto out_locked;
		}
		bcopy(tncp->nc_name, newname, tncp->nc_nlen);
		newname[tncp->nc_nlen] = '\0';
	} else {
		newname = NULL;
	}

	/*
	 * Unlink entry from source directory.  Note that the kernel has
	 * already checked for illegal recursion cases (renaming a directory
	 * into a subdirectory of itself).
	 */
	if (fdnode != tdnode)
		tmpfs_dir_detach(fdnode, de);

	/*
	 * Handle any name change.  Swap with newname, we will
	 * deallocate it at the end.
	 */
	if (newname != NULL) {
#if 0
		TMPFS_NODE_LOCK(fnode);
		fnode->tn_status |= TMPFS_NODE_CHANGED;
		TMPFS_NODE_UNLOCK(fnode);
#endif
		oldname = de->td_name;
		de->td_name = newname;
		de->td_namelen = (uint16_t)tncp->nc_nlen;
		newname = oldname;
	}

	/*
	 * Link entry to target directory.  If the entry
	 * represents a directory move the parent linkage
	 * as well.
	 */
	if (fdnode != tdnode) {
		if (de->td_node->tn_type == VDIR) {
			TMPFS_VALIDATE_DIR(fnode);

			TMPFS_NODE_LOCK(tdnode);
			tdnode->tn_links++;
			tdnode->tn_status |= TMPFS_NODE_MODIFIED;
			TMPFS_NODE_UNLOCK(tdnode);

			TMPFS_NODE_LOCK(fnode);
			fnode->tn_dir.tn_parent = tdnode;
			fnode->tn_status |= TMPFS_NODE_CHANGED;
			TMPFS_NODE_UNLOCK(fnode);

			TMPFS_NODE_LOCK(fdnode);
			fdnode->tn_links--;
			fdnode->tn_status |= TMPFS_NODE_MODIFIED;
			TMPFS_NODE_UNLOCK(fdnode);
		}
		tmpfs_dir_attach(tdnode, de);
	} else {
		TMPFS_NODE_LOCK(tdnode);
		tdnode->tn_status |= TMPFS_NODE_MODIFIED;
		TMPFS_NODE_UNLOCK(tdnode);
	}

	/*
	 * If we are overwriting an entry, we have to remove the old one
	 * from the target directory.
	 */
	if (tvp != NULL) {
		/* Remove the old entry from the target directory. */
		de = tmpfs_dir_lookup(tdnode, tnode, tncp);
		tmpfs_dir_detach(tdnode, de);
		tmpfs_knote(tdnode->tn_vnode, NOTE_DELETE);

		/*
		 * Free the directory entry we just deleted.  Note that the
		 * node referred by it will not be removed until the vnode is
		 * really reclaimed.
		 */
		tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), de);
		/*cache_inval_vp(tvp, CINV_DESTROY);*/
	}

	/*
	 * Finish up
	 */
	if (newname) {
		kfree(newname, tmp->tm_name_zone);
		newname = NULL;
	}
	cache_rename(v->a_fnch, v->a_tnch);
	tmpfs_knote(v->a_fdvp, NOTE_WRITE);
	tmpfs_knote(v->a_tdvp, NOTE_WRITE);
	if (fnode->tn_vnode)
		tmpfs_knote(fnode->tn_vnode, NOTE_RENAME);
	error = 0;

out_locked:
	;

out:
	if (tvp)
		vrele(tvp);

	return error;
}
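
A minimal userland sketch of the name-swap idiom above, assuming plain
strdup()/free() in place of the tmpfs name-zone allocator: the new name is
installed into the directory entry and the old pointer is swapped back into
`newname', so the single cleanup at the end frees whichever buffer is no
longer needed.  The structure and helper names below are hypothetical.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical stand-in for a tmpfs directory entry; not kernel API. */
struct dirent_sketch {
	char	*td_name;
	size_t	 td_namelen;
};

static void
rename_entry(struct dirent_sketch *de, const char *tname)
{
	char *newname = NULL;
	char *oldname;

	if (strcmp(de->td_name, tname) != 0) {
		newname = strdup(tname);	/* kmalloc() analogue */
		oldname = de->td_name;
		de->td_name = newname;
		de->td_namelen = strlen(tname);
		newname = oldname;		/* defer the free to the end */
	}
	if (newname != NULL)
		free(newname);			/* kfree() analogue */
}

int
main(void)
{
	struct dirent_sketch de = { strdup("old"), 3 };

	rename_entry(&de, "new");
	printf("%s\n", de.td_name);		/* prints "new" */
	free(de.td_name);
	return 0;
}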
Example #21
0
/*
 * Unlock a lock.  The caller must hold the lock either shared or exclusive.
 *
 * On the last release we handle any pending chains.
 */
void
_mtx_unlock(mtx_t *mtx)
{
	thread_t td __debugvar = curthread;
	u_int	lock;
	u_int	nlock;

	for (;;) {
		lock = mtx->mtx_lock;
		cpu_ccfence();

		switch(lock) {
		case MTX_EXCLUSIVE | 1:
			/*
			 * Last release, exclusive lock.
			 * No exclusive or shared requests pending.
			 */
			KKASSERT(mtx->mtx_owner == td ||
				 mtx->mtx_owner == NULL);
			mtx->mtx_owner = NULL;
			nlock = 0;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				goto done;
			break;
		case MTX_EXCLUSIVE | MTX_EXWANTED | 1:
		case MTX_EXCLUSIVE | MTX_EXWANTED | MTX_SHWANTED | 1:
			/*
			 * Last release, exclusive lock.
			 * Exclusive requests pending.
			 * Exclusive requests have priority over shared reqs.
			 */
			KKASSERT(mtx->mtx_owner == td ||
				 mtx->mtx_owner == NULL);
			mtx->mtx_owner = NULL;
			if (mtx_chain_link_ex(mtx, lock))
				goto done;
			break;
		case MTX_EXCLUSIVE | MTX_SHWANTED | 1:
			/*
			 * Last release, exclusive lock.
			 *
			 * Shared requests are pending.  Transfer our count (1)
			 * to the first shared request, wakeup all shared reqs.
			 */
			KKASSERT(mtx->mtx_owner == td ||
				 mtx->mtx_owner == NULL);
			mtx->mtx_owner = NULL;
			if (mtx_chain_link_sh(mtx, lock))
				goto done;
			break;
		case 1:
			/*
			 * Last release, shared lock.
			 * No exclusive or shared requests pending.
			 */
			nlock = 0;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				goto done;
			break;
		case MTX_EXWANTED | 1:
		case MTX_EXWANTED | MTX_SHWANTED | 1:
			/*
			 * Last release, shared lock.
			 *
			 * Exclusive requests are pending.  Upgrade this
			 * final shared lock to exclusive and transfer our
			 * count (1) to the next exclusive request.
			 *
			 * Exclusive requests have priority over shared reqs.
			 */
			if (mtx_chain_link_ex(mtx, lock))
				goto done;
			break;
		case MTX_SHWANTED | 1:
			/*
			 * Last release, shared lock.
			 * Shared requests pending.
			 */
			if (mtx_chain_link_sh(mtx, lock))
				goto done;
			break;
		default:
			/*
			 * We have to loop if this is the last release but
			 * someone is fiddling with LINKSPIN.
			 */
			if ((lock & MTX_MASK) == 1) {
				KKASSERT(lock & MTX_LINKSPIN);
				break;
			}

			/*
			 * Not the last release (shared or exclusive)
			 */
			nlock = lock - 1;
			KKASSERT((nlock & MTX_MASK) != MTX_MASK);
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				goto done;
			break;
		}
		/* loop try again */
		cpu_pause();
	}
done:
	;
}
Example #22
0
int
hammer_ioc_volume_add(hammer_transaction_t trans, hammer_inode_t ip,
		struct hammer_ioc_volume *ioc)
{
	struct hammer_mount *hmp = trans->hmp;
	struct mount *mp = hmp->mp;
	struct hammer_volume_ondisk ondisk;
	struct bigblock_stat stat;
	hammer_volume_t volume;
	int free_vol_no = 0;
	int error;

	if (mp->mnt_flag & MNT_RDONLY) {
		hmkprintf(hmp, "Cannot add volume to read-only HAMMER filesystem\n");
		return (EINVAL);
	}

	if (hmp->nvolumes >= HAMMER_MAX_VOLUMES) {
		hmkprintf(hmp, "Max number of HAMMER volumes exceeded\n");
		return (EINVAL);
	}

	if (hammer_lock_ex_try(&hmp->volume_lock) != 0) {
		hmkprintf(hmp, "Another volume operation is in progress!\n");
		return (EAGAIN);
	}

	/*
	 * Find an unused volume number.
	 */
	while (free_vol_no < HAMMER_MAX_VOLUMES &&
		HAMMER_VOLUME_NUMBER_IS_SET(hmp, free_vol_no)) {
		++free_vol_no;
	}
	if (free_vol_no >= HAMMER_MAX_VOLUMES) {
		hmkprintf(hmp, "Max number of HAMMER volumes exceeded\n");
		error = EINVAL;
		goto end;
	}

	error = hammer_format_volume_header(
		hmp,
		&ondisk,
		hmp->rootvol->ondisk->vol_name,
		free_vol_no,
		hmp->nvolumes+1,
		ioc->vol_size,
		ioc->boot_area_size,
		ioc->mem_area_size);
	if (error)
		goto end;

	error = hammer_install_volume(hmp, ioc->device_name, NULL, &ondisk);
	if (error)
		goto end;

	hammer_sync_lock_sh(trans);
	hammer_lock_ex(&hmp->blkmap_lock);

	volume = hammer_get_volume(hmp, free_vol_no, &error);
	KKASSERT(volume != NULL && error == 0);

	error =	hammer_format_freemap(trans, volume, &stat);
	KKASSERT(error == 0);
	hammer_rel_volume(volume, 0);

	++hmp->nvolumes;
	error = hammer_update_volumes_header(trans, &stat);
	KKASSERT(error == 0);

	hammer_unlock(&hmp->blkmap_lock);
	hammer_sync_unlock(trans);

	KKASSERT(error == 0);
end:
	hammer_unlock(&hmp->volume_lock);
	if (error)
		hmkprintf(hmp, "An error occurred: %d\n", error);
	return (error);
}
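
The unused-volume scan is a plain first-fit search over the volume number
space.  A small sketch, under the assumption that the set test
(HAMMER_VOLUME_NUMBER_IS_SET in the real code) can be modeled with an int
array:

#include <stdio.h>

#define MAX_VOLUMES	8

static int
find_free_vol_no(const int *in_use)
{
	int vol_no = 0;

	while (vol_no < MAX_VOLUMES && in_use[vol_no])
		++vol_no;
	return (vol_no < MAX_VOLUMES) ? vol_no : -1;	/* -1: table full */
}

int
main(void)
{
	int in_use[MAX_VOLUMES] = { 1, 1, 0, 1 };

	printf("free volume number: %d\n", find_free_vol_no(in_use));
	return 0;
}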
Example #23
0
/*
 * Exclusive-lock a mutex, block until acquired unless link is async.
 * Recursion is allowed.
 *
 * Returns 0 on success, the tsleep() return code on failure, EINPROGRESS
 * if async.  If immediately successful an async exclusive lock will return 0
 * and not issue the async callback or link the link structure.  The caller
 * must handle this case (typically this is an optimal code path).
 *
 * A tsleep() error can only be returned if PCATCH is specified in the flags.
 */
static __inline int
__mtx_lock_ex(mtx_t *mtx, mtx_link_t *link, int flags, int to)
{
	thread_t td;
	u_int	lock;
	u_int	nlock;
	int	error;
	int	isasync;

	for (;;) {
		lock = mtx->mtx_lock;
		cpu_ccfence();

		if (lock == 0) {
			nlock = MTX_EXCLUSIVE | 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
				mtx->mtx_owner = curthread;
				cpu_sfence();
				link->state = MTX_LINK_ACQUIRED;
				error = 0;
				break;
			}
			continue;
		}
		if ((lock & MTX_EXCLUSIVE) && mtx->mtx_owner == curthread) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				cpu_sfence();
				link->state = MTX_LINK_ACQUIRED;
				error = 0;
				break;
			}
			continue;
		}

		/*
		 * We need MTX_LINKSPIN to manipulate exlink or
		 * shlink.
		 *
		 * We must set MTX_EXWANTED with MTX_LINKSPIN to indicate
		 * pending exclusive requests.  It cannot be set as a separate
		 * operation prior to acquiring MTX_LINKSPIN.
		 *
		 * To avoid unnecessary cpu cache traffic we poll
		 * for collisions.  It is also possible that EXWANTED
		 * state failing the above test was spurious, so all the
		 * tests must be repeated if we cannot obtain LINKSPIN
		 * with the prior state tests intact (i.e. don't reload
		 * the (lock) variable here, for heaven's sake!).
		 */
		if (lock & MTX_LINKSPIN) {
			cpu_pause();
			continue;
		}
		td = curthread;
		nlock = lock | MTX_EXWANTED | MTX_LINKSPIN;
		crit_enter_raw(td);
		if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock) == 0) {
			crit_exit_raw(td);
			continue;
		}

		/*
		 * Check for early abort.
		 */
		if (link->state == MTX_LINK_ABORTED) {
			if (mtx->mtx_exlink == NULL) {
				atomic_clear_int(&mtx->mtx_lock,
						 MTX_LINKSPIN |
						 MTX_EXWANTED);
			} else {
				atomic_clear_int(&mtx->mtx_lock,
						 MTX_LINKSPIN);
			}
			crit_exit_raw(td);
			link->state = MTX_LINK_IDLE;
			error = ENOLCK;
			break;
		}

		/*
		 * Add our link to the exlink list and release LINKSPIN.
		 */
		link->owner = td;
		link->state = MTX_LINK_LINKED_EX;
		if (mtx->mtx_exlink) {
			link->next = mtx->mtx_exlink;
			link->prev = link->next->prev;
			link->next->prev = link;
			link->prev->next = link;
		} else {
			link->next = link;
			link->prev = link;
			mtx->mtx_exlink = link;
		}
		isasync = (link->callback != NULL);
		atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN);
		crit_exit_raw(td);

		/*
		 * If this is an asynchronous lock request, return
		 * without blocking and leave the link structure linked.
		 */
		if (isasync) {
			error = EINPROGRESS;
			break;
		}

		/*
		 * Wait for lock
		 */
		error = mtx_wait_link(mtx, link, flags, to);
		break;
	}
	return (error);
}
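
The exlink queue above is a circular doubly-linked list: a lone node points
at itself, and a newcomer is spliced in just before the current head, i.e. at
the tail.  A userland sketch of only that splice, with a hypothetical node
type standing in for mtx_link_t:

#include <stdio.h>

struct link_sketch {
	struct link_sketch *next;
	struct link_sketch *prev;
	int id;
};

static void
enqueue_tail(struct link_sketch **headp, struct link_sketch *link)
{
	if (*headp != NULL) {
		link->next = *headp;
		link->prev = link->next->prev;
		link->next->prev = link;
		link->prev->next = link;
	} else {
		link->next = link;
		link->prev = link;
		*headp = link;
	}
}

int
main(void)
{
	struct link_sketch a = { .id = 1 }, b = { .id = 2 };
	struct link_sketch *head = NULL;

	enqueue_tail(&head, &a);
	enqueue_tail(&head, &b);
	printf("head %d, tail %d\n", head->id, head->prev->id);  /* 1, 2 */
	return 0;
}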
Example #24
0
/*
 * mmap_args(void *addr, size_t len, int prot, int flags, int fd,
 *		long pad, off_t pos)
 *
 * Memory Map (mmap) system call.  Note that the file offset
 * and address are allowed to be NOT page aligned, though if
 * the MAP_FIXED flag is set, both must have the same remainder
 * modulo the PAGE_SIZE (POSIX 1003.1b).  If the address is not
 * page-aligned, the actual mapping starts at trunc_page(addr)
 * and the return value is adjusted up by the page offset.
 *
 * Generally speaking, only character devices which are themselves
 * memory-based, such as a video framebuffer, can be mmap'd.  Otherwise
 * there would be no cache coherency between a descriptor and a VM mapping
 * both to the same character device.
 *
 * Block devices can be mmap'd no matter what they represent.  Cache coherency
 * is maintained as long as you do not write directly to the underlying
 * character device.
 *
 * No requirements; sys_mmap path holds the vm_token
 */
int
kern_mmap(struct vmspace *vms, caddr_t uaddr, size_t ulen,
          int uprot, int uflags, int fd, off_t upos, void **res)
{
    struct thread *td = curthread;
    struct proc *p = td->td_proc;
    struct file *fp = NULL;
    struct vnode *vp;
    vm_offset_t addr;
    vm_offset_t tmpaddr;
    vm_size_t size, pageoff;
    vm_prot_t prot, maxprot;
    void *handle;
    int flags, error;
    off_t pos;
    vm_object_t obj;

    KKASSERT(p);

    addr = (vm_offset_t) uaddr;
    size = ulen;
    prot = uprot & VM_PROT_ALL;
    flags = uflags;
    pos = upos;

    /*
     * Make sure mapping fits into numeric range etc.
     *
     * NOTE: We support the full unsigned range for size now.
     */
    if ((flags & MAP_ANON) && (fd != -1 || pos != 0))
        return (EINVAL);

    if (flags & MAP_STACK) {
        if ((fd != -1) ||
                ((prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE)))
            return (EINVAL);
        flags |= MAP_ANON;
        pos = 0;
    }

    /*
     * Virtual page tables cannot be used with MAP_STACK.  Apart from
     * it not making any sense, the aux union is used by both
     * types.
     *
     * Because the virtual page table is stored in the backing object
     * and might be updated by the kernel, the mapping must be R+W.
     */
    if (flags & MAP_VPAGETABLE) {
        if (vkernel_enable == 0)
            return (EOPNOTSUPP);
        if (flags & MAP_STACK)
            return (EINVAL);
        if ((prot & (PROT_READ|PROT_WRITE)) != (PROT_READ|PROT_WRITE))
            return (EINVAL);
    }

    /*
     * Align the file position to a page boundary,
     * and save its page offset component.
     */
    pageoff = (pos & PAGE_MASK);
    pos -= pageoff;

    /* Adjust size for rounding (on both ends). */
    size += pageoff;			/* low end... */
    size = (vm_size_t) round_page(size);	/* hi end */
    if (size < ulen)			/* wrap */
        return(EINVAL);

    /*
     * Check for illegal addresses.  Watch out for address wrap... Note
     * that VM_*_ADDRESS are not constants due to casts (argh).
     */
    if (flags & (MAP_FIXED | MAP_TRYFIXED)) {
        /*
         * The specified address must have the same remainder
         * as the file offset taken modulo PAGE_SIZE, so it
         * should be aligned after adjustment by pageoff.
         */
        addr -= pageoff;
        if (addr & PAGE_MASK)
            return (EINVAL);

        /*
         * Address range must be all in user VM space and not wrap.
         */
        tmpaddr = addr + size;
        if (tmpaddr < addr)
            return (EINVAL);
        if (VM_MAX_USER_ADDRESS > 0 && tmpaddr > VM_MAX_USER_ADDRESS)
            return (EINVAL);
        if (VM_MIN_USER_ADDRESS > 0 && addr < VM_MIN_USER_ADDRESS)
            return (EINVAL);
    } else {
        /*
         * Get a hint of where to map. It also provides mmap offset
         * randomization if enabled.
         */
        addr = vm_map_hint(p, addr, prot);
    }

    if (flags & MAP_ANON) {
        /*
         * Mapping blank space is trivial.
         */
        handle = NULL;
        maxprot = VM_PROT_ALL;
    } else {
        /*
         * Mapping file, get fp for validation. Obtain vnode and make
         * sure it is of appropriate type.
         */
        fp = holdfp(p->p_fd, fd, -1);
        if (fp == NULL)
            return (EBADF);
        if (fp->f_type != DTYPE_VNODE) {
            error = EINVAL;
            goto done;
        }
        /*
         * POSIX shared-memory objects are defined to have
         * kernel persistence, and are not defined to support
         * read(2)/write(2) -- or even open(2).  Thus, we can
         * use MAP_NOSYNC to trade on-disk coherence for speed.
         * The shm_open(3) library routine turns on the FPOSIXSHM
         * flag to request this behavior.
         */
        if (fp->f_flag & FPOSIXSHM)
            flags |= MAP_NOSYNC;
        vp = (struct vnode *) fp->f_data;

        /*
         * Validate the vnode for the operation.
         */
        switch(vp->v_type) {
        case VREG:
            /*
             * Get the proper underlying object
             */
            if ((obj = vp->v_object) == NULL) {
                error = EINVAL;
                goto done;
            }
            KKASSERT((struct vnode *)obj->handle == vp);
            break;
        case VCHR:
            /*
             * Make sure a device has not been revoked.
             * Mappability is handled by the device layer.
             */
            if (vp->v_rdev == NULL) {
                error = EBADF;
                goto done;
            }
            break;
        default:
            /*
             * Nothing else is mappable.
             */
            error = EINVAL;
            goto done;
        }

        /*
         * XXX hack to handle use of /dev/zero to map anon memory (ala
         * SunOS).
         */
        if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
            handle = NULL;
            maxprot = VM_PROT_ALL;
            flags |= MAP_ANON;
            pos = 0;
        } else {
            /*
             * cdevs do not provide private mappings of any kind.
             */
            if (vp->v_type == VCHR &&
                    (flags & (MAP_PRIVATE|MAP_COPY))) {
                error = EINVAL;
                goto done;
            }
            /*
             * Ensure that file and memory protections are
             * compatible.  Note that we only worry about
             * writability if mapping is shared; in this case,
             * current and max prot are dictated by the open file.
             * XXX use the vnode instead?  Problem is: what
             * credentials do we use for determination? What if
             * proc does a setuid?
             */
            maxprot = VM_PROT_EXECUTE;	/* ??? */
            if (fp->f_flag & FREAD) {
                maxprot |= VM_PROT_READ;
            } else if (prot & PROT_READ) {
                error = EACCES;
                goto done;
            }
            /*
             * If we are sharing potential changes (either via
             * MAP_SHARED or via the implicit sharing of character
             * device mappings), and we are trying to get write
             * permission although we opened it without asking
             * for it, bail out.  Check for superuser, only if
             * we're at securelevel < 1, to allow the XIG X server
             * to continue to work.
             */
            if ((flags & MAP_SHARED) != 0 || vp->v_type == VCHR) {
                if ((fp->f_flag & FWRITE) != 0) {
                    struct vattr va;
                    if ((error = VOP_GETATTR(vp, &va))) {
                        goto done;
                    }
                    if ((va.va_flags &
                            (IMMUTABLE|APPEND)) == 0) {
                        maxprot |= VM_PROT_WRITE;
                    } else if (prot & PROT_WRITE) {
                        error = EPERM;
                        goto done;
                    }
                } else if ((prot & PROT_WRITE) != 0) {
                    error = EACCES;
                    goto done;
                }
            } else {
                maxprot |= VM_PROT_WRITE;
            }
            handle = (void *)vp;
        }
    }

    /* Token serializes access to vm_map.nentries against vm_mmap */
    lwkt_gettoken(&vm_token);

    /*
     * Do not allow more than a certain number of vm_map_entry structures
     * per process.  Scale with the number of rforks sharing the map
     * to make the limit reasonable for threads.
     */
    if (max_proc_mmap &&
            vms->vm_map.nentries >= max_proc_mmap * vms->vm_sysref.refcnt) {
        error = ENOMEM;
        lwkt_reltoken(&vm_token);
        goto done;
    }

    error = vm_mmap(&vms->vm_map, &addr, size, prot, maxprot,
                    flags, handle, pos);
    if (error == 0)
        *res = (void *)(addr + pageoff);

    lwkt_reltoken(&vm_token);
done:
    if (fp)
        fdrop(fp);

    return (error);
}
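
The offset and size handling near the top follows POSIX: split the file
position into a page-aligned base and a page offset, pad the length on both
ends, round up to a page, and reject wrap.  A standalone sketch of that
arithmetic, assuming a 4096-byte page:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE	4096ULL
#define PAGE_MASK	(PAGE_SIZE - 1)
#define round_page(x)	(((x) + PAGE_MASK) & ~PAGE_MASK)

int
main(void)
{
	uint64_t pos = 12345;		/* unaligned file offset */
	uint64_t ulen = 1000;		/* requested length */
	uint64_t size = ulen;
	uint64_t pageoff;

	pageoff = pos & PAGE_MASK;	/* 12345 & 4095 = 57 */
	pos -= pageoff;			/* 12288, page aligned */
	size += pageoff;		/* low end */
	size = round_page(size);	/* high end: 1057 -> 4096 */
	if (size < ulen)
		return 1;		/* wrapped; reject like EINVAL */

	printf("pos %llu pageoff %llu size %llu\n",
	    (unsigned long long)pos, (unsigned long long)pageoff,
	    (unsigned long long)size);
	return 0;
}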
Example #25
0
static int
dm_target_stripe_dump(dm_table_entry_t *table_en, void *data, size_t length, off_t offset)
{
	dm_target_stripe_config_t *tsc;
	uint64_t blkno, blkoff;
	uint64_t stripe, blknr;
	uint32_t stripe_off, stripe_rest, num_blks, issue_blks;
	uint64_t off2, len2;
	int devnr;

	tsc = table_en->target_config;
	if (tsc == NULL)
		return 0;

	/* calculate extent of request */
	KKASSERT(length % DEV_BSIZE == 0);

	blkno = offset / DEV_BSIZE;
	blkoff = 0;
	num_blks = length / DEV_BSIZE;

	/*
	 * 0 length means flush buffers and return
	 */
	if (length == 0) {
		for (devnr = 0; devnr < tsc->stripe_num; ++devnr) {
			if (tsc->stripe_devs[devnr].pdev->pdev_vnode->v_rdev == NULL)
				return ENXIO;

			dev_ddump(tsc->stripe_devs[devnr].pdev->pdev_vnode->v_rdev,
			    data, 0, offset, 0);
		}
		return 0;
	}

	while (num_blks > 0) {
		/* block number to stripe piece number */
		stripe = blkno / tsc->stripe_chunksize;
		stripe_off = blkno % tsc->stripe_chunksize;

		/* where we are inside the stripe */
		devnr = stripe % tsc->stripe_num;
		blknr = stripe / tsc->stripe_num;

		/* how much is left before we hit a boundary */
		stripe_rest = tsc->stripe_chunksize - stripe_off;

		/* issue this piece on stripe `stripe' */
		issue_blks = MIN(stripe_rest, num_blks);

#if 0
		nestiobuf_add(bio, nestbuf, blkoff,
				issue_blks * DEV_BSIZE);
#endif
		len2 = issue_blks * DEV_BSIZE;

		/* I need number of bytes. */
		off2 = blknr * tsc->stripe_chunksize + stripe_off;
		off2 += tsc->stripe_devs[devnr].offset;
		off2 *= DEV_BSIZE;
		off2 = dm_pdev_correct_dump_offset(tsc->stripe_devs[devnr].pdev,
		    off2);

		if (tsc->stripe_devs[devnr].pdev->pdev_vnode->v_rdev == NULL)
			return ENXIO;

		dev_ddump(tsc->stripe_devs[devnr].pdev->pdev_vnode->v_rdev,
		    (char *)data + blkoff, 0, off2, len2);

		blkno += issue_blks;
		blkoff += issue_blks * DEV_BSIZE;
		num_blks -= issue_blks;
	}

	return 0;
}
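
The per-block loop reduces to a little integer arithmetic: the logical block
number picks a stripe chunk, the chunk index selects a device round-robin,
and the quotient is the chunk number on that device.  A sketch of that
mapping with illustrative sizes:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t chunksize = 8;		/* blocks per stripe chunk */
	int	 ndevs = 3;		/* devices in the stripe set */
	uint64_t blkno = 53;		/* logical block number */
	uint64_t stripe, blknr;
	uint32_t stripe_off;
	int	 devnr;

	stripe = blkno / chunksize;	/* which chunk: 6 */
	stripe_off = blkno % chunksize;	/* offset in the chunk: 5 */
	devnr = (int)(stripe % ndevs);	/* which device: 0 */
	blknr = stripe / ndevs;		/* chunk number on that device: 2 */

	printf("dev %d, chunk %llu, offset %u\n",
	    devnr, (unsigned long long)blknr, stripe_off);
	return 0;
}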
Example #26
0
static int
cbq_add_queue_locked(struct pf_altq *a, cbq_state_t *cbqp)
{
	struct rm_class	*borrow, *parent;
	struct rm_class	*cl;
	struct cbq_opts	*opts;
	int		i;

	KKASSERT(a->qid != 0);

	/*
	 * find a free slot in the class table.  if the slot matching
	 * the lower bits of qid is free, use this slot.  otherwise,
	 * use the first free slot.
	 */
	i = a->qid % CBQ_MAX_CLASSES;
	if (cbqp->cbq_class_tbl[i] != NULL) {
		for (i = 0; i < CBQ_MAX_CLASSES; i++)
			if (cbqp->cbq_class_tbl[i] == NULL)
				break;
		if (i == CBQ_MAX_CLASSES)
			return (EINVAL);
	}

	opts = &a->pq_u.cbq_opts;
	/* check parameters */
	if (a->priority >= CBQ_MAXPRI)
		return (EINVAL);

	/* Get pointers to parent and borrow classes.  */
	parent = clh_to_clp(cbqp, a->parent_qid);
	if (opts->flags & CBQCLF_BORROW)
		borrow = parent;
	else
		borrow = NULL;

	/*
	 * A class must borrow from its parent or it cannot
	 * borrow at all.  Hence, borrow can be NULL.
	 */
	if (parent == NULL && (opts->flags & CBQCLF_ROOTCLASS) == 0) {
		kprintf("cbq_add_queue: no parent class!\n");
		return (EINVAL);
	}

	if ((borrow != parent)  && (borrow != NULL)) {
		kprintf("cbq_add_class: borrow class != parent\n");
		return (EINVAL);
	}

	/*
	 * check parameters
	 */
	switch (opts->flags & CBQCLF_CLASSMASK) {
	case CBQCLF_ROOTCLASS:
		if (parent != NULL)
			return (EINVAL);
		if (cbqp->ifnp.root_)
			return (EINVAL);
		break;
	case CBQCLF_DEFCLASS:
		if (cbqp->ifnp.default_)
			return (EINVAL);
		break;
	case 0:
		if (a->qid == 0)
			return (EINVAL);
		break;
	default:
		/* more than two flags bits set */
		return (EINVAL);
	}

	/*
	 * create a class.  if this is a root class, initialize the
	 * interface.
	 */
	if ((opts->flags & CBQCLF_CLASSMASK) == CBQCLF_ROOTCLASS) {
		rmc_init(cbqp->ifnp.ifq_, &cbqp->ifnp, opts->ns_per_byte,
		    cbqrestart, a->qlimit, RM_MAXQUEUED,
		    opts->maxidle, opts->minidle, opts->offtime,
		    opts->flags);
		cl = cbqp->ifnp.root_;
	} else {
		cl = rmc_newclass(a->priority,
				  &cbqp->ifnp, opts->ns_per_byte,
				  rmc_delay_action, a->qlimit, parent, borrow,
				  opts->maxidle, opts->minidle, opts->offtime,
				  opts->pktsize, opts->flags);
	}
	if (cl == NULL)
		return (ENOMEM);

	/* return handle to user space. */
	cl->stats_.handle = a->qid;
	cl->stats_.depth = cl->depth_;

	/* save the allocated class */
	cbqp->cbq_class_tbl[i] = cl;

	if ((opts->flags & CBQCLF_CLASSMASK) == CBQCLF_DEFCLASS)
		cbqp->ifnp.default_ = cl;

	return (0);
}
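
Slot selection in the class table prefers the slot indexed by the low bits of
the queue id and falls back to a linear scan for the first free entry.  A
small sketch of that policy; the table size and helper name are made up:

#include <stdio.h>

#define TBL_SIZE	8

static int
pick_slot(void *tbl[], unsigned int qid)
{
	int i = qid % TBL_SIZE;

	if (tbl[i] != NULL) {
		for (i = 0; i < TBL_SIZE; i++)
			if (tbl[i] == NULL)
				break;
		if (i == TBL_SIZE)
			return -1;	/* table full (EINVAL above) */
	}
	return i;
}

int
main(void)
{
	void *tbl[TBL_SIZE] = { NULL };
	int dummy;

	tbl[3] = &dummy;	/* preferred slot already taken */
	printf("slot for qid 11: %d\n", pick_slot(tbl, 11));	/* prints 0 */
	return 0;
}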
Example #27
0
/*
 * Allocates a cluster and its underlying chain structures.  The underlying
 * chains will be locked.  The cluster and underlying chains will have one
 * ref.
 */
hammer2_cluster_t *
hammer2_cluster_alloc(hammer2_pfsmount_t *pmp,
		      hammer2_trans_t *trans, hammer2_blockref_t *bref)
{
	hammer2_cluster_t *cluster;
	hammer2_cluster_t *rcluster;
	hammer2_chain_t *chain;
	u_int bytes = 1U << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
	int i;

	KKASSERT(pmp != NULL);

	/*
	 * Construct the appropriate system structure.
	 */
	switch(bref->type) {
	case HAMMER2_BREF_TYPE_INODE:
	case HAMMER2_BREF_TYPE_INDIRECT:
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_DATA:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
		/*
		 * Chains are really only associated with the hmp but we
		 * maintain a pmp association for per-mount memory tracking
		 * purposes.  The pmp can be NULL.
		 */
		break;
	case HAMMER2_BREF_TYPE_VOLUME:
	case HAMMER2_BREF_TYPE_FREEMAP:
		chain = NULL;
		panic("hammer2_cluster_alloc volume type illegal for op");
	default:
		chain = NULL;
		panic("hammer2_cluster_alloc: unrecognized blockref type: %d",
		      bref->type);
	}

	cluster = kmalloc(sizeof(*cluster), M_HAMMER2, M_WAITOK | M_ZERO);
	cluster->refs = 1;

	rcluster = &pmp->iroot->cluster;
	for (i = 0; i < rcluster->nchains; ++i) {
		chain = hammer2_chain_alloc(rcluster->array[i]->hmp,
					    pmp, trans, bref);
		chain->hmp = rcluster->array[i]->hmp;
		chain->bref = *bref;
		chain->bytes = bytes;
		chain->refs = 1;
		chain->flags = HAMMER2_CHAIN_ALLOCATED;
		chain->delete_xid = HAMMER2_XID_MAX;

		/*
		 * Set modify_tid if a transaction is creating the inode.
		 * Enforce update_xlo = 0 so nearby transactions do not think
		 * it has been flushed when it hasn't.
		 *
		 * NOTE: When loading a chain from backing store or creating a
		 *	 snapshot, trans will be NULL and the caller is
		 *	 responsible for setting these fields.
		 */
		if (trans) {
			chain->modify_xid = trans->sync_xid;
			chain->update_xlo = 0;
		}
		cluster->array[i] = chain;
	}
	cluster->nchains = i;
	cluster->pmp = pmp;
	cluster->focus = cluster->array[0];

	return (cluster);
}
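
The block size is not stored directly: the low bits of bref->data_off hold a
radix and the size is 1 << radix, which is what the `bytes' initializer above
computes.  A sketch of the decode; the mask width used here is an
illustrative assumption:

#include <stdint.h>
#include <stdio.h>

#define OFF_MASK_RADIX	0x1fULL		/* low bits hold the radix (width assumed) */

int
main(void)
{
	uint64_t data_off = 0x100000ULL | 10;	/* some offset, radix 10 */
	unsigned int bytes;

	bytes = 1U << (int)(data_off & OFF_MASK_RADIX);
	printf("block size: %u bytes\n", bytes);	/* 1024 */
	return 0;
}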
Example #28
0
static int
le_pci_attach(device_t dev)
{
	struct le_pci_softc *lesc;
	struct lance_softc *sc;
	int error, i;

	lesc = device_get_softc(dev);
	sc = &lesc->sc_am79900.lsc;

	pci_enable_busmaster(dev);
	pci_enable_io(dev, PCIM_CMD_PORTEN);

	lesc->sc_rrid = PCIR_BAR(0);
	lesc->sc_rres = bus_alloc_resource_any(dev, SYS_RES_IOPORT,
	    &lesc->sc_rrid, RF_ACTIVE);
	if (lesc->sc_rres == NULL) {
		device_printf(dev, "cannot allocate registers\n");
		error = ENXIO;
		goto fail_mtx;
	}
	lesc->sc_regt = rman_get_bustag(lesc->sc_rres);
	lesc->sc_regh = rman_get_bushandle(lesc->sc_rres);

	lesc->sc_irid = 0;
	if ((lesc->sc_ires = bus_alloc_resource_any(dev, SYS_RES_IRQ,
	    &lesc->sc_irid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
		device_printf(dev, "cannot allocate interrupt\n");
		error = ENXIO;
		goto fail_rres;
	}

	error = bus_dma_tag_create(
	    NULL,			/* parent */
	    1, 0,			/* alignment, boundary */
	    BUS_SPACE_MAXADDR_32BIT,	/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    BUS_SPACE_MAXSIZE_32BIT,	/* maxsize */
	    0,				/* nsegments */
	    BUS_SPACE_MAXSIZE_32BIT,	/* maxsegsize */
	    BUS_DMA_WAITOK,		/* flags */
	    &lesc->sc_pdmat);
	if (error != 0) {
		device_printf(dev, "cannot allocate parent DMA tag\n");
		goto fail_ires;
	}

	sc->sc_memsize = PCNET_MEMSIZE;
	/*
	 * For Am79C970A, Am79C971 and Am79C978 the init block must be 2-byte
	 * aligned and the ring descriptors must be 16-byte aligned when using
	 * a 32-bit software style.
	 */
	error = bus_dma_tag_create(
	    lesc->sc_pdmat,		/* parent */
	    16, 0,			/* alignment, boundary */
	    BUS_SPACE_MAXADDR_32BIT,	/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    sc->sc_memsize,		/* maxsize */
	    1,				/* nsegments */
	    sc->sc_memsize,		/* maxsegsize */
	    BUS_DMA_WAITOK,		/* flags */
	    &lesc->sc_dmat);
	if (error != 0) {
		device_printf(dev, "cannot allocate buffer DMA tag\n");
		goto fail_pdtag;
	}

	error = bus_dmamem_alloc(lesc->sc_dmat, (void **)&sc->sc_mem,
	    BUS_DMA_WAITOK | BUS_DMA_COHERENT, &lesc->sc_dmam);
	if (error != 0) {
		device_printf(dev, "cannot allocate DMA buffer memory\n");
		goto fail_dtag;
	}

	sc->sc_addr = 0;
	error = bus_dmamap_load(lesc->sc_dmat, lesc->sc_dmam, sc->sc_mem,
	    sc->sc_memsize, le_pci_dma_callback, sc, 0);
	if (error != 0 || sc->sc_addr == 0) {
                device_printf(dev, "cannot load DMA buffer map\n");
		goto fail_dmem;
	}

	sc->sc_flags = LE_BSWAP;
	sc->sc_conf3 = 0;

	sc->sc_mediastatus = NULL;
	switch (pci_get_device(dev)) {
	case AMD_PCNET_HOME:
		sc->sc_mediachange = le_pci_mediachange;
		sc->sc_supmedia = le_home_supmedia;
		sc->sc_nsupmedia = sizeof(le_home_supmedia) / sizeof(int);
		sc->sc_defaultmedia = le_home_supmedia[0];
		break;
	default:
		sc->sc_mediachange = le_pci_mediachange;
		sc->sc_supmedia = le_pci_supmedia;
		sc->sc_nsupmedia = sizeof(le_pci_supmedia) / sizeof(int);
		sc->sc_defaultmedia = le_pci_supmedia[0];
	}

	/*
	 * Extract the physical MAC address from the ROM.
	 */
	for (i = 0; i < sizeof(sc->sc_enaddr); i++)
		sc->sc_enaddr[i] =
		    bus_space_read_1(lesc->sc_regt, lesc->sc_regh, i);

	sc->sc_copytodesc = lance_copytobuf_contig;
	sc->sc_copyfromdesc = lance_copyfrombuf_contig;
	sc->sc_copytobuf = lance_copytobuf_contig;
	sc->sc_copyfrombuf = lance_copyfrombuf_contig;
	sc->sc_zerobuf = lance_zerobuf_contig;

	sc->sc_rdcsr = le_pci_rdcsr;
	sc->sc_wrcsr = le_pci_wrcsr;
	sc->sc_hwreset = le_pci_hwreset;
	sc->sc_hwinit = NULL;
	sc->sc_hwintr = NULL;
	sc->sc_nocarrier = NULL;

	error = am79900_config(&lesc->sc_am79900, device_get_name(dev),
	    device_get_unit(dev));
	if (error != 0) {
		device_printf(dev, "cannot attach Am79900\n");
		goto fail_dmap;
	}

	error = bus_setup_intr(dev, lesc->sc_ires, INTR_MPSAFE,
	    am79900_intr, sc, &lesc->sc_ih, sc->ifp->if_serializer);
	if (error != 0) {
		device_printf(dev, "cannot set up interrupt\n");
		goto fail_am79900;
	}

	sc->ifp->if_cpuid = rman_get_cpuid(lesc->sc_ires);
	KKASSERT(sc->ifp->if_cpuid >= 0 && sc->ifp->if_cpuid < ncpus);

	return (0);

 fail_am79900:
	am79900_detach(&lesc->sc_am79900);
 fail_dmap:
	bus_dmamap_unload(lesc->sc_dmat, lesc->sc_dmam);
 fail_dmem:
	bus_dmamem_free(lesc->sc_dmat, sc->sc_mem, lesc->sc_dmam);
 fail_dtag:
	bus_dma_tag_destroy(lesc->sc_dmat);
 fail_pdtag:
	bus_dma_tag_destroy(lesc->sc_pdmat);
 fail_ires:
	bus_release_resource(dev, SYS_RES_IRQ, lesc->sc_irid, lesc->sc_ires);
 fail_rres:
	bus_release_resource(dev, SYS_RES_IOPORT, lesc->sc_rrid, lesc->sc_rres);
 fail_mtx:
	return (error);
}
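
The fail_* labels form the usual staged-cleanup ladder: each acquisition gets
a matching label, and an error jumps to the label that tears down only what
was already set up, in reverse order.  A userland sketch of the shape, with
malloc() standing in for bus resources:

#include <stdio.h>
#include <stdlib.h>

static int
attach_sketch(void)
{
	char *regs = NULL, *irq = NULL, *dma = NULL;
	int error;

	if ((regs = malloc(16)) == NULL) {
		error = 1;
		goto fail_none;
	}
	if ((irq = malloc(16)) == NULL) {
		error = 2;
		goto fail_regs;
	}
	if ((dma = malloc(16)) == NULL) {
		error = 3;
		goto fail_irq;
	}
	/* a real attach keeps these; freed here so the sketch is leak-free */
	free(dma);
	free(irq);
	free(regs);
	return 0;

fail_irq:
	free(irq);
fail_regs:
	free(regs);
fail_none:
	return error;
}

int
main(void)
{
	printf("attach returned %d\n", attach_sketch());
	return 0;
}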
Example #29
0
/*
 * vop_compat_nmknod { struct nchandle *a_nch, 	XXX STOPGAP FUNCTION
 *			struct vnode *a_dvp,
 *			struct vnode **a_vpp,
 *			struct ucred *a_cred,
 *			struct vattr *a_vap }
 *
 * Create a device or fifo node as specified by a_vap.  Compatibility requires
 * us to issue the appropriate VOP_OLD_LOOKUP before we issue VOP_OLD_MKNOD
 * in order to setup the directory inode's i_offset and i_count (e.g. in UFS).
 */
int
vop_compat_nmknod(struct vop_nmknod_args *ap)
{
	struct thread *td = curthread;
	struct componentname cnp;
	struct nchandle *nch;
	struct namecache *ncp;
	struct vnode *dvp;
	int error;

	/*
	 * Sanity checks, get a locked directory vnode.
	 */
	nch = ap->a_nch;		/* locked namecache node */
	ncp = nch->ncp;
	dvp = ap->a_dvp;

	if ((error = vget(dvp, LK_EXCLUSIVE)) != 0) {
		kprintf("[diagnostic] vop_compat_resolve: EAGAIN on ncp %p %s\n",
			ncp, ncp->nc_name);
		return(EAGAIN);
	}

	/*
	 * Setup the cnp for a traditional vop_old_lookup() call.  The lookup
	 * caches all information required to create the entry in the
	 * directory inode.  We expect a return code of EJUSTRETURN for
	 * the CREATE case.  The cnp must simulate a saved-name situation.
	 */
	bzero(&cnp, sizeof(cnp));
	cnp.cn_nameiop = NAMEI_CREATE;
	cnp.cn_flags = CNP_LOCKPARENT;
	cnp.cn_nameptr = ncp->nc_name;
	cnp.cn_namelen = ncp->nc_nlen;
	cnp.cn_cred = ap->a_cred;
	cnp.cn_td = td;
	*ap->a_vpp = NULL;

	error = vop_old_lookup(ap->a_head.a_ops, dvp, ap->a_vpp, &cnp);

	/*
	 * EJUSTRETURN should be returned for this case, which means that
	 * the VFS has setup the directory inode for the create.  The dvp we
	 * passed in is expected to remain in a locked state.
	 *
	 * If the VOP_OLD_MKNOD is successful we are responsible for updating
	 * the cache state of the locked ncp that was passed to us.
	 */
	if (error == EJUSTRETURN) {
		KKASSERT((cnp.cn_flags & CNP_PDIRUNLOCK) == 0);
		error = VOP_OLD_MKNOD(dvp, ap->a_vpp, &cnp, ap->a_vap);
		if (error == 0) {
			cache_setunresolved(nch);
			cache_setvp(nch, *ap->a_vpp);
		}
	} else {
		if (error == 0) {
			vput(*ap->a_vpp);
			*ap->a_vpp = NULL;
			error = EEXIST;
		}
		KKASSERT(*ap->a_vpp == NULL);
	}
	if ((cnp.cn_flags & CNP_PDIRUNLOCK) == 0)
		vn_unlock(dvp);
	vrele(dvp);
	return (error);
}
Example #30
0
/*
 * System startup; initialize the world, create process 0, mount root
 * filesystem, and fork to create init and pagedaemon.  Most of the
 * hard work is done in the lower-level initialization routines including
 * startup(), which does memory initialization and autoconfiguration.
 *
 * This allows simple addition of new kernel subsystems that require
 * boot time initialization.  It also allows substitution of a subsystem
 * (for instance, a scheduler, kernel profiler, or VM system) by an object
 * module.  Finally, it allows for optional "kernel threads".
 */
void
mi_startup(void)
{
	struct sysinit *sip;		/* system initialization*/
	struct sysinit **sipp;		/* system initialization*/
	struct sysinit **xipp;		/* interior loop of sort*/
	struct sysinit *save;		/* bubble*/

	if (sysinit == NULL) {
		sysinit = SET_BEGIN(sysinit_set);
#if defined(__amd64__) && defined(_KERNEL_VIRTUAL)
		/*
		 * XXX For whatever reason, on 64-bit vkernels
		 * the value of sysinit obtained from the
		 * linker set is wrong.
		 */
		if ((long)sysinit % 8 != 0) {
			kprintf("Fixing sysinit value...\n");
			sysinit = (void *)((long)(intptr_t)sysinit + 4);
		}
#endif
		sysinit_end = SET_LIMIT(sysinit_set);
	}
#if defined(__amd64__) && defined(_KERNEL_VIRTUAL)
	KKASSERT((long)sysinit % 8 == 0);
#endif

restart:
	/*
	 * Perform a bubble sort of the system initialization objects by
	 * their subsystem (primary key) and order (secondary key).
	 */
	for (sipp = sysinit; sipp < sysinit_end; sipp++) {
		for (xipp = sipp + 1; xipp < sysinit_end; xipp++) {
			if ((*sipp)->subsystem < (*xipp)->subsystem ||
			     ((*sipp)->subsystem == (*xipp)->subsystem &&
			      (*sipp)->order <= (*xipp)->order))
				continue;	/* skip*/
			save = *sipp;
			*sipp = *xipp;
			*xipp = save;
		}
	}

	/*
	 * Traverse the (now) ordered list of system initialization tasks.
	 * Perform each task, and continue on to the next task.
	 *
	 * The last item on the list is expected to be the scheduler,
	 * which will not return.
	 */
	for (sipp = sysinit; sipp < sysinit_end; sipp++) {
		sip = *sipp;
		if (sip->subsystem == SI_SPECIAL_DUMMY)
			continue;	/* skip dummy task(s)*/

		if (sip->subsystem == SI_SPECIAL_DONE)
			continue;

		/* Call function */
		(*(sip->func))(sip->udata);

		/* Check off the one we've just done */
		sip->subsystem = SI_SPECIAL_DONE;

		/* Check if we've installed more sysinit items via KLD */
		if (newsysinit != NULL) {
			if (sysinit != SET_BEGIN(sysinit_set))
				kfree(sysinit, M_TEMP);
			sysinit = newsysinit;
			sysinit_end = newsysinit_end;
			newsysinit = NULL;
			newsysinit_end = NULL;
			goto restart;
		}
	}

	panic("Shouldn't get here!");
	/* NOTREACHED*/
}
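
The bubble sort keys on (subsystem, order): an element stays put when its
subsystem is smaller, or on a tie when its order is not larger; otherwise the
pointers are swapped.  A tiny standalone sketch of that comparator and swap:

#include <stdio.h>

struct init_sketch {
	int subsystem;		/* primary key */
	int order;		/* secondary key */
};

int
main(void)
{
	struct init_sketch a = { 2, 1 }, b = { 1, 5 }, c = { 2, 0 };
	struct init_sketch *set[] = { &a, &b, &c }, *save;
	int n = 3, i, j;

	for (i = 0; i < n; i++) {
		for (j = i + 1; j < n; j++) {
			if (set[i]->subsystem < set[j]->subsystem ||
			    (set[i]->subsystem == set[j]->subsystem &&
			     set[i]->order <= set[j]->order))
				continue;	/* already in order */
			save = set[i];
			set[i] = set[j];
			set[j] = save;
		}
	}
	for (i = 0; i < n; i++)
		printf("(%d,%d)\n", set[i]->subsystem, set[i]->order);
	return 0;
}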