Example #1
static void
nvd_strategy(struct bio *bp)
{
	struct nvd_disk *ndisk;

	ndisk = (struct nvd_disk *)bp->bio_disk->d_drv1;

	if (__predict_false(bp->bio_flags & BIO_ORDERED))
		atomic_add_int(&ndisk->ordered_in_flight, 1);

	if (__predict_true(ndisk->ordered_in_flight == 0)) {
		nvd_bio_submit(ndisk, bp);
		return;
	}

	/*
	 * There are ordered bios in flight, so we need to submit
	 *  bios through the task queue to enforce ordering.
	 */
	mtx_lock(&ndisk->bioqlock);
	bioq_insert_tail(&ndisk->bioq, bp);
	mtx_unlock(&ndisk->bioqlock);
	taskqueue_enqueue(ndisk->tq, &ndisk->bioqtask);
}
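The snippet above keeps a count of ordered bios in flight with atomic_add_int so the common case can submit directly and only falls back to the taskqueue while ordering must be enforced. Below is a minimal user-space sketch of the same gate, using C11 atomics in place of the kernel primitives; submit_direct() and submit_via_queue() are hypothetical stand-ins for nvd_bio_submit() and the taskqueue path.

#include <stdatomic.h>
#include <stdbool.h>

static atomic_int ordered_in_flight;		/* ordered requests currently pending */

void submit_direct(void *req);			/* hypothetical fast-path submit */
void submit_via_queue(void *req);		/* hypothetical deferred submit */

void
strategy(void *req, bool ordered)
{
	if (ordered)
		atomic_fetch_add(&ordered_in_flight, 1);

	/* Fast path: nothing ordered is pending, submit directly. */
	if (atomic_load(&ordered_in_flight) == 0) {
		submit_direct(req);
		return;
	}

	/* Ordered requests exist; defer so ordering is preserved. */
	submit_via_queue(req);
}

/* Completion side: drop the count once an ordered request finishes. */
void
request_done(bool ordered)
{
	if (ordered)
		atomic_fetch_sub(&ordered_in_flight, 1);
}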
Example #2
static void
test_callout(void *arg)
{
	struct callout_run *rn;
	int cpu;
	
	critical_enter();
	cpu = curcpu;
	critical_exit();
	rn = (struct callout_run *)arg;
	atomic_add_int(&rn->callout_waiting, 1);
	mtx_lock(&rn->lock);
	if (callout_pending(&rn->co_array[cpu]) ||
	    !callout_active(&rn->co_array[cpu])) {
		rn->co_return_npa++;
		atomic_subtract_int(&rn->callout_waiting, 1);
		mtx_unlock(&rn->lock);
		return;
	}
	callout_deactivate(&rn->co_array[cpu]);
	rn->co_completed++;
	mtx_unlock(&rn->lock);	
	atomic_subtract_int(&rn->callout_waiting, 1);
}
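Here the atomic pair maintains rn->callout_waiting as a count of handlers currently executing, so the test's controlling thread can wait for the count to drain before examining the results. A rough equivalent with C11 atomics; drain_wait() is an assumed helper for the waiting side, which the excerpt does not show.

#include <stdatomic.h>
#include <sched.h>

static atomic_int handlers_in_flight;		/* handlers currently inside handler() */

void
handler(void *arg)
{
	(void)arg;
	atomic_fetch_add(&handlers_in_flight, 1);
	/* ... take the lock, inspect/deactivate state, update counters ... */
	atomic_fetch_sub(&handlers_in_flight, 1);
}

/* Waiting side: spin (politely) until every in-flight handler has left. */
void
drain_wait(void)
{
	while (atomic_load(&handlers_in_flight) != 0)
		sched_yield();
}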
Example #3
int
main(void)
{
    struct timespec ts, ts2;
    int error;
    long long count = 0;
    long long max;
    int j;
    int cpuno;
    int ncpu;
    int *done;
    size_t ncpu_size;

    done = mmap(NULL, 4096, PROT_READ|PROT_WRITE,
                MAP_SHARED|MAP_ANON, -1, 0);

    /*
     * How many cpu threads are there?
     */
    ncpu = 0;
    ncpu_size = sizeof(ncpu);
    if (sysctlbyname("hw.ncpu", &ncpu, &ncpu_size, NULL, 0) < 0) {
        perror("sysctl hw.ncpu");
        exit(1);
    }
    printf("timing standard getuid() syscall, %d threads\n", ncpu);
    printf("if using powerd, run several times\n");
    *done = 0;

    /*
     * Approximate timing run length
     */
    start_timing();
    while (stop_timing(0, NULL) == 0) {
        for (j = 0; j < 100; ++j)
            getuid();
        count += 100;
    }
    max = count;

    /*
     * Run same length on all threads.
     */
    for (cpuno = 0; cpuno < ncpu; ++cpuno) {
        if (fork() == 0) {
            /*
             * Give scheduler time to move threads around
             */
            start_timing();
            while (stop_timing(0, NULL) == 0) {
                for (j = 0; j < 100; ++j)
                    getuid();
            }

            /*
             * Actual timing test is here.
             */
            start_timing();
            for (count = 0; count < max; count += 100) {
                for (j = 0; j < 100; ++j)
                    getuid();
            }
            stop_timing(count, "getuid() sysmsg");

            /*
             * Don't unbusy the cpu until the other threads are
             * done.
             */
            atomic_add_int(done, 1);
            while (*done < ncpu)	/* wait for other threads */
                getuid();
            exit(0);
        }
    }
    while (wait3(NULL, 0, NULL) > 0 || errno == EINTR)
        ;
    return 0;
}
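The benchmark's only synchronization is the shared, anonymously mmap()ed *done counter: each child bumps it with atomic_add_int when its timing run ends and then keeps the CPU busy until every sibling has reported in. A stripped-down, stand-alone sketch of that completion barrier follows (timing code omitted; the child count is a placeholder rather than the hw.ncpu sysctl).

#include <stdatomic.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>

int
main(void)
{
	int nchild = 4;			/* placeholder for the hw.ncpu lookup */
	atomic_int *done;

	/* Shared anonymous mapping, visible to all forked children. */
	done = mmap(NULL, sizeof(*done), PROT_READ | PROT_WRITE,
	    MAP_SHARED | MAP_ANON, -1, 0);
	if (done == MAP_FAILED)
		return (1);
	atomic_init(done, 0);

	for (int i = 0; i < nchild; i++) {
		if (fork() == 0) {
			/* ... timed work would run here ... */
			atomic_fetch_add(done, 1);
			/* Stay busy until every sibling has finished. */
			while (atomic_load(done) < nchild)
				;
			_exit(0);
		}
	}
	while (wait(NULL) > 0)
		;
	return (0);
}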
Example #4
/*
 * All-CPU rendezvous.  CPUs are signalled, all execute the setup function 
 * (if specified), rendezvous, execute the action function (if specified),
 * rendezvous again, execute the teardown function (if specified), and then
 * resume.
 *
 * Note that the supplied external functions _must_ be reentrant and aware
 * that they are running in parallel and in an unknown lock context.
 */
void
smp_rendezvous_action(void)
{
	struct thread *td;
	void *local_func_arg;
	void (*local_setup_func)(void*);
	void (*local_action_func)(void*);
	void (*local_teardown_func)(void*);
#ifdef INVARIANTS
	int owepreempt;
#endif

	/* Ensure we have up-to-date values. */
	atomic_add_acq_int(&smp_rv_waiters[0], 1);
	while (smp_rv_waiters[0] < smp_rv_ncpus)
		cpu_spinwait();

	/* Fetch rendezvous parameters after acquire barrier. */
	local_func_arg = smp_rv_func_arg;
	local_setup_func = smp_rv_setup_func;
	local_action_func = smp_rv_action_func;
	local_teardown_func = smp_rv_teardown_func;

	/*
	 * Use a nested critical section to prevent any preemptions
	 * from occurring during a rendezvous action routine.
	 * Specifically, if a rendezvous handler is invoked via an IPI
	 * and the interrupted thread was in the critical_exit()
	 * function after setting td_critnest to 0 but before
	 * performing a deferred preemption, this routine can be
	 * invoked with td_critnest set to 0 and td_owepreempt true.
	 * In that case, a critical_exit() during the rendezvous
	 * action would trigger a preemption which is not permitted in
	 * a rendezvous action.  To fix this, wrap all of the
	 * rendezvous action handlers in a critical section.  We
	 * cannot use a regular critical section however as having
	 * critical_exit() preempt from this routine would also be
	 * problematic (the preemption must not occur before the IPI
	 * has been acknowledged via an EOI).  Instead, we
	 * intentionally ignore td_owepreempt when leaving the
	 * critical section.  This should be harmless because we do
	 * not permit rendezvous action routines to schedule threads,
	 * and thus td_owepreempt should never transition from 0 to 1
	 * during this routine.
	 */
	td = curthread;
	td->td_critnest++;
#ifdef INVARIANTS
	owepreempt = td->td_owepreempt;
#endif
	
	/*
	 * If requested, run a setup function before the main action
	 * function.  Ensure all CPUs have completed the setup
	 * function before moving on to the action function.
	 */
	if (local_setup_func != smp_no_rendevous_barrier) {
		if (smp_rv_setup_func != NULL)
			smp_rv_setup_func(smp_rv_func_arg);
		atomic_add_int(&smp_rv_waiters[1], 1);
		while (smp_rv_waiters[1] < smp_rv_ncpus)
			cpu_spinwait();
	}

	if (local_action_func != NULL)
		local_action_func(local_func_arg);

	if (local_teardown_func != smp_no_rendevous_barrier) {
		/*
		 * Signal that the main action has been completed.  If a
		 * full exit rendezvous is requested, then all CPUs will
		 * wait here until all CPUs have finished the main action.
		 */
		atomic_add_int(&smp_rv_waiters[2], 1);
		while (smp_rv_waiters[2] < smp_rv_ncpus)
			cpu_spinwait();

		if (local_teardown_func != NULL)
			local_teardown_func(local_func_arg);
	}

	/*
	 * Signal that the rendezvous is fully completed by this CPU.
	 * This means that no member of smp_rv_* pseudo-structure will be
	 * accessed by this target CPU after this point; in particular,
	 * memory pointed by smp_rv_func_arg.
	 */
	atomic_add_int(&smp_rv_waiters[3], 1);

	td->td_critnest--;
	KASSERT(owepreempt == td->td_owepreempt,
	    ("rendezvous action changed td_owepreempt"));
}
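Each smp_rv_waiters[] stage above is a counting barrier: a CPU atomically increments the counter and then spins until it reaches smp_rv_ncpus. Below is a minimal sketch of one such stage with C11 atomics; NCPUS is an assumed constant, and the release/acquire pairing plays the role of the kernel's acq/rel atomic variants.

#include <stdatomic.h>

#define NCPUS	4			/* assumed number of participants */

static atomic_int stage_waiters;

/*
 * Counting barrier: the release on the increment publishes this
 * participant's earlier stores, and the acquire on the load makes the
 * other participants' stores visible once the barrier opens.
 */
void
barrier_stage(void)
{
	atomic_fetch_add_explicit(&stage_waiters, 1, memory_order_release);
	while (atomic_load_explicit(&stage_waiters, memory_order_acquire) < NCPUS)
		;			/* cpu_spinwait() equivalent */
}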
Example #5
void
interrupt(unsigned long a0, unsigned long a1, unsigned long a2,
    struct trapframe *framep)
{
	struct cpu_info *ci = curcpu();
	extern int schedhz;

	switch (a0) {
	case ALPHA_INTR_XPROC:	/* interprocessor interrupt */
#if defined(MULTIPROCESSOR)
		atomic_add_ulong(&ci->ci_intrdepth, 1);

		alpha_ipi_process(ci, framep);

		/*
		 * Handle inter-console messages if we're the primary
		 * CPU.
		 */
		if (ci->ci_cpuid == hwrpb->rpb_primary_cpu_id &&
		    hwrpb->rpb_txrdy != 0)
			cpu_iccb_receive();

		atomic_sub_ulong(&ci->ci_intrdepth, 1);
#else
		printf("WARNING: received interprocessor interrupt!\n");
#endif /* MULTIPROCESSOR */
		break;
		
	case ALPHA_INTR_CLOCK:	/* clock interrupt */
		atomic_add_int(&uvmexp.intrs, 1);
		if (CPU_IS_PRIMARY(ci))
			clk_count.ec_count++;
		if (platform.clockintr) {
			/*
			 * Call hardclock().  This will also call
			 * statclock(). On the primary CPU, it
			 * will also deal with time-of-day stuff.
			 */
			(*platform.clockintr)((struct clockframe *)framep);

			/*
			 * If it's time to call the scheduler clock,
			 * do so.
			 */
			if ((++ci->ci_schedstate.spc_schedticks & 0x3f) == 0 &&
			    schedhz != 0)
				schedclock(ci->ci_curproc);
		}
		break;

	case ALPHA_INTR_ERROR:	/* Machine Check or Correctable Error */
		atomic_add_ulong(&ci->ci_intrdepth, 1);
		a0 = alpha_pal_rdmces();
		if (platform.mcheck_handler)
			(*platform.mcheck_handler)(a0, framep, a1, a2);
		else
			machine_check(a0, framep, a1, a2);
		atomic_sub_ulong(&ci->ci_intrdepth, 1);
		break;

	case ALPHA_INTR_DEVICE:	/* I/O device interrupt */
	    {
		struct scbvec *scb;

		KDASSERT(a1 >= SCB_IOVECBASE && a1 < SCB_SIZE);

		atomic_add_ulong(&ci->ci_intrdepth, 1);
		atomic_add_int(&uvmexp.intrs, 1);
		scb = &scb_iovectab[SCB_VECTOIDX(a1 - SCB_IOVECBASE)];
		(*scb->scb_func)(scb->scb_arg, a1);
		atomic_sub_ulong(&ci->ci_intrdepth, 1);
		break;
	    }

	case ALPHA_INTR_PERF:	/* performance counter interrupt */
		printf("WARNING: received performance counter interrupt!\n");
		break;

	case ALPHA_INTR_PASSIVE:
#if 0
		printf("WARNING: received passive release interrupt vec "
		    "0x%lx\n", a1);
#endif
		break;

	default:
		printf("unexpected interrupt: type 0x%lx vec 0x%lx "
		    "a2 0x%lx"
#if defined(MULTIPROCESSOR)
		    " cpu %lu"
#endif
		    "\n", a0, a1, a2
#if defined(MULTIPROCESSOR)
		    , ci->ci_cpuid
#endif
		    );
		panic("interrupt");
		/* NOTREACHED */
	}
}
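The handler brackets its work with atomic adjustments of ci_intrdepth so other code can tell whether the CPU is currently servicing an interrupt. A compact sketch of that bookkeeping follows; in the kernel the counter is per-CPU, a single global is used here for brevity, and dispatch_interrupt() is a hypothetical placeholder.

#include <stdatomic.h>
#include <stdbool.h>

static atomic_ulong intr_depth;		/* per-CPU in the kernel; one global here */

void dispatch_interrupt(void *frame);	/* hypothetical handler body */

void
interrupt_entry(void *frame)
{
	atomic_fetch_add(&intr_depth, 1);
	dispatch_interrupt(frame);
	atomic_fetch_sub(&intr_depth, 1);
}

bool
in_interrupt(void)
{
	return (atomic_load(&intr_depth) != 0);
}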
Example #6
/*
 *	malloc:
 *
 *	Allocate a block of memory.
 *
 *	If M_NOWAIT is set, this routine will not block and will return NULL
 *	if the allocation fails.
 */
void *
malloc(unsigned long size, struct malloc_type *mtp, int flags)
{
	int indx;
	struct malloc_type_internal *mtip;
	caddr_t va;
	uma_zone_t zone;
#if defined(DIAGNOSTIC) || defined(DEBUG_REDZONE)
	unsigned long osize = size;
#endif

#ifdef INVARIANTS
	KASSERT(mtp->ks_magic == M_MAGIC, ("malloc: bad malloc type magic"));
	/*
	 * Check that exactly one of M_WAITOK or M_NOWAIT is specified.
	 */
	indx = flags & (M_WAITOK | M_NOWAIT);
	if (indx != M_NOWAIT && indx != M_WAITOK) {
		static	struct timeval lasterr;
		static	int curerr, once;
		if (once == 0 && ppsratecheck(&lasterr, &curerr, 1)) {
			printf("Bad malloc flags: %x\n", indx);
			kdb_backtrace();
			flags |= M_WAITOK;
			once++;
		}
	}
#endif
#ifdef MALLOC_MAKE_FAILURES
	if ((flags & M_NOWAIT) && (malloc_failure_rate != 0)) {
		atomic_add_int(&malloc_nowait_count, 1);
		if ((malloc_nowait_count % malloc_failure_rate) == 0) {
			atomic_add_int(&malloc_failure_count, 1);
			t_malloc_fail = time_uptime;
			return (NULL);
		}
	}
#endif
	if (flags & M_WAITOK)
		KASSERT(curthread->td_intr_nesting_level == 0,
		   ("malloc(M_WAITOK) in interrupt context"));

#ifdef DEBUG_MEMGUARD
	if (memguard_cmp_mtp(mtp, size)) {
		va = memguard_alloc(size, flags);
		if (va != NULL)
			return (va);
		/* This is unfortunate but should not be fatal. */
	}
#endif

#ifdef DEBUG_REDZONE
	size = redzone_size_ntor(size);
#endif

	if (size <= kmem_zmax) {
		mtip = mtp->ks_handle;
		if (size & KMEM_ZMASK)
			size = (size & ~KMEM_ZMASK) + KMEM_ZBASE;
		indx = kmemsize[size >> KMEM_ZSHIFT];
		KASSERT(mtip->mti_zone < numzones,
		    ("mti_zone %u out of range %d",
		    mtip->mti_zone, numzones));
		zone = kmemzones[indx].kz_zone[mtip->mti_zone];
#ifdef MALLOC_PROFILE
		krequests[size >> KMEM_ZSHIFT]++;
#endif
		va = uma_zalloc(zone, flags);
		if (va != NULL)
			size = zone->uz_size;
		malloc_type_zone_allocated(mtp, va == NULL ? 0 : size, indx);
	} else {
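The example is cut off here, but the MALLOC_MAKE_FAILURES block already shows the interesting use: two atomic counters let malloc() fail every Nth M_NOWAIT allocation so callers' error paths get exercised. Here is a user-space approximation of that rate-limited fault injection; the counters and failure_rate are stand-ins for the kernel's sysctl-controlled variables.

#include <stdatomic.h>
#include <stdlib.h>

static atomic_uint nowait_count;	/* allocations eligible for injection */
static atomic_uint failure_count;	/* failures actually injected */
static unsigned failure_rate = 16;	/* fail every 16th eligible allocation */

void *
malloc_with_faults(size_t size)
{
	if (failure_rate != 0) {
		unsigned n = atomic_fetch_add(&nowait_count, 1) + 1;

		if (n % failure_rate == 0) {
			atomic_fetch_add(&failure_count, 1);
			return (NULL);	/* simulated allocation failure */
		}
	}
	return (malloc(size));
}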
Example #7
struct socket *
sctp_get_peeloff(struct socket *head, sctp_assoc_t assoc_id, int *error)
{
	struct socket *newso;
	struct sctp_inpcb *inp, *n_inp;
	struct sctp_tcb *stcb;

	SCTPDBG(SCTP_DEBUG_PEEL1, "SCTP peel-off called\n");
	inp = (struct sctp_inpcb *)head->so_pcb;
	if (inp == NULL) {
		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, EFAULT);
		*error = EFAULT;
		return (NULL);
	}
	stcb = sctp_findassociation_ep_asocid(inp, assoc_id, 1);
	if (stcb == NULL) {
		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, ENOTCONN);
		*error = ENOTCONN;
		return (NULL);
	}
	atomic_add_int(&stcb->asoc.refcnt, 1);
	SCTP_TCB_UNLOCK(stcb);
	newso = sonewconn(head, SS_ISCONNECTED);
	if (newso == NULL) {
		SCTPDBG(SCTP_DEBUG_PEEL1, "sctp_peeloff:sonewconn failed\n");
		SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_PEELOFF, ENOMEM);
		*error = ENOMEM;
		atomic_subtract_int(&stcb->asoc.refcnt, 1);
		return (NULL);

	}
	SCTP_TCB_LOCK(stcb);
	atomic_subtract_int(&stcb->asoc.refcnt, 1);
	n_inp = (struct sctp_inpcb *)newso->so_pcb;
	SOCK_LOCK(head);
	n_inp->sctp_flags = (SCTP_PCB_FLAGS_UDPTYPE |
	    SCTP_PCB_FLAGS_CONNECTED |
	    SCTP_PCB_FLAGS_IN_TCPPOOL |	/* Turn on Blocking IO */
	    (SCTP_PCB_COPY_FLAGS & inp->sctp_flags));
	n_inp->sctp_features = inp->sctp_features;
	n_inp->sctp_frag_point = inp->sctp_frag_point;
	n_inp->partial_delivery_point = inp->partial_delivery_point;
	n_inp->sctp_context = inp->sctp_context;
	n_inp->inp_starting_point_for_iterator = NULL;

	/* copy in the authentication parameters from the original endpoint */
	if (n_inp->sctp_ep.local_hmacs)
		sctp_free_hmaclist(n_inp->sctp_ep.local_hmacs);
	n_inp->sctp_ep.local_hmacs =
	    sctp_copy_hmaclist(inp->sctp_ep.local_hmacs);
	if (n_inp->sctp_ep.local_auth_chunks)
		sctp_free_chunklist(n_inp->sctp_ep.local_auth_chunks);
	n_inp->sctp_ep.local_auth_chunks =
	    sctp_copy_chunklist(inp->sctp_ep.local_auth_chunks);
	(void)sctp_copy_skeylist(&inp->sctp_ep.shared_keys,
	    &n_inp->sctp_ep.shared_keys);

	n_inp->sctp_socket = newso;
	if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTOCLOSE)) {
		sctp_feature_off(n_inp, SCTP_PCB_FLAGS_AUTOCLOSE);
		n_inp->sctp_ep.auto_close_time = 0;
		sctp_timer_stop(SCTP_TIMER_TYPE_AUTOCLOSE, n_inp, stcb, NULL,
		    SCTP_FROM_SCTP_PEELOFF + SCTP_LOC_1);
	}
	/* Turn off any non-blocking semantic. */
	SCTP_CLEAR_SO_NBIO(newso);
	newso->so_state |= SS_ISCONNECTED;
	/* We remove it right away */

#ifdef SCTP_LOCK_LOGGING
	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOCK_LOGGING_ENABLE) {
		sctp_log_lock(inp, (struct sctp_tcb *)NULL, SCTP_LOG_LOCK_SOCK);
	}
#endif
	TAILQ_REMOVE(&head->so_comp, newso, so_list);
	head->so_qlen--;
	SOCK_UNLOCK(head);
	/*
	 * Now we must move it from one hash table to another and get the
	 * stcb in the right place.
	 */
	sctp_move_pcb_and_assoc(inp, n_inp, stcb);
	atomic_add_int(&stcb->asoc.refcnt, 1);
	SCTP_TCB_UNLOCK(stcb);
	/*
	 * And now the final hack. We move data in the pending side i.e.
	 * head to the new socket buffer. Let the GRUBBING begin :-0
	 */
	sctp_pull_off_control_to_new_inp(inp, n_inp, stcb, SBL_WAIT);
	atomic_subtract_int(&stcb->asoc.refcnt, 1);
	return (newso);
}
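The peeloff path bumps stcb->asoc.refcnt with atomic_add_int before every place it drops the TCB lock, so the association cannot be freed while sonewconn() or the socket-buffer move runs unlocked, and releases the reference once the lock is reacquired. The general shape of that idiom, sketched with C11 atomics and a pthread mutex:

#include <stdatomic.h>
#include <pthread.h>

struct pinned_obj {
	pthread_mutex_t	lock;
	atomic_int	refcnt;		/* non-zero keeps the object allocated */
};

void
do_unlocked_work(struct pinned_obj *o)
{
	/* o->lock is held on entry. */
	atomic_fetch_add(&o->refcnt, 1);	/* pin the object across the unlock */
	pthread_mutex_unlock(&o->lock);

	/* ... work that must not hold the lock, e.g. creating a new socket ... */

	pthread_mutex_lock(&o->lock);
	atomic_fetch_sub(&o->refcnt, 1);	/* unpin; the last holder frees at zero */
	/* o->lock is held again on return. */
}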
Example #8
/*
 * Handle an exception.
 * In the case of a kernel trap, we return the pc at which to resume if
 * pcb_onfault is set; otherwise, we return the old pc.
 */
void
trap(struct trap_frame *trapframe)
{
	struct cpu_info *ci = curcpu();
	struct proc *p = ci->ci_curproc;
	int type;

	type = (trapframe->cause & CR_EXC_CODE) >> CR_EXC_CODE_SHIFT;

#if defined(CPU_R8000) && !defined(DEBUG_INTERRUPT)
	if (type != T_INT)
#endif
		trapdebug_enter(ci, trapframe, -1);

#ifdef CPU_R8000
	if (type != T_INT && type != T_SYSCALL)
#else
	if (type != T_SYSCALL)
#endif
		atomic_add_int(&uvmexp.traps, 1);
	if (USERMODE(trapframe->sr)) {
		type |= T_USER;
		refreshcreds(p);
	}

	/*
	 * Enable hardware interrupts if they were on before the trap;
	 * enable IPI interrupts only otherwise.
	 */
	switch (type) {
#ifdef CPU_R8000
	case T_INT:
	case T_INT | T_USER:
#endif
	case T_BREAK:
		break;
	default:
		if (ISSET(trapframe->sr, SR_INT_ENAB))
			enableintr();
		else {
#ifdef MULTIPROCESSOR
			ENABLEIPI();
#endif
		}
		break;
	}

#ifdef CPU_R8000
	/*
	 * Some exception causes on R8000 are actually detected by external
	 * circuitry, and as such are reported as external interrupts.
	 * On R8000 kernels, external interrupts vector to trap() instead of
	 * interrupt(), so that we can process these particular exceptions
	 * as if they were triggered as regular exceptions.
	 */
	if ((type & ~T_USER) == T_INT) {
		/*
		 * Similar reality check as done in interrupt(), in case
		 * an interrupt occurred between a write to COP_0_STATUS_REG
		 * and it taking effect.
		 */
		if (!ISSET(trapframe->sr, SR_INT_ENAB))
			return;

		if (trapframe->cause & CR_VCE) {
#ifndef DEBUG_INTERRUPT
			trapdebug_enter(ci, trapframe, -1);
#endif
			panic("VCE or TLBX");
		}
		if (trapframe->cause & CR_FPE) {
#ifndef DEBUG_INTERRUPT
			trapdebug_enter(ci, trapframe, -1);
#endif
			itsa(trapframe, ci, p, T_FPE | (type & T_USER));
			cp0_reset_cause(CR_FPE);
		}
		if (trapframe->cause & CR_INT_MASK)
			interrupt(trapframe);

		return;	/* no userret */
	} else
#endif
		itsa(trapframe, ci, p, type);

	if (type & T_USER)
		userret(p);
}
Example #9
/*
 * Attempt to build up a hash table for the directory contents in
 * inode 'ip'. Returns 0 on success, or -1 if the operation failed.
 */
int
ufsdirhash_build(struct inode *ip)
{
	struct dirhash *dh;
	struct buf *bp = NULL;
	struct direct *ep;
	struct vnode *vp;
	doff_t bmask, pos;
	int dirblocks, i, j, memreqd, nblocks, narrays, nslots, slot;
	const int needswap = UFS_MPNEEDSWAP(ip->i_ump);
	int dirblksiz = ip->i_ump->um_dirblksiz;

	/* Check if we can/should use dirhash. */
	if (ip->i_dirhash == NULL) {
		if (ip->i_size < (ufs_dirhashminblks * dirblksiz) || OFSFMT(ip))
			return (-1);
	} else {
		/* Hash exists, but sysctls could have changed. */
		if (ip->i_size < (ufs_dirhashminblks * dirblksiz) ||
		    ufs_dirhashmem > ufs_dirhashmaxmem) {
			ufsdirhash_free(ip);
			return (-1);
		}
		/* Check if hash exists and is intact (note: unlocked read). */
		if (ip->i_dirhash->dh_hash != NULL)
			return (0);
		/* Free the old, recycled hash and build a new one. */
		ufsdirhash_free(ip);
	}

	/* Don't hash removed directories. */
	if (ip->i_nlink == 0)
		return (-1);

	vp = ip->i_vnode;
	/* Allocate 50% more entries than this dir size could ever need. */
	KASSERT(ip->i_size >= dirblksiz);
	nslots = ip->i_size / UFS_DIRECTSIZ(1);
	nslots = (nslots * 3 + 1) / 2;
	narrays = howmany(nslots, DH_NBLKOFF);
	nslots = narrays * DH_NBLKOFF;
	dirblocks = howmany(ip->i_size, dirblksiz);
	nblocks = (dirblocks * 3 + 1) / 2;

	memreqd = sizeof(*dh) + narrays * sizeof(*dh->dh_hash) +
	    narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) +
	    nblocks * sizeof(*dh->dh_blkfree);

	while (atomic_add_int_nv(&ufs_dirhashmem, memreqd) >
	    ufs_dirhashmaxmem) {
		atomic_add_int(&ufs_dirhashmem, -memreqd);
		if (memreqd > ufs_dirhashmaxmem / 2)
			return (-1);
		/* Try to free some space. */
		if (ufsdirhash_recycle(memreqd) != 0)
			return (-1);
		else
			DIRHASHLIST_UNLOCK();
	}

	/*
	 * Use non-blocking mallocs so that we will revert to a linear
	 * lookup on failure rather than potentially blocking forever.
	 */
	dh = pool_cache_get(ufsdirhash_cache, PR_NOWAIT);
	if (dh == NULL) {
		atomic_add_int(&ufs_dirhashmem, -memreqd);
		return (-1);
	}
	memset(dh, 0, sizeof(*dh));
	mutex_init(&dh->dh_lock, MUTEX_DEFAULT, IPL_NONE);
	DIRHASH_LOCK(dh);
	dh->dh_hashsz = narrays * sizeof(dh->dh_hash[0]);
	dh->dh_hash = kmem_zalloc(dh->dh_hashsz, KM_NOSLEEP);
	dh->dh_blkfreesz = nblocks * sizeof(dh->dh_blkfree[0]);
	dh->dh_blkfree = kmem_zalloc(dh->dh_blkfreesz, KM_NOSLEEP);
	if (dh->dh_hash == NULL || dh->dh_blkfree == NULL)
		goto fail;
	for (i = 0; i < narrays; i++) {
		if ((dh->dh_hash[i] = DIRHASH_BLKALLOC()) == NULL)
			goto fail;
		for (j = 0; j < DH_NBLKOFF; j++)
			dh->dh_hash[i][j] = DIRHASH_EMPTY;
	}

	/* Initialise the hash table and block statistics. */
	dh->dh_narrays = narrays;
	dh->dh_hlen = nslots;
	dh->dh_nblk = nblocks;
	dh->dh_dirblks = dirblocks;
	for (i = 0; i < dirblocks; i++)
		dh->dh_blkfree[i] = dirblksiz / DIRALIGN;
	for (i = 0; i < DH_NFSTATS; i++)
		dh->dh_firstfree[i] = -1;
	dh->dh_firstfree[DH_NFSTATS] = 0;
	dh->dh_seqopt = 0;
	dh->dh_seqoff = 0;
	dh->dh_score = DH_SCOREINIT;
	ip->i_dirhash = dh;

	bmask = VFSTOUFS(vp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;
	pos = 0;
	while (pos < ip->i_size) {
		if ((curcpu()->ci_schedstate.spc_flags & SPCF_SHOULDYIELD)
		    != 0) {
			preempt();
		}
		/* If necessary, get the next directory block. */
		if ((pos & bmask) == 0) {
			if (bp != NULL)
				brelse(bp, 0);
			if (ufs_blkatoff(vp, (off_t)pos, NULL, &bp, false) != 0)
				goto fail;
		}

		/* Add this entry to the hash. */
		ep = (struct direct *)((char *)bp->b_data + (pos & bmask));
		if (ep->d_reclen == 0 || ep->d_reclen >
		    dirblksiz - (pos & (dirblksiz - 1))) {
			/* Corrupted directory. */
			brelse(bp, 0);
			goto fail;
		}
		if (ep->d_ino != 0) {
			/* Add the entry (simplified ufsdirhash_add). */
			slot = ufsdirhash_hash(dh, ep->d_name, ep->d_namlen);
			while (DH_ENTRY(dh, slot) != DIRHASH_EMPTY)
				slot = WRAPINCR(slot, dh->dh_hlen);
			dh->dh_hused++;
			DH_ENTRY(dh, slot) = pos;
			ufsdirhash_adjfree(dh, pos, -UFS_DIRSIZ(0, ep, needswap),
			    dirblksiz);
		}
		pos += ep->d_reclen;
	}

	if (bp != NULL)
		brelse(bp, 0);
	DIRHASHLIST_LOCK();
	TAILQ_INSERT_TAIL(&ufsdirhash_list, dh, dh_list);
	dh->dh_onlist = 1;
	DIRHASH_UNLOCK(dh);
	DIRHASHLIST_UNLOCK();
	return (0);

fail:
	DIRHASH_UNLOCK(dh);
	if (dh->dh_hash != NULL) {
		for (i = 0; i < narrays; i++)
			if (dh->dh_hash[i] != NULL)
				DIRHASH_BLKFREE(dh->dh_hash[i]);
		kmem_free(dh->dh_hash, dh->dh_hashsz);
	}
	if (dh->dh_blkfree != NULL)
		kmem_free(dh->dh_blkfree, dh->dh_blkfreesz);
	mutex_destroy(&dh->dh_lock);
	pool_cache_put(ufsdirhash_cache, dh);
	ip->i_dirhash = NULL;
	atomic_add_int(&ufs_dirhashmem, -memreqd);
	return (-1);
}
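ufsdirhash_build() charges its memory requirement against a global budget with atomic_add_int_nv (add and return the new value) and immediately backs the charge out whenever the new total exceeds the limit or a later allocation fails. A condensed sketch of that reserve/rollback pattern follows; budget_limit and try_reclaim() are assumptions standing in for ufs_dirhashmaxmem and ufsdirhash_recycle().

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

static atomic_size_t budget_used;
static size_t budget_limit = 2 * 1024 * 1024;	/* assumed cap */

bool try_reclaim(size_t want);			/* hypothetical: free cached data */

bool
budget_reserve(size_t amount)
{
	/* fetch_add returns the old value; add 'amount' to get the new total. */
	while (atomic_fetch_add(&budget_used, amount) + amount > budget_limit) {
		atomic_fetch_sub(&budget_used, amount);	/* undo the charge */
		if (!try_reclaim(amount))
			return (false);			/* over budget */
	}
	return (true);
}

void
budget_release(size_t amount)
{
	atomic_fetch_sub(&budget_used, amount);		/* e.g. on build failure */
}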
Example #10
void
interrupt(u_int64_t vector, struct trapframe *framep)
{
	struct thread *td;
	volatile struct ia64_interrupt_block *ib = IA64_INTERRUPT_BLOCK;

	td = curthread;
	atomic_add_int(&td->td_intr_nesting_level, 1);

	/*
	 * Handle ExtINT interrupts by generating an INTA cycle to
	 * read the vector.
	 */
	if (vector == 0) {
		vector = ib->ib_inta;
		printf("ExtINT interrupt: vector=%ld\n", vector);
	}

	if (vector == 255) {/* clock interrupt */
		/* CTR0(KTR_INTR, "clock interrupt"); */
			
		cnt.v_intr++;
#ifdef EVCNT_COUNTERS
		clock_intr_evcnt.ev_count++;
#else
		intrcnt[INTRCNT_CLOCK]++;
#endif
		critical_enter();
#ifdef SMP
		clks[PCPU_GET(cpuid)]++;
		/* Only the BSP runs the real clock */
		if (PCPU_GET(cpuid) == 0) {
#endif
			handleclock(framep);
			/* divide hz (1024) by 8 to get stathz (128) */
			if ((++schedclk2 & 0x7) == 0)
				statclock((struct clockframe *)framep);
#ifdef SMP
		} else {
			ia64_set_itm(ia64_get_itc() + itm_reload);
			mtx_lock_spin(&sched_lock);
			hardclock_process(curthread, TRAPF_USERMODE(framep));
			if ((schedclk2 & 0x7) == 0)
				statclock_process(curkse, TRAPF_PC(framep),
				    TRAPF_USERMODE(framep));
			mtx_unlock_spin(&sched_lock);
		}
#endif
		critical_exit();
#ifdef SMP
	} else if (vector == ipi_vector[IPI_AST]) {
		asts[PCPU_GET(cpuid)]++;
		CTR1(KTR_SMP, "IPI_AST, cpuid=%d", PCPU_GET(cpuid));
	} else if (vector == ipi_vector[IPI_RENDEZVOUS]) {
		rdvs[PCPU_GET(cpuid)]++;
		CTR1(KTR_SMP, "IPI_RENDEZVOUS, cpuid=%d", PCPU_GET(cpuid));
		smp_rendezvous_action();
	} else if (vector == ipi_vector[IPI_STOP]) {
		u_int32_t mybit = PCPU_GET(cpumask);

		CTR1(KTR_SMP, "IPI_STOP, cpuid=%d", PCPU_GET(cpuid));
		savectx(PCPU_GET(pcb));
		stopped_cpus |= mybit;
		while ((started_cpus & mybit) == 0)
			/* spin */;
		started_cpus &= ~mybit;
		stopped_cpus &= ~mybit;
		if (PCPU_GET(cpuid) == 0 && cpustop_restartfunc != NULL) {
			void (*f)(void) = cpustop_restartfunc;
			cpustop_restartfunc = NULL;
			(*f)();
		}
	} else if (vector == ipi_vector[IPI_TEST]) {
		CTR1(KTR_SMP, "IPI_TEST, cpuid=%d", PCPU_GET(cpuid));
		mp_ipi_test++;
#endif
	} else {
		ints[PCPU_GET(cpuid)]++;
		ia64_dispatch_intr(framep, vector);
	}

	atomic_subtract_int(&td->td_intr_nesting_level, 1);
}
Example #11
/*
 * Try to reuse a vnode from the free list.  This function is somewhat
 * advisory in that NULL can be returned as a normal case, even if free
 * vnodes are present.
 *
 * The scan is limited because it can result in excessive CPU use during
 * periods of extreme vnode use.
 *
 * NOTE: The returned vnode is not completely initialized.
 */
static
struct vnode *
cleanfreevnode(int maxcount)
{
	struct vnode *vp;
	int count;
	int trigger = (long)vmstats.v_page_count / (activevnodes * 2 + 1);

	/*
	 * Try to deactivate some vnodes cached on the active list.
	 */
	if (countcachedvnodes(0) < inactivevnodes)
		goto skip;

	for (count = 0; count < maxcount * 2; count++) {
		spin_lock(&vfs_spin);

		vp = TAILQ_NEXT(&vnode_active_rover, v_list);
		TAILQ_REMOVE(&vnode_active_list, &vnode_active_rover, v_list);
		if (vp == NULL) {
			TAILQ_INSERT_HEAD(&vnode_active_list,
					  &vnode_active_rover, v_list);
		} else {
			TAILQ_INSERT_AFTER(&vnode_active_list, vp,
					   &vnode_active_rover, v_list);
		}
		if (vp == NULL) {
			spin_unlock(&vfs_spin);
			continue;
		}
		if ((vp->v_refcnt & VREF_MASK) != 0) {
			spin_unlock(&vfs_spin);
			vp->v_act += VACT_INC;
			if (vp->v_act > VACT_MAX)	/* SMP race ok */
				vp->v_act = VACT_MAX;
			continue;
		}

		/*
		 * decrement by less if the vnode's object has a lot of
		 * VM pages.  XXX possible SMP races.
		 */
		if (vp->v_act > 0) {
			vm_object_t obj;
			if ((obj = vp->v_object) != NULL &&
			    obj->resident_page_count >= trigger) {
				vp->v_act -= 1;
			} else {
				vp->v_act -= VACT_INC;
			}
			if (vp->v_act < 0)
				vp->v_act = 0;
			spin_unlock(&vfs_spin);
			continue;
		}

		/*
		 * Try to deactivate the vnode.
		 */
		if ((atomic_fetchadd_int(&vp->v_refcnt, 1) & VREF_MASK) == 0)
			atomic_add_int(&mycpu->gd_cachedvnodes, -1);
		atomic_set_int(&vp->v_refcnt, VREF_FINALIZE);

		spin_unlock(&vfs_spin);
		vrele(vp);
	}

skip:
	/*
	 * Loop trying to lock the first vnode on the free list.
	 * Cycle if we can't.
	 */
	for (count = 0; count < maxcount; count++) {
		spin_lock(&vfs_spin);

		vp = TAILQ_FIRST(&vnode_inactive_list);
		if (vp == NULL) {
			spin_unlock(&vfs_spin);
			break;
		}

		/*
		 * non-blocking vx_get will also ref the vnode on success.
		 */
		if (vx_get_nonblock(vp)) {
			KKASSERT(vp->v_state == VS_INACTIVE);
			TAILQ_REMOVE(&vnode_inactive_list, vp, v_list);
			TAILQ_INSERT_TAIL(&vnode_inactive_list, vp, v_list);
			spin_unlock(&vfs_spin);
			continue;
		}

		/*
		 * Because we are holding vfs_spin the vnode should currently
		 * be inactive and VREF_TERMINATE should still be set.
		 *
		 * Once vfs_spin is released the vnode's state should remain
		 * unmodified due to both the lock and ref on it.
		 */
		KKASSERT(vp->v_state == VS_INACTIVE);
		spin_unlock(&vfs_spin);
#ifdef TRACKVNODE
		if ((u_long)vp == trackvnode)
			kprintf("cleanfreevnode %p %08x\n", vp, vp->v_flag);
#endif

		/*
		 * Do not reclaim/reuse a vnode while auxiliary refs exist.
		 * This includes namecache refs due to a related ncp being
		 * locked or having children, a VM object association, or
		 * other hold users.
		 *
		 * Do not reclaim/reuse a vnode if someone else has a real
		 * ref on it.  This can occur if a filesystem temporarily
		 * releases the vnode lock during VOP_RECLAIM.
		 */
		if (vp->v_auxrefs ||
		    (vp->v_refcnt & ~VREF_FINALIZE) != VREF_TERMINATE + 1) {
failed:
			if (vp->v_state == VS_INACTIVE) {
				spin_lock(&vfs_spin);
				if (vp->v_state == VS_INACTIVE) {
					TAILQ_REMOVE(&vnode_inactive_list,
						     vp, v_list);
					TAILQ_INSERT_TAIL(&vnode_inactive_list,
							  vp, v_list);
				}
				spin_unlock(&vfs_spin);
			}
			vx_put(vp);
			continue;
		}

		/*
		 * VINACTIVE and VREF_TERMINATE are expected to both be set
		 * for vnodes pulled from the inactive list, and cannot be
		 * changed while we hold the vx lock.
		 *
		 * Try to reclaim the vnode.
		 */
		KKASSERT(vp->v_flag & VINACTIVE);
		KKASSERT(vp->v_refcnt & VREF_TERMINATE);

		if ((vp->v_flag & VRECLAIMED) == 0) {
			if (cache_inval_vp_nonblock(vp))
				goto failed;
			vgone_vxlocked(vp);
			/* vnode is still VX locked */
		}

		/*
		 * At this point if there are no other refs or auxrefs on
		 * the vnode with the inactive list locked, and we remove
		 * the vnode from the inactive list, it should not be
		 * possible for anyone else to access the vnode any more.
		 *
		 * Since the vnode is in a VRECLAIMED state, no new
		 * namecache associations could have been made and the
		 * vnode should have already been removed from its mountlist.
		 *
		 * Since we hold a VX lock on the vnode it cannot have been
		 * reactivated (moved out of the inactive list).
		 */
		KKASSERT(TAILQ_EMPTY(&vp->v_namecache));
		spin_lock(&vfs_spin);
		if (vp->v_auxrefs ||
		    (vp->v_refcnt & ~VREF_FINALIZE) != VREF_TERMINATE + 1) {
			spin_unlock(&vfs_spin);
			goto failed;
		}
		KKASSERT(vp->v_state == VS_INACTIVE);
		TAILQ_REMOVE(&vnode_inactive_list, vp, v_list);
		--inactivevnodes;
		vp->v_state = VS_DYING;
		spin_unlock(&vfs_spin);

		/*
		 * Nothing should have been able to access this vp.  Only
		 * our ref should remain now.
		 */
		atomic_clear_int(&vp->v_refcnt, VREF_TERMINATE|VREF_FINALIZE);
		KASSERT(vp->v_refcnt == 1,
			("vp %p badrefs %08x", vp, vp->v_refcnt));

		/*
		 * Return a VX locked vnode suitable for reuse.
		 */
		return(vp);
	}
	return(NULL);
}
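cleanfreevnode() and vget() below both depend on atomic_fetchadd_int returning the value the counter had before the add: if the masked reference portion was zero, this thread performed the 0 -> 1 transition and is the one responsible for the gd_cachedvnodes adjustment. A stripped-down version of that transition detection; REF_MASK and the field layout are assumed, not the real VREF_* encoding.

#include <stdatomic.h>

#define REF_MASK	0x0fffffffu	/* assumed: low bits hold the ref count */

static atomic_uint cached_objects;	/* objects currently at zero references */

struct refobj {
	atomic_uint refs;		/* count in REF_MASK, flag bits above it */
};

void
refobj_ref(struct refobj *o)
{
	unsigned old = atomic_fetch_add(&o->refs, 1);

	/* 0 -> 1 transition: the object no longer counts as "cached". */
	if ((old & REF_MASK) == 0)
		atomic_fetch_sub(&cached_objects, 1);
}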
Example #12
/****************************************************************
 *			VNODE ACQUISITION FUNCTIONS		*
 ****************************************************************
 *
 * These functions must be used when accessing a vnode that has no
 * chance of being destroyed in a SMP race.  That means the caller will
 * usually either hold an auxiliary reference (such as the namecache)
 * or hold some other lock that ensures that the vnode cannot be destroyed.
 *
 * These functions are MANDATORY for any code chain accessing a vnode
 * whose activation state is not known.
 *
 * vget() can be called with LK_NOWAIT and will return EBUSY if the
 * lock cannot be immediately acquired.
 *
 * vget()/vput() are used when reactivation is desired.
 *
 * vx_get() and vx_put() are used when reactivation is not desired.
 */
int
vget(struct vnode *vp, int flags)
{
	int error;

	/*
	 * A lock type must be passed
	 */
	if ((flags & LK_TYPE_MASK) == 0) {
		panic("vget() called with no lock specified!");
		/* NOT REACHED */
	}

	/*
	 * Reference the structure and then acquire the lock.
	 *
	 * NOTE: The requested lock might be a shared lock and does
	 *	 not protect our access to the refcnt or other fields.
	 */
	if ((atomic_fetchadd_int(&vp->v_refcnt, 1) & VREF_MASK) == 0)
		atomic_add_int(&mycpu->gd_cachedvnodes, -1);

	if ((error = vn_lock(vp, flags | LK_FAILRECLAIM)) != 0) {
		/*
		 * The lock failed, undo and return an error.  This will not
		 * normally trigger a termination.
		 */
		vrele(vp);
	} else if (vp->v_flag & VRECLAIMED) {
		/*
		 * The node is being reclaimed and cannot be reactivated
		 * any more, undo and return ENOENT.
		 */
		vn_unlock(vp);
		vrele(vp);
		error = ENOENT;
	} else if (vp->v_state == VS_ACTIVE) {
		/*
		 * A VS_ACTIVE vnode coupled with the fact that we have
		 * a vnode lock (even if shared) prevents v_state from
		 * changing.  Since the vnode is not in a VRECLAIMED state,
		 * we can safely clear VINACTIVE.
		 *
		 * NOTE! Multiple threads may clear VINACTIVE if this is a
		 *	 shared lock.  This race is allowed.
		 */
		_vclrflags(vp, VINACTIVE);	/* SMP race ok */
		vp->v_act += VACT_INC;
		if (vp->v_act > VACT_MAX)	/* SMP race ok */
			vp->v_act = VACT_MAX;
		error = 0;
	} else {
		/*
		 * If the vnode is not VS_ACTIVE it must be reactivated
		 * in addition to clearing VINACTIVE.  An exclusive spin_lock
		 * is needed to manipulate the vnode's list.
		 *
		 * Because the lockmgr lock might be shared, we might race
		 * another reactivation, which we handle.  In this situation,
		 * however, the refcnt prevents other v_state races.
		 *
		 * As with above, clearing VINACTIVE is allowed to race other
		 * clearings of VINACTIVE.
		 *
		 * VREF_TERMINATE and VREF_FINALIZE can only be cleared when
		 * the refcnt is non-zero and the vnode has not been
		 * reclaimed.  This also means that the transitions do
		 * not affect cachedvnodes.
		 */
		_vclrflags(vp, VINACTIVE);
		vp->v_act += VACT_INC;
		if (vp->v_act > VACT_MAX)	/* SMP race ok */
			vp->v_act = VACT_MAX;
		spin_lock(&vp->v_spin);

		switch(vp->v_state) {
		case VS_INACTIVE:
			_vactivate(vp);
			atomic_clear_int(&vp->v_refcnt, VREF_TERMINATE |
							VREF_FINALIZE);
			spin_unlock(&vp->v_spin);
			break;
		case VS_CACHED:
			_vactivate(vp);
			atomic_clear_int(&vp->v_refcnt, VREF_TERMINATE |
							VREF_FINALIZE);
			spin_unlock(&vp->v_spin);
			break;
		case VS_ACTIVE:
			atomic_clear_int(&vp->v_refcnt, VREF_FINALIZE);
			spin_unlock(&vp->v_spin);
			break;
		case VS_DYING:
			spin_unlock(&vp->v_spin);
			panic("Impossible VS_DYING state");
			break;
		}
		error = 0;
	}
	return(error);
}
Example #13
/*
 * Remove an auxiliary reference from the vnode.
 */
void
vdrop(struct vnode *vp)
{
	atomic_add_int(&vp->v_auxrefs, -1);
}
Example #14
/*
 * Add an auxiliary data structure reference to the vnode.  Auxiliary
 * references do not change the state of the vnode or prevent deactivation
 * or reclamation of the vnode, but will prevent the vnode from being
 * destroyed (kfree()'d).
 *
 * WARNING!  vhold() must not acquire v_spin.  The spinlock may or may not
 *	     already be held by the caller.  vdrop() will clean up the
 *	     free list state.
 */
void
vhold(struct vnode *vp)
{
	atomic_add_int(&vp->v_auxrefs, 1);
}
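vhold()/vdrop() are the simplest case in this collection: an auxiliary reference count maintained with nothing but atomic adds, no lock and no action taken on any transition. The same pair written with C11 atomics:

#include <stdatomic.h>

struct held_obj {
	atomic_int auxrefs;	/* auxiliary references; keeps the structure allocated */
};

void
hold(struct held_obj *o)
{
	atomic_fetch_add(&o->auxrefs, 1);
}

void
drop(struct held_obj *o)
{
	atomic_fetch_sub(&o->auxrefs, 1);
}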
Example #15
int
debuglockmgr(struct lock *lkp, u_int flags,
	     const char *name, const char *file, int line)
{
	thread_t td;
	thread_t otd;
	int error;
	int extflags;
	int count;
	int pflags;
	int wflags;
	int timo;
#ifdef DEBUG_LOCKS
	int i;
#endif

	error = 0;

	if (mycpu->gd_intr_nesting_level &&
	    (flags & LK_NOWAIT) == 0 &&
	    (flags & LK_TYPE_MASK) != LK_RELEASE &&
	    panic_cpu_gd != mycpu
	) {

#ifndef DEBUG_LOCKS
		panic("lockmgr %s from %p: called from interrupt, ipi, "
		      "or hard code section",
		      lkp->lk_wmesg, ((int **)&lkp)[-1]);
#else
		panic("lockmgr %s from %s:%d: called from interrupt, ipi, "
		      "or hard code section",
		      lkp->lk_wmesg, file, line);
#endif
	}

#ifdef DEBUG_LOCKS
	if (mycpu->gd_spinlocks && ((flags & LK_NOWAIT) == 0)) {
		panic("lockmgr %s from %s:%d: called with %d spinlocks held",
		      lkp->lk_wmesg, file, line, mycpu->gd_spinlocks);
	}
#endif

	extflags = (flags | lkp->lk_flags) & LK_EXTFLG_MASK;
	td = curthread;

again:
	count = lkp->lk_count;
	cpu_ccfence();

	switch (flags & LK_TYPE_MASK) {
	case LK_SHARED:
		/*
		 * Shared lock critical path case
		 */
		if ((count & (LKC_EXREQ|LKC_UPREQ|LKC_EXCL)) == 0) {
			if (atomic_cmpset_int(&lkp->lk_count,
					      count, count + 1)) {
				COUNT(td, 1);
				break;
			}
			goto again;
		}

		/*
		 * If the caller already holds the lock exclusively then
		 * we silently obtain another count on the exclusive lock.
		 *
		 * WARNING!  The old FreeBSD behavior was to downgrade,
		 *	     but this creates a problem when recursions
		 *	     return to the caller and the caller expects
		 *	     its original exclusive lock to remain exclusively
		 *	     locked.
		 */
		if (lkp->lk_lockholder == td) {
			KKASSERT(count & LKC_EXCL);
			if ((extflags & LK_CANRECURSE) == 0) {
				if (extflags & LK_NOWAIT) {
					error = EBUSY;
					break;
				}
				panic("lockmgr: locking against myself");
			}
			atomic_add_int(&lkp->lk_count, 1);
			COUNT(td, 1);
			break;
		}

		/*
		 * Slow path
		 */
		pflags = (extflags & LK_PCATCH) ? PCATCH : 0;
		timo = (extflags & LK_TIMELOCK) ? lkp->lk_timo : 0;
		wflags = (td->td_flags & TDF_DEADLKTREAT) ?
				LKC_EXCL : (LKC_EXCL|LKC_EXREQ|LKC_UPREQ);

		/*
		 * Block while the lock is held exclusively or, conditionally,
		 * if other threads are trying to obtain an exclusive lock or
		 * upgrade to one.
		 */
		if (count & wflags) {
			if (extflags & LK_NOWAIT) {
				error = EBUSY;
				break;
			}
			tsleep_interlock(lkp, pflags);
			if (!atomic_cmpset_int(&lkp->lk_count, count,
					      count | LKC_SHREQ)) {
				goto again;
			}

			mycpu->gd_cnt.v_lock_name[0] = 'S';
			strncpy(mycpu->gd_cnt.v_lock_name + 1,
				lkp->lk_wmesg,
				sizeof(mycpu->gd_cnt.v_lock_name) - 2);
			++mycpu->gd_cnt.v_lock_colls;

			error = tsleep(lkp, pflags | PINTERLOCKED,
				       lkp->lk_wmesg, timo);
			if (error)
				break;
			if (extflags & LK_SLEEPFAIL) {
				error = ENOLCK;
				break;
			}
			goto again;
		}

		/*
		 * Otherwise we can bump the count
		 */
		if (atomic_cmpset_int(&lkp->lk_count, count, count + 1)) {
			COUNT(td, 1);
			break;
		}
		goto again;

	case LK_EXCLUSIVE:
		/*
		 * Exclusive lock critical path.
		 */
		if (count == 0) {
			if (atomic_cmpset_int(&lkp->lk_count, count,
					      LKC_EXCL | (count + 1))) {
				lkp->lk_lockholder = td;
				COUNT(td, 1);
				break;
			}
			goto again;
		}

		/*
		 * Recursive lock if we already hold it exclusively.
		 */
		if (lkp->lk_lockholder == td) {
			KKASSERT(count & LKC_EXCL);
			if ((extflags & LK_CANRECURSE) == 0) {
				if (extflags & LK_NOWAIT) {
					error = EBUSY;
					break;
				}
				panic("lockmgr: locking against myself");
			}
			atomic_add_int(&lkp->lk_count, 1);
			COUNT(td, 1);
			break;
		}

		/*
		 * We will block, handle LK_NOWAIT
		 */
		if (extflags & LK_NOWAIT) {
			error = EBUSY;
			break;
		}

		/*
		 * Wait until we can obtain the exclusive lock.  EXREQ is
		 * automatically cleared when all current holders release
		 * so if we abort the operation we can safely leave it set.
		 * There might be other exclusive requesters.
		 */
		pflags = (extflags & LK_PCATCH) ? PCATCH : 0;
		timo = (extflags & LK_TIMELOCK) ? lkp->lk_timo : 0;

		tsleep_interlock(lkp, pflags);
		if (!atomic_cmpset_int(&lkp->lk_count, count,
				       count | LKC_EXREQ)) {
			goto again;
		}

		mycpu->gd_cnt.v_lock_name[0] = 'X';
		strncpy(mycpu->gd_cnt.v_lock_name + 1,
			lkp->lk_wmesg,
			sizeof(mycpu->gd_cnt.v_lock_name) - 2);
		++mycpu->gd_cnt.v_lock_colls;

		error = tsleep(lkp, pflags | PINTERLOCKED,
			       lkp->lk_wmesg, timo);
		if (error)
			break;
		if (extflags & LK_SLEEPFAIL) {
			error = ENOLCK;
			break;
		}
		goto again;

	case LK_DOWNGRADE:
		/*
		 * Downgrade an exclusive lock into a shared lock.  All
		 * counts on a recursive exclusive lock become shared.
		 *
		 * This function always succeeds.
		 */
		if (lkp->lk_lockholder != td ||
		    (count & (LKC_EXCL|LKC_MASK)) != (LKC_EXCL|1)) {
			panic("lockmgr: not holding exclusive lock");
		}

#ifdef DEBUG_LOCKS
		for (i = 0; i < LOCKMGR_DEBUG_ARRAY_SIZE; i++) {
			if (td->td_lockmgr_stack[i] == lkp &&
			    td->td_lockmgr_stack_id[i] > 0
			) {
				td->td_lockmgr_stack_id[i]--;
				break;
			}
		}
#endif
		/*
		 * NOTE! Must NULL-out lockholder before releasing LKC_EXCL.
		 */
		otd = lkp->lk_lockholder;
		lkp->lk_lockholder = NULL;
		if (atomic_cmpset_int(&lkp->lk_count, count,
				      count & ~(LKC_EXCL|LKC_SHREQ))) {
			if (count & LKC_SHREQ)
				wakeup(lkp);
			break;
		}
		lkp->lk_lockholder = otd;
		goto again;

	case LK_EXCLUPGRADE:
		/*
		 * Upgrade from a single shared lock to an exclusive lock.
		 *
		 * If another process is ahead of us to get an upgrade,
		 * then we want to fail rather than have an intervening
		 * exclusive access.  The shared lock is released on
		 * failure.
		 */
		if (count & LKC_UPREQ) {
			flags = LK_RELEASE;
			error = EBUSY;
			goto again;
		}
		/* fall through into normal upgrade */

	case LK_UPGRADE:
		/*
		 * Upgrade a shared lock to an exclusive one.  This can cause
		 * the lock to be temporarily released and stolen by other
		 * threads.  LK_SLEEPFAIL or LK_NOWAIT may be used to detect
		 * this case, or use LK_EXCLUPGRADE.
		 *
		 * If the lock is already exclusively owned by us, this
		 * operation is a NOP.
		 *
		 * If we return an error (even NOWAIT), the current lock will
		 * be released.
		 *
		 * Start with the critical path.
		 */
		if ((count & (LKC_UPREQ|LKC_EXCL|LKC_MASK)) == 1) {
			if (atomic_cmpset_int(&lkp->lk_count, count,
					      count | LKC_EXCL)) {
				lkp->lk_lockholder = td;
				break;
			}
			goto again;
		}

		/*
		 * If we already hold the lock exclusively this operation
		 * succeeds and is a NOP.
		 */
		if (count & LKC_EXCL) {
			if (lkp->lk_lockholder == td)
				break;
			panic("lockmgr: upgrade unowned lock");
		}
		if ((count & LKC_MASK) == 0)
			panic("lockmgr: upgrade unowned lock");

		/*
		 * We cannot upgrade without blocking at this point.
		 */
		if (extflags & LK_NOWAIT) {
			flags = LK_RELEASE;
			error = EBUSY;
			goto again;
		}

		/*
		 * Release the shared lock and request the upgrade.
		 */
		pflags = (extflags & LK_PCATCH) ? PCATCH : 0;
		timo = (extflags & LK_TIMELOCK) ? lkp->lk_timo : 0;
		tsleep_interlock(lkp, pflags);
		wflags = (count & LKC_UPREQ) ? LKC_EXREQ : LKC_UPREQ;

		/*
		 * If someone else owns UPREQ and this transition would
		 * allow it to be granted, we have to grant it.  Otherwise
		 * we release the shared lock.
		 */
		if ((count & (LKC_UPREQ|LKC_MASK)) == (LKC_UPREQ | 1)) {
			wflags |= LKC_EXCL | LKC_UPGRANT;
			wflags |= count;
			wflags &= ~LKC_UPREQ;
		} else {
			wflags |= (count - 1);
		}

		if (atomic_cmpset_int(&lkp->lk_count, count, wflags)) {
			COUNT(td, -1);

			/*
			 * Must wakeup the thread granted the upgrade.
			 */
			if ((count & (LKC_UPREQ|LKC_MASK)) == (LKC_UPREQ | 1))
				wakeup(lkp);

			mycpu->gd_cnt.v_lock_name[0] = 'U';
			strncpy(mycpu->gd_cnt.v_lock_name + 1,
				lkp->lk_wmesg,
				sizeof(mycpu->gd_cnt.v_lock_name) - 2);
			++mycpu->gd_cnt.v_lock_colls;

			error = tsleep(lkp, pflags | PINTERLOCKED,
				       lkp->lk_wmesg, timo);
			if (error)
				break;
			if (extflags & LK_SLEEPFAIL) {
				error = ENOLCK;
				break;
			}

			/*
			 * Refactor to either LK_EXCLUSIVE or LK_WAITUPGRADE,
			 * depending on whether we were able to acquire the
			 * LKC_UPREQ bit.
			 */
			if (count & LKC_UPREQ)
				flags = LK_EXCLUSIVE;	/* someone else */
			else
				flags = LK_WAITUPGRADE;	/* we own the bit */
		}
		goto again;

	case LK_WAITUPGRADE:
		/*
		 * We own the LKC_UPREQ bit, wait until we are granted the
		 * exclusive lock (LKC_UPGRANT is set).
		 *
		 * IF THE OPERATION FAILS (a tsleep error or tsleep + LK_SLEEPFAIL),
		 * we have to undo the upgrade request and clean up any lock
		 * that might have been granted via a race.
		 */
		if (count & LKC_UPGRANT) {
			if (atomic_cmpset_int(&lkp->lk_count, count,
					      count & ~LKC_UPGRANT)) {
				lkp->lk_lockholder = td;
				KKASSERT(count & LKC_EXCL);
				break;
			}
			/* retry */
		} else {
			pflags = (extflags & LK_PCATCH) ? PCATCH : 0;
			timo = (extflags & LK_TIMELOCK) ? lkp->lk_timo : 0;
			tsleep_interlock(lkp, pflags);
			if (atomic_cmpset_int(&lkp->lk_count, count, count)) {

				mycpu->gd_cnt.v_lock_name[0] = 'U';
				strncpy(mycpu->gd_cnt.v_lock_name + 1,
					lkp->lk_wmesg,
					sizeof(mycpu->gd_cnt.v_lock_name) - 2);
				++mycpu->gd_cnt.v_lock_colls;

				error = tsleep(lkp, pflags | PINTERLOCKED,
					       lkp->lk_wmesg, timo);
				if (error) {
					undo_upreq(lkp);
					break;
				}
				if (extflags & LK_SLEEPFAIL) {
					error = ENOLCK;
					undo_upreq(lkp);
					break;
				}
			}
			/* retry */
		}
		goto again;

	case LK_RELEASE:
		/*
		 * Release the currently held lock.  If releasing the current
		 * lock as part of an error return, error will ALREADY be
		 * non-zero.
		 *
		 * When releasing the last lock we automatically transition
		 * LKC_UPREQ to LKC_EXCL|1.
		 *
		 * WARNING! We cannot detect when there are multiple exclusive
		 *	    requests pending.  We clear EXREQ unconditionally
		 *	    on the 1->0 transition so it is possible for
		 *	    shared requests to race the next exclusive
		 *	    request.
		 *
		 * Always succeeds.
		 */
		if ((count & LKC_MASK) == 0)
			panic("lockmgr: LK_RELEASE: no lock held");

		if (count & LKC_EXCL) {
			if (lkp->lk_lockholder != LK_KERNTHREAD &&
			    lkp->lk_lockholder != td) {
				panic("lockmgr: pid %d, not exlusive "
				      "lock holder thr %p/%p unlocking",
				    (td->td_proc ? td->td_proc->p_pid : -1),
				    td, lkp->lk_lockholder);
			}
			if ((count & (LKC_UPREQ|LKC_MASK)) == 1) {
				/*
				 * Last exclusive count is being released
				 */
				otd = lkp->lk_lockholder;
				lkp->lk_lockholder = NULL;
				if (!atomic_cmpset_int(&lkp->lk_count, count,
					      (count - 1) &
					   ~(LKC_EXCL|LKC_EXREQ|LKC_SHREQ))) {
					lkp->lk_lockholder = otd;
					goto again;
				}
				if (count & (LKC_EXREQ|LKC_SHREQ))
					wakeup(lkp);
				/* success */
			} else if ((count & (LKC_UPREQ|LKC_MASK)) ==
				   (LKC_UPREQ | 1)) {
				/*
				 * Last exclusive count is being released but
				 * an upgrade request is present, automatically
				 * grant an exclusive state to the owner of
				 * the upgrade request.
				 */
				otd = lkp->lk_lockholder;
				lkp->lk_lockholder = NULL;
				if (!atomic_cmpset_int(&lkp->lk_count, count,
						(count & ~LKC_UPREQ) |
						LKC_UPGRANT)) {
					lkp->lk_lockholder = otd;
				}
				wakeup(lkp);
				/* success */
			} else {
				otd = lkp->lk_lockholder;
				if (!atomic_cmpset_int(&lkp->lk_count, count,
						       count - 1)) {
					goto again;
				}
				/* success */
			}
			/* success */
			if (otd != LK_KERNTHREAD)
				COUNT(td, -1);
		} else {
			if ((count & (LKC_UPREQ|LKC_MASK)) == 1) {
				/*
				 * Last shared count is being released.
				 */
				if (!atomic_cmpset_int(&lkp->lk_count, count,
					      (count - 1) &
					       ~(LKC_EXREQ|LKC_SHREQ))) {
					goto again;
				}
				if (count & (LKC_EXREQ|LKC_SHREQ))
					wakeup(lkp);
				/* success */
			} else if ((count & (LKC_UPREQ|LKC_MASK)) ==
				   (LKC_UPREQ | 1)) {
				/*
				 * Last shared count is being released but
				 * an upgrade request is present, automatically
				 * grant an exclusive state to the owner of
				 * the upgrade request.
				 */
				if (!atomic_cmpset_int(&lkp->lk_count, count,
					      (count & ~LKC_UPREQ) |
					      LKC_EXCL | LKC_UPGRANT)) {
					goto again;
				}
				wakeup(lkp);
			} else {
				if (!atomic_cmpset_int(&lkp->lk_count, count,
						       count - 1)) {
					goto again;
				}
			}
			/* success */
			COUNT(td, -1);
		}
		break;

	default:
		panic("lockmgr: unknown locktype request %d",
		    flags & LK_TYPE_MASK);
		/* NOTREACHED */
	}
	return (error);
}
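Every critical path in the lockmgr code above is a compare-and-set loop: read lk_count, compute the desired new value, and jump back to "again" if another CPU changed the word first. A minimal shared-count acquire in that style using C11 compare_exchange; the single LOCK_EXCL bit is a simplification of the LKC_* layout, and a real caller would fall back to sleeping when false is returned.

#include <stdatomic.h>
#include <stdbool.h>

#define LOCK_EXCL	0x80000000u	/* assumed: exclusive bit; low bits = shared count */

static atomic_uint lock_word;

/* One attempt to take a shared count; false means the slow path must block. */
bool
shared_try_acquire(void)
{
	unsigned count = atomic_load(&lock_word);

	for (;;) {
		if (count & LOCK_EXCL)
			return (false);
		/*
		 * On failure compare_exchange_weak reloads 'count' with the
		 * current value, so the loop simply re-evaluates, just like
		 * the 'goto again' retries above.
		 */
		if (atomic_compare_exchange_weak(&lock_word, &count, count + 1))
			return (true);
	}
}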
Example #16
static int
p4_intr(int cpu, struct trapframe *tf)
{
	uint32_t cccrval, ovf_mask, ovf_partner;
	int did_interrupt, error, ri;
	struct p4_cpu *pc;
	struct pmc *pm;
	pmc_value_t v;

	PMCDBG(MDP,INT, 1, "cpu=%d tf=0x%p um=%d", cpu, (void *) tf,
	    TRAPF_USERMODE(tf));

	pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];

	ovf_mask = P4_CPU_IS_HTT_SECONDARY(cpu) ?
	    P4_CCCR_OVF_PMI_T1 : P4_CCCR_OVF_PMI_T0;
	ovf_mask |= P4_CCCR_OVF;
	if (p4_system_has_htt)
		ovf_partner = P4_CPU_IS_HTT_SECONDARY(cpu) ?
		    P4_CCCR_OVF_PMI_T0 : P4_CCCR_OVF_PMI_T1;
	else
		ovf_partner = 0;
	did_interrupt = 0;

	if (p4_system_has_htt)
		P4_PCPU_ACQ_INTR_SPINLOCK(pc);

	/*
	 * Loop through all CCCRs, looking for ones that have
	 * interrupted this CPU.
	 */
	for (ri = 0; ri < P4_NPMCS; ri++) {

		/*
		 * Check if our partner logical CPU has already marked
		 * this PMC as having interrupted it.  If so, reset
		 * the flag and process the interrupt, but leave the
		 * hardware alone.
		 */
		if (p4_system_has_htt && P4_PCPU_GET_INTRFLAG(pc,ri)) {
			P4_PCPU_SET_INTRFLAG(pc,ri,0);
			did_interrupt = 1;

			/*
			 * Ignore de-configured or stopped PMCs.
			 * Ignore PMCs not in sampling mode.
			 */
			pm = pc->pc_p4pmcs[ri].phw_pmc;
			if (pm == NULL ||
			    pm->pm_state != PMC_STATE_RUNNING ||
			    !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) {
				continue;
			}
			(void) pmc_process_interrupt(cpu, PMC_HR, pm, tf,
			    TRAPF_USERMODE(tf));
			continue;
		}

		/*
		 * Fresh interrupt.  Look for the CCCR_OVF bit
		 * and the OVF_Tx bit for this logical
		 * processor being set.
		 */
		cccrval = rdmsr(P4_CCCR_MSR_FIRST + ri);

		if ((cccrval & ovf_mask) != ovf_mask)
			continue;

		/*
		 * If the other logical CPU would also have been
		 * interrupted due to the PMC being shared, record
		 * this fact in the per-cpu saved interrupt flag
		 * bitmask.
		 */
		if (p4_system_has_htt && (cccrval & ovf_partner))
			P4_PCPU_SET_INTRFLAG(pc, ri, 1);

		v = rdmsr(P4_PERFCTR_MSR_FIRST + ri);

		PMCDBG(MDP,INT, 2, "ri=%d v=%jx", ri, v);

		/* Stop the counter, and reset the overflow  bit */
		cccrval &= ~(P4_CCCR_OVF | P4_CCCR_ENABLE);
		wrmsr(P4_CCCR_MSR_FIRST + ri, cccrval);

		did_interrupt = 1;

		/*
		 * Ignore de-configured or stopped PMCs.  Ignore PMCs
		 * not in sampling mode.
		 */
		pm = pc->pc_p4pmcs[ri].phw_pmc;

		if (pm == NULL ||
		    pm->pm_state != PMC_STATE_RUNNING ||
		    !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) {
			continue;
		}

		/*
		 * Process the interrupt.  Re-enable the PMC if
		 * processing was successful.
		 */
		error = pmc_process_interrupt(cpu, PMC_HR, pm, tf,
		    TRAPF_USERMODE(tf));

		/*
		 * Only the first processor executing the NMI handler
		 * in a HTT pair will restart a PMC, and that too
		 * only if there were no errors.
		 */
		v = P4_RELOAD_COUNT_TO_PERFCTR_VALUE(
			pm->pm_sc.pm_reloadcount);
		wrmsr(P4_PERFCTR_MSR_FIRST + ri, v);
		if (error == 0)
			wrmsr(P4_CCCR_MSR_FIRST + ri,
			    cccrval | P4_CCCR_ENABLE);
	}

	/* allow the other CPU to proceed */
	if (p4_system_has_htt)
		P4_PCPU_REL_INTR_SPINLOCK(pc);

	/*
	 * On Intel P4 CPUs, the PMC 'pcint' entry in the LAPIC gets
	 * masked when a PMC interrupts the CPU.  We need to unmask
	 * the interrupt source explicitly.
	 */

	if (did_interrupt)
		lapic_reenable_pmc();

	atomic_add_int(did_interrupt ? &pmc_stats.pm_intr_processed :
	    &pmc_stats.pm_intr_ignored, 1);

	return (did_interrupt);
}
Example #17
static int
msgdma_channel_submit_sg(device_t dev, struct xdma_channel *xchan,
    struct xdma_sglist *sg, uint32_t sg_n)
{
	struct msgdma_channel *chan;
	struct msgdma_desc *desc;
	struct msgdma_softc *sc;
	uint32_t src_addr_lo;
	uint32_t dst_addr_lo;
	uint32_t len;
	uint32_t tmp;
	int i;

	sc = device_get_softc(dev);

	chan = (struct msgdma_channel *)xchan->chan;

	for (i = 0; i < sg_n; i++) {
		src_addr_lo = (uint32_t)sg[i].src_addr;
		dst_addr_lo = (uint32_t)sg[i].dst_addr;
		len = (uint32_t)sg[i].len;

		dprintf("%s: src %x dst %x len %d\n", __func__,
		    src_addr_lo, dst_addr_lo, len);

		desc = chan->descs[chan->idx_head];
		desc->read_lo = htole32(src_addr_lo);
		desc->write_lo = htole32(dst_addr_lo);
		desc->length = htole32(len);
		desc->transferred = 0;
		desc->status = 0;
		desc->reserved = 0;
		desc->control = 0;

		if (sg[i].direction == XDMA_MEM_TO_DEV) {
			if (sg[i].first == 1) {
				desc->control |= htole32(CONTROL_GEN_SOP);
			}

			if (sg[i].last == 1) {
				desc->control |= htole32(CONTROL_GEN_EOP);
				desc->control |= htole32(CONTROL_TC_IRQ_EN |
				    CONTROL_ET_IRQ_EN | CONTROL_ERR_M);
			}
		} else {
			desc->control |= htole32(CONTROL_END_ON_EOP | (1 << 13));
			desc->control |= htole32(CONTROL_TC_IRQ_EN |
			    CONTROL_ET_IRQ_EN | CONTROL_ERR_M);
		}

		tmp = chan->idx_head;

		atomic_add_int(&chan->descs_used_count, 1);
		chan->idx_head = msgdma_next_desc(chan, chan->idx_head);

		desc->control |= htole32(CONTROL_OWN | CONTROL_GO);

		bus_dmamap_sync(chan->dma_tag, chan->dma_map[tmp],
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	}

	return (0);
}
Example #18
/*
 * Flush waiting shared locks.  The lock's prior state is passed in and must
 * be adjusted atomically only if it matches and LINKSPIN is not set.
 *
 * IMPORTANT! The caller has left one active count on the lock for us to
 *	      consume.  We will apply this to the first link, but must add
 *	      additional counts for any other links.
 */
static int
mtx_chain_link_sh(mtx_t *mtx, u_int olock)
{
	thread_t td = curthread;
	mtx_link_t *link;
	u_int	addcount;
	u_int	nlock;

	olock &= ~MTX_LINKSPIN;
	nlock = olock | MTX_LINKSPIN;
	nlock &= ~MTX_EXCLUSIVE;
	crit_enter_raw(td);
	if (atomic_cmpset_int(&mtx->mtx_lock, olock, nlock)) {
		/*
		 * It should not be possible for SHWANTED to be set without
		 * any links pending.
		 */
		KKASSERT(mtx->mtx_shlink != NULL);

		/*
		 * We have to process the count for all shared locks before
		 * we process any of the links.  Count the additional shared
		 * locks beyond the first link (which is already accounted
		 * for) and associate the full count with the lock
		 * immediately.
		 */
		addcount = 0;
		for (link = mtx->mtx_shlink->next; link != mtx->mtx_shlink;
		     link = link->next) {
			++addcount;
		}
		if (addcount > 0)
			atomic_add_int(&mtx->mtx_lock, addcount);

		/*
		 * We can wakeup all waiting shared locks.
		 */
		while ((link = mtx->mtx_shlink) != NULL) {
			KKASSERT(link->state == MTX_LINK_LINKED_SH);
			if (link->next == link) {
				mtx->mtx_shlink = NULL;
			} else {
				mtx->mtx_shlink = link->next;
				link->next->prev = link->prev;
				link->prev->next = link->next;
			}
			link->next = NULL;
			link->prev = NULL;
			cpu_sfence();
			if (link->callback) {
				link->state = MTX_LINK_CALLEDBACK;
				link->callback(link, link->arg, 0);
			} else {
				cpu_sfence();
				link->state = MTX_LINK_ACQUIRED;
				wakeup(link);
			}
		}
		atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN |
						 MTX_SHWANTED);
		crit_exit_raw(td);
		return 1;
	}
	/* retry */
	crit_exit_raw(td);

	return 0;
}
Example #19
/*
 * Handle a single exception.
 */
void
itsa(struct trap_frame *trapframe, struct cpu_info *ci, struct proc *p,
    int type)
{
	int i;
	unsigned ucode = 0;
	vm_prot_t ftype;
	extern vaddr_t onfault_table[];
	int onfault;
	int typ = 0;
	union sigval sv;
	struct pcb *pcb;

	switch (type) {
	case T_TLB_MOD:
		/* check for kernel address */
		if (trapframe->badvaddr < 0) {
			pt_entry_t *pte, entry;
			paddr_t pa;
			vm_page_t pg;

			pte = kvtopte(trapframe->badvaddr);
			entry = *pte;
#ifdef DIAGNOSTIC
			if (!(entry & PG_V) || (entry & PG_M))
				panic("trap: ktlbmod: invalid pte");
#endif
			if (pmap_is_page_ro(pmap_kernel(),
			    trunc_page(trapframe->badvaddr), entry)) {
				/* write to read only page in the kernel */
				ftype = VM_PROT_WRITE;
				pcb = &p->p_addr->u_pcb;
				goto kernel_fault;
			}
			entry |= PG_M;
			*pte = entry;
			KERNEL_LOCK();
			pmap_update_kernel_page(trapframe->badvaddr & ~PGOFSET,
			    entry);
			pa = pfn_to_pad(entry);
			pg = PHYS_TO_VM_PAGE(pa);
			if (pg == NULL)
				panic("trap: ktlbmod: unmanaged page");
			pmap_set_modify(pg);
			KERNEL_UNLOCK();
			return;
		}
		/* FALLTHROUGH */

	case T_TLB_MOD+T_USER:
	    {
		pt_entry_t *pte, entry;
		paddr_t pa;
		vm_page_t pg;
		pmap_t pmap = p->p_vmspace->vm_map.pmap;

		if (!(pte = pmap_segmap(pmap, trapframe->badvaddr)))
			panic("trap: utlbmod: invalid segmap");
		pte += uvtopte(trapframe->badvaddr);
		entry = *pte;
#ifdef DIAGNOSTIC
		if (!(entry & PG_V) || (entry & PG_M))
			panic("trap: utlbmod: invalid pte");
#endif
		if (pmap_is_page_ro(pmap,
		    trunc_page(trapframe->badvaddr), entry)) {
			/* write to read only page */
			ftype = VM_PROT_WRITE;
			pcb = &p->p_addr->u_pcb;
			goto fault_common_no_miss;
		}
		entry |= PG_M;
		*pte = entry;
		KERNEL_LOCK();
		pmap_update_user_page(pmap, (trapframe->badvaddr & ~PGOFSET), 
		    entry);
		pa = pfn_to_pad(entry);
		pg = PHYS_TO_VM_PAGE(pa);
		if (pg == NULL)
			panic("trap: utlbmod: unmanaged page");
		pmap_set_modify(pg);
		KERNEL_UNLOCK();
		return;
	    }

	case T_TLB_LD_MISS:
	case T_TLB_ST_MISS:
		ftype = (type == T_TLB_ST_MISS) ? VM_PROT_WRITE : VM_PROT_READ;
		pcb = &p->p_addr->u_pcb;
		/* check for kernel address */
		if (trapframe->badvaddr < 0) {
			vaddr_t va;
			int rv;

	kernel_fault:
			va = trunc_page((vaddr_t)trapframe->badvaddr);
			onfault = pcb->pcb_onfault;
			pcb->pcb_onfault = 0;
			KERNEL_LOCK();
			rv = uvm_fault(kernel_map, trunc_page(va), 0, ftype);
			KERNEL_UNLOCK();
			pcb->pcb_onfault = onfault;
			if (rv == 0)
				return;
			if (onfault != 0) {
				pcb->pcb_onfault = 0;
				trapframe->pc = onfault_table[onfault];
				return;
			}
			goto err;
		}
		/*
		 * It is an error for the kernel to access user space except
		 * through the copyin/copyout routines.
		 */
		if (pcb->pcb_onfault != 0) {
			/*
			 * We want to resolve the TLB fault before invoking
			 * pcb_onfault if necessary.
			 */
			goto fault_common;
		} else {
			goto err;
		}

	case T_TLB_LD_MISS+T_USER:
		ftype = VM_PROT_READ;
		pcb = &p->p_addr->u_pcb;
		goto fault_common;

	case T_TLB_ST_MISS+T_USER:
		ftype = VM_PROT_WRITE;
		pcb = &p->p_addr->u_pcb;
fault_common:

#ifdef CPU_R4000
		if (r4000_errata != 0) {
			if (eop_tlb_miss_handler(trapframe, ci, p) != 0)
				return;
		}
#endif

fault_common_no_miss:

#ifdef CPU_R4000
		if (r4000_errata != 0) {
			eop_cleanup(trapframe, p);
		}
#endif

	    {
		vaddr_t va;
		struct vmspace *vm;
		vm_map_t map;
		int rv;

		vm = p->p_vmspace;
		map = &vm->vm_map;
		va = trunc_page((vaddr_t)trapframe->badvaddr);

		onfault = pcb->pcb_onfault;
		pcb->pcb_onfault = 0;
		KERNEL_LOCK();

		rv = uvm_fault(map, va, 0, ftype);
		pcb->pcb_onfault = onfault;

		/*
		 * If this was a stack access we keep track of the maximum
		 * accessed stack size.  Also, if vm_fault gets a protection
		 * failure it is due to accessing the stack region outside
		 * the current limit and we need to reflect that as an access
		 * error.
		 */
		if ((caddr_t)va >= vm->vm_maxsaddr) {
			if (rv == 0)
				uvm_grow(p, va);
			else if (rv == EACCES)
				rv = EFAULT;
		}
		KERNEL_UNLOCK();
		if (rv == 0)
			return;
		if (!USERMODE(trapframe->sr)) {
			if (onfault != 0) {
				pcb->pcb_onfault = 0;
				trapframe->pc = onfault_table[onfault];
				return;
			}
			goto err;
		}

		ucode = ftype;
		i = SIGSEGV;
		typ = SEGV_MAPERR;
		break;
	    }

	case T_ADDR_ERR_LD+T_USER:	/* misaligned or kseg access */
	case T_ADDR_ERR_ST+T_USER:	/* misaligned or kseg access */
		ucode = 0;		/* XXX should be VM_PROT_something */
		i = SIGBUS;
		typ = BUS_ADRALN;
		break;
	case T_BUS_ERR_IFETCH+T_USER:	/* BERR asserted to cpu */
	case T_BUS_ERR_LD_ST+T_USER:	/* BERR asserted to cpu */
		ucode = 0;		/* XXX should be VM_PROT_something */
		i = SIGBUS;
		typ = BUS_OBJERR;
		break;

	case T_SYSCALL+T_USER:
	    {
		struct trap_frame *locr0 = p->p_md.md_regs;
		struct sysent *callp;
		unsigned int code;
		register_t tpc;
		int numsys, error;
		struct args {
			register_t i[8];
		} args;
		register_t rval[2];

		atomic_add_int(&uvmexp.syscalls, 1);

		/* compute next PC after syscall instruction */
		tpc = trapframe->pc; /* Remember if restart */
		if (trapframe->cause & CR_BR_DELAY)
			locr0->pc = MipsEmulateBranch(locr0,
			    trapframe->pc, 0, 0);
		else
			locr0->pc += 4;
		callp = p->p_p->ps_emul->e_sysent;
		numsys = p->p_p->ps_emul->e_nsysent;
		code = locr0->v0;
		switch (code) {
		case SYS_syscall:
		case SYS___syscall:
			/*
			 * Code is first argument, followed by actual args.
			 * __syscall provides the code as a quad to maintain
			 * proper alignment of 64-bit arguments on 32-bit
			 * platforms, which doesn't change anything here.
			 */
			code = locr0->a0;
			if (code >= numsys)
				callp += p->p_p->ps_emul->e_nosys; /* (illegal) */
			else
				callp += code;
			i = callp->sy_argsize / sizeof(register_t);
			args.i[0] = locr0->a1;
			args.i[1] = locr0->a2;
			args.i[2] = locr0->a3;
			if (i > 3) {
				args.i[3] = locr0->a4;
				args.i[4] = locr0->a5;
				args.i[5] = locr0->a6;
				args.i[6] = locr0->a7;
				if (i > 7)
					if ((error = copyin((void *)locr0->sp,
					    &args.i[7], sizeof(register_t))))
						goto bad;
			}
			break;
		default:
			if (code >= numsys)
				callp += p->p_p->ps_emul->e_nosys; /* (illegal) */
			else
				callp += code;

			i = callp->sy_narg;
			args.i[0] = locr0->a0;
			args.i[1] = locr0->a1;
			args.i[2] = locr0->a2;
			args.i[3] = locr0->a3;
			if (i > 4) {
				args.i[4] = locr0->a4;
				args.i[5] = locr0->a5;
				args.i[6] = locr0->a6;
				args.i[7] = locr0->a7;
			}
		}

		rval[0] = 0;
		rval[1] = locr0->v1;

#if defined(DDB) || defined(DEBUG)
		trapdebug[TRAPSIZE * ci->ci_cpuid + (trppos[ci->ci_cpuid] == 0 ?
		    TRAPSIZE : trppos[ci->ci_cpuid]) - 1].code = code;
#endif

		error = mi_syscall(p, code, callp, args.i, rval);

		switch (error) {
		case 0:
			locr0->v0 = rval[0];
			locr0->v1 = rval[1];
			locr0->a3 = 0;
			break;

		case ERESTART:
			locr0->pc = tpc;
			break;

		case EJUSTRETURN:
			break;	/* nothing to do */

		default:
		bad:
			locr0->v0 = error;
			locr0->a3 = 1;
		}

		mi_syscall_return(p, code, error, rval);

		return;
	    }

	case T_BREAK:
#ifdef DDB
		kdb_trap(type, trapframe);
#endif
		/* Reenable interrupts if necessary */
		if (trapframe->sr & SR_INT_ENAB) {
			enableintr();
		}
		return;

	case T_BREAK+T_USER:
	    {
		caddr_t va;
		u_int32_t instr;
		struct trap_frame *locr0 = p->p_md.md_regs;

		/* compute address of break instruction */
		va = (caddr_t)trapframe->pc;
		if (trapframe->cause & CR_BR_DELAY)
			va += 4;

		/* read break instruction */
		copyin(va, &instr, sizeof(int32_t));

		switch ((instr & BREAK_VAL_MASK) >> BREAK_VAL_SHIFT) {
		case 6:	/* gcc range error */
			i = SIGFPE;
			typ = FPE_FLTSUB;
			/* skip instruction */
			if (trapframe->cause & CR_BR_DELAY)
				locr0->pc = MipsEmulateBranch(locr0,
				    trapframe->pc, 0, 0);
			else
				locr0->pc += 4;
			break;
		case 7:	/* gcc3 divide by zero */
			i = SIGFPE;
			typ = FPE_INTDIV;
			/* skip instruction */
			if (trapframe->cause & CR_BR_DELAY)
				locr0->pc = MipsEmulateBranch(locr0,
				    trapframe->pc, 0, 0);
			else
				locr0->pc += 4;
			break;
#ifdef PTRACE
		case BREAK_SSTEP_VAL:
			if (p->p_md.md_ss_addr == (long)va) {
#ifdef DEBUG
				printf("trap: %s (%d): breakpoint at %p "
				    "(insn %08x)\n",
				    p->p_comm, p->p_pid,
				    (void *)p->p_md.md_ss_addr,
				    p->p_md.md_ss_instr);
#endif

				/* Restore original instruction and clear BP */
				process_sstep(p, 0);
				typ = TRAP_BRKPT;
			} else {
				typ = TRAP_TRACE;
			}
			i = SIGTRAP;
			break;
#endif
#ifdef FPUEMUL
		case BREAK_FPUEMUL_VAL:
			/*
			 * If this is a genuine FP emulation break,
			 * resume execution to our branch destination.
			 */
			if ((p->p_md.md_flags & MDP_FPUSED) != 0 &&
			    p->p_md.md_fppgva + 4 == (vaddr_t)va) {
				struct vm_map *map = &p->p_vmspace->vm_map;

				p->p_md.md_flags &= ~MDP_FPUSED;
				locr0->pc = p->p_md.md_fpbranchva;

				/*
				 * Prevent access to the relocation page.
				 * XXX needs to be fixed to work with rthreads
				 */
				uvm_fault_unwire(map, p->p_md.md_fppgva,
				    p->p_md.md_fppgva + PAGE_SIZE);
				(void)uvm_map_protect(map, p->p_md.md_fppgva,
				    p->p_md.md_fppgva + PAGE_SIZE,
				    UVM_PROT_NONE, FALSE);
				return;
			}
			/* FALLTHROUGH */
#endif
		default:
			typ = TRAP_TRACE;
			i = SIGTRAP;
			break;
		}
		break;
	    }

	case T_IWATCH+T_USER:
	case T_DWATCH+T_USER:
	    {
		caddr_t va;
		/* compute address of trapped instruction */
		va = (caddr_t)trapframe->pc;
		if (trapframe->cause & CR_BR_DELAY)
			va += 4;
		printf("watch exception @ %p\n", va);
#ifdef RM7K_PERFCNTR
		if (rm7k_watchintr(trapframe)) {
			/* Return to user, don't add any more overhead */
			return;
		}
#endif
		i = SIGTRAP;
		typ = TRAP_BRKPT;
		break;
	    }

	case T_TRAP+T_USER:
	    {
		caddr_t va;
		u_int32_t instr;
		struct trap_frame *locr0 = p->p_md.md_regs;

		/* compute address of trap instruction */
		va = (caddr_t)trapframe->pc;
		if (trapframe->cause & CR_BR_DELAY)
			va += 4;
		/* read break instruction */
		copyin(va, &instr, sizeof(int32_t));

		if (trapframe->cause & CR_BR_DELAY)
			locr0->pc = MipsEmulateBranch(locr0,
			    trapframe->pc, 0, 0);
		else
			locr0->pc += 4;
#ifdef RM7K_PERFCNTR
		if (instr == 0x040c0000) { /* Performance cntr trap */
			int result;

			result = rm7k_perfcntr(trapframe->a0, trapframe->a1,
						trapframe->a2, trapframe->a3);
			locr0->v0 = -result;
			/* Return to user, don't add any more overhead */
			return;
		} else
#endif
		/*
		 * GCC 4 uses teq with code 7 to signal divide by
		 * zero at runtime.  This is one instruction shorter
		 * than the BEQ + BREAK combination used by gcc 3.
		 */
		if ((instr & 0xfc00003f) == 0x00000034 /* teq */ &&
		    (instr & 0x001fffc0) == ((ZERO << 16) | (7 << 6))) {
			i = SIGFPE;
			typ = FPE_INTDIV;
		} else {
			i = SIGEMT;	/* Stuff it with something for now */
			typ = 0;
		}
		break;
	    }

	case T_RES_INST+T_USER:
		i = SIGILL;
		typ = ILL_ILLOPC;
		break;

	case T_COP_UNUSABLE+T_USER:
		/*
		 * Note MIPS IV COP1X instructions issued with FPU
		 * disabled correctly report coprocessor 1 as the
		 * unusable coprocessor number.
		 */
		if ((trapframe->cause & CR_COP_ERR) != CR_COP1_ERR) {
			i = SIGILL;	/* only FPU instructions allowed */
			typ = ILL_ILLOPC;
			break;
		}
#ifdef FPUEMUL
		MipsFPTrap(trapframe);
#else
		enable_fpu(p);
#endif
		return;

	case T_FPE:
		printf("FPU Trap: PC %lx CR %lx SR %lx\n",
			trapframe->pc, trapframe->cause, trapframe->sr);
		goto err;

	case T_FPE+T_USER:
		MipsFPTrap(trapframe);
		return;

	case T_OVFLOW+T_USER:
		i = SIGFPE;
		typ = FPE_FLTOVF;
		break;

	case T_ADDR_ERR_LD:	/* misaligned access */
	case T_ADDR_ERR_ST:	/* misaligned access */
	case T_BUS_ERR_LD_ST:	/* BERR asserted to cpu */
		pcb = &p->p_addr->u_pcb;
		if ((onfault = pcb->pcb_onfault) != 0) {
			pcb->pcb_onfault = 0;
			trapframe->pc = onfault_table[onfault];
			return;
		}
		goto err;

	default:
	err:
		disableintr();
#if !defined(DDB) && defined(DEBUG)
		trapDump("trap", printf);
#endif
		printf("\nTrap cause = %d Frame %p\n", type, trapframe);
		printf("Trap PC %p RA %p fault %p\n",
		    (void *)trapframe->pc, (void *)trapframe->ra,
		    (void *)trapframe->badvaddr);
#ifdef DDB
		stacktrace(!USERMODE(trapframe->sr) ? trapframe : p->p_md.md_regs);
		kdb_trap(type, trapframe);
#endif
		panic("trap");
	}

#ifdef FPUEMUL
	/*
	 * If a relocated delay slot causes an exception, blame the
	 * original delay slot address - userland is not supposed to
	 * know anything about emulation bowels.
	 */
	if ((p->p_md.md_flags & MDP_FPUSED) != 0 &&
	    trapframe->badvaddr == p->p_md.md_fppgva)
		trapframe->badvaddr = p->p_md.md_fpslotva;
#endif
	p->p_md.md_regs->pc = trapframe->pc;
	p->p_md.md_regs->cause = trapframe->cause;
	p->p_md.md_regs->badvaddr = trapframe->badvaddr;
	sv.sival_ptr = (void *)trapframe->badvaddr;
	KERNEL_LOCK();
	trapsignal(p, i, ucode, typ, sv);
	KERNEL_UNLOCK();
}
Beispiel #20
0
static void atomic_increment(sp_counted_base_atomic_type volatile *pw)
{
    atomic_add_int(&pw->ui,1);
}
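A reference count bumped this way is normally paired with a decrement that detects the transition to zero. As a minimal counterpart sketch only (the atomic_decrement name and the dispose comment are assumptions, not part of the example above), the release side could use FreeBSD's atomic_fetchadd_int(), which returns the value held before the addition:

static void atomic_decrement(sp_counted_base_atomic_type volatile *pw)
{
    /* atomic_fetchadd_int() returns the value held before the addition. */
    if (atomic_fetchadd_int(&pw->ui, -1) == 1) {
        /* This was the last reference; dispose of the managed object here. */
    }
}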
Beispiel #21
0
static int
mpc7xxx_intr(int cpu, struct trapframe *tf)
{
	int i, error, retval;
	uint32_t config;
	struct pmc *pm;
	struct powerpc_cpu *pac;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[powerpc,%d] out of range CPU %d", __LINE__, cpu));

	PMCDBG(MDP,INT,1, "cpu=%d tf=%p um=%d", cpu, (void *) tf,
	    TRAPF_USERMODE(tf));

	retval = 0;

	pac = powerpc_pcpu[cpu];

	config  = mfspr(SPR_MMCR0) & ~SPR_MMCR0_FC;

	/*
	 * look for all PMCs that have interrupted:
	 * - look for a running, sampling PMC which has overflowed
	 *   and which has a valid 'struct pmc' association
	 *
	 * If found, we call a helper to process the interrupt.
	 */

	for (i = 0; i < MPC7XXX_MAX_PMCS; i++) {
		if ((pm = pac->pc_ppcpmcs[i].phw_pmc) == NULL ||
		    !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) {
			continue;
		}

		if (!MPC7XXX_PMC_HAS_OVERFLOWED(i))
			continue;

		retval = 1;	/* Found an interrupting PMC. */

		if (pm->pm_state != PMC_STATE_RUNNING)
			continue;

		/* Stop the counter if logging fails. */
		error = pmc_process_interrupt(cpu, PMC_HR, pm, tf,
		    TRAPF_USERMODE(tf));
		if (error != 0)
			mpc7xxx_stop_pmc(cpu, i);

		/* reload count. */
		mpc7xxx_write_pmc(cpu, i, pm->pm_sc.pm_reloadcount);
	}

	atomic_add_int(retval ? &pmc_stats.pm_intr_processed :
	    &pmc_stats.pm_intr_ignored, 1);

	/* Re-enable PERF exceptions. */
	if (retval)
		mtspr(SPR_MMCR0, config | SPR_MMCR0_PMXE);

	return (retval);
}
Beispiel #22
0
static vm_page_t rtR0MemObjFreeBSDContigPhysAllocHelper(vm_object_t pObject, vm_pindex_t iPIndex,
                                                        u_long cPages, vm_paddr_t VmPhysAddrHigh,
                                                        u_long uAlignment, bool fWire)
{
    vm_page_t pPages;
    int cTries = 0;

#if __FreeBSD_version > 1000000
    int fFlags = VM_ALLOC_INTERRUPT | VM_ALLOC_NOBUSY;
    if (fWire)
        fFlags |= VM_ALLOC_WIRED;

    while (cTries <= 1)
    {
#if __FreeBSD_version >= 1000030
        VM_OBJECT_WLOCK(pObject);
#else
        VM_OBJECT_LOCK(pObject);
#endif
        pPages = vm_page_alloc_contig(pObject, iPIndex, fFlags, cPages, 0,
                                      VmPhysAddrHigh, uAlignment, 0, VM_MEMATTR_DEFAULT);
#if __FreeBSD_version >= 1000030
        VM_OBJECT_WUNLOCK(pObject);
#else
        VM_OBJECT_UNLOCK(pObject);
#endif
        if (pPages)
            break;
        vm_pageout_grow_cache(cTries, 0, VmPhysAddrHigh);
        cTries++;
    }

    return pPages;
#else
    while (cTries <= 1)
    {
        pPages = vm_phys_alloc_contig(cPages, 0, VmPhysAddrHigh, uAlignment, 0);
        if (pPages)
            break;
        vm_contig_grow_cache(cTries, 0, VmPhysAddrHigh);
        cTries++;
    }

    if (!pPages)
        return pPages;
#if __FreeBSD_version >= 1000030
    VM_OBJECT_WLOCK(pObject);
#else
    VM_OBJECT_LOCK(pObject);
#endif
    for (vm_pindex_t iPage = 0; iPage < cPages; iPage++)
    {
        vm_page_t pPage = pPages + iPage;
        vm_page_insert(pPage, pObject, iPIndex + iPage);
        pPage->valid = VM_PAGE_BITS_ALL;
        if (fWire)
        {
            pPage->wire_count = 1;
            atomic_add_int(&cnt.v_wire_count, 1);
        }
    }
#if __FreeBSD_version >= 1000030
    VM_OBJECT_WUNLOCK(pObject);
#else
    VM_OBJECT_UNLOCK(pObject);
#endif
    return pPages;
#endif
}
Beispiel #23
0
/*
 * Allocate a device specific dma_tag.
 */
int
bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
    bus_size_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
    bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize,
    int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
    void *lockfuncarg, bus_dma_tag_t *dmat)
{
	bus_dma_tag_t newtag;
	int error = 0;

	/* Return a NULL tag on failure */
	*dmat = NULL;

	newtag = (bus_dma_tag_t)malloc(sizeof(*newtag), M_DEVBUF, M_NOWAIT);
	if (newtag == NULL)
		return (ENOMEM);

	newtag->parent = parent;
	newtag->alignment = alignment;
	newtag->boundary = boundary;
	newtag->lowaddr = trunc_page((vm_offset_t)lowaddr) + (PAGE_SIZE - 1);
	newtag->highaddr = trunc_page((vm_offset_t)highaddr) + (PAGE_SIZE - 1);
	newtag->filter = filter;
	newtag->filterarg = filterarg;
	newtag->maxsize = maxsize;
	newtag->nsegments = nsegments;
	newtag->maxsegsz = maxsegsz;
	newtag->flags = flags;
	newtag->ref_count = 1; /* Count ourselves */
	newtag->map_count = 0;
	if (lockfunc != NULL) {
		newtag->lockfunc = lockfunc;
		newtag->lockfuncarg = lockfuncarg;
	} else {
		newtag->lockfunc = dflt_lock;
		newtag->lockfuncarg = NULL;
	}

	/*
	 * Take into account any restrictions imposed by our parent tag
	 */
	if (parent != NULL) {
		newtag->lowaddr = min(parent->lowaddr, newtag->lowaddr);
		newtag->highaddr = max(parent->highaddr, newtag->highaddr);
		if (newtag->boundary == 0)
			newtag->boundary = parent->boundary;
		else if (parent->boundary != 0)
			newtag->boundary = MIN(parent->boundary,
			    newtag->boundary);
		if (newtag->filter == NULL) {
			/*
			 * Short circuit looking at our parent directly
			 * since we have encapsulated all of its information
			 */
			newtag->filter = parent->filter;
			newtag->filterarg = parent->filterarg;
			newtag->parent = parent->parent;
		}
		if (newtag->parent != NULL)
			atomic_add_int(&parent->ref_count, 1);
	}

	*dmat = newtag;
	return (error);
}
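The reference taken on parent->ref_count above is normally given back when a child tag is destroyed, walking up the parent chain as tags become unreferenced. The sketch below shows only that unwinding pattern; it is not the driver's actual bus_dma_tag_destroy(), and the real function's map bookkeeping is omitted:

int
bus_dma_tag_destroy(bus_dma_tag_t dmat)
{
	bus_dma_tag_t parent;

	if (dmat == NULL)
		return (0);
	if (dmat->map_count != 0)
		return (EBUSY);		/* maps are still outstanding */

	while (dmat != NULL) {
		parent = dmat->parent;
		/* Drop our reference; free the tag when it was the last one. */
		if (atomic_fetchadd_int(&dmat->ref_count, -1) == 1) {
			free(dmat, M_DEVBUF);
			/* Freeing a tag releases the hold it had on its parent. */
			dmat = parent;
		} else
			dmat = NULL;
	}
	return (0);
}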
Beispiel #24
0
static int
cbb_pci_filt(void *arg)
{
	struct cbb_softc *sc = arg;
	uint32_t sockevent;
	uint8_t csc;
	int retval = FILTER_STRAY;

	/*
	 * Some chips also require us to read the old ExCA register for card
	 * status change when we route CSC via PCI.  This isn't supposed to be
	 * required, but it clears the interrupt state on some chipsets.
	 * Maybe there's a setting that would obviate its need.  Maybe we
	 * should test the status bits and deal with them, but so far we've
	 * not found any machines that don't also give us the socket status
	 * indication above.
	 *
	 * This call used to be unconditional.  However, further research
	 * suggests that we hit this condition when the card READY interrupt
	 * fired.  So now we only read it for 16-bit cards, and we only claim
	 * the interrupt if READY is set.  If this still causes problems, then
	 * the next step would be to read this if we have a 16-bit card *OR*
	 * we have no card.  We treat the READY signal as if it were the power
	 * completion signal.  Some bridges may double signal things here, but
	 * signalling twice should be OK since we only sleep on the powerintr
	 * in one place and a double wakeup would be benign there.
	 */
	if (sc->flags & CBB_16BIT_CARD) {
		csc = exca_getb(&sc->exca[0], EXCA_CSC);
		if (csc & EXCA_CSC_READY) {
			atomic_add_int(&sc->powerintr, 1);
			wakeup((void *)&sc->powerintr);
			retval = FILTER_HANDLED;
		}
	}

	/*
	 * Read the socket event.  Sometimes, the theory goes, the PCI bus is
	 * so loaded that it cannot satisfy the read request, so we get
	 * garbage back from the following read.  We have to filter out the
	 * garbage so that we don't spontaneously reset the card under high
	 * load.  PCI isn't supposed to act like this.  No doubt this is a bug
	 * in the PCI bridge chipset (or cbb bridge) that's being used in
	 * certain amd64 laptops today.  Work around the issue by assuming
	 * that any set bits we don't know about mean that we got
	 * garbage.
	 */
	sockevent = cbb_get(sc, CBB_SOCKET_EVENT);
	if (sockevent != 0 && (sockevent & ~CBB_SOCKET_EVENT_VALID_MASK) == 0) {
		/*
		 * If anything has happened to the socket, we assume that the
		 * card is no longer OK, and we shouldn't call its ISR.  We
		 * set cardok as soon as we've attached the card.  This helps
		 * in a noisy eject, which happens all too often when users
		 * are ejecting their PC Cards.
		 *
		 * We use this method in preference to checking to see if the
		 * card is still there because the check suffers from a race
		 * condition in the bouncing case.
		 */
#define DELTA (CBB_SOCKET_MASK_CD)
		if (sockevent & DELTA) {
			cbb_clrb(sc, CBB_SOCKET_MASK, DELTA);
			cbb_set(sc, CBB_SOCKET_EVENT, DELTA);
			sc->cardok = 0;
			cbb_disable_func_intr(sc);
			wakeup(&sc->intrhand);
		}
#undef DELTA

		/*
		 * Wake up anybody waiting for a power interrupt.  We have to
		 * use atomic_add_int for wakeups on other cores.
		 */
		if (sockevent & CBB_SOCKET_EVENT_POWER) {
			cbb_clrb(sc, CBB_SOCKET_MASK, CBB_SOCKET_EVENT_POWER);
			cbb_set(sc, CBB_SOCKET_EVENT, CBB_SOCKET_EVENT_POWER);
			atomic_add_int(&sc->powerintr, 1);
			wakeup((void *)&sc->powerintr);
		}

		/*
		 * Status change interrupts aren't presently used in the
		 * rest of the driver.  For now, just ACK them.
		 */
		if (sockevent & CBB_SOCKET_EVENT_CSTS)
			cbb_set(sc, CBB_SOCKET_EVENT, CBB_SOCKET_EVENT_CSTS);
		retval = FILTER_HANDLED;
	}
	return retval;
}
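The filter above only bumps sc->powerintr and wakes the sleeper; the thread that waits for power to settle sleeps on the same address and re-checks the counter, which is why a doubled wakeup is harmless. A minimal sketch of that consumer side follows; the helper name, the priority and the timeout policy are illustrative assumptions, not the driver's real code:

static int
cbb_wait_power_intr(struct cbb_softc *sc, int timo)
{
	int start = sc->powerintr;

	/* Sleep until cbb_pci_filt() bumps the counter or the timeout hits. */
	while (sc->powerintr == start) {
		if (tsleep((void *)&sc->powerintr, PZERO, "cbbpwr", timo) ==
		    EWOULDBLOCK)
			return (EWOULDBLOCK);
	}
	return (0);
}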
Beispiel #25
0
/*
 * Adding a ref to an inode is only legal if the inode already has at least
 * one ref.
 */
void
hammer2_inode_ref(hammer2_inode_t *ip)
{
	atomic_add_int(&ip->refs, 1);
}
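The reference taken by hammer2_inode_ref() is paired with a drop that notices the transition to zero. The real hammer2_inode_drop() does substantially more work (reclaiming the inode and interacting with the chain layer), so treat the following as a counting-only sketch under those assumptions:

void
hammer2_inode_drop(hammer2_inode_t *ip)
{
	u_int refs;

	/* atomic_fetchadd_int() returns the pre-decrement value. */
	refs = atomic_fetchadd_int(&ip->refs, -1);
	KKASSERT(refs > 0);
	if (refs == 1) {
		/* The last reference went away; reclaim the inode here. */
	}
}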
Beispiel #26
0
/*
 * Allocate a device specific dma_tag.
 */
int
bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
    bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
    bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize,
    int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
    void *lockfuncarg, bus_dma_tag_t *dmat)
{
	bus_dma_tag_t newtag;

	/* Return a NULL tag on failure */
	*dmat = NULL;

	/* Enforce the usage of BUS_GET_DMA_TAG(). */
	if (parent == NULL)
		panic("%s: parent DMA tag NULL", __func__);

	newtag = (bus_dma_tag_t)malloc(sizeof(*newtag), M_DEVBUF, M_NOWAIT);
	if (newtag == NULL)
		return (ENOMEM);

	/*
	 * The method table pointer and the cookie need to be taken over from
	 * the parent.
	 */
	newtag->dt_cookie = parent->dt_cookie;
	newtag->dt_mt = parent->dt_mt;

	newtag->dt_parent = parent;
	newtag->dt_alignment = alignment;
	newtag->dt_boundary = boundary;
	newtag->dt_lowaddr = trunc_page((vm_offset_t)lowaddr) + (PAGE_SIZE - 1);
	newtag->dt_highaddr = trunc_page((vm_offset_t)highaddr) +
	    (PAGE_SIZE - 1);
	newtag->dt_filter = filter;
	newtag->dt_filterarg = filterarg;
	newtag->dt_maxsize = maxsize;
	newtag->dt_nsegments = nsegments;
	newtag->dt_maxsegsz = maxsegsz;
	newtag->dt_flags = flags;
	newtag->dt_ref_count = 1; /* Count ourselves */
	newtag->dt_map_count = 0;

	if (lockfunc != NULL) {
		newtag->dt_lockfunc = lockfunc;
		newtag->dt_lockfuncarg = lockfuncarg;
	} else {
		newtag->dt_lockfunc = dflt_lock;
		newtag->dt_lockfuncarg = NULL;
	}

	newtag->dt_segments = NULL;

	/* Take into account any restrictions imposed by our parent tag. */
	newtag->dt_lowaddr = ulmin(parent->dt_lowaddr, newtag->dt_lowaddr);
	newtag->dt_highaddr = ulmax(parent->dt_highaddr, newtag->dt_highaddr);
	if (newtag->dt_boundary == 0)
		newtag->dt_boundary = parent->dt_boundary;
	else if (parent->dt_boundary != 0)
		newtag->dt_boundary = ulmin(parent->dt_boundary,
		    newtag->dt_boundary);
	atomic_add_int(&parent->dt_ref_count, 1);

	if (newtag->dt_boundary > 0)
		newtag->dt_maxsegsz = ulmin(newtag->dt_maxsegsz,
		    newtag->dt_boundary);

	*dmat = newtag;
	return (0);
}
Beispiel #27
0
static void
iv_lazypmap(uintptr_t a, uintptr_t b)
{
	pmap_lazyfix_action();
	atomic_add_int(&smp_tlb_wait, 1);
}
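iv_lazypmap() is the receiving half of an IPI handshake: each target CPU runs the action and acknowledges by bumping smp_tlb_wait, while the sender resets the counter, fires the IPIs and spins until every target has checked in. A sketch of that waiting side is shown below; only smp_tlb_wait comes from the example, the other names are illustrative:

extern volatile int smp_tlb_wait;

static void
smp_run_on_cpus(int ncpus)
{
	/* Reset the acknowledgement counter before sending the IPIs. */
	smp_tlb_wait = 0;
	/* ... deliver the IPI to the ncpus target CPUs here ... */

	/* Spin until every target CPU has bumped the counter. */
	while (smp_tlb_wait < ncpus)
		cpu_spinwait();		/* tell the CPU we are busy-waiting */
}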
Beispiel #28
0
int
fork1(struct thread *td, struct fork_req *fr)
{
	struct proc *p1, *newproc;
	struct thread *td2;
	struct vmspace *vm2;
	struct file *fp_procdesc;
	vm_ooffset_t mem_charged;
	int error, nprocs_new, ok;
	static int curfail;
	static struct timeval lastfail;
	int flags, pages;

	flags = fr->fr_flags;
	pages = fr->fr_pages;

	if ((flags & RFSTOPPED) != 0)
		MPASS(fr->fr_procp != NULL && fr->fr_pidp == NULL);
	else
		MPASS(fr->fr_procp == NULL);

	/* Check for the undefined or unimplemented flags. */
	if ((flags & ~(RFFLAGS | RFTSIGFLAGS(RFTSIGMASK))) != 0)
		return (EINVAL);

	/* Signal value requires RFTSIGZMB. */
	if ((flags & RFTSIGFLAGS(RFTSIGMASK)) != 0 && (flags & RFTSIGZMB) == 0)
		return (EINVAL);

	/* Can't copy and clear. */
	if ((flags & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG))
		return (EINVAL);

	/* Check the validity of the signal number. */
	if ((flags & RFTSIGZMB) != 0 && (u_int)RFTSIGNUM(flags) > _SIG_MAXSIG)
		return (EINVAL);

	if ((flags & RFPROCDESC) != 0) {
		/* Can't request a process descriptor without creating a process. */
		if ((flags & RFPROC) == 0)
			return (EINVAL);

		/* Must provide a place to put a procdesc if creating one. */
		if (fr->fr_pd_fd == NULL)
			return (EINVAL);

		/* Check if we are using supported flags. */
		if ((fr->fr_pd_flags & ~PD_ALLOWED_AT_FORK) != 0)
			return (EINVAL);
	}

	p1 = td->td_proc;

	/*
	 * Here we don't create a new process, but we divorce
	 * certain parts of a process from itself.
	 */
	if ((flags & RFPROC) == 0) {
		if (fr->fr_procp != NULL)
			*fr->fr_procp = NULL;
		else if (fr->fr_pidp != NULL)
			*fr->fr_pidp = 0;
		return (fork_norfproc(td, flags));
	}

	fp_procdesc = NULL;
	newproc = NULL;
	vm2 = NULL;

	/*
	 * Increment the nprocs resource before allocations occur.
	 * Although process entries are dynamically created, we still
	 * keep a global limit on the maximum number we will
	 * create. There are hard-limits as to the number of processes
	 * that can run, established by the KVA and memory usage for
	 * the process data.
	 *
	 * Don't allow a nonprivileged user to use the last ten
	 * processes; don't let root exceed the limit.
	 */
	nprocs_new = atomic_fetchadd_int(&nprocs, 1) + 1;
	if ((nprocs_new >= maxproc - 10 && priv_check_cred(td->td_ucred,
	    PRIV_MAXPROC, 0) != 0) || nprocs_new >= maxproc) {
		error = EAGAIN;
		sx_xlock(&allproc_lock);
		if (ppsratecheck(&lastfail, &curfail, 1)) {
			printf("maxproc limit exceeded by uid %u (pid %d); "
			    "see tuning(7) and login.conf(5)\n",
			    td->td_ucred->cr_ruid, p1->p_pid);
		}
		sx_xunlock(&allproc_lock);
		goto fail2;
	}

	/*
	 * If required, create a process descriptor in the parent first; we
	 * will abandon it if something goes wrong. We don't finit() until
	 * later.
	 */
	if (flags & RFPROCDESC) {
		error = procdesc_falloc(td, &fp_procdesc, fr->fr_pd_fd,
		    fr->fr_pd_flags, fr->fr_pd_fcaps);
		if (error != 0)
			goto fail2;
	}

	mem_charged = 0;
	if (pages == 0)
		pages = kstack_pages;
	/* Allocate new proc. */
	newproc = uma_zalloc(proc_zone, M_WAITOK);
	td2 = FIRST_THREAD_IN_PROC(newproc);
	if (td2 == NULL) {
		td2 = thread_alloc(pages);
		if (td2 == NULL) {
			error = ENOMEM;
			goto fail2;
		}
		proc_linkup(newproc, td2);
	} else {
		if (td2->td_kstack == 0 || td2->td_kstack_pages != pages) {
			if (td2->td_kstack != 0)
				vm_thread_dispose(td2);
			if (!thread_alloc_stack(td2, pages)) {
				error = ENOMEM;
				goto fail2;
			}
		}
	}

	if ((flags & RFMEM) == 0) {
		vm2 = vmspace_fork(p1->p_vmspace, &mem_charged);
		if (vm2 == NULL) {
			error = ENOMEM;
			goto fail2;
		}
		if (!swap_reserve(mem_charged)) {
			/*
			 * The swap reservation failed. The accounting
			 * from the entries of the copied vm2 will be
			 * subtracted in vmspace_free(), so force the
			 * reservation there.
			 */
			swap_reserve_force(mem_charged);
			error = ENOMEM;
			goto fail2;
		}
	} else
		vm2 = NULL;

	/*
	 * XXX: This is ugly; when we copy resource usage, we need to bump
	 *      per-cred resource counters.
	 */
	proc_set_cred_init(newproc, crhold(td->td_ucred));

	/*
	 * Initialize resource accounting for the child process.
	 */
	error = racct_proc_fork(p1, newproc);
	if (error != 0) {
		error = EAGAIN;
		goto fail1;
	}

#ifdef MAC
	mac_proc_init(newproc);
#endif
	newproc->p_klist = knlist_alloc(&newproc->p_mtx);
	STAILQ_INIT(&newproc->p_ktr);

	/* We have to lock the process tree while we look for a pid. */
	sx_slock(&proctree_lock);
	sx_xlock(&allproc_lock);

	/*
	 * Increment the count of procs running with this uid. Don't allow
	 * a nonprivileged user to exceed their current limit.
	 *
	 * XXXRW: Can we avoid privilege here if it's not needed?
	 */
	error = priv_check_cred(td->td_ucred, PRIV_PROC_LIMIT, 0);
	if (error == 0)
		ok = chgproccnt(td->td_ucred->cr_ruidinfo, 1, 0);
	else {
		ok = chgproccnt(td->td_ucred->cr_ruidinfo, 1,
		    lim_cur(td, RLIMIT_NPROC));
	}
	if (ok) {
		do_fork(td, fr, newproc, td2, vm2, fp_procdesc);
		return (0);
	}

	error = EAGAIN;
	sx_sunlock(&proctree_lock);
	sx_xunlock(&allproc_lock);
#ifdef MAC
	mac_proc_destroy(newproc);
#endif
	racct_proc_exit(newproc);
fail1:
	crfree(newproc->p_ucred);
	newproc->p_ucred = NULL;
fail2:
	if (vm2 != NULL)
		vmspace_free(vm2);
	uma_zfree(proc_zone, newproc);
	if ((flags & RFPROCDESC) != 0 && fp_procdesc != NULL) {
		fdclose(td, fp_procdesc, *fr->fr_pd_fd);
		fdrop(fp_procdesc, td);
	}
	atomic_add_int(&nprocs, -1);
	pause("fork", hz / 2);
	return (error);
}
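fork1() uses a common idiom around the nprocs counter: it is bumped optimistically with atomic_fetchadd_int() before any expensive work, and every failure path undoes the bump with atomic_add_int(&nprocs, -1). Stripped of the process-specific details, the pattern looks roughly like this (all names below are illustrative, not kernel API):

static volatile u_int slot_count;

static int
slot_acquire(u_int limit)
{
	/* Optimistically claim a slot; fetchadd returns the old value. */
	if (atomic_fetchadd_int(&slot_count, 1) + 1 > limit) {
		/* Over the limit: roll the claim back and report failure. */
		atomic_subtract_int(&slot_count, 1);
		return (EAGAIN);
	}
	return (0);
}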
Beispiel #29
0
int
sctp_do_peeloff(struct socket *head, struct socket *so, sctp_assoc_t assoc_id)
{
	struct sctp_inpcb *inp, *n_inp;
	struct sctp_tcb *stcb;
	uint32_t state;

	inp = (struct sctp_inpcb *)head->so_pcb;
	if (inp == NULL) {
		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, EFAULT);
		return (EFAULT);
	}
	stcb = sctp_findassociation_ep_asocid(inp, assoc_id, 1);
	if (stcb == NULL) {
		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, ENOTCONN);
		return (ENOTCONN);
	}
	state = SCTP_GET_STATE((&stcb->asoc));
	if ((state == SCTP_STATE_EMPTY) ||
	    (state == SCTP_STATE_INUSE)) {
		SCTP_TCB_UNLOCK(stcb);
		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, ENOTCONN);
		return (ENOTCONN);
	}
	n_inp = (struct sctp_inpcb *)so->so_pcb;
	n_inp->sctp_flags = (SCTP_PCB_FLAGS_UDPTYPE |
	    SCTP_PCB_FLAGS_CONNECTED |
	    SCTP_PCB_FLAGS_IN_TCPPOOL |	/* Turn on Blocking IO */
	    (SCTP_PCB_COPY_FLAGS & inp->sctp_flags));
	n_inp->sctp_socket = so;
	n_inp->sctp_features = inp->sctp_features;
	n_inp->sctp_mobility_features = inp->sctp_mobility_features;
	n_inp->sctp_frag_point = inp->sctp_frag_point;
	n_inp->sctp_cmt_on_off = inp->sctp_cmt_on_off;
	n_inp->ecn_supported = inp->ecn_supported;
	n_inp->prsctp_supported = inp->prsctp_supported;
	n_inp->auth_supported = inp->auth_supported;
	n_inp->asconf_supported = inp->asconf_supported;
	n_inp->reconfig_supported = inp->reconfig_supported;
	n_inp->nrsack_supported = inp->nrsack_supported;
	n_inp->pktdrop_supported = inp->pktdrop_supported;
	n_inp->partial_delivery_point = inp->partial_delivery_point;
	n_inp->sctp_context = inp->sctp_context;
	n_inp->max_cwnd = inp->max_cwnd;
	n_inp->local_strreset_support = inp->local_strreset_support;
	n_inp->inp_starting_point_for_iterator = NULL;
	/* copy in the authentication parameters from the original endpoint */
	if (n_inp->sctp_ep.local_hmacs)
		sctp_free_hmaclist(n_inp->sctp_ep.local_hmacs);
	n_inp->sctp_ep.local_hmacs =
	    sctp_copy_hmaclist(inp->sctp_ep.local_hmacs);
	if (n_inp->sctp_ep.local_auth_chunks)
		sctp_free_chunklist(n_inp->sctp_ep.local_auth_chunks);
	n_inp->sctp_ep.local_auth_chunks =
	    sctp_copy_chunklist(inp->sctp_ep.local_auth_chunks);
	(void)sctp_copy_skeylist(&inp->sctp_ep.shared_keys,
	    &n_inp->sctp_ep.shared_keys);
	/*
	 * Now we must move it from one hash table to another and get the
	 * stcb in the right place.
	 */
	sctp_move_pcb_and_assoc(inp, n_inp, stcb);
	atomic_add_int(&stcb->asoc.refcnt, 1);
	SCTP_TCB_UNLOCK(stcb);

	sctp_pull_off_control_to_new_inp(inp, n_inp, stcb, SBL_WAIT);
	atomic_subtract_int(&stcb->asoc.refcnt, 1);

	return (0);
}
Beispiel #30
0
int
_pthread_create(pthread_t * thread, const pthread_attr_t * attr,
	       void *(*start_routine) (void *), void *arg)
{
	struct pthread *curthread, *new_thread;
	struct thr_param param;
	struct sched_param sched_param;
	struct rtprio rtp;
	sigset_t set, oset;
	cpuset_t *cpusetp;
	int i, cpusetsize, create_suspended, locked, old_stack_prot, ret;

	cpusetp = NULL;
	ret = cpusetsize = 0;
	_thr_check_init();

	/*
	 * Tell libc and others now they need lock to protect their data.
	 */
	if (_thr_isthreaded() == 0) {
		_malloc_first_thread();
		if (_thr_setthreaded(1))
			return (EAGAIN);
	}

	curthread = _get_curthread();
	if ((new_thread = _thr_alloc(curthread)) == NULL)
		return (EAGAIN);

	memset(&param, 0, sizeof(param));

	if (attr == NULL || *attr == NULL)
		/* Use the default thread attributes: */
		new_thread->attr = _pthread_attr_default;
	else {
		new_thread->attr = *(*attr);
		cpusetp = new_thread->attr.cpuset;
		cpusetsize = new_thread->attr.cpusetsize;
		new_thread->attr.cpuset = NULL;
		new_thread->attr.cpusetsize = 0;
	}
	if (new_thread->attr.sched_inherit == PTHREAD_INHERIT_SCHED) {
		/* inherit scheduling contention scope */
		if (curthread->attr.flags & PTHREAD_SCOPE_SYSTEM)
			new_thread->attr.flags |= PTHREAD_SCOPE_SYSTEM;
		else
			new_thread->attr.flags &= ~PTHREAD_SCOPE_SYSTEM;

		new_thread->attr.prio = curthread->attr.prio;
		new_thread->attr.sched_policy = curthread->attr.sched_policy;
	}

	new_thread->tid = TID_TERMINATED;

	old_stack_prot = _rtld_get_stack_prot();
	if (create_stack(&new_thread->attr) != 0) {
		/* Insufficient memory to create a stack: */
		_thr_free(curthread, new_thread);
		return (EAGAIN);
	}
	/*
	 * Write a magic value to the thread structure
	 * to help identify valid ones:
	 */
	new_thread->magic = THR_MAGIC;
	new_thread->start_routine = start_routine;
	new_thread->arg = arg;
	new_thread->cancel_enable = 1;
	new_thread->cancel_async = 0;
	/* Initialize the mutex queue: */
	for (i = 0; i < TMQ_NITEMS; i++)
		TAILQ_INIT(&new_thread->mq[i]);

	/* Initialise hooks in the thread structure: */
	if (new_thread->attr.suspend == THR_CREATE_SUSPENDED) {
		new_thread->flags = THR_FLAGS_NEED_SUSPEND;
		create_suspended = 1;
	} else {
		create_suspended = 0;
	}

	new_thread->state = PS_RUNNING;

	if (new_thread->attr.flags & PTHREAD_CREATE_DETACHED)
		new_thread->flags |= THR_FLAGS_DETACHED;

	/* Add the new thread. */
	new_thread->refcount = 1;
	_thr_link(curthread, new_thread);

	/*
	 * Handle the race between __pthread_map_stacks_exec and
	 * thread linkage.
	 */
	if (old_stack_prot != _rtld_get_stack_prot())
		_thr_stack_fix_protection(new_thread);

	/* Return the thread pointer earlier so that the new thread can use it. */
	(*thread) = new_thread;
	if (SHOULD_REPORT_EVENT(curthread, TD_CREATE) || cpusetp != NULL) {
		THR_THREAD_LOCK(curthread, new_thread);
		locked = 1;
	} else
		locked = 0;
	param.start_func = (void (*)(void *)) thread_start;
	param.arg = new_thread;
	param.stack_base = new_thread->attr.stackaddr_attr;
	param.stack_size = new_thread->attr.stacksize_attr;
	param.tls_base = (char *)new_thread->tcb;
	param.tls_size = sizeof(struct tcb);
	param.child_tid = &new_thread->tid;
	param.parent_tid = &new_thread->tid;
	param.flags = 0;
	if (new_thread->attr.flags & PTHREAD_SCOPE_SYSTEM)
		param.flags |= THR_SYSTEM_SCOPE;
	if (new_thread->attr.sched_inherit == PTHREAD_INHERIT_SCHED)
		param.rtp = NULL;
	else {
		sched_param.sched_priority = new_thread->attr.prio;
		_schedparam_to_rtp(new_thread->attr.sched_policy,
			&sched_param, &rtp);
		param.rtp = &rtp;
	}

	/* Schedule the new thread. */
	if (create_suspended) {
		SIGFILLSET(set);
		SIGDELSET(set, SIGTRAP);
		__sys_sigprocmask(SIG_SETMASK, &set, &oset);
		new_thread->sigmask = oset;
		SIGDELSET(new_thread->sigmask, SIGCANCEL);
	}

	ret = thr_new(&param, sizeof(param));

	if (ret != 0) {
		ret = errno;
		/*
		 * Translate EPROCLIM into well-known POSIX code EAGAIN.
		 */
		if (ret == EPROCLIM)
			ret = EAGAIN;
	}

	if (create_suspended)
		__sys_sigprocmask(SIG_SETMASK, &oset, NULL);

	if (ret != 0) {
		if (!locked)
			THR_THREAD_LOCK(curthread, new_thread);
		new_thread->state = PS_DEAD;
		new_thread->tid = TID_TERMINATED;
		new_thread->flags |= THR_FLAGS_DETACHED;
		new_thread->refcount--;
		if (new_thread->flags & THR_FLAGS_NEED_SUSPEND) {
			new_thread->cycle++;
			_thr_umtx_wake(&new_thread->cycle, INT_MAX, 0);
		}
		_thr_try_gc(curthread, new_thread); /* thread lock released */
		atomic_add_int(&_thread_active_threads, -1);
	} else if (locked) {
		if (cpusetp != NULL) {
			if (cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID,
				TID(new_thread), cpusetsize, cpusetp)) {
				ret = errno;
				/* kill the new thread */
				new_thread->force_exit = 1;
				new_thread->flags |= THR_FLAGS_DETACHED;
				_thr_try_gc(curthread, new_thread);
				 /* thread lock released */
				goto out;
			}
		}

		_thr_report_creation(curthread, new_thread);
		THR_THREAD_UNLOCK(curthread, new_thread);
	}
out:
	if (ret)
		(*thread) = 0;
	return (ret);
}