static void
nvd_strategy(struct bio *bp)
{
	struct nvd_disk *ndisk;

	ndisk = (struct nvd_disk *)bp->bio_disk->d_drv1;

	if (__predict_false(bp->bio_flags & BIO_ORDERED))
		atomic_add_int(&ndisk->ordered_in_flight, 1);

	if (__predict_true(ndisk->ordered_in_flight == 0)) {
		nvd_bio_submit(ndisk, bp);
		return;
	}

	/*
	 * There are ordered bios in flight, so we need to submit
	 * bios through the task queue to enforce ordering.
	 */
	mtx_lock(&ndisk->bioqlock);
	bioq_insert_tail(&ndisk->bioq, bp);
	mtx_unlock(&ndisk->bioqlock);
	taskqueue_enqueue(ndisk->tq, &ndisk->bioqtask);
}
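/*
 * Hedged sketch (not the driver's actual handler): a taskqueue routine of
 * roughly this shape would drain the bioq that nvd_strategy() populates
 * when ordered bios are in flight.  The drain loop is illustrative only;
 * the real driver also releases ordered_in_flight when an ordered bio
 * completes, which is not shown here.
 */
static void
nvd_bioq_drain_sketch(void *arg, int pending)
{
	struct nvd_disk *ndisk = arg;
	struct bio *bp;

	for (;;) {
		mtx_lock(&ndisk->bioqlock);
		bp = bioq_takefirst(&ndisk->bioq);
		mtx_unlock(&ndisk->bioqlock);
		if (bp == NULL)
			break;
		nvd_bio_submit(ndisk, bp);
	}
}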
static void
test_callout(void *arg)
{
	struct callout_run *rn;
	int cpu;

	critical_enter();
	cpu = curcpu;
	critical_exit();
	rn = (struct callout_run *)arg;
	atomic_add_int(&rn->callout_waiting, 1);
	mtx_lock(&rn->lock);
	if (callout_pending(&rn->co_array[cpu]) ||
	    !callout_active(&rn->co_array[cpu])) {
		rn->co_return_npa++;
		atomic_subtract_int(&rn->callout_waiting, 1);
		mtx_unlock(&rn->lock);
		return;
	}
	callout_deactivate(&rn->co_array[cpu]);
	rn->co_completed++;
	mtx_unlock(&rn->lock);
	atomic_subtract_int(&rn->callout_waiting, 1);
}
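/*
 * Hedged sketch, not the test's actual setup code: one plausible way to
 * arm one callout per CPU so that test_callout() above fires everywhere.
 * The tick count and the callout_init() flag are illustrative assumptions.
 */
static void
arm_test_callouts_sketch(struct callout_run *rn, int ncpus)
{
	int i;

	for (i = 0; i < ncpus; i++) {
		callout_init(&rn->co_array[i], 1);	/* MP-safe callout */
		callout_reset_on(&rn->co_array[i], 3, test_callout, rn, i);
	}
}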
int
main(void)
{
	struct timespec ts, ts2;
	int error;
	long long count = 0;
	long long max;
	int j;
	int cpuno;
	int ncpu;
	int *done;
	size_t ncpu_size;

	done = mmap(NULL, 4096, PROT_READ|PROT_WRITE,
		    MAP_SHARED|MAP_ANON, -1, 0);

	/*
	 * How many cpu threads are there?
	 */
	ncpu = 0;
	ncpu_size = sizeof(ncpu);
	if (sysctlbyname("hw.ncpu", &ncpu, &ncpu_size, NULL, 0) < 0) {
		perror("sysctl hw.ncpu");
		exit(1);
	}
	printf("timing standard getuid() syscall, %d threads\n", ncpu);
	printf("if using powerd, run several times\n");
	*done = 0;

	/*
	 * Approximate timing run length
	 */
	start_timing();
	while (stop_timing(0, NULL) == 0) {
		for (j = 0; j < 100; ++j)
			getuid();
		count += 100;
	}
	max = count;

	/*
	 * Run same length on all threads.
	 */
	for (cpuno = 0; cpuno < ncpu; ++cpuno) {
		if (fork() == 0) {
			/*
			 * Give scheduler time to move threads around
			 */
			start_timing();
			while (stop_timing(0, NULL) == 0) {
				for (j = 0; j < 100; ++j)
					getuid();
			}

			/*
			 * Actual timing test is here.
			 */
			start_timing();
			for (count = 0; count < max; count += 100) {
				for (j = 0; j < 100; ++j)
					getuid();
			}
			stop_timing(count, "getuid() sysmsg");

			/*
			 * Don't unbusy the cpu until the other threads are
			 * done.
			 */
			atomic_add_int(done, 1);
			while (*done < ncpu)	/* wait for other threads */
				getuid();
			exit(0);
		}
	}
	while (wait3(NULL, 0, NULL) > 0 || errno == EINTR)
		;
	return 0;
}
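/*
 * The timing helpers are not part of this excerpt.  A minimal sketch of
 * what the usage above implies (poll until roughly one second has elapsed,
 * then report a rate) might look like this; the one-second window and the
 * output format are assumptions, not the original helpers.
 */
#include <stdio.h>
#include <time.h>

static struct timespec timing_begin;

void
start_timing(void)
{
	clock_gettime(CLOCK_MONOTONIC, &timing_begin);
}

int
stop_timing(long long count, const char *msg)
{
	struct timespec now;
	double elapsed;

	clock_gettime(CLOCK_MONOTONIC, &now);
	elapsed = (now.tv_sec - timing_begin.tv_sec) +
	    (now.tv_nsec - timing_begin.tv_nsec) / 1e9;

	if (msg == NULL)
		return (elapsed >= 1.0);	/* calibration poll */

	printf("%s: %lld calls in %.3fs (%.0f/sec)\n",
	    msg, count, elapsed, count / elapsed);
	return (1);
}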
/* * All-CPU rendezvous. CPUs are signalled, all execute the setup function * (if specified), rendezvous, execute the action function (if specified), * rendezvous again, execute the teardown function (if specified), and then * resume. * * Note that the supplied external functions _must_ be reentrant and aware * that they are running in parallel and in an unknown lock context. */ void smp_rendezvous_action(void) { struct thread *td; void *local_func_arg; void (*local_setup_func)(void*); void (*local_action_func)(void*); void (*local_teardown_func)(void*); #ifdef INVARIANTS int owepreempt; #endif /* Ensure we have up-to-date values. */ atomic_add_acq_int(&smp_rv_waiters[0], 1); while (smp_rv_waiters[0] < smp_rv_ncpus) cpu_spinwait(); /* Fetch rendezvous parameters after acquire barrier. */ local_func_arg = smp_rv_func_arg; local_setup_func = smp_rv_setup_func; local_action_func = smp_rv_action_func; local_teardown_func = smp_rv_teardown_func; /* * Use a nested critical section to prevent any preemptions * from occurring during a rendezvous action routine. * Specifically, if a rendezvous handler is invoked via an IPI * and the interrupted thread was in the critical_exit() * function after setting td_critnest to 0 but before * performing a deferred preemption, this routine can be * invoked with td_critnest set to 0 and td_owepreempt true. * In that case, a critical_exit() during the rendezvous * action would trigger a preemption which is not permitted in * a rendezvous action. To fix this, wrap all of the * rendezvous action handlers in a critical section. We * cannot use a regular critical section however as having * critical_exit() preempt from this routine would also be * problematic (the preemption must not occur before the IPI * has been acknowledged via an EOI). Instead, we * intentionally ignore td_owepreempt when leaving the * critical section. This should be harmless because we do * not permit rendezvous action routines to schedule threads, * and thus td_owepreempt should never transition from 0 to 1 * during this routine. */ td = curthread; td->td_critnest++; #ifdef INVARIANTS owepreempt = td->td_owepreempt; #endif /* * If requested, run a setup function before the main action * function. Ensure all CPUs have completed the setup * function before moving on to the action function. */ if (local_setup_func != smp_no_rendevous_barrier) { if (smp_rv_setup_func != NULL) smp_rv_setup_func(smp_rv_func_arg); atomic_add_int(&smp_rv_waiters[1], 1); while (smp_rv_waiters[1] < smp_rv_ncpus) cpu_spinwait(); } if (local_action_func != NULL) local_action_func(local_func_arg); if (local_teardown_func != smp_no_rendevous_barrier) { /* * Signal that the main action has been completed. If a * full exit rendezvous is requested, then all CPUs will * wait here until all CPUs have finished the main action. */ atomic_add_int(&smp_rv_waiters[2], 1); while (smp_rv_waiters[2] < smp_rv_ncpus) cpu_spinwait(); if (local_teardown_func != NULL) local_teardown_func(local_func_arg); } /* * Signal that the rendezvous is fully completed by this CPU. * This means that no member of smp_rv_* pseudo-structure will be * accessed by this target CPU after this point; in particular, * memory pointed by smp_rv_func_arg. */ atomic_add_int(&smp_rv_waiters[3], 1); td->td_critnest--; KASSERT(owepreempt == td->td_owepreempt, ("rendezvous action changed td_owepreempt")); }
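/*
 * Hedged usage sketch (assumed caller, not from the file above): running a
 * handler on every CPU through smp_rendezvous(), passing
 * smp_no_rendevous_barrier to skip the setup and teardown barriers that
 * smp_rendezvous_action() checks for.  flush_local_state() is hypothetical.
 */
static void
flush_local_state(void *arg __unused)
{
	/* Per-CPU work; must not sleep or schedule threads (see above). */
}

static void
flush_all_cpus_sketch(void)
{
	smp_rendezvous(smp_no_rendevous_barrier, flush_local_state,
	    smp_no_rendevous_barrier, NULL);
}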
void interrupt(unsigned long a0, unsigned long a1, unsigned long a2, struct trapframe *framep) { struct cpu_info *ci = curcpu(); extern int schedhz; switch (a0) { case ALPHA_INTR_XPROC: /* interprocessor interrupt */ #if defined(MULTIPROCESSOR) atomic_add_ulong(&ci->ci_intrdepth, 1); alpha_ipi_process(ci, framep); /* * Handle inter-console messages if we're the primary * CPU. */ if (ci->ci_cpuid == hwrpb->rpb_primary_cpu_id && hwrpb->rpb_txrdy != 0) cpu_iccb_receive(); atomic_sub_ulong(&ci->ci_intrdepth, 1); #else printf("WARNING: received interprocessor interrupt!\n"); #endif /* MULTIPROCESSOR */ break; case ALPHA_INTR_CLOCK: /* clock interrupt */ atomic_add_int(&uvmexp.intrs, 1); if (CPU_IS_PRIMARY(ci)) clk_count.ec_count++; if (platform.clockintr) { /* * Call hardclock(). This will also call * statclock(). On the primary CPU, it * will also deal with time-of-day stuff. */ (*platform.clockintr)((struct clockframe *)framep); /* * If it's time to call the scheduler clock, * do so. */ if ((++ci->ci_schedstate.spc_schedticks & 0x3f) == 0 && schedhz != 0) schedclock(ci->ci_curproc); } break; case ALPHA_INTR_ERROR: /* Machine Check or Correctable Error */ atomic_add_ulong(&ci->ci_intrdepth, 1); a0 = alpha_pal_rdmces(); if (platform.mcheck_handler) (*platform.mcheck_handler)(a0, framep, a1, a2); else machine_check(a0, framep, a1, a2); atomic_sub_ulong(&ci->ci_intrdepth, 1); break; case ALPHA_INTR_DEVICE: /* I/O device interrupt */ { struct scbvec *scb; KDASSERT(a1 >= SCB_IOVECBASE && a1 < SCB_SIZE); atomic_add_ulong(&ci->ci_intrdepth, 1); atomic_add_int(&uvmexp.intrs, 1); scb = &scb_iovectab[SCB_VECTOIDX(a1 - SCB_IOVECBASE)]; (*scb->scb_func)(scb->scb_arg, a1); atomic_sub_ulong(&ci->ci_intrdepth, 1); break; } case ALPHA_INTR_PERF: /* performance counter interrupt */ printf("WARNING: received performance counter interrupt!\n"); break; case ALPHA_INTR_PASSIVE: #if 0 printf("WARNING: received passive release interrupt vec " "0x%lx\n", a1); #endif break; default: printf("unexpected interrupt: type 0x%lx vec 0x%lx " "a2 0x%lx" #if defined(MULTIPROCESSOR) " cpu %lu" #endif "\n", a0, a1, a2 #if defined(MULTIPROCESSOR) , ci->ci_cpuid #endif ); panic("interrupt"); /* NOTREACHED */ } }
/* * malloc: * * Allocate a block of memory. * * If M_NOWAIT is set, this routine will not block and return NULL if * the allocation fails. */ void * malloc(unsigned long size, struct malloc_type *mtp, int flags) { int indx; struct malloc_type_internal *mtip; caddr_t va; uma_zone_t zone; #if defined(DIAGNOSTIC) || defined(DEBUG_REDZONE) unsigned long osize = size; #endif #ifdef INVARIANTS KASSERT(mtp->ks_magic == M_MAGIC, ("malloc: bad malloc type magic")); /* * Check that exactly one of M_WAITOK or M_NOWAIT is specified. */ indx = flags & (M_WAITOK | M_NOWAIT); if (indx != M_NOWAIT && indx != M_WAITOK) { static struct timeval lasterr; static int curerr, once; if (once == 0 && ppsratecheck(&lasterr, &curerr, 1)) { printf("Bad malloc flags: %x\n", indx); kdb_backtrace(); flags |= M_WAITOK; once++; } } #endif #ifdef MALLOC_MAKE_FAILURES if ((flags & M_NOWAIT) && (malloc_failure_rate != 0)) { atomic_add_int(&malloc_nowait_count, 1); if ((malloc_nowait_count % malloc_failure_rate) == 0) { atomic_add_int(&malloc_failure_count, 1); t_malloc_fail = time_uptime; return (NULL); } } #endif if (flags & M_WAITOK) KASSERT(curthread->td_intr_nesting_level == 0, ("malloc(M_WAITOK) in interrupt context")); #ifdef DEBUG_MEMGUARD if (memguard_cmp_mtp(mtp, size)) { va = memguard_alloc(size, flags); if (va != NULL) return (va); /* This is unfortunate but should not be fatal. */ } #endif #ifdef DEBUG_REDZONE size = redzone_size_ntor(size); #endif if (size <= kmem_zmax) { mtip = mtp->ks_handle; if (size & KMEM_ZMASK) size = (size & ~KMEM_ZMASK) + KMEM_ZBASE; indx = kmemsize[size >> KMEM_ZSHIFT]; KASSERT(mtip->mti_zone < numzones, ("mti_zone %u out of range %d", mtip->mti_zone, numzones)); zone = kmemzones[indx].kz_zone[mtip->mti_zone]; #ifdef MALLOC_PROFILE krequests[size >> KMEM_ZSHIFT]++; #endif va = uma_zalloc(zone, flags); if (va != NULL) size = zone->uz_size; malloc_type_zone_allocated(mtp, va == NULL ? 0 : size, indx); } else {
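/*
 * Hedged usage sketch of the malloc(9) interface implemented above
 * (requires <sys/param.h>, <sys/kernel.h>, <sys/malloc.h>).  The malloc
 * type M_EXAMPLE and the structure are illustrative, not from this file.
 */
MALLOC_DEFINE(M_EXAMPLE, "example", "example allocations");

struct example_softc {
	int	unit;
};

static struct example_softc *
example_alloc(int unit)
{
	struct example_softc *sc;

	/* M_WAITOK may sleep (never from interrupt context); M_ZERO zeroes. */
	sc = malloc(sizeof(*sc), M_EXAMPLE, M_WAITOK | M_ZERO);
	sc->unit = unit;
	return (sc);
}

static void
example_free(struct example_softc *sc)
{
	free(sc, M_EXAMPLE);
}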
struct socket *
sctp_get_peeloff(struct socket *head, sctp_assoc_t assoc_id, int *error)
{
	struct socket *newso;
	struct sctp_inpcb *inp, *n_inp;
	struct sctp_tcb *stcb;

	SCTPDBG(SCTP_DEBUG_PEEL1, "SCTP peel-off called\n");
	inp = (struct sctp_inpcb *)head->so_pcb;
	if (inp == NULL) {
		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, EFAULT);
		*error = EFAULT;
		return (NULL);
	}
	stcb = sctp_findassociation_ep_asocid(inp, assoc_id, 1);
	if (stcb == NULL) {
		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, ENOTCONN);
		*error = ENOTCONN;
		return (NULL);
	}
	atomic_add_int(&stcb->asoc.refcnt, 1);
	SCTP_TCB_UNLOCK(stcb);
	newso = sonewconn(head, SS_ISCONNECTED);
	if (newso == NULL) {
		SCTPDBG(SCTP_DEBUG_PEEL1, "sctp_peeloff:sonewconn failed\n");
		SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_PEELOFF, ENOMEM);
		*error = ENOMEM;
		atomic_subtract_int(&stcb->asoc.refcnt, 1);
		return (NULL);
	}
	SCTP_TCB_LOCK(stcb);
	atomic_subtract_int(&stcb->asoc.refcnt, 1);
	n_inp = (struct sctp_inpcb *)newso->so_pcb;
	SOCK_LOCK(head);
	n_inp->sctp_flags = (SCTP_PCB_FLAGS_UDPTYPE |
	    SCTP_PCB_FLAGS_CONNECTED |
	    SCTP_PCB_FLAGS_IN_TCPPOOL |	/* Turn on Blocking IO */
	    (SCTP_PCB_COPY_FLAGS & inp->sctp_flags));
	n_inp->sctp_features = inp->sctp_features;
	n_inp->sctp_frag_point = inp->sctp_frag_point;
	n_inp->partial_delivery_point = inp->partial_delivery_point;
	n_inp->sctp_context = inp->sctp_context;
	n_inp->inp_starting_point_for_iterator = NULL;
	/* copy in the authentication parameters from the original endpoint */
	if (n_inp->sctp_ep.local_hmacs)
		sctp_free_hmaclist(n_inp->sctp_ep.local_hmacs);
	n_inp->sctp_ep.local_hmacs =
	    sctp_copy_hmaclist(inp->sctp_ep.local_hmacs);
	if (n_inp->sctp_ep.local_auth_chunks)
		sctp_free_chunklist(n_inp->sctp_ep.local_auth_chunks);
	n_inp->sctp_ep.local_auth_chunks =
	    sctp_copy_chunklist(inp->sctp_ep.local_auth_chunks);
	(void)sctp_copy_skeylist(&inp->sctp_ep.shared_keys,
	    &n_inp->sctp_ep.shared_keys);
	n_inp->sctp_socket = newso;
	if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTOCLOSE)) {
		sctp_feature_off(n_inp, SCTP_PCB_FLAGS_AUTOCLOSE);
		n_inp->sctp_ep.auto_close_time = 0;
		sctp_timer_stop(SCTP_TIMER_TYPE_AUTOCLOSE, n_inp, stcb, NULL,
		    SCTP_FROM_SCTP_PEELOFF + SCTP_LOC_1);
	}
	/* Turn off any non-blocking semantic. */
	SCTP_CLEAR_SO_NBIO(newso);
	newso->so_state |= SS_ISCONNECTED;
	/* We remove it right away */
#ifdef SCTP_LOCK_LOGGING
	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOCK_LOGGING_ENABLE) {
		sctp_log_lock(inp, (struct sctp_tcb *)NULL, SCTP_LOG_LOCK_SOCK);
	}
#endif
	TAILQ_REMOVE(&head->so_comp, newso, so_list);
	head->so_qlen--;
	SOCK_UNLOCK(head);
	/*
	 * Now we must move it from one hash table to another and get the
	 * stcb in the right place.
	 */
	sctp_move_pcb_and_assoc(inp, n_inp, stcb);
	atomic_add_int(&stcb->asoc.refcnt, 1);
	SCTP_TCB_UNLOCK(stcb);
	/*
	 * And now the final hack. We move data in the pending side i.e.
	 * head to the new socket buffer. Let the GRUBBING begin :-0
	 */
	sctp_pull_off_control_to_new_inp(inp, n_inp, stcb, SBL_WAIT);
	atomic_subtract_int(&stcb->asoc.refcnt, 1);
	return (newso);
}
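/*
 * Hedged userland-side sketch: the sctp_peeloff(2) library call is what
 * ultimately reaches this kernel path, branching one association off a
 * one-to-many SCTP socket into its own descriptor.  Error handling is left
 * to the caller.
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/sctp.h>

int
peel_association_sketch(int sd, sctp_assoc_t assoc_id)
{
	int newfd;

	newfd = sctp_peeloff(sd, assoc_id);	/* new fd, or -1 with errno set */
	return (newfd);
}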
/* * Handle an exception. * In the case of a kernel trap, we return the pc where to resume if * pcb_onfault is set, otherwise, return old pc. */ void trap(struct trap_frame *trapframe) { struct cpu_info *ci = curcpu(); struct proc *p = ci->ci_curproc; int type; type = (trapframe->cause & CR_EXC_CODE) >> CR_EXC_CODE_SHIFT; #if defined(CPU_R8000) && !defined(DEBUG_INTERRUPT) if (type != T_INT) #endif trapdebug_enter(ci, trapframe, -1); #ifdef CPU_R8000 if (type != T_INT && type != T_SYSCALL) #else if (type != T_SYSCALL) #endif atomic_add_int(&uvmexp.traps, 1); if (USERMODE(trapframe->sr)) { type |= T_USER; refreshcreds(p); } /* * Enable hardware interrupts if they were on before the trap; * enable IPI interrupts only otherwise. */ switch (type) { #ifdef CPU_R8000 case T_INT: case T_INT | T_USER: #endif case T_BREAK: break; default: if (ISSET(trapframe->sr, SR_INT_ENAB)) enableintr(); else { #ifdef MULTIPROCESSOR ENABLEIPI(); #endif } break; } #ifdef CPU_R8000 /* * Some exception causes on R8000 are actually detected by external * circuitry, and as such are reported as external interrupts. * On R8000 kernels, external interrupts vector to trap() instead of * interrupt(), so that we can process these particular exceptions * as if they were triggered as regular exceptions. */ if ((type & ~T_USER) == T_INT) { /* * Similar reality check as done in interrupt(), in case * an interrupt occured between a write to COP_0_STATUS_REG * and it taking effect. */ if (!ISSET(trapframe->sr, SR_INT_ENAB)) return; if (trapframe->cause & CR_VCE) { #ifndef DEBUG_INTERRUPT trapdebug_enter(ci, trapframe, -1); #endif panic("VCE or TLBX"); } if (trapframe->cause & CR_FPE) { #ifndef DEBUG_INTERRUPT trapdebug_enter(ci, trapframe, -1); #endif itsa(trapframe, ci, p, T_FPE | (type & T_USER)); cp0_reset_cause(CR_FPE); } if (trapframe->cause & CR_INT_MASK) interrupt(trapframe); return; /* no userret */ } else #endif itsa(trapframe, ci, p, type); if (type & T_USER) userret(p); }
/* * Attempt to build up a hash table for the directory contents in * inode 'ip'. Returns 0 on success, or -1 of the operation failed. */ int ufsdirhash_build(struct inode *ip) { struct dirhash *dh; struct buf *bp = NULL; struct direct *ep; struct vnode *vp; doff_t bmask, pos; int dirblocks, i, j, memreqd, nblocks, narrays, nslots, slot; const int needswap = UFS_MPNEEDSWAP(ip->i_ump); int dirblksiz = ip->i_ump->um_dirblksiz; /* Check if we can/should use dirhash. */ if (ip->i_dirhash == NULL) { if (ip->i_size < (ufs_dirhashminblks * dirblksiz) || OFSFMT(ip)) return (-1); } else { /* Hash exists, but sysctls could have changed. */ if (ip->i_size < (ufs_dirhashminblks * dirblksiz) || ufs_dirhashmem > ufs_dirhashmaxmem) { ufsdirhash_free(ip); return (-1); } /* Check if hash exists and is intact (note: unlocked read). */ if (ip->i_dirhash->dh_hash != NULL) return (0); /* Free the old, recycled hash and build a new one. */ ufsdirhash_free(ip); } /* Don't hash removed directories. */ if (ip->i_nlink == 0) return (-1); vp = ip->i_vnode; /* Allocate 50% more entries than this dir size could ever need. */ KASSERT(ip->i_size >= dirblksiz); nslots = ip->i_size / UFS_DIRECTSIZ(1); nslots = (nslots * 3 + 1) / 2; narrays = howmany(nslots, DH_NBLKOFF); nslots = narrays * DH_NBLKOFF; dirblocks = howmany(ip->i_size, dirblksiz); nblocks = (dirblocks * 3 + 1) / 2; memreqd = sizeof(*dh) + narrays * sizeof(*dh->dh_hash) + narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) + nblocks * sizeof(*dh->dh_blkfree); while (atomic_add_int_nv(&ufs_dirhashmem, memreqd) > ufs_dirhashmaxmem) { atomic_add_int(&ufs_dirhashmem, -memreqd); if (memreqd > ufs_dirhashmaxmem / 2) return (-1); /* Try to free some space. */ if (ufsdirhash_recycle(memreqd) != 0) return (-1); else DIRHASHLIST_UNLOCK(); } /* * Use non-blocking mallocs so that we will revert to a linear * lookup on failure rather than potentially blocking forever. */ dh = pool_cache_get(ufsdirhash_cache, PR_NOWAIT); if (dh == NULL) { atomic_add_int(&ufs_dirhashmem, -memreqd); return (-1); } memset(dh, 0, sizeof(*dh)); mutex_init(&dh->dh_lock, MUTEX_DEFAULT, IPL_NONE); DIRHASH_LOCK(dh); dh->dh_hashsz = narrays * sizeof(dh->dh_hash[0]); dh->dh_hash = kmem_zalloc(dh->dh_hashsz, KM_NOSLEEP); dh->dh_blkfreesz = nblocks * sizeof(dh->dh_blkfree[0]); dh->dh_blkfree = kmem_zalloc(dh->dh_blkfreesz, KM_NOSLEEP); if (dh->dh_hash == NULL || dh->dh_blkfree == NULL) goto fail; for (i = 0; i < narrays; i++) { if ((dh->dh_hash[i] = DIRHASH_BLKALLOC()) == NULL) goto fail; for (j = 0; j < DH_NBLKOFF; j++) dh->dh_hash[i][j] = DIRHASH_EMPTY; } /* Initialise the hash table and block statistics. */ dh->dh_narrays = narrays; dh->dh_hlen = nslots; dh->dh_nblk = nblocks; dh->dh_dirblks = dirblocks; for (i = 0; i < dirblocks; i++) dh->dh_blkfree[i] = dirblksiz / DIRALIGN; for (i = 0; i < DH_NFSTATS; i++) dh->dh_firstfree[i] = -1; dh->dh_firstfree[DH_NFSTATS] = 0; dh->dh_seqopt = 0; dh->dh_seqoff = 0; dh->dh_score = DH_SCOREINIT; ip->i_dirhash = dh; bmask = VFSTOUFS(vp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; pos = 0; while (pos < ip->i_size) { if ((curcpu()->ci_schedstate.spc_flags & SPCF_SHOULDYIELD) != 0) { preempt(); } /* If necessary, get the next directory block. */ if ((pos & bmask) == 0) { if (bp != NULL) brelse(bp, 0); if (ufs_blkatoff(vp, (off_t)pos, NULL, &bp, false) != 0) goto fail; } /* Add this entry to the hash. */ ep = (struct direct *)((char *)bp->b_data + (pos & bmask)); if (ep->d_reclen == 0 || ep->d_reclen > dirblksiz - (pos & (dirblksiz - 1))) { /* Corrupted directory. 
*/ brelse(bp, 0); goto fail; } if (ep->d_ino != 0) { /* Add the entry (simplified ufsdirhash_add). */ slot = ufsdirhash_hash(dh, ep->d_name, ep->d_namlen); while (DH_ENTRY(dh, slot) != DIRHASH_EMPTY) slot = WRAPINCR(slot, dh->dh_hlen); dh->dh_hused++; DH_ENTRY(dh, slot) = pos; ufsdirhash_adjfree(dh, pos, -UFS_DIRSIZ(0, ep, needswap), dirblksiz); } pos += ep->d_reclen; } if (bp != NULL) brelse(bp, 0); DIRHASHLIST_LOCK(); TAILQ_INSERT_TAIL(&ufsdirhash_list, dh, dh_list); dh->dh_onlist = 1; DIRHASH_UNLOCK(dh); DIRHASHLIST_UNLOCK(); return (0); fail: DIRHASH_UNLOCK(dh); if (dh->dh_hash != NULL) { for (i = 0; i < narrays; i++) if (dh->dh_hash[i] != NULL) DIRHASH_BLKFREE(dh->dh_hash[i]); kmem_free(dh->dh_hash, dh->dh_hashsz); } if (dh->dh_blkfree != NULL) kmem_free(dh->dh_blkfree, dh->dh_blkfreesz); mutex_destroy(&dh->dh_lock); pool_cache_put(ufsdirhash_cache, dh); ip->i_dirhash = NULL; atomic_add_int(&ufs_dirhashmem, -memreqd); return (-1); }
void interrupt(u_int64_t vector, struct trapframe *framep) { struct thread *td; volatile struct ia64_interrupt_block *ib = IA64_INTERRUPT_BLOCK; td = curthread; atomic_add_int(&td->td_intr_nesting_level, 1); /* * Handle ExtINT interrupts by generating an INTA cycle to * read the vector. */ if (vector == 0) { vector = ib->ib_inta; printf("ExtINT interrupt: vector=%ld\n", vector); } if (vector == 255) {/* clock interrupt */ /* CTR0(KTR_INTR, "clock interrupt"); */ cnt.v_intr++; #ifdef EVCNT_COUNTERS clock_intr_evcnt.ev_count++; #else intrcnt[INTRCNT_CLOCK]++; #endif critical_enter(); #ifdef SMP clks[PCPU_GET(cpuid)]++; /* Only the BSP runs the real clock */ if (PCPU_GET(cpuid) == 0) { #endif handleclock(framep); /* divide hz (1024) by 8 to get stathz (128) */ if ((++schedclk2 & 0x7) == 0) statclock((struct clockframe *)framep); #ifdef SMP } else { ia64_set_itm(ia64_get_itc() + itm_reload); mtx_lock_spin(&sched_lock); hardclock_process(curthread, TRAPF_USERMODE(framep)); if ((schedclk2 & 0x7) == 0) statclock_process(curkse, TRAPF_PC(framep), TRAPF_USERMODE(framep)); mtx_unlock_spin(&sched_lock); } #endif critical_exit(); #ifdef SMP } else if (vector == ipi_vector[IPI_AST]) { asts[PCPU_GET(cpuid)]++; CTR1(KTR_SMP, "IPI_AST, cpuid=%d", PCPU_GET(cpuid)); } else if (vector == ipi_vector[IPI_RENDEZVOUS]) { rdvs[PCPU_GET(cpuid)]++; CTR1(KTR_SMP, "IPI_RENDEZVOUS, cpuid=%d", PCPU_GET(cpuid)); smp_rendezvous_action(); } else if (vector == ipi_vector[IPI_STOP]) { u_int32_t mybit = PCPU_GET(cpumask); CTR1(KTR_SMP, "IPI_STOP, cpuid=%d", PCPU_GET(cpuid)); savectx(PCPU_GET(pcb)); stopped_cpus |= mybit; while ((started_cpus & mybit) == 0) /* spin */; started_cpus &= ~mybit; stopped_cpus &= ~mybit; if (PCPU_GET(cpuid) == 0 && cpustop_restartfunc != NULL) { void (*f)(void) = cpustop_restartfunc; cpustop_restartfunc = NULL; (*f)(); } } else if (vector == ipi_vector[IPI_TEST]) { CTR1(KTR_SMP, "IPI_TEST, cpuid=%d", PCPU_GET(cpuid)); mp_ipi_test++; #endif } else { ints[PCPU_GET(cpuid)]++; ia64_dispatch_intr(framep, vector); } atomic_subtract_int(&td->td_intr_nesting_level, 1); }
/* * Try to reuse a vnode from the free list. This function is somewhat * advisory in that NULL can be returned as a normal case, even if free * vnodes are present. * * The scan is limited because it can result in excessive CPU use during * periods of extreme vnode use. * * NOTE: The returned vnode is not completely initialized. */ static struct vnode * cleanfreevnode(int maxcount) { struct vnode *vp; int count; int trigger = (long)vmstats.v_page_count / (activevnodes * 2 + 1); /* * Try to deactivate some vnodes cached on the active list. */ if (countcachedvnodes(0) < inactivevnodes) goto skip; for (count = 0; count < maxcount * 2; count++) { spin_lock(&vfs_spin); vp = TAILQ_NEXT(&vnode_active_rover, v_list); TAILQ_REMOVE(&vnode_active_list, &vnode_active_rover, v_list); if (vp == NULL) { TAILQ_INSERT_HEAD(&vnode_active_list, &vnode_active_rover, v_list); } else { TAILQ_INSERT_AFTER(&vnode_active_list, vp, &vnode_active_rover, v_list); } if (vp == NULL) { spin_unlock(&vfs_spin); continue; } if ((vp->v_refcnt & VREF_MASK) != 0) { spin_unlock(&vfs_spin); vp->v_act += VACT_INC; if (vp->v_act > VACT_MAX) /* SMP race ok */ vp->v_act = VACT_MAX; continue; } /* * decrement by less if the vnode's object has a lot of * VM pages. XXX possible SMP races. */ if (vp->v_act > 0) { vm_object_t obj; if ((obj = vp->v_object) != NULL && obj->resident_page_count >= trigger) { vp->v_act -= 1; } else { vp->v_act -= VACT_INC; } if (vp->v_act < 0) vp->v_act = 0; spin_unlock(&vfs_spin); continue; } /* * Try to deactivate the vnode. */ if ((atomic_fetchadd_int(&vp->v_refcnt, 1) & VREF_MASK) == 0) atomic_add_int(&mycpu->gd_cachedvnodes, -1); atomic_set_int(&vp->v_refcnt, VREF_FINALIZE); spin_unlock(&vfs_spin); vrele(vp); } skip: /* * Loop trying to lock the first vnode on the free list. * Cycle if we can't. */ for (count = 0; count < maxcount; count++) { spin_lock(&vfs_spin); vp = TAILQ_FIRST(&vnode_inactive_list); if (vp == NULL) { spin_unlock(&vfs_spin); break; } /* * non-blocking vx_get will also ref the vnode on success. */ if (vx_get_nonblock(vp)) { KKASSERT(vp->v_state == VS_INACTIVE); TAILQ_REMOVE(&vnode_inactive_list, vp, v_list); TAILQ_INSERT_TAIL(&vnode_inactive_list, vp, v_list); spin_unlock(&vfs_spin); continue; } /* * Because we are holding vfs_spin the vnode should currently * be inactive and VREF_TERMINATE should still be set. * * Once vfs_spin is released the vnode's state should remain * unmodified due to both the lock and ref on it. */ KKASSERT(vp->v_state == VS_INACTIVE); spin_unlock(&vfs_spin); #ifdef TRACKVNODE if ((u_long)vp == trackvnode) kprintf("cleanfreevnode %p %08x\n", vp, vp->v_flag); #endif /* * Do not reclaim/reuse a vnode while auxillary refs exists. * This includes namecache refs due to a related ncp being * locked or having children, a VM object association, or * other hold users. * * Do not reclaim/reuse a vnode if someone else has a real * ref on it. This can occur if a filesystem temporarily * releases the vnode lock during VOP_RECLAIM. */ if (vp->v_auxrefs || (vp->v_refcnt & ~VREF_FINALIZE) != VREF_TERMINATE + 1) { failed: if (vp->v_state == VS_INACTIVE) { spin_lock(&vfs_spin); if (vp->v_state == VS_INACTIVE) { TAILQ_REMOVE(&vnode_inactive_list, vp, v_list); TAILQ_INSERT_TAIL(&vnode_inactive_list, vp, v_list); } spin_unlock(&vfs_spin); } vx_put(vp); continue; } /* * VINACTIVE and VREF_TERMINATE are expected to both be set * for vnodes pulled from the inactive list, and cannot be * changed while we hold the vx lock. * * Try to reclaim the vnode. 
*/ KKASSERT(vp->v_flag & VINACTIVE); KKASSERT(vp->v_refcnt & VREF_TERMINATE); if ((vp->v_flag & VRECLAIMED) == 0) { if (cache_inval_vp_nonblock(vp)) goto failed; vgone_vxlocked(vp); /* vnode is still VX locked */ } /* * At this point if there are no other refs or auxrefs on * the vnode with the inactive list locked, and we remove * the vnode from the inactive list, it should not be * possible for anyone else to access the vnode any more. * * Since the vnode is in a VRECLAIMED state, no new * namecache associations could have been made and the * vnode should have already been removed from its mountlist. * * Since we hold a VX lock on the vnode it cannot have been * reactivated (moved out of the inactive list). */ KKASSERT(TAILQ_EMPTY(&vp->v_namecache)); spin_lock(&vfs_spin); if (vp->v_auxrefs || (vp->v_refcnt & ~VREF_FINALIZE) != VREF_TERMINATE + 1) { spin_unlock(&vfs_spin); goto failed; } KKASSERT(vp->v_state == VS_INACTIVE); TAILQ_REMOVE(&vnode_inactive_list, vp, v_list); --inactivevnodes; vp->v_state = VS_DYING; spin_unlock(&vfs_spin); /* * Nothing should have been able to access this vp. Only * our ref should remain now. */ atomic_clear_int(&vp->v_refcnt, VREF_TERMINATE|VREF_FINALIZE); KASSERT(vp->v_refcnt == 1, ("vp %p badrefs %08x", vp, vp->v_refcnt)); /* * Return a VX locked vnode suitable for reuse. */ return(vp); } return(NULL); }
/**************************************************************** * VNODE ACQUISITION FUNCTIONS * **************************************************************** * * These functions must be used when accessing a vnode that has no * chance of being destroyed in a SMP race. That means the caller will * usually either hold an auxiliary reference (such as the namecache) * or hold some other lock that ensures that the vnode cannot be destroyed. * * These functions are MANDATORY for any code chain accessing a vnode * whos activation state is not known. * * vget() can be called with LK_NOWAIT and will return EBUSY if the * lock cannot be immediately acquired. * * vget()/vput() are used when reactivation is desired. * * vx_get() and vx_put() are used when reactivation is not desired. */ int vget(struct vnode *vp, int flags) { int error; /* * A lock type must be passed */ if ((flags & LK_TYPE_MASK) == 0) { panic("vget() called with no lock specified!"); /* NOT REACHED */ } /* * Reference the structure and then acquire the lock. * * NOTE: The requested lock might be a shared lock and does * not protect our access to the refcnt or other fields. */ if ((atomic_fetchadd_int(&vp->v_refcnt, 1) & VREF_MASK) == 0) atomic_add_int(&mycpu->gd_cachedvnodes, -1); if ((error = vn_lock(vp, flags | LK_FAILRECLAIM)) != 0) { /* * The lock failed, undo and return an error. This will not * normally trigger a termination. */ vrele(vp); } else if (vp->v_flag & VRECLAIMED) { /* * The node is being reclaimed and cannot be reactivated * any more, undo and return ENOENT. */ vn_unlock(vp); vrele(vp); error = ENOENT; } else if (vp->v_state == VS_ACTIVE) { /* * A VS_ACTIVE vnode coupled with the fact that we have * a vnode lock (even if shared) prevents v_state from * changing. Since the vnode is not in a VRECLAIMED state, * we can safely clear VINACTIVE. * * NOTE! Multiple threads may clear VINACTIVE if this is * shared lock. This race is allowed. */ _vclrflags(vp, VINACTIVE); /* SMP race ok */ vp->v_act += VACT_INC; if (vp->v_act > VACT_MAX) /* SMP race ok */ vp->v_act = VACT_MAX; error = 0; } else { /* * If the vnode is not VS_ACTIVE it must be reactivated * in addition to clearing VINACTIVE. An exclusive spin_lock * is needed to manipulate the vnode's list. * * Because the lockmgr lock might be shared, we might race * another reactivation, which we handle. In this situation, * however, the refcnt prevents other v_state races. * * As with above, clearing VINACTIVE is allowed to race other * clearings of VINACTIVE. * * VREF_TERMINATE and VREF_FINALIZE can only be cleared when * the refcnt is non-zero and the vnode has not been * reclaimed. This also means that the transitions do * not affect cachedvnodes. */ _vclrflags(vp, VINACTIVE); vp->v_act += VACT_INC; if (vp->v_act > VACT_MAX) /* SMP race ok */ vp->v_act = VACT_MAX; spin_lock(&vp->v_spin); switch(vp->v_state) { case VS_INACTIVE: _vactivate(vp); atomic_clear_int(&vp->v_refcnt, VREF_TERMINATE | VREF_FINALIZE); spin_unlock(&vp->v_spin); break; case VS_CACHED: _vactivate(vp); atomic_clear_int(&vp->v_refcnt, VREF_TERMINATE | VREF_FINALIZE); spin_unlock(&vp->v_spin); break; case VS_ACTIVE: atomic_clear_int(&vp->v_refcnt, VREF_FINALIZE); spin_unlock(&vp->v_spin); break; case VS_DYING: spin_unlock(&vp->v_spin); panic("Impossible VS_DYING state"); break; } error = 0; } return(error); }
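/*
 * Hedged usage sketch (illustrative caller, not from this file): code that
 * already holds some guarantee on 'vp' (for example a namecache or other
 * auxiliary reference) reactivates it with vget() and releases it with
 * vput().  The lock type is an assumption.
 */
static int
example_touch_vnode(struct vnode *vp)
{
	int error;

	error = vget(vp, LK_SHARED);	/* reference + lock, may reactivate */
	if (error == 0) {
		/* vp is locked and cannot be reclaimed here */
		vput(vp);		/* unlock and drop the reference */
	}
	return (error);
}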
/*
 * Remove an auxiliary reference from the vnode.
 */
void
vdrop(struct vnode *vp)
{
	atomic_add_int(&vp->v_auxrefs, -1);
}
/*
 * Add an auxiliary data structure reference to the vnode.  Auxiliary
 * references do not change the state of the vnode or prevent deactivation
 * or reclamation of the vnode, but will prevent the vnode from being
 * destroyed (kfree()'d).
 *
 * WARNING!  vhold() must not acquire v_spin.  The spinlock may or may not
 *	     already be held by the caller.  vdrop() will clean up the
 *	     free list state.
 */
void
vhold(struct vnode *vp)
{
	atomic_add_int(&vp->v_auxrefs, 1);
}
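/*
 * Hedged sketch of the auxiliary-reference pattern vhold()/vdrop() support:
 * keep the vnode structure from being freed across a blocking operation
 * without reactivating it.  example_blocking_operation() is hypothetical.
 */
static void example_blocking_operation(struct vnode *vp);

static void
example_scan_vnode(struct vnode *vp)
{
	vhold(vp);			/* vp cannot be kfree()'d ... */
	example_blocking_operation(vp);	/* ... even if it is deactivated */
	vdrop(vp);
}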
debuglockmgr(struct lock *lkp, u_int flags, const char *name, const char *file, int line) #endif { thread_t td; thread_t otd; int error; int extflags; int count; int pflags; int wflags; int timo; #ifdef DEBUG_LOCKS int i; #endif error = 0; if (mycpu->gd_intr_nesting_level && (flags & LK_NOWAIT) == 0 && (flags & LK_TYPE_MASK) != LK_RELEASE && panic_cpu_gd != mycpu ) { #ifndef DEBUG_LOCKS panic("lockmgr %s from %p: called from interrupt, ipi, " "or hard code section", lkp->lk_wmesg, ((int **)&lkp)[-1]); #else panic("lockmgr %s from %s:%d: called from interrupt, ipi, " "or hard code section", lkp->lk_wmesg, file, line); #endif } #ifdef DEBUG_LOCKS if (mycpu->gd_spinlocks && ((flags & LK_NOWAIT) == 0)) { panic("lockmgr %s from %s:%d: called with %d spinlocks held", lkp->lk_wmesg, file, line, mycpu->gd_spinlocks); } #endif extflags = (flags | lkp->lk_flags) & LK_EXTFLG_MASK; td = curthread; again: count = lkp->lk_count; cpu_ccfence(); switch (flags & LK_TYPE_MASK) { case LK_SHARED: /* * Shared lock critical path case */ if ((count & (LKC_EXREQ|LKC_UPREQ|LKC_EXCL)) == 0) { if (atomic_cmpset_int(&lkp->lk_count, count, count + 1)) { COUNT(td, 1); break; } goto again; } /* * If the caller already holds the lock exclusively then * we silently obtain another count on the exclusive lock. * * WARNING! The old FreeBSD behavior was to downgrade, * but this creates a problem when recursions * return to the caller and the caller expects * its original exclusive lock to remain exclusively * locked. */ if (lkp->lk_lockholder == td) { KKASSERT(count & LKC_EXCL); if ((extflags & LK_CANRECURSE) == 0) { if (extflags & LK_NOWAIT) { error = EBUSY; break; } panic("lockmgr: locking against myself"); } atomic_add_int(&lkp->lk_count, 1); COUNT(td, 1); break; } /* * Slow path */ pflags = (extflags & LK_PCATCH) ? PCATCH : 0; timo = (extflags & LK_TIMELOCK) ? lkp->lk_timo : 0; wflags = (td->td_flags & TDF_DEADLKTREAT) ? LKC_EXCL : (LKC_EXCL|LKC_EXREQ|LKC_UPREQ); /* * Block while the lock is held exclusively or, conditionally, * if other threads are tring to obtain an exclusive lock or * upgrade to one. */ if (count & wflags) { if (extflags & LK_NOWAIT) { error = EBUSY; break; } tsleep_interlock(lkp, pflags); if (!atomic_cmpset_int(&lkp->lk_count, count, count | LKC_SHREQ)) { goto again; } mycpu->gd_cnt.v_lock_name[0] = 'S'; strncpy(mycpu->gd_cnt.v_lock_name + 1, lkp->lk_wmesg, sizeof(mycpu->gd_cnt.v_lock_name) - 2); ++mycpu->gd_cnt.v_lock_colls; error = tsleep(lkp, pflags | PINTERLOCKED, lkp->lk_wmesg, timo); if (error) break; if (extflags & LK_SLEEPFAIL) { error = ENOLCK; break; } goto again; } /* * Otherwise we can bump the count */ if (atomic_cmpset_int(&lkp->lk_count, count, count + 1)) { COUNT(td, 1); break; } goto again; case LK_EXCLUSIVE: /* * Exclusive lock critical path. */ if (count == 0) { if (atomic_cmpset_int(&lkp->lk_count, count, LKC_EXCL | (count + 1))) { lkp->lk_lockholder = td; COUNT(td, 1); break; } goto again; } /* * Recursive lock if we already hold it exclusively. */ if (lkp->lk_lockholder == td) { KKASSERT(count & LKC_EXCL); if ((extflags & LK_CANRECURSE) == 0) { if (extflags & LK_NOWAIT) { error = EBUSY; break; } panic("lockmgr: locking against myself"); } atomic_add_int(&lkp->lk_count, 1); COUNT(td, 1); break; } /* * We will block, handle LK_NOWAIT */ if (extflags & LK_NOWAIT) { error = EBUSY; break; } /* * Wait until we can obtain the exclusive lock. EXREQ is * automatically cleared when all current holders release * so if we abort the operation we can safely leave it set. 
* There might be other exclusive requesters. */ pflags = (extflags & LK_PCATCH) ? PCATCH : 0; timo = (extflags & LK_TIMELOCK) ? lkp->lk_timo : 0; tsleep_interlock(lkp, pflags); if (!atomic_cmpset_int(&lkp->lk_count, count, count | LKC_EXREQ)) { goto again; } mycpu->gd_cnt.v_lock_name[0] = 'X'; strncpy(mycpu->gd_cnt.v_lock_name + 1, lkp->lk_wmesg, sizeof(mycpu->gd_cnt.v_lock_name) - 2); ++mycpu->gd_cnt.v_lock_colls; error = tsleep(lkp, pflags | PINTERLOCKED, lkp->lk_wmesg, timo); if (error) break; if (extflags & LK_SLEEPFAIL) { error = ENOLCK; break; } goto again; case LK_DOWNGRADE: /* * Downgrade an exclusive lock into a shared lock. All * counts on a recursive exclusive lock become shared. * * This function always succeeds. */ if (lkp->lk_lockholder != td || (count & (LKC_EXCL|LKC_MASK)) != (LKC_EXCL|1)) { panic("lockmgr: not holding exclusive lock"); } #ifdef DEBUG_LOCKS for (i = 0; i < LOCKMGR_DEBUG_ARRAY_SIZE; i++) { if (td->td_lockmgr_stack[i] == lkp && td->td_lockmgr_stack_id[i] > 0 ) { td->td_lockmgr_stack_id[i]--; break; } } #endif /* * NOTE! Must NULL-out lockholder before releasing LKC_EXCL. */ otd = lkp->lk_lockholder; lkp->lk_lockholder = NULL; if (atomic_cmpset_int(&lkp->lk_count, count, count & ~(LKC_EXCL|LKC_SHREQ))) { if (count & LKC_SHREQ) wakeup(lkp); break; } lkp->lk_lockholder = otd; goto again; case LK_EXCLUPGRADE: /* * Upgrade from a single shared lock to an exclusive lock. * * If another process is ahead of us to get an upgrade, * then we want to fail rather than have an intervening * exclusive access. The shared lock is released on * failure. */ if (count & LKC_UPREQ) { flags = LK_RELEASE; error = EBUSY; goto again; } /* fall through into normal upgrade */ case LK_UPGRADE: /* * Upgrade a shared lock to an exclusive one. This can cause * the lock to be temporarily released and stolen by other * threads. LK_SLEEPFAIL or LK_NOWAIT may be used to detect * this case, or use LK_EXCLUPGRADE. * * If the lock is already exclusively owned by us, this * operation is a NOP. * * If we return an error (even NOWAIT), the current lock will * be released. * * Start with the critical path. */ if ((count & (LKC_UPREQ|LKC_EXCL|LKC_MASK)) == 1) { if (atomic_cmpset_int(&lkp->lk_count, count, count | LKC_EXCL)) { lkp->lk_lockholder = td; break; } goto again; } /* * If we already hold the lock exclusively this operation * succeeds and is a NOP. */ if (count & LKC_EXCL) { if (lkp->lk_lockholder == td) break; panic("lockmgr: upgrade unowned lock"); } if ((count & LKC_MASK) == 0) panic("lockmgr: upgrade unowned lock"); /* * We cannot upgrade without blocking at this point. */ if (extflags & LK_NOWAIT) { flags = LK_RELEASE; error = EBUSY; goto again; } /* * Release the shared lock and request the upgrade. */ pflags = (extflags & LK_PCATCH) ? PCATCH : 0; timo = (extflags & LK_TIMELOCK) ? lkp->lk_timo : 0; tsleep_interlock(lkp, pflags); wflags = (count & LKC_UPREQ) ? LKC_EXREQ : LKC_UPREQ; /* * If someone else owns UPREQ and this transition would * allow it to be granted, we have to grant it. Otherwise * we release the shared lock. */ if ((count & (LKC_UPREQ|LKC_MASK)) == (LKC_UPREQ | 1)) { wflags |= LKC_EXCL | LKC_UPGRANT; wflags |= count; wflags &= ~LKC_UPREQ; } else { wflags |= (count - 1); } if (atomic_cmpset_int(&lkp->lk_count, count, wflags)) { COUNT(td, -1); /* * Must wakeup the thread granted the upgrade. 
*/ if ((count & (LKC_UPREQ|LKC_MASK)) == (LKC_UPREQ | 1)) wakeup(lkp); mycpu->gd_cnt.v_lock_name[0] = 'U'; strncpy(mycpu->gd_cnt.v_lock_name + 1, lkp->lk_wmesg, sizeof(mycpu->gd_cnt.v_lock_name) - 2); ++mycpu->gd_cnt.v_lock_colls; error = tsleep(lkp, pflags | PINTERLOCKED, lkp->lk_wmesg, timo); if (error) break; if (extflags & LK_SLEEPFAIL) { error = ENOLCK; break; } /* * Refactor to either LK_EXCLUSIVE or LK_WAITUPGRADE, * depending on whether we were able to acquire the * LKC_UPREQ bit. */ if (count & LKC_UPREQ) flags = LK_EXCLUSIVE; /* someone else */ else flags = LK_WAITUPGRADE; /* we own the bit */ } goto again; case LK_WAITUPGRADE: /* * We own the LKC_UPREQ bit, wait until we are granted the * exclusive lock (LKC_UPGRANT is set). * * IF THE OPERATION FAILS (tsleep error tsleep+LK_SLEEPFAIL), * we have to undo the upgrade request and clean up any lock * that might have been granted via a race. */ if (count & LKC_UPGRANT) { if (atomic_cmpset_int(&lkp->lk_count, count, count & ~LKC_UPGRANT)) { lkp->lk_lockholder = td; KKASSERT(count & LKC_EXCL); break; } /* retry */ } else { pflags = (extflags & LK_PCATCH) ? PCATCH : 0; timo = (extflags & LK_TIMELOCK) ? lkp->lk_timo : 0; tsleep_interlock(lkp, pflags); if (atomic_cmpset_int(&lkp->lk_count, count, count)) { mycpu->gd_cnt.v_lock_name[0] = 'U'; strncpy(mycpu->gd_cnt.v_lock_name + 1, lkp->lk_wmesg, sizeof(mycpu->gd_cnt.v_lock_name) - 2); ++mycpu->gd_cnt.v_lock_colls; error = tsleep(lkp, pflags | PINTERLOCKED, lkp->lk_wmesg, timo); if (error) { undo_upreq(lkp); break; } if (extflags & LK_SLEEPFAIL) { error = ENOLCK; undo_upreq(lkp); break; } } /* retry */ } goto again; case LK_RELEASE: /* * Release the currently held lock. If releasing the current * lock as part of an error return, error will ALREADY be * non-zero. * * When releasing the last lock we automatically transition * LKC_UPREQ to LKC_EXCL|1. * * WARNING! We cannot detect when there are multiple exclusive * requests pending. We clear EXREQ unconditionally * on the 1->0 transition so it is possible for * shared requests to race the next exclusive * request. * * Always succeeds. */ if ((count & LKC_MASK) == 0) panic("lockmgr: LK_RELEASE: no lock held"); if (count & LKC_EXCL) { if (lkp->lk_lockholder != LK_KERNTHREAD && lkp->lk_lockholder != td) { panic("lockmgr: pid %d, not exlusive " "lock holder thr %p/%p unlocking", (td->td_proc ? td->td_proc->p_pid : -1), td, lkp->lk_lockholder); } if ((count & (LKC_UPREQ|LKC_MASK)) == 1) { /* * Last exclusive count is being released */ otd = lkp->lk_lockholder; lkp->lk_lockholder = NULL; if (!atomic_cmpset_int(&lkp->lk_count, count, (count - 1) & ~(LKC_EXCL|LKC_EXREQ|LKC_SHREQ))) { lkp->lk_lockholder = otd; goto again; } if (count & (LKC_EXREQ|LKC_SHREQ)) wakeup(lkp); /* success */ } else if ((count & (LKC_UPREQ|LKC_MASK)) == (LKC_UPREQ | 1)) { /* * Last exclusive count is being released but * an upgrade request is present, automatically * grant an exclusive state to the owner of * the upgrade request. */ otd = lkp->lk_lockholder; lkp->lk_lockholder = NULL; if (!atomic_cmpset_int(&lkp->lk_count, count, (count & ~LKC_UPREQ) | LKC_UPGRANT)) { lkp->lk_lockholder = otd; } wakeup(lkp); /* success */ } else { otd = lkp->lk_lockholder; if (!atomic_cmpset_int(&lkp->lk_count, count, count - 1)) { goto again; } /* success */ } /* success */ if (otd != LK_KERNTHREAD) COUNT(td, -1); } else { if ((count & (LKC_UPREQ|LKC_MASK)) == 1) { /* * Last shared count is being released. 
*/ if (!atomic_cmpset_int(&lkp->lk_count, count, (count - 1) & ~(LKC_EXREQ|LKC_SHREQ))) { goto again; } if (count & (LKC_EXREQ|LKC_SHREQ)) wakeup(lkp); /* success */ } else if ((count & (LKC_UPREQ|LKC_MASK)) == (LKC_UPREQ | 1)) { /* * Last shared count is being released but * an upgrade request is present, automatically * grant an exclusive state to the owner of * the upgrade request. */ if (!atomic_cmpset_int(&lkp->lk_count, count, (count & ~LKC_UPREQ) | LKC_EXCL | LKC_UPGRANT)) { goto again; } wakeup(lkp); } else { if (!atomic_cmpset_int(&lkp->lk_count, count, count - 1)) { goto again; } } /* success */ COUNT(td, -1); } break; default: panic("lockmgr: unknown locktype request %d", flags & LK_TYPE_MASK); /* NOTREACHED */ } return (error); }
static int p4_intr(int cpu, struct trapframe *tf) { uint32_t cccrval, ovf_mask, ovf_partner; int did_interrupt, error, ri; struct p4_cpu *pc; struct pmc *pm; pmc_value_t v; PMCDBG(MDP,INT, 1, "cpu=%d tf=0x%p um=%d", cpu, (void *) tf, TRAPF_USERMODE(tf)); pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)]; ovf_mask = P4_CPU_IS_HTT_SECONDARY(cpu) ? P4_CCCR_OVF_PMI_T1 : P4_CCCR_OVF_PMI_T0; ovf_mask |= P4_CCCR_OVF; if (p4_system_has_htt) ovf_partner = P4_CPU_IS_HTT_SECONDARY(cpu) ? P4_CCCR_OVF_PMI_T0 : P4_CCCR_OVF_PMI_T1; else ovf_partner = 0; did_interrupt = 0; if (p4_system_has_htt) P4_PCPU_ACQ_INTR_SPINLOCK(pc); /* * Loop through all CCCRs, looking for ones that have * interrupted this CPU. */ for (ri = 0; ri < P4_NPMCS; ri++) { /* * Check if our partner logical CPU has already marked * this PMC has having interrupted it. If so, reset * the flag and process the interrupt, but leave the * hardware alone. */ if (p4_system_has_htt && P4_PCPU_GET_INTRFLAG(pc,ri)) { P4_PCPU_SET_INTRFLAG(pc,ri,0); did_interrupt = 1; /* * Ignore de-configured or stopped PMCs. * Ignore PMCs not in sampling mode. */ pm = pc->pc_p4pmcs[ri].phw_pmc; if (pm == NULL || pm->pm_state != PMC_STATE_RUNNING || !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) { continue; } (void) pmc_process_interrupt(cpu, PMC_HR, pm, tf, TRAPF_USERMODE(tf)); continue; } /* * Fresh interrupt. Look for the CCCR_OVF bit * and the OVF_Tx bit for this logical * processor being set. */ cccrval = rdmsr(P4_CCCR_MSR_FIRST + ri); if ((cccrval & ovf_mask) != ovf_mask) continue; /* * If the other logical CPU would also have been * interrupted due to the PMC being shared, record * this fact in the per-cpu saved interrupt flag * bitmask. */ if (p4_system_has_htt && (cccrval & ovf_partner)) P4_PCPU_SET_INTRFLAG(pc, ri, 1); v = rdmsr(P4_PERFCTR_MSR_FIRST + ri); PMCDBG(MDP,INT, 2, "ri=%d v=%jx", ri, v); /* Stop the counter, and reset the overflow bit */ cccrval &= ~(P4_CCCR_OVF | P4_CCCR_ENABLE); wrmsr(P4_CCCR_MSR_FIRST + ri, cccrval); did_interrupt = 1; /* * Ignore de-configured or stopped PMCs. Ignore PMCs * not in sampling mode. */ pm = pc->pc_p4pmcs[ri].phw_pmc; if (pm == NULL || pm->pm_state != PMC_STATE_RUNNING || !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) { continue; } /* * Process the interrupt. Re-enable the PMC if * processing was successful. */ error = pmc_process_interrupt(cpu, PMC_HR, pm, tf, TRAPF_USERMODE(tf)); /* * Only the first processor executing the NMI handler * in a HTT pair will restart a PMC, and that too * only if there were no errors. */ v = P4_RELOAD_COUNT_TO_PERFCTR_VALUE( pm->pm_sc.pm_reloadcount); wrmsr(P4_PERFCTR_MSR_FIRST + ri, v); if (error == 0) wrmsr(P4_CCCR_MSR_FIRST + ri, cccrval | P4_CCCR_ENABLE); } /* allow the other CPU to proceed */ if (p4_system_has_htt) P4_PCPU_REL_INTR_SPINLOCK(pc); /* * On Intel P4 CPUs, the PMC 'pcint' entry in the LAPIC gets * masked when a PMC interrupts the CPU. We need to unmask * the interrupt source explicitly. */ if (did_interrupt) lapic_reenable_pmc(); atomic_add_int(did_interrupt ? &pmc_stats.pm_intr_processed : &pmc_stats.pm_intr_ignored, 1); return (did_interrupt); }
static int
msgdma_channel_submit_sg(device_t dev, struct xdma_channel *xchan,
    struct xdma_sglist *sg, uint32_t sg_n)
{
	struct msgdma_channel *chan;
	struct msgdma_desc *desc;
	struct msgdma_softc *sc;
	uint32_t src_addr_lo;
	uint32_t dst_addr_lo;
	uint32_t len;
	uint32_t tmp;
	int i;

	sc = device_get_softc(dev);

	chan = (struct msgdma_channel *)xchan->chan;

	for (i = 0; i < sg_n; i++) {
		src_addr_lo = (uint32_t)sg[i].src_addr;
		dst_addr_lo = (uint32_t)sg[i].dst_addr;
		len = (uint32_t)sg[i].len;

		dprintf("%s: src %x dst %x len %d\n", __func__,
		    src_addr_lo, dst_addr_lo, len);

		desc = chan->descs[chan->idx_head];
		desc->read_lo = htole32(src_addr_lo);
		desc->write_lo = htole32(dst_addr_lo);
		desc->length = htole32(len);
		desc->transferred = 0;
		desc->status = 0;
		desc->reserved = 0;
		desc->control = 0;

		if (sg[i].direction == XDMA_MEM_TO_DEV) {
			if (sg[i].first == 1) {
				desc->control |= htole32(CONTROL_GEN_SOP);
			}
			if (sg[i].last == 1) {
				desc->control |= htole32(CONTROL_GEN_EOP);
				desc->control |= htole32(CONTROL_TC_IRQ_EN |
				    CONTROL_ET_IRQ_EN | CONTROL_ERR_M);
			}
		} else {
			desc->control |= htole32(CONTROL_END_ON_EOP | (1 << 13));
			desc->control |= htole32(CONTROL_TC_IRQ_EN |
			    CONTROL_ET_IRQ_EN | CONTROL_ERR_M);
		}

		tmp = chan->idx_head;

		atomic_add_int(&chan->descs_used_count, 1);
		chan->idx_head = msgdma_next_desc(chan, chan->idx_head);

		desc->control |= htole32(CONTROL_OWN | CONTROL_GO);

		bus_dmamap_sync(chan->dma_tag, chan->dma_map[tmp],
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	}

	return (0);
}
/*
 * Flush waiting shared locks.  The lock's prior state is passed in and must
 * be adjusted atomically only if it matches and LINKSPIN is not set.
 *
 * IMPORTANT! The caller has left one active count on the lock for us to
 *	      consume.  We will apply this to the first link, but must add
 *	      additional counts for any other links.
 */
static int
mtx_chain_link_sh(mtx_t *mtx, u_int olock)
{
	thread_t td = curthread;
	mtx_link_t *link;
	u_int	addcount;
	u_int	nlock;

	olock &= ~MTX_LINKSPIN;
	nlock = olock | MTX_LINKSPIN;
	nlock &= ~MTX_EXCLUSIVE;
	crit_enter_raw(td);
	if (atomic_cmpset_int(&mtx->mtx_lock, olock, nlock)) {
		/*
		 * It should not be possible for SHWANTED to be set without
		 * any links pending.
		 */
		KKASSERT(mtx->mtx_shlink != NULL);

		/*
		 * We have to process the count for all shared locks before
		 * we process any of the links.  Count the additional shared
		 * locks beyond the first link (which is already accounted
		 * for) and associate the full count with the lock
		 * immediately.
		 */
		addcount = 0;
		for (link = mtx->mtx_shlink->next; link != mtx->mtx_shlink;
		     link = link->next) {
			++addcount;
		}
		if (addcount > 0)
			atomic_add_int(&mtx->mtx_lock, addcount);

		/*
		 * We can wakeup all waiting shared locks.
		 */
		while ((link = mtx->mtx_shlink) != NULL) {
			KKASSERT(link->state == MTX_LINK_LINKED_SH);
			if (link->next == link) {
				mtx->mtx_shlink = NULL;
			} else {
				mtx->mtx_shlink = link->next;
				link->next->prev = link->prev;
				link->prev->next = link->next;
			}
			link->next = NULL;
			link->prev = NULL;
			cpu_sfence();
			if (link->callback) {
				link->state = MTX_LINK_CALLEDBACK;
				link->callback(link, link->arg, 0);
			} else {
				cpu_sfence();
				link->state = MTX_LINK_ACQUIRED;
				wakeup(link);
			}
		}
		atomic_clear_int(&mtx->mtx_lock,
		    MTX_LINKSPIN | MTX_SHWANTED);
		crit_exit_raw(td);
		return 1;
	}
	/* retry */
	crit_exit_raw(td);
	return 0;
}
/* * Handle a single exception. */ void itsa(struct trap_frame *trapframe, struct cpu_info *ci, struct proc *p, int type) { int i; unsigned ucode = 0; vm_prot_t ftype; extern vaddr_t onfault_table[]; int onfault; int typ = 0; union sigval sv; struct pcb *pcb; switch (type) { case T_TLB_MOD: /* check for kernel address */ if (trapframe->badvaddr < 0) { pt_entry_t *pte, entry; paddr_t pa; vm_page_t pg; pte = kvtopte(trapframe->badvaddr); entry = *pte; #ifdef DIAGNOSTIC if (!(entry & PG_V) || (entry & PG_M)) panic("trap: ktlbmod: invalid pte"); #endif if (pmap_is_page_ro(pmap_kernel(), trunc_page(trapframe->badvaddr), entry)) { /* write to read only page in the kernel */ ftype = VM_PROT_WRITE; pcb = &p->p_addr->u_pcb; goto kernel_fault; } entry |= PG_M; *pte = entry; KERNEL_LOCK(); pmap_update_kernel_page(trapframe->badvaddr & ~PGOFSET, entry); pa = pfn_to_pad(entry); pg = PHYS_TO_VM_PAGE(pa); if (pg == NULL) panic("trap: ktlbmod: unmanaged page"); pmap_set_modify(pg); KERNEL_UNLOCK(); return; } /* FALLTHROUGH */ case T_TLB_MOD+T_USER: { pt_entry_t *pte, entry; paddr_t pa; vm_page_t pg; pmap_t pmap = p->p_vmspace->vm_map.pmap; if (!(pte = pmap_segmap(pmap, trapframe->badvaddr))) panic("trap: utlbmod: invalid segmap"); pte += uvtopte(trapframe->badvaddr); entry = *pte; #ifdef DIAGNOSTIC if (!(entry & PG_V) || (entry & PG_M)) panic("trap: utlbmod: invalid pte"); #endif if (pmap_is_page_ro(pmap, trunc_page(trapframe->badvaddr), entry)) { /* write to read only page */ ftype = VM_PROT_WRITE; pcb = &p->p_addr->u_pcb; goto fault_common_no_miss; } entry |= PG_M; *pte = entry; KERNEL_LOCK(); pmap_update_user_page(pmap, (trapframe->badvaddr & ~PGOFSET), entry); pa = pfn_to_pad(entry); pg = PHYS_TO_VM_PAGE(pa); if (pg == NULL) panic("trap: utlbmod: unmanaged page"); pmap_set_modify(pg); KERNEL_UNLOCK(); return; } case T_TLB_LD_MISS: case T_TLB_ST_MISS: ftype = (type == T_TLB_ST_MISS) ? VM_PROT_WRITE : VM_PROT_READ; pcb = &p->p_addr->u_pcb; /* check for kernel address */ if (trapframe->badvaddr < 0) { vaddr_t va; int rv; kernel_fault: va = trunc_page((vaddr_t)trapframe->badvaddr); onfault = pcb->pcb_onfault; pcb->pcb_onfault = 0; KERNEL_LOCK(); rv = uvm_fault(kernel_map, trunc_page(va), 0, ftype); KERNEL_UNLOCK(); pcb->pcb_onfault = onfault; if (rv == 0) return; if (onfault != 0) { pcb->pcb_onfault = 0; trapframe->pc = onfault_table[onfault]; return; } goto err; } /* * It is an error for the kernel to access user space except * through the copyin/copyout routines. */ if (pcb->pcb_onfault != 0) { /* * We want to resolve the TLB fault before invoking * pcb_onfault if necessary. */ goto fault_common; } else { goto err; } case T_TLB_LD_MISS+T_USER: ftype = VM_PROT_READ; pcb = &p->p_addr->u_pcb; goto fault_common; case T_TLB_ST_MISS+T_USER: ftype = VM_PROT_WRITE; pcb = &p->p_addr->u_pcb; fault_common: #ifdef CPU_R4000 if (r4000_errata != 0) { if (eop_tlb_miss_handler(trapframe, ci, p) != 0) return; } #endif fault_common_no_miss: #ifdef CPU_R4000 if (r4000_errata != 0) { eop_cleanup(trapframe, p); } #endif { vaddr_t va; struct vmspace *vm; vm_map_t map; int rv; vm = p->p_vmspace; map = &vm->vm_map; va = trunc_page((vaddr_t)trapframe->badvaddr); onfault = pcb->pcb_onfault; pcb->pcb_onfault = 0; KERNEL_LOCK(); rv = uvm_fault(map, va, 0, ftype); pcb->pcb_onfault = onfault; /* * If this was a stack access we keep track of the maximum * accessed stack size. 
Also, if vm_fault gets a protection * failure it is due to accessing the stack region outside * the current limit and we need to reflect that as an access * error. */ if ((caddr_t)va >= vm->vm_maxsaddr) { if (rv == 0) uvm_grow(p, va); else if (rv == EACCES) rv = EFAULT; } KERNEL_UNLOCK(); if (rv == 0) return; if (!USERMODE(trapframe->sr)) { if (onfault != 0) { pcb->pcb_onfault = 0; trapframe->pc = onfault_table[onfault]; return; } goto err; } ucode = ftype; i = SIGSEGV; typ = SEGV_MAPERR; break; } case T_ADDR_ERR_LD+T_USER: /* misaligned or kseg access */ case T_ADDR_ERR_ST+T_USER: /* misaligned or kseg access */ ucode = 0; /* XXX should be VM_PROT_something */ i = SIGBUS; typ = BUS_ADRALN; break; case T_BUS_ERR_IFETCH+T_USER: /* BERR asserted to cpu */ case T_BUS_ERR_LD_ST+T_USER: /* BERR asserted to cpu */ ucode = 0; /* XXX should be VM_PROT_something */ i = SIGBUS; typ = BUS_OBJERR; break; case T_SYSCALL+T_USER: { struct trap_frame *locr0 = p->p_md.md_regs; struct sysent *callp; unsigned int code; register_t tpc; int numsys, error; struct args { register_t i[8]; } args; register_t rval[2]; atomic_add_int(&uvmexp.syscalls, 1); /* compute next PC after syscall instruction */ tpc = trapframe->pc; /* Remember if restart */ if (trapframe->cause & CR_BR_DELAY) locr0->pc = MipsEmulateBranch(locr0, trapframe->pc, 0, 0); else locr0->pc += 4; callp = p->p_p->ps_emul->e_sysent; numsys = p->p_p->ps_emul->e_nsysent; code = locr0->v0; switch (code) { case SYS_syscall: case SYS___syscall: /* * Code is first argument, followed by actual args. * __syscall provides the code as a quad to maintain * proper alignment of 64-bit arguments on 32-bit * platforms, which doesn't change anything here. */ code = locr0->a0; if (code >= numsys) callp += p->p_p->ps_emul->e_nosys; /* (illegal) */ else callp += code; i = callp->sy_argsize / sizeof(register_t); args.i[0] = locr0->a1; args.i[1] = locr0->a2; args.i[2] = locr0->a3; if (i > 3) { args.i[3] = locr0->a4; args.i[4] = locr0->a5; args.i[5] = locr0->a6; args.i[6] = locr0->a7; if (i > 7) if ((error = copyin((void *)locr0->sp, &args.i[7], sizeof(register_t)))) goto bad; } break; default: if (code >= numsys) callp += p->p_p->ps_emul->e_nosys; /* (illegal) */ else callp += code; i = callp->sy_narg; args.i[0] = locr0->a0; args.i[1] = locr0->a1; args.i[2] = locr0->a2; args.i[3] = locr0->a3; if (i > 4) { args.i[4] = locr0->a4; args.i[5] = locr0->a5; args.i[6] = locr0->a6; args.i[7] = locr0->a7; } } rval[0] = 0; rval[1] = locr0->v1; #if defined(DDB) || defined(DEBUG) trapdebug[TRAPSIZE * ci->ci_cpuid + (trppos[ci->ci_cpuid] == 0 ? 
TRAPSIZE : trppos[ci->ci_cpuid]) - 1].code = code; #endif error = mi_syscall(p, code, callp, args.i, rval); switch (error) { case 0: locr0->v0 = rval[0]; locr0->v1 = rval[1]; locr0->a3 = 0; break; case ERESTART: locr0->pc = tpc; break; case EJUSTRETURN: break; /* nothing to do */ default: bad: locr0->v0 = error; locr0->a3 = 1; } mi_syscall_return(p, code, error, rval); return; } case T_BREAK: #ifdef DDB kdb_trap(type, trapframe); #endif /* Reenable interrupts if necessary */ if (trapframe->sr & SR_INT_ENAB) { enableintr(); } return; case T_BREAK+T_USER: { caddr_t va; u_int32_t instr; struct trap_frame *locr0 = p->p_md.md_regs; /* compute address of break instruction */ va = (caddr_t)trapframe->pc; if (trapframe->cause & CR_BR_DELAY) va += 4; /* read break instruction */ copyin(va, &instr, sizeof(int32_t)); switch ((instr & BREAK_VAL_MASK) >> BREAK_VAL_SHIFT) { case 6: /* gcc range error */ i = SIGFPE; typ = FPE_FLTSUB; /* skip instruction */ if (trapframe->cause & CR_BR_DELAY) locr0->pc = MipsEmulateBranch(locr0, trapframe->pc, 0, 0); else locr0->pc += 4; break; case 7: /* gcc3 divide by zero */ i = SIGFPE; typ = FPE_INTDIV; /* skip instruction */ if (trapframe->cause & CR_BR_DELAY) locr0->pc = MipsEmulateBranch(locr0, trapframe->pc, 0, 0); else locr0->pc += 4; break; #ifdef PTRACE case BREAK_SSTEP_VAL: if (p->p_md.md_ss_addr == (long)va) { #ifdef DEBUG printf("trap: %s (%d): breakpoint at %p " "(insn %08x)\n", p->p_comm, p->p_pid, (void *)p->p_md.md_ss_addr, p->p_md.md_ss_instr); #endif /* Restore original instruction and clear BP */ process_sstep(p, 0); typ = TRAP_BRKPT; } else { typ = TRAP_TRACE; } i = SIGTRAP; break; #endif #ifdef FPUEMUL case BREAK_FPUEMUL_VAL: /* * If this is a genuine FP emulation break, * resume execution to our branch destination. */ if ((p->p_md.md_flags & MDP_FPUSED) != 0 && p->p_md.md_fppgva + 4 == (vaddr_t)va) { struct vm_map *map = &p->p_vmspace->vm_map; p->p_md.md_flags &= ~MDP_FPUSED; locr0->pc = p->p_md.md_fpbranchva; /* * Prevent access to the relocation page. * XXX needs to be fixed to work with rthreads */ uvm_fault_unwire(map, p->p_md.md_fppgva, p->p_md.md_fppgva + PAGE_SIZE); (void)uvm_map_protect(map, p->p_md.md_fppgva, p->p_md.md_fppgva + PAGE_SIZE, UVM_PROT_NONE, FALSE); return; } /* FALLTHROUGH */ #endif default: typ = TRAP_TRACE; i = SIGTRAP; break; } break; } case T_IWATCH+T_USER: case T_DWATCH+T_USER: { caddr_t va; /* compute address of trapped instruction */ va = (caddr_t)trapframe->pc; if (trapframe->cause & CR_BR_DELAY) va += 4; printf("watch exception @ %p\n", va); #ifdef RM7K_PERFCNTR if (rm7k_watchintr(trapframe)) { /* Return to user, don't add any more overhead */ return; } #endif i = SIGTRAP; typ = TRAP_BRKPT; break; } case T_TRAP+T_USER: { caddr_t va; u_int32_t instr; struct trap_frame *locr0 = p->p_md.md_regs; /* compute address of trap instruction */ va = (caddr_t)trapframe->pc; if (trapframe->cause & CR_BR_DELAY) va += 4; /* read break instruction */ copyin(va, &instr, sizeof(int32_t)); if (trapframe->cause & CR_BR_DELAY) locr0->pc = MipsEmulateBranch(locr0, trapframe->pc, 0, 0); else locr0->pc += 4; #ifdef RM7K_PERFCNTR if (instr == 0x040c0000) { /* Performance cntr trap */ int result; result = rm7k_perfcntr(trapframe->a0, trapframe->a1, trapframe->a2, trapframe->a3); locr0->v0 = -result; /* Return to user, don't add any more overhead */ return; } else #endif /* * GCC 4 uses teq with code 7 to signal divide by * zero at runtime. This is one instruction shorter * than the BEQ + BREAK combination used by gcc 3. 
*/ if ((instr & 0xfc00003f) == 0x00000034 /* teq */ && (instr & 0x001fffc0) == ((ZERO << 16) | (7 << 6))) { i = SIGFPE; typ = FPE_INTDIV; } else { i = SIGEMT; /* Stuff it with something for now */ typ = 0; } break; } case T_RES_INST+T_USER: i = SIGILL; typ = ILL_ILLOPC; break; case T_COP_UNUSABLE+T_USER: /* * Note MIPS IV COP1X instructions issued with FPU * disabled correctly report coprocessor 1 as the * unusable coprocessor number. */ if ((trapframe->cause & CR_COP_ERR) != CR_COP1_ERR) { i = SIGILL; /* only FPU instructions allowed */ typ = ILL_ILLOPC; break; } #ifdef FPUEMUL MipsFPTrap(trapframe); #else enable_fpu(p); #endif return; case T_FPE: printf("FPU Trap: PC %lx CR %lx SR %lx\n", trapframe->pc, trapframe->cause, trapframe->sr); goto err; case T_FPE+T_USER: MipsFPTrap(trapframe); return; case T_OVFLOW+T_USER: i = SIGFPE; typ = FPE_FLTOVF; break; case T_ADDR_ERR_LD: /* misaligned access */ case T_ADDR_ERR_ST: /* misaligned access */ case T_BUS_ERR_LD_ST: /* BERR asserted to cpu */ pcb = &p->p_addr->u_pcb; if ((onfault = pcb->pcb_onfault) != 0) { pcb->pcb_onfault = 0; trapframe->pc = onfault_table[onfault]; return; } goto err; default: err: disableintr(); #if !defined(DDB) && defined(DEBUG) trapDump("trap", printf); #endif printf("\nTrap cause = %d Frame %p\n", type, trapframe); printf("Trap PC %p RA %p fault %p\n", (void *)trapframe->pc, (void *)trapframe->ra, (void *)trapframe->badvaddr); #ifdef DDB stacktrace(!USERMODE(trapframe->sr) ? trapframe : p->p_md.md_regs); kdb_trap(type, trapframe); #endif panic("trap"); } #ifdef FPUEMUL /* * If a relocated delay slot causes an exception, blame the * original delay slot address - userland is not supposed to * know anything about emulation bowels. */ if ((p->p_md.md_flags & MDP_FPUSED) != 0 && trapframe->badvaddr == p->p_md.md_fppgva) trapframe->badvaddr = p->p_md.md_fpslotva; #endif p->p_md.md_regs->pc = trapframe->pc; p->p_md.md_regs->cause = trapframe->cause; p->p_md.md_regs->badvaddr = trapframe->badvaddr; sv.sival_ptr = (void *)trapframe->badvaddr; KERNEL_LOCK(); trapsignal(p, i, ucode, typ, sv); KERNEL_UNLOCK(); }
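The SYS_syscall and SYS___syscall cases above implement the indirect system call, where the real syscall number arrives in the first argument register and the remaining arguments shift down by one. A minimal userland illustration of that calling convention through the standard syscall(2) interface (SYS_getpid is just an arbitrary example):

#include <sys/types.h>
#include <sys/syscall.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	/*
	 * syscall(2) is the userland side of the SYS_syscall case: the
	 * kernel takes the number from the first argument register and
	 * dispatches with the remaining arguments shifted down by one.
	 */
	pid_t pid = (pid_t)syscall(SYS_getpid);

	printf("getpid() via indirect syscall: %ld\n", (long)pid);
	return 0;
}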
static void atomic_increment(sp_counted_base_atomic_type volatile *pw) { atomic_add_int(&pw->ui,1); }
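This increment half of the smart-pointer counter is normally paired with a decrement that tests the previous value; below is a minimal sketch of such a release path on top of FreeBSD's atomic(9), with a hypothetical refobj type and destructor (only the atomic calls are real APIs):

#include <sys/types.h>
#include <machine/atomic.h>
#include <stdlib.h>

struct refobj {
	volatile u_int	refs;	/* manipulated only through atomic(9) */
};

/* Hypothetical destructor; a real consumer would tear the object down. */
static void
refobj_destroy(struct refobj *obj)
{
	free(obj);
}

static void
refobj_drop(struct refobj *obj)
{
	/*
	 * atomic_fetchadd_int() returns the value the counter held
	 * before the add, so a return of 1 means this caller released
	 * the last reference.
	 */
	if (atomic_fetchadd_int(&obj->refs, -1) == 1)
		refobj_destroy(obj);
}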
static int mpc7xxx_intr(int cpu, struct trapframe *tf) { int i, error, retval; uint32_t config; struct pmc *pm; struct powerpc_cpu *pac; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[powerpc,%d] out of range CPU %d", __LINE__, cpu)); PMCDBG(MDP,INT,1, "cpu=%d tf=%p um=%d", cpu, (void *) tf, TRAPF_USERMODE(tf)); retval = 0; pac = powerpc_pcpu[cpu]; config = mfspr(SPR_MMCR0) & ~SPR_MMCR0_FC; /* * look for all PMCs that have interrupted: * - look for a running, sampling PMC which has overflowed * and which has a valid 'struct pmc' association * * If found, we call a helper to process the interrupt. */ for (i = 0; i < MPC7XXX_MAX_PMCS; i++) { if ((pm = pac->pc_ppcpmcs[i].phw_pmc) == NULL || !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) { continue; } if (!MPC7XXX_PMC_HAS_OVERFLOWED(i)) continue; retval = 1; /* Found an interrupting PMC. */ if (pm->pm_state != PMC_STATE_RUNNING) continue; /* Stop the counter if logging fails. */ error = pmc_process_interrupt(cpu, PMC_HR, pm, tf, TRAPF_USERMODE(tf)); if (error != 0) mpc7xxx_stop_pmc(cpu, i); /* reload count. */ mpc7xxx_write_pmc(cpu, i, pm->pm_sc.pm_reloadcount); } atomic_add_int(retval ? &pmc_stats.pm_intr_processed : &pmc_stats.pm_intr_ignored, 1); /* Re-enable PERF exceptions. */ if (retval) mtspr(SPR_MMCR0, config | SPR_MMCR0_PMXE); return (retval); }
static vm_page_t rtR0MemObjFreeBSDContigPhysAllocHelper(vm_object_t pObject, vm_pindex_t iPIndex, u_long cPages, vm_paddr_t VmPhysAddrHigh, u_long uAlignment, bool fWire) { vm_page_t pPages; int cTries = 0; #if __FreeBSD_version > 1000000 int fFlags = VM_ALLOC_INTERRUPT | VM_ALLOC_NOBUSY; if (fWire) fFlags |= VM_ALLOC_WIRED; while (cTries <= 1) { #if __FreeBSD_version >= 1000030 VM_OBJECT_WLOCK(pObject); #else VM_OBJECT_LOCK(pObject); #endif pPages = vm_page_alloc_contig(pObject, iPIndex, fFlags, cPages, 0, VmPhysAddrHigh, uAlignment, 0, VM_MEMATTR_DEFAULT); #if __FreeBSD_version >= 1000030 VM_OBJECT_WUNLOCK(pObject); #else VM_OBJECT_UNLOCK(pObject); #endif if (pPages) break; vm_pageout_grow_cache(cTries, 0, VmPhysAddrHigh); cTries++; } return pPages; #else while (cTries <= 1) { pPages = vm_phys_alloc_contig(cPages, 0, VmPhysAddrHigh, uAlignment, 0); if (pPages) break; vm_contig_grow_cache(cTries, 0, VmPhysAddrHigh); cTries++; } if (!pPages) return pPages; #if __FreeBSD_version >= 1000030 VM_OBJECT_WLOCK(pObject); #else VM_OBJECT_LOCK(pObject); #endif for (vm_pindex_t iPage = 0; iPage < cPages; iPage++) { vm_page_t pPage = pPages + iPage; vm_page_insert(pPage, pObject, iPIndex + iPage); pPage->valid = VM_PAGE_BITS_ALL; if (fWire) { pPage->wire_count = 1; atomic_add_int(&cnt.v_wire_count, 1); } } #if __FreeBSD_version >= 1000030 VM_OBJECT_WUNLOCK(pObject); #else VM_OBJECT_UNLOCK(pObject); #endif return pPages; #endif }
/* * Allocate a device specific dma_tag. */ int bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, bus_size_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, void *lockfuncarg, bus_dma_tag_t *dmat) { bus_dma_tag_t newtag; int error = 0; /* Return a NULL tag on failure */ *dmat = NULL; newtag = (bus_dma_tag_t)malloc(sizeof(*newtag), M_DEVBUF, M_NOWAIT); if (newtag == NULL) return (ENOMEM); newtag->parent = parent; newtag->alignment = alignment; newtag->boundary = boundary; newtag->lowaddr = trunc_page((vm_offset_t)lowaddr) + (PAGE_SIZE - 1); newtag->highaddr = trunc_page((vm_offset_t)highaddr) + (PAGE_SIZE - 1); newtag->filter = filter; newtag->filterarg = filterarg; newtag->maxsize = maxsize; newtag->nsegments = nsegments; newtag->maxsegsz = maxsegsz; newtag->flags = flags; newtag->ref_count = 1; /* Count ourself */ newtag->map_count = 0; if (lockfunc != NULL) { newtag->lockfunc = lockfunc; newtag->lockfuncarg = lockfuncarg; } else { newtag->lockfunc = dflt_lock; newtag->lockfuncarg = NULL; } /* * Take into account any restrictions imposed by our parent tag */ if (parent != NULL) { newtag->lowaddr = min(parent->lowaddr, newtag->lowaddr); newtag->highaddr = max(parent->highaddr, newtag->highaddr); if (newtag->boundary == 0) newtag->boundary = parent->boundary; else if (parent->boundary != 0) newtag->boundary = MIN(parent->boundary, newtag->boundary); if (newtag->filter == NULL) { /* * Short circuit looking at our parent directly * since we have encapsulated all of its information */ newtag->filter = parent->filter; newtag->filterarg = parent->filterarg; newtag->parent = parent->parent; } if (newtag->parent != NULL) atomic_add_int(&parent->ref_count, 1); } *dmat = newtag; return (error); }
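The atomic_add_int() on parent->ref_count above is what keeps a parent tag alive while derived tags point at it; the sketch below shows the matching teardown walk in isolation, using a pared-down stand-in structure rather than the real bus_dma_tag_destroy():

#include <sys/types.h>
#include <machine/atomic.h>
#include <errno.h>
#include <stdlib.h>

/* Pared-down stand-in for a DMA tag; only the refcount linkage is modeled. */
struct dma_tag_stub {
	struct dma_tag_stub	*parent;
	volatile u_int		ref_count;
	u_int			map_count;
};

static int
dma_tag_stub_destroy(struct dma_tag_stub *tag)
{
	struct dma_tag_stub *parent;

	if (tag->map_count != 0)
		return (EBUSY);		/* maps still reference this tag */

	/*
	 * Drop one reference per level; a tag is freed only when the
	 * last child (or its original creator) lets go of it.
	 */
	while (tag != NULL) {
		parent = tag->parent;
		if (atomic_fetchadd_int(&tag->ref_count, -1) != 1)
			break;
		free(tag);
		tag = parent;
	}
	return (0);
}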
static int cbb_pci_filt(void *arg) { struct cbb_softc *sc = arg; uint32_t sockevent; uint8_t csc; int retval = FILTER_STRAY; /* * Some chips also require us to read the old ExCA register for card * status change when we route CSC via PCI. This isn't supposed to be * required, but it clears the interrupt state on some chipsets. * Maybe there's a setting that would obviate its need. Maybe we * should test the status bits and deal with them, but so far we've * not found any machines that don't also give us the socket status * indication above. * * This call used to be unconditional. However, further research * suggests that we hit this condition when the card READY interrupt * fired. So now we only read it for 16-bit cards, and we only claim * the interrupt if READY is set. If this still causes problems, then * the next step would be to read this if we have a 16-bit card *OR* * we have no card. We treat the READY signal as if it were the power * completion signal. Some bridges may double signal things here, but * signalling twice should be OK since we only sleep on the powerintr * in one place and a double wakeup would be benign there. */ if (sc->flags & CBB_16BIT_CARD) { csc = exca_getb(&sc->exca[0], EXCA_CSC); if (csc & EXCA_CSC_READY) { atomic_add_int(&sc->powerintr, 1); wakeup((void *)&sc->powerintr); retval = FILTER_HANDLED; } } /* * Read the socket event. Sometimes, the theory goes, the PCI bus is * so loaded that it cannot satisfy the read request, so we get * garbage back from the following read. We have to filter out the * garbage so that we don't spontaneously reset the card under high * load. PCI isn't supposed to act like this. No doubt this is a bug * in the PCI bridge chipset (or cbb bridge) that's being used in * certain amd64 laptops today. Work around the issue by assuming * that any bits we don't know about being set mean that we got * garbage. */ sockevent = cbb_get(sc, CBB_SOCKET_EVENT); if (sockevent != 0 && (sockevent & ~CBB_SOCKET_EVENT_VALID_MASK) == 0) { /* * If anything has happened to the socket, we assume that the * card is no longer OK, and we shouldn't call its ISR. We * set cardok as soon as we've attached the card. This helps * in a noisy eject, which happens all too often when users * are ejecting their PC Cards. * * We use this method in preference to checking to see if the * card is still there because the check suffers from a race * condition in the bouncing case. */ #define DELTA (CBB_SOCKET_MASK_CD) if (sockevent & DELTA) { cbb_clrb(sc, CBB_SOCKET_MASK, DELTA); cbb_set(sc, CBB_SOCKET_EVENT, DELTA); sc->cardok = 0; cbb_disable_func_intr(sc); wakeup(&sc->intrhand); } #undef DELTA /* * Wake up anybody waiting for a power interrupt. We have to * use atomic_add_int for wakeups on other cores. */ if (sockevent & CBB_SOCKET_EVENT_POWER) { cbb_clrb(sc, CBB_SOCKET_MASK, CBB_SOCKET_EVENT_POWER); cbb_set(sc, CBB_SOCKET_EVENT, CBB_SOCKET_EVENT_POWER); atomic_add_int(&sc->powerintr, 1); wakeup((void *)&sc->powerintr); } /* * Status change interrupts aren't presently used in the * rest of the driver. For now, just ACK them. */ if (sockevent & CBB_SOCKET_EVENT_CSTS) cbb_set(sc, CBB_SOCKET_EVENT, CBB_SOCKET_EVENT_CSTS); retval = FILTER_HANDLED; } return retval; }
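The filter only bumps sc->powerintr with atomic_add_int() and calls wakeup(); the sleeping side lives elsewhere in the driver. A hedged sketch of what such a waiter generally looks like, using a hypothetical helper around tsleep(9) rather than the driver's actual power-wait code:

/*
 * Hypothetical waiter for the powerintr counter: snapshot the counter
 * before starting the operation, then sleep on its address until the
 * filter's atomic_add_int()/wakeup() pair moves it past the snapshot.
 */
static int
cbb_wait_power_intr(struct cbb_softc *sc, int gen, int timo)
{
	int err = 0;

	while (sc->powerintr == gen && err == 0)
		err = tsleep((void *)&sc->powerintr, PZERO, "cbbpwr", timo);
	return (err);
}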
/* * Adding a ref to an inode is only legal if the inode already has at least * one ref. */ void hammer2_inode_ref(hammer2_inode_t *ip) { atomic_add_int(&ip->refs, 1); }
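Given the rule in the comment (callers must already hold a reference), an assertion-checked variant is easy to sketch; the version below is illustrative only, assuming DragonFly's KKASSERT() and atomic_fetchadd_int(), and is not the actual HAMMER2 code:

void
hammer2_inode_ref_checked(hammer2_inode_t *ip)
{
	u_int orefs;

	/*
	 * atomic_fetchadd_int() returns the pre-increment count, so a
	 * zero here means the caller violated the "at least one ref"
	 * contract documented above.
	 */
	orefs = atomic_fetchadd_int(&ip->refs, 1);
	KKASSERT(orefs > 0);
}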
/* * Allocate a device specific dma_tag. */ int bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, void *lockfuncarg, bus_dma_tag_t *dmat) { bus_dma_tag_t newtag; /* Return a NULL tag on failure */ *dmat = NULL; /* Enforce the usage of BUS_GET_DMA_TAG(). */ if (parent == NULL) panic("%s: parent DMA tag NULL", __func__); newtag = (bus_dma_tag_t)malloc(sizeof(*newtag), M_DEVBUF, M_NOWAIT); if (newtag == NULL) return (ENOMEM); /* * The method table pointer and the cookie need to be taken over from * the parent. */ newtag->dt_cookie = parent->dt_cookie; newtag->dt_mt = parent->dt_mt; newtag->dt_parent = parent; newtag->dt_alignment = alignment; newtag->dt_boundary = boundary; newtag->dt_lowaddr = trunc_page((vm_offset_t)lowaddr) + (PAGE_SIZE - 1); newtag->dt_highaddr = trunc_page((vm_offset_t)highaddr) + (PAGE_SIZE - 1); newtag->dt_filter = filter; newtag->dt_filterarg = filterarg; newtag->dt_maxsize = maxsize; newtag->dt_nsegments = nsegments; newtag->dt_maxsegsz = maxsegsz; newtag->dt_flags = flags; newtag->dt_ref_count = 1; /* Count ourselves */ newtag->dt_map_count = 0; if (lockfunc != NULL) { newtag->dt_lockfunc = lockfunc; newtag->dt_lockfuncarg = lockfuncarg; } else { newtag->dt_lockfunc = dflt_lock; newtag->dt_lockfuncarg = NULL; } newtag->dt_segments = NULL; /* Take into account any restrictions imposed by our parent tag. */ newtag->dt_lowaddr = ulmin(parent->dt_lowaddr, newtag->dt_lowaddr); newtag->dt_highaddr = ulmax(parent->dt_highaddr, newtag->dt_highaddr); if (newtag->dt_boundary == 0) newtag->dt_boundary = parent->dt_boundary; else if (parent->dt_boundary != 0) newtag->dt_boundary = ulmin(parent->dt_boundary, newtag->dt_boundary); atomic_add_int(&parent->dt_ref_count, 1); if (newtag->dt_boundary > 0) newtag->dt_maxsegsz = ulmin(newtag->dt_maxsegsz, newtag->dt_boundary); *dmat = newtag; return (0); }
static void iv_lazypmap(uintptr_t a, uintptr_t b) { pmap_lazyfix_action(); atomic_add_int(&smp_tlb_wait, 1); }
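Handlers like this only increment smp_tlb_wait; the CPU that sent the shootdown IPI is expected to spin until every target has checked in. A generic sketch of that initiator-side wait (the names are illustrative, not the actual i386 SMP code):

/*
 * Illustrative initiator-side wait: after sending the IPI to ncpus
 * targets, spin until each handler has performed its
 * atomic_add_int(&smp_tlb_wait, 1).  cpu_spinwait() keeps the busy
 * wait polite on SMT hardware.
 */
static void
wait_for_tlb_ipi(volatile int *wait_cnt, int ncpus)
{
	while (*wait_cnt < ncpus)
		cpu_spinwait();
}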
int fork1(struct thread *td, struct fork_req *fr) { struct proc *p1, *newproc; struct thread *td2; struct vmspace *vm2; struct file *fp_procdesc; vm_ooffset_t mem_charged; int error, nprocs_new, ok; static int curfail; static struct timeval lastfail; int flags, pages; flags = fr->fr_flags; pages = fr->fr_pages; if ((flags & RFSTOPPED) != 0) MPASS(fr->fr_procp != NULL && fr->fr_pidp == NULL); else MPASS(fr->fr_procp == NULL); /* Check for the undefined or unimplemented flags. */ if ((flags & ~(RFFLAGS | RFTSIGFLAGS(RFTSIGMASK))) != 0) return (EINVAL); /* Signal value requires RFTSIGZMB. */ if ((flags & RFTSIGFLAGS(RFTSIGMASK)) != 0 && (flags & RFTSIGZMB) == 0) return (EINVAL); /* Can't copy and clear. */ if ((flags & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG)) return (EINVAL); /* Check the validity of the signal number. */ if ((flags & RFTSIGZMB) != 0 && (u_int)RFTSIGNUM(flags) > _SIG_MAXSIG) return (EINVAL); if ((flags & RFPROCDESC) != 0) { /* Can't not create a process yet get a process descriptor. */ if ((flags & RFPROC) == 0) return (EINVAL); /* Must provide a place to put a procdesc if creating one. */ if (fr->fr_pd_fd == NULL) return (EINVAL); /* Check if we are using supported flags. */ if ((fr->fr_pd_flags & ~PD_ALLOWED_AT_FORK) != 0) return (EINVAL); } p1 = td->td_proc; /* * Here we don't create a new process, but we divorce * certain parts of a process from itself. */ if ((flags & RFPROC) == 0) { if (fr->fr_procp != NULL) *fr->fr_procp = NULL; else if (fr->fr_pidp != NULL) *fr->fr_pidp = 0; return (fork_norfproc(td, flags)); } fp_procdesc = NULL; newproc = NULL; vm2 = NULL; /* * Increment the nprocs resource before allocations occur. * Although process entries are dynamically created, we still * keep a global limit on the maximum number we will * create. There are hard-limits as to the number of processes * that can run, established by the KVA and memory usage for * the process data. * * Don't allow a nonprivileged user to use the last ten * processes; don't let root exceed the limit. */ nprocs_new = atomic_fetchadd_int(&nprocs, 1) + 1; if ((nprocs_new >= maxproc - 10 && priv_check_cred(td->td_ucred, PRIV_MAXPROC, 0) != 0) || nprocs_new >= maxproc) { error = EAGAIN; sx_xlock(&allproc_lock); if (ppsratecheck(&lastfail, &curfail, 1)) { printf("maxproc limit exceeded by uid %u (pid %d); " "see tuning(7) and login.conf(5)\n", td->td_ucred->cr_ruid, p1->p_pid); } sx_xunlock(&allproc_lock); goto fail2; } /* * If required, create a process descriptor in the parent first; we * will abandon it if something goes wrong. We don't finit() until * later. */ if (flags & RFPROCDESC) { error = procdesc_falloc(td, &fp_procdesc, fr->fr_pd_fd, fr->fr_pd_flags, fr->fr_pd_fcaps); if (error != 0) goto fail2; } mem_charged = 0; if (pages == 0) pages = kstack_pages; /* Allocate new proc. */ newproc = uma_zalloc(proc_zone, M_WAITOK); td2 = FIRST_THREAD_IN_PROC(newproc); if (td2 == NULL) { td2 = thread_alloc(pages); if (td2 == NULL) { error = ENOMEM; goto fail2; } proc_linkup(newproc, td2); } else { if (td2->td_kstack == 0 || td2->td_kstack_pages != pages) { if (td2->td_kstack != 0) vm_thread_dispose(td2); if (!thread_alloc_stack(td2, pages)) { error = ENOMEM; goto fail2; } } } if ((flags & RFMEM) == 0) { vm2 = vmspace_fork(p1->p_vmspace, &mem_charged); if (vm2 == NULL) { error = ENOMEM; goto fail2; } if (!swap_reserve(mem_charged)) { /* * The swap reservation failed. The accounting * from the entries of the copied vm2 will be * subtracted in vmspace_free(), so force the * reservation there. 
*/ swap_reserve_force(mem_charged); error = ENOMEM; goto fail2; } } else vm2 = NULL; /* * XXX: This is ugly; when we copy resource usage, we need to bump * per-cred resource counters. */ proc_set_cred_init(newproc, crhold(td->td_ucred)); /* * Initialize resource accounting for the child process. */ error = racct_proc_fork(p1, newproc); if (error != 0) { error = EAGAIN; goto fail1; } #ifdef MAC mac_proc_init(newproc); #endif newproc->p_klist = knlist_alloc(&newproc->p_mtx); STAILQ_INIT(&newproc->p_ktr); /* We have to lock the process tree while we look for a pid. */ sx_slock(&proctree_lock); sx_xlock(&allproc_lock); /* * Increment the count of procs running with this uid. Don't allow * a nonprivileged user to exceed their current limit. * * XXXRW: Can we avoid privilege here if it's not needed? */ error = priv_check_cred(td->td_ucred, PRIV_PROC_LIMIT, 0); if (error == 0) ok = chgproccnt(td->td_ucred->cr_ruidinfo, 1, 0); else { ok = chgproccnt(td->td_ucred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_NPROC)); } if (ok) { do_fork(td, fr, newproc, td2, vm2, fp_procdesc); return (0); } error = EAGAIN; sx_sunlock(&proctree_lock); sx_xunlock(&allproc_lock); #ifdef MAC mac_proc_destroy(newproc); #endif racct_proc_exit(newproc); fail1: crfree(newproc->p_ucred); newproc->p_ucred = NULL; fail2: if (vm2 != NULL) vmspace_free(vm2); uma_zfree(proc_zone, newproc); if ((flags & RFPROCDESC) != 0 && fp_procdesc != NULL) { fdclose(td, fp_procdesc, *fr->fr_pd_fd); fdrop(fp_procdesc, td); } atomic_add_int(&nprocs, -1); pause("fork", hz / 2); return (error); }
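fork1() claims its nprocs slot optimistically with atomic_fetchadd_int() and backs it out with atomic_add_int(..., -1) on every failure path; the same reserve-then-roll-back shape reduced to a standalone sketch with hypothetical names:

#include <sys/types.h>
#include <machine/atomic.h>

static volatile u_int	slot_count;	/* current reservations */
static u_int		slot_limit;	/* hard cap, configured elsewhere */

static int
slot_reserve(void)
{
	/* Claim a slot first; the pre-add value plus one is our total. */
	if (atomic_fetchadd_int(&slot_count, 1) + 1 > slot_limit) {
		/* Over the limit: undo the optimistic reservation. */
		atomic_add_int(&slot_count, -1);
		return (-1);
	}
	return (0);
}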
int sctp_do_peeloff(struct socket *head, struct socket *so, sctp_assoc_t assoc_id) { struct sctp_inpcb *inp, *n_inp; struct sctp_tcb *stcb; uint32_t state; inp = (struct sctp_inpcb *)head->so_pcb; if (inp == NULL) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, EFAULT); return (EFAULT); } stcb = sctp_findassociation_ep_asocid(inp, assoc_id, 1); if (stcb == NULL) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, ENOTCONN); return (ENOTCONN); } state = SCTP_GET_STATE((&stcb->asoc)); if ((state == SCTP_STATE_EMPTY) || (state == SCTP_STATE_INUSE)) { SCTP_TCB_UNLOCK(stcb); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, ENOTCONN); return (ENOTCONN); } n_inp = (struct sctp_inpcb *)so->so_pcb; n_inp->sctp_flags = (SCTP_PCB_FLAGS_UDPTYPE | SCTP_PCB_FLAGS_CONNECTED | SCTP_PCB_FLAGS_IN_TCPPOOL | /* Turn on Blocking IO */ (SCTP_PCB_COPY_FLAGS & inp->sctp_flags)); n_inp->sctp_socket = so; n_inp->sctp_features = inp->sctp_features; n_inp->sctp_mobility_features = inp->sctp_mobility_features; n_inp->sctp_frag_point = inp->sctp_frag_point; n_inp->sctp_cmt_on_off = inp->sctp_cmt_on_off; n_inp->ecn_supported = inp->ecn_supported; n_inp->prsctp_supported = inp->prsctp_supported; n_inp->auth_supported = inp->auth_supported; n_inp->asconf_supported = inp->asconf_supported; n_inp->reconfig_supported = inp->reconfig_supported; n_inp->nrsack_supported = inp->nrsack_supported; n_inp->pktdrop_supported = inp->pktdrop_supported; n_inp->partial_delivery_point = inp->partial_delivery_point; n_inp->sctp_context = inp->sctp_context; n_inp->max_cwnd = inp->max_cwnd; n_inp->local_strreset_support = inp->local_strreset_support; n_inp->inp_starting_point_for_iterator = NULL; /* copy in the authentication parameters from the original endpoint */ if (n_inp->sctp_ep.local_hmacs) sctp_free_hmaclist(n_inp->sctp_ep.local_hmacs); n_inp->sctp_ep.local_hmacs = sctp_copy_hmaclist(inp->sctp_ep.local_hmacs); if (n_inp->sctp_ep.local_auth_chunks) sctp_free_chunklist(n_inp->sctp_ep.local_auth_chunks); n_inp->sctp_ep.local_auth_chunks = sctp_copy_chunklist(inp->sctp_ep.local_auth_chunks); (void)sctp_copy_skeylist(&inp->sctp_ep.shared_keys, &n_inp->sctp_ep.shared_keys); /* * Now we must move it from one hash table to another and get the * stcb in the right place. */ sctp_move_pcb_and_assoc(inp, n_inp, stcb); atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); sctp_pull_off_control_to_new_inp(inp, n_inp, stcb, SBL_WAIT); atomic_subtract_int(&stcb->asoc.refcnt, 1); return (0); }
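The refcnt bump before SCTP_TCB_UNLOCK() pins the association while work proceeds without the lock; the skeleton below isolates that pin-across-unlock idiom with a hypothetical object and mutex(9) calls (not SCTP's own locking macros):

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <machine/atomic.h>

/* Hypothetical object; only the fields the idiom needs. */
struct pinned_obj {
	struct mtx	lock;
	volatile u_int	refcnt;
};

static void pinned_obj_work_unlocked(struct pinned_obj *o);	/* stand-in */

static void
pinned_obj_call(struct pinned_obj *o)
{
	mtx_assert(&o->lock, MA_OWNED);
	/* Pin the object so dropping the lock cannot let it be freed. */
	atomic_add_int(&o->refcnt, 1);
	mtx_unlock(&o->lock);
	pinned_obj_work_unlocked(o);
	atomic_subtract_int(&o->refcnt, 1);
}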
int _pthread_create(pthread_t * thread, const pthread_attr_t * attr, void *(*start_routine) (void *), void *arg) { struct pthread *curthread, *new_thread; struct thr_param param; struct sched_param sched_param; struct rtprio rtp; sigset_t set, oset; cpuset_t *cpusetp; int i, cpusetsize, create_suspended, locked, old_stack_prot, ret; cpusetp = NULL; ret = cpusetsize = 0; _thr_check_init(); /* * Tell libc and others now they need locks to protect their data. */ if (_thr_isthreaded() == 0) { _malloc_first_thread(); if (_thr_setthreaded(1)) return (EAGAIN); } curthread = _get_curthread(); if ((new_thread = _thr_alloc(curthread)) == NULL) return (EAGAIN); memset(&param, 0, sizeof(param)); if (attr == NULL || *attr == NULL) /* Use the default thread attributes: */ new_thread->attr = _pthread_attr_default; else { new_thread->attr = *(*attr); cpusetp = new_thread->attr.cpuset; cpusetsize = new_thread->attr.cpusetsize; new_thread->attr.cpuset = NULL; new_thread->attr.cpusetsize = 0; } if (new_thread->attr.sched_inherit == PTHREAD_INHERIT_SCHED) { /* inherit scheduling contention scope */ if (curthread->attr.flags & PTHREAD_SCOPE_SYSTEM) new_thread->attr.flags |= PTHREAD_SCOPE_SYSTEM; else new_thread->attr.flags &= ~PTHREAD_SCOPE_SYSTEM; new_thread->attr.prio = curthread->attr.prio; new_thread->attr.sched_policy = curthread->attr.sched_policy; } new_thread->tid = TID_TERMINATED; old_stack_prot = _rtld_get_stack_prot(); if (create_stack(&new_thread->attr) != 0) { /* Insufficient memory to create a stack: */ _thr_free(curthread, new_thread); return (EAGAIN); } /* * Write a magic value to the thread structure * to help identify valid ones: */ new_thread->magic = THR_MAGIC; new_thread->start_routine = start_routine; new_thread->arg = arg; new_thread->cancel_enable = 1; new_thread->cancel_async = 0; /* Initialize the mutex queue: */ for (i = 0; i < TMQ_NITEMS; i++) TAILQ_INIT(&new_thread->mq[i]); /* Initialise hooks in the thread structure: */ if (new_thread->attr.suspend == THR_CREATE_SUSPENDED) { new_thread->flags = THR_FLAGS_NEED_SUSPEND; create_suspended = 1; } else { create_suspended = 0; } new_thread->state = PS_RUNNING; if (new_thread->attr.flags & PTHREAD_CREATE_DETACHED) new_thread->flags |= THR_FLAGS_DETACHED; /* Add the new thread. */ new_thread->refcount = 1; _thr_link(curthread, new_thread); /* * Handle the race between __pthread_map_stacks_exec and * thread linkage. */ if (old_stack_prot != _rtld_get_stack_prot()) _thr_stack_fix_protection(new_thread); /* Return thread pointer earlier so that the new thread can use it. */ (*thread) = new_thread; if (SHOULD_REPORT_EVENT(curthread, TD_CREATE) || cpusetp != NULL) { THR_THREAD_LOCK(curthread, new_thread); locked = 1; } else locked = 0; param.start_func = (void (*)(void *)) thread_start; param.arg = new_thread; param.stack_base = new_thread->attr.stackaddr_attr; param.stack_size = new_thread->attr.stacksize_attr; param.tls_base = (char *)new_thread->tcb; param.tls_size = sizeof(struct tcb); param.child_tid = &new_thread->tid; param.parent_tid = &new_thread->tid; param.flags = 0; if (new_thread->attr.flags & PTHREAD_SCOPE_SYSTEM) param.flags |= THR_SYSTEM_SCOPE; if (new_thread->attr.sched_inherit == PTHREAD_INHERIT_SCHED) param.rtp = NULL; else { sched_param.sched_priority = new_thread->attr.prio; _schedparam_to_rtp(new_thread->attr.sched_policy, &sched_param, &rtp); param.rtp = &rtp; } /* Schedule the new thread.
*/ if (create_suspended) { SIGFILLSET(set); SIGDELSET(set, SIGTRAP); __sys_sigprocmask(SIG_SETMASK, &set, &oset); new_thread->sigmask = oset; SIGDELSET(new_thread->sigmask, SIGCANCEL); } ret = thr_new(&param, sizeof(param)); if (ret != 0) { ret = errno; /* * Translate EPROCLIM into well-known POSIX code EAGAIN. */ if (ret == EPROCLIM) ret = EAGAIN; } if (create_suspended) __sys_sigprocmask(SIG_SETMASK, &oset, NULL); if (ret != 0) { if (!locked) THR_THREAD_LOCK(curthread, new_thread); new_thread->state = PS_DEAD; new_thread->tid = TID_TERMINATED; new_thread->flags |= THR_FLAGS_DETACHED; new_thread->refcount--; if (new_thread->flags & THR_FLAGS_NEED_SUSPEND) { new_thread->cycle++; _thr_umtx_wake(&new_thread->cycle, INT_MAX, 0); } _thr_try_gc(curthread, new_thread); /* thread lock released */ atomic_add_int(&_thread_active_threads, -1); } else if (locked) { if (cpusetp != NULL) { if (cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, TID(new_thread), cpusetsize, cpusetp)) { ret = errno; /* kill the new thread */ new_thread->force_exit = 1; new_thread->flags |= THR_FLAGS_DETACHED; _thr_try_gc(curthread, new_thread); /* thread lock released */ goto out; } } _thr_report_creation(curthread, new_thread); THR_THREAD_UNLOCK(curthread, new_thread); } out: if (ret) (*thread) = 0; return (ret); }
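From the caller's perspective, the failure paths above surface simply as pthread_create() returning an error code (with EPROCLIM already folded into EAGAIN); a minimal userland usage example:

#include <pthread.h>
#include <stdio.h>
#include <string.h>

static void *
worker(void *arg)
{
	(void)arg;
	return (NULL);
}

int
main(void)
{
	pthread_t tid;
	int error;

	/* pthread_create() returns the error code directly, not via errno. */
	error = pthread_create(&tid, NULL, worker, NULL);
	if (error != 0) {
		fprintf(stderr, "pthread_create: %s\n", strerror(error));
		return (1);
	}
	pthread_join(tid, NULL);
	return (0);
}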