/*
 * Ethernet output routine.
 * Encapsulate a packet of type family for the local net.
 * Use trailer local net encapsulation if enough data in first
 * packet leaves a multiple of 512 bytes of data in remainder.
 */
int
ether_output(struct ifnet *ifp, struct mbuf *m,
	const struct sockaddr *dst, struct route *ro)
{
	short type;
	int error = 0, hdrcmplt = 0;
	u_char esrc[ETHER_ADDR_LEN], edst[ETHER_ADDR_LEN];
	struct llentry *lle = NULL;
	struct rtentry *rt0 = NULL;
	struct ether_header *eh;
	struct pf_mtag *t;
	int loop_copy = 1;
	int hlen;	/* link layer header length */

	if (ro != NULL) {
		if (!(m->m_flags & (M_BCAST | M_MCAST)))
			lle = ro->ro_lle;
		rt0 = ro->ro_rt;
	}
#ifdef MAC
	error = mac_ifnet_check_transmit(ifp, m);
	if (error)
		senderr(error);
#endif

	M_PROFILE(m);
	if (ifp->if_flags & IFF_MONITOR)
		senderr(ENETDOWN);
	if (!((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING)))
		senderr(ENETDOWN);

	hlen = ETHER_HDR_LEN;
	switch (dst->sa_family) {
#ifdef INET
	case AF_INET:
		if (lle != NULL && (lle->la_flags & LLE_VALID))
			memcpy(edst, &lle->ll_addr.mac16, sizeof(edst));
		else
			error = arpresolve(ifp, rt0, m, dst, edst, &lle);
		if (error)
			return (error == EWOULDBLOCK ? 0 : error);
		type = htons(ETHERTYPE_IP);
		break;
	case AF_ARP:
	{
		struct arphdr *ah;
		ah = mtod(m, struct arphdr *);
		ah->ar_hrd = htons(ARPHRD_ETHER);

		loop_copy = 0; /* if this is for us, don't do it */

		switch(ntohs(ah->ar_op)) {
		case ARPOP_REVREQUEST:
		case ARPOP_REVREPLY:
			type = htons(ETHERTYPE_REVARP);
			break;
		case ARPOP_REQUEST:
		case ARPOP_REPLY:
		default:
			type = htons(ETHERTYPE_ARP);
			break;
		}

		if (m->m_flags & M_BCAST)
			bcopy(ifp->if_broadcastaddr, edst, ETHER_ADDR_LEN);
		else
			bcopy(ar_tha(ah), edst, ETHER_ADDR_LEN);

	}
	break;
#endif
#ifdef INET6
	case AF_INET6:
		if (lle != NULL && (lle->la_flags & LLE_VALID))
			memcpy(edst, &lle->ll_addr.mac16, sizeof(edst));
		else
			error = nd6_storelladdr(ifp, m, dst, (u_char *)edst,
			    &lle);
		if (error)
			return (error);
		type = htons(ETHERTYPE_IPV6);
		break;
#endif
	case pseudo_AF_HDRCMPLT:
	    {
		const struct ether_header *eh;

		hdrcmplt = 1;
		eh = (const struct ether_header *)dst->sa_data;
		(void)memcpy(esrc, eh->ether_shost, sizeof (esrc));
		/* FALLTHROUGH */

	case AF_UNSPEC:
		loop_copy = 0; /* if this is for us, don't do it */
		eh = (const struct ether_header *)dst->sa_data;
		(void)memcpy(edst, eh->ether_dhost, sizeof (edst));
		type = eh->ether_type;
		break;
	    }
	default:
		if_printf(ifp, "can't handle af%d\n", dst->sa_family);
		senderr(EAFNOSUPPORT);
	}

	if (lle != NULL && (lle->la_flags & LLE_IFADDR)) {
		update_mbuf_csumflags(m, m);
		return (if_simloop(ifp, m, dst->sa_family, 0));
	}

	/*
	 * Add local net header.  If no space in first mbuf,
	 * allocate another.
	 */
	M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT);
	if (m == NULL)
		senderr(ENOBUFS);
	eh = mtod(m, struct ether_header *);
	(void)memcpy(&eh->ether_type, &type, sizeof(eh->ether_type));
	(void)memcpy(eh->ether_dhost, edst, sizeof (edst));
	if (hdrcmplt)
		(void)memcpy(eh->ether_shost, esrc,
		    sizeof(eh->ether_shost));
	else
		(void)memcpy(eh->ether_shost, IF_LLADDR(ifp),
		    sizeof(eh->ether_shost));

	/*
	 * If a simplex interface, and the packet is being sent to our
	 * Ethernet address or a broadcast address, loopback a copy.
	 * XXX To make a simplex device behave exactly like a duplex
	 * device, we should copy in the case of sending to our own
	 * ethernet address (thus letting the original actually appear
	 * on the wire). However, we don't do that here for security
	 * reasons and compatibility with the original behavior.
	 */
	if ((ifp->if_flags & IFF_SIMPLEX) && loop_copy &&
	    ((t = pf_find_mtag(m)) == NULL || !t->routed)) {
		if (m->m_flags & M_BCAST) {
			struct mbuf *n;

			/*
			 * Because if_simloop() modifies the packet, we need a
			 * writable copy through m_dup() instead of a readonly
			 * one as m_copy[m] would give us. The alternative would
			 * be to modify if_simloop() to handle the readonly mbuf,
			 * but performancewise it is mostly equivalent (trading
			 * extra data copying vs. extra locking).
			 *
			 * XXX This is a local workaround.  A number of less
			 * often used kernel parts suffer from the same bug.
			 * See PR kern/105943 for a proposed general solution.
			 */
			if ((n = m_dup(m, M_NOWAIT)) != NULL) {
				update_mbuf_csumflags(m, n);
				(void)if_simloop(ifp, n, dst->sa_family, hlen);
			} else
				ifp->if_iqdrops++;
		} else if (bcmp(eh->ether_dhost, eh->ether_shost,
		    ETHER_ADDR_LEN) == 0) {
			update_mbuf_csumflags(m, m);
			(void) if_simloop(ifp, m, dst->sa_family, hlen);
			return (0);	/* XXX */
		}
	}

	/*
	 * Bridges require special output handling.
	 */
	if (ifp->if_bridge) {
		BRIDGE_OUTPUT(ifp, m, error);
		return (error);
	}

#if defined(INET) || defined(INET6)
	if (ifp->if_carp &&
	    (error = (*carp_output_p)(ifp, m, dst)))
		goto bad;
#endif

	/* Handle ng_ether(4) processing, if any */
	if (IFP2AC(ifp)->ac_netgraph != NULL) {
		KASSERT(ng_ether_output_p != NULL,
		    ("ng_ether_output_p is NULL"));
		if ((error = (*ng_ether_output_p)(ifp, &m)) != 0) {
bad:			if (m != NULL)
				m_freem(m);
			return (error);
		}
		if (m == NULL)
			return (0);
	}

	/* Continue with link-layer output */
	return ether_output_frame(ifp, m);
}
static int
udp6_send(struct socket *so, int flags, struct mbuf *m,
    struct sockaddr *addr, struct mbuf *control, struct thread *td)
{
	struct inpcb *inp;
	struct inpcbinfo *pcbinfo;
	int error = 0;

	pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("udp6_send: inp == NULL"));

	INP_WLOCK(inp);
	if (addr) {
		if (addr->sa_len != sizeof(struct sockaddr_in6)) {
			error = EINVAL;
			goto bad;
		}
		if (addr->sa_family != AF_INET6) {
			error = EAFNOSUPPORT;
			goto bad;
		}
	}

#ifdef INET
	if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
		int hasv4addr;
		struct sockaddr_in6 *sin6 = NULL;

		if (addr == NULL)
			hasv4addr = (inp->inp_vflag & INP_IPV4);
		else {
			sin6 = (struct sockaddr_in6 *)addr;
			hasv4addr =
			    IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) ? 1 : 0;
		}
		if (hasv4addr) {
			struct pr_usrreqs *pru;

			/*
			 * XXXRW: We release UDP-layer locks before calling
			 * udp_send() in order to avoid recursion.  However,
			 * this does mean there is a short window where inp's
			 * fields are unstable.  Could this lead to a
			 * potential race in which the factors causing us to
			 * select the UDPv4 output routine are invalidated?
			 */
			INP_WUNLOCK(inp);
			if (sin6)
				in6_sin6_2_sin_in_sock(addr);
			pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs;
			/* addr will just be freed in sendit(). */
			return ((*pru->pru_send)(so, flags, m, addr, control,
			    td));
		}
	}
#endif
#ifdef MAC
	mac_inpcb_create_mbuf(inp, m);
#endif
	INP_HASH_WLOCK(pcbinfo);
	error = udp6_output(inp, m, addr, control, td);
	INP_HASH_WUNLOCK(pcbinfo);
	INP_WUNLOCK(inp);
	return (error);

bad:
	INP_WUNLOCK(inp);
	m_freem(m);
	return (error);
}
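/*
 * The v4-mapped test above hinges on IN6_IS_ADDR_V4MAPPED().  A minimal
 * userland sketch (standard sockets API, not kernel code) of what that
 * macro matches:
 */
#include <stdio.h>
#include <arpa/inet.h>
#include <netinet/in.h>

int
main(void)
{
	struct in6_addr a;

	/* "::ffff:192.0.2.1" is an IPv4 address embedded in IPv6; such
	 * destinations are routed to the UDPv4 output path above. */
	inet_pton(AF_INET6, "::ffff:192.0.2.1", &a);
	printf("v4-mapped: %d\n", IN6_IS_ADDR_V4MAPPED(&a));	/* 1 */

	inet_pton(AF_INET6, "2001:db8::1", &a);
	printf("v4-mapped: %d\n", IN6_IS_ADDR_V4MAPPED(&a));	/* 0 */
	return (0);
}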
int
drmfb_attach(struct drmfb_softc *sc, const struct drmfb_attach_args *da)
{
	const struct drm_fb_helper_surface_size *const sizes = da->da_fb_sizes;
	const prop_dictionary_t dict = device_properties(da->da_dev);
#if NVGA > 0
	struct drm_device *const dev = da->da_fb_helper->dev;
#endif
	static const struct genfb_ops zero_genfb_ops;
	struct genfb_ops genfb_ops = zero_genfb_ops;
	enum { CONS_VGA, CONS_GENFB, CONS_NONE } what_was_cons;
	int error;

	/* genfb requires this.  */
	KASSERTMSG((void *)&sc->sc_genfb == device_private(da->da_dev),
	    "drmfb_softc must be first member of device softc");

	sc->sc_da = *da;

	prop_dictionary_set_uint32(dict, "width", sizes->surface_width);
	prop_dictionary_set_uint32(dict, "height", sizes->surface_height);
	prop_dictionary_set_uint8(dict, "depth", sizes->surface_bpp);
	prop_dictionary_set_uint16(dict, "linebytes",
	    roundup2((sizes->surface_width * howmany(sizes->surface_bpp, 8)),
		64));
	prop_dictionary_set_uint32(dict, "address", 0); /* XXX >32-bit */
	CTASSERT(sizeof(uintptr_t) <= sizeof(uint64_t));
	prop_dictionary_set_uint64(dict, "virtual_address",
	    (uint64_t)(uintptr_t)da->da_fb_vaddr);

	prop_dictionary_set_uint64(dict, "mode_callback",
	    (uint64_t)(uintptr_t)&drmfb_genfb_mode_callback);

	/* XXX Whattakludge!  */
#if NVGA > 0
	if ((da->da_params->dp_is_vga_console != NULL) &&
	    (*da->da_params->dp_is_vga_console)(dev)) {
		what_was_cons = CONS_VGA;
		prop_dictionary_set_bool(dict, "is_console", true);
		vga_cndetach();
		if (da->da_params->dp_disable_vga)
			(*da->da_params->dp_disable_vga)(dev);
	} else
#endif
	if (genfb_is_console() && genfb_is_enabled()) {
		what_was_cons = CONS_GENFB;
		prop_dictionary_set_bool(dict, "is_console", true);
	} else {
		what_was_cons = CONS_NONE;
		prop_dictionary_set_bool(dict, "is_console", false);
	}

	sc->sc_genfb.sc_dev = sc->sc_da.da_dev;
	genfb_init(&sc->sc_genfb);
	genfb_ops.genfb_ioctl = drmfb_genfb_ioctl;
	genfb_ops.genfb_mmap = drmfb_genfb_mmap;
	genfb_ops.genfb_enable_polling = drmfb_genfb_enable_polling;
	genfb_ops.genfb_disable_polling = drmfb_genfb_disable_polling;

	error = genfb_attach(&sc->sc_genfb, &genfb_ops);
	if (error) {
		aprint_error_dev(sc->sc_da.da_dev,
		    "failed to attach genfb: %d\n", error);
		goto fail0;
	}

	/* Success!  */
	return 0;

fail0:	KASSERT(error);
	/* XXX Restore console...  */
	switch (what_was_cons) {
	case CONS_VGA:
		break;
	case CONS_GENFB:
		break;
	case CONS_NONE:
		break;
	default:
		break;
	}
	return error;
}
static void
srat_parse_entry(ACPI_SUBTABLE_HEADER *entry, void *arg)
{
	ACPI_SRAT_CPU_AFFINITY *cpu;
	ACPI_SRAT_X2APIC_CPU_AFFINITY *x2apic;
	ACPI_SRAT_MEM_AFFINITY *mem;
	int domain, i, slot;

	switch (entry->Type) {
	case ACPI_SRAT_TYPE_CPU_AFFINITY:
		cpu = (ACPI_SRAT_CPU_AFFINITY *)entry;
		domain = cpu->ProximityDomainLo |
		    cpu->ProximityDomainHi[0] << 8 |
		    cpu->ProximityDomainHi[1] << 16 |
		    cpu->ProximityDomainHi[2] << 24;
		if (bootverbose)
			printf("SRAT: Found CPU APIC ID %u domain %d: %s\n",
			    cpu->ApicId, domain,
			    (cpu->Flags & ACPI_SRAT_CPU_ENABLED) ?
			    "enabled" : "disabled");
		if (!(cpu->Flags & ACPI_SRAT_CPU_ENABLED))
			break;
		KASSERT(!cpus[cpu->ApicId].enabled,
		    ("Duplicate local APIC ID %u", cpu->ApicId));
		cpus[cpu->ApicId].domain = domain;
		cpus[cpu->ApicId].enabled = 1;
		break;
	case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY:
		x2apic = (ACPI_SRAT_X2APIC_CPU_AFFINITY *)entry;
		if (bootverbose)
			printf("SRAT: Found CPU APIC ID %u domain %d: %s\n",
			    x2apic->ApicId, x2apic->ProximityDomain,
			    (x2apic->Flags & ACPI_SRAT_CPU_ENABLED) ?
			    "enabled" : "disabled");
		if (!(x2apic->Flags & ACPI_SRAT_CPU_ENABLED))
			break;
		KASSERT(!cpus[x2apic->ApicId].enabled,
		    ("Duplicate local APIC ID %u", x2apic->ApicId));
		cpus[x2apic->ApicId].domain = x2apic->ProximityDomain;
		cpus[x2apic->ApicId].enabled = 1;
		break;
	case ACPI_SRAT_TYPE_MEMORY_AFFINITY:
		mem = (ACPI_SRAT_MEM_AFFINITY *)entry;
		if (bootverbose)
			printf(
		    "SRAT: Found memory domain %d addr %jx len %jx: %s\n",
			    mem->ProximityDomain, (uintmax_t)mem->BaseAddress,
			    (uintmax_t)mem->Length,
			    (mem->Flags & ACPI_SRAT_MEM_ENABLED) ?
			    "enabled" : "disabled");
		if (!(mem->Flags & ACPI_SRAT_MEM_ENABLED))
			break;
		if (num_mem == VM_PHYSSEG_MAX) {
			printf("SRAT: Too many memory regions\n");
			*(int *)arg = ENXIO;
			break;
		}
		/*
		 * Find the insertion point that keeps mem_info[] sorted by
		 * base address, stopping at the first region lying entirely
		 * beyond the new one.  (Without the break, slot would walk
		 * past the correct position to the last such region.)
		 */
		slot = num_mem;
		for (i = 0; i < num_mem; i++) {
			if (mem_info[i].end <= mem->BaseAddress)
				continue;
			if (mem_info[i].start <
			    (mem->BaseAddress + mem->Length)) {
				printf("SRAT: Overlapping memory entries\n");
				*(int *)arg = ENXIO;
				return;
			}
			slot = i;
			break;
		}
		for (i = num_mem; i > slot; i--)
			mem_info[i] = mem_info[i - 1];
		mem_info[slot].start = mem->BaseAddress;
		mem_info[slot].end = mem->BaseAddress + mem->Length;
		mem_info[slot].domain = mem->ProximityDomain;
		num_mem++;
		break;
	}
}
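/*
 * A standalone sketch of the memory-affinity insertion logic above
 * (simplified, hypothetical types and names): keep the table sorted by
 * base address, refuse overlapping segments.
 */
#include <stdio.h>

struct seg { unsigned long start, end; };

#define NSEG 8
static struct seg segs[NSEG];
static int nsegs;

/* Insert [start, end); returns -1 on table overflow or overlap. */
static int
seg_insert(unsigned long start, unsigned long end)
{
	int i, slot = nsegs;

	if (nsegs == NSEG)
		return (-1);
	for (i = 0; i < nsegs; i++) {
		if (segs[i].end <= start)
			continue;	/* entirely below the new segment */
		if (segs[i].start < end)
			return (-1);	/* overlap */
		slot = i;		/* first segment above the new one */
		break;
	}
	for (i = nsegs; i > slot; i--)
		segs[i] = segs[i - 1];
	segs[slot].start = start;
	segs[slot].end = end;
	nsegs++;
	return (0);
}

int
main(void)
{
	seg_insert(0x4000, 0x8000);
	seg_insert(0x0000, 0x2000);	/* lands before the first entry */
	printf("overlap rejected: %d\n", seg_insert(0x1000, 0x5000));
	for (int i = 0; i < nsegs; i++)
		printf("[%#lx, %#lx)\n", segs[i].start, segs[i].end);
	return (0);
}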
static int
pci_list_vpd(device_t dev, struct pci_list_vpd_io *lvio)
{
	struct pci_vpd_element vpd_element, *vpd_user;
	struct pcicfg_vpd *vpd;
	size_t len;
	int error, i;

	vpd = pci_fetch_vpd_list(dev);
	if (vpd->vpd_reg == 0 || vpd->vpd_ident == NULL)
		return (ENXIO);

	/*
	 * Calculate the amount of space needed in the data buffer.  An
	 * identifier element is always present followed by the read-only
	 * and read-write keywords.
	 */
	len = sizeof(struct pci_vpd_element) + strlen(vpd->vpd_ident);
	for (i = 0; i < vpd->vpd_rocnt; i++)
		len += sizeof(struct pci_vpd_element) + vpd->vpd_ros[i].len;
	for (i = 0; i < vpd->vpd_wcnt; i++)
		len += sizeof(struct pci_vpd_element) + vpd->vpd_w[i].len;

	if (lvio->plvi_len == 0) {
		lvio->plvi_len = len;
		return (0);
	}
	if (lvio->plvi_len < len) {
		lvio->plvi_len = len;
		return (ENOMEM);
	}

	/*
	 * Copyout the identifier string followed by each keyword and
	 * value.
	 */
	vpd_user = lvio->plvi_data;
	vpd_element.pve_keyword[0] = '\0';
	vpd_element.pve_keyword[1] = '\0';
	vpd_element.pve_flags = PVE_FLAG_IDENT;
	vpd_element.pve_datalen = strlen(vpd->vpd_ident);
	error = copyout(&vpd_element, vpd_user, sizeof(vpd_element));
	if (error)
		return (error);
	error = copyout(vpd->vpd_ident, vpd_user->pve_data,
	    strlen(vpd->vpd_ident));
	if (error)
		return (error);
	vpd_user = PVE_NEXT(vpd_user);
	vpd_element.pve_flags = 0;
	for (i = 0; i < vpd->vpd_rocnt; i++) {
		vpd_element.pve_keyword[0] = vpd->vpd_ros[i].keyword[0];
		vpd_element.pve_keyword[1] = vpd->vpd_ros[i].keyword[1];
		vpd_element.pve_datalen = vpd->vpd_ros[i].len;
		error = copyout(&vpd_element, vpd_user, sizeof(vpd_element));
		if (error)
			return (error);
		error = copyout(vpd->vpd_ros[i].value, vpd_user->pve_data,
		    vpd->vpd_ros[i].len);
		if (error)
			return (error);
		vpd_user = PVE_NEXT(vpd_user);
	}
	vpd_element.pve_flags = PVE_FLAG_RW;
	for (i = 0; i < vpd->vpd_wcnt; i++) {
		vpd_element.pve_keyword[0] = vpd->vpd_w[i].keyword[0];
		vpd_element.pve_keyword[1] = vpd->vpd_w[i].keyword[1];
		vpd_element.pve_datalen = vpd->vpd_w[i].len;
		error = copyout(&vpd_element, vpd_user, sizeof(vpd_element));
		if (error)
			return (error);
		error = copyout(vpd->vpd_w[i].value, vpd_user->pve_data,
		    vpd->vpd_w[i].len);
		if (error)
			return (error);
		vpd_user = PVE_NEXT(vpd_user);
	}
	KASSERT((char *)vpd_user - (char *)lvio->plvi_data == len,
	    ("length mismatch"));
	lvio->plvi_len = len;
	return (0);
}
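/*
 * Userland view of the protocol implemented above, assuming FreeBSD's
 * PCIOCLISTVPD ioctl on /dev/pci: call once with plvi_len == 0 to learn
 * the required buffer size, then again with a buffer.  The device
 * selector values below are illustrative.
 */
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/pciio.h>
#include <err.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int
main(void)
{
	struct pci_list_vpd_io lvio;
	int fd;

	fd = open("/dev/pci", O_RDONLY);
	if (fd < 0)
		err(1, "open(/dev/pci)");

	memset(&lvio, 0, sizeof(lvio));
	lvio.plvi_sel.pc_domain = 0;	/* device 0:0:0:0, for example */
	lvio.plvi_sel.pc_bus = 0;
	lvio.plvi_sel.pc_dev = 0;
	lvio.plvi_sel.pc_func = 0;

	/* First call: plvi_len == 0 asks the kernel for the needed size. */
	if (ioctl(fd, PCIOCLISTVPD, &lvio) < 0)
		err(1, "PCIOCLISTVPD (size query)");

	lvio.plvi_data = malloc(lvio.plvi_len);
	if (lvio.plvi_data == NULL)
		err(1, "malloc");

	/* Second call: the kernel copies out the VPD element list. */
	if (ioctl(fd, PCIOCLISTVPD, &lvio) < 0)
		err(1, "PCIOCLISTVPD (data)");

	printf("got %zu bytes of VPD data\n", (size_t)lvio.plvi_len);
	free(lvio.plvi_data);
	return (0);
}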
void
send_flowc_wr(struct toepcb *toep, struct flowc_tx_params *ftxp)
{
	struct wrqe *wr;
	struct fw_flowc_wr *flowc;
	unsigned int nparams = ftxp ? 8 : 6, flowclen;
	struct port_info *pi = toep->port;
	struct adapter *sc = pi->adapter;
	unsigned int pfvf = G_FW_VIID_PFN(pi->viid) << S_FW_VIID_PFN;
	struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx];

	KASSERT(!(toep->flags & TPF_FLOWC_WR_SENT),
	    ("%s: flowc for tid %u sent already", __func__, toep->tid));

	flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval);

	wr = alloc_wrqe(roundup2(flowclen, 16), toep->ofld_txq);
	if (wr == NULL) {
		/* XXX */
		panic("%s: allocation failure.", __func__);
	}
	flowc = wrtod(wr);
	memset(flowc, 0, wr->wr_len);

	flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) |
	    V_FW_FLOWC_WR_NPARAMS(nparams));
	flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) |
	    V_FW_WR_FLOWID(toep->tid));

	flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
	flowc->mnemval[0].val = htobe32(pfvf);
	flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH;
	flowc->mnemval[1].val = htobe32(pi->tx_chan);
	flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT;
	flowc->mnemval[2].val = htobe32(pi->tx_chan);
	flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID;
	flowc->mnemval[3].val = htobe32(toep->ofld_rxq->iq.abs_id);
	if (ftxp) {
		uint32_t sndbuf = min(ftxp->snd_space, sc->tt.sndbuf);

		flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDNXT;
		flowc->mnemval[4].val = htobe32(ftxp->snd_nxt);
		flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_RCVNXT;
		flowc->mnemval[5].val = htobe32(ftxp->rcv_nxt);
		flowc->mnemval[6].mnemonic = FW_FLOWC_MNEM_SNDBUF;
		flowc->mnemval[6].val = htobe32(sndbuf);
		flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS;
		flowc->mnemval[7].val = htobe32(ftxp->mss);

		CTR6(KTR_CXGBE,
		    "%s: tid %u, mss %u, sndbuf %u, snd_nxt 0x%x, rcv_nxt 0x%x",
		    __func__, toep->tid, ftxp->mss, sndbuf, ftxp->snd_nxt,
		    ftxp->rcv_nxt);
	} else {
		flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDBUF;
		flowc->mnemval[4].val = htobe32(512);
		flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_MSS;
		flowc->mnemval[5].val = htobe32(512);

		CTR2(KTR_CXGBE, "%s: tid %u", __func__, toep->tid);
	}

	txsd->tx_credits = howmany(flowclen, 16);
	txsd->plen = 0;
	KASSERT(toep->tx_credits >= txsd->tx_credits && toep->txsd_avail > 0,
	    ("%s: not enough credits (%d)", __func__, toep->tx_credits));
	toep->tx_credits -= txsd->tx_credits;
	if (__predict_false(++toep->txsd_pidx == toep->txsd_total))
		toep->txsd_pidx = 0;
	toep->txsd_avail--;

	toep->flags |= TPF_FLOWC_WR_SENT;
	t4_wrq_tx(sc, wr);
}
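/*
 * The sizing above turns a byte count into 16-byte work-request credits
 * via howmany() and roundup2().  A quick arithmetic check (plain C; the
 * 8-byte header and mnemval sizes are illustrative, not taken from the
 * firmware headers):
 */
#include <stdio.h>

#define howmany(x, y)	(((x) + ((y) - 1)) / (y))
#define roundup2(x, y)	(((x) + ((y) - 1)) & ~((y) - 1))  /* y: power of 2 */

int
main(void)
{
	unsigned hdr = 8, mnemval = 8, nparams = 8;
	unsigned flowclen = hdr + nparams * mnemval;		/* 72 bytes */

	printf("alloc size : %u\n", roundup2(flowclen, 16));	/* 80 */
	printf("tx credits : %u\n", howmany(flowclen, 16));	/* 5 */
	return (0);
}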
int
sys_execv(userptr_t progname, userptr_t *arguments)
{
	struct proc *proc = curproc;
	struct addrspace *as;
	struct vnode *v;
	vaddr_t entrypoint, stackptr;
	int result;

	/*
	 * Reject NULL and known-bad user pointers up front.  (On OS/161
	 * MIPS, 0x80000000 is the start of kernel space; 0x40000000 is an
	 * unmapped address used by the test suite.)
	 */
	if (progname == NULL || progname == (void *)0x80000000 ||
	    progname == (void *)0x40000000) {
		return EFAULT;
	}
	if (arguments == NULL || arguments == (void *)0x80000000 ||
	    arguments == (void *)0x40000000) {
		return EFAULT;
	}

	/* This process should have an address space copied during fork. */
	KASSERT(proc != NULL);

	char *_progname;
	size_t size;
	int i = 0, count = 0;

	/* Copy the program path into the kernel. */
	_progname = (char *)kmalloc(sizeof(char) * PATH_MAX);
	if (_progname == NULL) {
		return ENOMEM;
	}
	result = copyinstr(progname, _progname, PATH_MAX, &size);
	if (result) {
		kfree(_progname);
		return EFAULT;
	}
	if (strlen(_progname) == 0) {
		kfree(_progname);
		return EINVAL;
	}

	/* Copy the user argument strings into one kernel buffer. */
	char *args = (char *)kmalloc(sizeof(char) * ARG_MAX);
	if (args == NULL) {
		kfree(_progname);
		return ENOMEM;
	}
	int offset = 0;
	for (;;) {
		userptr_t uarg;

		/*
		 * Fetch arguments[count] itself with copyin: the argv
		 * array lives in user space and must not be dereferenced
		 * directly.
		 */
		result = copyin((const_userptr_t)&arguments[count], &uarg,
		    sizeof(uarg));
		if (result) {
			kfree(_progname);
			kfree(args);
			return EFAULT;
		}
		if (uarg == NULL) {
			break;
		}
		result = copyinstr((const_userptr_t)uarg, args + offset,
		    ARG_MAX - offset, &size);
		if (result) {
			kfree(_progname);
			kfree(args);
			return EFAULT;
		}
		offset += size;
		count++;
	}

	/* Open the executable, using the kernel copy of the path. */
	result = vfs_open(_progname, O_RDONLY, 0, &v);
	kfree(_progname);
	if (result) {
		kfree(args);
		return result;
	}

	/* Destroy the current address space and create a new one. */
	as_destroy(proc->p_addrspace);
	proc->p_addrspace = NULL;
	KASSERT(proc_getas() == NULL);
	as = as_create();
	if (as == NULL) {
		kfree(args);
		vfs_close(v);
		return ENOMEM;
	}

	/* Switch to it and activate it. */
	proc_setas(as);
	as_activate();

	/* Load the executable. */
	result = load_elf(v, &entrypoint);
	if (result) {
		kfree(args);
		vfs_close(v);
		return result;
	}

	/* Done with the file now. */
	vfs_close(v);

	/* Define the user stack in the address space. */
	result = as_define_stack(as, &stackptr);
	if (result) {
		kfree(args);
		return result;
	}

	/*
	 * Copy each argument string onto the user stack, padded to a
	 * 4-byte boundary, remembering where each one lands.
	 */
	i = 0;
	int prevlen = 0, cur = 0;
	char **stkargs = (char **)kmalloc(sizeof(char *) * (count + 1));
	if (stkargs == NULL) {
		kfree(args);
		return ENOMEM;
	}
	while (i < offset) {
		int len = 0, olen;

		for (len = 0; args[i] != '\0'; len++, i++)
			;
		olen = len;
		/* Pad; this always leaves at least one '\0' terminator. */
		len = len + (4 - len % 4);
		char *arg = kmalloc(len);
		if (arg == NULL) {
			kfree(args);
			kfree(stkargs);
			return ENOMEM;
		}
		int j = prevlen;
		for (int k = 0; k < len; k++) {
			if (k >= olen)
				arg[k] = '\0';
			else
				arg[k] = args[j++];
		}
		stackptr -= len;
		result = copyout((const void *)arg, (userptr_t)stackptr,
		    (size_t)len);
		kfree(arg);
		if (result) {
			kfree(args);
			kfree(stkargs);
			return result;
		}
		stkargs[cur++] = (char *)stackptr;
		prevlen += olen + 1;
		i++;		/* skip the '\0' */
	}
	stkargs[cur] = NULL;
	kfree(args);

	/* Copy out the argv pointer array itself, highest index first. */
	for (i = cur; i >= 0; i--) {
		stackptr = stackptr - sizeof(char *);
		result = copyout((const void *)(stkargs + i),
		    (userptr_t)stackptr, sizeof(char *));
		if (result) {
			kfree(stkargs);
			return result;
		}
	}
	kfree(stkargs);

	/* Warp to user mode; stackptr now points at argv[0]. */
	enter_new_process(count, (userptr_t)stackptr, NULL, stackptr,
	    entrypoint);

	panic("enter_new_process returned\n");
	return EINVAL;
}
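/*
 * For reference, the user-stack image sys_execv() builds above, for
 * argv = { "cat", "file" } and an initial stackptr of 0x7fffff80
 * (addresses illustrative):
 *
 *	0x7fffff7c:  'c' 'a' 't' '\0'              <- stkargs[0]
 *	0x7fffff74:  'f' 'i' 'l' 'e' '\0' + 3 pad  <- stkargs[1]
 *	0x7fffff70:  NULL                          <- argv[2]
 *	0x7fffff6c:  0x7fffff74                    <- argv[1]
 *	0x7fffff68:  0x7fffff7c                    <- argv[0]
 *
 * enter_new_process() then receives stackptr == 0x7fffff68 == &argv[0].
 */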
static void
testa(struct array *a)
{
	int testarray[TESTSIZE];
	int i, j, n, r, *p;

	for (i=0; i<TESTSIZE; i++) {
		testarray[i]=i;
	}

	n = array_num(a);
	KASSERT(n==0);

	for (i=0; i<TESTSIZE; i++) {
		r = array_add(a, &testarray[i], NULL);
		KASSERT(r==0);
		n = array_num(a);
		KASSERT(n==i+1);
	}
	n = array_num(a);
	KASSERT(n==TESTSIZE);

	for (i=0; i<TESTSIZE; i++) {
		p = array_get(a, i);
		KASSERT(*p == i);
	}
	n = array_num(a);
	KASSERT(n==TESTSIZE);

	for (j=0; j<TESTSIZE*4; j++) {
		i = random()%TESTSIZE;
		p = array_get(a, i);
		KASSERT(*p == i);
	}
	n = array_num(a);
	KASSERT(n==TESTSIZE);

	for (i=0; i<TESTSIZE; i++) {
		array_set(a, i, &testarray[TESTSIZE-i-1]);
	}
	for (i=0; i<TESTSIZE; i++) {
		p = array_get(a, i);
		KASSERT(*p == TESTSIZE-i-1);
	}

	r = array_setsize(a, TESTSIZE/2);
	KASSERT(r==0);
	for (i=0; i<TESTSIZE/2; i++) {
		p = array_get(a, i);
		KASSERT(*p == TESTSIZE-i-1);
	}

	array_remove(a, 1);
	for (i=1; i<TESTSIZE/2 - 1; i++) {
		p = array_get(a, i);
		KASSERT(*p == TESTSIZE-i-2);
	}
	p = array_get(a, 0);
	KASSERT(*p == TESTSIZE-1);

	array_setsize(a, 2);
	p = array_get(a, 0);
	KASSERT(*p == TESTSIZE-1);
	p = array_get(a, 1);
	KASSERT(*p == TESTSIZE-3);

	array_set(a, 1, NULL);
	array_setsize(a, 2);
	p = array_get(a, 0);
	KASSERT(*p == TESTSIZE-1);
	p = array_get(a, 1);
	KASSERT(p==NULL);

	array_setsize(a, TESTSIZE*10);
	p = array_get(a, 0);
	KASSERT(*p == TESTSIZE-1);
	p = array_get(a, 1);
	KASSERT(p==NULL);
}
/*
 * Map an interrupt using the firmware reg, interrupt-map and
 * interrupt-map-mask properties.
 * The interrupt property to be mapped must be of size intrsz, and pointed to
 * by intr.  The regs property of the node for which the mapping is done must
 * be passed as regs.  This property is an array of register specifications;
 * the size of the address part of such a specification must be passed as
 * physsz.  Only the first element of the property is used.
 * imap and imapsz hold the interrupt map and its size.
 * imapmsk is a pointer to the interrupt-map-mask property, which must have
 * a size of physsz + intrsz; it may be NULL, in which case a full mask is
 * assumed.
 * maskbuf must point to a buffer of length physsz + intrsz.
 * The interrupt is returned in result, which must point to a buffer of length
 * rintrsz (which gives the expected size of the mapped interrupt).
 * Returns number of cells in the interrupt if a mapping was found, 0 otherwise.
 */
int
ofw_bus_search_intrmap(void *intr, int intrsz, void *regs, int physsz,
    void *imap, int imapsz, void *imapmsk, void *maskbuf, void *result,
    int rintrsz, phandle_t *iparent)
{
	phandle_t parent;
	uint8_t *ref = maskbuf;
	uint8_t *uiintr = intr;
	uint8_t *uiregs = regs;
	uint8_t *uiimapmsk = imapmsk;
	uint8_t *mptr;
	pcell_t paddrsz;
	pcell_t pintrsz;
	int i, tsz;

	if (imapmsk != NULL) {
		for (i = 0; i < physsz; i++)
			ref[i] = uiregs[i] & uiimapmsk[i];
		for (i = 0; i < intrsz; i++)
			ref[physsz + i] = uiintr[i] & uiimapmsk[physsz + i];
	} else {
		bcopy(regs, ref, physsz);
		bcopy(intr, ref + physsz, intrsz);
	}

	mptr = imap;
	i = imapsz;
	paddrsz = 0;
	while (i > 0) {
		bcopy(mptr + physsz + intrsz, &parent, sizeof(parent));
#ifndef OFW_IMAP_NO_IPARENT_ADDR_CELLS
		/*
		 * Find if we need to read the parent address data.
		 * CHRP-derived OF bindings, including ePAPR-compliant FDTs,
		 * use this as an optional part of the specifier.
		 */
		if (OF_getencprop(OF_node_from_xref(parent),
		    "#address-cells", &paddrsz, sizeof(paddrsz)) == -1)
			paddrsz = 0;	/* default */
		paddrsz *= sizeof(pcell_t);
#endif

		if (OF_searchencprop(OF_node_from_xref(parent),
		    "#interrupt-cells", &pintrsz, sizeof(pintrsz)) == -1)
			pintrsz = 1;	/* default */
		pintrsz *= sizeof(pcell_t);

		/* Compute the map stride size. */
		tsz = physsz + intrsz + sizeof(phandle_t) + paddrsz + pintrsz;
		KASSERT(i >= tsz, ("ofw_bus_search_intrmap: truncated map"));

		if (bcmp(ref, mptr, physsz + intrsz) == 0) {
			bcopy(mptr + physsz + intrsz + sizeof(parent) +
			    paddrsz, result, MIN(rintrsz, pintrsz));

			if (iparent != NULL)
				*iparent = parent;
			return (pintrsz / sizeof(pcell_t));
		}
		mptr += tsz;
		i -= tsz;
	}
	return (0);
}
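/*
 * A minimal sketch of a typical caller (names and buffer sizes are
 * illustrative; property-length validation elided): gather the node's
 * "reg" and the bus's "interrupt-map", then search.  Passing a NULL
 * mask means every bit of the unit address and specifier must match.
 */
static int
map_one_intr(phandle_t node, pcell_t *intr, int intrsz)
{
	pcell_t reg[8], imap[128], maskbuf[16], result[4], acells;
	phandle_t bus = OF_parent(node), iparent;
	int physsz, imapsz;

	/* physsz is the address part of one "reg" entry, in bytes. */
	if (OF_getencprop(bus, "#address-cells", &acells,
	    sizeof(acells)) == -1)
		acells = 2;
	physsz = acells * sizeof(pcell_t);

	if (OF_getprop(node, "reg", reg, sizeof(reg)) < physsz)
		return (0);
	imapsz = OF_getprop(bus, "interrupt-map", imap, sizeof(imap));
	if (imapsz <= 0)
		return (0);

	return (ofw_bus_search_intrmap(intr, intrsz, reg, physsz,
	    imap, imapsz, NULL, maskbuf, result, sizeof(result), &iparent));
}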
/**
 * Parse the next core entry from the EROM table and produce a bcma_corecfg
 * to be owned by the caller.
 *
 * @param erom EROM read state.
 * @param[out] result On success, the core's device info. The caller inherits
 * ownership of this allocation.
 *
 * @return If successful, returns 0. If the end of the EROM table is hit,
 * ENOENT will be returned. On error, returns a non-zero error value.
 */
int
bcma_erom_parse_corecfg(struct bcma_erom *erom, struct bcma_corecfg **result)
{
	struct bcma_corecfg	*cfg;
	struct bcma_erom_core	 core;
	uint8_t			 first_region_type;
	bus_size_t		 initial_offset;
	u_int			 core_index;
	int			 core_unit;
	int			 error;

	cfg = NULL;
	initial_offset = bcma_erom_tell(erom);

	/* Parse the next core entry */
	if ((error = bcma_erom_parse_core(erom, &core)))
		return (error);

	/* Determine the core's index and unit numbers */
	bcma_erom_reset(erom);
	core_unit = 0;
	core_index = 0;
	for (; bcma_erom_tell(erom) != initial_offset; core_index++) {
		struct bcma_erom_core prev_core;

		/* Parse next core */
		if ((error = erom_seek_next(erom, BCMA_EROM_ENTRY_TYPE_CORE)))
			return (error);

		if ((error = bcma_erom_parse_core(erom, &prev_core)))
			return (error);

		/* Is earlier unit? */
		if (core.vendor == prev_core.vendor &&
		    core.device == prev_core.device) {
			core_unit++;
		}

		/* Seek to next core */
		if ((error = erom_seek_next(erom, BCMA_EROM_ENTRY_TYPE_CORE)))
			return (error);
	}

	/* We already parsed the core descriptor */
	if ((error = erom_skip_core(erom)))
		return (error);

	/* Allocate our corecfg */
	cfg = bcma_alloc_corecfg(core_index, core_unit, core.vendor,
	    core.device, core.rev);
	if (cfg == NULL)
		return (ENOMEM);

	/* These are 5-bit values in the EROM table, and should never be able
	 * to overflow BCMA_PID_MAX. */
	KASSERT(core.num_mport <= BCMA_PID_MAX, ("unsupported mport count"));
	KASSERT(core.num_dport <= BCMA_PID_MAX, ("unsupported dport count"));
	KASSERT(core.num_mwrap + core.num_swrap <= BCMA_PID_MAX,
	    ("unsupported wport count"));

	if (bootverbose) {
		EROM_LOG(erom,
		    "core%u: %s %s (cid=%hx, rev=%hu, unit=%d)\n",
		    core_index,
		    bhnd_vendor_name(core.vendor),
		    bhnd_find_core_name(core.vendor, core.device),
		    core.device, core.rev, core_unit);
	}

	cfg->num_master_ports = core.num_mport;
	cfg->num_dev_ports = 0;		/* determined below */
	cfg->num_bridge_ports = 0;	/* determined below */
	cfg->num_wrapper_ports = core.num_mwrap + core.num_swrap;

	/* Parse Master Port Descriptors */
	for (uint8_t i = 0; i < core.num_mport; i++) {
		struct bcma_mport	*mport;
		struct bcma_erom_mport	 mpd;

		/* Parse the master port descriptor */
		error = bcma_erom_parse_mport(erom, &mpd);
		if (error)
			goto failed;

		/* Initialize a new bus mport structure */
		mport = malloc(sizeof(struct bcma_mport), M_BHND, M_NOWAIT);
		if (mport == NULL) {
			error = ENOMEM;
			goto failed;
		}

		mport->mp_vid = mpd.port_vid;
		mport->mp_num = mpd.port_num;

		/* Update dinfo */
		STAILQ_INSERT_TAIL(&cfg->master_ports, mport, mp_link);
	}

	/*
	 * Determine whether this is a bridge device; if so, we can
	 * expect the first sequence of address region descriptors to
	 * be of BCMA_EROM_REGION_TYPE_BRIDGE instead of
	 * BCMA_EROM_REGION_TYPE_DEVICE.
	 *
	 * It's unclear whether this is the correct mechanism by which we
	 * should detect/handle bridge devices, but this approach matches
	 * that of (some of) Broadcom's published drivers.
	 */
	if (core.num_dport > 0) {
		uint32_t entry;

		if ((error = bcma_erom_peek32(erom, &entry)))
			goto failed;

		if (BCMA_EROM_ENTRY_IS(entry, REGION) &&
		    BCMA_EROM_GET_ATTR(entry, REGION_TYPE) ==
		    BCMA_EROM_REGION_TYPE_BRIDGE) {
			first_region_type = BCMA_EROM_REGION_TYPE_BRIDGE;
			cfg->num_dev_ports = 0;
			cfg->num_bridge_ports = core.num_dport;
		} else {
			first_region_type = BCMA_EROM_REGION_TYPE_DEVICE;
			cfg->num_dev_ports = core.num_dport;
			cfg->num_bridge_ports = 0;
		}
	}

	/* Device/bridge port descriptors */
	for (uint8_t sp_num = 0; sp_num < core.num_dport; sp_num++) {
		error = erom_corecfg_fill_port_regions(erom, cfg, sp_num,
		    first_region_type);
		if (error)
			goto failed;
	}

	/* Wrapper (aka device management) descriptors (for master ports). */
	for (uint8_t sp_num = 0; sp_num < core.num_mwrap; sp_num++) {
		error = erom_corecfg_fill_port_regions(erom, cfg, sp_num,
		    BCMA_EROM_REGION_TYPE_MWRAP);
		if (error)
			goto failed;
	}

	/* Wrapper (aka device management) descriptors (for slave ports). */
	for (uint8_t i = 0; i < core.num_swrap; i++) {
		/* Slave wrapper ports are not numbered distinctly from master
		 * wrapper ports. */

		/*
		 * Broadcom DDR1/DDR2 Memory Controller
		 * (cid=82e, rev=1, unit=0, d/mw/sw = 2/0/1 ) ->
		 * bhnd0: erom[0xdc]: core6 agent0.0: mismatch got: 0x1 (0x2)
		 *
		 * ARM BP135 AMBA3 AXI to APB Bridge
		 * (cid=135, rev=0, unit=0, d/mw/sw = 1/0/1 ) ->
		 * bhnd0: erom[0x124]: core9 agent1.0: mismatch got: 0x0 (0x2)
		 *
		 * core.num_mwrap
		 * ===>
		 * (core.num_mwrap > 0) ?
		 *		core.num_mwrap :
		 *		((core.vendor == BHND_MFGID_BCM) ? 1 : 0)
		 */
		uint8_t sp_num;

		/* Note the parentheses: the wrapper-port base is computed
		 * first and then offset by the slave wrapper index; without
		 * them, + i would bind to the inner ternary only. */
		sp_num = ((core.num_mwrap > 0) ?
		    core.num_mwrap :
		    ((core.vendor == BHND_MFGID_BCM) ? 1 : 0)) + i;
		error = erom_corecfg_fill_port_regions(erom, cfg, sp_num,
		    BCMA_EROM_REGION_TYPE_SWRAP);
		if (error)
			goto failed;
	}

	*result = cfg;
	return (0);

failed:
	if (cfg != NULL)
		bcma_free_corecfg(cfg);

	return (error);
}
void
trap(struct trapframe *frame)
{
#ifdef KDTRACE_HOOKS
	struct reg regs;
#endif
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	int i = 0, ucode = 0, code;
	u_int type;
	register_t addr = 0;
	vm_offset_t eva;
	ksiginfo_t ksi;
#ifdef POWERFAIL_NMI
	static int lastalert = 0;
#endif

	PCPU_INC(cnt.v_trap);
	type = frame->tf_trapno;

#ifdef SMP
	/* Handler for NMI IPIs used for stopping CPUs. */
	if (type == T_NMI) {
		if (ipi_nmi_handler() == 0)
			goto out;
	}
#endif /* SMP */

#ifdef KDB
	if (kdb_active) {
		kdb_reenter();
		goto out;
	}
#endif

	if (type == T_RESERVED) {
		trap_fatal(frame, 0);
		goto out;
	}

#ifdef HWPMC_HOOKS
	/*
	 * CPU PMCs interrupt using an NMI so we check for that first.
	 * If the HWPMC module is active, 'pmc_hook' will point to
	 * the function to be called.  A return value of '1' from the
	 * hook means that the NMI was handled by it and that we can
	 * return immediately.
	 */
	if (type == T_NMI && pmc_intr &&
	    (*pmc_intr)(PCPU_GET(cpuid), frame))
		goto out;
#endif

	if (type == T_MCHK) {
		mca_intr();
		goto out;
	}

#ifdef KDTRACE_HOOKS
	/*
	 * A trap can occur while DTrace executes a probe. Before
	 * executing the probe, DTrace blocks re-scheduling and sets
	 * a flag in its per-cpu flags to indicate that it doesn't
	 * want to fault. On returning from the probe, the no-fault
	 * flag is cleared and finally re-scheduling is enabled.
	 */
	if ((type == T_PROTFLT || type == T_PAGEFLT) &&
	    dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, type))
		goto out;
#endif

	if ((frame->tf_eflags & PSL_I) == 0) {
		/*
		 * Buggy application or kernel code has disabled
		 * interrupts and then trapped.  Enabling interrupts
		 * now is wrong, but it is better than running with
		 * interrupts disabled until they are accidentally
		 * enabled later.
		 */
		if (ISPL(frame->tf_cs) == SEL_UPL ||
		    (frame->tf_eflags & PSL_VM))
			uprintf(
			    "pid %ld (%s): trap %d with interrupts disabled\n",
			    (long)curproc->p_pid, curthread->td_name, type);
		else if (type != T_NMI && type != T_BPTFLT &&
		    type != T_TRCTRAP &&
		    frame->tf_eip != (int)cpu_switch_load_gs) {
			/*
			 * XXX not quite right, since this may be for a
			 * multiple fault in user mode.
			 */
			printf("kernel trap %d with interrupts disabled\n",
			    type);

			/*
			 * Page faults need interrupts disabled until later,
			 * and we shouldn't enable interrupts while holding
			 * a spin lock.
			 */
			if (type != T_PAGEFLT &&
			    td->td_md.md_spinlock_count == 0)
				enable_intr();
		}
	}
	eva = 0;
	code = frame->tf_err;
	if (type == T_PAGEFLT) {
		/*
		 * For some Cyrix CPUs, %cr2 is clobbered by
		 * interrupts.  This problem is worked around by using
		 * an interrupt gate for the pagefault handler.  We
		 * are finally ready to read %cr2 and conditionally
		 * reenable interrupts.  If we hold a spin lock, then
		 * we must not reenable interrupts.  This might be a
		 * spurious page fault.
		 */
		eva = rcr2();
		if (td->td_md.md_spinlock_count == 0)
			enable_intr();
	}

	if ((ISPL(frame->tf_cs) == SEL_UPL) ||
	    ((frame->tf_eflags & PSL_VM) &&
	    !(curpcb->pcb_flags & PCB_VM86CALL))) {
		/* user trap */

		td->td_pticks = 0;
		td->td_frame = frame;
		addr = frame->tf_eip;
		if (td->td_ucred != p->p_ucred)
			cred_update_thread(td);

		switch (type) {
		case T_PRIVINFLT:	/* privileged instruction fault */
			i = SIGILL;
			ucode = ILL_PRVOPC;
			break;

		case T_BPTFLT:		/* bpt instruction fault */
		case T_TRCTRAP:		/* trace trap */
			enable_intr();
#ifdef KDTRACE_HOOKS
			if (type == T_BPTFLT) {
				fill_frame_regs(frame, &regs);
				if (dtrace_pid_probe_ptr != NULL &&
				    dtrace_pid_probe_ptr(&regs) == 0)
					goto out;
			}
#endif
			frame->tf_eflags &= ~PSL_T;
			i = SIGTRAP;
			ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT);
			break;

		case T_ARITHTRAP:	/* arithmetic trap */
#ifdef DEV_NPX
			ucode = npxtrap_x87();
			if (ucode == -1)
				goto userout;
#else
			ucode = 0;
#endif
			i = SIGFPE;
			break;

			/*
			 * The following two traps can happen in
			 * vm86 mode, and, if so, we want to handle
			 * them specially.
			 */
		case T_PROTFLT:		/* general protection fault */
		case T_STKFLT:		/* stack fault */
			if (frame->tf_eflags & PSL_VM) {
				i = vm86_emulate((struct vm86frame *)frame);
				if (i == 0)
					goto user;
				break;
			}
			i = SIGBUS;
			ucode = (type == T_PROTFLT) ? BUS_OBJERR : BUS_ADRERR;
			break;
		case T_SEGNPFLT:	/* segment not present fault */
			i = SIGBUS;
			ucode = BUS_ADRERR;
			break;
		case T_TSSFLT:		/* invalid TSS fault */
			i = SIGBUS;
			ucode = BUS_OBJERR;
			break;
		case T_DOUBLEFLT:	/* double fault */
		default:
			i = SIGBUS;
			ucode = BUS_OBJERR;
			break;

		case T_PAGEFLT:		/* page fault */
			i = trap_pfault(frame, TRUE, eva);
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
			if (i == -2) {
				/*
				 * The f00f hack workaround has triggered, so
				 * treat the fault as an illegal instruction
				 * (T_PRIVINFLT) instead of a page fault.
				 */
				type = frame->tf_trapno = T_PRIVINFLT;

				/* Proceed as in that case. */
				ucode = ILL_PRVOPC;
				i = SIGILL;
				break;
			}
#endif
			if (i == -1)
				goto userout;
			if (i == 0)
				goto user;

			if (i == SIGSEGV)
				ucode = SEGV_MAPERR;
			else {
				if (prot_fault_translation == 0) {
					/*
					 * Autodetect.
					 * This check also covers the images
					 * without the ABI-tag ELF note.
					 */
					if (SV_CURPROC_ABI() == SV_ABI_FREEBSD
					    && p->p_osrel >= P_OSREL_SIGSEGV) {
						i = SIGSEGV;
						ucode = SEGV_ACCERR;
					} else {
						i = SIGBUS;
						ucode = BUS_PAGE_FAULT;
					}
				} else if (prot_fault_translation == 1) {
					/*
					 * Always compat mode.
					 */
					i = SIGBUS;
					ucode = BUS_PAGE_FAULT;
				} else {
					/*
					 * Always SIGSEGV mode.
					 */
					i = SIGSEGV;
					ucode = SEGV_ACCERR;
				}
			}
			addr = eva;
			break;

		case T_DIVIDE:		/* integer divide fault */
			ucode = FPE_INTDIV;
			i = SIGFPE;
			break;

#ifdef DEV_ISA
		case T_NMI:
#ifdef POWERFAIL_NMI
#ifndef TIMER_FREQ
# define TIMER_FREQ 1193182
#endif
			if (time_second - lastalert > 10) {
				log(LOG_WARNING, "NMI: power fail\n");
				sysbeep(880, hz);
				lastalert = time_second;
			}
			goto userout;
#else /* !POWERFAIL_NMI */
			/* machine/parity/power fail/"kitchen sink" faults */
			if (isa_nmi(code) == 0) {
#ifdef KDB
				/*
				 * NMI can be hooked up to a pushbutton
				 * for debugging.
				 */
				if (kdb_on_nmi) {
					printf("NMI ... going to debugger\n");
					kdb_trap(type, 0, frame);
				}
#endif /* KDB */
				goto userout;
			} else if (panic_on_nmi)
				panic("NMI indicates hardware failure");
			break;
#endif /* POWERFAIL_NMI */
#endif /* DEV_ISA */

		case T_OFLOW:		/* integer overflow fault */
			ucode = FPE_INTOVF;
			i = SIGFPE;
			break;

		case T_BOUND:		/* bounds check fault */
			ucode = FPE_FLTSUB;
			i = SIGFPE;
			break;

		case T_DNA:
#ifdef DEV_NPX
			KASSERT(PCB_USER_FPU(td->td_pcb),
			    ("kernel FPU ctx has leaked"));
			/* transparent fault (due to context switch "late") */
			if (npxdna())
				goto userout;
#endif
			uprintf("pid %d killed due to lack of floating point\n",
			    p->p_pid);
			i = SIGKILL;
			ucode = 0;
			break;

		case T_FPOPFLT:		/* FPU operand fetch fault */
			ucode = ILL_COPROC;
			i = SIGILL;
			break;

		case T_XMMFLT:		/* SIMD floating-point exception */
#if defined(DEV_NPX) && !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
			ucode = npxtrap_sse();
			if (ucode == -1)
				goto userout;
#else
			ucode = 0;
#endif
			i = SIGFPE;
			break;
#ifdef KDTRACE_HOOKS
		case T_DTRACE_RET:
			enable_intr();
			fill_frame_regs(frame, &regs);
			if (dtrace_return_probe_ptr != NULL &&
			    dtrace_return_probe_ptr(&regs) == 0)
				goto out;
			break;
#endif
		}
	} else {
		/* kernel trap */

		KASSERT(cold || td->td_ucred != NULL,
		    ("kernel trap doesn't have ucred"));
		switch (type) {
		case T_PAGEFLT:			/* page fault */
			(void) trap_pfault(frame, FALSE, eva);
			goto out;

		case T_DNA:
#ifdef DEV_NPX
			KASSERT(!PCB_USER_FPU(td->td_pcb),
			    ("Unregistered use of FPU in kernel"));
			if (npxdna())
				goto out;
#endif
			break;

		case T_ARITHTRAP:	/* arithmetic trap */
		case T_XMMFLT:		/* SIMD floating-point exception */
		case T_FPOPFLT:		/* FPU operand fetch fault */
			/*
			 * XXXKIB for now disable any FPU traps in kernel
			 * handler registration seems to be overkill
			 */
			trap_fatal(frame, 0);
			goto out;

			/*
			 * The following two traps can happen in
			 * vm86 mode, and, if so, we want to handle
			 * them specially.
			 */
		case T_PROTFLT:		/* general protection fault */
		case T_STKFLT:		/* stack fault */
			if (frame->tf_eflags & PSL_VM) {
				i = vm86_emulate((struct vm86frame *)frame);
				if (i != 0)
					/*
					 * returns to original process
					 */
					vm86_trap((struct vm86frame *)frame);
				goto out;
			}
			if (type == T_STKFLT)
				break;

			/* FALL THROUGH */

		case T_SEGNPFLT:	/* segment not present fault */
			if (curpcb->pcb_flags & PCB_VM86CALL)
				break;

			/*
			 * Invalid %fs's and %gs's can be created using
			 * procfs or PT_SETREGS or by invalidating the
			 * underlying LDT entry.  This causes a fault
			 * in kernel mode when the kernel attempts to
			 * switch contexts.  Lose the bad context
			 * (XXX) so that we can continue, and generate
			 * a signal.
			 */
			if (frame->tf_eip == (int)cpu_switch_load_gs) {
				curpcb->pcb_gs = 0;
#if 0
				PROC_LOCK(p);
				kern_psignal(p, SIGBUS);
				PROC_UNLOCK(p);
#endif
				goto out;
			}

			if (td->td_intr_nesting_level != 0)
				break;

			/*
			 * Invalid segment selectors and out of bounds
			 * %eip's and %esp's can be set up in user mode.
			 * This causes a fault in kernel mode when the
			 * kernel tries to return to user mode.  We want
			 * to get this fault so that we can fix the
			 * problem here and not have to check all the
			 * selectors and pointers when the user changes
			 * them.
			 */
			if (frame->tf_eip == (int)doreti_iret) {
				frame->tf_eip = (int)doreti_iret_fault;
				goto out;
			}
			if (frame->tf_eip == (int)doreti_popl_ds) {
				frame->tf_eip = (int)doreti_popl_ds_fault;
				goto out;
			}
			if (frame->tf_eip == (int)doreti_popl_es) {
				frame->tf_eip = (int)doreti_popl_es_fault;
				goto out;
			}
			if (frame->tf_eip == (int)doreti_popl_fs) {
				frame->tf_eip = (int)doreti_popl_fs_fault;
				goto out;
			}
			if (curpcb->pcb_onfault != NULL) {
				frame->tf_eip = (int)curpcb->pcb_onfault;
				goto out;
			}
			break;

		case T_TSSFLT:
			/*
			 * PSL_NT can be set in user mode and isn't cleared
			 * automatically when the kernel is entered.  This
			 * causes a TSS fault when the kernel attempts to
			 * `iret' because the TSS link is uninitialized.  We
			 * want to get this fault so that we can fix the
			 * problem here and not every time the kernel is
			 * entered.
			 */
			if (frame->tf_eflags & PSL_NT) {
				frame->tf_eflags &= ~PSL_NT;
				goto out;
			}
			break;

		case T_TRCTRAP:	 /* trace trap */
			if (frame->tf_eip == (int)IDTVEC(lcall_syscall)) {
				/*
				 * We've just entered system mode via the
				 * syscall lcall.  Continue single stepping
				 * silently until the syscall handler has
				 * saved the flags.
				 */
				goto out;
			}
			if (frame->tf_eip == (int)IDTVEC(lcall_syscall) + 1) {
				/*
				 * The syscall handler has now saved the
				 * flags.  Stop single stepping it.
				 */
				frame->tf_eflags &= ~PSL_T;
				goto out;
			}
			/*
			 * Ignore debug register trace traps due to
			 * accesses in the user's address space, which
			 * can happen under several conditions such as
			 * if a user sets a watchpoint on a buffer and
			 * then passes that buffer to a system call.
			 * We still want to get TRCTRAPS for addresses
			 * in kernel space because that is useful when
			 * debugging the kernel.
			 */
			if (user_dbreg_trap() &&
			    !(curpcb->pcb_flags & PCB_VM86CALL)) {
				/*
				 * Reset breakpoint bits because the
				 * processor doesn't
				 */
				load_dr6(rdr6() & 0xfffffff0);
				goto out;
			}
			/*
			 * FALLTHROUGH (TRCTRAP kernel mode, kernel address)
			 */
		case T_BPTFLT:
			/*
			 * If KDB is enabled, let it handle the debugger trap.
			 * Otherwise, debugger traps "can't happen".
			 */
#ifdef KDB
			if (kdb_trap(type, 0, frame))
				goto out;
#endif
			break;

#ifdef DEV_ISA
		case T_NMI:
#ifdef POWERFAIL_NMI
			if (time_second - lastalert > 10) {
				log(LOG_WARNING, "NMI: power fail\n");
				sysbeep(880, hz);
				lastalert = time_second;
			}
			goto out;
#else /* !POWERFAIL_NMI */
			/* machine/parity/power fail/"kitchen sink" faults */
			if (isa_nmi(code) == 0) {
#ifdef KDB
				/*
				 * NMI can be hooked up to a pushbutton
				 * for debugging.
				 */
				if (kdb_on_nmi) {
					printf("NMI ... going to debugger\n");
					kdb_trap(type, 0, frame);
				}
#endif /* KDB */
				goto out;
			} else if (panic_on_nmi == 0)
				goto out;
			/* FALLTHROUGH */
#endif /* POWERFAIL_NMI */
#endif /* DEV_ISA */
		}

		trap_fatal(frame, eva);
		goto out;
	}

	/* Translate fault for emulators (e.g. Linux) */
	if (*p->p_sysent->sv_transtrap)
		i = (*p->p_sysent->sv_transtrap)(i, type);

	ksiginfo_init_trap(&ksi);
	ksi.ksi_signo = i;
	ksi.ksi_code = ucode;
	ksi.ksi_addr = (void *)addr;
	ksi.ksi_trapno = type;
	if (uprintf_signal) {
		uprintf("pid %d comm %s: signal %d err %x code %d type %d "
		    "addr 0x%x esp 0x%08x eip 0x%08x "
		    "<%02x %02x %02x %02x %02x %02x %02x %02x>\n",
		    p->p_pid, p->p_comm, i, frame->tf_err, ucode, type, addr,
		    frame->tf_esp, frame->tf_eip,
		    fubyte((void *)(frame->tf_eip + 0)),
		    fubyte((void *)(frame->tf_eip + 1)),
		    fubyte((void *)(frame->tf_eip + 2)),
		    fubyte((void *)(frame->tf_eip + 3)),
		    fubyte((void *)(frame->tf_eip + 4)),
		    fubyte((void *)(frame->tf_eip + 5)),
		    fubyte((void *)(frame->tf_eip + 6)),
		    fubyte((void *)(frame->tf_eip + 7)));
	}
	KASSERT((read_eflags() & PSL_I) != 0, ("interrupts disabled"));
	trapsignal(td, &ksi);

#ifdef DEBUG
	if (type <= MAX_TRAP_MSG) {
		uprintf("fatal process exception: %s",
		    trap_msg[type]);
		if ((type == T_PAGEFLT) || (type == T_PROTFLT))
			uprintf(", fault VA = 0x%lx", (u_long)eva);
		uprintf("\n");
	}
#endif

user:
	userret(td, frame);
	KASSERT(PCB_USER_FPU(td->td_pcb),
	    ("Return from trap with kernel FPU ctx leaked"));
userout:
out:
	return;
}
void
cv_broadcast(struct cv *cv, struct lock *lock)
{
	KASSERT(lock_do_i_hold(lock));
	wchan_wakeall(cv->cv_wchan);
}
void
cv_signal(struct cv *cv, struct lock *lock)
{
	KASSERT(lock_do_i_hold(lock));
	wchan_wakeone(cv->cv_wchan);
}
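/*
 * These wakeups pair with cv_wait() in the usual monitor pattern.  A
 * minimal consumer sketch; queue_lock, queue_cv, queue_empty() and
 * queue_pop() are hypothetical:
 */
static void
consumer(void)
{
	lock_acquire(queue_lock);
	while (queue_empty()) {
		/*
		 * cv_wait() releases the lock, sleeps on the wchan, and
		 * re-acquires the lock before returning, so the condition
		 * must be re-checked in a loop.
		 */
		cv_wait(queue_cv, queue_lock);
	}
	queue_pop();
	lock_release(queue_lock);
}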
/*
 * Process a received Ethernet packet; the packet is in the
 * mbuf chain m with the ethernet header at the front.
 */
static void
ether_input_internal(struct ifnet *ifp, struct mbuf *m)
{
	struct ether_header *eh;
	u_short etype;

	if ((ifp->if_flags & IFF_UP) == 0) {
		m_freem(m);
		return;
	}
#ifdef DIAGNOSTIC
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		if_printf(ifp, "discard frame at !IFF_DRV_RUNNING\n");
		m_freem(m);
		return;
	}
#endif
	/*
	 * Do consistency checks to verify assumptions
	 * made by code past this point.
	 */
	if ((m->m_flags & M_PKTHDR) == 0) {
		if_printf(ifp, "discard frame w/o packet header\n");
		ifp->if_ierrors++;
		m_freem(m);
		return;
	}
	if (m->m_len < ETHER_HDR_LEN) {
		/* XXX maybe should pullup? */
		if_printf(ifp, "discard frame w/o leading ethernet "
		    "header (len %u pkt len %u)\n",
		    m->m_len, m->m_pkthdr.len);
		ifp->if_ierrors++;
		m_freem(m);
		return;
	}
	eh = mtod(m, struct ether_header *);
	etype = ntohs(eh->ether_type);
	if (m->m_pkthdr.rcvif == NULL) {
		if_printf(ifp, "discard frame w/o interface pointer\n");
		ifp->if_ierrors++;
		m_freem(m);
		return;
	}
#ifdef DIAGNOSTIC
	if (m->m_pkthdr.rcvif != ifp) {
		if_printf(ifp, "Warning, frame marked as received on %s\n",
		    m->m_pkthdr.rcvif->if_xname);
	}
#endif

	CURVNET_SET_QUIET(ifp->if_vnet);

	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
		if (ETHER_IS_BROADCAST(eh->ether_dhost))
			m->m_flags |= M_BCAST;
		else
			m->m_flags |= M_MCAST;
		ifp->if_imcasts++;
	}

#ifdef MAC
	/*
	 * Tag the mbuf with an appropriate MAC label before any other
	 * consumers can get to it.
	 */
	mac_ifnet_create_mbuf(ifp, m);
#endif

	/*
	 * Give bpf a chance at the packet.
	 */
	ETHER_BPF_MTAP(ifp, m);

	/*
	 * If the CRC is still on the packet, trim it off. We do this once
	 * and once only in case we are re-entered. Nothing else on the
	 * Ethernet receive path expects to see the FCS.
	 */
	if (m->m_flags & M_HASFCS) {
		m_adj(m, -ETHER_CRC_LEN);
		m->m_flags &= ~M_HASFCS;
	}

	if (!(ifp->if_capenable & IFCAP_HWSTATS))
		ifp->if_ibytes += m->m_pkthdr.len;

	/* Allow monitor mode to claim this frame, after stats are updated. */
	if (ifp->if_flags & IFF_MONITOR) {
		m_freem(m);
		CURVNET_RESTORE();
		return;
	}

	/* Handle input from a lagg(4) port */
	if (ifp->if_type == IFT_IEEE8023ADLAG) {
		KASSERT(lagg_input_p != NULL,
		    ("%s: if_lagg not loaded!", __func__));
		m = (*lagg_input_p)(ifp, m);
		if (m != NULL)
			ifp = m->m_pkthdr.rcvif;
		else {
			CURVNET_RESTORE();
			return;
		}
	}

	/*
	 * If the hardware did not process an 802.1Q tag, do this now,
	 * to allow 802.1P priority frames to be passed to the main input
	 * path correctly.
	 * TODO: Deal with Q-in-Q frames, but not arbitrary nesting levels.
	 */
	if ((m->m_flags & M_VLANTAG) == 0 && etype == ETHERTYPE_VLAN) {
		struct ether_vlan_header *evl;

		if (m->m_len < sizeof(*evl) &&
		    (m = m_pullup(m, sizeof(*evl))) == NULL) {
#ifdef DIAGNOSTIC
			if_printf(ifp, "cannot pullup VLAN header\n");
#endif
			ifp->if_ierrors++;
			m_freem(m);
			CURVNET_RESTORE();
			return;
		}

		evl = mtod(m, struct ether_vlan_header *);
		m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
		m->m_flags |= M_VLANTAG;

		bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
		    ETHER_HDR_LEN - ETHER_TYPE_LEN);
		m_adj(m, ETHER_VLAN_ENCAP_LEN);
		eh = mtod(m, struct ether_header *);
	}

	M_SETFIB(m, ifp->if_fib);

	/* Allow ng_ether(4) to claim this frame. */
	if (IFP2AC(ifp)->ac_netgraph != NULL) {
		KASSERT(ng_ether_input_p != NULL,
		    ("%s: ng_ether_input_p is NULL", __func__));
		m->m_flags &= ~M_PROMISC;
		(*ng_ether_input_p)(ifp, &m);
		if (m == NULL) {
			CURVNET_RESTORE();
			return;
		}
		eh = mtod(m, struct ether_header *);
	}
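/*
 * Byte-level view of the in-place 802.1Q decapsulation above:
 *
 *	before: | dst(6) | src(6) | 0x8100(2) | tag(2) | type(2) | payload
 *	bcopy : slide dst+src (12 bytes) forward by ETHER_VLAN_ENCAP_LEN (4)
 *	m_adj : trim 4 bytes from the front of the mbuf
 *	after : | dst(6) | src(6) | type(2) | payload
 *
 * The VLAN id/priority survive in m->m_pkthdr.ether_vtag (M_VLANTAG).
 */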
static void
uvm_unloanpage(struct vm_page **ploans, int npages)
{
	struct vm_page *pg;
	kmutex_t *slock;

	mutex_enter(&uvm_pageqlock);
	while (npages-- > 0) {
		pg = *ploans++;

		/*
		 * do a little dance to acquire the object or anon lock
		 * as appropriate.  we are locking in the wrong order,
		 * so we have to do a try-lock here.
		 */

		slock = NULL;
		while (pg->uobject != NULL || pg->uanon != NULL) {
			if (pg->uobject != NULL) {
				slock = pg->uobject->vmobjlock;
			} else {
				slock = pg->uanon->an_lock;
			}
			if (mutex_tryenter(slock)) {
				break;
			}
			/* XXX Better than yielding but inadequate. */
			kpause("livelock", false, 1, &uvm_pageqlock);
			slock = NULL;
		}

		/*
		 * drop our loan.  if page is owned by an anon but
		 * PQ_ANON is not set, the page was loaned to the anon
		 * from an object which dropped ownership, so resolve
		 * this by turning the anon's loan into real ownership
		 * (ie. decrement loan_count again and set PQ_ANON).
		 * after all this, if there are no loans left, put the
		 * page back a paging queue (if the page is owned by
		 * an anon) or free it (if the page is now unowned).
		 */

		KASSERT(pg->loan_count > 0);
		pg->loan_count--;
		if (pg->uobject == NULL && pg->uanon != NULL &&
		    (pg->pqflags & PQ_ANON) == 0) {
			KASSERT(pg->loan_count > 0);
			pg->loan_count--;
			pg->pqflags |= PQ_ANON;
		}
		if (pg->loan_count == 0 && pg->uobject == NULL &&
		    pg->uanon == NULL) {
			KASSERT((pg->flags & PG_BUSY) == 0);
			uvm_pagefree(pg);
		}
		if (slock != NULL) {
			mutex_exit(slock);
		}
	}
	mutex_exit(&uvm_pageqlock);
}
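/*
 * The try-lock dance above is the standard answer to taking locks
 * against the documented order.  The same pattern in userland terms
 * (POSIX threads; a sketch, with hypothetical lock roles):
 */
#include <pthread.h>
#include <sched.h>

/* Acquire `inner` while already holding `outer`, against the lock
 * order: try-lock, and on failure drop `outer` and back off so the
 * current holder can make progress (the analogue of kpause() above). */
static void
lock_out_of_order(pthread_mutex_t *outer, pthread_mutex_t *inner)
{
	pthread_mutex_lock(outer);
	while (pthread_mutex_trylock(inner) != 0) {
		pthread_mutex_unlock(outer);
		sched_yield();		/* let the inner lock's holder run */
		pthread_mutex_lock(outer);
	}
	/* ... critical section with both locks held ... */
	pthread_mutex_unlock(inner);
	pthread_mutex_unlock(outer);
}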
/* * Process debugging system call. */ int sys_ptrace(struct proc *p, void *v, register_t *retval) { struct sys_ptrace_args /* { syscallarg(int) req; syscallarg(pid_t) pid; syscallarg(caddr_t) addr; syscallarg(int) data; } */ *uap = v; struct proc *t; /* target thread */ struct process *tr; /* target process */ struct uio uio; struct iovec iov; struct ptrace_io_desc piod; struct ptrace_event pe; struct ptrace_thread_state pts; struct reg *regs; #if defined (PT_SETFPREGS) || defined (PT_GETFPREGS) struct fpreg *fpregs; #endif #if defined (PT_SETXMMREGS) || defined (PT_GETXMMREGS) struct xmmregs *xmmregs; #endif #ifdef PT_WCOOKIE register_t wcookie; #endif int error, write; int temp; int req = SCARG(uap, req); int s; /* "A foolish consistency..." XXX */ switch (req) { case PT_TRACE_ME: t = p; break; /* calls that only operate on the PID */ case PT_READ_I: case PT_READ_D: case PT_WRITE_I: case PT_WRITE_D: case PT_KILL: case PT_ATTACH: case PT_IO: case PT_SET_EVENT_MASK: case PT_GET_EVENT_MASK: case PT_GET_PROCESS_STATE: case PT_GET_THREAD_FIRST: case PT_GET_THREAD_NEXT: default: /* Find the process we're supposed to be operating on. */ if ((t = pfind(SCARG(uap, pid))) == NULL) return (ESRCH); if (t->p_flag & P_THREAD) return (ESRCH); break; /* calls that accept a PID or a thread ID */ case PT_CONTINUE: case PT_DETACH: #ifdef PT_STEP case PT_STEP: #endif case PT_GETREGS: case PT_SETREGS: #ifdef PT_GETFPREGS case PT_GETFPREGS: #endif #ifdef PT_SETFPREGS case PT_SETFPREGS: #endif #ifdef PT_GETXMMREGS case PT_GETXMMREGS: #endif #ifdef PT_SETXMMREGS case PT_SETXMMREGS: #endif if (SCARG(uap, pid) > THREAD_PID_OFFSET) { t = pfind(SCARG(uap, pid) - THREAD_PID_OFFSET); if (t == NULL) return (ESRCH); } else { if ((t = pfind(SCARG(uap, pid))) == NULL) return (ESRCH); if (t->p_flag & P_THREAD) return (ESRCH); } break; } tr = t->p_p; if ((tr->ps_flags & PS_INEXEC) != 0) return (EAGAIN); /* Make sure we can operate on it. */ switch (req) { case PT_TRACE_ME: /* Saying that you're being traced is always legal. */ break; case PT_ATTACH: /* * You can't attach to a process if: * (1) it's the process that's doing the attaching, */ if (tr == p->p_p) return (EINVAL); /* * (2) it's a system process */ if (ISSET(tr->ps_flags, PS_SYSTEM)) return (EPERM); /* * (3) it's already being traced, or */ if (ISSET(tr->ps_flags, PS_TRACED)) return (EBUSY); /* * (4) it's not owned by you, or the last exec * gave us setuid/setgid privs (unless * you're root), or... * * [Note: once PS_SUGID or PS_SUGIDEXEC gets set in * execve(), they stay set until the process does * another execve(). Hence this prevents a setuid * process which revokes its special privileges using * setuid() from being traced. This is good security.] */ if ((tr->ps_ucred->cr_ruid != p->p_ucred->cr_ruid || ISSET(tr->ps_flags, PS_SUGIDEXEC | PS_SUGID)) && (error = suser(p, 0)) != 0) return (error); /* * (5) ...it's init, which controls the security level * of the entire system, and the system was not * compiled with permanently insecure mode turned * on. */ if ((tr->ps_pid == 1) && (securelevel > -1)) return (EPERM); /* * (6) it's an ancestor of the current process and * not init (because that would create a loop in * the process graph). 
*/ if (tr->ps_pid != 1 && inferior(p->p_p, tr)) return (EINVAL); break; case PT_READ_I: case PT_READ_D: case PT_WRITE_I: case PT_WRITE_D: case PT_IO: case PT_CONTINUE: case PT_KILL: case PT_DETACH: #ifdef PT_STEP case PT_STEP: #endif case PT_SET_EVENT_MASK: case PT_GET_EVENT_MASK: case PT_GET_PROCESS_STATE: case PT_GETREGS: case PT_SETREGS: #ifdef PT_GETFPREGS case PT_GETFPREGS: #endif #ifdef PT_SETFPREGS case PT_SETFPREGS: #endif #ifdef PT_GETXMMREGS case PT_GETXMMREGS: #endif #ifdef PT_SETXMMREGS case PT_SETXMMREGS: #endif #ifdef PT_WCOOKIE case PT_WCOOKIE: #endif /* * You can't do what you want to the process if: * (1) It's not being traced at all, */ if (!ISSET(tr->ps_flags, PS_TRACED)) return (EPERM); /* * (2) it's not being traced by _you_, or */ if (tr->ps_pptr != p->p_p) return (EBUSY); /* * (3) it's not currently stopped. */ if (t->p_stat != SSTOP || !ISSET(tr->ps_flags, PS_WAITED)) return (EBUSY); break; case PT_GET_THREAD_FIRST: case PT_GET_THREAD_NEXT: /* * You can't do what you want to the process if: * (1) It's not being traced at all, */ if (!ISSET(tr->ps_flags, PS_TRACED)) return (EPERM); /* * (2) it's not being traced by _you_, or */ if (tr->ps_pptr != p->p_p) return (EBUSY); /* * Do the work here because the request isn't actually * associated with 't' */ if (SCARG(uap, data) != sizeof(pts)) return (EINVAL); if (req == PT_GET_THREAD_NEXT) { error = copyin(SCARG(uap, addr), &pts, sizeof(pts)); if (error) return (error); t = pfind(pts.pts_tid - THREAD_PID_OFFSET); if (t == NULL || ISSET(t->p_flag, P_WEXIT)) return (ESRCH); if (t->p_p != tr) return (EINVAL); t = TAILQ_NEXT(t, p_thr_link); } else { t = TAILQ_FIRST(&tr->ps_threads); } if (t == NULL) pts.pts_tid = -1; else pts.pts_tid = t->p_pid + THREAD_PID_OFFSET; return (copyout(&pts, SCARG(uap, addr), sizeof(pts))); default: /* It was not a legal request. */ return (EINVAL); } /* Do single-step fixup if needed. */ FIX_SSTEP(t); /* Now do the operation. */ write = 0; *retval = 0; switch (req) { case PT_TRACE_ME: /* Just set the trace flag. */ atomic_setbits_int(&tr->ps_flags, PS_TRACED); tr->ps_oppid = tr->ps_pptr->ps_pid; if (tr->ps_ptstat == NULL) tr->ps_ptstat = malloc(sizeof(*tr->ps_ptstat), M_SUBPROC, M_WAITOK); memset(tr->ps_ptstat, 0, sizeof(*tr->ps_ptstat)); return (0); case PT_WRITE_I: /* XXX no separate I and D spaces */ case PT_WRITE_D: write = 1; temp = SCARG(uap, data); case PT_READ_I: /* XXX no separate I and D spaces */ case PT_READ_D: /* write = 0 done above. */ iov.iov_base = (caddr_t)&temp; iov.iov_len = sizeof(int); uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = (off_t)(vaddr_t)SCARG(uap, addr); uio.uio_resid = sizeof(int); uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = write ? UIO_WRITE : UIO_READ; uio.uio_procp = p; error = process_domem(p, t, &uio, write ? 
		    PT_WRITE_I : PT_READ_I);
		if (write == 0)
			*retval = temp;
		return (error);

	case PT_IO:
		error = copyin(SCARG(uap, addr), &piod, sizeof(piod));
		if (error)
			return (error);

		iov.iov_base = piod.piod_addr;
		iov.iov_len = piod.piod_len;
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_offset = (off_t)(vaddr_t)piod.piod_offs;
		uio.uio_resid = piod.piod_len;
		uio.uio_segflg = UIO_USERSPACE;
		uio.uio_procp = p;

		switch (piod.piod_op) {
		case PIOD_READ_I:
			req = PT_READ_I;
			uio.uio_rw = UIO_READ;
			break;
		case PIOD_READ_D:
			req = PT_READ_D;
			uio.uio_rw = UIO_READ;
			break;
		case PIOD_WRITE_I:
			req = PT_WRITE_I;
			uio.uio_rw = UIO_WRITE;
			break;
		case PIOD_WRITE_D:
			req = PT_WRITE_D;
			uio.uio_rw = UIO_WRITE;
			break;
		case PIOD_READ_AUXV:
			req = PT_READ_D;
			uio.uio_rw = UIO_READ;
			temp = tr->ps_emul->e_arglen * sizeof(char *);
			if (uio.uio_offset > temp)
				return (EIO);
			if (uio.uio_resid > temp - uio.uio_offset)
				uio.uio_resid = temp - uio.uio_offset;
			piod.piod_len = iov.iov_len = uio.uio_resid;
			error = process_auxv_offset(p, t, &uio);
			if (error)
				return (error);
			break;
		default:
			return (EINVAL);
		}
		error = process_domem(p, t, &uio, req);
		piod.piod_len -= uio.uio_resid;
		(void) copyout(&piod, SCARG(uap, addr), sizeof(piod));
		return (error);

#ifdef PT_STEP
	case PT_STEP:
		/*
		 * From the 4.4BSD PRM:
		 * "Execution continues as in request PT_CONTINUE; however
		 * as soon as possible after execution of at least one
		 * instruction, execution stops again. [ ... ]"
		 */
#endif
	case PT_CONTINUE:
		/*
		 * From the 4.4BSD PRM:
		 * "The data argument is taken as a signal number and the
		 * child's execution continues at location addr as if it
		 * incurred that signal.  Normally the signal number will
		 * be either 0 to indicate that the signal that caused the
		 * stop should be ignored, or that value fetched out of
		 * the process's image indicating which signal caused
		 * the stop.  If addr is (int *)1 then execution continues
		 * from where it stopped."
		 */
		if (SCARG(uap, pid) < THREAD_PID_OFFSET && tr->ps_single)
			t = tr->ps_single;

		/* Check that the data is a valid signal number or zero. */
		if (SCARG(uap, data) < 0 || SCARG(uap, data) >= NSIG)
			return (EINVAL);

		/* If the address parameter is not (int *)1, set the pc. */
		if ((int *)SCARG(uap, addr) != (int *)1)
			if ((error = process_set_pc(t, SCARG(uap, addr))) != 0)
				goto relebad;

#ifdef PT_STEP
		/*
		 * Arrange for a single-step, if that's requested and possible.
		 */
		error = process_sstep(t, req == PT_STEP);
		if (error)
			goto relebad;
#endif
		goto sendsig;

	case PT_DETACH:
		/*
		 * From the 4.4BSD PRM:
		 * "The data argument is taken as a signal number and the
		 * child's execution continues at location addr as if it
		 * incurred that signal.  Normally the signal number will
		 * be either 0 to indicate that the signal that caused the
		 * stop should be ignored, or that value fetched out of
		 * the process's image indicating which signal caused
		 * the stop.  If addr is (int *)1 then execution continues
		 * from where it stopped."
		 */
		if (SCARG(uap, pid) < THREAD_PID_OFFSET && tr->ps_single)
			t = tr->ps_single;

		/* Check that the data is a valid signal number or zero. */
		if (SCARG(uap, data) < 0 || SCARG(uap, data) >= NSIG)
			return (EINVAL);

#ifdef PT_STEP
		/*
		 * Arrange for a single-step, if that's requested and possible.
		 */
		error = process_sstep(t, req == PT_STEP);
		if (error)
			goto relebad;
#endif

		/* give process back to original parent or init */
		if (tr->ps_oppid != tr->ps_pptr->ps_pid) {
			struct process *ppr;

			ppr = prfind(tr->ps_oppid);
			proc_reparent(tr, ppr ? ppr : initprocess);
		}

		/* not being traced any more */
		tr->ps_oppid = 0;
		atomic_clearbits_int(&tr->ps_flags, PS_TRACED|PS_WAITED);

	sendsig:
		memset(tr->ps_ptstat, 0, sizeof(*tr->ps_ptstat));

		/* Finally, deliver the requested signal (or none). */
		if (t->p_stat == SSTOP) {
			t->p_xstat = SCARG(uap, data);
			SCHED_LOCK(s);
			setrunnable(t);
			SCHED_UNLOCK(s);
		} else {
			if (SCARG(uap, data) != 0)
				psignal(t, SCARG(uap, data));
		}
		return (0);

	relebad:
		return (error);

	case PT_KILL:
		if (SCARG(uap, pid) < THREAD_PID_OFFSET && tr->ps_single)
			t = tr->ps_single;

		/* just send the process a KILL signal. */
		SCARG(uap, data) = SIGKILL;
		goto sendsig;	/* in PT_CONTINUE, above. */

	case PT_ATTACH:
		/*
		 * As was done in procfs:
		 * Go ahead and set the trace flag.
		 * Save the old parent (it's reset in
		 *   _DETACH, and also in kern_exit.c:wait4()
		 * Reparent the process so that the tracing
		 *   proc gets to see all the action.
		 * Stop the target.
		 */
		atomic_setbits_int(&tr->ps_flags, PS_TRACED);
		tr->ps_oppid = tr->ps_pptr->ps_pid;
		if (tr->ps_pptr != p->p_p)
			proc_reparent(tr, p->p_p);
		if (tr->ps_ptstat == NULL)
			tr->ps_ptstat = malloc(sizeof(*tr->ps_ptstat),
			    M_SUBPROC, M_WAITOK);
		SCARG(uap, data) = SIGSTOP;
		goto sendsig;

	case PT_GET_EVENT_MASK:
		if (SCARG(uap, data) != sizeof(pe))
			return (EINVAL);
		memset(&pe, 0, sizeof(pe));
		pe.pe_set_event = tr->ps_ptmask;
		return (copyout(&pe, SCARG(uap, addr), sizeof(pe)));
	case PT_SET_EVENT_MASK:
		if (SCARG(uap, data) != sizeof(pe))
			return (EINVAL);
		if ((error = copyin(SCARG(uap, addr), &pe, sizeof(pe))))
			return (error);
		tr->ps_ptmask = pe.pe_set_event;
		return (0);

	case PT_GET_PROCESS_STATE:
		if (SCARG(uap, data) != sizeof(*tr->ps_ptstat))
			return (EINVAL);
		if (tr->ps_single)
			tr->ps_ptstat->pe_tid =
			    tr->ps_single->p_pid + THREAD_PID_OFFSET;
		return (copyout(tr->ps_ptstat, SCARG(uap, addr),
		    sizeof(*tr->ps_ptstat)));

	case PT_SETREGS:
		KASSERT((p->p_flag & P_SYSTEM) == 0);
		if ((error = process_checkioperm(p, tr)) != 0)
			return (error);

		regs = malloc(sizeof(*regs), M_TEMP, M_WAITOK);
		error = copyin(SCARG(uap, addr), regs, sizeof(*regs));
		if (error == 0) {
			error = process_write_regs(t, regs);
		}
		free(regs, M_TEMP, sizeof(*regs));
		return (error);
	case PT_GETREGS:
		KASSERT((p->p_flag & P_SYSTEM) == 0);
		if ((error = process_checkioperm(p, tr)) != 0)
			return (error);

		regs = malloc(sizeof(*regs), M_TEMP, M_WAITOK);
		error = process_read_regs(t, regs);
		if (error == 0)
			error = copyout(regs,
			    SCARG(uap, addr), sizeof(*regs));
		free(regs, M_TEMP, sizeof(*regs));
		return (error);

#ifdef PT_SETFPREGS
	case PT_SETFPREGS:
		KASSERT((p->p_flag & P_SYSTEM) == 0);
		if ((error = process_checkioperm(p, tr)) != 0)
			return (error);

		fpregs = malloc(sizeof(*fpregs), M_TEMP, M_WAITOK);
		error = copyin(SCARG(uap, addr), fpregs, sizeof(*fpregs));
		if (error == 0) {
			error = process_write_fpregs(t, fpregs);
		}
		free(fpregs, M_TEMP, sizeof(*fpregs));
		return (error);
#endif
#ifdef PT_GETFPREGS
	case PT_GETFPREGS:
		KASSERT((p->p_flag & P_SYSTEM) == 0);
		if ((error = process_checkioperm(p, tr)) != 0)
			return (error);

		fpregs = malloc(sizeof(*fpregs), M_TEMP, M_WAITOK);
		error = process_read_fpregs(t, fpregs);
		if (error == 0)
			error = copyout(fpregs,
			    SCARG(uap, addr), sizeof(*fpregs));
		free(fpregs, M_TEMP, sizeof(*fpregs));
		return (error);
#endif
#ifdef PT_SETXMMREGS
	case PT_SETXMMREGS:
		KASSERT((p->p_flag & P_SYSTEM) == 0);
		if ((error = process_checkioperm(p, tr)) != 0)
			return (error);

		xmmregs = malloc(sizeof(*xmmregs), M_TEMP, M_WAITOK);
		error = copyin(SCARG(uap, addr), xmmregs, sizeof(*xmmregs));
		if (error == 0) {
			error = process_write_xmmregs(t, xmmregs);
		}
		free(xmmregs, M_TEMP, sizeof(*xmmregs));
		return (error);
#endif
#ifdef PT_GETXMMREGS
	case PT_GETXMMREGS:
		KASSERT((p->p_flag & P_SYSTEM) == 0);
		if ((error = process_checkioperm(p, tr)) != 0)
			return (error);

		xmmregs = malloc(sizeof(*xmmregs), M_TEMP, M_WAITOK);
		error = process_read_xmmregs(t, xmmregs);
		if (error == 0)
			error = copyout(xmmregs,
			    SCARG(uap, addr), sizeof(*xmmregs));
		free(xmmregs, M_TEMP, sizeof(*xmmregs));
		return (error);
#endif
#ifdef PT_WCOOKIE
	case PT_WCOOKIE:
		wcookie = process_get_wcookie(t);
		return (copyout(&wcookie, SCARG(uap, addr),
		    sizeof(register_t)));
#endif
	}

#ifdef DIAGNOSTIC
	panic("ptrace: impossible");
#endif
	return 0;
}
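/*
 * A minimal userland sketch of driving the PT_ATTACH/PT_IO/PT_DETACH
 * paths above, assuming OpenBSD's ptrace(2) interface.  The target pid
 * and address are hypothetical, and error handling is abbreviated; a
 * real tracer also needs the privilege to attach.
 */
#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
#include <err.h>
#include <stdio.h>

static long
read_word(pid_t pid, void *addr)
{
	struct ptrace_io_desc piod;
	long word;

	piod.piod_op = PIOD_READ_D;	/* becomes PT_READ_D in the kernel */
	piod.piod_offs = addr;		/* offset in the traced process */
	piod.piod_addr = &word;		/* buffer in our address space */
	piod.piod_len = sizeof(word);
	if (ptrace(PT_IO, pid, (caddr_t)&piod, 0) == -1)
		err(1, "PT_IO");
	return (word);
}

int
main(void)
{
	pid_t pid = 12345;		/* hypothetical traced process */

	if (ptrace(PT_ATTACH, pid, NULL, 0) == -1)	/* stops the target */
		err(1, "PT_ATTACH");
	waitpid(pid, NULL, 0);				/* wait for the stop */
	printf("word: %lx\n", read_word(pid, (void *)0x1000));
	/* addr (caddr_t)1, data 0: resume where stopped, no signal */
	ptrace(PT_DETACH, pid, (caddr_t)1, 0);
	return (0);
}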
/*
 * Shutdown the system cleanly to prepare for reboot, halt, or power off.
 */
void
kern_reboot(int howto)
{
	static int first_buf_printf = 1;

#if defined(SMP)
	/*
	 * Bind us to CPU 0 so that all shutdown code runs there.  Some
	 * systems don't shutdown properly (i.e., ACPI power off) if we
	 * run on another processor.
	 */
	if (!SCHEDULER_STOPPED()) {
		thread_lock(curthread);
		sched_bind(curthread, 0);
		thread_unlock(curthread);
		KASSERT(PCPU_GET(cpuid) == 0, ("boot: not running on cpu 0"));
	}
#endif
	/* We're in the process of rebooting. */
	rebooting = 1;

	/* collect extra flags that shutdown_nice might have set */
	howto |= shutdown_howto;

	/* We are out of the debugger now. */
	kdb_active = 0;

	/*
	 * Do any callouts that should be done BEFORE syncing the filesystems.
	 */
	EVENTHANDLER_INVOKE(shutdown_pre_sync, howto);

	/*
	 * Now sync filesystems
	 */
	if (!cold && (howto & RB_NOSYNC) == 0 && waittime < 0) {
		register struct buf *bp;
		int iter, nbusy, pbusy;
#ifndef PREEMPTION
		int subiter;
#endif

		waittime = 0;
		wdog_kern_pat(WD_LASTVAL);
		sys_sync(curthread, NULL);

		/*
		 * With soft updates, some buffers that are
		 * written will be remarked as dirty until other
		 * buffers are written.
		 */
		for (iter = pbusy = 0; iter < 20; iter++) {
			nbusy = 0;
			for (bp = &buf[nbuf]; --bp >= buf; )
				if (isbufbusy(bp))
					nbusy++;
			if (nbusy == 0) {
				if (first_buf_printf)
					printf("All buffers synced.");
				break;
			}
			if (first_buf_printf) {
				printf("Syncing disks, buffers remaining... ");
				first_buf_printf = 0;
			}
			printf("%d ", nbusy);
			if (nbusy < pbusy)
				iter = 0;
			pbusy = nbusy;
			wdog_kern_pat(WD_LASTVAL);
			sys_sync(curthread, NULL);

#ifdef PREEMPTION
			/*
			 * Drop Giant and spin for a while to allow
			 * interrupt threads to run.
			 */
			DROP_GIANT();
			DELAY(50000 * iter);
			PICKUP_GIANT();
#else
			/*
			 * Drop Giant and context switch several times to
			 * allow interrupt threads to run.
			 */
			DROP_GIANT();
			for (subiter = 0; subiter < 50 * iter; subiter++) {
				thread_lock(curthread);
				mi_switch(SW_VOL, NULL);
				thread_unlock(curthread);
				DELAY(1000);
			}
			PICKUP_GIANT();
#endif
		}
		printf("\n");
		/*
		 * Count only busy local buffers to prevent forcing
		 * a fsck if we're just a client of a wedged NFS server
		 */
		nbusy = 0;
		for (bp = &buf[nbuf]; --bp >= buf; ) {
			if (isbufbusy(bp)) {
#if 0
/* XXX: This is bogus.  We should probably have a BO_REMOTE flag instead */
				if (bp->b_dev == NULL) {
					TAILQ_REMOVE(&mountlist,
					    bp->b_vp->v_mount, mnt_list);
					continue;
				}
#endif
				nbusy++;
				if (show_busybufs > 0) {
					printf(
		    "%d: buf:%p, vnode:%p, flags:%0x, blkno:%jd, lblkno:%jd, buflock:",
					    nbusy, bp, bp->b_vp, bp->b_flags,
					    (intmax_t)bp->b_blkno,
					    (intmax_t)bp->b_lblkno);
					BUF_LOCKPRINTINFO(bp);
					if (show_busybufs > 1)
						vn_printf(bp->b_vp,
						    "vnode content: ");
				}
			}
		}
		if (nbusy) {
			/*
			 * Failed to sync all blocks.  Indicate this and don't
			 * unmount filesystems (thus forcing an fsck on reboot).
			 */
			printf("Giving up on %d buffers\n", nbusy);
			DELAY(5000000);	/* 5 seconds */
		} else {
			if (!first_buf_printf)
				printf("Final sync complete\n");
			/*
			 * Unmount filesystems
			 */
			if (panicstr == 0)
				vfs_unmountall();
		}
		swapoff_all();
		DELAY(100000);		/* wait for console output to finish */
	}

	print_uptime();

	cngrab();

	/*
	 * Ok, now do things that assume all filesystem activity has
	 * been completed.
	 */
	EVENTHANDLER_INVOKE(shutdown_post_sync, howto);

	if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold && !dumping)
		doadump(TRUE);

	/* Now that we're going to really halt the system... */
	EVENTHANDLER_INVOKE(shutdown_final, howto);

	for(;;) ;	/* safety against shutdown_reset not working */
	/* NOTREACHED */
}
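/*
 * The howto flags consumed above originate in userland reboot(2).  A
 * minimal sketch (FreeBSD), assuming root privilege; RB_NOSYNC would
 * skip the buffer-sync loop in kern_reboot(), RB_HALT stops instead of
 * rebooting.
 */
#include <sys/reboot.h>
#include <unistd.h>
#include <err.h>

int
main(void)
{
	/* Halt rather than reboot; filesystems are synced first. */
	if (reboot(RB_HALT) == -1)
		err(1, "reboot");
	return (0);	/* NOTREACHED on success */
}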
/*
 * uvm_loanbreak: break loan on a uobj page
 *
 * => called with uobj locked
 * => the page should be busy
 * => return value:
 *	newly allocated page if succeeded
 */
struct vm_page *
uvm_loanbreak(struct vm_page *uobjpage)
{
	struct vm_page *pg;
#ifdef DIAGNOSTIC
	struct uvm_object *uobj = uobjpage->uobject;
#endif

	KASSERT(uobj != NULL);
	KASSERT(mutex_owned(uobj->vmobjlock));
	KASSERT(uobjpage->flags & PG_BUSY);

	/* alloc new un-owned page */
	pg = uvm_pagealloc(NULL, 0, NULL, 0);
	if (pg == NULL)
		return NULL;

	/*
	 * copy the data from the old page to the new
	 * one and clear the fake flags on the new page (keep it busy).
	 * force a reload of the old page by clearing it from all
	 * pmaps.
	 * transfer dirtiness of the old page to the new page.
	 * then lock the page queues to rename the pages.
	 */

	uvm_pagecopy(uobjpage, pg);	/* old -> new */
	pg->flags &= ~PG_FAKE;
	pmap_page_protect(uobjpage, VM_PROT_NONE);
	if ((uobjpage->flags & PG_CLEAN) != 0 && !pmap_clear_modify(uobjpage)) {
		pmap_clear_modify(pg);
		pg->flags |= PG_CLEAN;
	} else {
		/* uvm_pagecopy marked it dirty */
		KASSERT((pg->flags & PG_CLEAN) == 0);
		/* an object with a dirty page should be dirty. */
		KASSERT(!UVM_OBJ_IS_CLEAN(uobj));
	}
	if (uobjpage->flags & PG_WANTED)
		wakeup(uobjpage);
	/* uobj still locked */
	uobjpage->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(uobjpage, NULL);

	mutex_enter(&uvm_pageqlock);

	/*
	 * replace uobjpage with new page.
	 */

	uvm_pagereplace(uobjpage, pg);

	/*
	 * if the page is no longer referenced by
	 * an anon (i.e. we are breaking an O->K
	 * loan), then remove it from any pageq's.
	 */
	if (uobjpage->uanon == NULL)
		uvm_pagedequeue(uobjpage);

	/*
	 * at this point we have absolutely no
	 * control over uobjpage
	 */

	/* install new page */
	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);

	/*
	 * done!  loan is broken and "pg" is
	 * PG_BUSY.  it can now replace uobjpage.
	 */

	return pg;
}
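/*
 * A sketch of the calling convention for uvm_loanbreak(), modelled on
 * the write-fault path.  Illustrative only: the object must be locked
 * and the page busy on entry; on success the caller continues with the
 * replacement page and must not touch the old one again.
 */
#include <uvm/uvm.h>

static struct vm_page *
example_break_loan(struct uvm_object *uobj, struct vm_page *uobjpage)
{
	struct vm_page *pg;

	KASSERT(mutex_owned(uobj->vmobjlock));
	KASSERT(uobjpage->flags & PG_BUSY);

	pg = uvm_loanbreak(uobjpage);
	if (pg == NULL) {
		/*
		 * Out of memory: a real caller unlocks everything,
		 * waits for free pages, and refaults.
		 */
		return NULL;
	}
	/* "pg" is busy and has replaced "uobjpage" in the object. */
	return pg;
}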
/*
 * Completes some final bits of initialization for just established connections
 * and changes their state to TCPS_ESTABLISHED.
 *
 * The ISNs are from after the exchange of SYNs, i.e., the true ISN + 1.
 */
void
make_established(struct toepcb *toep, uint32_t snd_isn, uint32_t rcv_isn,
    uint16_t opt)
{
	struct inpcb *inp = toep->inp;
	struct socket *so = inp->inp_socket;
	struct tcpcb *tp = intotcpcb(inp);
	long bufsize;
	uint32_t iss = be32toh(snd_isn) - 1;	/* true ISS */
	uint32_t irs = be32toh(rcv_isn) - 1;	/* true IRS */
	uint16_t tcpopt = be16toh(opt);
	struct flowc_tx_params ftxp;

	INP_WLOCK_ASSERT(inp);
	KASSERT(tp->t_state == TCPS_SYN_SENT ||
	    tp->t_state == TCPS_SYN_RECEIVED,
	    ("%s: TCP state %s", __func__, tcpstates[tp->t_state]));

	CTR4(KTR_CXGBE, "%s: tid %d, toep %p, inp %p",
	    __func__, toep->tid, toep, inp);

	tp->t_state = TCPS_ESTABLISHED;
	tp->t_starttime = ticks;
	TCPSTAT_INC(tcps_connects);

	tp->irs = irs;
	tcp_rcvseqinit(tp);
	tp->rcv_wnd = toep->rx_credits << 10;
	tp->rcv_adv += tp->rcv_wnd;
	tp->last_ack_sent = tp->rcv_nxt;

	/*
	 * If we were unable to send all rx credits via opt0, save the
	 * remainder in rx_credits so that they can be handed over with
	 * the next credit update.
	 */
	SOCKBUF_LOCK(&so->so_rcv);
	bufsize = select_rcv_wnd(so);
	SOCKBUF_UNLOCK(&so->so_rcv);
	toep->rx_credits = bufsize - tp->rcv_wnd;

	tp->iss = iss;
	tcp_sendseqinit(tp);
	tp->snd_una = iss + 1;
	tp->snd_nxt = iss + 1;
	tp->snd_max = iss + 1;

	assign_rxopt(tp, tcpopt);

	SOCKBUF_LOCK(&so->so_snd);
	if (so->so_snd.sb_flags & SB_AUTOSIZE && V_tcp_do_autosndbuf)
		bufsize = V_tcp_autosndbuf_max;
	else
		bufsize = sbspace(&so->so_snd);
	SOCKBUF_UNLOCK(&so->so_snd);

	ftxp.snd_nxt = tp->snd_nxt;
	ftxp.rcv_nxt = tp->rcv_nxt;
	ftxp.snd_space = bufsize;
	ftxp.mss = tp->t_maxseg;
	send_flowc_wr(toep, &ftxp);

	soisconnected(so);
}
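/*
 * An illustration of the ISN convention noted above: the hardware
 * reports sequence numbers from after the SYN exchange, so the true
 * initial numbers are one less.  Hypothetical values; host-endian
 * conversion via be32toh() as in make_established().
 */
#include <sys/endian.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint32_t snd_isn = htobe32(0x1001);	/* as reported by the hw */
	uint32_t iss = be32toh(snd_isn) - 1;	/* true ISS: 0x1000 */

	/* snd_una/snd_nxt/snd_max all start at iss + 1, per the code. */
	printf("iss=%#x snd_nxt=%#x\n", iss, iss + 1);
	return (0);
}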
static inline int
uvm_loanentry(struct uvm_faultinfo *ufi, void ***output, int flags)
{
	vaddr_t curaddr = ufi->orig_rvaddr;
	vsize_t togo = ufi->size;
	struct vm_aref *aref = &ufi->entry->aref;
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct vm_anon *anon;
	int rv, result = 0;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * lock us the rest of the way down (we unlock before return)
	 */
	if (aref->ar_amap) {
		amap_lock(aref->ar_amap);
	}

	/*
	 * loop until done
	 */
	while (togo) {

		/*
		 * find the page we want.  check the anon layer first.
		 */
		if (aref->ar_amap) {
			anon = amap_lookup(aref, curaddr - ufi->entry->start);
		} else {
			anon = NULL;
		}

		/* locked: map, amap, uobj */
		if (anon) {
			rv = uvm_loananon(ufi, output, flags, anon);
		} else if (uobj) {
			rv = uvm_loanuobj(ufi, output, flags, curaddr);
		} else if (UVM_ET_ISCOPYONWRITE(ufi->entry)) {
			rv = uvm_loanzero(ufi, output, flags);
		} else {
			uvmfault_unlockall(ufi, aref->ar_amap, uobj);
			rv = -1;
		}
		/* locked: if (rv > 0) => map, amap, uobj  [o.w. unlocked] */
		KASSERT(rv > 0 || aref->ar_amap == NULL ||
		    !mutex_owned(aref->ar_amap->am_lock));
		KASSERT(rv > 0 || uobj == NULL ||
		    !mutex_owned(uobj->vmobjlock));

		/* total failure */
		if (rv < 0) {
			UVMHIST_LOG(loanhist, "failure %d", rv, 0,0,0);
			return (-1);
		}

		/* relock failed, need to do another lookup */
		if (rv == 0) {
			UVMHIST_LOG(loanhist, "relock failure %d",
			    result, 0,0,0);
			return (result);
		}

		/*
		 * got it... advance to next page
		 */
		result++;
		togo -= PAGE_SIZE;
		curaddr += PAGE_SIZE;
	}

	/*
	 * unlock what we locked, unlock the maps and return
	 */
	if (aref->ar_amap) {
		amap_unlock(aref->ar_amap);
	}
	uvmfault_unlockmaps(ufi, false);
	UVMHIST_LOG(loanhist, "done %d", result, 0,0,0);
	return (result);
}
/* Other helper routines. */
static int
ksem_create(struct thread *td, const char *name, semid_t *semidp, mode_t mode,
    unsigned int value, int flags, int compat32)
{
	struct filedesc *fdp;
	struct ksem *ks;
	struct file *fp;
	char *path;
	Fnv32_t fnv;
	int error, fd;

	if (value > SEM_VALUE_MAX)
		return (EINVAL);

	fdp = td->td_proc->p_fd;
	mode = (mode & ~fdp->fd_cmask) & ACCESSPERMS;
	error = falloc(td, &fp, &fd, O_CLOEXEC);
	if (error) {
		if (name == NULL)
			error = ENOSPC;
		return (error);
	}

	/*
	 * Go ahead and copyout the file descriptor now.  This is a bit
	 * premature, but it is a lot easier to handle errors as opposed
	 * to later when we've possibly created a new semaphore, etc.
	 */
	error = ksem_create_copyout_semid(td, semidp, fd, compat32);
	if (error) {
		fdclose(td, fp, fd);
		fdrop(fp, td);
		return (error);
	}

	if (name == NULL) {
		/* Create an anonymous semaphore. */
		ks = ksem_alloc(td->td_ucred, mode, value);
		if (ks == NULL)
			error = ENOSPC;
		else
			ks->ks_flags |= KS_ANONYMOUS;
	} else {
		path = malloc(MAXPATHLEN, M_KSEM, M_WAITOK);
		error = copyinstr(name, path, MAXPATHLEN, NULL);

		/* Require paths to start with a '/' character. */
		if (error == 0 && path[0] != '/')
			error = EINVAL;
		if (error) {
			fdclose(td, fp, fd);
			fdrop(fp, td);
			free(path, M_KSEM);
			return (error);
		}

		fnv = fnv_32_str(path, FNV1_32_INIT);
		sx_xlock(&ksem_dict_lock);
		ks = ksem_lookup(path, fnv);
		if (ks == NULL) {
			/* Object does not exist, create it if requested. */
			if (flags & O_CREAT) {
				ks = ksem_alloc(td->td_ucred, mode, value);
				if (ks == NULL)
					error = ENFILE;
				else {
					ksem_insert(path, fnv, ks);
					path = NULL;
				}
			} else
				error = ENOENT;
		} else {
			/*
			 * Object already exists, obtain a new
			 * reference if requested and permitted.
			 */
			if ((flags & (O_CREAT | O_EXCL)) ==
			    (O_CREAT | O_EXCL))
				error = EEXIST;
			else {
#ifdef MAC
				error = mac_posixsem_check_open(td->td_ucred,
				    ks);
				if (error == 0)
#endif
				error = ksem_access(ks, td->td_ucred);
			}
			if (error == 0)
				ksem_hold(ks);
#ifdef INVARIANTS
			else
				ks = NULL;
#endif
		}
		sx_xunlock(&ksem_dict_lock);
		if (path)
			free(path, M_KSEM);
	}

	if (error) {
		KASSERT(ks == NULL, ("ksem_create error with a ksem"));
		fdclose(td, fp, fd);
		fdrop(fp, td);
		return (error);
	}
	KASSERT(ks != NULL, ("ksem_create w/o a ksem"));

	finit(fp, FREAD | FWRITE, DTYPE_SEM, ks, &ksem_ops);

	fdrop(fp, td);

	return (0);
}
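/*
 * ksem_create() is the kernel backend for sem_open(3).  A minimal
 * userland sketch exercising the named-semaphore path above: the name
 * must begin with '/', and O_CREAT|O_EXCL maps to the EEXIST branch.
 * The semaphore name is hypothetical.
 */
#include <semaphore.h>
#include <fcntl.h>
#include <err.h>

int
main(void)
{
	sem_t *sem;

	sem = sem_open("/example", O_CREAT | O_EXCL, 0600, 1);
	if (sem == SEM_FAILED)
		err(1, "sem_open");
	sem_wait(sem);
	/* ... critical section ... */
	sem_post(sem);
	sem_close(sem);
	sem_unlink("/example");
	return (0);
}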
int
uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags)
{
	struct uvm_faultinfo ufi;
	void **result, **output;
	int rv, error;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * ensure that one and only one of the flags is set
	 */
	KASSERT(((flags & UVM_LOAN_TOANON) == 0) ^
		((flags & UVM_LOAN_TOPAGE) == 0));

	/*
	 * "output" is a pointer to the current place to put the loaned page.
	 */
	result = v;
	output = &result[0];	/* start at the beginning ... */

	/*
	 * while we've got pages to do
	 */
	while (len > 0) {

		/*
		 * fill in params for a call to uvmfault_lookup
		 */
		ufi.orig_map = map;
		ufi.orig_rvaddr = start;
		ufi.orig_size = len;

		/*
		 * do the lookup, the only time this will fail is if we hit on
		 * an unmapped region (an error)
		 */
		if (!uvmfault_lookup(&ufi, false)) {
			error = ENOENT;
			goto fail;
		}

		/*
		 * map now locked.  now do the loanout...
		 */
		rv = uvm_loanentry(&ufi, &output, flags);
		if (rv < 0) {
			/* all unlocked due to error */
			error = EINVAL;
			goto fail;
		}

		/*
		 * done!  the map is unlocked.  advance, if possible.
		 *
		 * XXXCDC: could be recoded to hold the map lock with
		 *	   smarter code (but it only happens on map entry
		 *	   boundaries, so it isn't that bad).
		 */
		if (rv) {
			rv <<= PAGE_SHIFT;
			len -= rv;
			start += rv;
		}
	}
	UVMHIST_LOG(loanhist, "success", 0,0,0,0);
	return 0;

fail:
	/*
	 * failed to complete loans.  drop any loans and return failure code.
	 * map is already unlocked.
	 */
	if (output - result) {
		if (flags & UVM_LOAN_TOANON) {
			uvm_unloananon((struct vm_anon **)result,
			    output - result);
		} else {
			uvm_unloanpage((struct vm_page **)result,
			    output - result);
		}
	}
	UVMHIST_LOG(loanhist, "error %d", error,0,0,0);
	return (error);
}
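/*
 * A sketch of a kernel caller loaning wired pages out of a user map
 * (UVM_LOAN_TOPAGE), in the style of zero-copy transmit paths.
 * Illustrative only: assumes "uva" and "len" are page-aligned and that
 * "pgs" has room for len >> PAGE_SHIFT entries.
 */
#include <uvm/uvm.h>

static int
example_loan_pages(struct vm_map *map, vaddr_t uva, vsize_t len,
    struct vm_page **pgs)
{
	int error;

	error = uvm_loan(map, uva, len, pgs, UVM_LOAN_TOPAGE);
	if (error)
		return error;

	/* ... DMA from the pages, etc. ... then return the loan: */
	uvm_unloan(pgs, len >> PAGE_SHIFT, UVM_LOAN_TOPAGE);
	return 0;
}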
static int
devfs_fo_write(struct file *fp, struct uio *uio,
	       struct ucred *cred, int flags)
{
	struct devfs_node *node;
	struct vnode *vp;
	int ioflag;
	int error;
	cdev_t dev;

	KASSERT(uio->uio_td == curthread,
		("uio_td %p is not p %p", uio->uio_td, curthread));

	vp = (struct vnode *)fp->f_data;
	if (vp == NULL || vp->v_type == VBAD)
		return EBADF;

	node = DEVFS_NODE(vp);

	if (vp->v_type == VREG)
		bwillwrite(uio->uio_resid);

	vp = (struct vnode *)fp->f_data;

	if ((dev = vp->v_rdev) == NULL)
		return EBADF;

	reference_dev(dev);

	if ((flags & O_FOFFSET) == 0)
		uio->uio_offset = fp->f_offset;

	ioflag = IO_UNIT;
	if (vp->v_type == VREG &&
	   ((fp->f_flag & O_APPEND) || (flags & O_FAPPEND))) {
		ioflag |= IO_APPEND;
	}

	if (flags & O_FBLOCKING) {
		/* ioflag &= ~IO_NDELAY; */
	} else if (flags & O_FNONBLOCKING) {
		ioflag |= IO_NDELAY;
	} else if (fp->f_flag & FNONBLOCK) {
		ioflag |= IO_NDELAY;
	}
	if (flags & O_FBUFFERED) {
		/* ioflag &= ~IO_DIRECT; */
	} else if (flags & O_FUNBUFFERED) {
		ioflag |= IO_DIRECT;
	} else if (fp->f_flag & O_DIRECT) {
		ioflag |= IO_DIRECT;
	}
	if (flags & O_FASYNCWRITE) {
		/* ioflag &= ~IO_SYNC; */
	} else if (flags & O_FSYNCWRITE) {
		ioflag |= IO_SYNC;
	} else if (fp->f_flag & O_FSYNC) {
		ioflag |= IO_SYNC;
	}

	if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))
		ioflag |= IO_SYNC;
	ioflag |= sequential_heuristic(uio, fp);

	error = dev_dwrite(dev, uio, ioflag, fp);

	release_dev(dev);
	if (node) {
		nanotime(&node->atime);
		nanotime(&node->mtime);
	}

	if ((flags & O_FOFFSET) == 0)
		fp->f_offset = uio->uio_offset;
	fp->f_nextoff = uio->uio_offset;

	return (error);
}
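/*
 * A userland sketch of driving the fp->f_flag paths above: a
 * descriptor opened with O_DIRECT and O_FSYNC gets IO_DIRECT and
 * IO_SYNC on every write.  /dev/null stands in for a real device node;
 * whether O_DIRECT is honoured depends on the driver.
 */
#include <fcntl.h>
#include <unistd.h>
#include <err.h>

int
main(void)
{
	char buf[512] = { 0 };
	int fd;

	fd = open("/dev/null", O_WRONLY | O_DIRECT | O_FSYNC);
	if (fd == -1)
		err(1, "open");
	if (write(fd, buf, sizeof(buf)) == -1)
		err(1, "write");
	close(fd);
	return (0);
}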
int
uvm_loananon(struct uvm_faultinfo *ufi, void ***output, int flags,
    struct vm_anon *anon)
{
	struct vm_page *pg;
	int error;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * if we are loaning to "another" anon then it is easy, we just
	 * bump the reference count on the current anon and return a
	 * pointer to it (it becomes copy-on-write shared).
	 */
	if (flags & UVM_LOAN_TOANON) {
		KASSERT(mutex_owned(anon->an_lock));
		pg = anon->an_page;
		if (pg && (pg->pqflags & PQ_ANON) != 0 && anon->an_ref == 1) {
			if (pg->wire_count > 0) {
				UVMHIST_LOG(loanhist, "->A wired %p", pg,0,0,0);
				uvmfault_unlockall(ufi,
				    ufi->entry->aref.ar_amap,
				    ufi->entry->object.uvm_obj);
				return (-1);
			}
			pmap_page_protect(pg, VM_PROT_READ);
		}
		anon->an_ref++;
		**output = anon;
		(*output)++;
		UVMHIST_LOG(loanhist, "->A done", 0,0,0,0);
		return (1);
	}

	/*
	 * we are loaning to a kernel-page.  we need to get the page
	 * resident so we can wire it.  uvmfault_anonget will handle
	 * this for us.
	 */

	KASSERT(mutex_owned(anon->an_lock));
	error = uvmfault_anonget(ufi, ufi->entry->aref.ar_amap, anon);

	/*
	 * if we were unable to get the anon, then uvmfault_anonget has
	 * unlocked everything and returned an error code.
	 */

	if (error) {
		UVMHIST_LOG(loanhist, "error %d", error,0,0,0);

		/* need to refault (i.e. refresh our lookup) ? */
		if (error == ERESTART) {
			return (0);
		}

		/* "try again"?  sleep a bit and retry ... */
		if (error == EAGAIN) {
			kpause("loanagain", false, hz/2, NULL);
			return (0);
		}

		/* otherwise flag it as an error */
		return (-1);
	}

	/*
	 * we have the page and its owner locked: do the loan now.
	 */

	pg = anon->an_page;
	mutex_enter(&uvm_pageqlock);
	if (pg->wire_count > 0) {
		mutex_exit(&uvm_pageqlock);
		UVMHIST_LOG(loanhist, "->K wired %p", pg,0,0,0);
		KASSERT(pg->uobject == NULL);
		uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, NULL);
		return (-1);
	}
	if (pg->loan_count == 0) {
		pmap_page_protect(pg, VM_PROT_READ);
	}
	pg->loan_count++;
	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);
	**output = pg;
	(*output)++;

	/* unlock and return success */
	if (pg->uobject)
		mutex_exit(pg->uobject->vmobjlock);
	UVMHIST_LOG(loanhist, "->K done", 0,0,0,0);
	return (1);
}
static int
udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	struct inpcb *inp;
	struct inpcbinfo *pcbinfo;
	struct sockaddr_in6 *sin6;
	int error;

	pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
	inp = sotoinpcb(so);
	sin6 = (struct sockaddr_in6 *)nam;
	KASSERT(inp != NULL, ("udp6_connect: inp == NULL"));

	/*
	 * XXXRW: Need to clarify locking of v4/v6 flags.
	 */
	INP_WLOCK(inp);
#ifdef INET
	if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
		struct sockaddr_in sin;

		if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
			error = EINVAL;
			goto out;
		}
		if (inp->inp_faddr.s_addr != INADDR_ANY) {
			error = EISCONN;
			goto out;
		}
		in6_sin6_2_sin(&sin, sin6);
		inp->inp_vflag |= INP_IPV4;
		inp->inp_vflag &= ~INP_IPV6;
		error = prison_remote_ip4(td->td_ucred, &sin.sin_addr);
		if (error != 0)
			goto out;
		INP_HASH_WLOCK(pcbinfo);
		error = in_pcbconnect(inp, (struct sockaddr *)&sin,
		    td->td_ucred);
		INP_HASH_WUNLOCK(pcbinfo);
		if (error == 0)
			soisconnected(so);
		goto out;
	}
#endif
	if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
		error = EISCONN;
		goto out;
	}
	inp->inp_vflag &= ~INP_IPV4;
	inp->inp_vflag |= INP_IPV6;
	error = prison_remote_ip6(td->td_ucred, &sin6->sin6_addr);
	if (error != 0)
		goto out;
	INP_HASH_WLOCK(pcbinfo);
	error = in6_pcbconnect(inp, nam, td->td_ucred);
	INP_HASH_WUNLOCK(pcbinfo);
	if (error == 0)
		soisconnected(so);
out:
	INP_WUNLOCK(inp);
	return (error);
}
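/*
 * A userland sketch exercising the v4-mapped branch above: an AF_INET6
 * UDP socket connect()ed to ::ffff:127.0.0.1 flips the inpcb to IPv4.
 * IPV6_V6ONLY must be off or the kernel returns EINVAL.  The port is
 * arbitrary (discard).
 */
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <string.h>
#include <err.h>

int
main(void)
{
	struct sockaddr_in6 sin6;
	int s, off = 0;

	s = socket(AF_INET6, SOCK_DGRAM, 0);
	if (s == -1)
		err(1, "socket");
	if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, &off, sizeof(off)) == -1)
		err(1, "setsockopt");
	memset(&sin6, 0, sizeof(sin6));
	sin6.sin6_family = AF_INET6;
	sin6.sin6_len = sizeof(sin6);
	sin6.sin6_port = htons(9);
	if (inet_pton(AF_INET6, "::ffff:127.0.0.1", &sin6.sin6_addr) != 1)
		errx(1, "inet_pton");
	if (connect(s, (struct sockaddr *)&sin6, sizeof(sin6)) == -1)
		err(1, "connect");
	return (0);
}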
/*
 * uvm_loanuobjpages: loan pages from a uobj out (O->K)
 *
 * => uobj shouldn't be locked.  (we'll lock it)
 * => fail with EBUSY if we meet a wired page.
 */
int
uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int orignpages,
    struct vm_page **origpgpp)
{
	int ndone;	/* # of pages loaned out */
	struct vm_page **pgpp;
	int error;
	int i;
	kmutex_t *slock;

	pgpp = origpgpp;
	for (ndone = 0; ndone < orignpages; ) {
		int npages;
		/* npendloan: # of pages busied but not loaned out yet. */
		int npendloan = 0xdead; /* XXX gcc */
reget:
		npages = MIN(UVM_LOAN_GET_CHUNK, orignpages - ndone);
		mutex_enter(uobj->vmobjlock);
		error = (*uobj->pgops->pgo_get)(uobj,
		    pgoff + (ndone << PAGE_SHIFT), pgpp, &npages, 0,
		    VM_PROT_READ, 0, PGO_SYNCIO);
		if (error == EAGAIN) {
			kpause("loanuopg", false, hz/2, NULL);
			continue;
		}
		if (error)
			goto fail;

		KASSERT(npages > 0);

		/* loan and unbusy pages */
		slock = NULL;
		for (i = 0; i < npages; i++) {
			kmutex_t *nextslock; /* slock for next page */
			struct vm_page *pg = *pgpp;

			/* XXX assuming that the page is owned by uobj */
			KASSERT(pg->uobject != NULL);
			nextslock = pg->uobject->vmobjlock;

			if (slock != nextslock) {
				if (slock) {
					KASSERT(npendloan > 0);
					error = uvm_loanpage(pgpp - npendloan,
					    npendloan);
					mutex_exit(slock);
					if (error)
						goto fail;
					ndone += npendloan;
					KASSERT(origpgpp + ndone == pgpp);
				}
				slock = nextslock;
				npendloan = 0;
				mutex_enter(slock);
			}

			if ((pg->flags & PG_RELEASED) != 0) {
				/*
				 * release pages and try again.
				 */
				mutex_exit(slock);
				for (; i < npages; i++) {
					pg = pgpp[i];
					slock = pg->uobject->vmobjlock;

					mutex_enter(slock);
					mutex_enter(&uvm_pageqlock);
					uvm_page_unbusy(&pg, 1);
					mutex_exit(&uvm_pageqlock);
					mutex_exit(slock);
				}
				goto reget;
			}

			npendloan++;
			pgpp++;
			KASSERT(origpgpp + ndone + npendloan == pgpp);
		}
		KASSERT(slock != NULL);
		KASSERT(npendloan > 0);
		error = uvm_loanpage(pgpp - npendloan, npendloan);
		mutex_exit(slock);
		if (error)
			goto fail;
		ndone += npendloan;
		KASSERT(origpgpp + ndone == pgpp);
	}

	return 0;

fail:
	uvm_unloan(origpgpp, ndone, UVM_LOAN_TOPAGE);

	return error;
}
/*
 * General page fault handler.
 */
void
do_fault(struct trapframe *tf, struct lwp *l,
    struct vm_map *map, vaddr_t va, vm_prot_t atype)
{
	int error;

	if (pmap_fault(map->pmap, va, atype))
		return;

	struct pcb * const pcb = lwp_getpcb(l);
	void * const onfault = pcb->pcb_onfault;
	const bool user = TRAP_USERMODE(tf);

	if (cpu_intr_p()) {
		KASSERT(!user);
		error = EFAULT;
	} else {
		pcb->pcb_onfault = NULL;
		error = uvm_fault(map, va, atype);
		pcb->pcb_onfault = onfault;
	}

	if (error != 0) {
		ksiginfo_t ksi;

		if (onfault != NULL) {
			tf->tf_r0 = error;
			tf->tf_r15 = (tf->tf_r15 & ~R15_PC) |
			    (register_t)onfault;
			return;
		}
#ifdef DDB
		if (db_validating) {
			db_faulted = true;
			tf->tf_r15 += INSN_SIZE;
			return;
		}
#endif

		if (!user) {
#ifdef DDB
			db_printf("Unhandled data abort in kernel mode\n");
			kdb_trap(T_FAULT, tf);
#else
#ifdef DEBUG
			printf("Unhandled data abort:\n");
			printregs(tf);
#endif
			panic("unhandled data abort in kernel mode");
#endif
		}

		KSI_INIT_TRAP(&ksi);

		if (error == ENOMEM) {
			printf("UVM: pid %d (%s), uid %d killed: "
			    "out of swap\n",
			    l->l_proc->p_pid, l->l_proc->p_comm,
			    l->l_cred ? kauth_cred_geteuid(l->l_cred) : -1);
			ksi.ksi_signo = SIGKILL;
		} else
			ksi.ksi_signo = SIGSEGV;

		ksi.ksi_code = (error == EPERM) ? SEGV_ACCERR : SEGV_MAPERR;
		ksi.ksi_addr = (void *) va;
		trapsignal(l, &ksi);
	} else if (!user) {
		ucas_ras_check(tf);
	}
}
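/*
 * The SEGV_ACCERR/SEGV_MAPERR distinction set in ksi_code above is
 * visible to userland via siginfo.  A sketch that faults on purpose
 * and reports the code delivered by trapsignal(); the null store is
 * deliberate and the program exits from the handler.
 */
#include <signal.h>
#include <string.h>
#include <unistd.h>

static void
handler(int sig, siginfo_t *si, void *ctx)
{
	/* write(2) is async-signal-safe, unlike printf(3) */
	const char *m = (si->si_code == SEGV_ACCERR) ?
	    "SEGV_ACCERR\n" : "SEGV_MAPERR\n";

	write(STDERR_FILENO, m, strlen(m));
	_exit(0);
}

int
main(void)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_sigaction = handler;
	sa.sa_flags = SA_SIGINFO;
	sigemptyset(&sa.sa_mask);
	sigaction(SIGSEGV, &sa, NULL);

	*(volatile int *)0 = 0;	/* unmapped page: expect SEGV_MAPERR */
	return (1);
}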
static int
uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va)
{
	struct vm_amap *amap = ufi->entry->aref.ar_amap;
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct vm_page *pg;
	int error, npages;
	bool locked;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * first we must make sure the page is resident.
	 *
	 * XXXCDC: duplicate code with uvm_fault().
	 */

	/* locked: maps(read), amap(if there) */
	mutex_enter(uobj->vmobjlock);
	/* locked: maps(read), amap(if there), uobj */

	if (uobj->pgops->pgo_get) {	/* try locked pgo_get */
		npages = 1;
		pg = NULL;
		error = (*uobj->pgops->pgo_get)(uobj,
		    va - ufi->entry->start + ufi->entry->offset,
		    &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_LOCKED);
	} else {
		error = EIO;		/* must have pgo_get op */
	}

	/*
	 * check the result of the locked pgo_get.  if there is a problem,
	 * then we fail the loan.
	 */

	if (error && error != EBUSY) {
		uvmfault_unlockall(ufi, amap, uobj);
		return (-1);
	}

	/*
	 * if we need to unlock for I/O, do so now.
	 */

	if (error == EBUSY) {
		uvmfault_unlockall(ufi, amap, NULL);

		/* locked: uobj */
		npages = 1;
		error = (*uobj->pgops->pgo_get)(uobj,
		    va - ufi->entry->start + ufi->entry->offset,
		    &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_SYNCIO);
		/* locked: <nothing> */

		if (error) {
			if (error == EAGAIN) {
				kpause("fltagain2", false, hz/2, NULL);
				return (0);
			}
			return (-1);
		}

		/*
		 * pgo_get was a success.  attempt to relock everything.
		 */

		locked = uvmfault_relock(ufi);
		if (locked && amap)
			amap_lock(amap);
		uobj = pg->uobject;
		mutex_enter(uobj->vmobjlock);

		/*
		 * verify that the page has not been released and re-verify
		 * that amap slot is still free.  if there is a problem we
		 * drop our lock (thus force a lookup refresh/retry).
		 */

		if ((pg->flags & PG_RELEASED) != 0 ||
		    (locked && amap && amap_lookup(&ufi->entry->aref,
		    ufi->orig_rvaddr - ufi->entry->start))) {
			if (locked)
				uvmfault_unlockall(ufi, amap, NULL);
			locked = false;
		}

		/*
		 * didn't get the lock?  release the page and retry.
		 */

		if (locked == false) {
			if (pg->flags & PG_WANTED) {
				wakeup(pg);
			}
			if (pg->flags & PG_RELEASED) {
				mutex_enter(&uvm_pageqlock);
				uvm_pagefree(pg);
				mutex_exit(&uvm_pageqlock);
				mutex_exit(uobj->vmobjlock);
				return (0);
			}
			mutex_enter(&uvm_pageqlock);
			uvm_pageactivate(pg);
			mutex_exit(&uvm_pageqlock);
			pg->flags &= ~(PG_BUSY|PG_WANTED);
			UVM_PAGE_OWN(pg, NULL);
			mutex_exit(uobj->vmobjlock);
			return (0);
		}
	}

	KASSERT(uobj == pg->uobject);

	/*
	 * at this point we have the page we want ("pg") marked PG_BUSY for us
	 * and we have all data structures locked.  do the loanout.  page can
	 * not be PG_RELEASED (we caught this above).
	 */

	if ((flags & UVM_LOAN_TOANON) == 0) {
		if (uvm_loanpage(&pg, 1)) {
			uvmfault_unlockall(ufi, amap, uobj);
			return (-1);
		}
		mutex_exit(uobj->vmobjlock);
		**output = pg;
		(*output)++;
		return (1);
	}

#ifdef notdef
	/*
	 * must be a loan to an anon.  check to see if there is already
	 * an anon associated with this page.  if so, then just return
	 * a reference to this object.  the page should already be
	 * mapped read-only because it is already on loan.
	 */

	if (pg->uanon) {
		/* XXX: locking */
		anon = pg->uanon;
		anon->an_ref++;
		if (pg->flags & PG_WANTED) {
			wakeup(pg);
		}
		pg->flags &= ~(PG_WANTED|PG_BUSY);
		UVM_PAGE_OWN(pg, NULL);
		mutex_exit(uobj->vmobjlock);
		**output = anon;
		(*output)++;
		return (1);
	}

	/*
	 * need to allocate a new anon
	 */

	anon = uvm_analloc();
	if (anon == NULL) {
		goto fail;
	}
	mutex_enter(&uvm_pageqlock);
	if (pg->wire_count > 0) {
		mutex_exit(&uvm_pageqlock);
		UVMHIST_LOG(loanhist, "wired %p", pg,0,0,0);
		goto fail;
	}
	if (pg->loan_count == 0) {
		pmap_page_protect(pg, VM_PROT_READ);
	}
	pg->loan_count++;
	pg->uanon = anon;
	anon->an_page = pg;
	anon->an_lock = /* TODO: share amap lock */
	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);
	if (pg->flags & PG_WANTED) {
		wakeup(pg);
	}
	pg->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(pg, NULL);
	mutex_exit(uobj->vmobjlock);
	mutex_exit(&anon->an_lock);
	**output = anon;
	(*output)++;
	return (1);

fail:
	UVMHIST_LOG(loanhist, "fail", 0,0,0,0);
	/*
	 * unlock everything and bail out.
	 */
	if (pg->flags & PG_WANTED) {
		wakeup(pg);
	}
	pg->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(pg, NULL);
	uvmfault_unlockall(ufi, amap, uobj, NULL);
	if (anon) {
		anon->an_ref--;
		uvm_anon_free(anon);
	}
#endif	/* notdef */
	return (-1);
}
static int
sg_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage)
{
	struct sglist *sg;
	vm_page_t m_paddr, page;
	vm_pindex_t offset;
	vm_paddr_t paddr;
	vm_memattr_t memattr;
	size_t space;
	int i;

	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
	sg = object->handle;
	memattr = object->memattr;
	VM_OBJECT_UNLOCK(object);
	offset = m[reqpage]->pindex;

	/*
	 * Lookup the physical address of the requested page.  An initial
	 * value of '1' instead of '0' is used so we can assert that the
	 * page is found since '0' can be a valid page-aligned physical
	 * address.
	 */
	space = 0;
	paddr = 1;
	for (i = 0; i < sg->sg_nseg; i++) {
		if (space + sg->sg_segs[i].ss_len <= (offset * PAGE_SIZE)) {
			space += sg->sg_segs[i].ss_len;
			continue;
		}
		paddr = sg->sg_segs[i].ss_paddr + offset * PAGE_SIZE - space;
		break;
	}
	KASSERT(paddr != 1, ("invalid SG page index"));

	/* If "paddr" is a real page, perform a sanity check on "memattr". */
	if ((m_paddr = vm_phys_paddr_to_vm_page(paddr)) != NULL &&
	    pmap_page_get_memattr(m_paddr) != memattr) {
		memattr = pmap_page_get_memattr(m_paddr);
		printf(
	    "WARNING: A device driver has set \"memattr\" inconsistently.\n");
	}

	/* Return a fake page for the requested page. */
	KASSERT(!(m[reqpage]->flags & PG_FICTITIOUS),
	    ("backing page for SG is fake"));

	/* Construct a new fake page. */
	page = vm_page_getfake(paddr, memattr);
	VM_OBJECT_LOCK(object);
	TAILQ_INSERT_TAIL(&object->un_pager.sgp.sgp_pglist, page, pageq);

	/* Free the original pages and insert this fake page into the object. */
	for (i = 0; i < count; i++) {
		vm_page_lock(m[i]);
		vm_page_free(m[i]);
		vm_page_unlock(m[i]);
	}
	vm_page_insert(page, object, offset);
	m[reqpage] = page;
	page->valid = VM_PAGE_BITS_ALL;

	return (VM_PAGER_OK);
}
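/*
 * The physical-address lookup above in miniature: walk the segment
 * list until the byte offset of the page falls inside a segment, and
 * reuse the '1' sentinel trick since 0 can be a valid page-aligned
 * address.  Standalone types and hypothetical segment values.
 */
#include <stdint.h>
#include <stdio.h>

struct seg { uint64_t paddr; size_t len; };
#define PAGE_SIZE 4096

static uint64_t
lookup(const struct seg *segs, int nseg, uint64_t pindex)
{
	uint64_t off = pindex * PAGE_SIZE, space = 0;
	int i;

	for (i = 0; i < nseg; i++) {
		if (space + segs[i].len <= off) {
			space += segs[i].len;
			continue;
		}
		return segs[i].paddr + off - space;
	}
	return 1;	/* impossible page-aligned address == not found */
}

int
main(void)
{
	struct seg segs[] = { { 0x100000, 8192 }, { 0x300000, 16384 } };

	/* page 3 lands 4096 bytes into the second segment: 0x301000 */
	printf("%#llx\n", (unsigned long long)lookup(segs, 2, 3));
	return (0);
}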
/*
 * This function is very similar to ctl_ioctl_do_datamove().  Is there a
 * way to combine the functionality?
 *
 * XXX KDM may need to move this into a thread.  We're doing a bcopy in the
 * caller's context, which will usually be the backend.  That may not be a
 * good thing.
 */
static void
cfcs_datamove(union ctl_io *io)
{
	union ccb *ccb;
	bus_dma_segment_t cam_sg_entry, *cam_sglist;
	struct ctl_sg_entry ctl_sg_entry, *ctl_sglist;
	int cam_sg_count, ctl_sg_count, cam_sg_start;
	int cam_sg_offset;
	int len_to_copy, len_copied;
	int ctl_watermark, cam_watermark;
	int i, j;

	cam_sg_offset = 0;
	cam_sg_start = 0;

	ccb = io->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr;

	/*
	 * Note that we have a check in cfcs_action() to make sure that any
	 * CCBs with "bad" flags are returned with CAM_REQ_INVALID.  This
	 * is just to make sure no one removes that check without updating
	 * this code to provide the additional functionality necessary to
	 * support those modes of operation.
	 */
	KASSERT(((ccb->ccb_h.flags & CFCS_BAD_CCB_FLAGS) == 0), ("invalid "
		  "CAM flags %#x", (ccb->ccb_h.flags & CFCS_BAD_CCB_FLAGS)));

	/*
	 * Simplify things on both sides by putting single buffers into a
	 * single entry S/G list.
	 */
	switch ((ccb->ccb_h.flags & CAM_DATA_MASK)) {
	case CAM_DATA_SG: {
		int len_seen;

		cam_sglist = (bus_dma_segment_t *)ccb->csio.data_ptr;
		cam_sg_count = ccb->csio.sglist_cnt;

		for (i = 0, len_seen = 0; i < cam_sg_count; i++) {
			if ((len_seen + cam_sglist[i].ds_len) >=
			    io->scsiio.kern_rel_offset) {
				cam_sg_start = i;
				cam_sg_offset = io->scsiio.kern_rel_offset -
					len_seen;
				break;
			}
			len_seen += cam_sglist[i].ds_len;
		}
		break;
	}
	case CAM_DATA_VADDR:
		cam_sglist = &cam_sg_entry;
		cam_sglist[0].ds_len = ccb->csio.dxfer_len;
		cam_sglist[0].ds_addr = (bus_addr_t)ccb->csio.data_ptr;
		cam_sg_count = 1;
		cam_sg_start = 0;
		cam_sg_offset = io->scsiio.kern_rel_offset;
		break;
	default:
		panic("Invalid CAM flags %#x", ccb->ccb_h.flags);
	}

	if (io->scsiio.kern_sg_entries > 0) {
		ctl_sglist = (struct ctl_sg_entry *)io->scsiio.kern_data_ptr;
		ctl_sg_count = io->scsiio.kern_sg_entries;
	} else {
		ctl_sglist = &ctl_sg_entry;
		ctl_sglist->addr = io->scsiio.kern_data_ptr;
		ctl_sglist->len = io->scsiio.kern_data_len;
		ctl_sg_count = 1;
	}

	ctl_watermark = 0;
	cam_watermark = cam_sg_offset;
	len_copied = 0;
	for (i = cam_sg_start, j = 0;
	     i < cam_sg_count && j < ctl_sg_count;) {
		uint8_t *cam_ptr, *ctl_ptr;

		len_to_copy = MIN(cam_sglist[i].ds_len - cam_watermark,
				  ctl_sglist[j].len - ctl_watermark);

		cam_ptr = (uint8_t *)cam_sglist[i].ds_addr;
		cam_ptr = cam_ptr + cam_watermark;
		if (io->io_hdr.flags & CTL_FLAG_BUS_ADDR) {
			/*
			 * XXX KDM fix this!
			 */
			panic("need to implement bus address support");
#if 0
			kern_ptr = bus_to_virt(kern_sglist[j].addr);
#endif
		} else
			ctl_ptr = (uint8_t *)ctl_sglist[j].addr;
		ctl_ptr = ctl_ptr + ctl_watermark;

		ctl_watermark += len_to_copy;
		cam_watermark += len_to_copy;

		if ((io->io_hdr.flags & CTL_FLAG_DATA_MASK) ==
		     CTL_FLAG_DATA_IN) {
			CTL_DEBUG_PRINT(("%s: copying %d bytes to CAM\n",
			    __func__, len_to_copy));
			CTL_DEBUG_PRINT(("%s: from %p to %p\n", __func__,
			    ctl_ptr, cam_ptr));
			bcopy(ctl_ptr, cam_ptr, len_to_copy);
		} else {
			CTL_DEBUG_PRINT(("%s: copying %d bytes from CAM\n",
			    __func__, len_to_copy));
			CTL_DEBUG_PRINT(("%s: from %p to %p\n", __func__,
			    cam_ptr, ctl_ptr));
			bcopy(cam_ptr, ctl_ptr, len_to_copy);
		}

		len_copied += len_to_copy;

		if (cam_sglist[i].ds_len == cam_watermark) {
			i++;
			cam_watermark = 0;
		}

		if (ctl_sglist[j].len == ctl_watermark) {
			j++;
			ctl_watermark = 0;
		}
	}

	io->scsiio.ext_data_filled += len_copied;

	if ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS) {
		io->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr = NULL;
		io->io_hdr.flags |= CTL_FLAG_STATUS_SENT;
		ccb->ccb_h.status &= ~CAM_STATUS_MASK;
		ccb->ccb_h.status |= CAM_REQ_CMP;
		xpt_done(ccb);
	}

	io->scsiio.be_move_done(io);
}
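/*
 * The dual-watermark S/G copy loop above, reduced to a standalone
 * sketch: advance through two scatter/gather lists in lockstep,
 * copying the overlap of the current segments and resetting each
 * watermark when its segment is exhausted.  Illustrative types only.
 */
#include <stddef.h>
#include <string.h>

struct sge { void *addr; size_t len; };

#define MIN(a, b) ((a) < (b) ? (a) : (b))

static size_t
sg_copy(const struct sge *src, int nsrc, const struct sge *dst, int ndst)
{
	size_t sw = 0, dw = 0, copied = 0, n;
	int i = 0, j = 0;

	while (i < nsrc && j < ndst) {
		n = MIN(src[i].len - sw, dst[j].len - dw);
		memcpy((char *)dst[j].addr + dw, (char *)src[i].addr + sw, n);
		sw += n; dw += n; copied += n;
		if (sw == src[i].len) { i++; sw = 0; }
		if (dw == dst[j].len) { j++; dw = 0; }
	}
	return copied;
}

int
main(void)
{
	char a[6] = "hello", b[3], c[3];
	struct sge src[] = { { a, 6 } };
	struct sge dst[] = { { b, 3 }, { c, 3 } };

	/* one source segment split across two destination segments */
	return sg_copy(src, 1, dst, 2) == 6 ? 0 : 1;
}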