/*
 * Pool tokens are used to provide a type-stable serializing token
 * pointer that does not race against disappearing data structures.
 *
 * This routine is called in early boot just after we setup the BSP's
 * globaldata structure.
 */
void
lwkt_token_pool_init(void)
{
	int n;

	/* Give every pool token the same debug name; they are anonymous. */
	for (n = 0; n < LWKT_NUM_POOL_TOKENS; ++n)
		lwkt_token_init(&pool_tokens[n].token, "pool");
}
/*
 * Initialize the ext2 inode hash table.
 *
 * The table size is the smallest power of two >= desiredvnodes so that
 * (hash & ext2_ihash) can be used as a cheap modulus; after allocation
 * ext2_ihash is decremented to become that bit mask.
 */
void
ext2_ihashinit(void)
{
	/* Round up to a power of two no smaller than desiredvnodes. */
	for (ext2_ihash = 16; ext2_ihash < desiredvnodes; ext2_ihash <<= 1)
		;
	ext2_ihashtbl = kmalloc(sizeof(void *) * ext2_ihash, M_EXT2IHASH,
				M_WAITOK|M_ZERO);
	ext2_ihash--;		/* convert table size to index mask */
	lwkt_token_init(&ext2_ihash_token, "ext2ihash");
}
/*
 * Initialize a syncer context for mount point mp: clear the flags,
 * set up the delay-hash of pending work items, and create the token
 * that serializes access to the context.
 */
static void
syncer_ctx_init(struct syncer_ctx *ctx, struct mount *mp)
{
	ctx->sc_mp = mp;
	ctx->sc_flags = 0;
	ctx->syncer_delayno = 0;
	ctx->syncer_workitem_pending = hashinit(syncer_maxdelay, M_DEVBUF,
						&ctx->syncer_mask);
	lwkt_token_init(&ctx->sc_token, "syncer");
}
/*
 * Obtain a new vnode.  The returned vnode is VX locked & vrefd.
 *
 * All new vnodes set the VAGE flags.  An open() of the vnode will
 * decrement the (2-bit) flags.  Vnodes which are opened several times
 * are thus retained in the cache over vnodes which are merely stat()d.
 *
 * We always allocate the vnode.  Attempting to recycle existing vnodes
 * here can lead to numerous deadlocks, particularly with softupdates.
 */
struct vnode *
allocvnode(int lktimeout, int lkflags)
{
	struct vnode *vp;

	/*
	 * Do not flag for synchronous recyclement unless there are enough
	 * freeable vnodes to recycle and the number of vnodes has
	 * significantly exceeded our target.  We want the normal vnlru
	 * process to handle the cleaning (at 9/10's) before we are forced
	 * to flag it here at 11/10's for userexit path processing.
	 */
	if (numvnodes >= maxvnodes * 11 / 10 &&
	    cachedvnodes + inactivevnodes >= maxvnodes * 5 / 10) {
		struct thread *td = curthread;
		/* Ask the current lwp to run vnlru on its way back out. */
		if (td->td_lwp)
			atomic_set_int(&td->td_lwp->lwp_mpflags,
				       LWP_MP_VNLRU);
	}

	/*
	 * lktimeout only applies when LK_TIMELOCK is used, and only
	 * the pageout daemon uses it.  The timeout may not be zero
	 * or the pageout daemon can deadlock in low-VM situations.
	 */
	if (lktimeout == 0)
		lktimeout = hz / 10;

	vp = kmalloc(sizeof(*vp), M_VNODE, M_ZERO | M_WAITOK);

	/* Per-vnode serializing structures. */
	lwkt_token_init(&vp->v_token, "vnode");
	lockinit(&vp->v_lock, "vnode", lktimeout, lkflags);
	TAILQ_INIT(&vp->v_namecache);
	RB_INIT(&vp->v_rbclean_tree);
	RB_INIT(&vp->v_rbdirty_tree);
	RB_INIT(&vp->v_rbhash_tree);
	spin_init(&vp->v_spin, "allocvnode");

	/* VX lock is taken before the vnode becomes visible anywhere. */
	lockmgr(&vp->v_lock, LK_EXCLUSIVE);
	atomic_add_int(&numvnodes, 1);
	vp->v_refcnt = 1;
	vp->v_flag = VAGE0 | VAGE1;	/* 2-bit age counter, see above */
	vp->v_pbuf_count = nswbuf_kva / NSWBUF_SPLIT;

	KKASSERT(TAILQ_EMPTY(&vp->v_namecache));
	/* exclusive lock still held */
	vp->v_filesize = NOOFFSET;
	vp->v_type = VNON;
	vp->v_tag = 0;
	vp->v_state = VS_CACHED;
	/* Place the new vnode on the active list before returning it. */
	_vactivate(vp);

	return (vp);
}
/* * Get a socket structure, and initialize it. * Note that it would probably be better to allocate socket * and PCB at the same time, but I'm not convinced that all * the protocols can be easily modified to do this. */ struct socket * soalloc(int waitok) { struct socket *so; unsigned waitmask; waitmask = waitok ? M_WAITOK : M_NOWAIT; so = kmalloc(sizeof(struct socket), M_SOCKET, M_ZERO|waitmask); if (so) { /* XXX race condition for reentrant kernel */ TAILQ_INIT(&so->so_aiojobq); TAILQ_INIT(&so->so_rcv.ssb_kq.ki_mlist); TAILQ_INIT(&so->so_snd.ssb_kq.ki_mlist); lwkt_token_init(&so->so_rcv.ssb_token, "rcvtok"); lwkt_token_init(&so->so_snd.ssb_token, "sndtok"); so->so_state = SS_NOFDREF; so->so_refs = 1; } return so; }
/*
 * Initialize an hrtimer compatibility object.  Only CLOCK_MONOTONIC
 * is supported by this shim; any other clock id panics via BUG_ON.
 */
void
hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
	     enum hrtimer_mode mode)
{
	BUG_ON(clock_id != CLOCK_MONOTONIC);

	memset(timer, 0, sizeof(*timer));
	timer->clock_id = clock_id;
	timer->ht_mode = mode;
	lwkt_token_init(&timer->timer_token, "timer token");
	callout_init_mp(&timer->timer_callout);
}
/*
 * Initialize a malloc pipeline for the specified malloc type and
 * allocation size.  Create an array to cache up to nnom buffers and
 * preallocate them, optionally running the caller's constructor on each.
 * If MPF_CALLBACK is requested a support thread is spawned to service
 * the pipeline's callback queue.
 */
void
mpipe_init(malloc_pipe_t mpipe, malloc_type_t type, int bytes,
	   int nnom, int nmax,
	   int mpflags,
	   void (*construct)(void *, void *),
	   void (*deconstruct)(void *, void *),
	   void *priv)
{
	int n;

	/* Clamp the nominal and maximum buffer counts to sane values. */
	if (nnom < 1)
		nnom = 1;
	if (nmax < 0)
		nmax = 0x7FFF0000;	/* some very large number */
	if (nmax < nnom)
		nmax = nnom;

	bzero(mpipe, sizeof(*mpipe));
	mpipe->type = type;
	mpipe->bytes = bytes;
	mpipe->mpflags = mpflags;
	mpipe->construct = construct;
	mpipe->deconstruct = deconstruct;
	mpipe->priv = priv;

	/* Translate mpipe flags into kmalloc flags for later allocations. */
	if ((mpflags & MPF_NOZERO) == 0)
		mpipe->mflags |= M_ZERO;
	if (mpflags & MPF_INT)
		mpipe->mflags |= M_USE_RESERVE | M_USE_INTERRUPT_RESERVE;

	mpipe->ary_count = nnom;
	mpipe->max_count = nmax;
	mpipe->array = kmalloc(nnom * sizeof(mpipe->array[0]), M_MPIPEARY,
			       M_WAITOK | M_ZERO);

	/* Preallocate and (optionally) construct the nominal buffer set. */
	for (n = 0; n < nnom; ++n) {
		mpipe->array[n] = kmalloc(bytes, mpipe->type,
					  M_WAITOK | mpipe->mflags);
		if (construct)
			construct(mpipe->array[n], priv);
		++mpipe->free_count;
		++mpipe->total_count;
	}
	STAILQ_INIT(&mpipe->queue);

	lwkt_token_init(&mpipe->token, "mpipe token");

	/*
	 * Create a support thread for the mpipe queue
	 */
	if (mpflags & MPF_CALLBACK) {
		kthread_create(mpipe_thread, mpipe, &mpipe->thread,
			       "mpipe_%s", type->ks_shortdesc);
	}
}
/*
 * Callbacks from machine-dependant startup code (e.g. init386) to set
 * up low level entities related to cpu #0's globaldata.
 *
 * Called from very low level boot code.
 */
void
mi_proc0init(struct globaldata *gd, struct user *proc0paddr)
{
	/* Turn the statically-allocated thread0 into a real LWKT thread. */
	lwkt_init_thread(&thread0, proc0paddr, LWKT_THREAD_STACK, 0, gd);
	lwkt_set_comm(&thread0, "thread0");
	RB_INIT(&proc0.p_lwp_tree);
	spin_init(&proc0.p_spin);
	lwkt_token_init(&proc0.p_token, "iproc");
	proc0.p_lasttid = 0;	/* +1 = next TID */
	/* Insert lwp0 as proc0's first (and only) lwp. */
	lwp_rb_tree_RB_INSERT(&proc0.p_lwp_tree, &lwp0);
	lwp0.lwp_thread = &thread0;
	lwp0.lwp_proc = &proc0;
	/* Select and initialize the user scheduler for proc0. */
	proc0.p_usched = usched_init();
	/* lwp0 may run on any cpu. */
	lwp0.lwp_cpumask = (cpumask_t)-1;
	lwkt_token_init(&lwp0.lwp_token, "lwp_token");
	spin_init(&lwp0.lwp_spin);
	varsymset_init(&proc0.p_varsymset, NULL);
	/* Wire up thread0 <-> proc0 <-> lwp0 and mark it running. */
	thread0.td_flags |= TDF_RUNNING;
	thread0.td_proc = &proc0;
	thread0.td_lwp = &lwp0;
	thread0.td_switch = cpu_lwkt_switch;
	lwkt_schedule_self(curthread);
}
/* * Create per-filesystem syncer process */ void vn_syncer_thr_create(struct mount *mp) { struct syncer_ctx *ctx; static int syncalloc = 0; ctx = kmalloc(sizeof(struct syncer_ctx), M_TEMP, M_WAITOK | M_ZERO); ctx->sc_mp = mp; ctx->sc_flags = 0; ctx->syncer_workitem_pending = hashinit(SYNCER_MAXDELAY, M_DEVBUF, &ctx->syncer_mask); ctx->syncer_delayno = 0; lwkt_token_init(&ctx->sc_token, "syncer"); mp->mnt_syncer_ctx = ctx; kthread_create(syncer_thread, ctx, &ctx->sc_thread, "syncer%d", ++syncalloc & 0x7FFFFFFF); }
/*
 * Mount or update a HAMMER filesystem.
 *
 * Handles three cases: a root mount at boot (mntpt == NULL, volume list
 * parsed from f_mntfromname), a mount update (MNT_UPDATE, which only
 * transitions between read-only and read-write), and a fresh mount
 * (volume paths copied in from userspace).  On success the caller's
 * mount point is fully initialized; on failure everything allocated
 * here is torn down via hammer_free_hmp().
 */
static int
hammer_vfs_mount(struct mount *mp, char *mntpt, caddr_t data,
		 struct ucred *cred)
{
	struct hammer_mount_info info;
	hammer_mount_t hmp;
	hammer_volume_t rootvol;
	struct vnode *rootvp;
	struct vnode *devvp = NULL;
	const char *upath;	/* volume name in userspace */
	char *path;		/* volume name in system space */
	int error;
	int i;
	int master_id;
	char *next_volume_ptr = NULL;

	/*
	 * Accept hammer_mount_info.  mntpt is NULL for root mounts at boot.
	 */
	if (mntpt == NULL) {
		bzero(&info, sizeof(info));
		info.asof = 0;
		info.hflags = 0;
		info.nvolumes = 1;

		next_volume_ptr = mp->mnt_stat.f_mntfromname;

		/* Count number of volumes separated by ':' */
		for (char *p = next_volume_ptr; *p != '\0'; ++p) {
			if (*p == ':') {
				++info.nvolumes;
			}
		}

		mp->mnt_flag &= ~MNT_RDONLY; /* mount R/W */
	} else {
		if ((error = copyin(data, &info, sizeof(info))) != 0)
			return (error);
	}

	/*
	 * updating or new mount
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		hmp = (void *)mp->mnt_data;
		KKASSERT(hmp != NULL);
	} else {
		if (info.nvolumes <= 0 || info.nvolumes > HAMMER_MAX_VOLUMES)
			return (EINVAL);
		hmp = NULL;
	}

	/*
	 * master-id validation.  The master id may not be changed by a
	 * mount update.
	 */
	if (info.hflags & HMNT_MASTERID) {
		if (hmp && hmp->master_id != info.master_id) {
			kprintf("hammer: cannot change master id "
				"with mount update\n");
			return(EINVAL);
		}
		master_id = info.master_id;
		if (master_id < -1 || master_id >= HAMMER_MAX_MASTERS)
			return (EINVAL);
	} else {
		/* Default to the existing id on update, 0 on fresh mount. */
		if (hmp)
			master_id = hmp->master_id;
		else
			master_id = 0;
	}

	/*
	 * Internal mount data structure.  Only allocated for a fresh mount;
	 * a mount update reuses the existing hmp.
	 */
	if (hmp == NULL) {
		hmp = kmalloc(sizeof(*hmp), M_HAMMER, M_WAITOK | M_ZERO);
		mp->mnt_data = (qaddr_t)hmp;
		hmp->mp = mp;
		/*TAILQ_INIT(&hmp->recycle_list);*/

		/*
		 * Make sure kmalloc type limits are set appropriately.
		 *
		 * Our inode kmalloc group is sized based on maxvnodes
		 * (controlled by the system, not us).
		 */
		kmalloc_create(&hmp->m_misc, "HAMMER-others");
		kmalloc_create(&hmp->m_inodes, "HAMMER-inodes");
		kmalloc_raise_limit(hmp->m_inodes, 0);	/* unlimited */

		/* Sentinel B-Tree keys covering the whole key space. */
		hmp->root_btree_beg.localization = 0x00000000U;
		hmp->root_btree_beg.obj_id = -0x8000000000000000LL;
		hmp->root_btree_beg.key = -0x8000000000000000LL;
		hmp->root_btree_beg.create_tid = 1;
		hmp->root_btree_beg.delete_tid = 1;
		hmp->root_btree_beg.rec_type = 0;
		hmp->root_btree_beg.obj_type = 0;

		hmp->root_btree_end.localization = 0xFFFFFFFFU;
		hmp->root_btree_end.obj_id = 0x7FFFFFFFFFFFFFFFLL;
		hmp->root_btree_end.key = 0x7FFFFFFFFFFFFFFFLL;
		hmp->root_btree_end.create_tid = 0xFFFFFFFFFFFFFFFFULL;
		hmp->root_btree_end.delete_tid = 0;   /* special case */
		hmp->root_btree_end.rec_type = 0xFFFFU;
		hmp->root_btree_end.obj_type = 0;

		hmp->krate.freq = 1;	/* maximum reporting rate (hz) */
		hmp->krate.count = -16;	/* initial burst */

		hmp->sync_lock.refs = 1;
		hmp->free_lock.refs = 1;
		hmp->undo_lock.refs = 1;
		hmp->blkmap_lock.refs = 1;
		hmp->snapshot_lock.refs = 1;
		hmp->volume_lock.refs = 1;

		TAILQ_INIT(&hmp->delay_list);
		TAILQ_INIT(&hmp->flush_group_list);
		TAILQ_INIT(&hmp->objid_cache_list);
		TAILQ_INIT(&hmp->undo_lru_list);
		TAILQ_INIT(&hmp->reclaim_list);

		RB_INIT(&hmp->rb_dedup_crc_root);
		RB_INIT(&hmp->rb_dedup_off_root);
		TAILQ_INIT(&hmp->dedup_lru_list);
	}
	/* Refresh user-settable flags from the mount info. */
	hmp->hflags &= ~HMNT_USERFLAGS;
	hmp->hflags |= info.hflags & HMNT_USERFLAGS;

	hmp->master_id = master_id;

	/* An as-of (historical) mount is forced read-only. */
	if (info.asof) {
		mp->mnt_flag |= MNT_RDONLY;
		hmp->asof = info.asof;
	} else {
		hmp->asof = HAMMER_MAX_TID;
	}

	hmp->volume_to_remove = -1;

	/*
	 * Re-open read-write if originally read-only, or vise-versa.
	 *
	 * When going from read-only to read-write execute the stage2
	 * recovery if it has not already been run.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		lwkt_gettoken(&hmp->fs_token);
		error = 0;
		if (hmp->ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
			kprintf("HAMMER read-only -> read-write\n");
			hmp->ronly = 0;
			RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
				hammer_adjust_volume_mode, NULL);
			rootvol = hammer_get_root_volume(hmp, &error);
			if (rootvol) {
				hammer_recover_flush_buffers(hmp, rootvol, 1);
				error = hammer_recover_stage2(hmp, rootvol);
				bcopy(rootvol->ondisk->vol0_blockmap,
				      hmp->blockmap,
				      sizeof(hmp->blockmap));
				hammer_rel_volume(rootvol, 0);
			}
			RB_SCAN(hammer_ino_rb_tree, &hmp->rb_inos_root, NULL,
				hammer_reload_inode, NULL);
			/* kernel clears MNT_RDONLY */
		} else if (hmp->ronly == 0 &&
			   (mp->mnt_flag & MNT_RDONLY)) {
			kprintf("HAMMER read-write -> read-only\n");
			hmp->ronly = 1;	/* messy */
			/*
			 * Reload inodes with ronly temporarily cleared so
			 * the flusher can still write, sync three times,
			 * then latch ronly for good.
			 */
			RB_SCAN(hammer_ino_rb_tree, &hmp->rb_inos_root, NULL,
				hammer_reload_inode, NULL);
			hmp->ronly = 0;
			hammer_flusher_sync(hmp);
			hammer_flusher_sync(hmp);
			hammer_flusher_sync(hmp);
			hmp->ronly = 1;
			RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
				hammer_adjust_volume_mode, NULL);
		}
		lwkt_reltoken(&hmp->fs_token);
		return(error);
	}

	/* Fresh mount: initialize the in-memory indexes and tokens. */
	RB_INIT(&hmp->rb_vols_root);
	RB_INIT(&hmp->rb_inos_root);
	RB_INIT(&hmp->rb_redo_root);
	RB_INIT(&hmp->rb_nods_root);
	RB_INIT(&hmp->rb_undo_root);
	RB_INIT(&hmp->rb_resv_root);
	RB_INIT(&hmp->rb_bufs_root);
	RB_INIT(&hmp->rb_pfsm_root);

	hmp->ronly = ((mp->mnt_flag & MNT_RDONLY) != 0);

	RB_INIT(&hmp->volu_root);
	RB_INIT(&hmp->undo_root);
	RB_INIT(&hmp->data_root);
	RB_INIT(&hmp->meta_root);
	RB_INIT(&hmp->lose_root);
	TAILQ_INIT(&hmp->iorun_list);

	lwkt_token_init(&hmp->fs_token, "hammerfs");
	lwkt_token_init(&hmp->io_token, "hammerio");

	lwkt_gettoken(&hmp->fs_token);

	/*
	 * Load volumes
	 */
	path = objcache_get(namei_oc, M_WAITOK);
	hmp->nvolumes = -1;
	for (i = 0; i < info.nvolumes; ++i) {
		if (mntpt == NULL) {
			/*
			 * Root mount.  Pull the next ':'-separated volume
			 * name out of f_mntfromname, prefixing "/dev/" for
			 * relative names.
			 */
			KKASSERT(next_volume_ptr != NULL);
			strcpy(path, "");
			if (*next_volume_ptr != '/') {
				/* relative path */
				strcpy(path, "/dev/");
			}
			int k;
			for (k = strlen(path); k < MAXPATHLEN-1; ++k) {
				if (*next_volume_ptr == '\0') {
					break;
				} else if (*next_volume_ptr == ':') {
					++next_volume_ptr;
					break;
				} else {
					path[k] = *next_volume_ptr;
					++next_volume_ptr;
				}
			}
			path[k] = '\0';

			error = 0;
			cdev_t dev = kgetdiskbyname(path);
			error = bdevvp(dev, &devvp);
			if (error) {
				kprintf("hammer_mountroot: can't find devvp\n");
			}
			/*
			 * NOTE(review): devvp is not reset to NULL between
			 * iterations — presumably hammer_install_volume()
			 * consumes it; confirm for multi-volume root mounts.
			 */
		} else {
			/* Copy the volume path string in from userspace. */
			error = copyin(&info.volumes[i], &upath,
				       sizeof(char *));
			if (error == 0)
				error = copyinstr(upath, path,
						  MAXPATHLEN, NULL);
		}
		if (error == 0)
			error = hammer_install_volume(hmp, path, devvp);
		if (error)
			break;
	}
	objcache_put(namei_oc, path);

	/*
	 * Make sure we found a root volume
	 */
	if (error == 0 && hmp->rootvol == NULL) {
		kprintf("hammer_mount: No root volume found!\n");
		error = EINVAL;
	}

	/*
	 * Check that all required volumes are available
	 */
	if (error == 0 && hammer_mountcheck_volumes(hmp)) {
		kprintf("hammer_mount: Missing volumes, cannot mount!\n");
		error = EINVAL;
	}

	if (error) {
		/* called with fs_token held */
		hammer_free_hmp(mp);
		return (error);
	}

	/*
	 * No errors, setup enough of the mount point so we can lookup the
	 * root vnode.
	 */
	mp->mnt_iosize_max = MAXPHYS;
	mp->mnt_kern_flag |= MNTK_FSMID;
	mp->mnt_kern_flag |= MNTK_THR_SYNC;	/* new vsyncscan semantics */

	/*
	 * MPSAFE code.  Note that VOPs and VFSops which are not MPSAFE
	 * will acquire a per-mount token prior to entry and release it
	 * on return, so even if we do not specify it we no longer get
	 * the BGL regardless of how we are flagged.
	 */
	mp->mnt_kern_flag |= MNTK_ALL_MPSAFE;
	/*MNTK_RD_MPSAFE | MNTK_GA_MPSAFE | MNTK_IN_MPSAFE;*/

	/*
	 * note: f_iosize is used by vnode_pager_haspage() when constructing
	 * its VOP_BMAP call.
	 */
	mp->mnt_stat.f_iosize = HAMMER_BUFSIZE;
	mp->mnt_stat.f_bsize = HAMMER_BUFSIZE;

	mp->mnt_vstat.f_frsize = HAMMER_BUFSIZE;
	mp->mnt_vstat.f_bsize = HAMMER_BUFSIZE;

	mp->mnt_maxsymlinklen = 255;
	mp->mnt_flag |= MNT_LOCAL;

	vfs_add_vnodeops(mp, &hammer_vnode_vops, &mp->mnt_vn_norm_ops);
	vfs_add_vnodeops(mp, &hammer_spec_vops, &mp->mnt_vn_spec_ops);
	vfs_add_vnodeops(mp, &hammer_fifo_vops, &mp->mnt_vn_fifo_ops);

	/*
	 * The root volume's ondisk pointer is only valid if we hold a
	 * reference to it.
	 */
	rootvol = hammer_get_root_volume(hmp, &error);
	if (error)
		goto failed;

	/*
	 * Perform any necessary UNDO operations.  The recovery code does
	 * call hammer_undo_lookup() so we have to pre-cache the blockmap,
	 * and then re-copy it again after recovery is complete.
	 *
	 * If this is a read-only mount the UNDO information is retained
	 * in memory in the form of dirty buffer cache buffers, and not
	 * written back to the media.
	 */
	bcopy(rootvol->ondisk->vol0_blockmap, hmp->blockmap,
	      sizeof(hmp->blockmap));

	/*
	 * Check filesystem version
	 */
	hmp->version = rootvol->ondisk->vol_version;
	if (hmp->version < HAMMER_VOL_VERSION_MIN ||
	    hmp->version > HAMMER_VOL_VERSION_MAX) {
		kprintf("HAMMER: mount unsupported fs version %d\n",
			hmp->version);
		error = ERANGE;
		goto done;
	}

	/*
	 * The undo_rec_limit limits the size of flush groups to avoid
	 * blowing out the UNDO FIFO.  This calculation is typically in
	 * the tens of thousands and is designed primarily when small
	 * HAMMER filesystems are created.
	 */
	hmp->undo_rec_limit = hammer_undo_max(hmp) / 8192 + 100;
	if (hammer_debug_general & 0x0001)
		kprintf("HAMMER: undo_rec_limit %d\n", hmp->undo_rec_limit);

	/*
	 * NOTE: Recover stage1 not only handles meta-data recovery, it
	 * 	 also sets hmp->undo_seqno for HAMMER VERSION 4+ filesystems.
	 */
	error = hammer_recover_stage1(hmp, rootvol);
	if (error) {
		kprintf("Failed to recover HAMMER filesystem on mount\n");
		goto done;
	}

	/*
	 * Finish setup now that we have a good root volume.
	 *
	 * The top 16 bits of fsid.val[1] is a pfs id.
	 */
	ksnprintf(mp->mnt_stat.f_mntfromname,
		  sizeof(mp->mnt_stat.f_mntfromname), "%s",
		  rootvol->ondisk->vol_name);
	mp->mnt_stat.f_fsid.val[0] =
		crc32((char *)&rootvol->ondisk->vol_fsid + 0, 8);
	mp->mnt_stat.f_fsid.val[1] =
		crc32((char *)&rootvol->ondisk->vol_fsid + 8, 8);
	mp->mnt_stat.f_fsid.val[1] &= 0x0000FFFF;

	mp->mnt_vstat.f_fsid_uuid = rootvol->ondisk->vol_fsid;
	mp->mnt_vstat.f_fsid = crc32(&mp->mnt_vstat.f_fsid_uuid,
				     sizeof(mp->mnt_vstat.f_fsid_uuid));

	/*
	 * Certain often-modified fields in the root volume are cached in
	 * the hammer_mount structure so we do not have to generate lots
	 * of little UNDO structures for them.
	 *
	 * Recopy after recovery.  This also has the side effect of
	 * setting our cached undo FIFO's first_offset, which serves to
	 * placemark the FIFO start for the NEXT flush cycle while the
	 * on-disk first_offset represents the LAST flush cycle.
	 */
	hmp->next_tid = rootvol->ondisk->vol0_next_tid;
	hmp->flush_tid1 = hmp->next_tid;
	hmp->flush_tid2 = hmp->next_tid;
	bcopy(rootvol->ondisk->vol0_blockmap, hmp->blockmap,
	      sizeof(hmp->blockmap));
	hmp->copy_stat_freebigblocks = rootvol->ondisk->vol0_stat_freebigblocks;

	hammer_flusher_create(hmp);

	/*
	 * Locate the root directory using the root cluster's B-Tree as a
	 * starting point.  The root directory uses an obj_id of 1.
	 *
	 * FUTURE: Leave the root directory cached referenced but unlocked
	 * in hmp->rootvp (need to flush it on unmount).
	 */
	error = hammer_vfs_vget(mp, NULL, 1, &rootvp);
	if (error)
		goto done;
	vput(rootvp);
	/*vn_unlock(hmp->rootvp);*/
	if (hmp->ronly == 0)
		error = hammer_recover_stage2(hmp, rootvol);

	/*
	 * If the stage2 recovery fails be sure to clean out all cached
	 * vnodes before throwing away the mount structure or bad things
	 * will happen.
	 */
	if (error)
		vflush(mp, 0, 0);

done:
	if ((mp->mnt_flag & MNT_UPDATE) == 0) {
		/* New mount */

		/* Populate info for mount point (NULL pad)*/
		bzero(mp->mnt_stat.f_mntonname, MNAMELEN);
		size_t size;
		if (mntpt) {
			copyinstr(mntpt, mp->mnt_stat.f_mntonname,
				  MNAMELEN -1, &size);
		} else { /* Root mount */
			mp->mnt_stat.f_mntonname[0] = '/';
		}
	}
	(void)VFS_STATFS(mp, &mp->mnt_stat, cred);
	hammer_rel_volume(rootvol, 0);
failed:
	/*
	 * Cleanup and return.
	 */
	if (error) {
		/* called with fs_token held */
		hammer_free_hmp(mp);
	} else {
		lwkt_reltoken(&hmp->fs_token);
	}
	return (error);
}
/*
 * One-time initializer for the global kobj token.  The argument is
 * ignored; it exists only to satisfy the init-callback signature.
 */
static void
kobj_init_token(void *dummy)
{
	lwkt_token_init(&kobj_token, "kobj");
}