/*
 * The caller presents a locked *chainp pointing to a HAMMER2_BREF_TYPE_INODE
 * with an obj_type of HAMMER2_OBJTYPE_HARDLINK.  This routine will gobble
 * the *chainp and return a new locked *chainp representing the file target
 * (the original *chainp will be unlocked).
 *
 * When a match is found the chain representing the original HARDLINK
 * will be returned in *ochainp with a ref, but not locked.
 *
 * When no match is found *chainp is set to NULL and EIO is returned.
 * (*ochainp) will still be set to the original chain with a ref but not
 * locked.
 */
int
hammer2_hardlink_find(hammer2_inode_t *dip, hammer2_chain_t **chainp,
		      hammer2_chain_t **ochainp)
{
	hammer2_chain_t *chain = *chainp;
	hammer2_chain_t *parent;
	hammer2_inode_t *ip;
	hammer2_inode_t *pip;
	hammer2_key_t key_dummy;
	hammer2_key_t lhc;
	int cache_index = -1;

	pip = dip;
	hammer2_inode_ref(pip);		/* for loop */
	hammer2_chain_ref(chain);	/* for (*ochainp) */
	*ochainp = chain;

	/*
	 * Locate the hardlink.  pip is referenced and not locked.
	 *
	 * chain is reused.
	 */
	lhc = chain->data->ipdata.inum;
	hammer2_chain_unlock(chain);
	chain = NULL;

	while ((ip = pip) != NULL) {
		parent = hammer2_inode_lock_ex(ip);
		hammer2_inode_drop(ip);			/* loop */
		KKASSERT(parent->bref.type == HAMMER2_BREF_TYPE_INODE);
		chain = hammer2_chain_lookup(&parent, &key_dummy,
					     lhc, lhc, &cache_index, 0);
		hammer2_chain_lookup_done(parent);	/* discard parent */
		if (chain)
			break;
		pip = ip->pip;		/* safe, ip held locked */
		if (pip)
			hammer2_inode_ref(pip);		/* loop */
		hammer2_inode_unlock_ex(ip, NULL);
	}

	/*
	 * chain is locked, ip is locked.  Unlock ip, return the locked
	 * chain.  *ochainp is already set w/a ref count and not locked.
	 *
	 * (parent is already unlocked).
	 */
	if (ip)
		hammer2_inode_unlock_ex(ip, NULL);
	*chainp = chain;
	if (chain) {
		KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_INODE);
		/* already locked */
		return (0);
	} else {
		return (EIO);
	}
}
/*
 * Repoint ip->chain to nchain.  Caller must hold the inode exclusively
 * locked.
 *
 * ip->chain is set to nchain.  The prior chain in ip->chain is dropped
 * and nchain is ref'd.
 */
void
hammer2_inode_repoint(hammer2_inode_t *ip, hammer2_inode_t *pip,
		      hammer2_chain_t *nchain)
{
	hammer2_chain_t *ochain;
	hammer2_inode_t *opip;

	/*
	 * Repoint ip->chain if requested.
	 */
	ochain = ip->chain;
	ip->chain = nchain;
	if (nchain)
		hammer2_chain_ref(nchain);
	if (ochain)
		hammer2_chain_drop(ochain);

	/*
	 * Repoint ip->pip if requested (non-NULL pip).
	 */
	if (pip && ip->pip != pip) {
		opip = ip->pip;
		hammer2_inode_ref(pip);
		ip->pip = pip;
		if (opip)
			hammer2_inode_drop(opip);
	}
}
/*
 * NOTE: We don't combine the inode/chain lock because putting away an
 *	 inode would otherwise confuse multiple lock holders of the inode.
 *
 *	 Shared locks are especially sensitive to having too many shared
 *	 lock counts (from the same thread) on certain paths which might
 *	 need to upgrade them.  Only one count of a shared lock can be
 *	 upgraded.
 */
hammer2_chain_t *
hammer2_inode_lock_sh(hammer2_inode_t *ip)
{
	hammer2_chain_t *chain;

	hammer2_inode_ref(ip);
	for (;;) {
		ccms_thread_lock(&ip->topo_cst, CCMS_STATE_SHARED);
		chain = ip->chain;
		KKASSERT(chain != NULL);	/* for now */
		hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS |
					  HAMMER2_RESOLVE_SHARED);

		/*
		 * Resolve duplication races, resolve hardlinks by giving
		 * up and cycling an exclusive lock.
		 */
		if ((chain->flags & HAMMER2_CHAIN_DUPLICATED) == 0 &&
		    chain->data->ipdata.type != HAMMER2_OBJTYPE_HARDLINK) {
			break;
		}
		hammer2_chain_unlock(chain);
		ccms_thread_unlock(&ip->topo_cst);
		chain = hammer2_inode_lock_ex(ip);
		hammer2_inode_unlock_ex(ip, chain);
	}
	return (chain);
}
/*
 * Allocate a XOP request.
 *
 * Once allocated a XOP request can be started, collected, and retired,
 * and can be retired early if desired.
 *
 * NOTE: Fifo indices might not be zero but ri == wi on objcache_get().
 */
void *
hammer2_xop_alloc(hammer2_inode_t *ip, int flags)
{
	hammer2_xop_t *xop;

	xop = objcache_get(cache_xops, M_WAITOK);
	KKASSERT(xop->head.cluster.array[0].chain == NULL);

	xop->head.ip = ip;
	xop->head.func = NULL;
	xop->head.state = 0;
	xop->head.error = 0;
	xop->head.collect_key = 0;
	if (flags & HAMMER2_XOP_MODIFYING)
		xop->head.mtid = hammer2_trans_sub(ip->pmp);
	else
		xop->head.mtid = 0;

	xop->head.cluster.nchains = ip->cluster.nchains;
	xop->head.cluster.pmp = ip->pmp;
	xop->head.cluster.flags = HAMMER2_CLUSTER_LOCKED;

	/*
	 * run_mask - Active thread (or frontend) associated with XOP
	 */
	xop->head.run_mask = HAMMER2_XOPMASK_VOP;

	hammer2_inode_ref(ip);

	return xop;
}
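/*
 * Illustrative only: a minimal sketch of the XOP request life cycle
 * (allocate, start, collect, retire), mirroring how the sync code later
 * in this section drives a scan.  hammer2_xop_scanall_t, the key fields,
 * and the start/collect/retire calls match the usage visible in
 * hammer2_sync_slaves(); the wrapper function itself and its error
 * handling are assumptions, not the canonical frontend.
 */
static void
example_xop_scan(hammer2_inode_t *ip, int clindex)
{
	hammer2_xop_scanall_t *xop;
	int error;

	xop = hammer2_xop_alloc(ip, HAMMER2_XOP_MODIFYING);
	xop->key_beg = HAMMER2_KEY_MIN;
	xop->key_end = HAMMER2_KEY_MAX;
	hammer2_xop_start_except(&xop->head, hammer2_xop_scanall, clindex);

	/*
	 * Collect results until the backend threads run out of elements
	 * (ENOENT) or report a hard error.
	 */
	while ((error = hammer2_xop_collect(&xop->head, 0)) == 0) {
		/* consume xop->head.cluster.focus here */
	}

	/*
	 * Retiring the XOP releases the frontend's interest; the backend
	 * threads finish up on their own.
	 */
	hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
}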
/*
 * NOTE: We don't combine the inode/chain lock because putting away an
 *	 inode would otherwise confuse multiple lock holders of the inode.
 *
 *	 Shared locks are especially sensitive to having too many shared
 *	 lock counts (from the same thread) on certain paths which might
 *	 need to upgrade them.  Only one count of a shared lock can be
 *	 upgraded.
 */
hammer2_chain_t *
hammer2_inode_lock_sh(hammer2_inode_t *ip)
{
	hammer2_chain_t *chain;

	hammer2_inode_ref(ip);
again:
	ccms_thread_lock(&ip->topo_cst, CCMS_STATE_SHARED);
	chain = ip->chain;
	KKASSERT(chain != NULL);	/* for now */
	hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS |
				  HAMMER2_RESOLVE_SHARED);

	/*
	 * Resolve duplication races
	 */
	if (hammer2_chain_refactor_test(chain, 1)) {
		hammer2_chain_unlock(chain);
		ccms_thread_unlock(&ip->topo_cst);
		chain = hammer2_inode_lock_ex(ip);
		hammer2_inode_unlock_ex(ip, chain);
		goto again;
	}
	return (chain);
}
/*
 * Find the directory common to both fdip and tdip, hold and return
 * its inode.
 */
hammer2_inode_t *
hammer2_inode_common_parent(hammer2_inode_t *fdip, hammer2_inode_t *tdip)
{
	hammer2_inode_t *scan1;
	hammer2_inode_t *scan2;

	/*
	 * We used to have a depth field but it complicated matters too
	 * much for directory renames.  So now it's ugly.  Check for
	 * simple cases before giving up and doing it the expensive way.
	 *
	 * XXX need a bottom-up topology stability lock
	 */
	if (fdip == tdip || fdip == tdip->pip) {
		hammer2_inode_ref(fdip);
		return(fdip);
	}
	if (fdip->pip == tdip) {
		hammer2_inode_ref(tdip);
		return(tdip);
	}

	/*
	 * XXX not MPSAFE
	 */
	for (scan1 = fdip; scan1->pmp == fdip->pmp; scan1 = scan1->pip) {
		scan2 = tdip;
		while (scan2->pmp == tdip->pmp) {
			if (scan1 == scan2) {
				hammer2_inode_ref(scan1);
				return(scan1);
			}
			scan2 = scan2->pip;
			if (scan2 == NULL)
				break;
		}
	}
	panic("hammer2_inode_common_parent: no common parent %p %p\n",
	      fdip, tdip);
	/* NOT REACHED */
	return(NULL);
}
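/*
 * Illustrative only: how a rename-style caller might use
 * hammer2_inode_common_parent().  The returned inode carries a ref that
 * the caller must drop; hammer2_inode_drop() is used elsewhere in this
 * section, but this particular caller is a sketch, not the real rename
 * path.
 */
static void
example_common_parent(hammer2_inode_t *fdip, hammer2_inode_t *tdip)
{
	hammer2_inode_t *cdip;

	cdip = hammer2_inode_common_parent(fdip, tdip);
	/* ... operate under the common parent's topology ... */
	hammer2_inode_drop(cdip);	/* drop the ref taken above */
}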
/*
 * NOTE: We don't combine the inode/chain lock because putting away an
 *	 inode would otherwise confuse multiple lock holders of the inode.
 *
 *	 Shared locks are especially sensitive to having too many shared
 *	 lock counts (from the same thread) on certain paths which might
 *	 need to upgrade them.  Only one count of a shared lock can be
 *	 upgraded.
 */
hammer2_chain_t *
hammer2_inode_lock_sh(hammer2_inode_t *ip)
{
	hammer2_chain_t *chain;

	hammer2_inode_ref(ip);
	ccms_thread_lock(&ip->topo_cst, CCMS_STATE_SHARED);
	chain = ip->chain;
	KKASSERT(chain != NULL);	/* for now */
	hammer2_chain_lock(ip->hmp, chain, HAMMER2_RESOLVE_ALWAYS |
					   HAMMER2_RESOLVE_SHARED);
	return (chain);
}
/*
 * Lookup an inode by inode number
 */
hammer2_inode_t *
hammer2_inode_lookup(hammer2_pfsmount_t *pmp, hammer2_tid_t inum)
{
	hammer2_inode_t *ip;

	if (pmp) {
		spin_lock(&pmp->inum_spin);
		ip = RB_LOOKUP(hammer2_inode_tree, &pmp->inum_tree, inum);
		if (ip)
			hammer2_inode_ref(ip);
		spin_unlock(&pmp->inum_spin);
	} else {
		ip = NULL;
	}
	return(ip);
}
/*
 * Lock an inode, with SYNCQ semantics.
 *
 * HAMMER2 offers shared and exclusive locks on inodes.  Pass a mask of
 * flags for options:
 *
 *	- pass HAMMER2_RESOLVE_SHARED if a shared lock is desired.  The
 *	  inode locking function will automatically set the RDONLY flag.
 *	  Shared locks are not subject to SYNCQ semantics, exclusive locks
 *	  are.
 *
 *	- pass HAMMER2_RESOLVE_ALWAYS if you need the inode's meta-data.
 *	  Most front-end inode locks do.
 *
 *	- pass HAMMER2_RESOLVE_NEVER if you do not want to require that
 *	  the inode data be resolved.  This is used by the syncthr because
 *	  it can run on an unresolved/out-of-sync cluster, and also by the
 *	  vnode reclamation code to avoid unnecessary I/O (particularly when
 *	  disposing of hundreds of thousands of cached vnodes).
 *
 * This function, along with lock4, has SYNCQ semantics.  If the inode being
 * locked is on the SYNCQ, that is it has been staged by the syncer, we must
 * block until the operation is complete (even if we can lock the inode).  In
 * order to reduce the stall time, we re-order the inode to the front of the
 * pmp->syncq prior to blocking.  This reordering VERY significantly improves
 * performance.
 *
 * The inode locking function locks the inode itself, resolves any stale
 * chains in the inode's cluster, and allocates a fresh copy of the
 * cluster with 1 ref and all the underlying chains locked.
 *
 * ip->cluster will be stable while the inode is locked.
 *
 * NOTE: We don't combine the inode/chain lock because putting away an
 *	 inode would otherwise confuse multiple lock holders of the inode.
 */
void
hammer2_inode_lock(hammer2_inode_t *ip, int how)
{
	hammer2_pfs_t *pmp;

	hammer2_inode_ref(ip);
	pmp = ip->pmp;

	/*
	 * Inode structure mutex - Shared lock
	 */
	if (how & HAMMER2_RESOLVE_SHARED) {
		hammer2_mtx_sh(&ip->lock);
		return;
	}

	/*
	 * Inode structure mutex - Exclusive lock
	 *
	 * An exclusive lock (if not recursive) must wait for inodes on
	 * SYNCQ to flush first, to ensure that meta-data dependencies such
	 * as the nlink count and related directory entries are not split
	 * across flushes.
	 *
	 * If the vnode is locked by the current thread it must be unlocked
	 * across the tsleep() to avoid a deadlock.
	 */
	hammer2_mtx_ex(&ip->lock);
	if (hammer2_mtx_refs(&ip->lock) > 1)
		return;
	while ((ip->flags & HAMMER2_INODE_SYNCQ) && pmp) {
		hammer2_spin_ex(&pmp->list_spin);
		if (ip->flags & HAMMER2_INODE_SYNCQ) {
			tsleep_interlock(&ip->flags, 0);
			atomic_set_int(&ip->flags,
				       HAMMER2_INODE_SYNCQ_WAKEUP);
			TAILQ_REMOVE(&pmp->syncq, ip, entry);
			TAILQ_INSERT_HEAD(&pmp->syncq, ip, entry);
			hammer2_spin_unex(&pmp->list_spin);
			hammer2_mtx_unlock(&ip->lock);
			tsleep(&ip->flags, PINTERLOCKED, "h2sync", 0);
			hammer2_mtx_ex(&ip->lock);
			continue;
		}
		hammer2_spin_unex(&pmp->list_spin);
		break;
	}
}
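/*
 * Illustrative only: the typical pairing for the SYNCQ-aware lock above.
 * hammer2_inode_unlock() is used elsewhere in this section; the wrapper
 * shown here is a sketch, not a real frontend.
 */
static void
example_inode_lock(hammer2_inode_t *ip)
{
	/*
	 * Shared lock for read-only access to the inode's cluster and
	 * meta-data.  Exclusive (default) locks additionally block on
	 * SYNCQ inodes as described above.
	 */
	hammer2_inode_lock(ip, HAMMER2_RESOLVE_SHARED);
	/* ... read-only access to the inode ... */
	hammer2_inode_unlock(ip);
}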
/*
 * Lookup an inode by inode number
 */
hammer2_inode_t *
hammer2_inode_lookup(hammer2_pfs_t *pmp, hammer2_tid_t inum)
{
	hammer2_inode_t *ip;

	KKASSERT(pmp);
	if (pmp->spmp_hmp) {
		ip = NULL;
	} else {
		hammer2_spin_ex(&pmp->inum_spin);
		ip = RB_LOOKUP(hammer2_inode_tree, &pmp->inum_tree, inum);
		if (ip)
			hammer2_inode_ref(ip);
		hammer2_spin_unex(&pmp->inum_spin);
	}
	return(ip);
}
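/*
 * Illustrative only: hammer2_inode_lookup() returns the inode with a ref
 * but not locked, so the caller pairs it with hammer2_inode_drop() (or
 * locks it first via hammer2_inode_lock()).  The wrapper is a sketch and
 * inum is a placeholder.
 */
static void
example_inum_lookup(hammer2_pfs_t *pmp, hammer2_tid_t inum)
{
	hammer2_inode_t *ip;

	ip = hammer2_inode_lookup(pmp, inum);
	if (ip) {
		/* ... use ip (ref'd, unlocked) ... */
		hammer2_inode_drop(ip);
	}
}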
/*
 * HAMMER2 inode locks
 *
 * HAMMER2 offers shared locks and exclusive locks on inodes.
 *
 * An inode's ip->chain pointer is resolved and stable while an inode is
 * locked, and can be cleaned out at any time (become NULL) when an inode
 * is not locked.
 *
 * The underlying chain is also locked and returned.
 *
 * NOTE: We don't combine the inode/chain lock because putting away an
 *	 inode would otherwise confuse multiple lock holders of the inode.
 */
hammer2_chain_t *
hammer2_inode_lock_ex(hammer2_inode_t *ip)
{
	hammer2_chain_t *chain;

	hammer2_inode_ref(ip);
	ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE);

	/*
	 * ip->chain fixup.  Certain duplications used to move inodes
	 * into indirect blocks (for example) can cause ip->chain to
	 * become stale.
	 */
again:
	chain = ip->chain;
	if (hammer2_chain_refactor_test(chain, 1)) {
		spin_lock(&chain->core->cst.spin);
		while (hammer2_chain_refactor_test(chain, 1))
			chain = chain->next_parent;
		if (ip->chain != chain) {
			hammer2_chain_ref(chain);
			spin_unlock(&chain->core->cst.spin);
			hammer2_inode_repoint(ip, NULL, chain);
			hammer2_chain_drop(chain);
		} else {
			spin_unlock(&chain->core->cst.spin);
		}
	}

	KKASSERT(chain != NULL);	/* for now */
	hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS);

	/*
	 * Resolve duplication races
	 */
	if (hammer2_chain_refactor_test(chain, 1)) {
		hammer2_chain_unlock(chain);
		goto again;
	}
	return (chain);
}
/*
 * When presented with a (*chainp) representing an inode of type
 * OBJTYPE_HARDLINK this code will save the original inode (with a ref)
 * in (*ipp), and then locate the hidden hardlink target in (dip) or
 * any parent directory above (dip).  The locked (*chainp) is replaced
 * with a new locked (*chainp) representing the hardlink target.
 */
int
hammer2_hardlink_find(hammer2_inode_t *dip, hammer2_chain_t **chainp,
		      hammer2_inode_t **ipp)
{
	hammer2_mount_t *hmp = dip->hmp;
	hammer2_chain_t *chain = *chainp;
	hammer2_chain_t *parent;
	hammer2_inode_t *pip;
	hammer2_key_t lhc;

	*ipp = chain->u.ip;
	hammer2_inode_ref(chain->u.ip);
	lhc = chain->u.ip->ip_data.inum;

	hammer2_inode_unlock_ex(chain->u.ip);
	pip = chain->u.ip->pip;

	chain = NULL;
	while (pip) {
		parent = &pip->chain;
		KKASSERT(parent->bref.type == HAMMER2_BREF_TYPE_INODE);
		hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
		chain = hammer2_chain_lookup(hmp, &parent, lhc, lhc, 0);
		hammer2_chain_unlock(hmp, parent);
		if (chain)
			break;
		pip = pip->pip;
	}
	*chainp = chain;
	if (chain) {
		KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_INODE);
		/* already locked */
		return (0);
	} else {
		return (EIO);
	}
}
/*
 * HAMMER2 inode locks
 *
 * HAMMER2 offers shared locks and exclusive locks on inodes.
 *
 * An inode's ip->chain pointer is resolved and stable while an inode is
 * locked, and can be cleaned out at any time (become NULL) when an inode
 * is not locked.
 *
 * This function handles duplication races and hardlink replacement races
 * which can cause ip's cached chain to become stale.
 *
 * The underlying chain is also locked and returned.
 *
 * NOTE: We don't combine the inode/chain lock because putting away an
 *	 inode would otherwise confuse multiple lock holders of the inode.
 */
hammer2_chain_t *
hammer2_inode_lock_ex(hammer2_inode_t *ip)
{
	hammer2_chain_t *chain;
	hammer2_chain_t *ochain;
	hammer2_chain_core_t *core;
	int error;

	hammer2_inode_ref(ip);
	ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE);

	chain = ip->chain;
	core = chain->core;
	for (;;) {
		if (chain->flags & HAMMER2_CHAIN_DUPLICATED) {
			spin_lock(&core->cst.spin);
			while (chain->flags & HAMMER2_CHAIN_DUPLICATED)
				chain = TAILQ_NEXT(chain, core_entry);
			hammer2_chain_ref(chain);
			spin_unlock(&core->cst.spin);
			hammer2_inode_repoint(ip, NULL, chain);
			hammer2_chain_drop(chain);
		}
		hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS);
		if ((chain->flags & HAMMER2_CHAIN_DUPLICATED) == 0)
			break;
		hammer2_chain_unlock(chain);
	}
	if (chain->data->ipdata.type == HAMMER2_OBJTYPE_HARDLINK &&
	    (chain->flags & HAMMER2_CHAIN_DELETED) == 0) {
		error = hammer2_hardlink_find(ip->pip, &chain, &ochain);
		hammer2_chain_drop(ochain);
		KKASSERT(error == 0);	/* XXX error handling */
	}
	return (chain);
}
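/*
 * Illustrative only: the exclusive inode lock returns the locked chain,
 * which the caller hands back to hammer2_inode_unlock_ex() when done.
 * The pairing matches the calls visible in hammer2_inode_lock_sh()
 * above; the wrapper itself is a sketch.
 */
static void
example_inode_lock_ex(hammer2_inode_t *ip)
{
	hammer2_chain_t *chain;

	chain = hammer2_inode_lock_ex(ip);
	/* ... modify the inode via its (locked) chain ... */
	hammer2_inode_unlock_ex(ip, chain);
}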
/*
 * Each out of sync node sync-thread must issue an all-nodes XOP scan of
 * the inode.  This creates a multiplication effect since the XOP scan itself
 * issues to all nodes.  However, this is the only way we can safely
 * synchronize nodes which might have disparate I/O bandwidths and the only
 * way we can safely deal with stalled nodes.
 */
static int
hammer2_sync_slaves(hammer2_thread_t *thr, hammer2_inode_t *ip,
		    hammer2_deferred_list_t *list)
{
	hammer2_xop_scanall_t *xop;
	hammer2_chain_t *parent;
	hammer2_chain_t *chain;
	hammer2_pfs_t *pmp;
	hammer2_key_t key_next;
	hammer2_tid_t sync_tid;
	int cache_index = -1;
	int needrescan;
	int wantupdate;
	int error;
	int nerror;
	int idx;
	int n;

	pmp = ip->pmp;
	idx = thr->clindex;	/* cluster node we are responsible for */
	needrescan = 0;
	wantupdate = 0;

	if (ip->cluster.focus == NULL)
		return (EINPROGRESS);
	sync_tid = ip->cluster.focus->bref.modify_tid;

#if 0
	/*
	 * Nothing to do if all slaves are synchronized.
	 * Nothing to do if cluster not authoritatively readable.
	 */
	if (pmp->cluster_flags & HAMMER2_CLUSTER_SSYNCED)
		return(0);
	if ((pmp->cluster_flags & HAMMER2_CLUSTER_RDHARD) == 0)
		return(HAMMER2_ERROR_INCOMPLETE);
#endif

	error = 0;

	/*
	 * The inode is left unlocked during the scan.  Issue a XOP
	 * that does *not* include our cluster index to iterate
	 * properly synchronized elements and resolve our cluster index
	 * against it.
	 */
	hammer2_inode_lock(ip, HAMMER2_RESOLVE_SHARED);
	xop = hammer2_xop_alloc(ip, HAMMER2_XOP_MODIFYING);
	xop->key_beg = HAMMER2_KEY_MIN;
	xop->key_end = HAMMER2_KEY_MAX;
	hammer2_xop_start_except(&xop->head, hammer2_xop_scanall, idx);
	parent = hammer2_inode_chain(ip, idx,
				     HAMMER2_RESOLVE_ALWAYS |
				     HAMMER2_RESOLVE_SHARED);
	if (parent->bref.modify_tid != sync_tid)
		wantupdate = 1;

	hammer2_inode_unlock(ip);

	chain = hammer2_chain_lookup(&parent, &key_next,
				     HAMMER2_KEY_MIN, HAMMER2_KEY_MAX,
				     &cache_index,
				     HAMMER2_LOOKUP_SHARED |
				     HAMMER2_LOOKUP_NODIRECT |
				     HAMMER2_LOOKUP_NODATA);
	error = hammer2_xop_collect(&xop->head, 0);
	kprintf("XOP_INITIAL xop=%p clindex %d on %s\n",
		xop, thr->clindex, pmp->pfs_names[thr->clindex]);

	for (;;) {
		/*
		 * We are done if our scan is done and the XOP scan is done.
		 * We are done if the XOP scan failed (that is, we don't
		 * have authoritative data to synchronize with).
		 */
		int advance_local = 0;
		int advance_xop = 0;
		int dodefer = 0;
		hammer2_chain_t *focus;

		kprintf("loop xop=%p chain[1]=%p lockcnt=%d\n",
			xop, xop->head.cluster.array[1].chain,
			(xop->head.cluster.array[1].chain ?
			 xop->head.cluster.array[1].chain->lockcnt : -1));

		if (chain == NULL && error == ENOENT)
			break;
		if (error && error != ENOENT)
			break;

		/*
		 * Compare
		 */
		if (chain && error == ENOENT) {
			/*
			 * If we have local chains but the XOP scan is done,
			 * the chains need to be deleted.
			 */
			n = -1;
			focus = NULL;
		} else if (chain == NULL) {
			/*
			 * If our local scan is done but the XOP scan is not,
			 * we need to create the missing chain(s).
			 */
			n = 1;
			focus = xop->head.cluster.focus;
		} else {
			/*
			 * Otherwise compare to determine the action
			 * needed.
			 */
			focus = xop->head.cluster.focus;
			n = hammer2_chain_cmp(chain, focus);
		}

		/*
		 * Take action based on comparison results.
		 */
		if (n < 0) {
			/*
			 * Delete extraneous local data.  This will
			 * automatically advance the chain.
			 */
			nerror = hammer2_sync_destroy(thr, &parent, &chain,
						      0, idx);
		} else if (n == 0 && chain->bref.modify_tid !=
				     focus->bref.modify_tid) {
			/*
			 * Matching key but local data or meta-data requires
			 * updating.  If we will recurse, we still need to
			 * update to compatible content first but we do not
			 * synchronize modify_tid until the entire recursion
			 * has completed successfully.
			 */
			if (focus->bref.type == HAMMER2_BREF_TYPE_INODE) {
				nerror = hammer2_sync_replace(
						thr, parent, chain,
						0, idx, focus);
				dodefer = 1;
			} else {
				nerror = hammer2_sync_replace(
						thr, parent, chain,
						focus->bref.modify_tid,
						idx, focus);
			}
		} else if (n == 0) {
			/*
			 * 100% match, advance both
			 */
			advance_local = 1;
			advance_xop = 1;
			nerror = 0;
		} else if (n > 0) {
			/*
			 * Insert missing local data.
			 *
			 * If we will recurse, we still need to update to
			 * compatible content first but we do not synchronize
			 * modify_tid until the entire recursion has
			 * completed successfully.
			 */
			if (focus->bref.type == HAMMER2_BREF_TYPE_INODE) {
				nerror = hammer2_sync_insert(
						thr, &parent, &chain,
						0, idx, focus);
				dodefer = 2;
			} else {
				nerror = hammer2_sync_insert(
						thr, &parent, &chain,
						focus->bref.modify_tid,
						idx, focus);
			}
			advance_local = 1;
			advance_xop = 1;
		}

		/*
		 * We cannot recurse depth-first because the XOP is still
		 * running in node threads for this scan.  Create a
		 * placemarker by obtaining and recording the hammer2_inode.
		 *
		 * We excluded our node from the XOP so we must temporarily
		 * add it to xop->head.cluster so it is properly incorporated
		 * into the inode.
		 *
		 * The deferral is pushed onto a LIFO list for bottom-up
		 * synchronization.
		 */
		if (error == 0 && dodefer) {
			hammer2_inode_t *nip;
			hammer2_deferred_ip_t *defer;

			KKASSERT(focus->bref.type == HAMMER2_BREF_TYPE_INODE);

			defer = kmalloc(sizeof(*defer), M_HAMMER2,
					M_WAITOK | M_ZERO);
			KKASSERT(xop->head.cluster.array[idx].chain == NULL);
			xop->head.cluster.array[idx].flags =
							HAMMER2_CITEM_INVALID;
			xop->head.cluster.array[idx].chain = chain;
			nip = hammer2_inode_get(pmp, ip,
						&xop->head.cluster, idx);
			xop->head.cluster.array[idx].chain = NULL;

			hammer2_inode_ref(nip);
			hammer2_inode_unlock(nip);

			defer->next = list->base;
			defer->ip = nip;
			list->base = defer;
			++list->count;
			needrescan = 1;
		}

		/*
		 * If at least one deferral was added and the deferral
		 * list has grown too large, stop adding more.  This
		 * will trigger an EAGAIN return.
		 */
		if (needrescan && list->count > 1000)
			break;

		/*
		 * Advancements for iteration.
		 */
		if (advance_xop) {
			error = hammer2_xop_collect(&xop->head, 0);
		}
		if (advance_local) {
			chain = hammer2_chain_next(&parent, chain, &key_next,
						   key_next, HAMMER2_KEY_MAX,
						   &cache_index,
						   HAMMER2_LOOKUP_SHARED |
						   HAMMER2_LOOKUP_NODIRECT |
						   HAMMER2_LOOKUP_NODATA);
		}
	}
	hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
	if (chain) {
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
	}
	if (parent) {
		hammer2_chain_unlock(parent);
		hammer2_chain_drop(parent);
	}

	/*
	 * If we added deferrals we want the caller to synchronize them
	 * and then call us again.
	 *
	 * NOTE: In this situation we do not yet want to synchronize our
	 *	 inode, setting the error code also has that effect.
	 */
	if (error == 0 && needrescan)
		error = EAGAIN;

	/*
	 * If no error occurred and work was performed, synchronize the
	 * inode meta-data itself.
	 *
	 * XXX inode lock was lost
	 */
	if (error == 0 && wantupdate) {
		hammer2_xop_ipcluster_t *xop2;
		hammer2_chain_t *focus;

		xop2 = hammer2_xop_alloc(ip, HAMMER2_XOP_MODIFYING);
		hammer2_xop_start_except(&xop2->head, hammer2_xop_ipcluster,
					 idx);
		error = hammer2_xop_collect(&xop2->head, 0);
		if (error == 0) {
			focus = xop2->head.cluster.focus;
			kprintf("syncthr: update inode %p (%s)\n",
				focus,
				(focus ?
				 (char *)focus->data->ipdata.filename : "?"));
			chain = hammer2_inode_chain_and_parent(ip, idx,
						&parent,
						HAMMER2_RESOLVE_ALWAYS |
						HAMMER2_RESOLVE_SHARED);

			KKASSERT(parent != NULL);
			nerror = hammer2_sync_replace(
					thr, parent, chain,
					sync_tid, idx, focus);
			hammer2_chain_unlock(chain);
			hammer2_chain_drop(chain);
			hammer2_chain_unlock(parent);
			hammer2_chain_drop(parent);
			/* XXX */
		}
		hammer2_xop_retire(&xop2->head, HAMMER2_XOPMASK_VOP);
	}

	return error;
}
/*
 * Get the vnode associated with the given inode, allocating the vnode if
 * necessary.  The vnode will be returned exclusively locked.
 *
 * The caller must lock the inode (shared or exclusive).
 *
 * Great care must be taken to avoid deadlocks and vnode acquisition/reclaim
 * races.
 */
struct vnode *
hammer2_igetv(hammer2_inode_t *ip, int *errorp)
{
	hammer2_inode_data_t *ipdata;
	hammer2_pfsmount_t *pmp;
	struct vnode *vp;
	ccms_state_t ostate;

	pmp = ip->pmp;
	KKASSERT(pmp != NULL);
	*errorp = 0;
	ipdata = &ip->chain->data->ipdata;

	for (;;) {
		/*
		 * Attempt to reuse an existing vnode assignment.  It is
		 * possible to race a reclaim so the vget() may fail.  The
		 * inode must be unlocked during the vget() to avoid a
		 * deadlock against a reclaim.
		 */
		vp = ip->vp;
		if (vp) {
			/*
			 * Inode must be unlocked during the vget() to avoid
			 * possible deadlocks, but leave the ip ref intact.
			 *
			 * vnode is held to prevent destruction during the
			 * vget().  The vget() can still fail if we lost
			 * a reclaim race on the vnode.
			 */
			vhold(vp);
			ostate = hammer2_inode_lock_temp_release(ip);
			if (vget(vp, LK_EXCLUSIVE)) {
				vdrop(vp);
				hammer2_inode_lock_temp_restore(ip, ostate);
				continue;
			}
			hammer2_inode_lock_temp_restore(ip, ostate);
			vdrop(vp);
			/* vp still locked and ref from vget */
			if (ip->vp != vp) {
				kprintf("hammer2: igetv race %p/%p\n",
					ip->vp, vp);
				vput(vp);
				continue;
			}
			*errorp = 0;
			break;
		}

		/*
		 * No vnode exists, allocate a new vnode.  Beware of
		 * allocation races.  This function will return an
		 * exclusively locked and referenced vnode.
		 */
		*errorp = getnewvnode(VT_HAMMER2, pmp->mp, &vp, 0, 0);
		if (*errorp) {
			kprintf("hammer2: igetv getnewvnode failed %d\n",
				*errorp);
			vp = NULL;
			break;
		}

		/*
		 * Lock the inode and check for an allocation race.
		 */
		ostate = hammer2_inode_lock_upgrade(ip);
		if (ip->vp != NULL) {
			vp->v_type = VBAD;
			vx_put(vp);
			hammer2_inode_lock_downgrade(ip, ostate);
			continue;
		}

		switch (ipdata->type) {
		case HAMMER2_OBJTYPE_DIRECTORY:
			vp->v_type = VDIR;
			break;
		case HAMMER2_OBJTYPE_REGFILE:
			vp->v_type = VREG;
			vinitvmio(vp, ipdata->size,
				  HAMMER2_LBUFSIZE,
				  (int)ipdata->size & HAMMER2_LBUFMASK);
			break;
		case HAMMER2_OBJTYPE_SOFTLINK:
			/*
			 * XXX for now we are using the generic file_read
			 * and file_write code so we need a buffer cache
			 * association.
			 */
			vp->v_type = VLNK;
			vinitvmio(vp, ipdata->size,
				  HAMMER2_LBUFSIZE,
				  (int)ipdata->size & HAMMER2_LBUFMASK);
			break;
		case HAMMER2_OBJTYPE_CDEV:
			vp->v_type = VCHR;
			/* fall through */
		case HAMMER2_OBJTYPE_BDEV:
			vp->v_ops = &pmp->mp->mnt_vn_spec_ops;
			if (ipdata->type != HAMMER2_OBJTYPE_CDEV)
				vp->v_type = VBLK;
			addaliasu(vp, ipdata->rmajor, ipdata->rminor);
			break;
		case HAMMER2_OBJTYPE_FIFO:
			vp->v_type = VFIFO;
			vp->v_ops = &pmp->mp->mnt_vn_fifo_ops;
			break;
		default:
			panic("hammer2: unhandled objtype %d", ipdata->type);
			break;
		}

		if (ip == pmp->iroot)
			vsetflags(vp, VROOT);

		vp->v_data = ip;
		ip->vp = vp;
		hammer2_inode_ref(ip);		/* vp association */
		hammer2_inode_lock_downgrade(ip, ostate);
		break;
	}

	/*
	 * Return non-NULL vp and *errorp == 0, or NULL vp and *errorp != 0.
	 */
	if (hammer2_debug & 0x0002) {
		kprintf("igetv vp %p refs 0x%08x aux 0x%08x\n",
			vp, vp->v_refcnt, vp->v_auxrefs);
	}
	return (vp);
}
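/*
 * Illustrative only: callers lock the inode, obtain the vnode, and
 * release it with vput() when finished with the vnode.  The surrounding
 * function is a sketch using the lock_ex/unlock_ex API from this era of
 * the code; the lock/igetv ordering follows the comments above.
 */
static int
example_igetv(hammer2_inode_t *ip, struct vnode **vpp)
{
	hammer2_chain_t *chain;
	int error;

	chain = hammer2_inode_lock_ex(ip);	/* caller must hold a lock */
	*vpp = hammer2_igetv(ip, &error);	/* exclusively locked vnode */
	hammer2_inode_unlock_ex(ip, chain);

	return (error);
}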
/*
 * The passed-in chain must be locked and the returned inode will also be
 * locked.  This routine typically locates or allocates the inode, assigns
 * ip->chain (adding a ref to chain if necessary), and returns the inode.
 *
 * The hammer2_inode structure regulates the interface between the high level
 * kernel VNOPS API and the filesystem backend (the chains).
 *
 * WARNING!  This routine sucks up the chain's lock (makes it part of the
 *	     inode lock from the point of view of the inode lock API),
 *	     so callers need to be careful.
 *
 * WARNING!  The mount code is allowed to pass dip == NULL for iroot and
 *	     is allowed to pass pmp == NULL and dip == NULL for sroot.
 */
hammer2_inode_t *
hammer2_inode_get(hammer2_pfsmount_t *pmp, hammer2_inode_t *dip,
		  hammer2_chain_t *chain)
{
	hammer2_inode_t *nip;

	KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_INODE);

	/*
	 * Interlocked lookup/ref of the inode.  This code is only needed
	 * when looking up inodes with nlinks != 0 (TODO: optimize out
	 * otherwise and test for duplicates).
	 */
again:
	for (;;) {
		nip = hammer2_inode_lookup(pmp, chain->data->ipdata.inum);
		if (nip == NULL)
			break;
		ccms_thread_lock(&nip->topo_cst, CCMS_STATE_EXCLUSIVE);
		if ((nip->flags & HAMMER2_INODE_ONRBTREE) == 0) { /* race */
			ccms_thread_unlock(&nip->topo_cst);
			hammer2_inode_drop(nip);
			continue;
		}
		if (nip->chain != chain)
			hammer2_inode_repoint(nip, NULL, chain);

		/*
		 * Consolidated nip/nip->chain is locked (chain locked
		 * by caller).
		 */
		return nip;
	}

	/*
	 * We couldn't find the inode number, create a new inode.
	 */
	if (pmp) {
		nip = kmalloc(sizeof(*nip), pmp->minode, M_WAITOK | M_ZERO);
		atomic_add_long(&pmp->inmem_inodes, 1);
		hammer2_chain_memory_inc(pmp);
		hammer2_chain_memory_wakeup(pmp);
	} else {
		nip = kmalloc(sizeof(*nip), M_HAMMER2, M_WAITOK | M_ZERO);
		nip->flags = HAMMER2_INODE_SROOT;
	}
	nip->inum = chain->data->ipdata.inum;
	nip->size = chain->data->ipdata.size;
	nip->mtime = chain->data->ipdata.mtime;
	hammer2_inode_repoint(nip, NULL, chain);
	nip->pip = dip;				/* can be NULL */
	if (dip)
		hammer2_inode_ref(dip);		/* ref dip for nip->pip */
	nip->pmp = pmp;

	/*
	 * ref and lock on nip gives it state compatible to after a
	 * hammer2_inode_lock_ex() call.
	 */
	nip->refs = 1;
	ccms_cst_init(&nip->topo_cst, &nip->chain);
	ccms_thread_lock(&nip->topo_cst, CCMS_STATE_EXCLUSIVE);
	/* combination of thread lock and chain lock == inode lock */

	/*
	 * Attempt to add the inode.  If it fails we raced another inode
	 * get.  Undo all the work and try again.
	 */
	if (pmp) {
		spin_lock(&pmp->inum_spin);
		if (RB_INSERT(hammer2_inode_tree, &pmp->inum_tree, nip)) {
			spin_unlock(&pmp->inum_spin);
			ccms_thread_unlock(&nip->topo_cst);
			hammer2_inode_drop(nip);
			goto again;
		}
		atomic_set_int(&nip->flags, HAMMER2_INODE_ONRBTREE);
		spin_unlock(&pmp->inum_spin);
	}
	return (nip);
}
/*
 * Caller holds pmp->list_spin and the inode should be locked.  Merge ip
 * with the specified depend.
 *
 * If the ip is on SYNCQ it stays there and (void *)-1 is returned, indicating
 * that successive calls must ensure the ip is on a pass2 depend (or they are
 * all SYNCQ).  If the passed-in depend is not NULL and not (void *)-1 then
 * we can set pass2 on it and return.
 *
 * If the ip is not on SYNCQ it is merged with the passed-in depend, creating
 * a self-depend if necessary, and depend->pass2 is set according
 * to the PASS2 flag.  SIDEQ is set.
 */
static __noinline
hammer2_depend_t *
hammer2_inode_setdepend_locked(hammer2_inode_t *ip, hammer2_depend_t *depend)
{
	hammer2_pfs_t *pmp = ip->pmp;
	hammer2_depend_t *dtmp;
	hammer2_inode_t *iptmp;

	/*
	 * If ip is SYNCQ its entry is used for the syncq list and it will
	 * no longer be associated with a dependency.  Merging this status
	 * with a passed-in depend implies PASS2.
	 */
	if (ip->flags & HAMMER2_INODE_SYNCQ) {
		if (depend == (void *)-1 || depend == NULL) {
			return ((void *)-1);
		}
		depend->pass2 = 1;
		hammer2_trans_setflags(pmp, HAMMER2_TRANS_RESCAN);

		return depend;
	}

	/*
	 * If ip is already SIDEQ, merge ip->depend into the passed-in
	 * depend.  If it is not, associate the ip with the passed-in
	 * depend, creating a single-entry dependency using depend_static
	 * if necessary.
	 *
	 * NOTE: The use of ip->depend_static always requires that the
	 *	 specific ip containing the structure is part of that
	 *	 particular depend_static's dependency group.
	 */
	if (ip->flags & HAMMER2_INODE_SIDEQ) {
		/*
		 * Merge ip->depend with the passed-in depend.  If the
		 * passed-in depend is not a special case, all ips associated
		 * with ip->depend (including the original ip) must be moved
		 * to the passed-in depend.
		 */
		if (depend == NULL) {
			depend = ip->depend;
		} else if (depend == (void *)-1) {
			depend = ip->depend;
			depend->pass2 = 1;
		} else if (depend != ip->depend) {
#ifdef INVARIANTS
			int sanitychk = 0;
#endif
			dtmp = ip->depend;
			while ((iptmp = TAILQ_FIRST(&dtmp->sideq)) != NULL) {
#ifdef INVARIANTS
				if (iptmp == ip)
					sanitychk = 1;
#endif
				TAILQ_REMOVE(&dtmp->sideq, iptmp, entry);
				TAILQ_INSERT_TAIL(&depend->sideq, iptmp,
						  entry);
				iptmp->depend = depend;
			}
			KKASSERT(sanitychk == 1);
			depend->count += dtmp->count;
			depend->pass2 |= dtmp->pass2;
			TAILQ_REMOVE(&pmp->depq, dtmp, entry);
			dtmp->count = 0;
			dtmp->pass2 = 0;
		}
	} else {
		/*
		 * Add ip to the sideq, creating a self-dependency if
		 * necessary.
		 */
		hammer2_inode_ref(ip);
		atomic_set_int(&ip->flags, HAMMER2_INODE_SIDEQ);
		if (depend == NULL) {
			depend = &ip->depend_static;
			TAILQ_INSERT_TAIL(&pmp->depq, depend, entry);
		} else if (depend == (void *)-1) {
			depend = &ip->depend_static;
			depend->pass2 = 1;
			TAILQ_INSERT_TAIL(&pmp->depq, depend, entry);
		} /* else add ip to passed-in depend */
		TAILQ_INSERT_TAIL(&depend->sideq, ip, entry);
		ip->depend = depend;
		++depend->count;
		++pmp->sideq_count;
	}

	if (ip->flags & HAMMER2_INODE_SYNCQ_PASS2)
		depend->pass2 = 1;
	if (depend->pass2)
		hammer2_trans_setflags(pmp, HAMMER2_TRANS_RESCAN);

	return depend;
}
/*
 * Exclusively lock up to four inodes, in order, with SYNCQ semantics.
 * ip1 and ip2 must not be NULL.  ip3 and ip4 may be NULL, but if ip3 is
 * NULL then ip4 must also be NULL.
 *
 * This creates a dependency between up to four inodes.
 */
void
hammer2_inode_lock4(hammer2_inode_t *ip1, hammer2_inode_t *ip2,
		    hammer2_inode_t *ip3, hammer2_inode_t *ip4)
{
	hammer2_inode_t *ips[4];
	hammer2_inode_t *iptmp;
	hammer2_inode_t *ipslp;
	hammer2_depend_t *depend;
	hammer2_pfs_t *pmp;
	size_t count;
	size_t i;

	pmp = ip1->pmp;			/* may be NULL */
	KKASSERT(pmp == ip2->pmp);

	ips[0] = ip1;
	ips[1] = ip2;
	if (ip3 == NULL) {
		count = 2;
	} else if (ip4 == NULL) {
		count = 3;
		ips[2] = ip3;
		KKASSERT(pmp == ip3->pmp);
	} else {
		count = 4;
		ips[2] = ip3;
		ips[3] = ip4;
		KKASSERT(pmp == ip3->pmp);
		KKASSERT(pmp == ip4->pmp);
	}

	for (i = 0; i < count; ++i)
		hammer2_inode_ref(ips[i]);

restart:
	/*
	 * Lock the inodes in order
	 */
	for (i = 0; i < count; ++i) {
		hammer2_mtx_ex(&ips[i]->lock);
	}

	/*
	 * Associate dependencies, record the first inode found on SYNCQ
	 * (operation is allowed to proceed for inodes on PASS2) for our
	 * sleep operation, this inode is theoretically the last one sync'd
	 * in the sequence.
	 *
	 * All inodes found on SYNCQ are moved to the head of the syncq
	 * to reduce stalls.
	 */
	hammer2_spin_ex(&pmp->list_spin);
	depend = NULL;
	ipslp = NULL;
	for (i = 0; i < count; ++i) {
		iptmp = ips[i];
		depend = hammer2_inode_setdepend_locked(iptmp, depend);
		if (iptmp->flags & HAMMER2_INODE_SYNCQ) {
			TAILQ_REMOVE(&pmp->syncq, iptmp, entry);
			TAILQ_INSERT_HEAD(&pmp->syncq, iptmp, entry);
			if (ipslp == NULL)
				ipslp = iptmp;
		}
	}
	hammer2_spin_unex(&pmp->list_spin);

	/*
	 * Block and retry if any of the inodes are on SYNCQ.  It is
	 * important that we allow the operation to proceed in the
	 * PASS2 case, to avoid deadlocking against the vnode.
	 */
	if (ipslp) {
		for (i = 0; i < count; ++i)
			hammer2_mtx_unlock(&ips[i]->lock);
		tsleep(&ipslp->flags, 0, "h2sync", 2);
		goto restart;
	}
}
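/*
 * Illustrative only: a rename-like operation locking all four related
 * inodes at once to establish a single flush dependency.  Unlocking is
 * per-inode; hammer2_inode_unlock() is used elsewhere in this section.
 * The argument names and the wrapper itself are placeholders.
 */
static void
example_lock4(hammer2_inode_t *fdip, hammer2_inode_t *tdip,
	      hammer2_inode_t *fip, hammer2_inode_t *tip)
{
	hammer2_inode_lock4(fdip, tdip, fip, tip);
	/* ... perform the multi-inode operation ... */
	hammer2_inode_unlock(fip);
	hammer2_inode_unlock(tip);
	hammer2_inode_unlock(fdip);
	hammer2_inode_unlock(tdip);
}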
void
hammer2_xop_setip2(hammer2_xop_head_t *xop, hammer2_inode_t *ip2)
{
	xop->ip2 = ip2;
	hammer2_inode_ref(ip2);
}
void
hammer2_xop_setip3(hammer2_xop_head_t *xop, hammer2_inode_t *ip3)
{
	xop->ip3 = ip3;
	hammer2_inode_ref(ip3);
}
/*
 * This is called from the mount code to initialize pmp->ihidden
 */
void
hammer2_inode_install_hidden(hammer2_pfsmount_t *pmp)
{
	hammer2_trans_t trans;
	hammer2_chain_t *parent;
	hammer2_chain_t *chain;
	hammer2_chain_t *scan;
	hammer2_inode_data_t *ipdata;
	hammer2_key_t key_dummy;
	hammer2_key_t key_next;
	int cache_index;
	int error;
	int count;

	if (pmp->ihidden)
		return;

	/*
	 * Find the hidden directory
	 */
	bzero(&key_dummy, sizeof(key_dummy));
	hammer2_trans_init(&trans, pmp, NULL, 0);

	parent = hammer2_inode_lock_ex(pmp->iroot);
	chain = hammer2_chain_lookup(&parent, &key_dummy,
				     HAMMER2_INODE_HIDDENDIR,
				     HAMMER2_INODE_HIDDENDIR,
				     &cache_index, 0);
	if (chain) {
		pmp->ihidden = hammer2_inode_get(pmp, pmp->iroot, chain);
		hammer2_inode_ref(pmp->ihidden);

		/*
		 * Remove any unlinked files which were left open as-of
		 * any system crash.
		 */
		count = 0;
		scan = hammer2_chain_lookup(&chain, &key_next,
					    0, HAMMER2_MAX_TID,
					    &cache_index,
					    HAMMER2_LOOKUP_NODATA);
		while (scan) {
			if (scan->bref.type == HAMMER2_BREF_TYPE_INODE) {
				hammer2_chain_delete(&trans, scan, 0);
				++count;
			}
			scan = hammer2_chain_next(&chain, scan, &key_next,
						  0, HAMMER2_MAX_TID,
						  &cache_index,
						  HAMMER2_LOOKUP_NODATA);
		}

		hammer2_inode_unlock_ex(pmp->ihidden, chain);
		hammer2_inode_unlock_ex(pmp->iroot, parent);
		hammer2_trans_done(&trans);
		kprintf("hammer2: PFS loaded hidden dir, "
			"removed %d dead entries\n", count);
		return;
	}

	/*
	 * Create the hidden directory
	 */
	error = hammer2_chain_create(&trans, &parent, &chain,
				     HAMMER2_INODE_HIDDENDIR, 0,
				     HAMMER2_BREF_TYPE_INODE,
				     HAMMER2_INODE_BYTES);
	hammer2_inode_unlock_ex(pmp->iroot, parent);

	hammer2_chain_modify(&trans, &chain, 0);
	ipdata = &chain->data->ipdata;
	ipdata->type = HAMMER2_OBJTYPE_DIRECTORY;
	ipdata->inum = HAMMER2_INODE_HIDDENDIR;
	ipdata->nlinks = 1;
	kprintf("hammer2: PFS root missing hidden directory, creating\n");

	pmp->ihidden = hammer2_inode_get(pmp, pmp->iroot, chain);
	hammer2_inode_ref(pmp->ihidden);
	hammer2_inode_unlock_ex(pmp->ihidden, chain);
	hammer2_trans_done(&trans);
}
/*
 * Primary management thread for an element of a node.  A thread will exist
 * for each element requiring management.
 *
 * No management threads are needed for the SPMP or for any PMP with only
 * a single MASTER.
 *
 * On the SPMP - handles bulkfree and dedup operations
 * On a PFS    - handles remastering and synchronization
 */
void
hammer2_primary_sync_thread(void *arg)
{
	hammer2_thread_t *thr = arg;
	hammer2_pfs_t *pmp;
	hammer2_deferred_list_t list;
	hammer2_deferred_ip_t *defer;
	int error;

	pmp = thr->pmp;
	bzero(&list, sizeof(list));

	lockmgr(&thr->lk, LK_EXCLUSIVE);
	while ((thr->flags & HAMMER2_THREAD_STOP) == 0) {
		/*
		 * Handle freeze request
		 */
		if (thr->flags & HAMMER2_THREAD_FREEZE) {
			atomic_set_int(&thr->flags, HAMMER2_THREAD_FROZEN);
			atomic_clear_int(&thr->flags, HAMMER2_THREAD_FREEZE);
		}

		/*
		 * Force idle if frozen until unfrozen or stopped.
		 */
		if (thr->flags & HAMMER2_THREAD_FROZEN) {
			lksleep(&thr->flags, &thr->lk, 0, "frozen", 0);
			continue;
		}

		/*
		 * Reset state on REMASTER request
		 */
		if (thr->flags & HAMMER2_THREAD_REMASTER) {
			atomic_clear_int(&thr->flags,
					 HAMMER2_THREAD_REMASTER);
			/* reset state */
		}

		/*
		 * Synchronization scan.
		 */
		kprintf("sync_slaves pfs %s clindex %d\n",
			pmp->pfs_names[thr->clindex], thr->clindex);
		hammer2_trans_init(pmp, 0);

		hammer2_inode_ref(pmp->iroot);

		for (;;) {
			int didbreak = 0;
			/* XXX lock synchronize pmp->modify_tid */
			error = hammer2_sync_slaves(thr, pmp->iroot, &list);
			if (error != EAGAIN)
				break;
			while ((defer = list.base) != NULL) {
				hammer2_inode_t *nip;

				nip = defer->ip;
				error = hammer2_sync_slaves(thr, nip, &list);
				if (error && error != EAGAIN)
					break;
				if (hammer2_thr_break(thr)) {
					didbreak = 1;
					break;
				}

				/*
				 * If no additional defers occurred we can
				 * remove this one, otherwise keep it on
				 * the list and retry once the additional
				 * defers have completed.
				 */
				if (defer == list.base) {
					--list.count;
					list.base = defer->next;
					kfree(defer, M_HAMMER2);
					defer = NULL;	/* safety */
					hammer2_inode_drop(nip);
				}
			}

			/*
			 * If the thread is being remastered, frozen, or
			 * stopped, clean up any left-over deferrals.
			 */
			if (didbreak || (error && error != EAGAIN)) {
				kprintf("didbreak\n");
				while ((defer = list.base) != NULL) {
					--list.count;
					hammer2_inode_drop(defer->ip);
					list.base = defer->next;
					kfree(defer, M_HAMMER2);
				}
				if (error == 0 || error == EAGAIN)
					error = EINPROGRESS;
				break;
			}
		}

		hammer2_inode_drop(pmp->iroot);
		hammer2_trans_done(pmp);

		if (error)
			kprintf("hammer2_sync_slaves: error %d\n", error);

		/*
		 * Wait for event, or 5-second poll.
		 */
		lksleep(&thr->flags, &thr->lk, 0, "h2idle", hz * 5);
	}
	thr->td = NULL;
	wakeup(thr);
	lockmgr(&thr->lk, LK_RELEASE);
	/* thr structure can go invalid after this point */
}
/*
 * Create a new PFS under the super-root
 */
static int
hammer2_ioctl_pfs_create(hammer2_inode_t *ip, void *data)
{
	hammer2_inode_data_t *nipdata;
	hammer2_chain_t *nchain;
	hammer2_dev_t *hmp;
	hammer2_ioc_pfs_t *pfs;
	hammer2_inode_t *nip;
	hammer2_tid_t mtid;
	int error;

	hmp = ip->pmp->pfs_hmps[0];
	if (hmp == NULL)
		return (EINVAL);

	pfs = data;
	nip = NULL;

	if (pfs->name[0] == 0)
		return(EINVAL);
	pfs->name[sizeof(pfs->name) - 1] = 0;	/* ensure 0-termination */

	if (hammer2_ioctl_pfs_lookup(ip, pfs) == 0)
		return(EEXIST);

	hammer2_trans_init(hmp->spmp, 0);
	mtid = hammer2_trans_sub(hmp->spmp);
	nip = hammer2_inode_create(hmp->spmp->iroot, NULL, NULL,
				   pfs->name, strlen(pfs->name), 0,
				   1, HAMMER2_OBJTYPE_DIRECTORY, 0,
				   HAMMER2_INSERT_PFSROOT, &error);
	if (error == 0) {
		hammer2_inode_modify(nip);
		nchain = hammer2_inode_chain(nip, 0, HAMMER2_RESOLVE_ALWAYS);
		hammer2_chain_modify(nchain, mtid, 0);
		nipdata = &nchain->data->ipdata;

		nip->meta.pfs_type = pfs->pfs_type;
		nip->meta.pfs_subtype = pfs->pfs_subtype;
		nip->meta.pfs_clid = pfs->pfs_clid;
		nip->meta.pfs_fsid = pfs->pfs_fsid;
		nip->meta.op_flags |= HAMMER2_OPFLAG_PFSROOT;

		/*
		 * Set default compression and check algorithm.  This
		 * can be changed later.
		 *
		 * Do not allow compression on PFS's with the special name
		 * "boot", the boot loader can't decompress (yet).
		 */
		nip->meta.comp_algo =
			HAMMER2_ENC_ALGO(HAMMER2_COMP_NEWFS_DEFAULT);
		nip->meta.check_algo =
			HAMMER2_ENC_ALGO(HAMMER2_CHECK_ISCSI32);

		if (strcasecmp(pfs->name, "boot") == 0) {
			nip->meta.comp_algo =
				HAMMER2_ENC_ALGO(HAMMER2_COMP_AUTOZERO);
		}

#if 0
		hammer2_blockref_t bref;
		/* XXX new PFS needs to be rescanned / added */
		bref = nchain->bref;
		kprintf("ADD LOCAL PFS (IOCTL): %s\n", nipdata->filename);
		hammer2_pfsalloc(nchain, nipdata, bref.modify_tid);
#endif
		/* XXX rescan */
		hammer2_chain_unlock(nchain);
		hammer2_chain_drop(nchain);

		/*
		 * Super-root isn't mounted, fsync it
		 */
		hammer2_inode_ref(nip);
		hammer2_inode_unlock(nip);
		hammer2_inode_fsync(nip);
		hammer2_inode_drop(nip);
	}
	hammer2_trans_done(hmp->spmp);

	return (error);
}