/*
 * Return the chain at cluster index (clindex) of inode (ip) together with
 * its parent.  Both are returned referenced and locked with (how); the
 * parent is stored in *parentp and may be NULL if the chain has no parent.
 *
 * The lock order must be (parent, chain), so after the chain is initially
 * locked it is temporarily unlocked while the parent lock is obtained.
 * Because the topology can change while the chain is unlocked, the
 * inode's slot and the chain's parent pointer are re-checked afterwards
 * and the whole sequence is retried on a mismatch.
 *
 * If the inode has no chain at (clindex), NULL is returned and *parentp
 * is set to NULL.  (The previous code dereferenced the NULL chain.)
 */
hammer2_chain_t *
hammer2_inode_chain_and_parent(hammer2_inode_t *ip, int clindex,
			       hammer2_chain_t **parentp, int how)
{
	hammer2_chain_t *chain;
	hammer2_chain_t *parent;

	for (;;) {
		/*
		 * Sample the slot under the shared spinlock and gain a ref
		 * before releasing the spinlock.
		 */
		hammer2_spin_sh(&ip->cluster_spin);
		if (clindex >= ip->cluster.nchains)
			chain = NULL;
		else
			chain = ip->cluster.array[clindex].chain;
		if (chain == NULL) {
			/*
			 * Nothing to lock and no parent to look up.
			 */
			hammer2_spin_unsh(&ip->cluster_spin);
			*parentp = NULL;
			return (NULL);
		}
		hammer2_chain_ref(chain);
		hammer2_spin_unsh(&ip->cluster_spin);
		hammer2_chain_lock(chain, how);

		/*
		 * Get parent, lock order must be (parent, chain).
		 */
		parent = chain->parent;
		if (parent) {
			hammer2_chain_ref(parent);
			hammer2_chain_unlock(chain);
			hammer2_chain_lock(parent, how);
			hammer2_chain_lock(chain, how);
		}

		/*
		 * Done if nothing changed while the chain was unlocked.
		 */
		if (ip->cluster.array[clindex].chain == chain &&
		    chain->parent == parent) {
			break;
		}

		/*
		 * Retry
		 */
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		if (parent) {
			hammer2_chain_unlock(parent);
			hammer2_chain_drop(parent);
		}
	}
	*parentp = parent;

	return chain;
}
/*
 * Snapshot ioctl handler.
 *
 * (data) is a hammer2_ioc_pfs_t.  The name must be non-empty and must be
 * NUL-terminated within the name buffer, and device slot 0 of the PFS must
 * be populated; otherwise EINVAL is returned.
 *
 * The mount is synced, then inside an ISFLUSH transaction the inode's
 * chain at cluster index 0 is handed to hammer2_chain_snapshot(), whose
 * return value becomes the return value of this function.
 */
static int
hammer2_ioctl_pfs_snapshot(hammer2_inode_t *ip, void *data)
{
	hammer2_ioc_pfs_t *pfs = data;
	hammer2_chain_t *ichain;
	hammer2_tid_t mtid;
	int rc;

	/*
	 * Argument validation
	 */
	if (pfs->name[0] == 0)
		return(EINVAL);
	if (pfs->name[sizeof(pfs->name)-1] != 0)
		return(EINVAL);
	if (ip->pmp->pfs_hmps[0] == NULL)
		return (EINVAL);

	/*
	 * Flush first, then run the snapshot under a flush transaction.
	 */
	hammer2_vfs_sync(ip->pmp->mp, MNT_WAIT);
	hammer2_trans_init(ip->pmp, HAMMER2_TRANS_ISFLUSH);
	mtid = hammer2_trans_sub(ip->pmp);

	hammer2_inode_lock(ip, 0);
	ichain = hammer2_inode_chain(ip, 0, HAMMER2_RESOLVE_ALWAYS);
	rc = hammer2_chain_snapshot(ichain, pfs, mtid);
	hammer2_chain_unlock(ichain);
	hammer2_chain_drop(ichain);
	hammer2_inode_unlock(ip);

	hammer2_trans_done(ip->pmp);

	return (rc);
}
/*
 * Switch ip->chain over to (nchain), and optionally switch ip->pip over
 * to (pip).  The caller must hold the inode exclusively locked.
 *
 * The new chain (if any) gains a reference and the chain previously held
 * in ip->chain (if any) is dropped.  A NULL (pip), or a (pip) equal to the
 * current ip->pip, leaves the parent inode pointer untouched; otherwise
 * the new parent is referenced and the previous one is dropped.
 */
void
hammer2_inode_repoint(hammer2_inode_t *ip, hammer2_inode_t *pip,
		      hammer2_chain_t *nchain)
{
	hammer2_chain_t *prev_chain;
	hammer2_inode_t *prev_pip;

	/*
	 * Chain pointer: install, ref the new one, then drop the old one.
	 */
	prev_chain = ip->chain;
	ip->chain = nchain;
	if (nchain != NULL)
		hammer2_chain_ref(nchain);
	if (prev_chain != NULL)
		hammer2_chain_drop(prev_chain);

	/*
	 * Parent inode pointer, only when a different one was supplied.
	 */
	if (pip == NULL || ip->pip == pip)
		return;

	prev_pip = ip->pip;
	hammer2_inode_ref(pip);
	ip->pip = pip;
	if (prev_pip != NULL)
		hammer2_inode_drop(prev_pip);
}
/*
 * Locate next match or overlap under parent, replace cluster.
 *
 * Each non-NULL element of (cluster) is advanced with hammer2_chain_next()
 * against the matching element of (cparent).  The focus of both clusters
 * is reset and recomputed as the first usable element encountered.
 *
 * Elements whose parent slot is NULL cannot be iterated.  They are
 * released (dropped when HAMMER2_LOOKUP_NOLOCK is set, unlocked otherwise)
 * and the slot is cleared so the released chain cannot be released a
 * second time, either by a later call or by the hammer2_cluster_drop()
 * below.
 *
 * Returns the updated cluster, or NULL (after dropping the cluster) when
 * every element is NULL.
 */
hammer2_cluster_t *
hammer2_cluster_next(hammer2_cluster_t *cparent, hammer2_cluster_t *cluster,
		     hammer2_key_t *key_nextp,
		     hammer2_key_t key_beg, hammer2_key_t key_end, int flags)
{
	hammer2_chain_t *chain;
	hammer2_key_t key_accum;
	hammer2_key_t key_next;
	int null_count;
	int i;

	key_accum = *key_nextp;
	null_count = 0;
	cluster->focus = NULL;
	cparent->focus = NULL;

	for (i = 0; i < cparent->nchains; ++i) {
		key_next = *key_nextp;
		chain = cluster->array[i];
		if (chain == NULL) {
			if (cparent->focus == NULL)
				cparent->focus = cparent->array[i];
			++null_count;
			continue;
		}
		if (cparent->array[i] == NULL) {
			if (flags & HAMMER2_LOOKUP_NOLOCK)
				hammer2_chain_drop(chain);
			else
				hammer2_chain_unlock(chain);
			/*
			 * The chain has been released, do not leave a
			 * stale pointer behind in the cluster.
			 */
			cluster->array[i] = NULL;
			++null_count;
			continue;
		}
		chain = hammer2_chain_next(&cparent->array[i], chain,
					   &key_next, key_beg, key_end,
					   &cparent->cache_index[i], flags);
		if (cparent->focus == NULL)
			cparent->focus = cparent->array[i];
		cluster->array[i] = chain;
		if (chain == NULL) {
			++null_count;
		} else if (cluster->focus == NULL) {
			cluster->focus = chain;
		}

		/*
		 * NOTE(review): key_accum tracks the lowest key_next seen
		 * but is never stored back through key_nextp, so *key_nextp
		 * is left unmodified by this function.  Confirm against the
		 * callers whether a write-back is intended before changing.
		 */
		if (key_accum > key_next)
			key_accum = key_next;
	}

	if (null_count == i) {
		hammer2_cluster_drop(cluster);
		cluster = NULL;
	}

	return(cluster);
}
/*
 * Destroy an extraneous chain.
 *
 * Both *parentp and *chainp are locked shared on entry.  The locks are
 * cycled to exclusive for the deletion, the caller's reference on the
 * chain is dropped, and the parent is then relocked shared.
 *
 * On return, *chainp will be adjusted to point to the next element in the
 * iteration (the first element at or beyond the deleted key + 1) and
 * locked shared, or NULL if the lookup finds nothing.  *parentp may be
 * adjusted by the lookup.
 *
 * Always returns 0.
 */
static
int
hammer2_sync_destroy(hammer2_thread_t *thr,
		     hammer2_chain_t **parentp, hammer2_chain_t **chainp,
		     hammer2_tid_t mtid, int idx)
{
	hammer2_chain_t *chain;
	hammer2_key_t key_next;
	hammer2_key_t save_key;
	int cache_index = -1;

	chain = *chainp;

#if HAMMER2_THREAD_DEBUG
	if (hammer2_debug & 1)
		kprintf("destroy rec %p/%p slave %d %d.%016jx\n",
			*parentp, chain,
			idx, chain->bref.type, chain->bref.key);
#endif

	/*
	 * Remember where to resume the iteration, being careful not to
	 * wrap the key.
	 */
	save_key = chain->bref.key;
	if (save_key != HAMMER2_KEY_MAX)
		++save_key;

	/*
	 * Try to avoid unnecessary I/O.
	 *
	 * XXX accounting not propagated up properly.  We might have to do
	 *     a RESOLVE_MAYBE here and pass 0 for the flags.
	 */
	hammer2_chain_unlock(chain);	/* relock exclusive */
	hammer2_chain_unlock(*parentp);
	hammer2_chain_lock(*parentp, HAMMER2_RESOLVE_ALWAYS);
	hammer2_chain_lock(chain, HAMMER2_RESOLVE_NEVER);

	hammer2_chain_delete(*parentp, chain, mtid, HAMMER2_DELETE_PERMANENT);
	hammer2_chain_unlock(chain);
	hammer2_chain_drop(chain);
	chain = NULL;			/* safety */

	hammer2_chain_unlock(*parentp);	/* relock shared */
	hammer2_chain_lock(*parentp, HAMMER2_RESOLVE_SHARED |
				     HAMMER2_RESOLVE_ALWAYS);

	/*
	 * Resume the iteration under the caller's parent.  The lookup must
	 * be given the caller's parent pointer (parentp); the previous code
	 * passed the address of an uninitialized local instead.
	 */
	*chainp = hammer2_chain_lookup(parentp, &key_next,
				     save_key, HAMMER2_KEY_MAX,
				     &cache_index,
				     HAMMER2_LOOKUP_SHARED |
				     HAMMER2_LOOKUP_NODIRECT |
				     HAMMER2_LOOKUP_NODATA);
	return 0;
}
/*
 * HAMMER2 inode locks
 *
 * Exclusive variant.  The inode gains a reference and its topology lock
 * is obtained exclusively, then the underlying chain is locked with its
 * data resolved and returned to the caller.
 *
 * ip->chain may have gone stale due to duplication races.  A chain marked
 * DUPLICATED is walked forward along the core's list until a
 * non-duplicated chain is found, the inode is repointed at it, and the
 * test is repeated after the chain lock is obtained.
 *
 * If the locked chain turns out to be a non-deleted hardlink pointer it
 * is resolved with hammer2_hardlink_find() and the resolved chain is
 * returned instead.
 *
 * NOTE: We don't combine the inode/chain lock because putting away an
 *       inode would otherwise confuse multiple lock holders of the inode.
 */
hammer2_chain_t *
hammer2_inode_lock_ex(hammer2_inode_t *ip)
{
	hammer2_chain_core_t *core;
	hammer2_chain_t *chain;
	hammer2_chain_t *link_chain;
	int rc;

	hammer2_inode_ref(ip);
	ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE);

	chain = ip->chain;
	core = chain->core;

	for (;;) {
		if (chain->flags & HAMMER2_CHAIN_DUPLICATED) {
			/*
			 * Skip forward to the live chain and make the
			 * inode point at it.
			 */
			spin_lock(&core->cst.spin);
			while (chain->flags & HAMMER2_CHAIN_DUPLICATED)
				chain = TAILQ_NEXT(chain, core_entry);
			hammer2_chain_ref(chain);
			spin_unlock(&core->cst.spin);
			hammer2_inode_repoint(ip, NULL, chain);
			hammer2_chain_drop(chain);
		}
		hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS);
		if (chain->flags & HAMMER2_CHAIN_DUPLICATED) {
			/*
			 * Lost a race while acquiring the lock, go again.
			 */
			hammer2_chain_unlock(chain);
			continue;
		}
		break;
	}

	/*
	 * Hardlink pointers are replaced by their target.
	 */
	if (chain->data->ipdata.type == HAMMER2_OBJTYPE_HARDLINK) {
		if ((chain->flags & HAMMER2_CHAIN_DELETED) == 0) {
			rc = hammer2_hardlink_find(ip->pip, &chain,
						   &link_chain);
			hammer2_chain_drop(link_chain);
			KKASSERT(rc == 0);
			/* XXX error handling */
		}
	}

	return (chain);
}
/*
 * HAMMER2 inode locks
 *
 * Exclusive variant.  The inode gains a reference and its topology lock
 * is obtained exclusively, then the underlying chain is locked with its
 * data resolved and returned to the caller.
 *
 * An inode's ip->chain pointer is resolved and stable while the inode is
 * locked, and can be cleaned out at any time (become NULL) when the inode
 * is not locked.
 *
 * NOTE: We don't combine the inode/chain lock because putting away an
 *       inode would otherwise confuse multiple lock holders of the inode.
 */
hammer2_chain_t *
hammer2_inode_lock_ex(hammer2_inode_t *ip)
{
	hammer2_chain_t *chain;

	hammer2_inode_ref(ip);
	ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE);

	for (;;) {
		/*
		 * ip->chain fixup.  Certain duplications used to move
		 * inodes into indirect blocks (for example) can cause
		 * ip->chain to become stale.  Follow next_parent until the
		 * refactor test no longer fires and repoint the inode if
		 * that landed on a different chain.
		 */
		chain = ip->chain;
		if (hammer2_chain_refactor_test(chain, 1)) {
			spin_lock(&chain->core->cst.spin);
			while (hammer2_chain_refactor_test(chain, 1))
				chain = chain->next_parent;
			if (ip->chain == chain) {
				spin_unlock(&chain->core->cst.spin);
			} else {
				hammer2_chain_ref(chain);
				spin_unlock(&chain->core->cst.spin);
				hammer2_inode_repoint(ip, NULL, chain);
				hammer2_chain_drop(chain);
			}
		}

		KKASSERT(chain != NULL);	/* for now */
		hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS);

		/*
		 * Resolve duplication races: the chain is only good if the
		 * refactor test is still clear now that it is locked.
		 */
		if (hammer2_chain_refactor_test(chain, 1) == 0)
			return (chain);
		hammer2_chain_unlock(chain);
	}
}
/*
 * Release one reference on the cluster.
 *
 * Every call drops each non-NULL chain in the cluster once.  When the
 * caller holds the last reference the array slots are also cleared, and
 * once the reference count transitions from 1 to 0 the cluster itself is
 * freed.
 */
void
hammer2_cluster_drop(hammer2_cluster_t *cluster)
{
	int slot;

	KKASSERT(cluster->refs > 0);

	for (slot = 0; slot < cluster->nchains; ++slot) {
		hammer2_chain_t *elm = cluster->array[slot];

		if (elm == NULL)
			continue;
		hammer2_chain_drop(elm);
		if (cluster->refs == 1)
			cluster->array[slot] = NULL;
	}

	/*
	 * Only the final reference tears the cluster down.
	 */
	if (atomic_fetchadd_int(&cluster->refs, -1) != 1)
		return;

	cluster->focus = NULL;
	kfree(cluster, M_HAMMER2);
	/* cluster = NULL; safety */
}
/*
 * Copy one element (idx) of (cluster) into the inode's embedded cluster.
 * Used by the synchronization threads to piecemeal update inodes.  Does
 * not change focus and requires inode to be re-locked to clean-up flags
 * (XXX).
 *
 * If (idx) lies beyond the inode's current element count, the slots from
 * the old count through (idx) are zeroed and flagged INVALID and the
 * count is extended to cover (idx).
 *
 * The slot update is made under the exclusive cluster spinlock.  The ref
 * on the new chain and the drop of the old chain are issued after the
 * spinlock has been released.
 */
void
hammer2_inode_repoint_one(hammer2_inode_t *ip, hammer2_cluster_t *cluster,
			  int idx)
{
	hammer2_chain_t *prev;
	hammer2_chain_t *next;
	int slot;
	int changed;

	hammer2_spin_ex(&ip->cluster_spin);
	KKASSERT(idx < cluster->nchains);

	next = cluster->array[idx].chain;
	prev = NULL;
	if (idx < ip->cluster.nchains) {
		prev = ip->cluster.array[idx].chain;
	} else {
		/*
		 * Grow the inode's cluster, new slots start out invalid.
		 */
		for (slot = ip->cluster.nchains; slot <= idx; ++slot) {
			bzero(&ip->cluster.array[slot],
			      sizeof(ip->cluster.array[slot]));
			ip->cluster.array[slot].flags |= HAMMER2_CITEM_INVALID;
		}
		ip->cluster.nchains = idx + 1;
	}

	changed = (prev != next);
	if (changed) {
		/*
		 * Install the chain and inherit the source element's
		 * INVALID state.
		 */
		ip->cluster.array[idx].chain = next;
		ip->cluster.array[idx].flags &= ~HAMMER2_CITEM_INVALID;
		ip->cluster.array[idx].flags |= cluster->array[idx].flags &
						HAMMER2_CITEM_INVALID;
	}
	hammer2_spin_unex(&ip->cluster_spin);

	if (!changed)
		return;

	/*
	 * Reference adjustments, made outside the spinlock.
	 */
	if (next != NULL)
		hammer2_chain_ref(next);
	if (prev != NULL)
		hammer2_chain_drop(prev);
}
/*
 * XXX initial NULL cluster needs reworking (pass **clusterp ?)
 *
 * The raw scan function is similar to lookup/next but does not seek to a
 * key.  Each non-NULL element of (cluster) is advanced with
 * hammer2_chain_scan() against the matching element of (cparent).
 *
 * NOTE(review): the historical comment here says a NULL chain acquires
 * the first sub-chain under the parent, but as written a NULL element is
 * simply counted and skipped, and (cluster) itself is dereferenced
 * unconditionally.  See the XXX above.
 *
 * Elements whose parent slot is NULL cannot be iterated.  They are
 * released (dropped when HAMMER2_LOOKUP_NOLOCK is set, unlocked otherwise)
 * and the slot is cleared so the released chain cannot be released a
 * second time, either by a later call or by the hammer2_cluster_drop()
 * below.
 *
 * Returns the updated cluster, or NULL (after dropping the cluster) when
 * every element is NULL.
 */
hammer2_cluster_t *
hammer2_cluster_scan(hammer2_cluster_t *cparent, hammer2_cluster_t *cluster,
		     int flags)
{
	hammer2_chain_t *chain;
	int null_count;
	int i;

	null_count = 0;

	for (i = 0; i < cparent->nchains; ++i) {
		chain = cluster->array[i];
		if (chain == NULL) {
			++null_count;
			continue;
		}
		if (cparent->array[i] == NULL) {
			if (flags & HAMMER2_LOOKUP_NOLOCK)
				hammer2_chain_drop(chain);
			else
				hammer2_chain_unlock(chain);
			/*
			 * The chain has been released, do not leave a
			 * stale pointer behind in the cluster.
			 */
			cluster->array[i] = NULL;
			++null_count;
			continue;
		}

		chain = hammer2_chain_scan(cparent->array[i], chain,
					   &cparent->cache_index[i], flags);
		cluster->array[i] = chain;
		if (chain == NULL)
			++null_count;
	}

	if (null_count == i) {
		hammer2_cluster_drop(cluster);
		cluster = NULL;
	}
	return(cluster);
}
/*
 * Unlink the file from the specified directory inode.  The directory inode
 * does not need to be locked.
 *
 * isdir determines whether a directory/non-directory check should be made:
 *
 *	isdir == 0	The entry must not be a directory, else EISDIR.
 *	isdir == 1	The entry must be a directory, else ENOTDIR.  The
 *			directory must also be empty, else ENOTEMPTY.
 *	isdir <  0	No type check and no empty check is made.
 *
 * (For hardlink pointers the check is made against the target type.)
 *
 * retain_ip is compared against the inode being removed and the result is
 * passed as the final argument to hammer2_chain_delete().
 *
 * Returns 0 on success, ENOENT if the name is not found, or one of the
 * errors described above.
 */
int
hammer2_unlink_file(hammer2_inode_t *dip, const uint8_t *name, size_t name_len,
		    int isdir, hammer2_inode_t *retain_ip)
{
	hammer2_mount_t *hmp;
	hammer2_chain_t *parent;
	hammer2_chain_t *chain;
	hammer2_chain_t *dparent;
	hammer2_chain_t *dchain;
	hammer2_key_t lhc;
	hammer2_inode_t *ip;
	hammer2_inode_t *oip;
	int error;
	uint8_t type;

	error = 0;
	oip = NULL;
	hmp = dip->hmp;
	lhc = hammer2_dirhash(name, name_len);

	/*
	 * Search for the filename in the directory.  All entries sharing
	 * the hash are iterated until the name matches exactly.
	 */
	parent = &dip->chain;
	hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
	chain = hammer2_chain_lookup(hmp, &parent,
				     lhc, lhc + HAMMER2_DIRHASH_LOMASK,
				     0);
	while (chain) {
		if (chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
		    chain->u.ip &&
		    name_len == chain->data->ipdata.name_len &&
		    bcmp(name, chain->data->ipdata.filename, name_len) == 0) {
			break;
		}
		chain = hammer2_chain_next(hmp, &parent, chain,
					   lhc, lhc + HAMMER2_DIRHASH_LOMASK,
					   0);
	}

	/*
	 * Not found or wrong type (isdir < 0 disables the type check).
	 */
	if (chain == NULL) {
		hammer2_chain_unlock(hmp, parent);
		return ENOENT;
	}
	if ((type = chain->data->ipdata.type) == HAMMER2_OBJTYPE_HARDLINK)
		type = chain->data->ipdata.target_type;

	/*
	 * A directory where a non-directory was expected is EISDIR, a
	 * non-directory where a directory was expected is ENOTDIR.  (The
	 * two error codes were previously transposed.)
	 */
	if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir == 0) {
		error = EISDIR;
		goto done;
	}
	if (type != HAMMER2_OBJTYPE_DIRECTORY && isdir == 1) {
		error = ENOTDIR;
		goto done;
	}

	/*
	 * Hardlink must be resolved.  We can't hold parent locked while we
	 * do this or we could deadlock.
	 *
	 * NOTE(review): the error returned by hammer2_hardlink_find() is
	 * not acted upon and is overwritten further below; confirm its
	 * failure semantics before adding handling.
	 */
	if (chain->data->ipdata.type == HAMMER2_OBJTYPE_HARDLINK) {
		hammer2_chain_unlock(hmp, parent);
		parent = NULL;
		error = hammer2_hardlink_find(dip, &chain, &oip);
	}

	/*
	 * If this is a directory the directory must be empty.  However, if
	 * isdir < 0 we are doing a rename and the directory does not have
	 * to be empty.
	 *
	 * NOTE: We check the full key range here which covers both visible
	 *	 and invisible entries.  Theoretically there should be no
	 *	 invisible (hardlink target) entries if there are no visible
	 *	 entries.
	 */
	if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir >= 0) {
		dparent = chain;
		hammer2_chain_lock(hmp, dparent, HAMMER2_RESOLVE_ALWAYS);
		dchain = hammer2_chain_lookup(hmp, &dparent,
					      0, (hammer2_key_t)-1,
					      HAMMER2_LOOKUP_NODATA);
		if (dchain) {
			hammer2_chain_unlock(hmp, dchain);
			hammer2_chain_unlock(hmp, dparent);
			error = ENOTEMPTY;
			goto done;
		}
		hammer2_chain_unlock(hmp, dparent);
		dparent = NULL;
		/* dchain NULL */
	}

	/*
	 * Ok, we can now unlink the chain.  We always decrement nlinks even
	 * if the entry can be deleted in case someone has the file open and
	 * does an fstat().
	 *
	 * The chain itself will no longer be in the on-media topology but
	 * can still be flushed to the media (e.g. if an open descriptor
	 * remains).  When the last vnode/ip ref goes away the chain will
	 * be marked unmodified, avoiding any further (now unnecessary) I/O.
	 */
	if (oip) {
		/*
		 * If this was a hardlink we first delete the hardlink
		 * pointer entry.
		 */
		parent = oip->chain.parent;
		hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
		hammer2_chain_lock(hmp, &oip->chain, HAMMER2_RESOLVE_ALWAYS);
		hammer2_chain_delete(hmp, parent, &oip->chain,
				    (retain_ip == oip));
		hammer2_chain_unlock(hmp, &oip->chain);
		hammer2_chain_unlock(hmp, parent);
		parent = NULL;

		/*
		 * Then decrement nlinks on hardlink target.  When the last
		 * link goes away the target is deleted as well, which
		 * requires cycling the chain lock so its parent can be
		 * locked first.
		 */
		ip = chain->u.ip;
		if (ip->ip_data.nlinks == 1) {
			dparent = chain->parent;
			hammer2_chain_ref(hmp, chain);
			hammer2_chain_unlock(hmp, chain);
			hammer2_chain_lock(hmp, dparent,
					   HAMMER2_RESOLVE_ALWAYS);
			hammer2_chain_lock(hmp, chain,
					   HAMMER2_RESOLVE_ALWAYS);
			hammer2_chain_drop(hmp, chain);
			hammer2_chain_modify(hmp, chain, 0);
			--ip->ip_data.nlinks;
			hammer2_chain_delete(hmp, dparent, chain, 0);
			hammer2_chain_unlock(hmp, dparent);
		} else {
			hammer2_chain_modify(hmp, chain, 0);
			--ip->ip_data.nlinks;
		}
	} else {
		/*
		 * Otherwise this was not a hardlink and we can just
		 * remove the entry and decrement nlinks.
		 */
		ip = chain->u.ip;
		hammer2_chain_modify(hmp, chain, 0);
		--ip->ip_data.nlinks;
		hammer2_chain_delete(hmp, parent, chain,
				    (retain_ip == ip));
	}

	error = 0;

done:
	if (chain)
		hammer2_chain_unlock(hmp, chain);
	if (parent)
		hammer2_chain_unlock(hmp, parent);
	if (oip)
		hammer2_chain_drop(oip->hmp, &oip->chain);

	return error;
}
/*
 * (Frontend) collect a response from a running cluster op.
 *
 * Responses are fed from all appropriate nodes concurrently
 * and collected into a cohesive response >= collect_key.
 *
 * The collector will return the instant quorum or other requirements
 * are met, even if some nodes get behind or become non-responsive.
 *
 * HAMMER2_XOP_COLLECT_NOWAIT	- Used to 'poll' a completed collection,
 *				  usually called synchronously from the
 *				  node XOPs for the strategy code to
 *				  fake the frontend collection and complete
 *				  the BIO as soon as possible.  With this
 *				  flag EINPROGRESS is returned instead of
 *				  sleeping.
 *
 * HAMMER2_XOP_COLLECT_WAITALL	- Treat the collection as still in
 *				  progress until xop->run_mask equals
 *				  HAMMER2_XOPMASK_VOP.
 *
 * HAMMER2_XOP_SYNCHRONIZER	- Request synchronization with a particular
 *				  cluster index, prevents looping when that
 *				  index is out of sync so caller can act on
 *				  the out of sync element.  ESRCH and EDEADLK
 *				  can be returned if this flag is specified.
 *				  NOTE(review): this flag is not tested
 *				  anywhere in this function as written;
 *				  ESRCH and EDEADLK are always handled
 *				  internally.
 *
 * Returns 0 on success plus a filled out xop->cluster structure.
 * Return ENOENT on normal termination.
 * Otherwise return an error.
 */
int
hammer2_xop_collect(hammer2_xop_head_t *xop, int flags)
{
	hammer2_xop_fifo_t *fifo;
	hammer2_chain_t *chain;
	hammer2_key_t lokey;
	int error;
	int keynull;
	int adv;		/* advance the element */
	int i;
	uint32_t check_counter;

loop:
	/*
	 * First loop tries to advance pieces of the cluster which
	 * are out of sync.
	 *
	 * check_counter is sampled before the scan so a change made while
	 * we are scanning can be detected prior to sleeping.
	 */
	lokey = HAMMER2_KEY_MAX;
	keynull = HAMMER2_CHECK_NULL;
	check_counter = xop->check_counter;
	cpu_lfence();

	for (i = 0; i < xop->cluster.nchains; ++i) {
		/*
		 * An element is advanced when it is empty or when its key
		 * is below the collection key.  Otherwise it participates
		 * in the lowest-key calculation.
		 */
		chain = xop->cluster.array[i].chain;
		if (chain == NULL) {
			adv = 1;
		} else if (chain->bref.key < xop->collect_key) {
			adv = 1;
		} else {
			keynull &= ~HAMMER2_CHECK_NULL;
			if (lokey > chain->bref.key)
				lokey = chain->bref.key;
			adv = 0;
		}
		if (adv == 0)
			continue;

		/*
		 * Advance element if possible, advanced element may be NULL.
		 */
		if (chain) {
			hammer2_chain_unlock(chain);
			hammer2_chain_drop(chain);
		}
		fifo = &xop->collect[i];
		if (fifo->ri != fifo->wi) {
			/*
			 * Pull the next result for this element off its
			 * fifo, then re-evaluate the same index against
			 * the collection key.
			 */
			cpu_lfence();
			chain = fifo->array[fifo->ri & HAMMER2_XOPFIFO_MASK];
			++fifo->ri;
			xop->cluster.array[i].chain = chain;
			if (chain == NULL) {
				/* XXX */
				xop->cluster.array[i].flags |=
							HAMMER2_CITEM_NULL;
			}
			if (fifo->wi - fifo->ri < HAMMER2_XOPFIFO / 2)
				wakeup(xop);	/* XXX optimize */
			--i;		/* loop on same index */
		} else {
			/*
			 * Retain CITEM_NULL flag.  If set just repeat EOF.
			 * If not, the NULL,0 combination indicates an
			 * operation in-progress.
			 */
			xop->cluster.array[i].chain = NULL;
			/* retain any CITEM_NULL setting */
		}
	}

	/*
	 * Determine whether the lowest collected key meets clustering
	 * requirements.  Returns:
	 *
	 * 0	 	 - key valid, cluster can be returned.
	 *
	 * ENOENT	 - normal end of scan, return ENOENT.
	 *
	 * ESRCH	 - sufficient elements collected, quorum agreement
	 *		   that lokey is not a valid element and should be
	 *		   skipped.
	 *
	 * EDEADLK	 - sufficient elements collected, no quorum agreement
	 *		   (and no agreement possible).  In this situation a
	 *		   repair is needed, for now we loop.
	 *
	 * EINPROGRESS	 - insufficient elements collected to resolve, wait
	 *		   for event and loop.
	 */
	if ((flags & HAMMER2_XOP_COLLECT_WAITALL) &&
	    xop->run_mask != HAMMER2_XOPMASK_VOP) {
		error = EINPROGRESS;
	} else {
		error = hammer2_cluster_check(&xop->cluster, lokey, keynull);
	}
	if (error == EINPROGRESS) {
		/*
		 * Only sleep if check_counter has not moved since it was
		 * sampled.  The interlock is armed and the counter checked
		 * again before the actual sleep, which is bounded to 60
		 * seconds (hz*60 ticks).  A NOWAIT caller gets EINPROGRESS
		 * back instead.
		 */
		if (xop->check_counter == check_counter) {
			if (flags & HAMMER2_XOP_COLLECT_NOWAIT)
				goto done;
			tsleep_interlock(&xop->check_counter, 0);
			cpu_lfence();
			if (xop->check_counter == check_counter) {
				tsleep(&xop->check_counter, PINTERLOCKED,
					"h2coll", hz*60);
			}
		}
		goto loop;
	}
	if (error == ESRCH) {
		/*
		 * Skip the rejected key and rescan, or terminate the scan
		 * if the key space is exhausted.
		 */
		if (lokey != HAMMER2_KEY_MAX) {
			xop->collect_key = lokey + 1;
			goto loop;
		}
		error = ENOENT;
	}
	if (error == EDEADLK) {
		/*
		 * Handled the same way as ESRCH, with a console message.
		 */
		kprintf("hammer2: no quorum possible lokey %016jx\n",
			lokey);
		if (lokey != HAMMER2_KEY_MAX) {
			xop->collect_key = lokey + 1;
			goto loop;
		}
		error = ENOENT;
	}

	/*
	 * Position the collection key just past the key being returned,
	 * without wrapping.
	 */
	if (lokey == HAMMER2_KEY_MAX)
		xop->collect_key = lokey;
	else
		xop->collect_key = lokey + 1;
done:
	return error;
}
/*
 * Find a specific PFS by name.
 *
 * (data) is a hammer2_ioc_pfs_t whose name field selects the PFS; the
 * name buffer is forcibly NUL-terminated.  The super-root directory of
 * device slot 0 is searched over the name's directory hash range for an
 * inode with a matching filename.
 *
 * On a match the name_key, pfs_type, pfs_subtype, pfs_clid and pfs_fsid
 * fields are filled in and 0 is returned.  ENOENT is returned when no
 * match exists and EINVAL when device slot 0 is not populated.
 */
static int
hammer2_ioctl_pfs_lookup(hammer2_inode_t *ip, void *data)
{
	hammer2_ioc_pfs_t *pfs = data;
	const hammer2_inode_data_t *ripdata;
	hammer2_dev_t *hmp;
	hammer2_chain_t *parent;
	hammer2_chain_t *chain;
	hammer2_key_t key_next;
	hammer2_key_t lhc;
	int cache_index = -1;
	int error = 0;
	size_t len;

	hmp = ip->pmp->pfs_hmps[0];
	if (hmp == NULL)
		return (EINVAL);

	hammer2_inode_lock(hmp->spmp->iroot, HAMMER2_RESOLVE_SHARED);
	parent = hammer2_inode_chain(hmp->spmp->iroot, 0,
				     HAMMER2_RESOLVE_ALWAYS |
				     HAMMER2_RESOLVE_SHARED);

	pfs->name[sizeof(pfs->name) - 1] = 0;
	len = strlen(pfs->name);
	lhc = hammer2_dirhash(pfs->name, len);

	/*
	 * Walk every entry in the hash range, stopping on an inode whose
	 * filename matches exactly.
	 */
	for (chain = hammer2_chain_lookup(&parent, &key_next,
					  lhc, lhc + HAMMER2_DIRHASH_LOMASK,
					  &cache_index,
					  HAMMER2_LOOKUP_SHARED);
	     chain != NULL;
	     chain = hammer2_chain_next(&parent, chain, &key_next,
					key_next,
					lhc + HAMMER2_DIRHASH_LOMASK,
					&cache_index,
					HAMMER2_LOOKUP_SHARED)) {
		if (chain->bref.type != HAMMER2_BREF_TYPE_INODE)
			continue;
		ripdata = &chain->data->ipdata;
		if (ripdata->meta.name_len == len &&
		    bcmp(ripdata->filename, pfs->name, len) == 0) {
			break;
		}
		ripdata = NULL;	/* safety */
	}

	/*
	 * Load the data being returned by the ioctl.
	 */
	if (chain == NULL) {
		error = ENOENT;
	} else {
		ripdata = &chain->data->ipdata;
		pfs->name_key = ripdata->meta.name_key;
		pfs->pfs_type = ripdata->meta.pfs_type;
		pfs->pfs_subtype = ripdata->meta.pfs_subtype;
		pfs->pfs_clid = ripdata->meta.pfs_clid;
		pfs->pfs_fsid = ripdata->meta.pfs_fsid;
		ripdata = NULL;

		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
	}

	if (parent != NULL) {
		hammer2_chain_unlock(parent);
		hammer2_chain_drop(parent);
	}
	hammer2_inode_unlock(hmp->spmp->iroot);

	return (error);
}
/*
 * Create a new PFS under the super-root.
 *
 * (data) is a hammer2_ioc_pfs_t supplying the name and the pfs_type,
 * pfs_subtype, pfs_clid and pfs_fsid to record in the new PFS root
 * inode.  The name buffer is forcibly NUL-terminated.
 *
 * Returns EINVAL if device slot 0 is not populated or the name is empty,
 * EEXIST if hammer2_ioctl_pfs_lookup() finds the name, and otherwise the
 * error reported by hammer2_inode_create() (0 on success).
 */
static int
hammer2_ioctl_pfs_create(hammer2_inode_t *ip, void *data)
{
	hammer2_ioc_pfs_t *pfs = data;
	hammer2_inode_data_t *nipdata;
	hammer2_chain_t *nchain;
	hammer2_dev_t *hmp;
	hammer2_inode_t *nip = NULL;
	hammer2_tid_t mtid;
	int error;

	hmp = ip->pmp->pfs_hmps[0];
	if (hmp == NULL)
		return (EINVAL);

	if (pfs->name[0] == 0)
		return(EINVAL);
	pfs->name[sizeof(pfs->name) - 1] = 0;	/* ensure 0-termination */

	if (hammer2_ioctl_pfs_lookup(ip, pfs) == 0)
		return(EEXIST);

	hammer2_trans_init(hmp->spmp, 0);
	mtid = hammer2_trans_sub(hmp->spmp);
	nip = hammer2_inode_create(hmp->spmp->iroot, NULL, NULL,
				   pfs->name, strlen(pfs->name), 0,
				   1, HAMMER2_OBJTYPE_DIRECTORY, 0,
				   HAMMER2_INSERT_PFSROOT, &error);
	if (error != 0)
		goto done;

	/*
	 * Mark the inode and its chain modified, then stamp the PFS
	 * identity into the inode meta-data.
	 */
	hammer2_inode_modify(nip);
	nchain = hammer2_inode_chain(nip, 0, HAMMER2_RESOLVE_ALWAYS);
	hammer2_chain_modify(nchain, mtid, 0);
	nipdata = &nchain->data->ipdata;

	nip->meta.pfs_type = pfs->pfs_type;
	nip->meta.pfs_subtype = pfs->pfs_subtype;
	nip->meta.pfs_clid = pfs->pfs_clid;
	nip->meta.pfs_fsid = pfs->pfs_fsid;
	nip->meta.op_flags |= HAMMER2_OPFLAG_PFSROOT;

	/*
	 * Set default compression and check algorithm.  This
	 * can be changed later.
	 *
	 * Do not allow compression on PFS's with the special name
	 * "boot", the boot loader can't decompress (yet).
	 */
	nip->meta.comp_algo =
		HAMMER2_ENC_ALGO(HAMMER2_COMP_NEWFS_DEFAULT);
	nip->meta.check_algo =
		HAMMER2_ENC_ALGO( HAMMER2_CHECK_ISCSI32);

	if (strcasecmp(pfs->name, "boot") == 0) {
		nip->meta.comp_algo =
			HAMMER2_ENC_ALGO(HAMMER2_COMP_AUTOZERO);
	}
#if 0
	hammer2_blockref_t bref;
	/* XXX new PFS needs to be rescanned / added */
	bref = nchain->bref;
	kprintf("ADD LOCAL PFS (IOCTL): %s\n", nipdata->filename);
	hammer2_pfsalloc(nchain, nipdata, bref.modify_tid);
#endif
	/* XXX rescan */
	hammer2_chain_unlock(nchain);
	hammer2_chain_drop(nchain);

	/*
	 * Super-root isn't mounted, fsync it
	 */
	hammer2_inode_ref(nip);
	hammer2_inode_unlock(nip);
	hammer2_inode_fsync(nip);
	hammer2_inode_drop(nip);

done:
	hammer2_trans_done(hmp->spmp);

	return (error);
}
/*
 * Drop a reference on the chain embedded in the inode, using the inode's
 * own mount.
 */
void
hammer2_inode_unlock_nlinks(hammer2_inode_t *ip)
{
	hammer2_chain_t *ichain = &ip->chain;

	hammer2_chain_drop(ip->hmp, ichain);
}
/*
 * Used to scan and retrieve PFS information.  PFS's are directories under
 * the super-root.
 *
 * To scan PFSs pass name_key=0.  The function will scan for the next
 * PFS and set all fields, as well as set name_next to the next key.
 * When no PFSs remain, name_next is set to (hammer2_key_t)-1.
 *
 * To retrieve a particular PFS by key, specify the key but note that
 * the ioctl will return the lowest key >= specified_key, so the caller
 * must verify the key.
 *
 * To retrieve the PFS associated with the file descriptor, pass
 * name_key set to (hammer2_key_t)-1.  In this mode the root inode of the
 * descriptor's own PFS is both the inode that is locked and the inode
 * whose chain is reported.  (The chain was previously taken from the
 * super-root inode while the PFS root inode was the one being locked.)
 *
 * Returns 0 on success, ENOENT if no PFS is found, or EINVAL if device
 * slot 0 is not populated.
 */
static int
hammer2_ioctl_pfs_get(hammer2_inode_t *ip, void *data)
{
	const hammer2_inode_data_t *ripdata;
	hammer2_dev_t *hmp;
	hammer2_ioc_pfs_t *pfs;
	hammer2_chain_t *parent;
	hammer2_chain_t *chain;
	hammer2_key_t key_next;
	hammer2_key_t save_key;
	int cache_index = -1;
	int error;

	hmp = ip->pmp->pfs_hmps[0];
	if (hmp == NULL)
		return (EINVAL);

	pfs = data;
	save_key = pfs->name_key;
	error = 0;

	/*
	 * Setup.  A NULL parent marks the single-PFS (descriptor) mode
	 * for the remainder of the function.
	 */
	if (save_key == (hammer2_key_t)-1) {
		hammer2_inode_lock(ip->pmp->iroot, 0);
		parent = NULL;
		chain = hammer2_inode_chain(ip->pmp->iroot, 0,
					    HAMMER2_RESOLVE_ALWAYS |
					    HAMMER2_RESOLVE_SHARED);
	} else {
		hammer2_inode_lock(hmp->spmp->iroot, 0);
		parent = hammer2_inode_chain(hmp->spmp->iroot, 0,
					    HAMMER2_RESOLVE_ALWAYS |
					    HAMMER2_RESOLVE_SHARED);
		chain = hammer2_chain_lookup(&parent, &key_next,
					    pfs->name_key, HAMMER2_KEY_MAX,
					    &cache_index,
					    HAMMER2_LOOKUP_SHARED);
	}

	/*
	 * Locate next PFS.  In descriptor mode there is nothing to iterate,
	 * so a non-inode chain simply yields no result.
	 */
	while (chain) {
		if (chain->bref.type == HAMMER2_BREF_TYPE_INODE)
			break;
		if (parent == NULL) {
			hammer2_chain_unlock(chain);
			hammer2_chain_drop(chain);
			chain = NULL;
			break;
		}
		chain = hammer2_chain_next(&parent, chain, &key_next,
					    key_next, HAMMER2_KEY_MAX,
					    &cache_index,
					    HAMMER2_LOOKUP_SHARED);
	}

	/*
	 * Load the data being returned by the ioctl.
	 */
	if (chain) {
		ripdata = &chain->data->ipdata;
		pfs->name_key = ripdata->meta.name_key;
		pfs->pfs_type = ripdata->meta.pfs_type;
		pfs->pfs_subtype = ripdata->meta.pfs_subtype;
		pfs->pfs_clid = ripdata->meta.pfs_clid;
		pfs->pfs_fsid = ripdata->meta.pfs_fsid;
		KKASSERT(ripdata->meta.name_len < sizeof(pfs->name));
		bcopy(ripdata->filename, pfs->name, ripdata->meta.name_len);
		pfs->name[ripdata->meta.name_len] = 0;
		ripdata = NULL;	/* safety */

		/*
		 * Calculate name_next, if any.
		 */
		if (parent == NULL) {
			pfs->name_next = (hammer2_key_t)-1;
		} else {
			chain = hammer2_chain_next(&parent, chain, &key_next,
						    key_next, HAMMER2_KEY_MAX,
						    &cache_index,
						    HAMMER2_LOOKUP_SHARED);
			if (chain)
				pfs->name_next = chain->bref.key;
			else
				pfs->name_next = (hammer2_key_t)-1;
		}
	} else {
		pfs->name_next = (hammer2_key_t)-1;
		error = ENOENT;
	}

	/*
	 * Cleanup
	 */
	if (chain) {
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
	}
	if (parent) {
		hammer2_chain_unlock(parent);
		hammer2_chain_drop(parent);
	}
	if (save_key == (hammer2_key_t)-1) {
		hammer2_inode_unlock(ip->pmp->iroot);
	} else {
		hammer2_inode_unlock(hmp->spmp->iroot);
	}

	return (error);
}
/*
 * Each out of sync node sync-thread must issue an all-nodes XOP scan of
 * the inode.  This creates a multiplication effect since the XOP scan itself
 * issues to all nodes.  However, this is the only way we can safely
 * synchronize nodes which might have disparate I/O bandwidths and the only
 * way we can safely deal with stalled nodes.
 *
 * The local cluster element (thr->clindex) is iterated in parallel with an
 * XOP scan of the other elements.  Local records that the XOP scan does not
 * have are destroyed, records the local element is missing are inserted,
 * and records that differ in modify_tid are replaced.  Inode records that
 * were inserted or replaced are pushed onto (list) for the caller to
 * synchronize later.
 *
 * Returns EINPROGRESS if the inode's cluster has no focus, EAGAIN if
 * deferrals were queued and the scan otherwise had no error, or the final
 * result of hammer2_xop_collect().
 *
 * NOTE(review): nerror is assigned from the destroy/replace/insert helpers
 * but never examined, so failures in those helpers do not affect the
 * return value.
 */
static
int
hammer2_sync_slaves(hammer2_thread_t *thr, hammer2_inode_t *ip,
		    hammer2_deferred_list_t *list)
{
	hammer2_xop_scanall_t *xop;
	hammer2_chain_t *parent;
	hammer2_chain_t *chain;
	hammer2_pfs_t *pmp;
	hammer2_key_t key_next;
	hammer2_tid_t sync_tid;
	int cache_index = -1;
	int needrescan;
	int wantupdate;
	int error;
	int nerror;
	int idx;
	int n;

	pmp = ip->pmp;
	idx = thr->clindex;	/* cluster node we are responsible for */
	needrescan = 0;
	wantupdate = 0;

	/*
	 * The focus supplies the modify_tid we synchronize to.
	 */
	if (ip->cluster.focus == NULL)
		return (EINPROGRESS);
	sync_tid = ip->cluster.focus->bref.modify_tid;

#if 0
	/*
	 * Nothing to do if all slaves are synchronized.
	 * Nothing to do if cluster not authoritatively readable.
	 */
	if (pmp->cluster_flags & HAMMER2_CLUSTER_SSYNCED)
		return(0);
	if ((pmp->cluster_flags & HAMMER2_CLUSTER_RDHARD) == 0)
		return(HAMMER2_ERROR_INCOMPLETE);
#endif

	error = 0;

	/*
	 * The inode is left unlocked during the scan.  Issue a XOP
	 * that does *not* include our cluster index to iterate
	 * properly synchronized elements and resolve our cluster index
	 * against it.
	 *
	 * NOTE(review): parent is dereferenced without a NULL check;
	 * presumably hammer2_inode_chain() cannot return NULL for idx
	 * here -- confirm.
	 */
	hammer2_inode_lock(ip, HAMMER2_RESOLVE_SHARED);
	xop = hammer2_xop_alloc(ip, HAMMER2_XOP_MODIFYING);
	xop->key_beg = HAMMER2_KEY_MIN;
	xop->key_end = HAMMER2_KEY_MAX;
	hammer2_xop_start_except(&xop->head, hammer2_xop_scanall, idx);
	parent = hammer2_inode_chain(ip, idx,
				     HAMMER2_RESOLVE_ALWAYS |
				     HAMMER2_RESOLVE_SHARED);
	if (parent->bref.modify_tid != sync_tid)
		wantupdate = 1;

	hammer2_inode_unlock(ip);

	/*
	 * Prime both iterations: the first local record and the first
	 * collected XOP record.
	 */
	chain = hammer2_chain_lookup(&parent, &key_next,
				     HAMMER2_KEY_MIN, HAMMER2_KEY_MAX,
				     &cache_index,
				     HAMMER2_LOOKUP_SHARED |
				     HAMMER2_LOOKUP_NODIRECT |
				     HAMMER2_LOOKUP_NODATA);
	error = hammer2_xop_collect(&xop->head, 0);
	kprintf("XOP_INITIAL xop=%p clindex %d on %s\n", xop, thr->clindex,
		pmp->pfs_names[thr->clindex]);

	for (;;) {
		/*
		 * We are done if our scan is done and the XOP scan is done.
		 * We are done if the XOP scan failed (that is, we don't
		 * have authoritative data to synchronize with).
		 */
		int advance_local = 0;
		int advance_xop = 0;
		int dodefer = 0;
		hammer2_chain_t *focus;

		/*
		 * Debug output.  Note that cluster index 1 is hard-coded
		 * here rather than being derived from idx.
		 */
		kprintf("loop xop=%p chain[1]=%p lockcnt=%d\n",
			xop,
			xop->head.cluster.array[1].chain,
			(xop->head.cluster.array[1].chain ?
			    xop->head.cluster.array[1].chain->lockcnt : -1)
			);

		if (chain == NULL && error == ENOENT)
			break;
		if (error && error != ENOENT)
			break;

		/*
		 * Compare
		 */
		if (chain && error == ENOENT) {
			/*
			 * If we have local chains but the XOP scan is done,
			 * the chains need to be deleted.
			 */
			n = -1;
			focus = NULL;
		} else if (chain == NULL) {
			/*
			 * If our local scan is done but the XOP scan is not,
			 * we need to create the missing chain(s).
			 */
			n = 1;
			focus = xop->head.cluster.focus;
		} else {
			/*
			 * Otherwise compare to determine the action
			 * needed.
			 */
			focus = xop->head.cluster.focus;
			n = hammer2_chain_cmp(chain, focus);
		}

		/*
		 * Take action based on comparison results.
		 */
		if (n < 0) {
			/*
			 * Delete extraneous local data.  This will
			 * automatically advance the chain.
			 */
			nerror = hammer2_sync_destroy(thr, &parent, &chain,
						      0, idx);
		} else if (n == 0 && chain->bref.modify_tid !=
				     focus->bref.modify_tid) {
			/*
			 * Matching key but local data or meta-data requires
			 * updating.  If we will recurse, we still need to
			 * update to compatible content first but we do not
			 * synchronize modify_tid until the entire recursion
			 * has completed successfully.
			 *
			 * Neither iteration is advanced in this case.
			 */
			if (focus->bref.type == HAMMER2_BREF_TYPE_INODE) {
				nerror = hammer2_sync_replace(
						thr, parent, chain,
						0,
						idx, focus);
				dodefer = 1;
			} else {
				nerror = hammer2_sync_replace(
						thr, parent, chain,
						focus->bref.modify_tid,
						idx, focus);
			}
		} else if (n == 0) {
			/*
			 * 100% match, advance both
			 */
			advance_local = 1;
			advance_xop = 1;
			nerror = 0;
		} else if (n > 0) {
			/*
			 * Insert missing local data.
			 *
			 * If we will recurse, we still need to update to
			 * compatible content first but we do not synchronize
			 * modify_tid until the entire recursion has
			 * completed successfully.
			 */
			if (focus->bref.type == HAMMER2_BREF_TYPE_INODE) {
				nerror = hammer2_sync_insert(
						thr, &parent, &chain,
						0,
						idx, focus);
				dodefer = 2;
			} else {
				nerror = hammer2_sync_insert(
						thr, &parent, &chain,
						focus->bref.modify_tid,
						idx, focus);
			}
			advance_local = 1;
			advance_xop = 1;
		}

		/*
		 * We cannot recurse depth-first because the XOP is still
		 * running in node threads for this scan.  Create a placemarker
		 * by obtaining and record the hammer2_inode.
		 *
		 * We excluded our node from the XOP so we must temporarily
		 * add it to xop->head.cluster so it is properly incorporated
		 * into the inode.
		 *
		 * The deferral is pushed onto a LIFO list for bottom-up
		 * synchronization.
		 *
		 * NOTE(review): this tests error (the XOP collection
		 * status) rather than nerror (the status of the
		 * replace/insert just performed) -- confirm intent.
		 */
		if (error == 0 && dodefer) {
			hammer2_inode_t *nip;
			hammer2_deferred_ip_t *defer;

			KKASSERT(focus->bref.type == HAMMER2_BREF_TYPE_INODE);

			defer = kmalloc(sizeof(*defer), M_HAMMER2,
					M_WAITOK | M_ZERO);
			KKASSERT(xop->head.cluster.array[idx].chain == NULL);
			xop->head.cluster.array[idx].flags =
							HAMMER2_CITEM_INVALID;
			xop->head.cluster.array[idx].chain = chain;
			nip = hammer2_inode_get(pmp, ip,
						&xop->head.cluster, idx);
			xop->head.cluster.array[idx].chain = NULL;

			/*
			 * Keep a ref for the deferral record and release
			 * the inode lock.
			 */
			hammer2_inode_ref(nip);
			hammer2_inode_unlock(nip);

			defer->next = list->base;
			defer->ip = nip;
			list->base = defer;
			++list->count;
			needrescan = 1;
		}

		/*
		 * If at least one deferral was added and the deferral
		 * list has grown too large, stop adding more.  This
		 * will trigger an EAGAIN return.
		 */
		if (needrescan && list->count > 1000)
			break;

		/*
		 * Advancements for iteration.
		 */
		if (advance_xop) {
			error = hammer2_xop_collect(&xop->head, 0);
		}
		if (advance_local) {
			chain = hammer2_chain_next(&parent, chain, &key_next,
						   key_next, HAMMER2_KEY_MAX,
						   &cache_index,
						   HAMMER2_LOOKUP_SHARED |
						   HAMMER2_LOOKUP_NODIRECT |
						   HAMMER2_LOOKUP_NODATA);
		}
	}

	/*
	 * Scan finished or aborted, retire the XOP and release whatever
	 * the local iteration still holds.
	 */
	hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
	if (chain) {
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
	}
	if (parent) {
		hammer2_chain_unlock(parent);
		hammer2_chain_drop(parent);
	}

	/*
	 * If we added deferrals we want the caller to synchronize them
	 * and then call us again.
	 *
	 * NOTE: In this situation we do not yet want to synchronize our
	 *	 inode, setting the error code also has that effect.
	 */
	if (error == 0 && needrescan)
		error = EAGAIN;

	/*
	 * If no error occurred and work was performed, synchronize the
	 * inode meta-data itself.
	 *
	 * XXX inode lock was lost
	 *
	 * NOTE(review): the loop above only terminates with error == 0
	 * via the deferral-limit break, in which case needrescan has
	 * already turned error into EAGAIN; a completed scan leaves
	 * error == ENOENT.  As written this block therefore appears to be
	 * unreachable -- confirm against hammer2_xop_collect().
	 */
	if (error == 0 && wantupdate) {
		hammer2_xop_ipcluster_t *xop2;
		hammer2_chain_t *focus;

		xop2 = hammer2_xop_alloc(ip, HAMMER2_XOP_MODIFYING);
		hammer2_xop_start_except(&xop2->head, hammer2_xop_ipcluster,
					 idx);
		error = hammer2_xop_collect(&xop2->head, 0);
		if (error == 0) {
			focus = xop2->head.cluster.focus;
			kprintf("syncthr: update inode %p (%s)\n",
				focus,
				(focus ?
				 (char *)focus->data->ipdata.filename : "?"));
			chain = hammer2_inode_chain_and_parent(ip, idx,
						    &parent,
						    HAMMER2_RESOLVE_ALWAYS |
						    HAMMER2_RESOLVE_SHARED);

			KKASSERT(parent != NULL);
			nerror = hammer2_sync_replace(
					thr, parent, chain,
					sync_tid,
					idx, focus);
			hammer2_chain_unlock(chain);
			hammer2_chain_drop(chain);
			hammer2_chain_unlock(parent);
			hammer2_chain_drop(parent);
			/* XXX */
		}
		hammer2_xop_retire(&xop2->head, HAMMER2_XOPMASK_VOP);
	}

	return error;
}
/*
 * Materialize a chain that is missing locally by cloning (focus), the
 * copy collected from another device.
 *
 * Locking contract: *parentp and focus arrive locked shared.  The new
 * chain is handed back through *chainp locked shared.  Whatever chain
 * was previously in *chainp (if any) is unlocked and dropped.
 *
 * mtid is passed to the chain create/modify calls and is asserted to be
 * the new chain's modify_tid.  idx is only used by the debug printout.
 *
 * Always returns 0.
 */
static
int
hammer2_sync_insert(hammer2_thread_t *thr,
		    hammer2_chain_t **parentp, hammer2_chain_t **chainp,
		    hammer2_tid_t mtid, int idx, hammer2_chain_t *focus)
{
	const int shared_how = HAMMER2_RESOLVE_SHARED |
			       HAMMER2_RESOLVE_ALWAYS;
	hammer2_chain_t *nchain = NULL;

#if HAMMER2_THREAD_DEBUG
	if (hammer2_debug & 1)
	kprintf("insert rec par=%p/%d.%016jx slave %d %d.%016jx mod=%016jx\n",
		*parentp,
		(*parentp)->bref.type,
		(*parentp)->bref.key,
		idx,
		focus->bref.type, focus->bref.key, mtid);
#endif

	/*
	 * The create needs the parent re-locked with plain RESOLVE_ALWAYS
	 * (no SHARED flag).  Release the current chain first, then cycle
	 * the parent lock; the order of these calls is deliberate to
	 * avoid deadlocks.
	 */
	if (*chainp != NULL)
		hammer2_chain_unlock(*chainp);
	hammer2_chain_unlock(*parentp);
	hammer2_chain_lock(*parentp, HAMMER2_RESOLVE_ALWAYS);
	/* reissue lookup? */

	hammer2_chain_create(parentp, &nchain, thr->pmp,
			     focus->bref.key, focus->bref.keybits,
			     focus->bref.type, focus->bytes,
			     mtid, 0);
	hammer2_chain_modify(nchain, mtid, 0);

	/*
	 * Mirror the blockref fields that the create did not already
	 * establish.  type, keybits and key were supplied to the create;
	 * mirror_tid is set by the flush; the check code is recalculated.
	 */
	nchain->bref.methods = focus->bref.methods;
	nchain->bref.vradix = focus->bref.vradix;
	KKASSERT(nchain->bref.modify_tid == mtid);
	nchain->bref.flags = focus->bref.flags;

	/*
	 * Copy the data body.  An inode without DIRECTDATA only has the
	 * portion preceding its (u) member copied.  DIRECTDATA inodes and
	 * plain data blocks are copied whole and get their check code set.
	 */
	if (nchain->bref.type == HAMMER2_BREF_TYPE_INODE &&
	    (focus->data->ipdata.meta.op_flags &
	     HAMMER2_OPFLAG_DIRECTDATA) == 0) {
		bcopy(focus->data, nchain->data,
		      offsetof(hammer2_inode_data_t, u));
	} else if (nchain->bref.type == HAMMER2_BREF_TYPE_INODE ||
		   nchain->bref.type == HAMMER2_BREF_TYPE_DATA) {
		bcopy(focus->data, nchain->data, nchain->bytes);
		hammer2_chain_setcheck(nchain, nchain->data);
	} else {
		KKASSERT(0);
	}

	/*
	 * Swap the new chain into *chainp.  Only the lock is released
	 * here, the ref is retained for the caller.
	 */
	hammer2_chain_unlock(nchain);
	if (*chainp != NULL)
		hammer2_chain_drop(*chainp);
	*chainp = nchain;

	/*
	 * Re-acquire shared locks, parent before chain, to avoid an
	 * ordering deadlock.
	 */
	hammer2_chain_unlock(*parentp);
	hammer2_chain_lock(*parentp, shared_how);
	hammer2_chain_lock(nchain, shared_how);

	return 0;
}
/*
 * Make ip->cluster reference the same chains as (cluster) and copy over
 * the focus.  Every slot is repointed whether it is flagged invalid or
 * not; per the original note, hammer2_xop_start() filters out invalid or
 * non-matching elements.
 *
 * The caller must hold the inode exclusively locked and, when (cluster)
 * is not NULL, must hold the cluster locked as well.
 *
 * Passing a NULL cluster strips all chains from ip->cluster.
 *
 * (pip) is not referenced by this function.
 */
void
hammer2_inode_repoint(hammer2_inode_t *ip, hammer2_inode_t *pip,
		      hammer2_cluster_t *cluster)
{
	hammer2_chain_t *stale[HAMMER2_MAXCLUSTER];
	hammer2_chain_t *prev;
	hammer2_chain_t *next;
	int slot;

	bzero(stale, sizeof(stale));

	hammer2_spin_ex(&ip->cluster_spin);

	/*
	 * Pass 1: walk the slots present in (cluster).  Either side may
	 * have a NULL chain in a given slot due to gaps in the arrays.
	 */
	for (slot = 0; cluster != NULL && slot < cluster->nchains; ++slot) {
		int have_old = (slot < ip->cluster.nchains);

		next = cluster->array[slot].chain;
		prev = have_old ? ip->cluster.array[slot].chain : NULL;

		/* an existing slot already pointing at next is left alone */
		if (have_old && prev == next)
			continue;

		ip->cluster.array[slot].chain = next;
		ip->cluster.array[slot].flags =
			(ip->cluster.array[slot].flags &
			 ~HAMMER2_CITEM_INVALID) |
			(cluster->array[slot].flags & HAMMER2_CITEM_INVALID);
		if (next != NULL)
			hammer2_chain_ref(next);
		stale[slot] = prev;
	}

	/*
	 * Pass 2: slots the inode has beyond the end of (cluster) are
	 * emptied and marked invalid.
	 */
	for (; slot < ip->cluster.nchains; ++slot) {
		prev = ip->cluster.array[slot].chain;
		if (prev != NULL) {
			ip->cluster.array[slot].chain = NULL;
			ip->cluster.array[slot].flags |= HAMMER2_CITEM_INVALID;
		}
		stale[slot] = prev;
	}

	/*
	 * Copy the summary fields.  The LOCKED flag is masked off because
	 * the inode-embedded cluster is never directly locked.
	 */
	if (cluster == NULL) {
		ip->cluster.nchains = 0;
		ip->cluster.focus = NULL;
		ip->cluster.flags &= ~HAMMER2_CLUSTER_ZFLAGS;
	} else {
		ip->cluster.nchains = cluster->nchains;
		ip->cluster.focus = cluster->focus;
		ip->cluster.flags = cluster->flags & ~HAMMER2_CLUSTER_LOCKED;
	}
	hammer2_spin_unex(&ip->cluster_spin);

	/*
	 * Drop the displaced chains only after the spinlock has been
	 * released, highest slot first.
	 */
	for (--slot; slot >= 0; --slot) {
		if (stale[slot] != NULL)
			hammer2_chain_drop(stale[slot]);
	}
}
/*
 * Release one reference on an inode.
 *
 * This is a thin wrapper: the inode's device (ip->hmp) and the address
 * of its chain pointer (&ip->chain) are handed to hammer2_chain_drop().
 * Freeing on the last reference, as described by the original comment,
 * is not implemented in this function itself.
 */
void
hammer2_inode_drop(hammer2_inode_t *ip)
{
	hammer2_chain_drop(ip->hmp, &(ip->chain));
}
/*
 * Shift *chainp up to the specified directory, change the filename
 * to "0xINODENUMBER", and adjust the key.  The chain becomes the
 * invisible hardlink target.
 *
 * The original *chainp has already been marked deleted.
 *
 * trans	- transaction passed through to the duplicate/modify calls.
 * chainp	- in: chain to shift up.  out: the duplicated chain.
 * dip		- target directory inode (only used by the debug kprintf).
 * dchainp	- chain of the target directory, used for the lookup and
 *		  as the destination of the duplication.
 * nlinks	- added to the resulting inode's nlinks count.
 * errorp	- set to 0 on entry/retry, ENOSPC if the key already exists.
 *
 * NOTE(review): any non-zero *errorp currently ends in panic("error2"),
 *		 so the function never actually returns with an error.
 */
static
void
hammer2_hardlink_shiftup(hammer2_trans_t *trans, hammer2_chain_t **chainp,
			hammer2_inode_t *dip, hammer2_chain_t **dchainp,
			int nlinks, int *errorp)
{
	hammer2_inode_data_t *nipdata;
	hammer2_chain_t *chain;
	hammer2_chain_t *xchain;
	hammer2_key_t key_dummy;
	hammer2_key_t lhc;
	hammer2_blockref_t bref;
	int cache_index = -1;

	chain = *chainp;
	/* the inode number doubles as the (invisible) directory key */
	lhc = chain->data->ipdata.inum;
	KKASSERT((lhc & HAMMER2_DIRHASH_VISIBLE) == 0);

	/*
	 * Locate the inode or indirect block to create the new
	 * entry in.  lhc represents the inode number so there is
	 * no collision iteration.
	 *
	 * There should be no key collisions with invisible inode keys.
	 *
	 * WARNING! Must use inode_lock_ex() on dip to handle a stale
	 *	    dip->chain cache.
	 */
retry:
	*errorp = 0;
	xchain = hammer2_chain_lookup(dchainp, &key_dummy,
				      lhc, lhc, &cache_index, 0);
	if (xchain) {
		/* unexpected: an entry already exists at this key */
		kprintf("X3 chain %p dip %p dchain %p dip->chain %p\n",
			xchain, dip, *dchainp, dip->chain);
		hammer2_chain_unlock(xchain);
		xchain = NULL;
		*errorp = ENOSPC;
#if 0
		Debugger("X3");
#endif
	}

	/*
	 * Create entry in common parent directory using the seek position
	 * calculated above.
	 *
	 * We must refactor chain because it might have been shifted into
	 * an indirect chain by the create.
	 *
	 * NOTE: the create itself is compiled out (#if 0), so nothing is
	 *	 created here and *errorp is left at 0.
	 */
	if (*errorp == 0) {
		KKASSERT(xchain == NULL);
#if 0
		*errorp = hammer2_chain_create(trans, dchainp, &xchain,
					       lhc, 0,
					       HAMMER2_BREF_TYPE_INODE,/* n/a */
					       HAMMER2_INODE_BYTES);   /* n/a */
#endif
		/*XXX this somehow isn't working on chain XXX*/
		/*KKASSERT(xxx)*/
	}

	/*
	 * Cleanup and handle retries.
	 *
	 * NOTE: with the create compiled out nothing above assigns EAGAIN,
	 *	 so this branch cannot be taken as the code stands.
	 */
	if (*errorp == EAGAIN) {
		kprintf("R");
		hammer2_chain_wait(*dchainp);
		hammer2_chain_drop(*dchainp);
		goto retry;
	}

	/*
	 * Handle the error case.  The statements following the panic() are
	 * only meaningful if the panic is removed.
	 */
	if (*errorp) {
		panic("error2");
		KKASSERT(xchain == NULL);
		return;
	}

	/*
	 * Use xchain as a placeholder for (lhc).  Duplicate chain to the
	 * same target bref as xchain and then delete xchain.  The duplication
	 * occurs after xchain in flush order even though xchain is deleted
	 * after the duplication. XXX
	 *
	 * NOTE: since the create is compiled out, xchain is always NULL at
	 *	 this point and only the duplication is performed.
	 *
	 * WARNING! Duplications (to a different parent) can cause indirect
	 * blocks to be inserted, refactor xchain.
	 */
	bref = chain->bref;
	bref.key = lhc;			/* invisible dir entry key */
	bref.keybits = 0;
	hammer2_chain_duplicate(trans, dchainp, &chain, &bref, 0, 2);

	/*
	 * chain is now 'live' again.. adjust the filename.
	 *
	 * Directory entries are inodes but this is a hidden hardlink
	 * target.  The name isn't used but to ease debugging give it
	 * a name after its inode number.
	 */
	hammer2_chain_modify(trans, &chain, 0);
	nipdata = &chain->data->ipdata;
	ksnprintf(nipdata->filename, sizeof(nipdata->filename),
		  "0x%016jx", (intmax_t)nipdata->inum);
	nipdata->name_len = strlen(nipdata->filename);
	nipdata->name_key = lhc;
	nipdata->nlinks += nlinks;

	/* hand the duplicated chain back to the caller */
	*chainp = chain;
}
/*
 * Remove the directory entry (name, name_len) from directory inode (dip).
 * The directory inode does not need to be locked by the caller.
 *
 * isdir   - type check selector:
 *	       < 0  no directory/non-directory check is made
 *	      == 0  the target must not be a directory (else ENOTDIR)
 *	      >= 1  the target must be a directory (else EISDIR)
 *	      == 1  additionally the directory must be empty (ENOTEMPTY)
 * hlinkp  - optional.  Set to 1 if the entry found is a hardlink
 *	     pointer, otherwise 0.
 * nch     - optional namecache handle.  When nlinks reaches 0 and
 *	     cache_isopen(nch) is true the chain is flagged UNLINKED and
 *	     moved to the hidden directory instead of being deleted.
 *
 * A chain flagged PFSROOT with a non-NULL pmp is refused with EINVAL
 * once its nlinks has reached 0.
 *
 * Returns 0 on success or ENOENT, ENOTDIR, EISDIR, ENOTEMPTY, EINVAL.
 *
 * NOTE! The underlying file can still be active with open descriptors
 *	 or if the chain is being manually held (e.g. for rename).
 *
 *	 The caller is responsible for fixing up ip->chain if e.g. a
 *	 rename occurs (see chain_duplicate()).
 */
int
hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip,
		    const uint8_t *name, size_t name_len,
		    int isdir, int *hlinkp, struct nchandle *nch)
{
	hammer2_inode_data_t *target;
	hammer2_chain_t *parent;
	hammer2_chain_t *ochain = NULL;
	hammer2_chain_t *chain;
	hammer2_chain_t *scan_parent;
	hammer2_chain_t *scan_chain;
	hammer2_key_t key_dummy;
	hammer2_key_t key_next;
	hammer2_key_t name_hash;
	hammer2_key_t hash_end;
	int cache_index = -1;
	int error = 0;
	uint8_t objtype;

	name_hash = hammer2_dirhash(name, name_len);
	hash_end = name_hash + HAMMER2_DIRHASH_LOMASK;

	if (hlinkp != NULL)
		*hlinkp = 0;

	/*
	 * Scan the hash collision range for an inode entry whose name
	 * matches exactly.
	 */
	parent = hammer2_inode_lock_ex(dip);
	for (chain = hammer2_chain_lookup(&parent, &key_next,
					  name_hash, hash_end,
					  &cache_index, 0);
	     chain != NULL;
	     chain = hammer2_chain_next(&parent, chain, &key_next,
					key_next, hash_end,
					&cache_index, 0)) {
		if (chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
		    name_len == chain->data->ipdata.name_len &&
		    bcmp(name, chain->data->ipdata.filename, name_len) == 0) {
			break;
		}
	}
	hammer2_inode_unlock_ex(dip, NULL);	/* retain parent */

	if (chain == NULL) {
		error = ENOENT;
		goto done;
	}

	/*
	 * For a hardlink pointer the type checks are made against the
	 * type recorded for the hardlink target.
	 */
	objtype = chain->data->ipdata.type;
	if (objtype == HAMMER2_OBJTYPE_HARDLINK) {
		if (hlinkp != NULL)
			*hlinkp = 1;
		objtype = chain->data->ipdata.target_type;
	}

	if (objtype == HAMMER2_OBJTYPE_DIRECTORY) {
		if (isdir == 0) {
			error = ENOTDIR;
			goto done;
		}
	} else if (isdir >= 1) {
		error = EISDIR;
		goto done;
	}

	/*
	 * Resolve a hardlink pointer.  The parent cannot stay locked
	 * across the resolution or we could deadlock.
	 *
	 * On success chain is adjusted to point at the hardlink target and
	 * ochain to the hardlink pointer in the original directory,
	 * otherwise chain keeps pointing at the original.
	 *
	 * NOTE(review): the value stored in error here is overwritten on
	 *		 every path below and is never acted upon.
	 */
	if (chain->data->ipdata.type == HAMMER2_OBJTYPE_HARDLINK) {
		hammer2_chain_unlock(parent);
		parent = NULL;
		error = hammer2_hardlink_find(dip, &chain, &ochain);
	}

	/*
	 * rmdir-style removal (isdir == 1) requires an empty directory.
	 * The whole key range is probed, which covers both visible and
	 * invisible (hardlink target) entries.
	 */
	if (objtype == HAMMER2_OBJTYPE_DIRECTORY && isdir == 1) {
		scan_parent = hammer2_chain_lookup_init(chain, 0);
		scan_chain = hammer2_chain_lookup(&scan_parent, &key_dummy,
						  0, (hammer2_key_t)-1,
						  &cache_index,
						  HAMMER2_LOOKUP_NODATA);
		if (scan_chain != NULL) {
			hammer2_chain_unlock(scan_chain);
			hammer2_chain_lookup_done(scan_parent);
			error = ENOTEMPTY;
			goto done;
		}
		hammer2_chain_lookup_done(scan_parent);
		scan_parent = NULL;
	}

	/*
	 * A non-NULL ochain means a hardlink was resolved: delete the
	 * hardlink pointer itself unconditionally.
	 */
	if (ochain != NULL) {
		hammer2_chain_lock(ochain, HAMMER2_RESOLVE_ALWAYS);
		hammer2_chain_delete(trans, ochain, 0);
		hammer2_chain_unlock(ochain);
	}

	/*
	 * Decrement nlinks on the hardlink target, or on the file itself
	 * when it was not hardlinked.  A count that went negative is
	 * clamped back to 0.
	 */
	KKASSERT(chain != NULL);
	hammer2_chain_modify(trans, &chain, 0);
	target = &chain->data->ipdata;
	--target->nlinks;
	if ((int64_t)target->nlinks < 0)	/* XXX debugging */
		target->nlinks = 0;

	if (target->nlinks == 0) {
		if ((chain->flags & HAMMER2_CHAIN_PFSROOT) && chain->pmp) {
			error = EINVAL;
			kprintf("hammer2: PFS \"%s\" cannot be deleted "
				"while still mounted\n",
				target->filename);
			goto done;
		}
		if (nch != NULL && cache_isopen(nch)) {
			/* still open: park the chain instead of deleting */
			kprintf("WARNING: unlinking open file\n");
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_UNLINKED);
			hammer2_inode_move_to_hidden(trans, &chain,
						     target->inum);
		} else {
			hammer2_chain_delete(trans, chain, 0);
		}
	}
	error = 0;

done:
	if (chain != NULL)
		hammer2_chain_unlock(chain);
	if (parent != NULL)
		hammer2_chain_lookup_done(parent);
	if (ochain != NULL)
		hammer2_chain_drop(ochain);

	return error;
}
/*
 * Remove the directory entry (name, name_len) from directory inode (dip).
 * The directory inode does not need to be locked by the caller.
 *
 * isdir   - type check selector:
 *	       < 0  no directory/non-directory check is made
 *	      == 0  the target must not be a directory (else ENOTDIR)
 *	      >= 1  the target must be a directory (else EISDIR)
 *	      == 1  additionally the directory must be empty (ENOTEMPTY)
 * hlinkp  - optional.  Set to 1 if the entry found is a hardlink
 *	     pointer, otherwise 0.
 *
 * Returns 0 on success or ENOENT, ENOTDIR, EISDIR, ENOTEMPTY.
 *
 * NOTE! This function does not prevent the underlying file from still
 *	 being used if it has other refs (such as from an inode, or if
 *	 its chain is manually held).  The caller is responsible for
 *	 fixing up ip->chain if e.g. a rename occurs (see
 *	 chain_duplicate()).
 */
int
hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip,
		    const uint8_t *name, size_t name_len,
		    int isdir, int *hlinkp)
{
	hammer2_inode_data_t *target;
	hammer2_chain_t *parent;
	hammer2_chain_t *ochain = NULL;
	hammer2_chain_t *chain;
	hammer2_chain_t *scan_parent;
	hammer2_chain_t *scan_chain;
	hammer2_key_t name_hash;
	int error = 0;
	uint8_t objtype;

	name_hash = hammer2_dirhash(name, name_len);

	if (hlinkp != NULL)
		*hlinkp = 0;

	/*
	 * Scan the hash collision range for an inode entry whose name
	 * matches exactly.
	 */
	parent = hammer2_inode_lock_ex(dip);
	for (chain = hammer2_chain_lookup(&parent, name_hash,
					  name_hash + HAMMER2_DIRHASH_LOMASK,
					  0);
	     chain != NULL;
	     chain = hammer2_chain_next(&parent, chain, name_hash,
					name_hash + HAMMER2_DIRHASH_LOMASK,
					0)) {
		if (chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
		    name_len == chain->data->ipdata.name_len &&
		    bcmp(name, chain->data->ipdata.filename, name_len) == 0) {
			break;
		}
	}
	hammer2_inode_unlock_ex(dip, NULL);	/* retain parent */

	if (chain == NULL) {
		error = ENOENT;
		goto done;
	}

	/*
	 * For a hardlink pointer the type checks are made against the
	 * type recorded for the hardlink target.
	 */
	objtype = chain->data->ipdata.type;
	if (objtype == HAMMER2_OBJTYPE_HARDLINK) {
		if (hlinkp != NULL)
			*hlinkp = 1;
		objtype = chain->data->ipdata.target_type;
	}

	if (objtype == HAMMER2_OBJTYPE_DIRECTORY) {
		if (isdir == 0) {
			error = ENOTDIR;
			goto done;
		}
	} else if (isdir >= 1) {
		error = EISDIR;
		goto done;
	}

	/*
	 * Resolve a hardlink pointer.  The parent cannot stay locked
	 * across the resolution or we could deadlock.
	 *
	 * On success chain is adjusted to point at the hardlink target and
	 * ochain to the hardlink pointer in the original directory,
	 * otherwise chain keeps pointing at the original.
	 *
	 * NOTE(review): the value stored in error here is overwritten on
	 *		 every path below and is never acted upon.
	 */
	if (chain->data->ipdata.type == HAMMER2_OBJTYPE_HARDLINK) {
		hammer2_chain_unlock(parent);
		parent = NULL;
		error = hammer2_hardlink_find(dip, &chain, &ochain);
	}

	/*
	 * rmdir-style removal (isdir == 1) requires an empty directory.
	 * The whole key range is probed, which covers both visible and
	 * invisible (hardlink target) entries.
	 */
	if (objtype == HAMMER2_OBJTYPE_DIRECTORY && isdir == 1) {
		scan_parent = hammer2_chain_lookup_init(chain, 0);
		scan_chain = hammer2_chain_lookup(&scan_parent,
						  0, (hammer2_key_t)-1,
						  HAMMER2_LOOKUP_NODATA);
		if (scan_chain != NULL) {
			hammer2_chain_unlock(scan_chain);
			hammer2_chain_lookup_done(scan_parent);
			error = ENOTEMPTY;
			goto done;
		}
		hammer2_chain_lookup_done(scan_parent);
		scan_parent = NULL;
	}

	/*
	 * A non-NULL ochain means a hardlink was resolved: delete the
	 * hardlink pointer in the original directory first.
	 */
	if (ochain != NULL) {
		hammer2_chain_lock(ochain, HAMMER2_RESOLVE_ALWAYS);
		hammer2_chain_delete(trans, ochain);
		hammer2_chain_unlock(ochain);
	}

	/*
	 * Decrement nlinks.  A hardlink target is only deleted once its
	 * count reaches 0, a plain (non-hardlinked) entry is always
	 * deleted.
	 */
	hammer2_chain_modify(trans, &chain, 0);
	target = &chain->data->ipdata;
	--target->nlinks;
	if (ochain == NULL || target->nlinks == 0)
		hammer2_chain_delete(trans, chain);
	error = 0;

done:
	if (chain != NULL)
		hammer2_chain_unlock(chain);
	if (parent != NULL)
		hammer2_chain_lookup_done(parent);
	if (ochain != NULL)
		hammer2_chain_drop(ochain);

	return error;
}
/*
 * Create a new inode named (name, name_len) in directory (dip), using
 * (vap) to determine the inode type.
 *
 * On success the new inode is returned and its chain is stored in
 * *chainp.  On failure NULL is returned, *errorp holds the error and
 * *chainp is not written.  *errorp is 0 on success.
 *
 * When vap is NULL the inode type defaults to a directory and the inode
 * number is set to 1; per the original note this is used when creating
 * PFSs under the super-root.  The uid/gid/mode fields are only filled in
 * when vap is non-NULL.
 *
 * dip is not locked on entry.
 */
hammer2_inode_t *
hammer2_inode_create(hammer2_trans_t *trans, hammer2_inode_t *dip,
		     struct vattr *vap, struct ucred *cred,
		     const uint8_t *name, size_t name_len,
		     hammer2_chain_t **chainp, int *errorp)
{
	hammer2_inode_data_t *dipdata;
	hammer2_inode_data_t *nipdata;
	hammer2_chain_t *chain;
	hammer2_chain_t *parent;
	hammer2_inode_t *nip;
	hammer2_key_t key_dummy;
	hammer2_key_t lhc;
	int error;
	uid_t xuid;
	uuid_t dir_uid;
	uuid_t dir_gid;
	uint32_t dir_mode;
	uint8_t dir_algo;
	int cache_index = -1;

	lhc = hammer2_dirhash(name, name_len);
	*errorp = 0;

	/*
	 * Find a free directory key and create the entry there, repeating
	 * the whole sequence for as long as the create reports EAGAIN.
	 * lhc is not reset between attempts.
	 *
	 * NOTE: hidden inodes do not have iterators.
	 */
	for (;;) {
		parent = hammer2_inode_lock_ex(dip);

		/* snapshot the directory attributes inherited below */
		dipdata = &dip->chain->data->ipdata;
		dir_uid = dipdata->uid;
		dir_gid = dipdata->gid;
		dir_mode = dipdata->mode;
		dir_algo = dipdata->comp_algo;

		/*
		 * Step lhc past occupied keys.  Give up with ENOSPC when
		 * the colliding key is an invisible one or is the last
		 * key of the iteration range.
		 */
		error = 0;
		while (error == 0 &&
		       (chain = hammer2_chain_lookup(&parent, &key_dummy,
						     lhc, lhc,
						     &cache_index, 0)) != NULL) {
			if ((lhc & HAMMER2_DIRHASH_VISIBLE) == 0 ||
			    (lhc & HAMMER2_DIRHASH_LOMASK) ==
			    HAMMER2_DIRHASH_LOMASK) {
				error = ENOSPC;
			}
			hammer2_chain_unlock(chain);
			chain = NULL;
			++lhc;
		}

		if (error == 0) {
			error = hammer2_chain_create(trans, &parent, &chain,
						     lhc, 0,
						     HAMMER2_BREF_TYPE_INODE,
						     HAMMER2_INODE_BYTES);
		}
		if (error != EAGAIN)
			break;

		/* hold parent across the unlock so it can be waited on */
		hammer2_chain_ref(parent);
		hammer2_inode_unlock_ex(dip, parent);
		hammer2_chain_wait(parent);
		hammer2_chain_drop(parent);
	}
	hammer2_inode_unlock_ex(dip, parent);

	if (error != 0) {
		KKASSERT(chain == NULL);
		*errorp = error;
		return (NULL);
	}

	/*
	 * Set up the new inode.
	 *
	 * NOTE: *_get() integrates chain's lock into the inode lock.
	 *
	 * NOTE: Only one new inode can currently be created per
	 *	 transaction (asserted below when vap is supplied).
	 *
	 * NOTE: nipdata will have chain's blockset data.
	 */
	chain->data->ipdata.inum = trans->inode_tid;
	nip = hammer2_inode_get(dip->pmp, dip, chain);
	nipdata = &chain->data->ipdata;

	if (vap == NULL) {
		nipdata->type = HAMMER2_OBJTYPE_DIRECTORY;
		nipdata->inum = 1;
	} else {
		KKASSERT(trans->inodes_created == 0);
		nipdata->type = hammer2_get_obj_type(vap->va_type);
		nipdata->inum = trans->inode_tid;
		++trans->inodes_created;

		/* device nodes also record their major/minor numbers */
		if (nipdata->type == HAMMER2_OBJTYPE_CDEV ||
		    nipdata->type == HAMMER2_OBJTYPE_BDEV) {
			nipdata->rmajor = vap->va_rmajor;
			nipdata->rminor = vap->va_rminor;
		}
	}

	/* the compression algorithm is inherited from the directory */
	nip->comp_heuristic = 0;
	nipdata->comp_algo = dir_algo;
	nipdata->version = HAMMER2_INODE_VERSION_ONE;
	hammer2_update_time(&nipdata->ctime);
	nipdata->mtime = nipdata->ctime;
	nipdata->nlinks = 1;

	if (vap != NULL) {
		/* mode is taken before the uid helper is handed va_mode */
		nipdata->mode = vap->va_mode;

		if (dip && dip->pmp) {
			xuid = hammer2_to_unix_xid(&dir_uid);
			xuid = vop_helper_create_uid(dip->pmp->mp,
						     dir_mode,
						     xuid,
						     cred,
						     &vap->va_mode);
		} else {
			/* super-root has no dip and/or pmp */
			xuid = 0;
		}

		/* owner: explicit uuid, else explicit uid, else xuid */
		if (vap->va_vaflags & VA_UID_UUID_VALID)
			nipdata->uid = vap->va_uid_uuid;
		else if (vap->va_uid != (uid_t)VNOVAL)
			hammer2_guid_to_uuid(&nipdata->uid, vap->va_uid);
		else
			hammer2_guid_to_uuid(&nipdata->uid, xuid);

		/* group: explicit uuid, else explicit gid, else directory's */
		if (vap->va_vaflags & VA_GID_UUID_VALID)
			nipdata->gid = vap->va_gid_uuid;
		else if (vap->va_gid != (gid_t)VNOVAL)
			hammer2_guid_to_uuid(&nipdata->gid, vap->va_gid);
		else if (dip)
			nipdata->gid = dir_gid;
	}

	/*
	 * Regular files and softlinks allow a small amount of data to be
	 * directly embedded in the inode.  This flag will be cleared if
	 * the size is extended past the embedded limit.
	 */
	if (nipdata->type == HAMMER2_OBJTYPE_REGFILE ||
	    nipdata->type == HAMMER2_OBJTYPE_SOFTLINK) {
		nipdata->op_flags |= HAMMER2_OPFLAG_DIRECTDATA;
	}

	/* record the name and the directory key that was finally used */
	KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
	bcopy(name, nipdata->filename, name_len);
	nipdata->name_key = lhc;
	nipdata->name_len = name_len;
	*chainp = chain;

	return (nip);
}
/*
 * ochain represents the target file inode.  We need to move it to the
 * specified common parent directory (dip) and rename it to a special
 * invisible "0xINODENUMBER" filename.
 *
 * We use chain_duplicate and duplicate ochain at the new location,
 * renaming it appropriately.  We create a temporary chain and
 * then delete it to placemark where the duplicate will go.  Both of
 * these use the inode number for (lhc) (the key), generating the
 * invisible filename.
 *
 * trans	- transaction passed through to the chain operations.
 * ochainp	- in/out.  The chain is refactored after the placemarker
 *		  create is attempted and the result stored back.
 * dip		- common parent directory inode.
 * errorp	- 0 on success, ENOSPC if the key already exists, or the
 *		  error returned by hammer2_chain_create().
 *
 * Returns the duplicated chain, or NULL with *errorp set on failure.
 */
static
hammer2_chain_t *
hammer2_hardlink_shiftup(hammer2_trans_t *trans, hammer2_chain_t **ochainp,
			hammer2_inode_t *dip, int *errorp)
{
	hammer2_inode_data_t *nipdata;
	hammer2_chain_t *parent;
	hammer2_chain_t *ochain;
	hammer2_chain_t *nchain;
	hammer2_chain_t *tmp;
	hammer2_key_t lhc;
	hammer2_blockref_t bref;

	ochain = *ochainp;
	lhc = ochain->data->ipdata.inum;
	KKASSERT((lhc & HAMMER2_DIRHASH_VISIBLE) == 0);

	/*
	 * Locate the inode or indirect block to create the new
	 * entry in.  lhc represents the inode number so there is
	 * no collision iteration.
	 *
	 * There should be no key collisions with invisible inode keys.
	 *
	 * The error must be cleared on every attempt.  If a stale EAGAIN
	 * survived the jump back here the create below would be skipped
	 * and the EAGAIN branch would be taken again, looping forever.
	 */
retry:
	*errorp = 0;
	parent = hammer2_chain_lookup_init(dip->chain, 0);
	nchain = hammer2_chain_lookup(&parent, lhc, lhc, 0);
	if (nchain) {
		/* unexpected: an entry already exists at this key */
		kprintf("X3 chain %p parent %p dip %p dip->chain %p\n",
			nchain, parent, dip, dip->chain);
		hammer2_chain_unlock(nchain);
		nchain = NULL;
		*errorp = ENOSPC;
#if 1
		Debugger("X3");
#endif
	}

	/*
	 * Create entry in common parent directory using the seek position
	 * calculated above.  ochain is refactored whether or not the
	 * create succeeded.
	 */
	if (*errorp == 0) {
		KKASSERT(nchain == NULL);
		*errorp = hammer2_chain_create(trans, &parent, &nchain,
					       lhc, 0,
					       HAMMER2_BREF_TYPE_INODE,/* n/a */
					       HAMMER2_INODE_BYTES);   /* n/a */
		hammer2_chain_refactor(&ochain);
		*ochainp = ochain;
	}

	/*
	 * Cleanup and handle retries.  parent is ref'd so it can still be
	 * waited on after the lookup state has been torn down.
	 */
	if (*errorp == EAGAIN) {
		hammer2_chain_ref(parent);
		hammer2_chain_lookup_done(parent);
		hammer2_chain_wait(parent);
		hammer2_chain_drop(parent);
		goto retry;
	}

	/*
	 * Handle the error case
	 */
	if (*errorp) {
		KKASSERT(nchain == NULL);
		hammer2_chain_lookup_done(parent);
		return (NULL);
	}

	/*
	 * Use chain as a placeholder for (lhc), delete it and replace
	 * it with our duplication.
	 *
	 * Gain a second lock on ochain for the duplication function to
	 * unlock, maintain the caller's original lock across the call.
	 *
	 * This is a bit messy.
	 */
	hammer2_chain_delete(trans, nchain);
	hammer2_chain_lock(ochain, HAMMER2_RESOLVE_ALWAYS);
	tmp = ochain;
	bref = tmp->bref;
	bref.key = lhc;			/* invisible dir entry key */
	bref.keybits = 0;
	hammer2_chain_duplicate(trans, parent, nchain->index, &tmp, &bref);
	hammer2_chain_lookup_done(parent);
	hammer2_chain_unlock(nchain);	/* no longer needed */

	/*
	 * Now set chain to our duplicate and modify it appropriately.
	 *
	 * Directory entries are inodes but this is a hidden hardlink
	 * target.  The name isn't used but to ease debugging give it
	 * a name after its inode number.
	 */
	nchain = tmp;
	tmp = NULL;	/* safety */
	hammer2_chain_modify(trans, &nchain, HAMMER2_MODIFY_ASSERTNOCOPY);
	nipdata = &nchain->data->ipdata;
	ksnprintf(nipdata->filename, sizeof(nipdata->filename),
		  "0x%016jx", (intmax_t)nipdata->inum);
	nipdata->name_len = strlen(nipdata->filename);
	nipdata->name_key = lhc;

	return (nchain);
}
/*
 * Retire a XOP.  Used by both the VOP frontend and by the XOP backend.
 *
 * (mask) identifies the participant that is retiring and is removed
 * from xop->run_mask.  Only the participant that removes the last bits
 * tears the XOP down: the collection cluster and the per-node fifos are
 * emptied, held inodes are dropped, name buffers are freed and the
 * structure is returned to cache_xops.
 */
void
hammer2_xop_retire(hammer2_xop_head_t *xop, uint32_t mask)
{
	hammer2_chain_t *held;
	uint32_t pending;
	int n;

	/*
	 * Remove the frontend or remove a backend feeder.  If other
	 * participants remain we are done; a retiring frontend must first
	 * wake up any backend feeders waiting for FIFO space.
	 *
	 * XXX optimize wakeup.
	 */
	KKASSERT(xop->run_mask & mask);
	if (atomic_fetchadd_int(&xop->run_mask, -mask) != mask) {
		if (mask == HAMMER2_XOPMASK_VOP)
			wakeup(xop);
		return;
	}

	/*
	 * Last participant.  Release every chain still sitting in the
	 * collection cluster and clear the per-slot flags.
	 */
	for (n = 0; n < xop->cluster.nchains; ++n) {
		xop->cluster.array[n].flags = 0;
		held = xop->cluster.array[n].chain;
		if (held == NULL)
			continue;
		xop->cluster.array[n].chain = NULL;
		hammer2_chain_unlock(held);
		hammer2_chain_drop(held);
	}

	/*
	 * Drain the fifos.  chk_mask bounds the scan: once every bit in
	 * it has been cleared the remaining fifos are not visited.
	 */
	pending = xop->chk_mask;
	for (n = 0; pending != 0 && n < HAMMER2_MAXCLUSTER; ++n) {
		hammer2_xop_fifo_t *fifo = &xop->collect[n];

		while (fifo->ri != fifo->wi) {
			held = fifo->array[fifo->ri & HAMMER2_XOPFIFO_MASK];
			if (held != NULL) {
				hammer2_chain_unlock(held);
				hammer2_chain_drop(held);
			}
			++fifo->ri;
			if (fifo->wi - fifo->ri < HAMMER2_XOPFIFO / 2)
				wakeup(xop);	/* XXX optimize */
		}
		pending &= ~(1U << n);
	}

	/*
	 * The inodes are only held at this point, simply drop them.
	 */
	if (xop->ip != NULL) {
		hammer2_inode_drop(xop->ip);
		xop->ip = NULL;
	}
	if (xop->ip2 != NULL) {
		hammer2_inode_drop(xop->ip2);
		xop->ip2 = NULL;
	}
	if (xop->ip3 != NULL) {
		hammer2_inode_drop(xop->ip3);
		xop->ip3 = NULL;
	}

	/*
	 * Free any name buffers attached to the XOP.
	 */
	if (xop->name != NULL) {
		kfree(xop->name, M_HAMMER2);
		xop->name = NULL;
		xop->name_len = 0;
	}
	if (xop->name2 != NULL) {
		kfree(xop->name2, M_HAMMER2);
		xop->name2 = NULL;
		xop->name2_len = 0;
	}

	objcache_put(cache_xops, xop);
}
/*
 * Update LNK_SPAN state.
 *
 * Iterates the chains under the media-localized super-root inode
 * (hmp->spmp->iroot) and writes one DMSG_LNK_SPAN | DMSGF_CREATE message
 * for every inode chain found, filled in from that inode's pfs_clid,
 * pfs_fsid, pfs_type and filename.
 *
 * (state) is not referenced by this function.
 */
static void
hammer2_update_spans(hammer2_dev_t *hmp, kdmsg_state_t *state)
{
	const hammer2_inode_data_t *ripdata;
	hammer2_chain_t *parent;
	hammer2_chain_t *chain;
	hammer2_pfs_t *spmp;
	hammer2_key_t key_next;
	kdmsg_msg_t *rmsg;
	size_t name_len;
	int cache_index = -1;

	/*
	 * Lookup mount point under the media-localized super-root.
	 *
	 * cluster->pmp will incorrectly point to spmp and must be fixed
	 * up later on.
	 */
	spmp = hmp->spmp;
	hammer2_inode_lock(spmp->iroot, 0);

	parent = hammer2_inode_chain(spmp->iroot, 0, HAMMER2_RESOLVE_ALWAYS);
	chain = NULL;
	if (parent == NULL)
		goto done;
	chain = hammer2_chain_lookup(&parent, &key_next,
				     HAMMER2_KEY_MIN, HAMMER2_KEY_MAX,
				     &cache_index, 0);
	while (chain) {
		/*
		 * Only inode chains generate a span.  Other chain types
		 * are skipped, but the iterator must still be advanced
		 * for them or the loop would never terminate.
		 */
		if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
			ripdata = &chain->data->ipdata;
			kprintf("UPDATE SPANS: %s\n", ripdata->filename);

			rmsg = kdmsg_msg_alloc(&hmp->iocom.state0,
					       DMSG_LNK_SPAN | DMSGF_CREATE,
					       hammer2_lnk_span_reply, NULL);
			rmsg->any.lnk_span.peer_id = ripdata->meta.pfs_clid;
			rmsg->any.lnk_span.pfs_id = ripdata->meta.pfs_fsid;
			rmsg->any.lnk_span.pfs_type = ripdata->meta.pfs_type;
			rmsg->any.lnk_span.peer_type = DMSG_PEER_HAMMER2;
			rmsg->any.lnk_span.proto_version = DMSG_SPAN_PROTO_1;

			/* clamp so the label keeps room for a terminator */
			name_len = ripdata->meta.name_len;
			if (name_len >= sizeof(rmsg->any.lnk_span.peer_label))
				name_len =
				    sizeof(rmsg->any.lnk_span.peer_label) - 1;
			bcopy(ripdata->filename,
			      rmsg->any.lnk_span.peer_label,
			      name_len);

			kdmsg_msg_write(rmsg);
		}

		chain = hammer2_chain_next(&parent, chain, &key_next,
					   key_next, HAMMER2_KEY_MAX,
					   &cache_index, 0);
	}
done:
	/*
	 * The inode lock was taken unconditionally above, so it has to be
	 * released on the early-exit path (parent == NULL) as well.
	 */
	hammer2_inode_unlock(spmp->iroot);
	if (chain) {
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
	}
	if (parent) {
		hammer2_chain_unlock(parent);
		hammer2_chain_drop(parent);
	}
}