/*
 * Acquire synchronization TID
 */
static
int
hammer_ioc_synctid(hammer_transaction_t trans, hammer_inode_t ip,
		   struct hammer_ioc_synctid *std)
{
	hammer_mount_t hmp = ip->hmp;
	int error = 0;

	switch(std->op) {
	case HAMMER_SYNCTID_NONE:
		std->tid = hmp->flusher.tid;	/* inaccurate */
		break;
	case HAMMER_SYNCTID_ASYNC:
		hammer_queue_inodes_flusher(hmp, MNT_NOWAIT);
		hammer_flusher_async(hmp, NULL);
		std->tid = hmp->flusher.tid;	/* inaccurate */
		break;
	case HAMMER_SYNCTID_SYNC1:
		hammer_queue_inodes_flusher(hmp, MNT_WAIT);
		hammer_flusher_sync(hmp);
		std->tid = hmp->flusher.tid;
		break;
	case HAMMER_SYNCTID_SYNC2:
		hammer_queue_inodes_flusher(hmp, MNT_WAIT);
		hammer_flusher_sync(hmp);
		std->tid = hmp->flusher.tid;
		hammer_flusher_sync(hmp);
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}
	return(error);
}

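/*
 * Illustrative userland sketch (not part of this file): obtaining a
 * synchronization TID via the HAMMERIOC_SYNCTID ioctl handled above.
 * The opened path can be any file or directory on the HAMMER
 * filesystem.  The header path, the helper name, and the error
 * handling here are assumptions made for the example only.
 */
#if 0
#include <sys/types.h>
#include <sys/ioctl.h>
#include <vfs/hammer/hammer_ioctl.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
#include <string.h>

static hammer_tid_t
example_get_synctid(const char *path)
{
	struct hammer_ioc_synctid synctid;
	int fd;

	memset(&synctid, 0, sizeof(synctid));
	synctid.op = HAMMER_SYNCTID_SYNC1;	/* queue + flush, then sample tid */

	fd = open(path, O_RDONLY);
	if (fd < 0 || ioctl(fd, HAMMERIOC_SYNCTID, &synctid) < 0) {
		perror("HAMMERIOC_SYNCTID");
		return(0);
	}
	close(fd);
	return(synctid.tid);
}
#endif
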
/*
 * Sync all inodes pending on the flusher.
 *
 * All flush groups will be flushed.  This does not queue dirty inodes
 * to the flush groups, it just flushes out what has already been queued!
 */
void
hammer_flusher_sync(hammer_mount_t hmp)
{
	int seq;

	seq = hammer_flusher_async(hmp, NULL);
	hammer_flusher_wait(hmp, seq);
}

/*
 * Flush the current/next flushable flg.  This function is typically called
 * in a loop along with hammer_flusher_wait(hmp, returned_seq) to iterate
 * flush groups until specific conditions are met.
 *
 * If a flush is currently in progress its seq is returned.
 *
 * If no flush is currently in progress the next available flush group
 * will be flushed and its seq returned.
 *
 * If no flush groups are present a dummy seq will be allocated and
 * returned and the flusher will be activated (e.g. to flush the
 * undo/redo and the volume header).
 */
int
hammer_flusher_async_one(hammer_mount_t hmp)
{
	hammer_flush_group_t flg;
	int seq;

	if (hmp->flusher.td) {
		flg = TAILQ_FIRST(&hmp->flush_group_list);
		seq = hammer_flusher_async(hmp, flg);
	} else {
		seq = hmp->flusher.done;
	}
	return(seq);
}

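/*
 * Illustrative sketch (assumption, not original code): the caller loop
 * described in the comment above, as used by the mirror-write and
 * reblocker ioctls below.  The backpressure condition chosen here
 * (hammer_flusher_meta_halflimit()) and the function name are examples
 * only; real callers also unlock/relock their cursor around the wait.
 */
#if 0
static void
hammer_example_drain_flusher(hammer_mount_t hmp)
{
	int seq;

	seq = hmp->flusher.done;
	while (hammer_flusher_meta_halflimit(hmp)) {
		/* wait for the previously observed sequence to complete */
		hammer_flusher_wait(hmp, seq);
		/* then kick the next flush group and remember its seq */
		seq = hammer_flusher_async_one(hmp);
	}
}
#endif
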
/*
 * Copy records from userland to the target mirror.
 *
 * The PFS is identified in the mirror structure.  The passed ip is just
 * some directory in the overall HAMMER filesystem and has nothing to
 * do with the PFS.  In fact, there might not even be a root directory for
 * the PFS yet!
 */
int
hammer_ioc_mirror_write(hammer_transaction_t trans, hammer_inode_t ip,
			struct hammer_ioc_mirror_rw *mirror)
{
	union hammer_ioc_mrecord_any mrec;
	struct hammer_cursor cursor;
	u_int32_t localization;
	int checkspace_count = 0;
	int error;
	int bytes;
	char *uptr;
	int seq;

	localization = (u_int32_t)mirror->pfs_id << 16;
	seq = trans->hmp->flusher.done;

	/*
	 * Validate the mirror structure and relocalize the tracking keys.
	 */
	if (mirror->size < 0 || mirror->size > 0x70000000)
		return(EINVAL);
	mirror->key_beg.localization &= HAMMER_LOCALIZE_MASK;
	mirror->key_beg.localization += localization;
	mirror->key_end.localization &= HAMMER_LOCALIZE_MASK;
	mirror->key_end.localization += localization;
	mirror->key_cur.localization &= HAMMER_LOCALIZE_MASK;
	mirror->key_cur.localization += localization;

	/*
	 * Set up our tracking cursor for the loop.  The tracking cursor
	 * is used to delete records that are no longer present on the
	 * master.  The last handled record at key_cur must be skipped.
	 */
	error = hammer_init_cursor(trans, &cursor, NULL, NULL);

	cursor.key_beg = mirror->key_cur;
	cursor.key_end = mirror->key_end;
	cursor.flags |= HAMMER_CURSOR_BACKEND;
	error = hammer_btree_first(&cursor);
	if (error == 0)
		cursor.flags |= HAMMER_CURSOR_ATEDISK;
	if (error == ENOENT)
		error = 0;

	/*
	 * Loop until our input buffer has been exhausted.
	 */
	while (error == 0 &&
	       mirror->count + sizeof(mrec.head) <= mirror->size) {
		/*
		 * Don't blow out the buffer cache.  Leave room for frontend
		 * cache as well.
		 *
		 * WARNING: See warnings in hammer_unlock_cursor() function.
		 */
		while (hammer_flusher_meta_halflimit(trans->hmp) ||
		       hammer_flusher_undo_exhausted(trans, 2)) {
			hammer_unlock_cursor(&cursor);
			hammer_flusher_wait(trans->hmp, seq);
			hammer_lock_cursor(&cursor);
			seq = hammer_flusher_async_one(trans->hmp);
		}

		/*
		 * If there is insufficient free space it may be due to
		 * reserved bigblocks, which flushing might fix.
		 */
		if (hammer_checkspace(trans->hmp, HAMMER_CHKSPC_MIRROR)) {
			if (++checkspace_count == 10) {
				error = ENOSPC;
				break;
			}
			hammer_unlock_cursor(&cursor);
			hammer_flusher_wait(trans->hmp, seq);
			hammer_lock_cursor(&cursor);
			seq = hammer_flusher_async(trans->hmp, NULL);
		}

		/*
		 * Acquire and validate header
		 */
		if ((bytes = mirror->size - mirror->count) > sizeof(mrec))
			bytes = sizeof(mrec);
		uptr = (char *)mirror->ubuf + mirror->count;
		error = copyin(uptr, &mrec, bytes);
		if (error)
			break;
		if (mrec.head.signature != HAMMER_IOC_MIRROR_SIGNATURE) {
			error = EINVAL;
			break;
		}
		if (mrec.head.rec_size < sizeof(mrec.head) ||
		    mrec.head.rec_size > sizeof(mrec) + HAMMER_XBUFSIZE ||
		    mirror->count + mrec.head.rec_size > mirror->size) {
			error = EINVAL;
			break;
		}

		switch(mrec.head.type & HAMMER_MRECF_TYPE_MASK) {
		case HAMMER_MREC_TYPE_SKIP:
			if (mrec.head.rec_size != sizeof(mrec.skip))
				error = EINVAL;
			if (error == 0) {
				error = hammer_ioc_mirror_write_skip(
						&cursor, &mrec.skip,
						mirror, localization);
			}
			break;
		case HAMMER_MREC_TYPE_REC:
			if (mrec.head.rec_size < sizeof(mrec.rec))
				error = EINVAL;
			if (error == 0) {
				error = hammer_ioc_mirror_write_rec(
						&cursor, &mrec.rec,
						mirror, localization,
						uptr + sizeof(mrec.rec));
			}
			break;
		case HAMMER_MREC_TYPE_REC_NODATA:
		case HAMMER_MREC_TYPE_REC_BADCRC:
			/*
			 * Records with bad data payloads are ignored XXX.
			 * Records with no data payload have to be skipped
			 * (they shouldn't have been written in the first
			 * place).
			 */
			if (mrec.head.rec_size < sizeof(mrec.rec))
				error = EINVAL;
			break;
		case HAMMER_MREC_TYPE_PASS:
			if (mrec.head.rec_size != sizeof(mrec.rec))
				error = EINVAL;
			if (error == 0) {
				error = hammer_ioc_mirror_write_pass(
						&cursor, &mrec.rec,
						mirror, localization);
			}
			break;
		default:
			error = EINVAL;
			break;
		}

		/*
		 * Retry the current record on deadlock, otherwise setup
		 * for the next loop.
		 */
		if (error == EDEADLK) {
			while (error == EDEADLK) {
				hammer_sync_lock_sh(trans);
				hammer_recover_cursor(&cursor);
				error = hammer_cursor_upgrade(&cursor);
				hammer_sync_unlock(trans);
			}
		} else {
			if (error == EALREADY)
				error = 0;
			if (error == 0) {
				mirror->count +=
					HAMMER_HEAD_DOALIGN(mrec.head.rec_size);
			}
		}
	}
	hammer_done_cursor(&cursor);

	/*
	 * cumulative error
	 */
	if (error) {
		mirror->head.flags |= HAMMER_IOC_HEAD_ERROR;
		mirror->head.error = error;
	}

	/*
	 * ioctls don't update the RW data structure if an error is returned,
	 * always return 0.
	 */
	return(0);
}

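/*
 * Illustrative sketch (assumption, not original code): the per-record
 * header validation performed by the loop above, factored out so the
 * checks are easier to read.  'remaining' is the number of bytes left
 * in the user buffer starting at this header.  The helper name and the
 * explicit struct type behind mrec.head are assumptions made for the
 * example.
 */
#if 0
static int
hammer_example_check_mrec_head(struct hammer_ioc_mrecord_head *head,
			       int remaining)
{
	if (head->signature != HAMMER_IOC_MIRROR_SIGNATURE)
		return(EINVAL);
	if (head->rec_size < sizeof(*head) ||
	    head->rec_size > sizeof(union hammer_ioc_mrecord_any) +
			     HAMMER_XBUFSIZE ||
	    (int)head->rec_size > remaining) {
		return(EINVAL);
	}
	return(0);
}
#endif
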
int
hammer_ioc_reblock(hammer_transaction_t trans, hammer_inode_t ip,
		   struct hammer_ioc_reblock *reblock)
{
	struct hammer_cursor cursor;
	hammer_btree_elm_t elm;
	int checkspace_count;
	int error;
	int seq;
	int slop;

	/*
	 * A fill level <= 20% is considered an emergency.  free_level is
	 * inverted from fill_level.
	 */
	if (reblock->free_level >= HAMMER_LARGEBLOCK_SIZE * 8 / 10)
		slop = HAMMER_CHKSPC_EMERGENCY;
	else
		slop = HAMMER_CHKSPC_REBLOCK;

	if ((reblock->key_beg.localization | reblock->key_end.localization) &
	    HAMMER_LOCALIZE_PSEUDOFS_MASK) {
		return(EINVAL);
	}
	if (reblock->key_beg.obj_id >= reblock->key_end.obj_id)
		return(EINVAL);
	if (reblock->free_level < 0)
		return(EINVAL);

	reblock->key_cur = reblock->key_beg;
	reblock->key_cur.localization &= HAMMER_LOCALIZE_MASK;
	reblock->key_cur.localization += ip->obj_localization;

	checkspace_count = 0;
	seq = trans->hmp->flusher.done;
retry:
	error = hammer_init_cursor(trans, &cursor, NULL, NULL);
	if (error) {
		hammer_done_cursor(&cursor);
		goto failed;
	}
	cursor.key_beg.localization = reblock->key_cur.localization;
	cursor.key_beg.obj_id = reblock->key_cur.obj_id;
	cursor.key_beg.key = HAMMER_MIN_KEY;
	cursor.key_beg.create_tid = 1;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.rec_type = HAMMER_MIN_RECTYPE;
	cursor.key_beg.obj_type = 0;

	cursor.key_end.localization = (reblock->key_end.localization &
					HAMMER_LOCALIZE_MASK) +
				      ip->obj_localization;
	cursor.key_end.obj_id = reblock->key_end.obj_id;
	cursor.key_end.key = HAMMER_MAX_KEY;
	cursor.key_end.create_tid = HAMMER_MAX_TID - 1;
	cursor.key_end.delete_tid = 0;
	cursor.key_end.rec_type = HAMMER_MAX_RECTYPE;
	cursor.key_end.obj_type = 0;

	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
	cursor.flags |= HAMMER_CURSOR_BACKEND;
	cursor.flags |= HAMMER_CURSOR_NOSWAPCACHE;

	/*
	 * This flag allows the btree scan code to return internal nodes,
	 * so we can reblock them in addition to the leafs.  Only specify it
	 * if we intend to reblock B-Tree nodes.
	 */
	if (reblock->head.flags & HAMMER_IOC_DO_BTREE)
		cursor.flags |= HAMMER_CURSOR_REBLOCKING;

	error = hammer_btree_first(&cursor);
	while (error == 0) {
		/*
		 * Internal or Leaf node
		 */
		KKASSERT(cursor.index < cursor.node->ondisk->count);
		elm = &cursor.node->ondisk->elms[cursor.index];
		reblock->key_cur.obj_id = elm->base.obj_id;
		reblock->key_cur.localization = elm->base.localization;

		/*
		 * Yield to more important tasks
		 */
		if ((error = hammer_signal_check(trans->hmp)) != 0)
			break;

		/*
		 * If there is insufficient free space it may be due to
		 * reserved bigblocks, which flushing might fix.
		 *
		 * We must force a retest in case the unlocked cursor is
		 * moved to the end of the leaf, or moved to an internal
		 * node.
		 *
		 * WARNING: See warnings in hammer_unlock_cursor() function.
		 */
		if (hammer_checkspace(trans->hmp, slop)) {
			if (++checkspace_count == 10) {
				error = ENOSPC;
				break;
			}
			hammer_unlock_cursor(&cursor);
			cursor.flags |= HAMMER_CURSOR_RETEST;
			hammer_flusher_wait(trans->hmp, seq);
			hammer_lock_cursor(&cursor);
			seq = hammer_flusher_async(trans->hmp, NULL);
			goto skip;
		}

		/*
		 * Acquiring the sync_lock prevents the operation from
		 * crossing a synchronization boundary.
		 *
		 * NOTE: cursor.node may have changed on return.
		 *
		 * WARNING: See warnings in hammer_unlock_cursor() function.
		 */
		hammer_sync_lock_sh(trans);
		error = hammer_reblock_helper(reblock, &cursor, elm);
		hammer_sync_unlock(trans);

		while (hammer_flusher_meta_halflimit(trans->hmp) ||
		       hammer_flusher_undo_exhausted(trans, 2)) {
			hammer_unlock_cursor(&cursor);
			hammer_flusher_wait(trans->hmp, seq);
			hammer_lock_cursor(&cursor);
			seq = hammer_flusher_async_one(trans->hmp);
		}

		/*
		 * Setup for iteration, our cursor flags may be modified by
		 * other threads while we are unlocked.
		 */
		cursor.flags |= HAMMER_CURSOR_ATEDISK;

		/*
		 * We allocate data buffers, which atm we don't track
		 * dirty levels for because we allow the kernel to write
		 * them.  But if we allocate too many we can still deadlock
		 * the buffer cache.
		 *
		 * WARNING: See warnings in hammer_unlock_cursor() function.
		 *	    (The cursor's node and element may change!)
		 */
		if (bd_heatup()) {
			hammer_unlock_cursor(&cursor);
			bwillwrite(HAMMER_XBUFSIZE);
			hammer_lock_cursor(&cursor);
		}
		/* XXX vm_wait_nominal(); */
skip:
		if (error == 0) {
			error = hammer_btree_iterate(&cursor);
		}
	}
	if (error == ENOENT)
		error = 0;
	hammer_done_cursor(&cursor);
	if (error == EWOULDBLOCK) {
		hammer_flusher_sync(trans->hmp);
		goto retry;
	}
	if (error == EDEADLK)
		goto retry;
	if (error == EINTR) {
		reblock->head.flags |= HAMMER_IOC_HEAD_INTR;
		error = 0;
	}
failed:
	reblock->key_cur.localization &= HAMMER_LOCALIZE_MASK;
	return(error);
}
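
/*
 * Illustrative sketch (assumption, not original code): deriving the
 * free_level threshold from a target fill percentage, following the
 * inversion noted at the top of hammer_ioc_reblock().  A 20% fill
 * target maps to HAMMER_LARGEBLOCK_SIZE * 8 / 10, the emergency cutoff
 * tested above.  The helper name is hypothetical.
 */
#if 0
static int
hammer_example_free_level(int fill_pct)
{
	/* bytes that must remain free in a large-block to skip reblocking */
	return((int)((int64_t)HAMMER_LARGEBLOCK_SIZE *
		     (100 - fill_pct) / 100));
}
#endif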