struct KeyValue *db_lookup(struct DB *const db, uint16_t klen,
                           const uint8_t *const key)
{
    uint8_t hash[HASHBYTES] __attribute__((aligned(8)));
    SHA1(key, klen, hash);
    stat_inc(&(db->stat.nr_get));
    const uint64_t ticket = rwlock_reader_lock(&(db->rwlock));
    // 1st lookup at active table[0]
    // 2nd lookup at active table[1]
    for (uint64_t i = 0; i < 2; i++) {
        struct Table *t = db->active_table[i];
        if (!t)
            continue;
        // immutable item
        struct KeyValue *const kv = table_lookup(t, klen, key, hash);
        if (kv) {
            rwlock_reader_unlock(&(db->rwlock), ticket);
            stat_inc(&(db->stat.nr_get_at_hit[i]));
            return kv;
        }
    }
    // 3rd lookup into vcroot
    struct KeyValue *const kv2 =
        recursive_lookup(&(db->stat), db->vcroot, klen, key, hash);
    rwlock_reader_unlock(&(db->rwlock), ticket);
    if (!kv2) {
        stat_inc(&(db->stat.nr_get_miss));
    }
    return kv2;
}
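/* A minimal sketch of a caller, showing the get path's contract as used
   above: the key is hashed once inside db_lookup, the ticketed reader
   lock is taken and released internally, and the caller receives either
   NULL (a miss) or a KeyValue it now owns.  The key literal and the
   assumption that the result is heap-allocated (released with free())
   are illustrative only, not guarantees of the original API.  */
static void example_get(struct DB *const db)
{
    const uint8_t key[] = "user:1001";
    struct KeyValue *const kv =
        db_lookup(db, (uint16_t)(sizeof(key) - 1), key);
    if (kv) {
        // consume the key/value payload here, then release our copy
        free(kv);
    }
}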
// backup db metadata
static bool db_dump_meta(struct DB *const db)
{
    char path_meta[256];
    char path_sym[256];
    const double sec0 = debug_time_sec();
    // prepare files
    sprintf(path_meta, "%s/%s/%s-%018.6lf", db->persist_dir,
            DB_META_BACKUP_DIR, DB_META_MAIN, sec0);
    FILE *const meta_out = fopen(path_meta, "w");
    assert(meta_out);
    const uint64_t ticket = rwlock_reader_lock(&(db->rwlock));
    // dump meta
    // write vc
    const bool r_meta = recursive_dump(db->vcroot, meta_out);
    assert(r_meta);
    // write mtid
    const uint64_t db_next_mtid = db->next_mtid;
    fprintf(meta_out, "%" PRIu64 "\n", db_next_mtid);
    fclose(meta_out);
    // create symlink for newest meta
    sprintf(path_sym, "%s/%s", db->persist_dir, DB_META_MAIN);
    if (access(path_sym, F_OK) == 0) {
        const int ru = unlink(path_sym);
        assert(ru == 0);
    }
    const int rsm = symlink(path_meta, path_sym);
    assert(rsm == 0);
    // dump container-maps
    for (int i = 0; db->cms_dump[i]; i++) {
        char path_cm_dump[256];
        sprintf(path_cm_dump, "%s/%s/%s-%01d-%018.6lf", db->persist_dir,
                DB_META_BACKUP_DIR, DB_META_CMAP_PREFIX, i, sec0);
        containermap_dump(db->cms_dump[i], path_cm_dump);
        // create symlink for newest container-map
        sprintf(path_sym, "%s/%s-%01d", db->persist_dir,
                DB_META_CMAP_PREFIX, i);
        if (access(path_sym, F_OK) == 0) {
            const int ru = unlink(path_sym);
            assert(ru == 0);
        }
        const int rs = symlink(path_cm_dump, path_sym);
        assert(rs == 0);
    }
    // done
    rwlock_reader_unlock(&(db->rwlock), ticket);
    db_log_diff(db, sec0, "Dumping Metadata Finished (%06" PRIx64 ")",
                db_next_mtid);
    fflush(db->log);
    return true;
}
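/* The dump above repoints a "newest" symlink twice using the same
   unlink-then-symlink sequence; a sketch of that step factored into a
   helper (the name replace_symlink is hypothetical, not in the
   original source).  Note the two syscalls are not atomic: a crash
   between unlink() and symlink() leaves no "newest" link at all, so
   recovery must be able to fall back to the timestamped files kept in
   DB_META_BACKUP_DIR.  */
static void replace_symlink(const char *const target,
                            const char *const linkpath)
{
    if (access(linkpath, F_OK) == 0) {
        const int ru = unlink(linkpath);
        assert(ru == 0);
    }
    const int rs = symlink(target, linkpath);
    assert(rs == 0);
}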
/* Sync the filesystem (pointed to by the variable CONTROL_PORT above)
   every INTERVAL seconds, as long as it's in the thread pointed to by
   the global variable PERIODIC_SYNC_THREAD.  */
static void
periodic_sync (int interval)
{
  for (;;)
    {
      error_t err;
      struct rpc_info link;

      /* This acts as a lock against creation of a new sync thread
         while we are in the process of syncing.  */
      err = ports_begin_rpc (pi, 0, &link);

      if (periodic_sync_thread != cthread_self ())
        {
          /* We've been superseded as the sync thread.  Just die
             silently.  */
          ports_end_rpc (pi, &link);
          return;
        }

      if (! err)
        {
          if (! diskfs_readonly)
            {
              rwlock_reader_lock (&diskfs_fsys_lock);
              /* Only sync if we need to, to avoid clearing the clean
                 flag when it's just been set.  Any other thread doing
                 a sync will have held the lock while it did its
                 work.  */
              if (_diskfs_diskdirty)
                {
                  diskfs_sync_everything (0);
                  diskfs_set_hypermetadata (0, 0);
                }
              rwlock_reader_unlock (&diskfs_fsys_lock);
            }
          ports_end_rpc (pi, &link);
        }

      /* Wait until next time.  */
      sleep (interval);
    }
}
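/* A sketch of the supersession protocol periodic_sync relies on:
   whoever (re)configures syncing forks a fresh cthread and stores it
   in PERIODIC_SYNC_THREAD; the superseded thread notices the mismatch
   at its next wakeup and exits.  This mirrors what a
   diskfs_set_sync_interval-style entry point would do, but the exact
   Hurd interface (including any RPC inhibition needed to keep the new
   thread from racing the store) is omitted here, so treat this as
   illustrative only.  */
static void
set_sync_interval_sketch (int interval)
{
  /* cthread_fork starts the new sync loop; assigning the result to
     periodic_sync_thread supersedes any previous sync thread.  */
  periodic_sync_thread = cthread_fork ((cthread_fn_t) periodic_sync,
                                       (any_t) (long) interval);
  cthread_detach (periodic_sync_thread);
}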
/* Block updates of the root node: take (and hold) a reader reference
   on update_rwlock until the matching root_update_enable releases it.
   An updater takes the writer side, so it waits until no disable
   section is active.  */
void
root_update_disable ()
{
  rwlock_reader_lock (&update_rwlock);
}
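/* The presumed counterpart, paired with root_update_disable in the
   original source: drop the reader reference so a blocked updater
   (which would bracket its work with rwlock_writer_lock /
   rwlock_writer_unlock on update_rwlock) can proceed.  The body is a
   sketch inferred from the pattern above.  */
void
root_update_enable ()
{
  rwlock_reader_unlock (&update_rwlock);
}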
/* Find the location on disk of page OFFSET in pager UPI.  Return the
   disk address (in disk blocks) in *ADDR.  If *NPLOCK is set on
   return, then release that lock after I/O on the data has completed.
   Set DISKSIZE to be the amount of valid data on disk.  (If this is
   an unallocated block, then set *ADDR to zero.)  ISREAD is non-zero
   iff this is for a pagein.  */
static error_t
find_address (struct user_pager_info *upi,
              vm_address_t offset,
              daddr_t *addr,
              int *disksize,
              struct rwlock **nplock,
              int isread)
{
  error_t err;
  struct rwlock *lock;

  assert (upi->type == DISK || upi->type == FILE_DATA);

  if (upi->type == DISK)
    {
      *disksize = __vm_page_size;
      *addr = offset / DEV_BSIZE;
      *nplock = 0;
      return 0;
    }
  else
    {
      struct iblock_spec indirs[NIADDR + 1];
      struct node *np;

      np = upi->np;

      if (isread)
        {
        try_again:
          /* If we should allow an unlocked pagein, do so.  (This
             still has a slight race; there could be a pageout in
             progress which is blocked on NP->dn->allocptrlock itself.
             In that case the pagein that should proceed unimpeded is
             blocked in the pager library waiting for the pageout to
             complete.  I think this is sufficiently rare to put it
             off for the time being.)  */
          spin_lock (&unlocked_pagein_lock);
          if (offset >= upi->allow_unlocked_pagein
              && (offset + vm_page_size
                  <= upi->allow_unlocked_pagein + upi->unlocked_pagein_length))
            {
              spin_unlock (&unlocked_pagein_lock);
              *nplock = 0;
              goto have_lock;
            }
          spin_unlock (&unlocked_pagein_lock);

          /* Block on the rwlock if necessary; but when we wake up,
             don't acquire it; check again from the top.  This is
             mutated inline from rwlock.h.  */
          lock = &np->dn->allocptrlock;
          mutex_lock (&lock->master);
          if (lock->readers == -1 || lock->writers_waiting)
            {
              lock->readers_waiting++;
              condition_wait (&lock->wakeup, &lock->master);
              lock->readers_waiting--;
              mutex_unlock (&lock->master);
              goto try_again;
            }
          lock->readers++;
          mutex_unlock (&lock->master);
          *nplock = lock;
        }
      else
        {
          rwlock_reader_lock (&np->dn->allocptrlock);
          *nplock = &np->dn->allocptrlock;
        }

    have_lock:
      if (offset >= np->allocsize)
        {
          if (*nplock)
            rwlock_reader_unlock (*nplock);
          if (isread)
            return EIO;
          else
            {
              *addr = 0;
              *disksize = 0;
              return 0;
            }
        }

      if (offset + __vm_page_size > np->allocsize)
        *disksize = np->allocsize - offset;
      else
        *disksize = __vm_page_size;

      err = fetch_indir_spec (np, lblkno (sblock, offset), indirs);
      if (err && *nplock)
        rwlock_reader_unlock (*nplock);
      else
        {
          if (indirs[0].bno)
            *addr = (fsbtodb (sblock, indirs[0].bno)
                     + blkoff (sblock, offset) / DEV_BSIZE);
          else
            *addr = 0;
        }

      return err;
    }
}
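/* A sketch of how a pagein path would consume find_address, modeled on
   the ufs pager's read callback.  diskfs_device_read_sync and the
   exact pager_read_page signature are recalled from libdiskfs-era code
   and may differ, so treat this as illustrative.  The key contract it
   demonstrates: whenever *NPLOCK comes back non-zero, the caller must
   rwlock_reader_unlock it once the I/O is finished.  */
error_t
pager_read_page_sketch (struct user_pager_info *pager, vm_offset_t page,
                        vm_address_t *buf, int *writelock)
{
  error_t err;
  struct rwlock *nplock;
  daddr_t addr;
  int disksize;

  err = find_address (pager, page, &addr, &disksize, &nplock, 1);
  if (err)
    return err;

  if (addr)
    {
      /* Allocated block: read it, zero-filling any tail of the page
         past the valid data on disk.  */
      err = diskfs_device_read_sync (addr, (void *) buf, disksize);
      if (!err && disksize != __vm_page_size)
        bzero ((void *) (*buf + disksize), __vm_page_size - disksize);
      *writelock = 0;
    }
  else
    {
      /* Hole: hand back a fresh zeroed page, and mark it so it cannot
         be paged back out as if it had a disk home.  */
      vm_allocate (mach_task_self (), buf, __vm_page_size, 1);
      *writelock = 1;
    }

  if (nplock)
    rwlock_reader_unlock (nplock);

  return err;
}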