/*
 * Append len bytes from p_addr to the file described by p_file_dir.
 *
 * The unused tail of the file's current last page is filled first. Any
 * remaining data is written into freshly allocated pages, which are chained
 * to each other and finally linked behind the old last page. On success the
 * updated directory node is written back under dir_no.
 *
 * Parameters:
 *   dir_no      directory node number passed to _write_directory_node_value
 *   p_file_dir  in-memory directory node of the file; parent_filesize and
 *               first_child_lastpage are updated in place
 *   p_addr      source buffer
 *   len         number of bytes to append
 *
 * Returns the number of bytes appended (the original len), 0 when len <= 0,
 * E_LACK_SPACE when the super page reports fewer idle pages than
 * PAGES_OCCUPIED (len), E_WT when writing into the existing last page fails,
 * or the negative value returned by the failing page/directory helper.
 *
 * NOTE(review): on error paths p_file_dir may already have been modified in
 * memory without being written back, and pages allocated in earlier loop
 * iterations are not released - confirm what callers expect.
 */
int32 _append_file (int32 dir_no, directory *p_file_dir, const void *p_addr, int32 len)
{
    /* Byte cursor: arithmetic on void pointers is not standard C. */
    const char *p_src = (const char *) p_addr;
    int32 i_first_page_alloc = PAGE_NULL;   /* head of the newly built chain */
    int32 i_pre_page_no = PAGE_NULL;        /* most recently completed page */
    int32 i_new_page = PAGE_NULL;           /* page being filled right now */
    int32 i_page_offset;
    int32 i_remained_bytes;
    int32 i_chunk;
    int32 i_ret;
    int32 i_requested = len;

    if (len <= 0)
        return 0;

    INIT_SUPER_PAGE ();
    if (g_p_super_page->idle_pages_num < PAGES_OCCUPIED (len))
        return E_LACK_SPACE;

    /* Offset of the first free byte inside the current last page
     * (0 means the last page is exactly full, or the file is empty). */
    i_page_offset = p_file_dir->parent_filesize & (PAGE_SIZE - 1);
    if (i_page_offset) {
        i_remained_bytes = PAGE_SIZE - i_page_offset;
        i_chunk = MIN (len, i_remained_bytes);

        if (_write_page_offset (p_file_dir->first_child_lastpage, i_page_offset, p_src, i_chunk) < 0)
            return E_WT;

        p_src += i_chunk;
        p_file_dir->parent_filesize += i_chunk;
        len -= i_chunk;
    }

    while (len > 0) {
        i_chunk = MIN (PAGE_SIZE, len);

        /* Start from PAGE_NULL so that a failed allocation which left the
         * out-parameter untouched cannot make us free a stale page number. */
        i_new_page = PAGE_NULL;
        if ((i_ret = _alloc_page (1, &i_new_page)) < 0) {
            if (PAGE_NULL != i_new_page)
                _free_page (i_new_page);
            return i_ret;
        }

        if ((i_ret = _write_page (i_new_page, p_src, i_chunk)) < 0)
            goto error;

        /* The new page is the end of the chain until a successor is linked. */
        if ((i_ret = _mark_page (i_new_page, PAGE_NULL)) < 0)
            goto error;

        /* Remember the first page allocated by this call. */
        if (PAGE_NULL == i_first_page_alloc)
            i_first_page_alloc = i_new_page;

        if (PAGE_NULL != i_pre_page_no) {
            if ((i_ret = _mark_page (i_pre_page_no, i_new_page)) < 0)
                goto error;
        }

        /* Account only for the bytes actually stored in this page. */
        p_src += i_chunk;
        p_file_dir->parent_filesize += i_chunk;
        len -= i_chunk;
        i_pre_page_no = i_new_page;
    }

    if (PAGE_NULL != i_first_page_alloc) {
        /* Hook the new chain behind the file's previous last page.
         * NOTE(review): for a file that owns no page yet the current value of
         * first_child_lastpage is passed through unchanged - confirm that
         * _mark_page accepts it. */
        if ((i_ret = _mark_page (p_file_dir->first_child_lastpage, i_first_page_alloc)) < 0)
            return i_ret;
        p_file_dir->first_child_lastpage = i_pre_page_no;
    }

    if ((i_ret = _write_directory_node_value (dir_no, p_file_dir)) < 0)
        return i_ret;

    return i_requested;

error:
    /* Release the page that was allocated in the failing iteration. */
    _free_page (i_new_page);
    return i_ret;
}
// TODO: Don't over-evict pages if waiting on refcounts to drop static void *extstore_maint_thread(void *arg) { store_maint_thread *me = (store_maint_thread *)arg; store_engine *e = me->e; struct extstore_page_data *pd = calloc(e->page_count, sizeof(struct extstore_page_data)); pthread_mutex_lock(&me->mutex); while (1) { int i; bool do_evict = false; unsigned int low_page = 0; uint64_t low_version = ULLONG_MAX; pthread_cond_wait(&me->cond, &me->mutex); pthread_mutex_lock(&e->mutex); // default freelist requires at least one page free. // specialized freelists fall back to default once full. if (e->page_free == 0 || e->page_freelist == NULL) { do_evict = true; } pthread_mutex_unlock(&e->mutex); memset(pd, 0, sizeof(struct extstore_page_data) * e->page_count); for (i = 0; i < e->page_count; i++) { store_page *p = &e->pages[i]; pthread_mutex_lock(&p->mutex); pd[p->id].free_bucket = p->free_bucket; if (p->active || p->free) { pthread_mutex_unlock(&p->mutex); continue; } if (p->obj_count > 0 && !p->closed) { pd[p->id].version = p->version; pd[p->id].bytes_used = p->bytes_used; pd[p->id].bucket = p->bucket; // low_version/low_page are only used in the eviction // scenario. when we evict, it's only to fill the default page // bucket again. // TODO: experiment with allowing evicting up to a single page // for any specific free bucket. this is *probably* required // since it could cause a load bias on default-only devices? if (p->free_bucket == 0 && p->version < low_version) { low_version = p->version; low_page = i; } } if ((p->obj_count == 0 || p->closed) && p->refcount == 0) { _free_page(e, p); // Found a page to free, no longer need to evict. 
do_evict = false; } pthread_mutex_unlock(&p->mutex); } if (do_evict && low_version != ULLONG_MAX) { store_page *p = &e->pages[low_page]; E_DEBUG("EXTSTORE: evicting page [%d] [v: %llu]\n", p->id, (unsigned long long) p->version); pthread_mutex_lock(&p->mutex); if (!p->closed) { p->closed = true; STAT_L(e); e->stats.page_evictions++; e->stats.objects_evicted += p->obj_count; e->stats.bytes_evicted += p->bytes_used; STAT_UL(e); if (p->refcount == 0) { _free_page(e, p); } } pthread_mutex_unlock(&p->mutex); } // copy the page data into engine context so callers can use it from // the stats lock. STAT_L(e); memcpy(e->stats.page_data, pd, sizeof(struct extstore_page_data) * e->page_count); STAT_UL(e); } return NULL; }