/**
 * Remove all extended keys for the given @a page from the
 * extended key cache.
 */
static ham_status_t
my_fun_free_page_extkeys(ham_btree_t *be, ham_page_t *page, ham_u32_t flags)
{
    ham_db_t *db=be_get_db(be);

    ham_assert(page_get_owner(page) == db, (0));
    ham_assert(0 == (flags & ~DB_MOVE_TO_FREELIST), (0));

    /*
     * if this page has a header, and it's either a B-Tree root page or
     * a B-Tree index page: remove all extended keys from the cache,
     * and/or free their blobs
     */
    if (page_get_pers(page)
            && (!(page_get_npers_flags(page)&PAGE_NPERS_NO_HEADER))
            && (page_get_type(page)==PAGE_TYPE_B_ROOT
                || page_get_type(page)==PAGE_TYPE_B_INDEX)) {
        ham_size_t i;
        ham_offset_t blobid;
        int_key_t *bte;
        btree_node_t *node=ham_page_get_btree_node(page);
        extkey_cache_t *c;

        ham_assert(db, ("Must be set as page owner when this is a Btree page"));
        ham_assert(db==page_get_owner(page), (""));
        c=db_get_extkey_cache(db);

        for (i=0; i<btree_node_get_count(node); i++) {
            bte=btree_node_get_key(db, node, i);
            if (key_get_flags(bte)&KEY_IS_EXTENDED) {
                blobid=key_get_extended_rid(db, bte);
                if (env_get_rt_flags(db_get_env(db))&HAM_IN_MEMORY_DB) {
                    /* delete the blobid to prevent that it's freed twice */
                    *(ham_offset_t *)(key_get_key(bte)+
                        (db_get_keysize(db)-sizeof(ham_offset_t)))=0;
                }
                //(void)key_erase_record(db, bte, 0, BLOB_FREE_ALL_DUPES);
                if (c)
                    (void)extkey_cache_remove(c, blobid);
            }
        }
    }

    return (HAM_SUCCESS);
}
void put_page_type(struct page_info *page)
{
    unsigned long nx, x, y = page->u.inuse.type_info;

    do {
        x  = y;
        nx = x - 1;

        ASSERT((x & PGT_count_mask) != 0);

        /*
         * The page should always be validated while a reference is held. The
         * exception is during domain destruction, when we forcibly invalidate
         * page-table pages if we detect a referential loop.
         * See domain.c:relinquish_list().
         */
        ASSERT((x & PGT_validated) || page_get_owner(page)->is_dying);

        if ( unlikely((nx & PGT_count_mask) == 0) )
        {
            /* Record TLB information for flush later. */
            page->tlbflush_timestamp = tlbflush_current_time();
        }
    }
    while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
}
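/*
 * Illustrative sketch only, not part of the sources above: the lock-free
 * read-modify-write pattern that put_page_type() (and get_page_type() further
 * below) is built on. GCC's __sync_val_compare_and_swap() stands in for Xen's
 * cmpxchg(); the helper name atomic_transform() and the xform callback are
 * hypothetical.
 */
static unsigned long atomic_transform(unsigned long *word,
                                      unsigned long (*xform)(unsigned long))
{
    unsigned long x, y = *word;

    do {
        x = y;                          /* snapshot the current value */
        /* publish xform(x) only if nobody changed the word meanwhile */
        y = __sync_val_compare_and_swap(word, x, xform(x));
    } while ( y != x );                 /* another CPU interfered: retry */

    return xform(x);                    /* the value that was actually stored */
}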
/* get_page() to prevent another vcpu freeing the page. */
static int xencomm_get_page(unsigned long paddr, struct page_info **page)
{
    unsigned long maddr = paddr_to_maddr(paddr);
    if ( maddr == 0 )
        return -EFAULT;

    *page = maddr_to_page(maddr);
    if ( get_page(*page, current->domain) == 0 )
    {
        if ( page_get_owner(*page) != current->domain )
        {
            /*
             * This page might be a page granted by another domain, or
             * this page is freed with decrease reservation hypercall at
             * the same time.
             */
            gdprintk(XENLOG_WARNING,
                     "bad page is passed. paddr 0x%lx maddr 0x%lx\n",
                     paddr, maddr);
            return -EFAULT;
        }

        /* Try again. */
        cpu_relax();
        return -EAGAIN;
    }

    return 0;
}
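/*
 * Illustrative sketch, not from the source tree: how a caller might drive
 * xencomm_get_page() above, retrying on the transient -EAGAIN and dropping
 * the reference with put_page() when done. The wrapper name
 * xencomm_access_page() and its body are hypothetical.
 */
static int xencomm_access_page(unsigned long paddr)
{
    struct page_info *page;
    int rc;

    do {
        rc = xencomm_get_page(paddr, &page);
    } while ( rc == -EAGAIN );          /* lost a race with the owner; retry */

    if ( rc != 0 )
        return rc;                      /* -EFAULT: bad, foreign or freed page */

    /* ... access the page while the reference pins it ... */

    put_page(page);                     /* release the reference */
    return 0;
}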
void share_xen_page_with_guest(
    struct page_info *page, struct domain *d, int readonly)
{
    if ( page_get_owner(page) == d )
        return;

    spin_lock(&d->page_alloc_lock);

    /* The incremented type count pins as writable or read-only. */
    page->u.inuse.type_info  = (readonly ? PGT_none : PGT_writable_page);
    page->u.inuse.type_info |= PGT_validated | 1;

    page_set_owner(page, d);
    wmb(); /* install valid domain ptr before updating refcnt. */
    ASSERT((page->count_info & ~PGC_xen_heap) == 0);

    /* Only add to the allocation list if the domain isn't dying. */
    if ( !d->is_dying )
    {
        page->count_info |= PGC_allocated | 1;
        if ( unlikely(d->xenheap_pages++ == 0) )
            get_knownalive_domain(d);
        page_list_add_tail(page, &d->xenpage_list);
    }

    spin_unlock(&d->page_alloc_lock);
}
static int
fpswa_get_domain_addr(struct vcpu *v, unsigned long gpaddr, size_t size,
                      void **virt, struct page_info **page, const char *name)
{
    int cross_page_boundary;

    if (gpaddr == 0) {
        *virt = 0;
        return 0;
    }

    cross_page_boundary = (((gpaddr & ~PAGE_MASK) + size) > PAGE_SIZE);
    if (unlikely(cross_page_boundary)) {
        /* this case isn't implemented */
        gdprintk(XENLOG_ERR,
                 "%s: fpswa hypercall is called with "
                 "page crossing argument %s 0x%lx\n",
                 __func__, name, gpaddr);
        return -ENOSYS;
    }

again:
    *virt = domain_mpa_to_imva(v->domain, gpaddr);
    *page = virt_to_page(*virt);
    if (get_page(*page, current->domain) == 0) {
        if (page_get_owner(*page) != current->domain) {
            *page = NULL;
            return -EFAULT;
        }
        goto again;
    }

    return 0;
}
void share_xen_page_with_guest(
    struct page_info *page, struct domain *d, int readonly)
{
    if ( page_get_owner(page) == d )
        return;

    /* this causes us to leak pages in the Domain and results in
     * Zombie domains, I think we are missing a piece, until we find
     * it we disable the following code */
    set_gpfn_from_mfn(page_to_mfn(page), INVALID_M2P_ENTRY);

    spin_lock(&d->page_alloc_lock);

    /* The incremented type count pins as writable or read-only. */
    page->u.inuse.type_info  = (readonly ? PGT_none : PGT_writable_page);
    page->u.inuse.type_info |= PGT_validated | 1;

    page_set_owner(page, d);
    wmb(); /* install valid domain ptr before updating refcnt. */
    ASSERT(page->count_info == 0);

    /* Only add to the allocation list if the domain isn't dying. */
    if ( !d->is_dying )
    {
        page->count_info |= PGC_allocated | 1;
        if ( unlikely(d->xenheap_pages++ == 0) )
            get_knownalive_domain(d);
        list_add_tail(&page->list, &d->xenpage_list);
    }

    spin_unlock(&d->page_alloc_lock);
}
static void xenoprof_shared_gmfn_with_guest(
    struct domain *d, unsigned long maddr, unsigned long gmaddr, int npages)
{
    int i;

    for ( i = 0; i < npages; i++, maddr += PAGE_SIZE, gmaddr += PAGE_SIZE )
    {
        BUG_ON(page_get_owner(maddr_to_page(maddr)) != d);
        xenoprof_shared_gmfn(d, gmaddr, maddr);
    }
}
static void unshare_xenoprof_page_with_guest(struct xenoprof *x)
{
    int i, npages = x->npages;
    unsigned long mfn = virt_to_mfn(x->rawbuf);

    for ( i = 0; i < npages; i++ )
    {
        struct page_info *page = mfn_to_page(mfn + i);

        BUG_ON(page_get_owner(page) != current->domain);
        if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
            put_page(page);
    }
}
void hap_free_p2m_page(struct domain *d, struct page_info *pg)
{
    hap_lock(d);

    ASSERT(page_get_owner(pg) == d);
    /* Should have just the one ref we gave it in alloc_p2m_page() */
    if ( (pg->count_info & PGC_count_mask) != 1 )
        HAP_ERROR("Odd p2m page count c=%#x t=%"PRtype_info"\n",
                  pg->count_info, pg->u.inuse.type_info);
    pg->count_info = 0;
    /* Free should not decrement domain's total allocation, since
     * these pages were allocated without an owner. */
    page_set_owner(pg, NULL);
    free_domheap_page(pg);
    d->arch.paging.hap.p2m_pages--;
    ASSERT(d->arch.paging.hap.p2m_pages >= 0);

    hap_unlock(d);
}
ham_status_t
txn_abort(ham_txn_t *txn, ham_u32_t flags)
{
    ham_status_t st;
    ham_env_t *env=txn_get_env(txn);

    /*
     * are cursors attached to this txn? if yes, fail
     */
    if (txn_get_cursor_refcount(txn)) {
        ham_trace(("transaction cannot be aborted until all attached "
                   "cursors are closed"));
        return HAM_CURSOR_STILL_OPEN;
    }

    if (env_get_log(env) && !(txn_get_flags(txn)&HAM_TXN_READ_ONLY)) {
        st=ham_log_append_txn_abort(env_get_log(env), txn);
        if (st)
            return st;
    }

    env_set_txn(env, 0);

    /*
     * undo all operations from this transaction
     *
     * this includes allocated pages (they're moved to the freelist),
     * deleted pages (they're un-deleted) and other modifications (will
     * re-create the original page from the logfile)
     *
     * keep txn_get_pagelist(txn) intact during every round, so no
     * local var for this one.
     */
    while (txn_get_pagelist(txn)) {
        ham_page_t *head = txn_get_pagelist(txn);

        if (!(flags & DO_NOT_NUKE_PAGE_STATS)) {
            /*
             * nuke critical statistics, such as tracked outer bounds; imagine,
             * for example, a failing erase transaction which, through erasing
             * the top-most key, lowers the actual upper bound, after which
             * the transaction fails at some later point in life. Now if we
             * wouldn't 'rewind' our bounds-statistics, we would have a
             * situation where a subsequent out-of-bounds insert (~ append)
             * would possibly FAIL due to the hinter using incorrect bounds
             * information then!
             *
             * Hence we 'reverse' our statistics here and the easiest route
             * is to just nuke the critical bits; subsequent find/insert/erase
             * operations will ensure that the stats will get updated again,
             * anyhow. All we lose then is a few subsequent operations, which
             * might have been hinted if we had played a smarter game of
             * statistics 'reversal'. So be it.
             */
            ham_db_t *db = page_get_owner(head);

            /*
             * only need to do this for index pages anyhow, and those are the
             * ones which have their 'ownership' set.
             */
            if (db) {
                stats_page_is_nuked(db, head, HAM_FALSE);
            }
        }

        ham_assert(page_is_in_list(txn_get_pagelist(txn), head,
                    PAGE_LIST_TXN), (0));
        txn_get_pagelist(txn) = page_list_remove(head, PAGE_LIST_TXN, head);

        /* if this page was allocated by this transaction, then we can
         * move the whole page to the freelist */
        if (page_get_alloc_txn_id(head)==txn_get_id(txn)) {
            (void)freel_mark_free(env, 0, page_get_self(head),
                    env_get_pagesize(env), HAM_TRUE);
        }
        else {
            /* remove the 'delete pending' flag */
            page_set_npers_flags(head,
                    page_get_npers_flags(head)&~PAGE_NPERS_DELETE_PENDING);

            /* if the page is dirty, and RECOVERY is enabled: recreate
             * the original, unmodified page from the log */
            if (env_get_log(env) && page_is_dirty(head)) {
                st=ham_log_recreate(env_get_log(env), head);
                if (st)
                    return (st);
                /*page_set_undirty(head); */
            }
        }

        /* page is no longer in use */
        page_release_ref(head);
    }

    ham_assert(txn_get_pagelist(txn)==0, (0));

    return (0);
}
static ham_status_t
__insert_split(ham_page_t *page, ham_key_t *key,
        ham_offset_t rid, insert_scratchpad_t *scratchpad,
        insert_hints_t *hints)
{
    int cmp;
    ham_status_t st;
    ham_page_t *newpage, *oldsib;
    int_key_t *nbte, *obte;
    btree_node_t *nbtp, *obtp, *sbtp;
    ham_size_t count, keysize;
    ham_db_t *db=page_get_owner(page);
    ham_env_t *env = db_get_env(db);
    ham_key_t pivotkey, oldkey;
    ham_offset_t pivotrid;
    ham_u16_t pivot;
    ham_bool_t pivot_at_end=HAM_FALSE;

    ham_assert(page_get_owner(page), (0));
    ham_assert(device_get_env(page_get_device(page))
            == db_get_env(page_get_owner(page)), (0));

    ham_assert(hints->force_append == HAM_FALSE, (0));

    keysize=db_get_keysize(db);

    /*
     * allocate a new page
     */
    hints->cost++;
    st=db_alloc_page(&newpage, db, PAGE_TYPE_B_INDEX, 0);
    ham_assert(st ? newpage == NULL : 1, (0));
    ham_assert(!st ? newpage != NULL : 1, (0));
    if (st)
        return st;
    ham_assert(page_get_owner(newpage), (""));
    /* clear the node header */
    memset(page_get_payload(newpage), 0, sizeof(btree_node_t));

    stats_page_is_nuked(db, page, HAM_TRUE);

    /*
     * move half of the key/rid-tuples to the new page
     *
     * !! recno: keys are sorted; we do a "lazy split"
     */
    nbtp=ham_page_get_btree_node(newpage);
    nbte=btree_node_get_key(db, nbtp, 0);
    obtp=ham_page_get_btree_node(page);
    obte=btree_node_get_key(db, obtp, 0);
    count=btree_node_get_count(obtp);

    /*
     * for databases with sequential access (this includes recno databases):
     * do not split in the middle, but at the very end of the page
     *
     * if this page is the right-most page in the index, and this key is
     * inserted at the very end, then we select the same pivot as for
     * sequential access
     */
    if (db_get_data_access_mode(db)&HAM_DAM_SEQUENTIAL_INSERT)
        pivot_at_end=HAM_TRUE;
    else if (btree_node_get_right(obtp)==0) {
        cmp=key_compare_pub_to_int(db, page, key,
                btree_node_get_count(obtp)-1);
        if (cmp>0)
            pivot_at_end=HAM_TRUE;
    }

    /*
     * internal pages set the count of the new page to count-pivot-1 (because
     * the pivot element will become ptr_left of the new page).
     * by using pivot=count-2 we make sure that at least 1 element will remain
     * in the new node.
     */
    if (pivot_at_end) {
        pivot=count-2;
    }
    else {
        pivot=count/2;
    }

    /*
     * uncouple all cursors
     */
    st=bt_uncouple_all_cursors(page, pivot);
    if (st)
        return (st);

    /*
     * if we split a leaf, we'll insert the pivot element in the leaf
     * page, too. in internal nodes, we don't insert it, but propagate
     * it to the parent node only.
     */
    if (btree_node_is_leaf(obtp)) {
        hints->cost += stats_memmove_cost(
                (db_get_int_key_header_size()+keysize)*(count-pivot));
        memcpy((char *)nbte,
               ((char *)obte)+(db_get_int_key_header_size()+keysize)*pivot,
               (db_get_int_key_header_size()+keysize)*(count-pivot));
    }
    else {
        hints->cost += stats_memmove_cost(
                (db_get_int_key_header_size()+keysize)*(count-pivot-1));
        memcpy((char *)nbte,
               ((char *)obte)+(db_get_int_key_header_size()+keysize)*(pivot+1),
               (db_get_int_key_header_size()+keysize)*(count-pivot-1));
    }

    /*
     * store the pivot element, we'll need it later to propagate it
     * to the parent page
     */
    nbte=btree_node_get_key(db, obtp, pivot);
    memset(&pivotkey, 0, sizeof(pivotkey));
    memset(&oldkey, 0, sizeof(oldkey));
    oldkey.data=key_get_key(nbte);
    oldkey.size=key_get_size(nbte);
    oldkey._flags=key_get_flags(nbte);
    st = util_copy_key(db, &oldkey, &pivotkey);
    if (st) {
        (void)db_free_page(newpage, DB_MOVE_TO_FREELIST);
        goto fail_dramatically;
    }
    pivotrid=page_get_self(newpage);

    /*
     * adjust the page count
     */
    if (btree_node_is_leaf(obtp)) {
        btree_node_set_count(obtp, pivot);
        btree_node_set_count(nbtp, count-pivot);
    }
    else {
        btree_node_set_count(obtp, pivot);
        btree_node_set_count(nbtp, count-pivot-1);
    }

    /*
     * if we're in an internal page: fix the ptr_left of the new page
     * (it points to the ptr of the pivot key)
     */
    if (!btree_node_is_leaf(obtp)) {
        /*
         * nbte still contains the pivot key
         */
        btree_node_set_ptr_left(nbtp, key_get_ptr(nbte));
    }

    /*
     * insert the new element
     */
    hints->cost++;
    cmp=key_compare_pub_to_int(db, page, key, pivot);
    if (cmp < -1) {
        st = (ham_status_t)cmp;
        goto fail_dramatically;
    }

    if (cmp>=0)
        st=__insert_nosplit(newpage, key, rid,
                scratchpad->record, scratchpad->cursor, hints);
    else
        st=__insert_nosplit(page, key, rid,
                scratchpad->record, scratchpad->cursor, hints);
    if (st) {
        goto fail_dramatically;
    }
    scratchpad->cursor=0; /* don't overwrite cursor if __insert_nosplit
                             is called again */

    /*
     * fix the double-linked list of pages, and mark the pages as dirty
     */
    if (btree_node_get_right(obtp)) {
        st=db_fetch_page(&oldsib, db, btree_node_get_right(obtp), 0);
        if (st)
            goto fail_dramatically;
    }
    else {
        oldsib=0;
    }

    if (oldsib) {
        st=ham_log_add_page_before(oldsib);
        if (st)
            goto fail_dramatically;
    }

    btree_node_set_left (nbtp, page_get_self(page));
    btree_node_set_right(nbtp, btree_node_get_right(obtp));
    btree_node_set_right(obtp, page_get_self(newpage));
    if (oldsib) {
        sbtp=ham_page_get_btree_node(oldsib);
        btree_node_set_left(sbtp, page_get_self(newpage));
        page_set_dirty(oldsib, env);
    }
    page_set_dirty(newpage, env);
    page_set_dirty(page, env);

    /*
     * propagate the pivot key to the parent page
     */
    ham_assert(!(scratchpad->key.flags & HAM_KEY_USER_ALLOC), (0));
    if (scratchpad->key.data)
        allocator_free(env_get_allocator(env), scratchpad->key.data);
    scratchpad->key=pivotkey;
    scratchpad->rid=pivotrid;
    ham_assert(!(scratchpad->key.flags & HAM_KEY_USER_ALLOC), (0));

    return (SPLIT);

fail_dramatically:
    ham_assert(!(pivotkey.flags & HAM_KEY_USER_ALLOC), (0));
    if (pivotkey.data)
        allocator_free(env_get_allocator(env), pivotkey.data);
    return st;
}
static ham_status_t
__insert_nosplit(ham_page_t *page, ham_key_t *key,
        ham_offset_t rid, ham_record_t *record,
        ham_bt_cursor_t *cursor, insert_hints_t *hints)
{
    ham_status_t st;
    ham_u16_t count;
    ham_size_t keysize;
    ham_size_t new_dupe_id = 0;
    int_key_t *bte = 0;
    btree_node_t *node;
    ham_db_t *db=page_get_owner(page);
    ham_bool_t exists = HAM_FALSE;
    ham_s32_t slot;

    ham_assert(page_get_owner(page), (0));
    ham_assert(device_get_env(page_get_device(page))
            == db_get_env(page_get_owner(page)), (0));

    node=ham_page_get_btree_node(page);
    count=btree_node_get_count(node);
    keysize=db_get_keysize(db);

    if (btree_node_get_count(node)==0) {
        slot = 0;
    }
    else if (hints->force_append) {
        slot = count;
    }
    else if (hints->force_prepend) {
        /* insert at beginning; shift all up by one */
        slot = 0;
    }
    else {
        int cmp;

        hints->cost++;
        st=btree_get_slot(db, page, key, &slot, &cmp);
        if (st)
            return (st);

        /* insert the new key at the beginning? */
        if (slot == -1) {
            slot = 0;
        }
        else {
            /*
             * key exists already
             */
            if (cmp == 0) {
                if (hints->flags & HAM_OVERWRITE) {
                    /*
                     * no need to overwrite the key - it already exists!
                     * however, we have to overwrite the data!
                     */
                    if (!btree_node_is_leaf(node))
                        return (HAM_SUCCESS);
                }
                else if (!(hints->flags & HAM_DUPLICATE))
                    return (HAM_DUPLICATE_KEY);

                /* do NOT shift keys up to make room; just overwrite the
                 * current [slot] */
                exists = HAM_TRUE;
            }
            else {
                /*
                 * otherwise, if the new key is > than the slot key, move to
                 * the next slot
                 */
                if (cmp > 0) {
                    slot++;
                }
            }
        }
    }

    /*
     * in any case, uncouple the cursors and see if we must shift any
     * elements to the right
     */
    bte=btree_node_get_key(db, node, slot);
    ham_assert(bte, (0));

    if (!exists) {
        if (count > slot) {
            /* uncouple all cursors & shift any elements following [slot] */
            st=bt_uncouple_all_cursors(page, slot);
            if (st)
                return (st);

            hints->cost += stats_memmove_cost(
                    (db_get_int_key_header_size()+keysize)*(count-slot));
            memmove(((char *)bte)+db_get_int_key_header_size()+keysize, bte,
                    (db_get_int_key_header_size()+keysize)*(count-slot));
        }

        /*
         * if a new key is created or inserted: initialize it with zeroes
         */
        memset(bte, 0, db_get_int_key_header_size()+keysize);
    }

    /*
     * if we're in the leaf: insert, overwrite or append the blob
     * (depends on the flags)
     */
    if (btree_node_is_leaf(node)) {
        ham_status_t st;

        hints->cost++;
        st=key_set_record(db, bte, record,
                cursor ? bt_cursor_get_dupe_id(cursor) : 0,
                hints->flags, &new_dupe_id);
        if (st)
            return (st);

        hints->processed_leaf_page = page;
        hints->processed_slot = slot;
    }
    else {
        key_set_ptr(bte, rid);
    }

    page_set_dirty(page, db_get_env(db));
    key_set_size(bte, key->size);

    /*
     * set a flag if the key is extended, and does not fit into the
     * btree
     */
    if (key->size > db_get_keysize(db))
        key_set_flags(bte, key_get_flags(bte)|KEY_IS_EXTENDED);

    /*
     * if we have a cursor: couple it to the new key
     *
     * the cursor always points to NIL.
     */
    if (cursor) {
        if ((st=bt_cursor_set_to_nil(cursor)))
            return (st);

        ham_assert(!(bt_cursor_get_flags(cursor)&BT_CURSOR_FLAG_UNCOUPLED),
                ("coupling an uncoupled cursor, but need a nil-cursor"));
        ham_assert(!(bt_cursor_get_flags(cursor)&BT_CURSOR_FLAG_COUPLED),
                ("coupling a coupled cursor, but need a nil-cursor"));
        bt_cursor_set_flags(cursor,
                bt_cursor_get_flags(cursor)|BT_CURSOR_FLAG_COUPLED);
        bt_cursor_set_coupled_page(cursor, page);
        bt_cursor_set_coupled_index(cursor, slot);
        bt_cursor_set_dupe_id(cursor, new_dupe_id);
        memset(bt_cursor_get_dupe_cache(cursor), 0, sizeof(dupe_entry_t));
        page_add_cursor(page, (ham_cursor_t *)cursor);
    }

    /*
     * if we've overwritten a key: no need to continue, we're done
     */
    if (exists)
        return (0);

    /*
     * we insert the extended key, if necessary
     */
    key_set_key(bte, key->data,
            db_get_keysize(db) < key->size ? db_get_keysize(db) : key->size);

    /*
     * if we need an extended key, allocate a blob and store
     * the blob-id in the key
     */
    if (key->size > db_get_keysize(db)) {
        ham_offset_t blobid;

        key_set_key(bte, key->data, db_get_keysize(db));

        st=key_insert_extended(&blobid, db, page, key);
        ham_assert(st ? blobid == 0 : 1, (0));
        if (!blobid)
            return st ? st : HAM_INTERNAL_ERROR;

        key_set_extended_rid(db, bte, blobid);
    }

    /*
     * update the btree node-header
     */
    btree_node_set_count(node, count+1);

    return (0);
}
static ham_status_t
__insert_recursive(ham_page_t *page, ham_key_t *key,
        ham_offset_t rid, insert_scratchpad_t *scratchpad,
        insert_hints_t *hints)
{
    ham_status_t st;
    ham_page_t *child;
    ham_db_t *db=page_get_owner(page);
    btree_node_t *node=ham_page_get_btree_node(page);

    /*
     * if we've reached a leaf: insert the key
     */
    if (btree_node_is_leaf(node))
        return (__insert_in_page(page, key, rid, scratchpad, hints));

    /*
     * otherwise traverse the root down to the leaf
     */
    hints->cost += 2;
    st=btree_traverse_tree(&child, 0, db, page, key);
    if (!child)
        return st ? st : HAM_INTERNAL_ERROR;

    /*
     * and call this function recursively
     */
    st=__insert_recursive(child, key, rid, scratchpad, hints);
    switch (st) {
        /*
         * if we're done, we're done
         */
        case HAM_SUCCESS:
            break;

        /*
         * if we tried to insert a duplicate key, we're done, too
         */
        case HAM_DUPLICATE_KEY:
            break;

        /*
         * the child was split, and we have to insert a new key/rid-pair.
         */
        case SPLIT:
            hints->flags |= HAM_OVERWRITE;
            st=__insert_in_page(page, &scratchpad->key, scratchpad->rid,
                    scratchpad, hints);
            ham_assert(!(scratchpad->key.flags & HAM_KEY_USER_ALLOC), (0));
            hints->flags = hints->original_flags;
            break;

        /*
         * every other return value is unexpected and shouldn't happen
         */
        default:
            break;
    }

    return (st);
}
static ham_status_t
__insert_cursor(ham_btree_t *be, ham_key_t *key, ham_record_t *record,
        ham_bt_cursor_t *cursor, insert_hints_t *hints)
{
    ham_status_t st;
    ham_page_t *root;
    ham_db_t *db=be_get_db(be);
    ham_env_t *env = db_get_env(db);
    insert_scratchpad_t scratchpad;

    ham_assert(hints->force_append == HAM_FALSE, (0));
    ham_assert(hints->force_prepend == HAM_FALSE, (0));

    /*
     * initialize the scratchpad
     */
    memset(&scratchpad, 0, sizeof(scratchpad));
    scratchpad.be=be;
    scratchpad.record=record;
    scratchpad.cursor=cursor;

    /*
     * get the root-page...
     */
    ham_assert(btree_get_rootpage(be)!=0, ("btree has no root page"));
    st=db_fetch_page(&root, db, btree_get_rootpage(be), 0);
    ham_assert(st ? root == NULL : 1, (0));
    if (st)
        return st;

    /*
     * ... and start the recursion
     */
    st=__insert_recursive(root, key, 0, &scratchpad, hints);

    /*
     * if the root page was split, we have to create a new
     * root page.
     */
    if (st==SPLIT) {
        ham_page_t *newroot;
        btree_node_t *node;

        /*
         * the root-page will be changed...
         */
        st=ham_log_add_page_before(root);
        if (st)
            return (st);

        /*
         * allocate a new root page
         */
        st=db_alloc_page(&newroot, db, PAGE_TYPE_B_ROOT, 0);
        ham_assert(st ? newroot == NULL : 1, (0));
        if (st)
            return (st);
        ham_assert(page_get_owner(newroot), (""));
        /* clear the node header */
        memset(page_get_payload(newroot), 0, sizeof(btree_node_t));

        stats_page_is_nuked(db, root, HAM_TRUE);

        /*
         * insert the pivot element and the ptr_left
         */
        node=ham_page_get_btree_node(newroot);
        btree_node_set_ptr_left(node, btree_get_rootpage(be));
        st=__insert_nosplit(newroot, &scratchpad.key,
                scratchpad.rid, scratchpad.record, scratchpad.cursor, hints);
        ham_assert(!(scratchpad.key.flags & HAM_KEY_USER_ALLOC), (0));
        scratchpad.cursor=0; /* don't overwrite cursor if __insert_nosplit
                                is called again */
        if (st) {
            ham_assert(!(scratchpad.key.flags & HAM_KEY_USER_ALLOC), (0));
            if (scratchpad.key.data)
                allocator_free(env_get_allocator(env), scratchpad.key.data);
            return (st);
        }

        /*
         * set the new root page
         *
         * !!
         * do NOT delete the old root page - it's still in use!
         *
         * also don't forget to flush the backend - otherwise the header
         * page of the database will not contain the updated information.
         * The backend is flushed when the database is closed, but if
         * recovery is enabled then the flush here is critical.
         */
        btree_set_rootpage(be, page_get_self(newroot));
        be_set_dirty(be, HAM_TRUE);
        be->_fun_flush(be);

        /*
         * As we re-purpose a page, we will reset its pagecounter
         * as well to signal its first use as the new type assigned
         * here.
         */
        if (env_get_cache(env) && (page_get_type(root)!=PAGE_TYPE_B_INDEX))
            cache_update_page_access_counter(root, env_get_cache(env), 0);

        page_set_type(root, PAGE_TYPE_B_INDEX);
        page_set_dirty(root, env);
        page_set_dirty(newroot, env);

        /* the root page was modified (btree_set_rootpage) - make sure that
         * it's logged */
        if (env_get_rt_flags(env)&HAM_ENABLE_RECOVERY) {
            st=txn_add_page(env_get_txn(env), env_get_header_page(env),
                    HAM_TRUE);
            if (st)
                return (st);
        }
    }

    /*
     * release the scratchpad-memory and return to caller
     */
    ham_assert(!(scratchpad.key.flags & HAM_KEY_USER_ALLOC), (0));
    if (scratchpad.key.data)
        allocator_free(env_get_allocator(env), scratchpad.key.data);

    return (st);
}
int get_page_type(struct page_info *page, unsigned long type)
{
    unsigned long nx, x, y = page->u.inuse.type_info;

    ASSERT(!(type & ~PGT_type_mask));

 again:
    do {
        x  = y;
        nx = x + 1;
        if ( unlikely((nx & PGT_count_mask) == 0) )
        {
            MEM_LOG("Type count overflow on pfn %lx", page_to_mfn(page));
            return 0;
        }
        else if ( unlikely((x & PGT_count_mask) == 0) )
        {
            if ( (x & PGT_type_mask) != type )
            {
                /*
                 * On type change we check to flush stale TLB entries. This
                 * may be unnecessary (e.g., page was GDT/LDT) but those
                 * circumstances should be very rare.
                 */
                cpumask_t mask = page_get_owner(page)->domain_dirty_cpumask;
                tlbflush_filter(mask, page->tlbflush_timestamp);

                if ( unlikely(!cpus_empty(mask)) )
                {
                    perfc_incr(need_flush_tlb_flush);
                    flush_tlb_mask(mask);
                }

                /* We lose existing type, back pointer, and validity. */
                nx &= ~(PGT_type_mask | PGT_validated);
                nx |= type;

                /* No special validation needed for writable pages. */
                /* Page tables and GDT/LDT need to be scanned for validity. */
                if ( type == PGT_writable_page )
                    nx |= PGT_validated;
            }
        }
        else if ( unlikely((x & PGT_type_mask) != type) )
        {
            return 0;
        }
        else if ( unlikely(!(x & PGT_validated)) )
        {
            /* Someone else is updating validation of this page. Wait... */
            while ( (y = page->u.inuse.type_info) == x )
                cpu_relax();
            goto again;
        }
    }
    while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );

    if ( unlikely(!(nx & PGT_validated)) )
    {
        /* No one else is updating simultaneously. */
        __set_bit(_PGT_validated, &page->u.inuse.type_info);
    }

    return 1;
}
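/*
 * Illustrative sketch, not from the source tree: the usual pairing of
 * get_page_type()/put_page_type() around a typed use of a page. The wrapper
 * name use_page_as_writable() is hypothetical; PGT_writable_page is the one
 * type the code above marks PGT_validated without a validation scan.
 */
static int use_page_as_writable(struct page_info *page)
{
    if ( !get_page_type(page, PGT_writable_page) )
        return 0;                       /* type conflict or count overflow */

    /* ... the page may be written while the type reference is held ... */

    put_page_type(page);                /* drop the type reference */
    return 1;
}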
static ham_status_t
__insert_in_page(ham_page_t *page, ham_key_t *key,
        ham_offset_t rid, insert_scratchpad_t *scratchpad,
        insert_hints_t *hints)
{
    ham_status_t st;
    ham_size_t maxkeys=btree_get_maxkeys(scratchpad->be);
    btree_node_t *node=ham_page_get_btree_node(page);

    ham_assert(maxkeys>1,
            ("invalid result of db_get_maxkeys(): %d", maxkeys));
    ham_assert(hints->force_append == HAM_FALSE, (0));
    ham_assert(hints->force_prepend == HAM_FALSE, (0));

    /*
     * prepare the page for modifications
     */
    st=ham_log_add_page_before(page);
    if (st)
        return (st);

    /*
     * if we can insert the new key without splitting the page:
     * __insert_nosplit() will do the work for us
     */
    if (btree_node_get_count(node)<maxkeys) {
        st=__insert_nosplit(page, key, rid,
                scratchpad->record, scratchpad->cursor, hints);
        scratchpad->cursor=0; /* don't overwrite cursor if __insert_nosplit
                                 is called again */
        return (st);
    }

    /*
     * otherwise, we have to split the page.
     * but BEFORE we split, we check if the key already exists!
     */
    if (btree_node_is_leaf(node)) {
        ham_s32_t idx;

        hints->cost++;
        idx = btree_node_search_by_key(page_get_owner(page), page, key,
                HAM_FIND_EXACT_MATCH);
        /* key exists! */
        if (idx>=0) {
            ham_assert((hints->flags & (HAM_DUPLICATE_INSERT_BEFORE
                                |HAM_DUPLICATE_INSERT_AFTER
                                |HAM_DUPLICATE_INSERT_FIRST
                                |HAM_DUPLICATE_INSERT_LAST))
                    ? (hints->flags & HAM_DUPLICATE)
                    : 1, (0));
            if (!(hints->flags & (HAM_OVERWRITE | HAM_DUPLICATE)))
                return (HAM_DUPLICATE_KEY);
            st=__insert_nosplit(page, key, rid,
                    scratchpad->record, scratchpad->cursor, hints);
            /* don't overwrite cursor if __insert_nosplit is called again */
            scratchpad->cursor=0;
            return (st);
        }
    }

    return (__insert_split(page, key, rid, scratchpad, hints));
}
int
guest_physmap_add_entry(struct domain *d, unsigned long gfn,
                        unsigned long mfn, unsigned int page_order,
                        p2m_type_t t)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    unsigned long i, ogfn;
    p2m_type_t ot;
    p2m_access_t a;
    mfn_t omfn;
    int pod_count = 0;
    int rc = 0;

    if ( !paging_mode_translate(d) )
    {
        if ( need_iommu(d) && t == p2m_ram_rw )
        {
            for ( i = 0; i < (1 << page_order); i++ )
            {
                rc = iommu_map_page(
                    d, mfn + i, mfn + i, IOMMUF_readable|IOMMUF_writable);
                if ( rc != 0 )
                {
                    while ( i-- > 0 )
                        iommu_unmap_page(d, mfn + i);
                    return rc;
                }
            }
        }
        return 0;
    }

    p2m_lock(p2m);

    P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn, mfn);

    /* First, remove m->p mappings for existing p->m mappings */
    for ( i = 0; i < (1UL << page_order); i++ )
    {
        omfn = p2m->get_entry(p2m, gfn + i, &ot, &a, 0, NULL);
        if ( p2m_is_shared(ot) )
        {
            /* Do an unshare to cleanly take care of all corner
             * cases. */
            int rc;
            rc = mem_sharing_unshare_page(p2m->domain, gfn + i, 0);
            if ( rc )
            {
                p2m_unlock(p2m);
                /* NOTE: Should a guest domain bring this upon itself,
                 * there is not a whole lot we can do. We are buried
                 * deep in locks from most code paths by now. So, fail
                 * the call and don't try to sleep on a wait queue
                 * while placing the mem event.
                 *
                 * However, all current (changeset 3432abcf9380) code
                 * paths avoid this unsavoury situation. For now.
                 *
                 * Foreign domains are okay to place an event as they
                 * won't go to sleep. */
                (void)mem_sharing_notify_enomem(p2m->domain, gfn + i, 0);
                return rc;
            }
            omfn = p2m->get_entry(p2m, gfn + i, &ot, &a, 0, NULL);
            ASSERT(!p2m_is_shared(ot));
        }
        if ( p2m_is_grant(ot) )
        {
            /* Really shouldn't be unmapping grant maps this way */
            domain_crash(d);
            p2m_unlock(p2m);
            return -EINVAL;
        }
        else if ( p2m_is_ram(ot) && !p2m_is_paged(ot) )
        {
            ASSERT(mfn_valid(omfn));
            set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
        }
        else if ( ot == p2m_populate_on_demand )
        {
            /* Count how many PoD entries we'll be replacing if successful */
            pod_count++;
        }
        else if ( p2m_is_paging(ot) && (ot != p2m_ram_paging_out) )
        {
            /* We're plugging a hole in the physmap where a paged out
             * page was */
            atomic_dec(&d->paged_pages);
        }
    }

    /* Then, look for m->p mappings for this range and deal with them */
    for ( i = 0; i < (1UL << page_order); i++ )
    {
        if ( page_get_owner(mfn_to_page(_mfn(mfn + i))) == dom_cow )
        {
            /* This is no way to add a shared page to your physmap! */
            gdprintk(XENLOG_ERR, "Adding shared mfn %lx directly to dom %hu "
                     "physmap not allowed.\n", mfn+i, d->domain_id);
            p2m_unlock(p2m);
            return -EINVAL;
        }
        if ( page_get_owner(mfn_to_page(_mfn(mfn + i))) != d )
            continue;
        ogfn = mfn_to_gfn(d, _mfn(mfn+i));
        if ( (ogfn != INVALID_M2P_ENTRY) && (ogfn != gfn + i) )
        {
            /* This machine frame is already mapped at another physical
             * address */
            P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n",
                      mfn + i, ogfn, gfn + i);
            omfn = p2m->get_entry(p2m, ogfn, &ot, &a, 0, NULL);
            if ( p2m_is_ram(ot) && !p2m_is_paged(ot) )
            {
                ASSERT(mfn_valid(omfn));
                P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n",
                          ogfn , mfn_x(omfn));
                if ( mfn_x(omfn) == (mfn + i) )
                    p2m_remove_page(p2m, ogfn, mfn + i, 0);
            }
        }
    }

    /* Now, actually do the two-way mapping */
    if ( mfn_valid(_mfn(mfn)) )
    {
        if ( !set_p2m_entry(p2m, gfn, _mfn(mfn), page_order, t,
                            p2m->default_access) )
        {
            rc = -EINVAL;
            goto out; /* Failed to update p2m, bail without updating m2p. */
        }
        if ( !p2m_is_grant(t) )
        {
            for ( i = 0; i < (1UL << page_order); i++ )
                set_gpfn_from_mfn(mfn+i, gfn+i);
        }
    }
    else
    {
        gdprintk(XENLOG_WARNING, "Adding bad mfn to p2m map (%#lx -> %#lx)\n",
                 gfn, mfn);
        if ( !set_p2m_entry(p2m, gfn, _mfn(INVALID_MFN), page_order,
                            p2m_invalid, p2m->default_access) )
            rc = -EINVAL;
        else
        {
            pod_lock(p2m);
            p2m->pod.entry_count -= pod_count;
            BUG_ON(p2m->pod.entry_count < 0);
            pod_unlock(p2m);
        }
    }

out:
    p2m_unlock(p2m);

    return rc;
}