void wts_load(void) { WT_CONNECTION *conn; WT_CURSOR *cursor; WT_ITEM key, value; WT_SESSION *session; uint8_t *keybuf, *valbuf; int is_bulk, ret; conn = g.wts_conn; if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) die(ret, "connection.open_session"); if (g.logging != 0) (void)g.wt_api->msg_printf(g.wt_api, session, "=============== bulk load start ==============="); /* * Avoid bulk load with KVS (there's no bulk load support for a * data-source); avoid bulk load with a custom collator, because * the order of insertion will not match the collation order. */ is_bulk = !g.c_reverse && !DATASOURCE("kvsbdb") && !DATASOURCE("helium"); if ((ret = session->open_cursor(session, g.uri, NULL, is_bulk ? "bulk" : NULL, &cursor)) != 0) die(ret, "session.open_cursor"); /* Set up the default key buffer. */ key_gen_setup(&keybuf); val_gen_setup(&valbuf); for (;;) { if (++g.key_cnt > g.c_rows) { g.key_cnt = g.rows = g.c_rows; break; } /* Report on progress every 100 inserts. */ if (g.key_cnt % 100 == 0) track("bulk load", g.key_cnt, NULL); key_gen(keybuf, &key.size, (uint64_t)g.key_cnt, 0); key.data = keybuf; value_gen(valbuf, &value.size, (uint64_t)g.key_cnt); value.data = valbuf; switch (g.type) { case FIX: if (!is_bulk) cursor->set_key(cursor, g.key_cnt); cursor->set_value(cursor, *(uint8_t *)value.data); if (g.logging == LOG_OPS) (void)g.wt_api->msg_printf(g.wt_api, session, "%-10s %" PRIu32 " {0x%02" PRIx8 "}", "bulk V", g.key_cnt, ((uint8_t *)value.data)[0]); break; case VAR: if (!is_bulk) cursor->set_key(cursor, g.key_cnt); cursor->set_value(cursor, &value); if (g.logging == LOG_OPS) (void)g.wt_api->msg_printf(g.wt_api, session, "%-10s %" PRIu32 " {%.*s}", "bulk V", g.key_cnt, (int)value.size, (char *)value.data); break; case ROW: cursor->set_key(cursor, &key); if (g.logging == LOG_OPS) (void)g.wt_api->msg_printf(g.wt_api, session, "%-10s %" PRIu32 " {%.*s}", "bulk K", g.key_cnt, (int)key.size, (char *)key.data); cursor->set_value(cursor, &value); if 
(g.logging == LOG_OPS) (void)g.wt_api->msg_printf(g.wt_api, session, "%-10s %" PRIu32 " {%.*s}", "bulk V", g.key_cnt, (int)value.size, (char *)value.data); break; } if ((ret = cursor->insert(cursor)) != 0) die(ret, "cursor.insert"); if (!SINGLETHREADED) continue; /* Insert the item into BDB. */ bdb_insert(key.data, key.size, value.data, value.size); } if ((ret = cursor->close(cursor)) != 0) die(ret, "cursor.close"); if (g.logging != 0) (void)g.wt_api->msg_printf(g.wt_api, session, "=============== bulk load stop ==============="); if ((ret = session->close(session, NULL)) != 0) die(ret, "session.close"); free(keybuf); free(valbuf); }
/*
 * load_dump --
 *	Load from the WiredTiger dump format: read and apply the dumped
 * configuration, then insert the dumped records through a dump cursor.
 * Returns 0 on success and 1 on failure (or config_read's error value if the
 * metadata cannot be read).
 */
static int
load_dump(WT_SESSION *session)
{
	WT_CURSOR *cursor;
	WT_DECL_RET;
	size_t i;
	int hex, tret;
	const char *append_cfg, *format_cfg, *overwrite_cfg;
	char **list, *uri, config[64];

	cursor = NULL;
	uri = NULL;
	hex = 0;		/* -Wuninitialized */
	list = NULL;		/* -Wuninitialized */

	/* Read the metadata file. */
	ret = config_read(&list, &hex);
	if (ret != 0)
		return (ret);

	/* Reorder and check the list. */
	ret = config_reorder(list);
	if (ret != 0)
		goto err;

	/* Update the config based on any command-line configuration. */
	ret = config_update(session, list);
	if (ret != 0)
		goto err;

	/* The first list entry names the object being loaded. */
	uri = list[0];

	/* Create the items in the list. */
	ret = config_exec(session, list);
	if (ret != 0)
		goto err;

	/* Open the insert cursor. */
	format_cfg = hex ? "hex" : "print";
	append_cfg = append ? ",append" : "";
	overwrite_cfg = no_overwrite ? ",overwrite=false" : "";
	(void)snprintf(config, sizeof(config),
	    "dump=%s%s%s", format_cfg, append_cfg, overwrite_cfg);
	ret = session->open_cursor(session, uri, NULL, config, &cursor);
	if (ret != 0) {
		ret = util_err(ret, "%s: session.open", uri);
		goto err;
	}

	/*
	 * The append flag only applies to objects where the primary key is a
	 * record number; anything else is a usage error.
	 */
	if (!append || strcmp(cursor->key_format, "r") == 0)
		ret = insert(cursor, uri);
	else {
		fprintf(stderr,
		    "%s: %s: -a option illegal unless the primary key is a "
		    "record number\n",
		    progname, uri);
		ret = 1;
	}

err:	/*
	 * The session handle would close the cursor for us, but closing it
	 * here surfaces flush and close failures early, which is what we want
	 * when loading files.
	 */
	if (cursor != NULL) {
		tret = cursor->close(cursor);
		if (tret != 0) {
			tret = util_err(tret, "%s: cursor.close", uri);
			if (ret == 0)
				ret = tret;
		}
	}
	if (ret == 0)
		ret = util_flush(session, uri);

	/* Discard the NULL-terminated configuration list. */
	for (i = 0; list[i] != NULL; ++i)
		free(list[i]);
	free(list);

	return (ret != 0);
}
/*
 * dump_table_config --
 *	Dump the config for a table.
 *
 * Looks up the metadata entries for uri through the caller's cursor, hands the
 * collected configuration strings to print_config, and for tables without a
 * simple column-group entry goes on to dump the column-group and index
 * configurations. When json is set and the table is not complex, empty
 * "colgroups" and "indices" arrays are printed instead.
 *
 * Returns 1 if uri contains no ':' separator; otherwise returns ret as left
 * by the last failing (or final successful) step.
 */
static int
dump_table_config(
    WT_SESSION *session, WT_CURSOR *cursor, const char *uri, bool json)
{
	WT_CONFIG_ITEM cval;
	WT_CURSOR *srch;
	WT_DECL_RET;
	size_t len;
	int tret;
	bool complex_table;
	const char *name, *v;
	char *p, **cfg, *_cfg[4] = {NULL, NULL, NULL, NULL};

	p = NULL;
	/*
	 * The configuration strings are stacked from the back of _cfg: each
	 * new string is stored at *--cfg, so cfg always addresses the most
	 * recently added entry. At most three strings are added, so _cfg[3]
	 * stays NULL (presumably the terminator print_config relies on).
	 */
	cfg = &_cfg[3];

	/* Get the table name. */
	if ((name = strchr(uri, ':')) == NULL) {
		fprintf(stderr, "%s: %s: corrupted uri\n", progname, uri);
		return (1);
	}
	++name;

	/*
	 * Dump out the config information: first, dump the uri entry itself,
	 * it overrides all subsequent configurations.
	 */
	cursor->set_key(cursor, uri);
	if ((ret = cursor->search(cursor)) != 0)
		WT_ERR(util_cerr(cursor, "search", ret));
	if ((ret = cursor->get_value(cursor, &v)) != 0)
		WT_ERR(util_cerr(cursor, "get_value", ret));
	/* Copy the value: v is only borrowed from the cursor. */
	if ((*--cfg = strdup(v)) == NULL)
		WT_ERR(util_err(session, errno, NULL));

	/*
	 * Workaround for WiredTiger "simple" table handling. Simple tables
	 * have column-group entries, but they aren't listed in the metadata's
	 * table entry, and the name is different from other column-groups.
	 * Figure out if it's a simple table and in that case, retrieve the
	 * column-group's configuration value and the column-group's "source"
	 * entry, where the column-group entry overrides the source's.
	 */
	complex_table = false;
	if (WT_PREFIX_MATCH(uri, "table:")) {
		/* Build the "colgroup:<name>" metadata key. */
		len = strlen("colgroup:") + strlen(name) + 1;
		if ((p = malloc(len)) == NULL)
			WT_ERR(util_err(session, errno, NULL));
		(void)snprintf(p, len, "colgroup:%s", name);
		cursor->set_key(cursor, p);
		if ((ret = cursor->search(cursor)) == 0) {
			if ((ret = cursor->get_value(cursor, &v)) != 0)
				WT_ERR(util_cerr(cursor, "get_value", ret));
			if ((*--cfg = strdup(v)) == NULL)
				WT_ERR(util_err(session, errno, NULL));
			/*
			 * Parse the copy just pushed (*cfg), so cval keeps
			 * pointing into memory this function owns.
			 */
			if ((ret =__wt_config_getones(
			    (WT_SESSION_IMPL *)session,
			    *cfg, "source", &cval)) != 0)
				WT_ERR(util_err(
				    session, ret, "%s: source entry", p));
			/*
			 * Replace the colgroup key with the source's name; p is
			 * reassigned immediately, so the cleanup at err never
			 * sees the freed pointer.
			 */
			free(p);
			len = cval.len + 10;
			if ((p = malloc(len)) == NULL)
				WT_ERR(util_err(session, errno, NULL));
			(void)snprintf(p, len, "%.*s", (int)cval.len, cval.str);
			cursor->set_key(cursor, p);
			if ((ret = cursor->search(cursor)) != 0)
				WT_ERR(util_cerr(cursor, "search", ret));
			if ((ret = cursor->get_value(cursor, &v)) != 0)
				WT_ERR(util_cerr(cursor, "get_value", ret));
			if ((*--cfg = strdup(v)) == NULL)
				WT_ERR(util_err(session, errno, NULL));
		} else
			/*
			 * Any failure of the colgroup search, not only a
			 * missing entry, marks the table as complex.
			 */
			complex_table = true;
	}

	WT_ERR(print_config(session, uri, cfg, json, true));

	if (complex_table) {
		/*
		 * The underlying table configuration function needs a second
		 * cursor: open one before calling it, it makes error handling
		 * hugely simpler.
		 */
		if ((ret = session->open_cursor(
		    session, "metadata:", NULL, NULL, &srch)) != 0)
			WT_ERR(util_cerr(cursor, "open_cursor", ret));

		/* Dump the column groups first, the indices only if that worked. */
		if ((ret = dump_table_config_complex(
		    session, cursor, srch, name, "colgroup:", json)) == 0)
			ret = dump_table_config_complex(
			    session, cursor, srch, name, "index:", json);

		/* Always close the second cursor; keep the first error seen. */
		if ((tret = srch->close(srch)) != 0) {
			tret = util_cerr(cursor, "close", tret);
			if (ret == 0)
				ret = tret;
		}
	} else if (json && printf(
	    " \"colgroups\" : [],\n"
	    " \"indices\" : []\n") < 0)
		WT_ERR(util_cerr(cursor, NULL, EIO));

err:	/* Release the key buffer and every configuration copy pushed above. */
	free(p);
	free(_cfg[0]);
	free(_cfg[1]);
	free(_cfg[2]);
	return (ret);
}
/*
 * session_ops --
 *	Example code exercising the WT_SESSION methods. The text between each
 * pair of matching documentation-snippet markers is extracted into the
 * documentation.
 *
 * Return values are deliberately overwritten without being checked: only the
 * result of the final WT_SESSION::close call is returned. The session handle
 * is closed by this function and must not be used by the caller afterwards.
 */
int
session_ops(WT_SESSION *session)
{
	int ret;

	/*! [Reconfigure a session] */
	ret = session->reconfigure(session, "isolation=snapshot");
	/*! [Reconfigure a session] */

	/*! [Create a table] */
	ret = session->create(session,
	    "table:mytable", "key_format=S,value_format=S");
	/*! [Create a table] */
	ret = session->drop(session, "table:mytable", NULL);

	/*! [Create a column-store table] */
	ret = session->create(session,
	    "table:mytable", "key_format=r,value_format=S");
	/*! [Create a column-store table] */
	ret = session->drop(session, "table:mytable", NULL);

	/*! [Create a table with columns] */
	/*
	 * Create a table with columns: keys are record numbers, values are
	 * (string, signed 32-bit integer, unsigned 16-bit integer).
	 */
	ret = session->create(session, "table:mytable",
	    "key_format=r,value_format=SiH,"
	    "columns=(id,department,salary,year-started)");
	/*! [Create a table with columns] */
	ret = session->drop(session, "table:mytable", NULL);

	/*! [Create a table and configure the page size] */
	ret = session->create(session,
	    "table:mytable", "key_format=S,value_format=S,"
	    "internal_page_max=16KB,leaf_page_max=1MB,leaf_value_max=64KB");
	/*! [Create a table and configure the page size] */
	ret = session->drop(session, "table:mytable", NULL);

	/*! [Create a table and configure a large leaf value max] */
	ret = session->create(session,
	    "table:mytable", "key_format=S,value_format=S,"
	    "leaf_page_max=16KB,leaf_value_max=256KB");
	/*! [Create a table and configure a large leaf value max] */
	ret = session->drop(session, "table:mytable", NULL);

	/*
	 * This example code gets run, and the compression libraries might not
	 * be loaded, causing the create to fail.  The documentation requires
	 * the code snippets, use #ifdef's to avoid running it.
	 */
#ifdef MIGHT_NOT_RUN
	/*! [Create a lz4 compressed table] */
	ret = session->create(session,
	    "table:mytable",
	    "block_compressor=lz4,key_format=S,value_format=S");
	/*! [Create a lz4 compressed table] */
	ret = session->drop(session, "table:mytable", NULL);

	/*! [Create a snappy compressed table] */
	ret = session->create(session,
	    "table:mytable",
	    "block_compressor=snappy,key_format=S,value_format=S");
	/*! [Create a snappy compressed table] */
	ret = session->drop(session, "table:mytable", NULL);

	/*! [Create a zlib compressed table] */
	ret = session->create(session,
	    "table:mytable",
	    "block_compressor=zlib,key_format=S,value_format=S");
	/*! [Create a zlib compressed table] */
	ret = session->drop(session, "table:mytable", NULL);
#endif

	/*! [Configure checksums to uncompressed] */
	ret = session->create(session, "table:mytable",
	    "key_format=S,value_format=S,checksum=uncompressed");
	/*! [Configure checksums to uncompressed] */
	ret = session->drop(session, "table:mytable", NULL);

	/*! [Configure dictionary compression on] */
	ret = session->create(session, "table:mytable",
	    "key_format=S,value_format=S,dictionary=1000");
	/*! [Configure dictionary compression on] */
	ret = session->drop(session, "table:mytable", NULL);

	/*! [Configure key prefix compression on] */
	ret = session->create(session, "table:mytable",
	    "key_format=S,value_format=S,prefix_compression=true");
	/*! [Configure key prefix compression on] */
	ret = session->drop(session, "table:mytable", NULL);

#ifdef MIGHT_NOT_RUN
						/* Requires sync_file_range */
	/*! [os_cache_dirty_max configuration] */
	ret = session->create(
	    session, "table:mytable", "os_cache_dirty_max=500MB");
	/*! [os_cache_dirty_max configuration] */
	ret = session->drop(session, "table:mytable", NULL);

						/* Requires posix_fadvise */
	/*! [os_cache_max configuration] */
	ret = session->create(session, "table:mytable", "os_cache_max=1GB");
	/*! [os_cache_max configuration] */
	ret = session->drop(session, "table:mytable", NULL);
#endif
	/*! [Configure block_allocation] */
	ret = session->create(session, "table:mytable",
	    "key_format=S,value_format=S,block_allocation=first");
	/*! [Configure block_allocation] */
	ret = session->drop(session, "table:mytable", NULL);

	/*! [Create a cache-resident object] */
	ret = session->create(session,
	    "table:mytable", "key_format=r,value_format=S,cache_resident=true");
	/*! [Create a cache-resident object] */
	ret = session->drop(session, "table:mytable", NULL);

	{
	/* Create a table for the session operations. */
	ret = session->create(
	    session, "table:mytable", "key_format=S,value_format=S");

	/*! [Compact a table] */
	ret = session->compact(session, "table:mytable", NULL);
	/*! [Compact a table] */

	/*! [Rebalance a table] */
	ret = session->rebalance(session, "table:mytable", NULL);
	/*! [Rebalance a table] */

	/*! [Rename a table] */
	ret = session->rename(session, "table:old", "table:new", NULL);
	/*! [Rename a table] */

	/*! [Salvage a table] */
	ret = session->salvage(session, "table:mytable", NULL);
	/*! [Salvage a table] */

	/*! [Truncate a table] */
	ret = session->truncate(session, "table:mytable", NULL, NULL, NULL);
	/*! [Truncate a table] */

	/*! [Transaction sync] */
	ret = session->transaction_sync(session, NULL);
	/*! [Transaction sync] */

	/*! [Reset the session] */
	ret = session->reset(session);
	/*! [Reset the session] */

	{
	/*
	 * Insert a pair of keys so we can truncate a range.
	 */
	WT_CURSOR *cursor;
	ret = session->open_cursor(
	    session, "table:mytable", NULL, NULL, &cursor);
	cursor->set_key(cursor, "June01");
	cursor->set_value(cursor, "value");
	ret = cursor->update(cursor);
	cursor->set_key(cursor, "June30");
	cursor->set_value(cursor, "value");
	ret = cursor->update(cursor);
	ret = cursor->close(cursor);

	{
	/*! [Truncate a range] */
	WT_CURSOR *start, *stop;

	ret = session->open_cursor(
	    session, "table:mytable", NULL, NULL, &start);
	start->set_key(start, "June01");
	ret = start->search(start);

	ret = session->open_cursor(
	    session, "table:mytable", NULL, NULL, &stop);
	stop->set_key(stop, "June30");
	ret = stop->search(stop);

	ret = session->truncate(session, NULL, start, stop, NULL);
	/*! [Truncate a range] */

	/*
	 * Close the range cursors (outside the documentation snippet): the
	 * upgrade, verify and drop calls below operate on the same table and
	 * cannot succeed while cursors are still open on it.
	 */
	ret = stop->close(stop);
	ret = start->close(start);
	}
	}

	/*! [Upgrade a table] */
	ret = session->upgrade(session, "table:mytable", NULL);
	/*! [Upgrade a table] */

	/*! [Verify a table] */
	ret = session->verify(session, "table:mytable", NULL);
	/*! [Verify a table] */

	/*! [Drop a table] */
	ret = session->drop(session, "table:mytable", NULL);
	/*! [Drop a table] */
	}

	/*! [Close a session] */
	ret = session->close(session, NULL);
	/*! [Close a session] */

	return (ret);
}
/*
 * transaction_ops --
 *	Example code exercising the transaction API. The text between each
 * pair of matching documentation-snippet markers is extracted into the
 * documentation.
 *
 * Return values are deliberately overwritten without being checked: only the
 * result of the final transaction_pinned_range call is returned. The local
 * session variable is replaced part-way through by a newly opened session;
 * the caller's own handle is unaffected because it is passed by value.
 */
int
transaction_ops(WT_CONNECTION *conn, WT_SESSION *session)
{
	WT_CURSOR *cursor;
	int ret;

	/*! [transaction commit/rollback] */
	/*
	 * Cursors may be opened before or after the transaction begins, and in
	 * either case, subsequent operations are included in the transaction.
	 * Opening cursors before the transaction begins allows applications to
	 * cache cursors and use them for multiple operations.
	 */
	ret =
	    session->open_cursor(session, "table:mytable", NULL, NULL, &cursor);
	ret = session->begin_transaction(session, NULL);

	cursor->set_key(cursor, "key");
	cursor->set_value(cursor, "value");
	switch (ret = cursor->update(cursor)) {
	case 0:					/* Update success */
		ret = session->commit_transaction(session, NULL);
		/*
		 * If commit_transaction succeeds, cursors remain positioned; if
		 * commit_transaction fails, the transaction was rolled-back
		 * and all cursors are reset.
		 */
		break;
	case WT_ROLLBACK:			/* Update conflict */
	default:				/* Other error */
		ret = session->rollback_transaction(session, NULL);
		/* The rollback_transaction call resets all cursors. */
		break;
	}

	/*
	 * Cursors remain open and may be used for multiple transactions.
	 */
	/*! [transaction commit/rollback] */
	ret = cursor->close(cursor);

	/*! [transaction isolation] */
	/* A single transaction configured for snapshot isolation. */
	ret =
	    session->open_cursor(session, "table:mytable", NULL, NULL, &cursor);
	ret = session->begin_transaction(session, "isolation=snapshot");
	cursor->set_key(cursor, "some-key");
	cursor->set_value(cursor, "some-value");
	ret = cursor->update(cursor);
	ret = session->commit_transaction(session, NULL);
	/*! [transaction isolation] */

	/*
	 * Close the cursor (outside the documentation snippet) before the
	 * session variable is replaced below; nothing later in this function
	 * uses it.
	 */
	ret = cursor->close(cursor);

	/*! [session isolation configuration] */
	/* Open a session configured for read-uncommitted isolation. */
	ret = conn->open_session(
	    conn, NULL, "isolation=read_uncommitted", &session);
	/*! [session isolation configuration] */

	/*! [session isolation re-configuration] */
	/* Re-configure a session for snapshot isolation. */
	ret = session->reconfigure(session, "isolation=snapshot");
	/*! [session isolation re-configuration] */

	{
	/*! [transaction pinned range] */
	/* Check the transaction ID range pinned by the session handle. */
	uint64_t range;

	ret = session->transaction_pinned_range(session, &range);
	/*! [transaction pinned range] */
	}

	return (ret);
}
/*
 * __wt_las_sweep --
 *	Sweep the lookaside table.
 *
 * Walks a bounded number of lookaside records per call, removing those whose
 * transaction ID is visible to all, and saves the key it stopped on in
 * conn->las_sweep_key so the next call resumes near the same place.
 *
 * Returns 0 on success (running off the end of the table is not an error),
 * otherwise an error from the cursor or buffer operations.
 */
int
__wt_las_sweep(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_CURSOR *cursor;
	WT_DECL_ITEM(las_addr);
	WT_DECL_ITEM(las_key);
	WT_DECL_RET;
	WT_ITEM *key;
	uint64_t cnt, las_counter, las_txnid;
	int64_t remove_cnt;
	uint32_t las_id, session_flags;
	int notused;

	conn = S2C(session);
	cursor = NULL;
	key = &conn->las_sweep_key;
	remove_cnt = 0;
	session_flags = 0;		/* [-Werror=maybe-uninitialized] */

	WT_ERR(__wt_scr_alloc(session, 0, &las_addr));
	WT_ERR(__wt_scr_alloc(session, 0, &las_key));

	WT_ERR(__wt_las_cursor(session, &cursor, &session_flags));

	/*
	 * If we're not starting a new sweep, position the cursor using the key
	 * from the last call (we don't care if we're before or after the key,
	 * just roughly in the same spot is fine).
	 */
	if (key->size != 0) {
		__wt_cursor_set_raw_key(cursor, key);
		/* The exact-match indicator is required by the API but unused. */
		ret = cursor->search_near(cursor, &notused);

		/*
		 * Don't search for the same key twice; if we don't set a new
		 * key below, it's because we've reached the end of the table
		 * and we want the next pass to start at the beginning of the
		 * table. Searching for the same key could leave us stuck at
		 * the end of the table, repeatedly checking the same rows.
		 */
		key->size = 0;
		if (ret != 0)
			goto srch_notfound;
	}

	/*
	 * The sweep server wakes up every 10 seconds (by default), it's a slow
	 * moving thread. Try to review the entire lookaside table once every 5
	 * minutes, or every 30 calls.
	 *
	 * The reason is because the lookaside table exists because we're seeing
	 * cache/eviction pressure (it allows us to trade performance and disk
	 * space for cache space), and it's likely lookaside blocks are being
	 * evicted, and reading them back in doesn't help things. A trickier,
	 * but possibly better, alternative might be to review all lookaside
	 * blocks in the cache in order to get rid of them, and slowly review
	 * lookaside blocks that have already been evicted.
	 */
	cnt = (uint64_t)WT_MAX(100, conn->las_record_cnt / 30);

	/* Discard pages we read as soon as we're done with them. */
	F_SET(session, WT_SESSION_NO_CACHE);

	/* Walk the file. */
	for (; cnt > 0 && (ret = cursor->next(cursor)) == 0; --cnt) {
		/*
		 * If the loop terminates after completing a work unit, we will
		 * continue the table sweep next time. Get a local copy of the
		 * sweep key, we're going to reset the cursor; do so before
		 * calling cursor.remove, cursor.remove can discard our hazard
		 * pointer and the page could be evicted from underneath us.
		 */
		if (cnt == 1) {
			WT_ERR(__wt_cursor_get_raw_key(cursor, key));
			if (!WT_DATA_IN_ITEM(key))
				WT_ERR(__wt_buf_set(
				    session, key, key->data, key->size));
		}

		WT_ERR(cursor->get_key(cursor,
		    &las_id, las_addr, &las_counter, &las_txnid, las_key));

		/*
		 * If the on-page record transaction ID associated with the
		 * record is globally visible, the record can be discarded.
		 *
		 * Cursor opened overwrite=true: won't return WT_NOTFOUND should
		 * another thread remove the record before we do, and the cursor
		 * remains positioned in that case.
		 */
		if (__wt_txn_visible_all(session, las_txnid)) {
			WT_ERR(cursor->remove(cursor));
			++remove_cnt;
		}
	}

srch_notfound:
	/* Reaching the end of the table isn't an error. */
	WT_ERR_NOTFOUND_OK(ret);

	if (0) {
		/* On error, discard the saved sweep key. */
err:		__wt_buf_free(session, key);
	}

	WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));

	/*
	 * If there were races to remove records, we can over-count.  All
	 * arithmetic is signed, so underflow isn't fatal, but check anyway so
	 * we don't skew low over time.
	 */
	if (remove_cnt > S2C(session)->las_record_cnt)
		S2C(session)->las_record_cnt = 0;
	else if (remove_cnt > 0)
		(void)__wt_atomic_subi64(&conn->las_record_cnt, remove_cnt);

	F_CLR(session, WT_SESSION_NO_CACHE);

	__wt_scr_free(session, &las_addr);
	__wt_scr_free(session, &las_key);

	return (ret);
}
/*
 * util_stat --
 *	The stat command: print "description=value" lines from a statistics
 * cursor opened on the connection (no argument) or on a single named object
 * (one argument).
 *
 * Options: -a is accepted for compatibility and selects the default
 * configuration; -f selects "statistics=(fast)".
 *
 * Returns 0 on success, 1 on failure, or usage()'s return value for a bad
 * command line.
 */
int
util_stat(WT_SESSION *session, int argc, char *argv[])
{
	WT_CURSOR *cursor;
	WT_DECL_RET;
	size_t urilen;
	int ch, tret;
	bool objname_free;
	const char *config, *pval, *desc;
	char *objname, *uri;

	cursor = NULL;
	objname_free = false;
	objname = uri = NULL;
	config = NULL;
	while ((ch = __wt_getopt(progname, argc, argv, "af")) != EOF)
		switch (ch) {
		case 'a':
			/*
			 * Historically, the -a option meant include all of the
			 * statistics; because we are opening the database with
			 * statistics=(all), that is now the default, allow the
			 * option for compatibility.
			 */
			config = NULL;
			break;
		case 'f':
			config = "statistics=(fast)";
			break;
		case '?':
		default:
			return (usage());
		}
	argc -= __wt_optind;
	argv += __wt_optind;

	/*
	 * If there are no arguments, the statistics cursor operates on the
	 * connection, otherwise, the optional remaining argument is a file
	 * or LSM name.
	 */
	switch (argc) {
	case 0:
		/* A literal, not an allocation: objname_free stays false. */
		objname = (char *)"";
		break;
	case 1:
		if ((objname = util_name(session, *argv, "table")) == NULL)
			return (1);
		objname_free = true;
		break;
	default:
		return (usage());
	}

	urilen = strlen("statistics:") + strlen(objname) + 1;
	if ((uri = calloc(urilen, 1)) == NULL) {
		fprintf(stderr, "%s: %s\n", progname, strerror(errno));
		goto err;
	}
	(void)snprintf(uri, urilen, "statistics:%s", objname);

	if ((ret =
	    session->open_cursor(session, uri, NULL, config, &cursor)) != 0) {
		fprintf(stderr, "%s: cursor open(%s) failed: %s\n",
		    progname, uri, session->strerror(session, ret));
		goto err;
	}

	/* List the statistics. */
	while (
	    (ret = cursor->next(cursor)) == 0 &&
	    (ret = cursor->get_value(cursor, &desc, &pval, NULL)) == 0)
		if (printf("%s=%s\n", desc, pval) < 0) {
			ret = errno;
			break;
		}
	/* Running off the end of the statistics is the normal exit. */
	if (ret == WT_NOTFOUND)
		ret = 0;

	if (ret != 0) {
		fprintf(stderr, "%s: cursor get(%s) failed: %s\n",
		    progname, objname, session->strerror(session, ret));
		goto err;
	}

	if (0) {
err:		ret = 1;
	}

	/*
	 * Close the cursor on every path rather than leaving it to the session
	 * close, so a failure to close is reported. The cursor is only set
	 * after uri was built, so uri is valid here.
	 */
	if (cursor != NULL && (tret = cursor->close(cursor)) != 0) {
		fprintf(stderr, "%s: cursor close(%s) failed: %s\n",
		    progname, uri, session->strerror(session, tret));
		ret = 1;
	}
	if (objname_free)
		free(objname);
	free(uri);

	return (ret);
}
int main(int argc, char *argv[]) { WT_CONNECTION *conn, *conn2, *conn3, *conn4; WT_CURSOR *cursor; WT_ITEM data; WT_SESSION *session; uint64_t i; int ch, status, op, ret; bool child; const char *working_dir; char cmd[512]; uint8_t buf[MAX_VAL]; if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL) progname = argv[0]; else ++progname; /* * Needed unaltered for system command later. */ saved_argv0 = argv[0]; working_dir = "WT_RD"; child = false; op = OP_READ; while ((ch = __wt_getopt(progname, argc, argv, "Rh:W")) != EOF) switch (ch) { case 'R': child = true; op = OP_READ; break; case 'W': child = true; op = OP_WRITE; break; case 'h': working_dir = __wt_optarg; break; default: usage(); } argc -= __wt_optind; argv += __wt_optind; if (argc != 0) usage(); /* * Set up all the directory names. */ testutil_work_dir_from_path(home, sizeof(home), working_dir); (void)snprintf(home_wr, sizeof(home_wr), "%s%s", home, HOME_WR_SUFFIX); (void)snprintf(home_rd, sizeof(home_rd), "%s%s", home, HOME_RD_SUFFIX); (void)snprintf( home_rd2, sizeof(home_rd2), "%s%s", home, HOME_RD2_SUFFIX); if (!child) { testutil_make_work_dir(home); testutil_make_work_dir(home_wr); testutil_make_work_dir(home_rd); testutil_make_work_dir(home_rd2); } else /* * We are a child process, we just want to call * the open_dbs with the directories we have. * The child function will exit. */ open_dbs(op, home, home_wr, home_rd, home_rd2); /* * Parent creates a database and table. Then cleanly shuts down. * Then copy database to read-only directory and chmod. * Also copy database to read-only directory and remove the lock * file. One read-only database will have a lock file in the * file system and the other will not. * Parent opens all databases with read-only configuration flag. * Parent forks off child who tries to also open all databases * with the read-only flag. It should error on the writeable * directory, but allow it on the read-only directories. * The child then confirms it can read all the data. 
*/ /* * Run in the home directory and create the table. */ if ((ret = wiredtiger_open(home, NULL, ENV_CONFIG, &conn)) != 0) testutil_die(ret, "wiredtiger_open"); if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) testutil_die(ret, "WT_CONNECTION:open_session"); if ((ret = session->create(session, uri, "key_format=Q,value_format=u")) != 0) testutil_die(ret, "WT_SESSION.create: %s", uri); if ((ret = session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0) testutil_die(ret, "WT_SESSION.open_cursor: %s", uri); /* * Write data into the table and then cleanly shut down connection. */ memset(buf, 0, sizeof(buf)); data.data = buf; data.size = MAX_VAL; for (i = 0; i < MAX_KV; ++i) { cursor->set_key(cursor, i); cursor->set_value(cursor, &data); if ((ret = cursor->insert(cursor)) != 0) testutil_die(ret, "WT_CURSOR.insert"); } if ((ret = conn->close(conn, NULL)) != 0) testutil_die(ret, "WT_CONNECTION:close"); /* * Copy the database. Remove any lock file from one copy * and chmod the copies to be read-only permissions. */ (void)snprintf(cmd, sizeof(cmd), "cp -rp %s/* %s; rm -f %s/WiredTiger.lock", home, home_wr, home_wr); if ((status = system(cmd)) < 0) testutil_die(status, "system: %s", cmd); (void)snprintf(cmd, sizeof(cmd), "cp -rp %s/* %s; chmod 0555 %s; chmod -R 0444 %s/*", home, home_rd, home_rd, home_rd); if ((status = system(cmd)) < 0) testutil_die(status, "system: %s", cmd); (void)snprintf(cmd, sizeof(cmd), "cp -rp %s/* %s; rm -f %s/WiredTiger.lock; " "chmod 0555 %s; chmod -R 0444 %s/*", home, home_rd2, home_rd2, home_rd2, home_rd2); if ((status = system(cmd)) < 0) testutil_die(status, "system: %s", cmd); /* * Run four scenarios. Sometimes expect errors, sometimes success. * The writable database directories should always fail to allow the * child to open due to the lock file. The read-only ones will only * succeed when the child attempts read-only. * * 1. Parent has read-only handle to all databases. Child opens * read-only also. * 2. 
Parent has read-only handle to all databases. Child opens * read-write. * 3. Parent has read-write handle to writable databases and * read-only to read-only databases. Child opens read-only. * 4. Parent has read-write handle to writable databases and * read-only to read-only databases. Child opens read-write. */ /* * Open a connection handle to all databases. */ fprintf(stderr, " *** Expect several error messages from WT ***\n"); /* * Scenario 1. */ if ((ret = wiredtiger_open(home, NULL, ENV_CONFIG_RD, &conn)) != 0) testutil_die(ret, "wiredtiger_open original home"); if ((ret = wiredtiger_open(home_wr, NULL, ENV_CONFIG_RD, &conn2)) != 0) testutil_die(ret, "wiredtiger_open write nolock"); if ((ret = wiredtiger_open(home_rd, NULL, ENV_CONFIG_RD, &conn3)) != 0) testutil_die(ret, "wiredtiger_open readonly"); if ((ret = wiredtiger_open(home_rd2, NULL, ENV_CONFIG_RD, &conn4)) != 0) testutil_die(ret, "wiredtiger_open readonly nolock"); /* * Create a child to also open a connection handle to the databases. * We cannot use fork here because using fork the child inherits the * same memory image. Therefore the WT process structure is set in * the child even though it should not be. So use 'system' to spawn * an entirely new process. * * The child will exit with success if its test passes. */ (void)snprintf( cmd, sizeof(cmd), "%s -h %s -R", saved_argv0, working_dir); if ((status = system(cmd)) < 0) testutil_die(status, "system: %s", cmd); if (WEXITSTATUS(status) != 0) testutil_die(WEXITSTATUS(status), "system: %s", cmd); /* * Scenario 2. Run child with writable config. */ (void)snprintf( cmd, sizeof(cmd), "%s -h %s -W", saved_argv0, working_dir); if ((status = system(cmd)) < 0) testutil_die(status, "system: %s", cmd); if (WEXITSTATUS(status) != 0) testutil_die(WEXITSTATUS(status), "system: %s", cmd); /* * Reopen the two writable directories and rerun the child. 
*/ if ((ret = conn->close(conn, NULL)) != 0) testutil_die(ret, "WT_CONNECTION:close"); if ((ret = conn2->close(conn2, NULL)) != 0) testutil_die(ret, "WT_CONNECTION:close"); if ((ret = wiredtiger_open(home, NULL, ENV_CONFIG_RD, &conn)) != 0) testutil_die(ret, "wiredtiger_open original home"); if ((ret = wiredtiger_open(home_wr, NULL, ENV_CONFIG_RD, &conn2)) != 0) testutil_die(ret, "wiredtiger_open write nolock"); /* * Scenario 3. Child read-only. */ (void)snprintf( cmd, sizeof(cmd), "%s -h %s -R", saved_argv0, working_dir); if ((status = system(cmd)) < 0) testutil_die(status, "system: %s", cmd); if (WEXITSTATUS(status) != 0) testutil_die(WEXITSTATUS(status), "system: %s", cmd); /* * Scenario 4. Run child with writable config. */ (void)snprintf( cmd, sizeof(cmd), "%s -h %s -W", saved_argv0, working_dir); if ((status = system(cmd)) < 0) testutil_die(status, "system: %s", cmd); if (WEXITSTATUS(status) != 0) testutil_die(WEXITSTATUS(status), "system: %s", cmd); /* * Clean-up. */ if ((ret = conn->close(conn, NULL)) != 0) testutil_die(ret, "WT_CONNECTION:close"); if ((ret = conn2->close(conn2, NULL)) != 0) testutil_die(ret, "WT_CONNECTION:close"); if ((ret = conn3->close(conn3, NULL)) != 0) testutil_die(ret, "WT_CONNECTION:close"); if ((ret = conn4->close(conn4, NULL)) != 0) testutil_die(ret, "WT_CONNECTION:close"); /* * We need to chmod the read-only databases back so that they can * be removed by scripts. */ (void)snprintf(cmd, sizeof(cmd), "chmod 0777 %s %s", home_rd, home_rd2); if ((status = system(cmd)) < 0) testutil_die(status, "system: %s", cmd); (void)snprintf(cmd, sizeof(cmd), "chmod -R 0666 %s/* %s/*", home_rd, home_rd2); if ((status = system(cmd)) < 0) testutil_die(status, "system: %s", cmd); printf(" *** Readonly test successful ***\n"); return (EXIT_SUCCESS); }
/* * build -- * Build a row- or column-store page in a file. */ void build(int ikey, int ivalue, int cnt) { WT_CONNECTION *conn; WT_CURSOR *cursor; WT_ITEM key, value; WT_SESSION *session; char config[256], kbuf[64], vbuf[64]; int new_slvg; assert(wiredtiger_open(NULL, NULL, "create", &conn) == 0); assert(conn->open_session(conn, NULL, NULL, &session) == 0); assert(session->drop(session, "file:" LOAD, "force") == 0); switch (page_type) { case WT_PAGE_COL_FIX: (void)snprintf(config, sizeof(config), "key_format=r,value_format=7t," "allocation_size=%d," "internal_page_max=%d,internal_item_max=%d," "leaf_page_max=%d,leaf_item_max=%d", PSIZE, PSIZE, OSIZE, PSIZE, OSIZE); break; case WT_PAGE_COL_VAR: (void)snprintf(config, sizeof(config), "key_format=r," "allocation_size=%d," "internal_page_max=%d,internal_item_max=%d," "leaf_page_max=%d,leaf_item_max=%d", PSIZE, PSIZE, OSIZE, PSIZE, OSIZE); break; case WT_PAGE_ROW_LEAF: (void)snprintf(config, sizeof(config), "key_format=u," "allocation_size=%d," "internal_page_max=%d,internal_item_max=%d," "leaf_page_max=%d,leaf_item_max=%d", PSIZE, PSIZE, OSIZE, PSIZE, OSIZE); break; default: assert(0); } assert(session->create(session, "file:" LOAD, config) == 0); assert(session->open_cursor( session, "file:" LOAD, NULL, "bulk", &cursor) == 0); for (; cnt > 0; --cnt, ++ikey, ++ivalue) { switch (page_type) { /* Build the key. */ case WT_PAGE_COL_FIX: case WT_PAGE_COL_VAR: break; case WT_PAGE_ROW_LEAF: snprintf(kbuf, sizeof(kbuf), "%010d KEY------", ikey); key.data = kbuf; key.size = 20; cursor->set_key(cursor, &key); break; } switch (page_type) { /* Build the value. */ case WT_PAGE_COL_FIX: cursor->set_value(cursor, ivalue & 0x7f); break; case WT_PAGE_COL_VAR: case WT_PAGE_ROW_LEAF: snprintf(vbuf, sizeof(vbuf), "%010d VALUE----", value_unique ? 
ivalue : 37); value.data = vbuf; value.size = 20; cursor->set_value(cursor, &value); } assert(cursor->insert(cursor) == 0); } /* * The first time through this routine we put a matching configuration * in for the salvage file. */ new_slvg = (access(SLVG, F_OK) != 0); if (new_slvg) { assert(session->drop(session, "file:" SLVG, "force") == 0); assert(session->create(session, "file:" SLVG, config) == 0); } assert(conn->close(conn, 0) == 0); /* * We created the salvage file above, but all we want is the schema, * we're creating the salvage file by hand. */ if (new_slvg) (void)remove(SLVG); }
/*
 * __wt_schema_open_table --
 *	Open a named table: read its metadata entry, build a WT_TABLE handle
 * from the configuration, open its column groups and return the handle in
 * tablep. On failure, the partially built handle is destroyed.
 */
int
__wt_schema_open_table(WT_SESSION_IMPL *session,
    const char *name, size_t namelen, WT_TABLE **tablep)
{
	WT_CONFIG cparser;
	WT_CONFIG_ITEM ckey, cval;
	WT_CURSOR *cursor;
	WT_DECL_RET;
	WT_ITEM buf;
	WT_TABLE *table;
	const char *tconfig;
	char *tablename;

	table = NULL;
	cursor = NULL;

	/* Build the "table:<name>" URI and take over the buffer's memory. */
	WT_CLEAR(buf);
	WT_RET(__wt_buf_fmt(session, &buf, "table:%.*s", (int)namelen, name));
	tablename = __wt_buf_steal(session, &buf, NULL);

	/* Look up the table's configuration in the metadata. */
	WT_ERR(__wt_metadata_cursor(session, NULL, &cursor));
	cursor->set_key(cursor, tablename);
	WT_ERR(cursor->search(cursor));
	WT_ERR(cursor->get_value(cursor, &tconfig));

	/* The handle owns the name from here on. */
	WT_ERR(__wt_calloc_def(session, 1, &table));
	table->name = tablename;
	tablename = NULL;

	/* The configuration must have a "columns" entry. */
	WT_ERR(__wt_config_getones(session, tconfig, "columns", &cval));

	/* Copy the key and value formats into the handle. */
	WT_ERR(__wt_config_getones(session, tconfig, "key_format", &cval));
	WT_ERR(__wt_strndup(session, cval.str, cval.len, &table->key_format));
	WT_ERR(__wt_config_getones(session, tconfig, "value_format", &cval));
	WT_ERR(__wt_strndup(session, cval.str, cval.len, &table->value_format));

	/* Keep a private copy of the whole configuration string. */
	WT_ERR(__wt_strdup(session, tconfig, &table->config));

	/* Point to some items in the copy to save re-parsing. */
	WT_ERR(__wt_config_getones(session, table->config,
	    "columns", &table->colconf));

	/*
	 * Tables are "simple" if the columns are not named: walk the column
	 * list, any entry at all makes the table not simple.
	 */
	WT_ERR(__wt_config_subinit(session, &cparser, &table->colconf));
	for (table->is_simple = 1;
	    (ret = __wt_config_next(&cparser, &ckey, &cval)) == 0;)
		table->is_simple = 0;
	if (ret != WT_NOTFOUND)
		goto err;

	/* Check that the columns match the key and value formats. */
	if (table->is_simple == 0)
		WT_ERR(__wt_schema_colcheck(session,
		    table->key_format, table->value_format, &table->colconf,
		    &table->nkey_columns, NULL));

	WT_ERR(__wt_config_getones(session, table->config,
	    "colgroups", &table->cgconf));

	/* Count the number of column groups. */
	WT_ERR(__wt_config_subinit(session, &cparser, &table->cgconf));
	for (table->ncolgroups = 0;
	    (ret = __wt_config_next(&cparser, &ckey, &cval)) == 0;)
		++table->ncolgroups;
	if (ret != WT_NOTFOUND)
		goto err;

	WT_ERR(__wt_calloc_def(session, WT_COLGROUPS(table), &table->cgroups));
	WT_ERR(__wt_schema_open_colgroups(session, table));
	*tablep = table;

	/*
	 * Success falls through to here with ret == 0; every jump to the label
	 * arrives with ret != 0, so only failures discard the handle.
	 */
err:	if (ret != 0 && table != NULL)
		__wt_schema_destroy_table(session, table);
	if (cursor != NULL)
		WT_TRET(cursor->close(cursor));

	__wt_free(session, tablename);
	return (ret);
}
/*
 * __wt_las_sweep --
 *	Sweep the lookaside table.
 *
 * Each call reviews a bounded number of lookaside records, removing those
 * whose transaction ID is visible to all, and remembers (in the connection's
 * las_sweep_key) where to resume on the next call.  Returns 0 on success,
 * including when the end of the table is reached, or an error code.
 */
int
__wt_las_sweep(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_CURSOR *cursor;
	WT_DECL_ITEM(las_addr);
	WT_DECL_ITEM(las_key);
	WT_DECL_RET;
	WT_ITEM *key;
	uint64_t cnt, las_counter, las_txnid;
	uint32_t las_id, session_flags;
	int notused;

	conn = S2C(session);
	cursor = NULL;
	key = &conn->las_sweep_key;
	session_flags = 0;		/* [-Werror=maybe-uninitialized] */

	WT_ERR(__wt_scr_alloc(session, 0, &las_addr));
	WT_ERR(__wt_scr_alloc(session, 0, &las_key));

	WT_ERR(__wt_las_cursor(session, &cursor, &session_flags));

	/*
	 * If we're not starting a new sweep, position the cursor using the key
	 * from the last call (we don't care if we're before or after the key,
	 * just roughly in the same spot is fine).
	 */
	if (conn->las_sweep_call != 0 && key->data != NULL) {
		__wt_cursor_set_raw_key(cursor, key);
		if ((ret = cursor->search_near(cursor, &notused)) != 0)
			goto srch_notfound;
	}

	/*
	 * The sweep server wakes up every 10 seconds (by default), it's a slow
	 * moving thread. Try to review the entire lookaside table once every 5
	 * minutes, or every 30 calls.
	 *
	 * The lookaside table exists because we're seeing cache/eviction
	 * pressure (it allows us to trade performance and disk space for cache
	 * space), and it's likely lookaside blocks are being evicted, and
	 * reading them back in doesn't help things. A trickier, but possibly
	 * better, alternative might be to review all lookaside blocks in the
	 * cache in order to get rid of them, and slowly review lookaside
	 * blocks that have already been evicted.
	 *
	 * We can't know for sure how many records are in the lookaside table,
	 * the cursor insert and remove statistics aren't updated atomically.
	 * Start with reviewing 100 rows, and if it takes more than the target
	 * number of calls to finish, increase the number of rows checked on
	 * each call; if it takes less than the target calls to finish, then
	 * decrease the number of rows reviewed on each call (but never less
	 * than 100).
	 */
#define	WT_SWEEP_LOOKASIDE_MIN_CNT	100
#define	WT_SWEEP_LOOKASIDE_PASS_TARGET	30
	++conn->las_sweep_call;
	if ((cnt = conn->las_sweep_cnt) < WT_SWEEP_LOOKASIDE_MIN_CNT)
		cnt = conn->las_sweep_cnt = WT_SWEEP_LOOKASIDE_MIN_CNT;

	/* Walk the file. */
	for (; cnt > 0 && (ret = cursor->next(cursor)) == 0; --cnt) {
		/*
		 * If the loop terminates after completing a work unit, we will
		 * continue the table sweep next time. Get a local copy of the
		 * sweep key, we're going to reset the cursor; do so before
		 * calling cursor.remove, cursor.remove can discard our hazard
		 * pointer and the page could be evicted from underneath us.
		 */
		if (cnt == 1) {
			WT_ERR(__wt_cursor_get_raw_key(cursor, key));
			if (!WT_DATA_IN_ITEM(key))
				WT_ERR(__wt_buf_set(
				    session, key, key->data, key->size));
		}

		WT_ERR(cursor->get_key(cursor,
		    &las_id, las_addr, &las_counter, &las_txnid, las_key));

		/*
		 * If the on-page record transaction ID associated with the
		 * record is globally visible, the record can be discarded.
		 *
		 * Cursor opened overwrite=true: won't return WT_NOTFOUND should
		 * another thread remove the record before we do, and the cursor
		 * remains positioned in that case.
		 */
		if (__wt_txn_visible_all(session, las_txnid))
			WT_ERR(cursor->remove(cursor));
	}

	/*
	 * When reaching the lookaside table end or the target number of calls,
	 * adjust the row count. Decrease/increase the row count depending on
	 * if the number of calls is less/more than the target.
	 */
	if (ret == WT_NOTFOUND ||
	    conn->las_sweep_call > WT_SWEEP_LOOKASIDE_PASS_TARGET) {
		if (conn->las_sweep_call < WT_SWEEP_LOOKASIDE_PASS_TARGET &&
		    conn->las_sweep_cnt > WT_SWEEP_LOOKASIDE_MIN_CNT)
			conn->las_sweep_cnt -= WT_SWEEP_LOOKASIDE_MIN_CNT;
		if (conn->las_sweep_call > WT_SWEEP_LOOKASIDE_PASS_TARGET)
			conn->las_sweep_cnt += WT_SWEEP_LOOKASIDE_MIN_CNT;
	}

srch_notfound:
	/* Hitting the end of the table means the next call starts a new pass. */
	if (ret == WT_NOTFOUND)
		conn->las_sweep_call = 0;

	WT_ERR_NOTFOUND_OK(ret);

	if (0) {
		/* On error, discard the saved sweep position. */
err:		__wt_buf_free(session, key);
	}

	WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));

	__wt_scr_free(session, &las_addr);
	__wt_scr_free(session, &las_key);

	return (ret);
}
/*
 * __wt_schema_open_index --
 *	Open one named index of a table, or all of the table's indices.
 *
 * If idxname is NULL every index found in the metadata for the table is
 * opened and the table is marked as having a complete index list; otherwise
 * only the index whose name matches the first len bytes of idxname is opened.
 * If indexp is not NULL it is set to the last matching index handle.  The
 * table's in-memory index array is kept in the same order as the metadata.
 */
int
__wt_schema_open_index(WT_SESSION_IMPL *session,
    WT_TABLE *table, const char *idxname, size_t len, WT_INDEX **indexp)
{
	WT_CURSOR *cursor;
	WT_DECL_ITEM(tmp);
	WT_DECL_RET;
	WT_INDEX *idx;
	u_int i;
	int cmp, match;
	const char *idxconf, *name, *tablename, *uri;

	/* Check if we've already done the work. */
	if (idxname == NULL && table->idx_complete)
		return (0);

	cursor = NULL;
	idx = NULL;

	/* Build a search key. */
	tablename = table->name;
	(void)WT_PREFIX_SKIP(tablename, "table:");
	WT_ERR(__wt_scr_alloc(session, 512, &tmp));
	WT_ERR(__wt_buf_fmt(session, tmp, "index:%s:", tablename));

	/* Find matching indices. */
	WT_ERR(__wt_metadata_cursor(session, NULL, &cursor));
	cursor->set_key(cursor, tmp->data);
	if ((ret = cursor->search_near(cursor, &cmp)) == 0 && cmp < 0)
		ret = cursor->next(cursor);
	for (i = 0; ret == 0; i++, ret = cursor->next(cursor)) {
		WT_ERR(cursor->get_key(cursor, &uri));
		name = uri;
		/* Stop at the first key that isn't one of this table's indices. */
		if (!WT_PREFIX_SKIP(name, tmp->data))
			break;

		/* Is this the index we are looking for? */
		match = idxname == NULL || WT_STRING_MATCH(name, idxname, len);

		/*
		 * Ensure there is space, including if we have to make room for
		 * a new entry in the middle of the list.
		 */
		WT_ERR(__wt_realloc_def(session, &table->idx_alloc,
		    WT_MAX(i, table->nindices) + 1, &table->indices));

		/* Keep the in-memory list in sync with the metadata. */
		cmp = 0;
		while (table->indices[i] != NULL &&
		    (cmp = strcmp(uri, table->indices[i]->name)) > 0) {
			/*
			 * Index no longer exists, remove it.  Use the index
			 * destructor rather than freeing only the structure,
			 * otherwise the name and configuration strings the
			 * handle owns are leaked.
			 */
			__wt_schema_destroy_index(session, table->indices[i]);
			memmove(&table->indices[i], &table->indices[i + 1],
			    (table->nindices - i) * sizeof(WT_INDEX *));
			table->indices[--table->nindices] = NULL;
		}
		if (cmp < 0) {
			/* Make room for a new index. */
			memmove(&table->indices[i + 1], &table->indices[i],
			    (table->nindices - i) * sizeof(WT_INDEX *));
			table->indices[i] = NULL;
			++table->nindices;
		}

		if (!match)
			continue;

		if (table->indices[i] == NULL) {
			WT_ERR(cursor->get_value(cursor, &idxconf));
			WT_ERR(__wt_calloc_def(session, 1, &idx));
			WT_ERR(__wt_strdup(session, uri, &idx->name));
			WT_ERR(__wt_strdup(session, idxconf, &idx->config));
			WT_ERR(__open_index(session, table, idx));

			/* The table owns the handle now. */
			table->indices[i] = idx;
			idx = NULL;
		}

		/* If we were looking for a single index, we're done. */
		if (indexp != NULL)
			*indexp = table->indices[i];
		if (idxname != NULL)
			break;
	}
	WT_ERR_NOTFOUND_OK(ret);

	/* If we did a full pass, we won't need to do it again. */
	if (idxname == NULL) {
		table->nindices = i;
		table->idx_complete = 1;
	}

err:	__wt_scr_free(&tmp);
	/* A non-NULL idx here was allocated but never handed to the table. */
	if (idx != NULL)
		__wt_schema_destroy_index(session, idx);
	if (cursor != NULL)
		WT_TRET(cursor->close(cursor));
	return (ret);
}
int main(void) { int count, exact, ret; WT_CONNECTION *conn; WT_SESSION *session; WT_CURSOR *cursor; CUSTOMER cust, *custp, cust_sample[] = { { 0, "Professor Oak", "LeafGreen Avenue", "123-456-7890" }, { 0, "Lorelei", "Sevii Islands", "098-765-4321" }, { 0, NULL, NULL, NULL } }; CALL call, *callp, call_sample[] = { { 0, 32, 1, 2, "billing", "unavailable" }, { 0, 33, 1, 2, "billing", "available" }, { 0, 34, 1, 2, "reminder", "unavailable" }, { 0, 35, 1, 2, "reminder", "available" }, { 0, 0, 0, 0, NULL, NULL } }; ret = wiredtiger_open(home, NULL, "create", &conn); if (ret != 0) { fprintf(stderr, "Error connecting to %s: %s\n", home, wiredtiger_strerror(ret)); return (1); } /* Note: further error checking omitted for clarity. */ /*! [call-center work] */ ret = conn->open_session(conn, NULL, NULL, &session); /* * Create the customers table, give names and types to the columns. * The columns will be stored in two groups: "main" and "address", * created below. */ ret = session->create(session, "table:customers", "key_format=r," "value_format=SSS," "columns=(id,name,address,phone)," "colgroups=(main,address)"); /* Create the main column group with value columns except address. */ ret = session->create(session, "colgroup:customers:main", "columns=(name,phone)"); /* Create the address column group with just the address. */ ret = session->create(session, "colgroup:customers:address", "columns=(address)"); /* Create an index on the customer table by phone number. */ ret = session->create(session, "index:customers:phone", "columns=(phone)"); /* Populate the customers table with some data. */ ret = session->open_cursor( session, "table:customers", NULL, "append", &cursor); for (custp = cust_sample; custp->name != NULL; custp++) { cursor->set_value(cursor, custp->name, custp->address, custp->phone); ret = cursor->insert(cursor); } ret = cursor->close(cursor); /* * Create the calls table, give names and types to the columns. 
All the * columns will be stored together, so no column groups are declared. */ ret = session->create(session, "table:calls", "key_format=r," "value_format=qrrSS," "columns=(id,call_date,cust_id,emp_id,call_type,notes)"); /* * Create an index on the calls table with a composite key of cust_id * and call_date. */ ret = session->create(session, "index:calls:cust_date", "columns=(cust_id,call_date)"); /* Populate the calls table with some data. */ ret = session->open_cursor( session, "table:calls", NULL, "append", &cursor); for (callp = call_sample; callp->call_type != NULL; callp++) { cursor->set_value(cursor, callp->call_date, callp->cust_id, callp->emp_id, callp->call_type, callp->notes); ret = cursor->insert(cursor); } ret = cursor->close(cursor); /* * First query: a call arrives. In SQL: * * SELECT id, name FROM Customers WHERE phone=? * * Use the cust_phone index, lookup by phone number to fill the * customer record. The cursor will have a key format of "S" for a * string because the cust_phone index has a single column ("phone"), * which is of type "S". * * Specify the columns we want: the customer ID and the name. This * means the cursor's value format will be "rS". */ ret = session->open_cursor(session, "index:customers:phone(id,name)", NULL, NULL, &cursor); cursor->set_key(cursor, "123-456-7890"); ret = cursor->search(cursor); if (ret == 0) { ret = cursor->get_value(cursor, &cust.id, &cust.name); printf("Read customer record for %s (ID %" PRIu64 ")\n", cust.name, cust.id); } ret = cursor->close(cursor); /* * Next query: get the recent order history. In SQL: * * SELECT * FROM Calls WHERE cust_id=? ORDER BY call_date DESC LIMIT 3 * * Use the call_cust_date index to find the matching calls. Since it is * is in increasing order by date for a given customer, we want to start * with the last record for the customer and work backwards. * * Specify a subset of columns to be returned. 
(Note that if these were * all covered by the index, the primary would not have to be accessed.) * Stop after getting 3 records. */ ret = session->open_cursor(session, "index:calls:cust_date(cust_id,call_type,notes)", NULL, NULL, &cursor); /* * The keys in the index are (cust_id,call_date) -- we want the largest * call date for a given cust_id. Search for (cust_id+1,0), then work * backwards. */ cust.id = 1; cursor->set_key(cursor, cust.id + 1, 0); ret = cursor->search_near(cursor, &exact); /* * If the table is empty, search_near will return WT_NOTFOUND, else the * cursor will be positioned on a matching key if one exists, or an * adjacent key if one does not. If the positioned key is equal to or * larger than the search key, go back one. */ if (ret == 0 && exact >= 0) ret = cursor->prev(cursor); for (count = 0; ret == 0 && count < 3; ++count) { ret = cursor->get_value(cursor, &call.cust_id, &call.call_type, &call.notes); if (call.cust_id != cust.id) break; printf("Call record: customer %" PRIu64 " (%s: %s)\n", call.cust_id, call.call_type, call.notes); ret = cursor->prev(cursor); } /*! [call-center work] */ ret = conn->close(conn, NULL); return (ret); }
/*
 * util_dump --
 *	The "dump" command: write the contents of one or more objects to
 * standard output (or to the file named by -f).
 *
 * Options: -c checkpoint, -f output file, -j JSON output, -r reverse order,
 * -x hexadecimal output.  -j and -x cannot be combined; more than one URI is
 * only accepted with -j.  Returns 0 on success and non-zero on any failure.
 */
int
util_dump(WT_SESSION *session, int argc, char *argv[])
{
	WT_CURSOR *cursor;
	WT_DECL_RET;
	size_t len;
	int ch, i, tret;
	char *checkpoint, *config, *p, *simpleuri, *uri;
	bool hex, json, reverse;

	hex = json = reverse = false;
	checkpoint = config = simpleuri = uri = NULL;
	cursor = NULL;
	while ((ch = __wt_getopt(progname, argc, argv, "c:f:jrx")) != EOF)
		switch (ch) {
		case 'c':
			checkpoint = __wt_optarg;
			break;
		case 'f':			/* output file */
			if (freopen(__wt_optarg, "w", stdout) == NULL)
				return (util_err(
				    session, errno, "%s: reopen", __wt_optarg));
			break;
		case 'j':
			json = true;
			break;
		case 'r':
			reverse = true;
			break;
		case 'x':
			hex = true;
			break;
		case '?':
		default:
			return (usage());
		}
	argc -= __wt_optind;
	argv += __wt_optind;

	/* -j and -x are incompatible. */
	if (hex && json) {
		fprintf(stderr,
		    "%s: the -j and -x dump options are incompatible\n",
		    progname);
		goto err;
	}

	/* The remaining argument is the uri. */
	if (argc < 1 || (argc != 1 && !json))
		return (usage());

	if (json &&
	    (dump_json_begin(session) != 0 ||
	    dump_prefix(session, hex, json) != 0))
		goto err;

	for (i = 0; i < argc; i++) {
		if (json && i > 0)
			if (dump_json_separator(session) != 0)
				goto err;

		/*
		 * Release everything allocated by the previous iteration,
		 * including the cursor configuration string, before building
		 * the next set.
		 */
		free(config);
		free(uri);
		free(simpleuri);
		config = uri = simpleuri = NULL;

		if ((uri = util_uri(session, argv[i], "table")) == NULL)
			goto err;

		/* Size and build the cursor configuration string. */
		len =
		    checkpoint == NULL ? 0 : strlen("checkpoint=") +
		    strlen(checkpoint) + 1;
		len += strlen(json ? "dump=json" :
		    (hex ? "dump=hex" : "dump=print"));
		if ((config = malloc(len + 10)) == NULL) {
			(void)util_err(session, errno, NULL);
			goto err;
		}
		if (checkpoint == NULL)
			config[0] = '\0';
		else {
			(void)strcpy(config, "checkpoint=");
			(void)strcat(config, checkpoint);
			(void)strcat(config, ",");
		}
		(void)strcat(config, json ? "dump=json" :
		    (hex ? "dump=hex" : "dump=print"));
		if ((ret = session->open_cursor(
		    session, uri, NULL, config, &cursor)) != 0) {
			fprintf(stderr, "%s: cursor open(%s) failed: %s\n",
			    progname, uri, session->strerror(session, ret));
			goto err;
		}

		/* Strip any trailing "(...)" from the copy used for lookups. */
		if ((simpleuri = strdup(uri)) == NULL) {
			(void)util_err(session, errno, NULL);
			goto err;
		}
		if ((p = strchr(simpleuri, '(')) != NULL)
			*p = '\0';
		if (dump_config(session, simpleuri, cursor, hex, json) != 0)
			goto err;

		if (dump_record(cursor, reverse, json) != 0)
			goto err;
		if (json && dump_json_table_end(session) != 0)
			goto err;

		ret = cursor->close(cursor);
		cursor = NULL;
		if (ret != 0) {
			(void)util_err(session, ret, NULL);
			goto err;
		}
	}
	if (json && dump_json_end(session) != 0)
		goto err;

	if (0) {
err:		ret = 1;
	}

	free(config);
	free(uri);
	free(simpleuri);

	/*
	 * A cursor is only still open here after a failure.  Close it using a
	 * separate return variable: a successful close must not overwrite the
	 * error we are about to return.
	 */
	if (cursor != NULL && (tret = cursor->close(cursor)) != 0) {
		(void)util_err(session, tret, NULL);
		ret = 1;
	}
	return (ret);
}
int main(int argc, char *argv[]) { FILE *fp; WT_CONNECTION *conn; WT_CURSOR *cursor; WT_SESSION *session; WT_RAND_STATE rnd; uint64_t key; uint32_t absent, count, timeout; int ch, status, ret; pid_t pid; char *working_dir; if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL) progname = argv[0]; else ++progname; working_dir = NULL; timeout = 10; while ((ch = __wt_getopt(progname, argc, argv, "h:t:")) != EOF) switch (ch) { case 'h': working_dir = __wt_optarg; break; case 't': timeout = (uint32_t)atoi(__wt_optarg); break; default: usage(); } argc -= __wt_optind; argv += __wt_optind; if (argc != 0) usage(); testutil_work_dir_from_path(home, 512, working_dir); testutil_make_work_dir(home); /* * Fork a child to insert as many items. We will then randomly * kill the child, run recovery and make sure all items we wrote * exist after recovery runs. */ if ((pid = fork()) < 0) testutil_die(errno, "fork"); if (pid == 0) { /* child */ fill_db(); return (EXIT_SUCCESS); } /* parent */ __wt_random_init(&rnd); /* Sleep for the configured amount of time before killing the child. */ printf("Parent: sleep %" PRIu32 " seconds, then kill child\n", timeout); sleep(timeout); /* * !!! It should be plenty long enough to make sure more than one * log file exists. If wanted, that check would be added here. */ printf("Kill child\n"); if (kill(pid, SIGKILL) != 0) testutil_die(errno, "kill"); waitpid(pid, &status, 0); /* * !!! If we wanted to take a copy of the directory before recovery, * this is the place to do it. 
*/ chdir(home); printf("Open database, run recovery and verify content\n"); if ((ret = wiredtiger_open(NULL, NULL, ENV_CONFIG_REC, &conn)) != 0) testutil_die(ret, "wiredtiger_open"); if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) testutil_die(ret, "WT_CONNECTION:open_session"); if ((ret = session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0) testutil_die(ret, "WT_SESSION.open_cursor: %s", uri); if ((fp = fopen(RECORDS_FILE, "r")) == NULL) testutil_die(errno, "fopen"); /* * For every key in the saved file, verify that the key exists * in the table after recovery. Since we did write-no-sync, we * expect every key to have been recovered. */ for (absent = count = 0;; ++count) { ret = fscanf(fp, "%" SCNu64 "\n", &key); if (ret != EOF && ret != 1) testutil_die(errno, "fscanf"); if (ret == EOF) break; cursor->set_key(cursor, key); if ((ret = cursor->search(cursor)) != 0) { if (ret != WT_NOTFOUND) testutil_die(ret, "search"); printf("no record with key %" PRIu64 "\n", key); ++absent; } } fclose(fp); if ((ret = conn->close(conn, NULL)) != 0) testutil_die(ret, "WT_CONNECTION:close"); if (absent) { printf("%u record(s) absent from %u\n", absent, count); return (EXIT_FAILURE); } printf("%u records verified\n", count); return (EXIT_SUCCESS); }
// Opens a cursor on 'uri' (with 'config' as the cursor configuration, if not empty) and
// appends every (description, value) row it returns to 'bob' as a number.
//
// A description is split at its first ':' or, if it has none, at its first ' '. The part
// before the split names a sub-document and the rest (with leading whitespace trimmed)
// names the field inside it; descriptions with no separator are stored under the whole
// description with the field name "num". The sub-documents are appended to 'bob' once the
// cursor is exhausted.
//
// Returns CursorNotFound if the cursor cannot be opened, otherwise OK.
Status WiredTigerUtil::exportTableToBSON(WT_SESSION* session,
                                         const std::string& uri,
                                         const std::string& config,
                                         BSONObjBuilder* bob) {
    invariant(session);
    invariant(bob);

    const char* cursorConfig = NULL;
    if (!config.empty()) {
        cursorConfig = config.c_str();
    }

    WT_CURSOR* c = NULL;
    int ret = session->open_cursor(session, uri.c_str(), NULL, cursorConfig, &c);
    if (ret != 0) {
        return Status(ErrorCodes::CursorNotFound,
                      str::stream() << "unable to open cursor at URI " << uri
                                    << ". reason: " << wiredtiger_strerror(ret));
    }
    bob->append("uri", uri);
    invariant(c);
    ON_BLOCK_EXIT(c->close, c);

    typedef std::map<string, BSONObjBuilder*> SubBuilderMap;
    SubBuilderMap subs;

    // Largest value representable as a BSON-friendly long long.
    const long long maxLL = std::numeric_limits<long long>::max();

    const char* desc;
    const char* pvalue;
    uint64_t value;
    while (c->next(c) == 0 && c->get_value(c, &desc, &pvalue, &value) == 0) {
        StringData key(desc);
        StringData prefix;
        StringData suffix;

        // Prefer ':' as the separator and fall back to the first space.
        size_t idx = key.find(':');
        if (idx == string::npos) {
            idx = key.find(' ');
        }

        if (idx == string::npos) {
            prefix = key;
            suffix = "num";
        } else {
            prefix = key.substr(0, idx);
            suffix = key.substr(idx + 1);
        }

        // Convert unsigned 64-bit integral value of statistic to BSON-friendly long long.
        // If there is an overflow, set statistic value to max(long long).
        long long v;
        if (value > static_cast<uint64_t>(maxLL)) {
            v = maxLL;
        } else {
            v = static_cast<long long>(value);
        }

        if (prefix.size() == 0) {
            bob->appendNumber(desc, v);
            continue;
        }

        // Create the sub-document builder the first time its prefix is seen.
        BSONObjBuilder*& section = subs[prefix.toString()];
        if (!section) {
            section = new BSONObjBuilder();
        }
        section->appendNumber(mongoutils::str::ltrim(suffix.toString()), v);
    }

    // Attach each sub-document to the output and release its builder.
    SubBuilderMap::const_iterator it = subs.begin();
    while (it != subs.end()) {
        const std::string& sectionName = it->first;
        bob->append(sectionName, it->second->obj());
        delete it->second;
        ++it;
    }
    return Status::OK();
}
/* * Child process creates the database and table, and then writes data into * the table until it is killed by the parent. */ static void fill_db(void) { FILE *fp; WT_CONNECTION *conn; WT_CURSOR *cursor; WT_ITEM data; WT_RAND_STATE rnd; WT_SESSION *session; uint64_t i; int ret; uint8_t buf[MAX_VAL]; __wt_random_init(&rnd); memset(buf, 0, sizeof(buf)); /* * Initialize the first 25% to random values. Leave a bunch of data * space at the end to emphasize zero data. */ for (i = 0; i < MAX_VAL/4; i++) buf[i] = (uint8_t)__wt_random(&rnd); /* * Run in the home directory so that the records file is in there too. */ chdir(home); if ((ret = wiredtiger_open(NULL, NULL, ENV_CONFIG, &conn)) != 0) testutil_die(ret, "wiredtiger_open"); if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) testutil_die(ret, "WT_CONNECTION:open_session"); if ((ret = session->create(session, uri, "key_format=Q,value_format=u")) != 0) testutil_die(ret, "WT_SESSION.create: %s", uri); if ((ret = session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0) testutil_die(ret, "WT_SESSION.open_cursor: %s", uri); /* * Keep a separate file with the records we wrote for checking. */ (void)unlink(RECORDS_FILE); if ((fp = fopen(RECORDS_FILE, "w")) == NULL) testutil_die(errno, "fopen"); /* * Set to no buffering. */ setvbuf(fp, NULL, _IONBF, 0); /* * Write data into the table until we are killed by the parent. * The data in the buffer is already set to random content. */ data.data = buf; for (i = 0;; ++i) { data.size = __wt_random(&rnd) % MAX_VAL; cursor->set_key(cursor, i); cursor->set_value(cursor, &data); if ((ret = cursor->insert(cursor)) != 0) testutil_die(ret, "WT_CURSOR.insert"); /* * Save the key separately for checking later. */ if (fprintf(fp, "%" PRIu64 "\n", i) == -1) testutil_die(errno, "fprintf"); if (i % 5000) __wt_yield(); } }
/*
 * util_write --
 *	The "write" command: insert records into an object.
 *
 * Options: -a opens the cursor with append=true, -o with overwrite=true.  The
 * first remaining argument is the object's URI; it is followed by a list of
 * values when -a is given, or by key/value pairs otherwise.  Returns 0 on
 * success and non-zero on failure.
 */
int
util_write(WT_SESSION *session, int argc, char *argv[])
{
	WT_CURSOR *cursor;
	WT_DECL_RET;
	uint64_t recno;
	int append, ch, overwrite, rkey;
	const char *uri;
	char config[100];

	append = overwrite = 0;
	while ((ch = __wt_getopt(progname, argc, argv, "ao")) != EOF)
		switch (ch) {
		case 'a':
			append = 1;
			break;
		case 'o':
			overwrite = 1;
			break;
		case '?':
		default:
			return (usage());
		}
	argc -= __wt_optind;
	argv += __wt_optind;

	/*
	 * The remaining arguments are a uri followed by a list of values (if
	 * append is set), or key/value pairs (if append is not set).
	 */
	if (append) {
		if (argc < 2)
			return (usage());
	} else
		if (argc < 3 || ((argc - 1) % 2 != 0))
			return (usage());
	if ((uri = util_name(session, *argv, "table")) == NULL)
		return (1);

	/* Open the object. */
	(void)snprintf(config, sizeof(config), "%s,%s",
	    append ? "append=true" : "", overwrite ? "overwrite=true" : "");
	if ((ret = session->open_cursor(
	    session, uri, NULL, config, &cursor)) != 0)
		return (util_err(session, ret, "%s: session.open", uri));

	/*
	 * A simple write only makes sense if the key format is a string or a
	 * record number, and the value format is a single string.
	 */
	if (strcmp(cursor->key_format, "r") != 0 &&
	    strcmp(cursor->key_format, "S") != 0) {
		fprintf(stderr,
		    "%s: write command only possible when the key format is "
		    "a record number or string\n",
		    progname);
		return (1);
	}
	rkey = strcmp(cursor->key_format, "r") == 0 ? 1 : 0;
	if (strcmp(cursor->value_format, "S") != 0) {
		fprintf(stderr,
		    "%s: write command only possible when the value format is "
		    "a string\n",
		    progname);
		return (1);
	}

	/* Run through the values or key/value pairs. */
	while (*++argv != NULL) {
		if (!append) {
			/* The current argument is the key, the next the value. */
			if (rkey) {
				if (util_str2recno(session, *argv, &recno))
					return (1);
				cursor->set_key(cursor, recno);
			} else
				cursor->set_key(cursor, *argv);
			++argv;
		}
		cursor->set_value(cursor, *argv);

		/* Report the operation that actually failed. */
		if ((ret = cursor->insert(cursor)) != 0)
			return (util_cerr(cursor, "insert", ret));
	}

	return (0);
}
/* * build -- * Build a row- or column-store page in a file. */ void build(int ikey, int ivalue, int cnt) { WT_CONNECTION *conn; WT_CURSOR *cursor; WT_ITEM key, value; WT_SESSION *session; char config[256], kbuf[64], vbuf[64]; int new_slvg; /* * Disable logging: we're modifying files directly, we don't want to * run recovery. */ CHECK(wiredtiger_open( NULL, NULL, "create,log=(enabled=false)", &conn) == 0); CHECK(conn->open_session(conn, NULL, NULL, &session) == 0); CHECK(session->drop(session, "file:" LOAD, "force") == 0); switch (page_type) { case WT_PAGE_COL_FIX: (void)snprintf(config, sizeof(config), "key_format=r,value_format=7t," "allocation_size=%d," "internal_page_max=%d,internal_item_max=%d," "leaf_page_max=%d,leaf_item_max=%d", PSIZE, PSIZE, OSIZE, PSIZE, OSIZE); break; case WT_PAGE_COL_VAR: (void)snprintf(config, sizeof(config), "key_format=r," "allocation_size=%d," "internal_page_max=%d,internal_item_max=%d," "leaf_page_max=%d,leaf_item_max=%d", PSIZE, PSIZE, OSIZE, PSIZE, OSIZE); break; case WT_PAGE_ROW_LEAF: (void)snprintf(config, sizeof(config), "key_format=u," "allocation_size=%d," "internal_page_max=%d,internal_item_max=%d," "leaf_page_max=%d,leaf_item_max=%d", PSIZE, PSIZE, OSIZE, PSIZE, OSIZE); break; default: assert(0); } CHECK(session->create(session, "file:" LOAD, config) == 0); CHECK(session->open_cursor( session, "file:" LOAD, NULL, "bulk,append", &cursor) == 0); for (; cnt > 0; --cnt, ++ikey, ++ivalue) { switch (page_type) { /* Build the key. */ case WT_PAGE_COL_FIX: case WT_PAGE_COL_VAR: break; case WT_PAGE_ROW_LEAF: snprintf(kbuf, sizeof(kbuf), "%010d KEY------", ikey); key.data = kbuf; key.size = 20; cursor->set_key(cursor, &key); break; } switch (page_type) { /* Build the value. */ case WT_PAGE_COL_FIX: cursor->set_value(cursor, ivalue & 0x7f); break; case WT_PAGE_COL_VAR: case WT_PAGE_ROW_LEAF: snprintf(vbuf, sizeof(vbuf), "%010d VALUE----", value_unique ? 
ivalue : 37); value.data = vbuf; value.size = 20; cursor->set_value(cursor, &value); } CHECK(cursor->insert(cursor) == 0); } /* * The first time through this routine we create the salvage file and * then remove it (all we want is the appropriate schema entry, we're * creating the salvage file itself by hand). */ new_slvg = !file_exists(SLVG); if (new_slvg) { CHECK(session->drop(session, "file:" SLVG, "force") == 0); CHECK(session->create(session, "file:" SLVG, config) == 0); } CHECK(conn->close(conn, 0) == 0); if (new_slvg) (void)remove(SLVG); }
/*
 * __wt_txn_recover --
 *	Run recovery.
 *
 * Opens a dedicated session with logging disabled, recovers the metadata from
 * the log (unless the database was opened from a backup), scans the metadata
 * for the live files, replays the log against those files and finally forces
 * a checkpoint.  Returns 0 on success or an error code.
 */
int
__wt_txn_recover(WT_CONNECTION_IMPL *conn)
{
	WT_CURSOR *metac;
	WT_DECL_RET;
	WT_RECOVERY r;
	WT_SESSION_IMPL *session;
	struct WT_RECOVERY_FILE *metafile;
	char *config;
	int was_backup;

	WT_CLEAR(r);
	INIT_LSN(&r.ckpt_lsn);
	/*
	 * The error path frees config unconditionally: start it out as NULL so
	 * a failed metadata search can't leave us freeing an indeterminate
	 * pointer.
	 */
	config = NULL;
	was_backup = F_ISSET(conn, WT_CONN_WAS_BACKUP) ? 1 : 0;

	/* We need a real session for recovery. */
	WT_RET(__wt_open_session(conn, NULL, NULL, &session));
	F_SET(session, WT_SESSION_NO_LOGGING);
	r.session = session;

	WT_ERR(__wt_metadata_search(session, WT_METAFILE_URI, &config));
	WT_ERR(__recovery_setup_file(&r, WT_METAFILE_URI, config));
	WT_ERR(__wt_metadata_cursor(session, NULL, &metac));
	metafile = &r.files[WT_METAFILE_ID];
	metafile->c = metac;

	/*
	 * First, do a pass through the log to recover the metadata, and
	 * establish the last checkpoint LSN.  Skip this when opening a hot
	 * backup: we already have the correct metadata in that case.
	 */
	if (!was_backup) {
		r.metadata_only = 1;
		if (IS_INIT_LSN(&metafile->ckpt_lsn))
			WT_ERR(__wt_log_scan(session,
			    NULL, WT_LOGSCAN_FIRST, __txn_log_recover, &r));
		else
			WT_ERR(__wt_log_scan(session,
			    &metafile->ckpt_lsn, 0, __txn_log_recover, &r));

		WT_ASSERT(session,
		    LOG_CMP(&r.ckpt_lsn, &conn->log->first_lsn) >= 0);
	}

	/* Scan the metadata to find the live files and their IDs. */
	WT_ERR(__recovery_file_scan(&r));

	/*
	 * We no longer need the metadata cursor: close it to avoid pinning any
	 * resources that could block eviction during recovery.
	 */
	r.files[0].c = NULL;
	WT_ERR(metac->close(metac));

	/*
	 * Now, recover all the files apart from the metadata.
	 * Pass WT_LOGSCAN_RECOVER so that old logs get truncated.
	 */
	r.metadata_only = 0;
	WT_ERR(__wt_verbose(session, WT_VERB_RECOVERY,
	    "Main recovery loop: starting at %u/%" PRIuMAX,
	    r.ckpt_lsn.file, (uintmax_t)r.ckpt_lsn.offset));
	if (IS_INIT_LSN(&r.ckpt_lsn))
		WT_ERR(__wt_log_scan(session, NULL,
		    WT_LOGSCAN_FIRST | WT_LOGSCAN_RECOVER,
		    __txn_log_recover, &r));
	else
		WT_ERR(__wt_log_scan(session, &r.ckpt_lsn,
		    WT_LOGSCAN_RECOVER, __txn_log_recover, &r));

	conn->next_file_id = r.max_fileid;

	/*
	 * If recovery ran successfully forcibly log a checkpoint so the next
	 * open is fast and keep the metadata up to date with the checkpoint
	 * LSN and archiving.
	 */
	WT_ERR(session->iface.checkpoint(&session->iface, "force=1"));

err:	WT_TRET(__recovery_free(&r));
	__wt_free(session, config);
	WT_TRET(session->iface.close(&session->iface, NULL));

	return (ret);
}
/*
 * __wt_curlog_open --
 *	Create and initialize a cursor over the log.
 *
 * Fails with EINVAL if logging isn't enabled for the connection.  On success
 * the cursor is returned in *cursorp and the log archive lock is held for
 * reading; on failure everything allocated here is released and *cursorp is
 * set to NULL.
 */
int
__wt_curlog_open(WT_SESSION_IMPL *session,
    const char *uri, const char *cfg[], WT_CURSOR **cursorp)
{
	WT_CONNECTION_IMPL *conn;
	WT_CURSOR_STATIC_INIT(iface,
	    __wt_cursor_get_key,		/* get-key */
	    __wt_cursor_get_value,		/* get-value */
	    __wt_cursor_set_key,		/* set-key */
	    __wt_cursor_set_value,		/* set-value */
	    __curlog_compare,			/* compare */
	    __wt_cursor_equals,			/* equals */
	    __curlog_next,			/* next */
	    __wt_cursor_notsup,			/* prev */
	    __curlog_reset,			/* reset */
	    __curlog_search,			/* search */
	    __wt_cursor_search_near_notsup,	/* search-near */
	    __wt_cursor_notsup,			/* insert */
	    __wt_cursor_notsup,			/* update */
	    __wt_cursor_notsup,			/* remove */
	    __wt_cursor_reconfigure_notsup,	/* reconfigure */
	    __curlog_close);			/* close */
	WT_CURSOR *cursor;
	WT_CURSOR_LOG *curlog;
	WT_DECL_RET;
	WT_LOG *conn_log;

	WT_STATIC_ASSERT(offsetof(WT_CURSOR_LOG, iface) == 0);

	/* Log cursors are meaningless unless the connection is logging. */
	conn = S2C(session);
	if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))
		WT_RET_MSG(session, EINVAL,
		    "Cannot open a log cursor without logging enabled");
	conn_log = conn->log;

	/* Allocate the cursor and install the log cursor methods. */
	curlog = NULL;
	WT_RET(__wt_calloc_one(session, &curlog));
	cursor = &curlog->iface;
	*cursor = iface;
	cursor->session = &session->iface;
	cursor->key_format = WT_LOGC_KEY_FORMAT;
	cursor->value_format = WT_LOGC_VALUE_FORMAT;

	/* Allocate the LSNs and the scratch buffers the cursor works with. */
	WT_ERR(__wt_calloc_one(session, &curlog->cur_lsn));
	WT_ERR(__wt_calloc_one(session, &curlog->next_lsn));
	WT_ERR(__wt_scr_alloc(session, 0, &curlog->logrec));
	WT_ERR(__wt_scr_alloc(session, 0, &curlog->opkey));
	WT_ERR(__wt_scr_alloc(session, 0, &curlog->opvalue));

	WT_INIT_LSN(curlog->cur_lsn);
	WT_INIT_LSN(curlog->next_lsn);

	WT_ERR(__wt_cursor_init(cursor, uri, NULL, cfg, cursorp));

	/*
	 * Buffered log records may include ones the caller has just written
	 * and now wants to read: force them out first.
	 */
	WT_ERR(__wt_log_force_write(session, 1));

	/* Holding the archive lock for reading keeps log files in place. */
	WT_ERR(__wt_readlock(session, conn_log->log_archive_lock));

	return (0);

err:	if (F_ISSET(cursor, WT_CURSTD_OPEN))
		WT_TRET(cursor->close(cursor));
	else {
		__wt_free(session, curlog->cur_lsn);
		__wt_free(session, curlog->next_lsn);
		__wt_scr_free(session, &curlog->logrec);
		__wt_scr_free(session, &curlog->opkey);
		__wt_scr_free(session, &curlog->opvalue);
		/*
		 * NOTE: the read lock is the last thing acquired, so it is
		 * never held when we get here and there is nothing to
		 * unlock.  Revisit this if more work is added after it.
		 */
		__wt_free(session, curlog);
	}
	*cursorp = NULL;

	return (ret);
}
int main(int argc, char *argv[]) { TEST_OPTS *opts, _opts; WT_CURSOR *c; WT_SESSION *session; clock_t ce, cs; pthread_t id[100]; uint64_t current_value; int i; opts = &_opts; if (testutil_disable_long_tests()) return (0); memset(opts, 0, sizeof(*opts)); opts->nthreads = 10; opts->nrecords = 1000; opts->table_type = TABLE_ROW; testutil_check(testutil_parse_opts(argc, argv, opts)); testutil_make_work_dir(opts->home); testutil_check(wiredtiger_open(opts->home, NULL, "create," "cache_size=2G," "eviction=(threads_max=5)," "statistics=(fast)", &opts->conn)); testutil_check( opts->conn->open_session(opts->conn, NULL, NULL, &session)); testutil_check(session->create(session, opts->uri, "key_format=Q,value_format=Q," "leaf_page_max=32k,")); /* Create the single record. */ testutil_check( session->open_cursor(session, opts->uri, NULL, NULL, &c)); c->set_key(c, 1); c->set_value(c, 0); testutil_check(c->insert(c)); testutil_check(c->close(c)); cs = clock(); for (i = 0; i < (int)opts->nthreads; ++i) { testutil_check(pthread_create( &id[i], NULL, thread_insert_race, (void *)opts)); } while (--i >= 0) testutil_check(pthread_join(id[i], NULL)); testutil_check( session->open_cursor(session, opts->uri, NULL, NULL, &c)); c->set_key(c, 1); testutil_check(c->search(c)); testutil_check(c->get_value(c, ¤t_value)); if (current_value != opts->nthreads * opts->nrecords) { fprintf(stderr, "ERROR: didn't get expected number of changes\n"); fprintf(stderr, "got: %" PRIu64 ", expected: %" PRIu64 "\n", current_value, opts->nthreads * opts->nrecords); return (EXIT_FAILURE); } testutil_check(session->close(session, NULL)); ce = clock(); printf("%" PRIu64 ": %.2lf\n", opts->nrecords, (ce - cs) / (double)CLOCKS_PER_SEC); testutil_cleanup(opts); return (EXIT_SUCCESS); }
/*
 * cursor_ops --
 *	Walk through the WT_CURSOR API on "table:mytable": opening cursors with
 *	various configurations, getting and setting keys and values, moving,
 *	comparing, searching, inserting, updating and removing.
 *
 *	Each example is bracketed by a matching pair of "[name]" marker
 *	comments (presumably so the enclosed text can be extracted as a
 *	documentation snippet -- confirm against the doc build), which is why
 *	statements are kept exactly as written between the markers.
 *
 *	Most intermediate results stored in ret are overwritten by the next
 *	example. The function returns early with the error from
 *	cursor->remove in the two "Display an error" examples, and otherwise
 *	returns the result of the final cursor->close. Cursors opened by
 *	earlier examples are not closed here when cursor is reassigned.
 */
int
cursor_ops(WT_SESSION *session)
{
	WT_CURSOR *cursor;
	int ret;

	/*! [Open a cursor] */
	ret = session->open_cursor(
	    session, "table:mytable", NULL, NULL, &cursor);
	/*! [Open a cursor] */

	/*! [Open a cursor on the metadata] */
	ret = session->open_cursor(
	    session, "metadata:", NULL, NULL, &cursor);
	/*! [Open a cursor on the metadata] */

	{
	WT_CURSOR *duplicate;
	const char *key = "some key";
	/*! [Duplicate a cursor] */
	ret = session->open_cursor(
	    session, "table:mytable", NULL, NULL, &cursor);
	cursor->set_key(cursor, key);
	ret = cursor->search(cursor);

	/* Duplicate the cursor. */
	ret = session->open_cursor(session, NULL, cursor, NULL, &duplicate);
	/*! [Duplicate a cursor] */
	}

	{
	const char *key = "some key", *value = "some value";
	/*! [Reconfigure a cursor] */
	ret = session->open_cursor(
	    session, "table:mytable", NULL, "overwrite=false", &cursor);
	cursor->set_key(cursor, key);
	cursor->set_value(cursor, value);

	/* Reconfigure the cursor to overwrite the record. */
	ret = cursor->reconfigure(cursor, "overwrite=true");
	ret = cursor->insert(cursor);
	/*! [Reconfigure a cursor] */
	}

	{
	/*! [boolean configuration string example] */
	ret = session->open_cursor(session, "table:mytable", NULL,
	    "overwrite", &cursor);
	ret = session->open_cursor(session, "table:mytable", NULL,
	    "overwrite=true", &cursor);
	ret = session->open_cursor(session, "table:mytable", NULL,
	    "overwrite=1", &cursor);
	/*! [boolean configuration string example] */
	}

	{
	/*! [open a named checkpoint] */
	ret = session->open_cursor(session,
	    "table:mytable", NULL, "checkpoint=midnight", &cursor);
	/*! [open a named checkpoint] */
	}

	{
	/*! [open the default checkpoint] */
	ret = session->open_cursor(session,
	    "table:mytable", NULL, "checkpoint=WiredTigerCheckpoint", &cursor);
	/*! [open the default checkpoint] */
	}

	{
	/*! [Get the cursor's string key] */
	const char *key;	/* Get the cursor's string key. */
	ret = cursor->get_key(cursor, &key);
	/*! [Get the cursor's string key] */
	}

	{
	/*! [Set the cursor's string key] */
				/* Set the cursor's string key. */
	const char *key = "another key";
	cursor->set_key(cursor, key);
	/*! [Set the cursor's string key] */
	}

	{
	/*! [Get the cursor's record number key] */
	uint64_t recno;		/* Get the cursor's record number key. */
	ret = cursor->get_key(cursor, &recno);
	/*! [Get the cursor's record number key] */
	}

	{
	/*! [Set the cursor's record number key] */
	uint64_t recno = 37;	/* Set the cursor's record number key. */
	cursor->set_key(cursor, recno);
	/*! [Set the cursor's record number key] */
	}

	{
	/*! [Get the cursor's composite key] */
			/* Get the cursor's "SiH" format composite key. */
	const char *first;
	int32_t second;
	uint16_t third;
	ret = cursor->get_key(cursor, &first, &second, &third);
	/*! [Get the cursor's composite key] */
	}

	{
	/*! [Set the cursor's composite key] */
			/* Set the cursor's "SiH" format composite key. */
	cursor->set_key(cursor, "first", (int32_t)5, (uint16_t)7);
	/*! [Set the cursor's composite key] */
	}

	{
	/*! [Get the cursor's string value] */
	const char *value;	/* Get the cursor's string value. */
	ret = cursor->get_value(cursor, &value);
	/*! [Get the cursor's string value] */
	}

	{
	/*! [Set the cursor's string value] */
				/* Set the cursor's string value. */
	const char *value = "another value";
	cursor->set_value(cursor, value);
	/*! [Set the cursor's string value] */
	}

	{
	/*! [Get the cursor's raw value] */
	WT_ITEM value;		/* Get the cursor's raw value. */
	ret = cursor->get_value(cursor, &value);
	/*! [Get the cursor's raw value] */
	}

	{
	/*! [Set the cursor's raw value] */
	WT_ITEM value;		/* Set the cursor's raw value. */
	value.data = "another value";
	value.size = strlen("another value");
	cursor->set_value(cursor, &value);
	/*! [Set the cursor's raw value] */
	}

	/*! [Return the next record] */
	ret = cursor->next(cursor);
	/*! [Return the next record] */

	/*! [Return the previous record] */
	ret = cursor->prev(cursor);
	/*! [Return the previous record] */

	/*! [Reset the cursor] */
	ret = cursor->reset(cursor);
	/*! [Reset the cursor] */

	/*
	 * NOTE(review): in the two comparison examples below "other" is a NULL
	 * placeholder, and the output argument is read without first checking
	 * ret -- confirm these examples are not expected to run against a real
	 * second cursor.
	 */
	{
	WT_CURSOR *other = NULL;
	/*! [Cursor comparison] */
	int compare;
	ret = cursor->compare(cursor, other, &compare);
	if (compare == 0) {
		/* Cursors reference the same key */
	} else if (compare < 0) {
		/* Cursor key less than other key */
	} else if (compare > 0) {
		/* Cursor key greater than other key */
	}
	/*! [Cursor comparison] */
	}

	{
	WT_CURSOR *other = NULL;
	/*! [Cursor equality] */
	int equal;
	ret = cursor->equals(cursor, other, &equal);
	if (equal) {
		/* Cursors reference the same key */
	} else {
		/* Cursors don't reference the same key */
	}
	/*! [Cursor equality] */
	}

	{
	/*! [Search for an exact match] */
	const char *key = "some key";
	cursor->set_key(cursor, key);
	ret = cursor->search(cursor);
	/*! [Search for an exact match] */
	}

	/* The search-near example lives in its own function. */
	ret = cursor_search_near(cursor);

	{
	/*! [Insert a new record or overwrite an existing record] */
	/* Insert a new record or overwrite an existing record. */
	const char *key = "some key", *value = "some value";
	ret = session->open_cursor(
	    session, "table:mytable", NULL, NULL, &cursor);
	cursor->set_key(cursor, key);
	cursor->set_value(cursor, value);
	ret = cursor->insert(cursor);
	/*! [Insert a new record or overwrite an existing record] */
	}

	{
	/*! [Insert a new record and fail if the record exists] */
	/* Insert a new record and fail if the record exists. */
	const char *key = "some key", *value = "some value";
	ret = session->open_cursor(
	    session, "table:mytable", NULL, "overwrite=false", &cursor);
	cursor->set_key(cursor, key);
	cursor->set_value(cursor, value);
	ret = cursor->insert(cursor);
	/*! [Insert a new record and fail if the record exists] */
	}

	{
	/*! [Insert a new record and assign a record number] */
	/* Insert a new record and assign a record number. */
	uint64_t recno;
	const char *value = "some value";
	ret = session->open_cursor(
	    session, "table:mytable", NULL, "append", &cursor);
	cursor->set_value(cursor, value);
	ret = cursor->insert(cursor);
	if (ret == 0)
		ret = cursor->get_key(cursor, &recno);
	/*! [Insert a new record and assign a record number] */
	}

	{
	/*! [Update an existing record or insert a new record] */
	const char *key = "some key", *value = "some value";
	ret = session->open_cursor(
	    session, "table:mytable", NULL, NULL, &cursor);
	cursor->set_key(cursor, key);
	cursor->set_value(cursor, value);
	ret = cursor->update(cursor);
	/*! [Update an existing record or insert a new record] */
	}

	{
	/*! [Update an existing record and fail if DNE] */
	const char *key = "some key", *value = "some value";
	ret = session->open_cursor(
	    session, "table:mytable", NULL, "overwrite=false", &cursor);
	cursor->set_key(cursor, key);
	cursor->set_value(cursor, value);
	ret = cursor->update(cursor);
	/*! [Update an existing record and fail if DNE] */
	}

	{
	/*! [Remove a record] */
	const char *key = "some key";
	ret = session->open_cursor(
	    session, "table:mytable", NULL, NULL, &cursor);
	cursor->set_key(cursor, key);
	ret = cursor->remove(cursor);
	/*! [Remove a record] */
	}

	{
	/*! [Remove a record and fail if DNE] */
	const char *key = "some key";
	ret = session->open_cursor(
	    session, "table:mytable", NULL, "overwrite=false", &cursor);
	cursor->set_key(cursor, key);
	ret = cursor->remove(cursor);
	/*! [Remove a record and fail if DNE] */
	}

	{
	/*! [Display an error] */
	const char *key = "non-existent key";
	cursor->set_key(cursor, key);
	if ((ret = cursor->remove(cursor)) != 0) {
		fprintf(stderr,
		    "cursor.remove: %s\n", wiredtiger_strerror(ret));
		return (ret);
	}
	/*! [Display an error] */
	}

	{
	/*! [Display an error thread safe] */
	const char *key = "non-existent key";
	cursor->set_key(cursor, key);
	if ((ret = cursor->remove(cursor)) != 0) {
		fprintf(stderr,
		    "cursor.remove: %s\n",
		    cursor->session->strerror(cursor->session, ret));
		return (ret);
	}
	/*! [Display an error thread safe] */
	}

	/*! [Close the cursor] */
	ret = cursor->close(cursor);
	/*! [Close the cursor] */

	return (ret);
}
/*
 * __curjoin_next --
 *	WT_CURSOR::next for join cursors: advance the join iterator to the next
 *	key that satisfies the join and position the main cursor on it.
 */
static int
__curjoin_next(WT_CURSOR *cursor)
{
	WT_CURSOR *main_cursor;
	WT_CURSOR_JOIN *cjoin;
	WT_CURSOR_JOIN_ITER *iter;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;
	int close_ret;

	cjoin = (WT_CURSOR_JOIN *)cursor;

	JOINABLE_CURSOR_API_CALL(cursor, session, next, NULL);

	/* Once a join cursor has failed, it stays failed. */
	if (F_ISSET(cjoin, WT_CURJOIN_ERROR))
		WT_ERR_MSG(session, WT_ERROR,
		    "join cursor encountered previous error");

	/* Lazily set up the join and its iterator on first use. */
	if (!F_ISSET(cjoin, WT_CURJOIN_INITIALIZED))
		WT_ERR(__curjoin_init_next(session, cjoin, true));
	if (cjoin->iter == NULL)
		WT_ERR(__curjoin_iter_init(session, cjoin, &cjoin->iter));
	iter = cjoin->iter;
	F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);

	/*
	 * Step the iterator until it fails or runs out, or until the range
	 * check on the current key returns something other than WT_NOTFOUND.
	 */
	for (;;) {
		if ((ret = __curjoin_iter_next(iter, cursor)) != 0)
			break;
		ret = __curjoin_entries_in_range(
		    session, cjoin, iter->curkey, iter);
		if (ret != WT_NOTFOUND)
			break;
	}
	iter->positioned = (ret == 0);

	if (ret == WT_NOTFOUND) {
		/* Out of keys: close the iterators, WT_NOTFOUND is returned. */
		if ((close_ret = __curjoin_iter_close_all(iter)) != 0) {
			WT_ERR(close_ret);
		}
	} else if (ret != 0) {
		WT_ERR(ret);
	} else {
		/*
		 * Position the 'main' cursor, this will be used to retrieve
		 * values from the cursor join. The key we have is raw, but
		 * the main cursor may not be raw.
		 */
		main_cursor = cjoin->main;
		__wt_cursor_set_raw_key(main_cursor, iter->curkey);

		/*
		 * A failed search is not expected, convert WT_NOTFOUND into a
		 * generic error.
		 */
		iter->entry->stats.main_access++;
		ret = main_cursor->search(main_cursor);
		if (ret != 0) {
			if (ret == WT_NOTFOUND)
				ret = WT_ERROR;
			WT_ERR_MSG(session, ret, "join cursor failed search");
		}

		F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
	}

	if (0) {
err:		F_SET(cjoin, WT_CURJOIN_ERROR);
	}
	API_END_RET(session, ret);
}
static int indexer( Operation *op, wt_ctx *wc, AttributeDescription *ad, struct berval *atname, BerVarray vals, ID id, int opid, slap_mask_t mask ) { int rc, i; struct berval *keys; WT_CURSOR *cursor = NULL; WT_SESSION *session = wc->session; assert( mask != 0 ); cursor = wt_ctx_index_cursor(wc, atname, 1); if( !cursor ) { Debug( LDAP_DEBUG_ANY, LDAP_XSTRING(indexer) ": open index cursor failed: %s\n", atname->bv_val, 0, 0 ); goto done; } if( IS_SLAP_INDEX( mask, SLAP_INDEX_PRESENT ) ) { rc = wt_key_change( op->o_bd, cursor, &presence_key, id, opid ); if( rc ) { goto done; } } if( IS_SLAP_INDEX( mask, SLAP_INDEX_EQUALITY ) ) { rc = ad->ad_type->sat_equality->smr_indexer( LDAP_FILTER_EQUALITY, mask, ad->ad_type->sat_syntax, ad->ad_type->sat_equality, atname, vals, &keys, op->o_tmpmemctx ); if( rc == LDAP_SUCCESS && keys != NULL ) { for( i=0; keys[i].bv_val != NULL; i++ ) { rc = wt_key_change( op->o_bd, cursor, &keys[i], id, opid ); if( rc ) { ber_bvarray_free_x( keys, op->o_tmpmemctx ); goto done; } } ber_bvarray_free_x( keys, op->o_tmpmemctx ); } rc = LDAP_SUCCESS; } if( IS_SLAP_INDEX( mask, SLAP_INDEX_APPROX ) ) { rc = ad->ad_type->sat_approx->smr_indexer( LDAP_FILTER_APPROX, mask, ad->ad_type->sat_syntax, ad->ad_type->sat_approx, atname, vals, &keys, op->o_tmpmemctx ); if( rc == LDAP_SUCCESS && keys != NULL ) { for( i=0; keys[i].bv_val != NULL; i++ ) { rc = wt_key_change( op->o_bd, cursor, &keys[i], id, opid ); if( rc ) { ber_bvarray_free_x( keys, op->o_tmpmemctx ); goto done; } } ber_bvarray_free_x( keys, op->o_tmpmemctx ); } rc = LDAP_SUCCESS; } if( IS_SLAP_INDEX( mask, SLAP_INDEX_SUBSTR ) ) { rc = ad->ad_type->sat_substr->smr_indexer( LDAP_FILTER_SUBSTRINGS, mask, ad->ad_type->sat_syntax, ad->ad_type->sat_substr, atname, vals, &keys, op->o_tmpmemctx ); if( rc == LDAP_SUCCESS && keys != NULL ) { for( i=0; keys[i].bv_val != NULL; i++ ) { rc = wt_key_change( op->o_bd, cursor, &keys[i], id, opid ); if( rc ) { ber_bvarray_free_x( keys, op->o_tmpmemctx ); goto done; 
} } ber_bvarray_free_x( keys, op->o_tmpmemctx ); } rc = LDAP_SUCCESS; } done: if(cursor){ cursor->close(cursor); } return rc; }
/*
 * __curjoin_entry_member --
 *	Do a membership check for a particular index that was joined,
 *	if not a member, returns WT_NOTFOUND.
 *
 *	The check proceeds in stages: an optional Bloom filter lookup, a
 *	recursive range check when the entry is a subjoin, and otherwise a
 *	check of the index key, which is taken from the iterator, looked up in
 *	the main table, or produced by the index's custom extractor.
 *
 *	session: the session.
 *	entry:   the join entry to test against.
 *	key:     the key to test.
 *	iter:    the join iterator, or NULL.
 *
 *	Returns 0 if the key is (or may be) a member, WT_NOTFOUND if it is
 *	not, or another error code on failure.
 */
static int
__curjoin_entry_member(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry,
    WT_ITEM *key, WT_CURSOR_JOIN_ITER *iter)
{
	WT_CURJOIN_EXTRACTOR extract_cursor;
	WT_CURSOR *c;
	WT_CURSOR_STATIC_INIT(iface,
	    __wt_cursor_get_key,		/* get-key */
	    __wt_cursor_get_value,		/* get-value */
	    __wt_cursor_set_key,		/* set-key */
	    __wt_cursor_set_value,		/* set-value */
	    __wt_cursor_compare_notsup,		/* compare */
	    __wt_cursor_equals_notsup,		/* equals */
	    __wt_cursor_notsup,			/* next */
	    __wt_cursor_notsup,			/* prev */
	    __wt_cursor_notsup,			/* reset */
	    __wt_cursor_notsup,			/* search */
	    __wt_cursor_search_near_notsup,	/* search-near */
	    __curjoin_extract_insert,		/* insert */
	    __wt_cursor_modify_notsup,		/* modify */
	    __wt_cursor_notsup,			/* update */
	    __wt_cursor_notsup,			/* remove */
	    __wt_cursor_notsup,			/* reserve */
	    __wt_cursor_reconfigure_notsup,	/* reconfigure */
	    __wt_cursor_notsup,			/* cache */
	    __wt_cursor_reopen_notsup,		/* reopen */
	    __wt_cursor_notsup);		/* close */
	WT_DECL_RET;
	WT_INDEX *idx;
	WT_ITEM v;
	bool bloom_found;

	if (entry->subjoin == NULL && iter != NULL &&
	    (iter->end_pos + iter->end_skip >= entry->ends_next ||
	    (iter->end_skip > 0 &&
	    F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION))))
		return (0);	/* no checks to make */

	entry->stats.membership_check++;
	bloom_found = false;

	if (entry->bloom != NULL) {
		/*
		 * If the item is not in the Bloom filter, we return
		 * immediately, otherwise, we still may need to check the
		 * long way, since it may be a false positive.
		 *
		 * If we don't own the Bloom filter, we must be sharing one
		 * in a previous entry. So the shared filter has already
		 * been checked and passed, we don't need to check it again.
		 * We'll still need to check the long way.
		 */
		if (F_ISSET(entry, WT_CURJOIN_ENTRY_OWN_BLOOM))
			WT_ERR(__wt_bloom_inmem_get(entry->bloom, key));
		if (F_ISSET(entry, WT_CURJOIN_ENTRY_FALSE_POSITIVES))
			return (0);
		bloom_found = true;
	}
	if (entry->subjoin != NULL) {
		WT_ASSERT(session,
		    iter == NULL || entry->subjoin == iter->child->cjoin);
		ret = __curjoin_entries_in_range(session, entry->subjoin,
		    key, iter == NULL ? NULL : iter->child);
		if (iter != NULL &&
		    WT_CURJOIN_ITER_CONSUMED(iter->child)) {
			WT_ERR(__curjoin_iter_bump(iter));
			ret = WT_NOTFOUND;
		}
		return (ret);
	}
	if (entry->index != NULL) {
		/*
		 * If this entry is used by the iterator, then we already
		 * have the index key, and we won't have to do any
		 * extraction either.
		 */
		if (iter != NULL && entry == iter->entry)
			WT_ITEM_SET(v, iter->idxkey);
		else {
			memset(&v, 0, sizeof(v));	/* Keep lint quiet. */
			c = entry->main;
			c->set_key(c, key);
			entry->stats.main_access++;
			if ((ret = c->search(c)) == 0)
				ret = c->get_value(c, &v);
			else if (ret == WT_NOTFOUND) {
				__wt_err(session, ret,
				    "main table for join is missing entry");
				ret = WT_ERROR;
			}
			/* Always reset the main cursor, keeping any error. */
			WT_TRET(c->reset(c));
			WT_ERR(ret);
		}
	} else
		WT_ITEM_SET(v, *key);

	if ((idx = entry->index) != NULL && idx->extractor != NULL &&
	    (iter == NULL || entry != iter->entry)) {
		WT_CLEAR(extract_cursor);
		extract_cursor.iface = iface;
		extract_cursor.iface.session = &session->iface;
		extract_cursor.iface.key_format = idx->exkey_format;
		extract_cursor.ismember = false;
		extract_cursor.entry = entry;

		/*
		 * Release the extract cursor's key and value buffers whether
		 * or not the extractor succeeded: jumping to the error label
		 * before freeing them would leak anything the callback had
		 * already stored in the cursor.
		 */
		ret = idx->extractor->extract(idx->extractor,
		    &session->iface, key, &v, &extract_cursor.iface);
		__wt_buf_free(session, &extract_cursor.iface.key);
		__wt_buf_free(session, &extract_cursor.iface.value);
		WT_ERR(ret);
		if (!extract_cursor.ismember)
			WT_ERR(WT_NOTFOUND);
	} else
		WT_ERR(__curjoin_entry_in_range(session, entry, &v, iter));

	if (0) {
		/* A Bloom hit that the full check rejects is a false positive. */
err:		if (ret == WT_NOTFOUND && bloom_found)
			entry->stats.bloom_false_positive++;
	}
	return (ret);
}
/*
 * __lsm_bloom_create --
 *	Create a bloom filter for a chunk of the LSM tree that has been
 *	checkpointed but not yet been merged.
 *
 *	Every key of the chunk is read through a merge cursor restricted to
 *	that chunk and inserted into a new Bloom filter; the filter is then
 *	finalized, read once to load it into cache, and recorded in the tree's
 *	metadata under the tree write lock.
 *
 *	session:   the session.
 *	lsm_tree:  the LSM tree owning the chunk.
 *	chunk:     the chunk to build the filter for.
 *	chunk_off: the chunk's offset, passed to __wt_clsm_init_merge.
 *
 *	Returns 0 on success or an error code. The merge cursor and the Bloom
 *	filter handle are closed on all paths.
 */
static int
__lsm_bloom_create(WT_SESSION_IMPL *session,
    WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk, u_int chunk_off)
{
	WT_BLOOM *bloom;
	WT_CURSOR *src;
	WT_DECL_RET;
	WT_ITEM key;
	uint64_t insert_count;

	WT_RET(__wt_lsm_tree_setup_bloom(session, lsm_tree, chunk));

	bloom = NULL;
	src = NULL;
	/*
	 * This is merge-like activity, and we don't want compacts to give up
	 * because we are creating a bunch of bloom filters before merging.
	 */
	++lsm_tree->merge_progressing;
	WT_RET(__wt_bloom_create(session, chunk->bloom_uri,
	    lsm_tree->bloom_config, chunk->count,
	    lsm_tree->bloom_bit_count, lsm_tree->bloom_hash_count, &bloom));

	/* Open a special merge cursor just on this chunk. */
	WT_ERR(__wt_open_cursor(session, lsm_tree->name, NULL, NULL, &src));
	F_SET(src, WT_CURSTD_RAW);
	WT_ERR(__wt_clsm_init_merge(src, chunk_off, chunk->id, 1));

	/*
	 * Setup so that we don't hold pages we read into cache, and so
	 * that we don't get stuck if the cache is full. If we allow
	 * ourselves to get stuck creating bloom filters, the entire tree
	 * can stall since there may be no worker threads available to flush.
	 */
	F_SET(session, WT_SESSION_NO_CACHE | WT_SESSION_NO_EVICTION);
	for (insert_count = 0; (ret = src->next(src)) == 0; insert_count++) {
		WT_ERR(src->get_key(src, &key));
		WT_ERR(__wt_bloom_insert(bloom, &key));
	}
	WT_ERR_NOTFOUND_OK(ret);
	WT_TRET(src->close(src));
	/* The cursor is gone: don't close it again on the error path. */
	src = NULL;

	WT_TRET(__wt_bloom_finalize(bloom));
	WT_ERR(ret);

	F_CLR(session, WT_SESSION_NO_CACHE);

	/* Load the new Bloom filter into cache. */
	WT_CLEAR(key);
	WT_ERR_NOTFOUND_OK(__wt_bloom_get(bloom, &key));

	__wt_verbose(session, WT_VERB_LSM,
	    "LSM worker created bloom filter %s. "
	    "Expected %" PRIu64 " items, got %" PRIu64,
	    chunk->bloom_uri, chunk->count, insert_count);

	/* Ensure the bloom filter is in the metadata. */
	__wt_lsm_tree_writelock(session, lsm_tree);
	F_SET(chunk, WT_LSM_CHUNK_BLOOM);
	ret = __wt_lsm_meta_write(session, lsm_tree);
	++lsm_tree->dsk_gen;
	__wt_lsm_tree_writeunlock(session, lsm_tree);

	if (ret != 0)
		WT_ERR_MSG(session, ret, "LSM bloom worker metadata write");

err:	/*
	 * Any failure between opening the merge cursor and closing it above
	 * lands here with the cursor still open: close it so it isn't leaked.
	 */
	if (src != NULL)
		WT_TRET(src->close(src));
	if (bloom != NULL)
		WT_TRET(__wt_bloom_close(bloom));
	F_CLR(session, WT_SESSION_NO_CACHE | WT_SESSION_NO_EVICTION);
	return (ret);
}
/*
 * __curjoin_init_bloom --
 *	Populate Bloom filters
 *
 *	Opens a raw cursor on the entry's index source (or on "<table>()" when
 *	the entry joins on the main table), positions it according to the
 *	entry's first endpoint, and walks forward. Each key is compared with
 *	the entry's endpoints starting at ends[skip]; keys that qualify are
 *	inserted into the Bloom filter, and the walk stops at the end of the
 *	data or when a key falls past an upper bound.
 *
 *	session: the session.
 *	cjoin:   the join cursor; supplies the table name and is passed as the
 *	         owner when opening the raw cursor.
 *	entry:   the join entry whose endpoints select the keys.
 *	bloom:   the filter to populate.
 *
 *	Returns 0 on success or an error code; running off the end of the
 *	cursor is handled by WT_ERR_NOTFOUND_OK (presumably not an error --
 *	confirm the macro's definition). The raw cursor and the scratch URI
 *	buffer are released on all paths.
 */
static int
__curjoin_init_bloom(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
    WT_CURSOR_JOIN_ENTRY *entry, WT_BLOOM *bloom)
{
	WT_COLLATOR *collator;
	WT_CURSOR *c;
	WT_CURSOR_JOIN_ENDPOINT *end, *endmax;
	WT_DECL_ITEM(uribuf);
	WT_DECL_RET;
	WT_ITEM curkey, curvalue;
	size_t size;
	u_int skip;
	int cmp;
	const char *uri;
	const char *raw_cfg[] = { WT_CONFIG_BASE(
	    session, WT_SESSION_open_cursor), "raw", NULL };

	c = NULL;
	skip = 0;

	if (entry->index != NULL)
		/*
		 * Open the raw index. We're avoiding any references
		 * to the main table, they may be expensive.
		 */
		uri = entry->index->source;
	else {
		/*
		 * For joins on the main table, we just need the primary
		 * key for comparison, we don't need any values.
		 */
		/* Room for the name, the two characters "()" and the NUL. */
		size = strlen(cjoin->table->iface.name) + 3;
		WT_ERR(__wt_scr_alloc(session, size, &uribuf));
		WT_ERR(__wt_buf_fmt(session, uribuf, "%s()",
		    cjoin->table->iface.name));
		uri = uribuf->data;
	}
	WT_ERR(__wt_open_cursor(session, uri, &cjoin->iface, raw_cfg, &c));

	/* Initially position the cursor if necessary. */
	endmax = &entry->ends[entry->ends_next];
	if ((end = &entry->ends[0]) < endmax) {
		if (F_ISSET(end, WT_CURJOIN_END_GT) ||
		    WT_CURJOIN_END_RANGE(end) == WT_CURJOIN_END_EQ) {
			/* Start where the first endpoint's cursor is. */
			WT_ERR(__wt_cursor_dup_position(end->cursor, c));
			if (WT_CURJOIN_END_RANGE(end) == WT_CURJOIN_END_GE)
				skip = 1;
		} else if (F_ISSET(end, WT_CURJOIN_END_LT)) {
			/* Only an upper bound: start at the first record. */
			if ((ret = c->next(c)) == WT_NOTFOUND)
				goto done;
			WT_ERR(ret);
		} else
			WT_PANIC_ERR(session, EINVAL,
			    "fatal error in join cursor position state");
	}
	collator = (entry->index == NULL) ? NULL : entry->index->collator;
	/* ret is 0 here: every failure above has already jumped away. */
	while (ret == 0) {
		WT_ERR(c->get_key(c, &curkey));
		entry->stats.iterated++;
		if (entry->index != NULL) {
			/*
			 * Repack so it's comparable to the
			 * reference endpoints.
			 */
			WT_ERR(__wt_struct_repack(session,
			    c->key_format,
			    (entry->repack_format != NULL ?
			    entry->repack_format : entry->index->idxkey_format),
			    &c->key, &curkey));
		}
		for (end = &entry->ends[skip]; end < endmax; end++) {
			WT_ERR(__wt_compare(session, collator, &curkey,
			    &end->key, &cmp));
			if (F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION)) {
				/* if condition satisfied, insert immediately */
				switch (WT_CURJOIN_END_RANGE(end)) {
				case WT_CURJOIN_END_EQ:
					if (cmp == 0)
						goto insert;
					break;
				case WT_CURJOIN_END_GT:
					if (cmp > 0) {
						/* skip this check next time */
						skip = entry->ends_next;
						goto insert;
					}
					break;
				case WT_CURJOIN_END_GE:
					if (cmp >= 0)
						goto insert;
					break;
				case WT_CURJOIN_END_LT:
					if (cmp < 0)
						goto insert;
					break;
				case WT_CURJOIN_END_LE:
					if (cmp <= 0)
						goto insert;
					break;
				}
			} else if (!F_ISSET(end, WT_CURJOIN_END_LT)) {
				/* Lower bound: below it, move on to the next key. */
				if (cmp < 0 || (cmp == 0 &&
				    !F_ISSET(end, WT_CURJOIN_END_EQ)))
					goto advance;
				if (cmp > 0) {
					if (F_ISSET(end, WT_CURJOIN_END_GT))
						skip = 1;
					else
						goto done;
				}
			} else {
				/* Upper bound: past it, the walk is finished. */
				if (cmp > 0 || (cmp == 0 &&
				    !F_ISSET(end, WT_CURJOIN_END_EQ)))
					goto done;
			}
		}
		/*
		 * Either it's a disjunction that hasn't satisfied any
		 * condition, or it's a conjunction that has satisfied all
		 * conditions.
		 */
		if (F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION))
			goto advance;
insert:
		if (entry->index != NULL) {
			/*
			 * The filter entry is the part of the raw cursor key
			 * that follows the repacked index key (presumably
			 * the primary key -- confirm).
			 */
			curvalue.data =
			    (unsigned char *)curkey.data + curkey.size;
			WT_ASSERT(session, c->key.size > curkey.size);
			curvalue.size = c->key.size - curkey.size;
		} else
			WT_ERR(c->get_key(c, &curvalue));
		/*
		 * NOTE(review): any result of __wt_bloom_insert is not checked
		 * here -- confirm it cannot fail.
		 */
		__wt_bloom_insert(bloom, &curvalue);
		entry->stats.bloom_insert++;
advance:
		if ((ret = c->next(c)) == WT_NOTFOUND)
			break;
	}
done:
	WT_ERR_NOTFOUND_OK(ret);

err:	if (c != NULL)
		WT_TRET(c->close(c));
	__wt_scr_free(session, &uribuf);
	return (ret);
}
/*
 * __las_page_instantiate --
 *	Instantiate lookaside update records in a recently read page.
 *
 *	Scans the lookaside table for records whose prefix matches read_id and
 *	the block address, builds a list of WT_UPDATE structures per key, and
 *	inserts each list into the page with __col_instantiate or
 *	__row_instantiate depending on the page type.
 *
 *	session:   the session.
 *	ref:       reference to the page being populated.
 *	read_id:   identifier forming the first part of the lookaside key.
 *	addr:      the block address bytes forming the second part of the key.
 *	addr_size: length of addr in bytes.
 *
 *	Returns 0 on success or an error code. The lookaside cursor, the btree
 *	cursor, the scratch buffers and any updates not yet attached to the
 *	page are released on all paths.
 */
static int
__las_page_instantiate(WT_SESSION_IMPL *session,
    WT_REF *ref, uint32_t read_id, const uint8_t *addr, size_t addr_size)
{
	WT_CURSOR *cursor;
	WT_CURSOR_BTREE cbt;
	WT_DECL_ITEM(current_key);
	WT_DECL_ITEM(las_addr);
	WT_DECL_ITEM(las_key);
	WT_DECL_ITEM(las_value);
	WT_DECL_RET;
	WT_PAGE *page;
	WT_UPDATE *first_upd, *last_upd, *upd;
	size_t incr, total_incr;
	uint64_t current_recno, las_counter, las_txnid, recno, upd_txnid;
	uint32_t las_id, upd_size, session_flags;
	int exact;
	const uint8_t *p;

	cursor = NULL;
	page = ref->page;
	first_upd = last_upd = upd = NULL;
	total_incr = 0;
	current_recno = recno = WT_RECNO_OOB;
	session_flags = 0;		/* [-Werror=maybe-uninitialized] */

	__wt_btcur_init(session, &cbt);
	__wt_btcur_open(&cbt);

	WT_ERR(__wt_scr_alloc(session, 0, &current_key));
	WT_ERR(__wt_scr_alloc(session, 0, &las_addr));
	WT_ERR(__wt_scr_alloc(session, 0, &las_key));
	WT_ERR(__wt_scr_alloc(session, 0, &las_value));

	/* Open a lookaside table cursor. */
	WT_ERR(__wt_las_cursor(session, &cursor, &session_flags));

	/*
	 * The lookaside records are in key and update order, that is, there
	 * will be a set of in-order updates for a key, then another set of
	 * in-order updates for a subsequent key. We process all of the updates
	 * for a key and then insert those updates into the page, then all the
	 * updates for the next key, and so on.
	 *
	 * Search for the block's unique prefix, stepping through any matching
	 * records.
	 */
	las_addr->data = addr;
	las_addr->size = addr_size;
	las_key->size = 0;
	cursor->set_key(
	    cursor, read_id, las_addr, (uint64_t)0, (uint32_t)0, las_key);
	/* If the search landed before the prefix, step forward once. */
	if ((ret = cursor->search_near(cursor, &exact)) == 0 && exact < 0)
		ret = cursor->next(cursor);
	for (; ret == 0; ret = cursor->next(cursor)) {
		WT_ERR(cursor->get_key(cursor,
		    &las_id, las_addr, &las_counter, &las_txnid, las_key));

		/*
		 * Confirm the search using the unique prefix; if not a match,
		 * we're done searching for records for this page.
		 */
		if (las_id != read_id ||
		    las_addr->size != addr_size ||
		    memcmp(las_addr->data, addr, addr_size) != 0)
			break;

		/*
		 * If the on-page value has become globally visible, this record
		 * is no longer needed.
		 */
		if (__wt_txn_visible_all(session, las_txnid))
			continue;

		/* Allocate the WT_UPDATE structure. */
		WT_ERR(cursor->get_value(
		    cursor, &upd_txnid, &upd_size, las_value));
		WT_ERR(__wt_update_alloc(session,
		    (upd_size == WT_UPDATE_DELETED_VALUE) ? NULL : las_value,
		    &upd, &incr));
		total_incr += incr;
		upd->txnid = upd_txnid;

		/*
		 * When the key changes, flush the list built for the previous
		 * key into the page before starting a new list.
		 */
		switch (page->type) {
		case WT_PAGE_COL_FIX:
		case WT_PAGE_COL_VAR:
			p = las_key->data;
			WT_ERR(__wt_vunpack_uint(&p, 0, &recno));
			if (current_recno == recno)
				break;
			WT_ASSERT(session, current_recno < recno);

			if (first_upd != NULL) {
				WT_ERR(__col_instantiate(session,
				    current_recno, ref, &cbt, first_upd));
				first_upd = NULL;
			}
			current_recno = recno;
			break;
		case WT_PAGE_ROW_LEAF:
			if (current_key->size == las_key->size &&
			    memcmp(current_key->data,
			    las_key->data, las_key->size) == 0)
				break;

			if (first_upd != NULL) {
				WT_ERR(__row_instantiate(session,
				    current_key, ref, &cbt, first_upd));
				first_upd = NULL;
			}
			WT_ERR(__wt_buf_set(session,
			    current_key, las_key->data, las_key->size));
			break;
		WT_ILLEGAL_VALUE_ERR(session);
		}

		/* Append the latest update to the list. */
		if (first_upd == NULL)
			first_upd = last_upd = upd;
		else {
			last_upd->next = upd;
			last_upd = upd;
		}
		upd = NULL;
	}
	WT_ERR_NOTFOUND_OK(ret);

	/* Insert the last set of updates, if any. */
	if (first_upd != NULL)
		switch (page->type) {
		case WT_PAGE_COL_FIX:
		case WT_PAGE_COL_VAR:
			WT_ERR(__col_instantiate(session,
			    current_recno, ref, &cbt, first_upd));
			first_upd = NULL;
			break;
		case WT_PAGE_ROW_LEAF:
			WT_ERR(__row_instantiate(session,
			    current_key, ref, &cbt, first_upd));
			first_upd = NULL;
			break;
		WT_ILLEGAL_VALUE_ERR(session);
		}

	/* Discard the cursor. */
	WT_ERR(__wt_las_cursor_close(session, &cursor, session_flags));

	if (total_incr != 0) {
		__wt_cache_page_inmem_incr(session, page, total_incr);

		/*
		 * We've modified/dirtied the page, but that's not necessary and
		 * if we keep the page clean, it's easier to evict. We leave the
		 * lookaside table updates in place, so if we evict this page
		 * without dirtying it, any future instantiation of it will find
		 * the records it needs. If the page is dirtied before eviction,
		 * then we'll write any needed lookaside table records for the
		 * new location of the page.
		 */
		__wt_page_modify_clear(session, page);
	}

err:	WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));
	WT_TRET(__wt_btcur_close(&cbt, 1));

	/*
	 * On error, upd points to a single unlinked WT_UPDATE structure,
	 * first_upd points to a list.
	 */
	if (upd != NULL)
		__wt_free(session, upd);
	if (first_upd != NULL)
		__wt_free_update_list(session, first_upd);

	__wt_scr_free(session, &current_key);
	__wt_scr_free(session, &las_addr);
	__wt_scr_free(session, &las_key);
	__wt_scr_free(session, &las_value);

	return (ret);
}
int main(int argc, char *argv[]) { FILE *fp; WT_CONNECTION *conn; WT_CURSOR *cursor; WT_SESSION *session; uint64_t new_offset, offset; uint32_t count, max_key; int ch, status, ret; pid_t pid; const char *working_dir; if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL) progname = argv[0]; else ++progname; working_dir = "WT_TEST.truncated-log"; while ((ch = __wt_getopt(progname, argc, argv, "h:")) != EOF) switch (ch) { case 'h': working_dir = __wt_optarg; break; default: usage(); } argc -= __wt_optind; argv += __wt_optind; if (argc != 0) usage(); testutil_work_dir_from_path(home, 512, working_dir); testutil_make_work_dir(home); /* * Fork a child to insert as many items. We will then randomly * kill the child, run recovery and make sure all items we wrote * exist after recovery runs. */ if ((pid = fork()) < 0) testutil_die(errno, "fork"); if (pid == 0) { /* child */ fill_db(); return (EXIT_SUCCESS); } /* parent */ /* Wait for child to kill itself. */ if (waitpid(pid, &status, 0) == -1) testutil_die(errno, "waitpid"); /* * !!! If we wanted to take a copy of the directory before recovery, * this is the place to do it. */ if (chdir(home) != 0) testutil_die(errno, "chdir: %s", home); printf("Open database, run recovery and verify content\n"); if ((fp = fopen(RECORDS_FILE, "r")) == NULL) testutil_die(errno, "fopen"); ret = fscanf(fp, "%" SCNu64 " %" SCNu32 "\n", &offset, &max_key); if (ret != 2) testutil_die(errno, "fscanf"); if (fclose(fp) != 0) testutil_die(errno, "fclose"); /* * The offset is the beginning of the last record. Truncate to * the middle of that last record (i.e. ahead of that offset). 
*/ if (offset > UINT64_MAX - V_SIZE) testutil_die(ERANGE, "offset"); new_offset = offset + V_SIZE; printf("Parent: Truncate to %" PRIu64 "\n", new_offset); if ((ret = truncate(LOG_FILE_1, (wt_off_t)new_offset)) != 0) testutil_die(errno, "truncate"); if ((ret = wiredtiger_open(NULL, NULL, ENV_CONFIG_REC, &conn)) != 0) testutil_die(ret, "wiredtiger_open"); if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) testutil_die(ret, "WT_CONNECTION:open_session"); if ((ret = session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0) testutil_die(ret, "WT_SESSION.open_cursor: %s", uri); /* * For every key in the saved file, verify that the key exists * in the table after recovery. Since we did write-no-sync, we * expect every key to have been recovered. */ count = 0; while ((ret = cursor->next(cursor)) == 0) ++count; if ((ret = conn->close(conn, NULL)) != 0) testutil_die(ret, "WT_CONNECTION:close"); if (count > max_key) { printf("expected %" PRIu32 " records found %" PRIu32 "\n", max_key, count); return (EXIT_FAILURE); } printf("%" PRIu32 " records verified\n", count); return (EXIT_SUCCESS); }