/*
 * __wt_lsm_tree_switch --
 *     Switch to a new in-memory tree.
 */
int
__wt_lsm_tree_switch(
    WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
    WT_DECL_RET;
    WT_LSM_CHUNK *chunk;
    uint32_t new_id;

    new_id = WT_ATOMIC_ADD(lsm_tree->last, 1);

    WT_VERBOSE_RET(session, lsm, "Tree switch to: %d", new_id);

    if ((lsm_tree->nchunks + 1) * sizeof(*lsm_tree->chunk) >
        lsm_tree->chunk_alloc)
        WT_ERR(__wt_realloc(session,
            &lsm_tree->chunk_alloc,
            WT_MAX(10 * sizeof(*lsm_tree->chunk),
            2 * lsm_tree->chunk_alloc),
            &lsm_tree->chunk));

    WT_ERR(__wt_calloc_def(session, 1, &chunk));
    chunk->id = new_id;
    lsm_tree->chunk[lsm_tree->nchunks++] = chunk;
    WT_ERR(__wt_lsm_tree_setup_chunk(session, lsm_tree, chunk));

    ++lsm_tree->dsk_gen;
    WT_ERR(__wt_lsm_meta_write(session, lsm_tree));

err:    /* TODO: mark lsm_tree bad on error(?) */
    return (ret);
}
/*
 * __wt_lsm_tree_switch --
 *     Switch to a new in-memory tree.
 */
int
__wt_lsm_tree_switch(
    WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
    WT_DECL_RET;
    WT_LSM_CHUNK *chunk, **cp;
    uint32_t in_memory, new_id;

    new_id = WT_ATOMIC_ADD(lsm_tree->last, 1);

    if ((lsm_tree->nchunks + 1) * sizeof(*lsm_tree->chunk) >
        lsm_tree->chunk_alloc)
        WT_ERR(__wt_realloc(session,
            &lsm_tree->chunk_alloc,
            WT_MAX(10 * sizeof(*lsm_tree->chunk),
            2 * lsm_tree->chunk_alloc),
            &lsm_tree->chunk));

    /*
     * In the steady state, we expect that the checkpoint worker thread
     * will keep up with inserts.  If not, we throttle the insert rate to
     * avoid filling the cache with in-memory chunks.  Threads sleep every
     * 100 operations, so take that into account in the calculation.
     */
    for (in_memory = 1, cp = lsm_tree->chunk + lsm_tree->nchunks - 1;
        in_memory < lsm_tree->nchunks &&
        !F_ISSET(*cp, WT_LSM_CHUNK_ONDISK);
        ++in_memory, --cp)
        ;
    if (!F_ISSET(lsm_tree, WT_LSM_TREE_THROTTLE) || in_memory <= 2)
        lsm_tree->throttle_sleep = 0;
    else if (in_memory == lsm_tree->nchunks ||
        F_ISSET(*cp, WT_LSM_CHUNK_STABLE)) {
        /*
         * No checkpoint has completed this run.  Keep slowing down
         * inserts until one does.
         */
        lsm_tree->throttle_sleep =
            WT_MAX(20, 2 * lsm_tree->throttle_sleep);
    } else {
        chunk = lsm_tree->chunk[lsm_tree->nchunks - 1];
        lsm_tree->throttle_sleep = (long)((in_memory - 2) *
            WT_TIMEDIFF(chunk->create_ts, (*cp)->create_ts) /
            (20 * in_memory * chunk->count));
    }

    WT_VERBOSE_ERR(session, lsm, "Tree switch to: %d, throttle %d",
        new_id, (int)lsm_tree->throttle_sleep);

    WT_ERR(__wt_calloc_def(session, 1, &chunk));
    chunk->id = new_id;
    lsm_tree->chunk[lsm_tree->nchunks++] = chunk;
    WT_ERR(__wt_lsm_tree_setup_chunk(session, lsm_tree, chunk));

    ++lsm_tree->dsk_gen;
    F_CLR(lsm_tree, WT_LSM_TREE_NEED_SWITCH);
    WT_ERR(__wt_lsm_meta_write(session, lsm_tree));

err:    /* TODO: mark lsm_tree bad on error(?) */
    return (ret);
}
/*
 * __wt_json_alloc_unpack --
 *     Allocate space for, and unpack an entry into JSON format.
 */
int
__wt_json_alloc_unpack(WT_SESSION_IMPL *session, const void *buffer,
    size_t size, const char *fmt, WT_CURSOR_JSON *json,
    bool iskey, va_list ap)
{
    WT_CONFIG_ITEM *names;
    WT_DECL_RET;
    size_t needed;
    char **json_bufp;

    if (iskey) {
        names = &json->key_names;
        json_bufp = &json->key_buf;
    } else {
        names = &json->value_names;
        json_bufp = &json->value_buf;
    }
    needed = 0;
    WT_RET(__json_struct_size(session, buffer, size, fmt, names,
        iskey, &needed));
    WT_RET(__wt_realloc(session, NULL, needed + 1, json_bufp));
    WT_RET(__json_struct_unpackv(session, buffer, size, fmt,
        names, (u_char *)*json_bufp, needed + 1, iskey, ap));

    return (ret);
}
/*
 * __meta_track_next --
 *     Extend the list of operations we're tracking, as necessary, and
 *     optionally return the next slot.
 */
static int
__meta_track_next(WT_SESSION_IMPL *session, WT_META_TRACK **trkp)
{
    size_t offset, sub_off;

    if (session->meta_track_next == NULL)
        session->meta_track_next = session->meta_track;

    offset = WT_PTRDIFF(session->meta_track_next, session->meta_track);
    sub_off = WT_PTRDIFF(session->meta_track_sub, session->meta_track);
    if (offset == session->meta_track_alloc) {
        WT_RET(__wt_realloc(session, &session->meta_track_alloc,
            WT_MAX(2 * session->meta_track_alloc,
            20 * sizeof(WT_META_TRACK)), &session->meta_track));

        /* Maintain positions in the new chunk of memory. */
        session->meta_track_next =
            (uint8_t *)session->meta_track + offset;
        if (session->meta_track_sub != NULL)
            session->meta_track_sub =
                (uint8_t *)session->meta_track + sub_off;
    }

    WT_ASSERT(session, session->meta_track_next != NULL);

    if (trkp != NULL) {
        *trkp = session->meta_track_next;
        session->meta_track_next = *trkp + 1;
    }

    return (0);
}
/*
 * __logrec_jsonify_str --
 *     Unpack a string into JSON escaped format.
 */
static int
__logrec_jsonify_str(WT_SESSION_IMPL *session, char **destp, WT_ITEM *item)
{
    size_t needed;

    /* First call computes the escaped length, second call fills the buffer. */
    needed = __logrec_json_unpack_str(NULL, 0, item->data, item->size);
    WT_RET(__wt_realloc(session, NULL, needed, destp));
    (void)__logrec_json_unpack_str(*destp, needed, item->data, item->size);
    return (0);
}
/*
 * __lsm_copy_chunks --
 *     Take a copy of part of the LSM tree chunk array so that we can work on
 *     the contents without holding the LSM tree handle lock long term.
 */
static int
__lsm_copy_chunks(WT_SESSION_IMPL *session,
    WT_LSM_TREE *lsm_tree, WT_LSM_WORKER_COOKIE *cookie, bool old_chunks)
{
    WT_DECL_RET;
    u_int i, nchunks;
    size_t alloc;

    /* Always return zero chunks on error. */
    cookie->nchunks = 0;

    __wt_lsm_tree_readlock(session, lsm_tree);
    if (!lsm_tree->active) {
        __wt_lsm_tree_readunlock(session, lsm_tree);
        return (0);
    }

    /* Take a copy of the current state of the LSM tree. */
    nchunks = old_chunks ? lsm_tree->nold_chunks : lsm_tree->nchunks;
    alloc = old_chunks ? lsm_tree->old_alloc : lsm_tree->chunk_alloc;

    /*
     * If the tree array of active chunks is larger than our current
     * buffer, increase the size of our current buffer to match.
     */
    if (cookie->chunk_alloc < alloc)
        WT_ERR(__wt_realloc(session,
            &cookie->chunk_alloc, alloc, &cookie->chunk_array));
    if (nchunks > 0)
        memcpy(cookie->chunk_array,
            old_chunks ? lsm_tree->old_chunks : lsm_tree->chunk,
            nchunks * sizeof(*cookie->chunk_array));

    /*
     * Mark each chunk as active, so we don't drop it until after we know
     * it's safe.
     */
    for (i = 0; i < nchunks; i++)
        (void)__wt_atomic_add32(&cookie->chunk_array[i]->refcnt, 1);

err:    __wt_lsm_tree_readunlock(session, lsm_tree);

    if (ret == 0)
        cookie->nchunks = nchunks;
    return (ret);
}
/*
 * __wt_buf_grow --
 *     Grow a buffer that's currently in-use.
 */
int
__wt_buf_grow(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size)
{
    size_t offset;
    int set_data;

    WT_ASSERT(session, size <= UINT32_MAX);

    /* Clear buffers previously used for mapped returns. */
    if (F_ISSET(buf, WT_ITEM_MAPPED))
        __wt_buf_clear(buf);

    if (size > buf->memsize) {
        /*
         * Grow the buffer's memory: if the data reference is not set
         * or references the buffer's memory, maintain it.
         */
        WT_ASSERT(session, buf->mem == NULL || buf->memsize > 0);
        if (buf->data == NULL) {
            offset = 0;
            set_data = 1;
        } else if (buf->data >= buf->mem &&
            WT_PTRDIFF(buf->data, buf->mem) < buf->memsize) {
            offset = WT_PTRDIFF(buf->data, buf->mem);
            set_data = 1;
        } else {
            offset = 0;
            set_data = 0;
        }

        if (F_ISSET(buf, WT_ITEM_ALIGNED))
            WT_RET(__wt_realloc_aligned(
                session, &buf->memsize, size, &buf->mem));
        else
            WT_RET(__wt_realloc(
                session, &buf->memsize, size, &buf->mem));

        if (set_data)
            buf->data = (uint8_t *)buf->mem + offset;
    }
    return (0);
}
/*
 * __wt_conn_foc_add --
 *     Add a new entry into the connection's free-on-close list.
 */
static int
__wt_conn_foc_add(WT_SESSION_IMPL *session, ...)
{
    WT_CONNECTION_IMPL *conn;
    va_list ap;
    size_t cnt;
    void *p;

    conn = S2C(session);

    /*
     * Instead of using locks to protect configuration information, assume
     * we can atomically update a pointer to a chunk of memory, and because
     * a pointer is never partially written, readers will correctly see the
     * original or new versions of the memory.  Readers might be using the
     * old version as it's being updated, though, which means we cannot
     * free the old chunk of memory until all possible readers have
     * finished.  Currently, that's on connection close: in other words, we
     * can use this because it's small amounts of memory, and we really,
     * really do not want to acquire locks every time we access
     * configuration strings, since that's done on every API call.
     *
     * Our caller is expected to be holding any locks we need.
     */

    /* Count the slots. */
    va_start(ap, session);
    for (cnt = 0; va_arg(ap, void *) != NULL; ++cnt)
        ;
    va_end(ap);

    if (conn->foc_cnt + cnt >= conn->foc_size) {
        WT_RET(__wt_realloc(session, NULL,
            (conn->foc_size + cnt + 20) * sizeof(void *), &conn->foc));
        conn->foc_size += cnt + 20;
    }

    va_start(ap, session);
    while ((p = va_arg(ap, void *)) != NULL)
        conn->foc[conn->foc_cnt++] = p;
    va_end(ap);

    return (0);
}
/*
 * __rec_track_extend --
 *     Extend the list of objects we're tracking.
 */
static int
__rec_track_extend(WT_SESSION_IMPL *session, WT_PAGE *page)
{
    WT_PAGE_MODIFY *mod;
    size_t bytes_allocated;

    mod = page->modify;

    /*
     * The __wt_realloc() function uses the "bytes allocated" value
     * to figure out how much of the memory it needs to clear (see
     * the function for an explanation of why the memory is cleared,
     * it's a security thing).  We can calculate the bytes allocated
     * so far, which saves a size_t in the WT_PAGE_MODIFY structure.
     * That's worth a little dance, we have one of them per modified
     * page.
     */
    bytes_allocated = mod->track_entries * sizeof(*mod->track);
    WT_RET(__wt_realloc(session, &bytes_allocated,
        (mod->track_entries + 20) * sizeof(*mod->track), &mod->track));
    mod->track_entries += 20;

    return (0);
}
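/*
 * Illustrative sketch (not WiredTiger code): the contract __rec_track_extend
 * relies on is that the reallocation routine is told how many bytes were
 * previously allocated, so only the newly extended region needs to be
 * zeroed.  The helper below, grow_zeroed(), is a hypothetical,
 * standard-library-only illustration of that idea; its name and signature
 * are not part of the WiredTiger API.
 */
#include <errno.h>
#include <stdlib.h>
#include <string.h>

static int
grow_zeroed(void **p, size_t *bytes_allocated, size_t bytes_to_allocate)
{
    void *newp;

    if ((newp = realloc(*p, bytes_to_allocate)) == NULL)
        return (ENOMEM);

    /* Zero only the region beyond what the caller says was allocated. */
    memset((unsigned char *)newp + *bytes_allocated, 0,
        bytes_to_allocate - *bytes_allocated);

    *p = newp;
    *bytes_allocated = bytes_to_allocate;
    return (0);
}
/*
 * In __rec_track_extend the "previously allocated" value is recomputed as
 * mod->track_entries * sizeof(*mod->track) instead of being stored, which
 * is the space-saving dance its comment describes.
 */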
/*
 * __hazard_exclusive --
 *     Request exclusive access to a page.
 */
static int
__hazard_exclusive(WT_SESSION_IMPL *session, WT_REF *ref, int top)
{
    /*
     * Make sure there is space to track exclusive access so we can
     * unlock to clean up.
     */
    if (session->excl_next * sizeof(WT_REF *) == session->excl_allocated)
        WT_RET(__wt_realloc(session, &session->excl_allocated,
            (session->excl_next + 50) * sizeof(WT_REF *),
            &session->excl));

    /*
     * Hazard pointers are acquired down the tree, which means we can't
     * deadlock.
     *
     * Request exclusive access to the page.  The top-level page should
     * already be in the locked state, lock child pages in memory.
     * If another thread already has this page, give up.
     */
    if (!top && !WT_ATOMIC_CAS(ref->state, WT_REF_MEM, WT_REF_LOCKED))
        return (EBUSY);    /* We couldn't change the state. */
    WT_ASSERT(session, ref->state == WT_REF_LOCKED);

    session->excl[session->excl_next++] = ref;

    /* Check for a matching hazard pointer. */
    if (__wt_page_hazard_check(session, ref->page) == NULL)
        return (0);

    WT_DSTAT_INCR(session, cache_eviction_hazard);
    WT_CSTAT_INCR(session, cache_eviction_hazard);

    WT_VERBOSE_RET(
        session, evict, "page %p hazard request failed", ref->page);
    return (EBUSY);
}
/*
 * __wt_buf_grow_worker --
 *     Attempt to reallocate the memory referenced by a WT_ITEM's mem field.
 */
int
__wt_buf_grow_worker(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size)
{
    size_t offset;
    int copy_data;    /* Whether the existing data must be copied. */

    /*
     * Check whether buf->data already points into buf->mem; if it does,
     * no data copy is needed after the reallocation.
     */
    if (WT_DATA_IN_ITEM(buf)) {
        offset = WT_PTRDIFF(buf->data, buf->mem);
        copy_data = 0;
    } else {
        offset = 0;
        copy_data = buf->size ? 1 : 0;
    }

    /* Reallocate the underlying memory if it is too small. */
    if (size > buf->memsize) {
        if (F_ISSET(buf, WT_ITEM_ALIGNED))
            WT_RET(__wt_realloc_aligned(
                session, &buf->memsize, size, &buf->mem));
        else
            WT_RET(__wt_realloc(
                session, &buf->memsize, size, &buf->mem));
    }

    if (buf->data == NULL) {
        buf->data = buf->mem;
        buf->size = 0;
    } else {
        if (copy_data)    /* Copy the existing data into place. */
            memcpy(buf->mem, buf->data, buf->size);
        buf->data = (uint8_t *)buf->mem + offset;
    }

    return (0);
}
/*
 * __wt_realloc_aligned --
 *     ANSI realloc function that aligns to buffer boundaries, configured
 *     with the "buffer_alignment" key to wiredtiger_open.
 */
int
__wt_realloc_aligned(WT_SESSION_IMPL *session,
    size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp)
{
#if defined(HAVE_POSIX_MEMALIGN)
    WT_DECL_RET;

    /*
     * !!!
     * This function MUST handle a NULL WT_SESSION_IMPL handle.
     */
    if (session != NULL && S2C(session)->buffer_alignment > 0) {
        void *p, *newp;
        size_t bytes_allocated;

        /*
         * Sometimes we're allocating memory and we don't care about
         * the final length -- bytes_allocated_ret may be NULL.
         */
        p = *(void **)retp;
        bytes_allocated =
            (bytes_allocated_ret == NULL) ? 0 : *bytes_allocated_ret;
        WT_ASSERT(session,
            (p == NULL && bytes_allocated == 0) ||
            (p != NULL &&
            (bytes_allocated_ret == NULL || bytes_allocated != 0)));
        WT_ASSERT(session, bytes_to_allocate != 0);
        WT_ASSERT(session, bytes_allocated < bytes_to_allocate);

        /*
         * We are going to allocate an aligned buffer.  When we do this
         * repeatedly, the allocator is expected to start on a boundary
         * each time, account for that additional space by never asking
         * for less than a full alignment size.  The primary use case
         * for aligned buffers is Linux direct I/O, which requires that
         * the size be a multiple of the alignment anyway.
         */
        bytes_to_allocate =
            WT_ALIGN(bytes_to_allocate, S2C(session)->buffer_alignment);

        WT_STAT_FAST_CONN_INCR(session, memory_allocation);

        if ((ret = posix_memalign(&newp,
            S2C(session)->buffer_alignment,
            bytes_to_allocate)) != 0)
            WT_RET_MSG(session, ret, "memory allocation");

        if (p != NULL)
            memcpy(newp, p, bytes_allocated);
        __wt_free(session, p);
        p = newp;

        /* Clear the allocated memory (see above). */
        memset((uint8_t *)p + bytes_allocated, 0,
            bytes_to_allocate - bytes_allocated);

        /* Update caller's bytes allocated value. */
        if (bytes_allocated_ret != NULL)
            *bytes_allocated_ret = bytes_to_allocate;

        *(void **)retp = p;
        return (0);
    }
#endif
    /*
     * If there is no posix_memalign function, or no alignment configured,
     * fall back to realloc.
     *
     * Windows note: Visual C CRT memalign does not match Posix behavior
     * and would also double each allocation so it is bad for memory use.
     */
    return (__wt_realloc(
        session, bytes_allocated_ret, bytes_to_allocate, retp));
}
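/*
 * Illustrative sketch (not WiredTiger code): the WT_ALIGN() step above
 * rounds the request up to a multiple of the configured alignment.  The
 * helper below, align_up(), is a hypothetical stand-in that assumes the
 * alignment is a power of two, which direct I/O alignments are in
 * practice; for example, align_up(1000, 512) == 1024 and
 * align_up(1024, 512) == 1024.
 */
#include <stddef.h>

static inline size_t
align_up(size_t n, size_t align)
{
    /* Round n up to the next multiple of align (align a power of two). */
    return ((n + align - 1) & ~(align - 1));
}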
/*
 * __wt_meta_ckptlist_get --
 *     Load all available checkpoint information for a file.
 */
int
__wt_meta_ckptlist_get(
    WT_SESSION_IMPL *session, const char *fname, WT_CKPT **ckptbasep)
{
    WT_CKPT *ckpt, *ckptbase;
    WT_CONFIG ckptconf;
    WT_CONFIG_ITEM a, k, v;
    WT_DECL_RET;
    WT_ITEM *buf;
    size_t allocated, slot;
    const char *config;
    char timebuf[64];

    *ckptbasep = NULL;

    buf = NULL;
    ckptbase = NULL;
    allocated = slot = 0;
    config = NULL;

    /* Retrieve the metadata information for the file. */
    WT_RET(__wt_metadata_read(session, fname, &config));

    /* Load any existing checkpoints into the array. */
    WT_ERR(__wt_scr_alloc(session, 0, &buf));
    if (__wt_config_getones(session, config, "checkpoint", &v) == 0 &&
        __wt_config_subinit(session, &ckptconf, &v) == 0)
        for (; __wt_config_next(&ckptconf, &k, &v) == 0; ++slot) {
            if (slot * sizeof(WT_CKPT) == allocated)
                WT_ERR(__wt_realloc(session, &allocated,
                    (slot + 50) * sizeof(WT_CKPT), &ckptbase));
            ckpt = &ckptbase[slot];

            /*
             * Copy the name, address (raw and hex), order and time
             * into the slot.  If there's no address, it's a fake.
             */
            WT_ERR(
                __wt_strndup(session, k.str, k.len, &ckpt->name));

            WT_ERR(__wt_config_subgets(session, &v, "addr", &a));
            WT_ERR(
                __wt_buf_set(session, &ckpt->addr, a.str, a.len));
            if (a.len == 0)
                F_SET(ckpt, WT_CKPT_FAKE);
            else
                WT_ERR(__wt_nhex_to_raw(
                    session, a.str, a.len, &ckpt->raw));

            WT_ERR(__wt_config_subgets(session, &v, "order", &a));
            if (a.val == 0)
                goto format;
            ckpt->order = a.val;

            WT_ERR(__wt_config_subgets(session, &v, "time", &a));
            if (a.len == 0)
                goto format;
            if (a.len > sizeof(timebuf) - 1)
                goto format;
            memcpy(timebuf, a.str, a.len);
            timebuf[a.len] = '\0';
            if (sscanf(timebuf, "%" SCNuMAX, &ckpt->sec) != 1)
                goto format;

            WT_ERR(__wt_config_subgets(session, &v, "size", &a));
            ckpt->ckpt_size = (uint64_t)a.val;
        }

    /*
     * Allocate an extra slot for a new value, plus a slot to mark the end.
     *
     * This isn't very clean, but there's necessary cooperation between
     * the schema layer (that maintains the list of checkpoints), the
     * btree layer (that knows when the root page is written, creating a
     * new checkpoint), and the block manager (which actually creates the
     * checkpoint).  All of that cooperation is handled in the WT_CKPT
     * structure referenced from the WT_BTREE structure.
     */
    if ((slot + 2) * sizeof(WT_CKPT) > allocated)
        WT_ERR(__wt_realloc(session, &allocated,
            (slot + 2) * sizeof(WT_CKPT), &ckptbase));

    /* Sort in creation-order. */
    qsort(ckptbase, slot, sizeof(WT_CKPT), __ckpt_compare_order);

    /* Return the array to our caller. */
    *ckptbasep = ckptbase;

    if (0) {
format:        WT_ERR_MSG(session, WT_ERROR, "corrupted checkpoint list");
err:        __wt_meta_ckptlist_free(session, ckptbase);
    }
    __wt_free(session, config);
    __wt_scr_free(&buf);

    return (ret);
}
/*
 * __wt_realloc_aligned --
 *     ANSI realloc function that aligns to buffer boundaries, configured
 *     with the "buffer_alignment" key to wiredtiger_open.
 */
int
__wt_realloc_aligned(WT_SESSION_IMPL *session,
    size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp)
{
#if defined(HAVE_POSIX_MEMALIGN)
    WT_DECL_RET;

    /*
     * !!!
     * This function MUST handle a NULL WT_SESSION_IMPL handle.
     */
    if (session != NULL && S2C(session)->buffer_alignment > 0) {
        void *p, *newp;
        size_t bytes_allocated;

        /*
         * Sometimes we're allocating memory and we don't care about
         * the final length -- bytes_allocated_ret may be NULL.
         */
        p = *(void **)retp;
        bytes_allocated =
            (bytes_allocated_ret == NULL) ? 0 : *bytes_allocated_ret;
        WT_ASSERT(session,
            (p == NULL && bytes_allocated == 0) ||
            (p != NULL &&
            (bytes_allocated_ret == NULL || bytes_allocated != 0)));
        WT_ASSERT(session, bytes_to_allocate != 0);
        WT_ASSERT(session, bytes_allocated < bytes_to_allocate);

        if (session != NULL)
            WT_STAT_FAST_CONN_INCR(session, memory_allocation);

        if ((ret = posix_memalign(&newp,
            S2C(session)->buffer_alignment,
            bytes_to_allocate)) != 0)
            WT_RET_MSG(session, ret, "memory allocation");

        if (p != NULL)
            memcpy(newp, p, bytes_allocated);
        __wt_free(session, p);
        p = newp;

        /* Clear the allocated memory (see above). */
        memset((uint8_t *)p + bytes_allocated, 0,
            bytes_to_allocate - bytes_allocated);

        /* Update caller's bytes allocated value. */
        if (bytes_allocated_ret != NULL)
            *bytes_allocated_ret = bytes_to_allocate;

        *(void **)retp = p;
        return (0);
    }
#endif
    /*
     * If there is no posix_memalign function, or no alignment configured,
     * fall back to realloc.
     *
     * Windows note: Visual C CRT memalign does not match Posix behavior
     * and would also double each allocation so it is bad for memory use.
     */
    return (__wt_realloc(
        session, bytes_allocated_ret, bytes_to_allocate, retp));
}
/*
 * __curlog_kv --
 *     Set the key and value of the log cursor to return to the user.
 */
static int
__curlog_kv(WT_SESSION_IMPL *session, WT_CURSOR *cursor)
{
    WT_CURSOR_LOG *cl;
    WT_ITEM item;
    uint32_t fileid, key_count, opsize, optype;

    cl = (WT_CURSOR_LOG *)cursor;

    /*
     * If it is a commit and we have stepped over the header, peek to get
     * the size and optype and read out any key/value from this operation.
     */
    if ((key_count = cl->step_count++) > 0) {
        WT_RET(__wt_logop_read(session,
            &cl->stepp, cl->stepp_end, &optype, &opsize));
        WT_RET(__curlog_op_read(session, cl, optype, opsize, &fileid));
        /* Position on the beginning of the next record part. */
        cl->stepp += opsize;
    } else {
        optype = WT_LOGOP_INVALID;
        fileid = 0;
        cl->opkey->data = NULL;
        cl->opkey->size = 0;
        /*
         * Non-commit records we want to return the record without the
         * header and the adjusted size.  Add one to skip over the type
         * which is normally consumed by __wt_logrec_read.
         */
        cl->opvalue->data = WT_LOG_SKIP_HEADER(cl->logrec->data) + 1;
        cl->opvalue->size = WT_LOG_REC_SIZE(cl->logrec->size) - 1;
    }

    /*
     * The log cursor sets the LSN and step count as the cursor key and
     * log record related data in the value.  The data in the value
     * contains any operation key/value that was in the log record.
     * For the special case that the caller needs the result in raw form,
     * we create packed versions of the key/value.
     */
    if (FLD_ISSET(cursor->flags, WT_CURSTD_RAW)) {
        memset(&item, 0, sizeof(item));
        WT_RET(wiredtiger_struct_size((WT_SESSION *)session,
            &item.size, WT_LOGC_KEY_FORMAT, cl->cur_lsn->l.file,
            cl->cur_lsn->l.offset, key_count));
        WT_RET(__wt_realloc(session, NULL, item.size, &cl->packed_key));
        item.data = cl->packed_key;
        WT_RET(wiredtiger_struct_pack((WT_SESSION *)session,
            cl->packed_key, item.size, WT_LOGC_KEY_FORMAT,
            cl->cur_lsn->l.file, cl->cur_lsn->l.offset, key_count));
        __wt_cursor_set_key(cursor, &item);

        WT_RET(wiredtiger_struct_size((WT_SESSION *)session,
            &item.size, WT_LOGC_VALUE_FORMAT, cl->txnid, cl->rectype,
            optype, fileid, cl->opkey, cl->opvalue));
        WT_RET(__wt_realloc(session, NULL, item.size,
            &cl->packed_value));
        item.data = cl->packed_value;
        WT_RET(wiredtiger_struct_pack((WT_SESSION *)session,
            cl->packed_value, item.size, WT_LOGC_VALUE_FORMAT,
            cl->txnid, cl->rectype, optype, fileid, cl->opkey,
            cl->opvalue));
        __wt_cursor_set_value(cursor, &item);
    } else {
        __wt_cursor_set_key(cursor, cl->cur_lsn->l.file,
            cl->cur_lsn->l.offset, key_count);
        __wt_cursor_set_value(cursor, cl->txnid, cl->rectype, optype,
            fileid, cl->opkey, cl->opvalue);
    }

    return (0);
}
/*
 * __wt_scr_alloc_func --
 *     Scratch buffer allocation function.
 */
int
__wt_scr_alloc_func(WT_SESSION_IMPL *session, size_t size, WT_ITEM **scratchp
#ifdef HAVE_DIAGNOSTIC
    , const char *file, int line
#endif
    )
{
    WT_DECL_RET;
    WT_ITEM *buf, **p, **best, **slot;
    size_t allocated;
    u_int i;

    /* Don't risk the caller not catching the error. */
    *scratchp = NULL;

    /*
     * Each WT_SESSION_IMPL has an array of scratch buffers available for
     * use by any function.  We use WT_ITEM structures for scratch memory
     * because we already have functions that do variable-length
     * allocation on a WT_ITEM.  Scratch buffers are allocated only by a
     * single thread of control, so no locking is necessary.
     *
     * Walk the array, looking for a buffer we can use.
     */
    for (i = 0, best = slot = NULL, p = session->scratch;
        i < session->scratch_alloc; ++i, ++p) {
        /* If we find an empty slot, remember it. */
        if ((buf = *p) == NULL) {
            if (slot == NULL)
                slot = p;
            continue;
        }

        if (F_ISSET(buf, WT_ITEM_INUSE))
            continue;

        /*
         * If we find a buffer that's not in-use, check its size: we
         * want the smallest buffer larger than the requested size,
         * or the largest buffer if none are large enough.
         */
        if (best == NULL ||
            (buf->memsize <= size &&
            buf->memsize > (*best)->memsize) ||
            (buf->memsize >= size &&
            buf->memsize < (*best)->memsize))
            best = p;

        /* If we find a perfect match, use it. */
        if ((*best)->memsize == size)
            break;
    }

    /*
     * If we didn't find a free buffer, extend the array and use the first
     * slot we allocated.
     */
    if (best == NULL && slot == NULL) {
        allocated = session->scratch_alloc * sizeof(WT_ITEM *);
        WT_ERR(__wt_realloc(session, &allocated,
            (session->scratch_alloc + 10) * sizeof(WT_ITEM *),
            &session->scratch));
#ifdef HAVE_DIAGNOSTIC
        allocated = session->scratch_alloc * sizeof(WT_SCRATCH_TRACK);
        WT_ERR(__wt_realloc(session, &allocated,
            (session->scratch_alloc + 10) * sizeof(WT_SCRATCH_TRACK),
            &session->scratch_track));
#endif
        slot = session->scratch + session->scratch_alloc;
        session->scratch_alloc += 10;
    }

    /*
     * If slot is non-NULL, we found an empty slot, try to allocate a
     * buffer.
     */
    if (best == NULL) {
        WT_ASSERT(session, slot != NULL);
        best = slot;

        WT_ERR(__wt_calloc_one(session, best));

        /* Scratch buffers must be aligned. */
        F_SET(*best, WT_ITEM_ALIGNED);
    }

    /* Grow the buffer as necessary and return. */
    session->scratch_cached -= (*best)->memsize;
    WT_ERR(__wt_buf_init(session, *best, size));
    F_SET(*best, WT_ITEM_INUSE);

#ifdef HAVE_DIAGNOSTIC
    session->scratch_track[best - session->scratch].file = file;
    session->scratch_track[best - session->scratch].line = line;
#endif

    *scratchp = *best;
    return (0);

err:    WT_RET_MSG(session, ret,
        "session unable to allocate a scratch buffer");
}
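/*
 * Illustrative sketch (not WiredTiger code) of the best-fit rule used in
 * the scratch-buffer scan above: prefer the smallest free buffer that is
 * at least "size" bytes; if none is large enough, prefer the largest free
 * buffer so the later grow is as cheap as possible.  The helper name
 * pick_best() and the plain array of sizes are hypothetical.
 */
#include <stddef.h>

static size_t *
pick_best(size_t *sizes, size_t n, size_t size)
{
    size_t i, *best;

    for (i = 0, best = NULL; i < n; ++i) {
        if (best == NULL ||
            (sizes[i] <= size && sizes[i] > *best) ||
            (sizes[i] >= size && sizes[i] < *best))
            best = &sizes[i];
        /* A perfect fit ends the search early. */
        if (*best == size)
            break;
    }
    return (best);
}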
/*
 * __wt_struct_repack --
 *     Return the subset of the packed buffer that represents part of
 *     the format.  If the result is not contiguous in the existing
 *     buffer, a buffer is reallocated and filled.
 */
int
__wt_struct_repack(WT_SESSION_IMPL *session, const char *infmt,
    const char *outfmt, const WT_ITEM *inbuf, WT_ITEM *outbuf,
    void **reallocp)
{
    WT_DECL_PACK_VALUE(pvin);
    WT_DECL_PACK_VALUE(pvout);
    WT_DECL_RET;
    WT_PACK packin, packout;
    const uint8_t *before, *end, *p;
    uint8_t *newbuf, *pout;
    size_t len;
    const void *start;

    start = newbuf = NULL;
    p = inbuf->data;
    end = p + inbuf->size;

    /*
     * Handle this non-contiguous case: 'U' -> 'u' at the end of the buf.
     * The former case has the size embedded before the item, the latter
     * does not.
     */
    if ((len = strlen(outfmt)) > 1 && outfmt[len - 1] == 'u' &&
        strlen(infmt) > len && infmt[len - 1] == 'U') {
        WT_ERR(__wt_realloc(session, NULL, inbuf->size, reallocp));
        pout = *reallocp;
    } else
        pout = NULL;

    WT_ERR(__pack_init(session, &packout, outfmt));
    WT_ERR(__pack_init(session, &packin, infmt));

    /* Outfmt should complete before infmt */
    while ((ret = __pack_next(&packout, &pvout)) == 0) {
        WT_ERR(__pack_next(&packin, &pvin));
        before = p;
        WT_ERR(__unpack_read(session, &pvin, &p, (size_t)(end - p)));
        if (pvout.type != pvin.type) {
            if (pvout.type == 'u' && pvin.type == 'U') {
                /* Skip the prefixed size, we don't need it */
                WT_ERR(__wt_struct_unpack_size(session, before,
                    (size_t)(end - before), "I", &len));
                before += len;
            } else
                WT_ERR(ENOTSUP);
        }
        if (pout != NULL) {
            memcpy(pout, before, WT_PTRDIFF(p, before));
            pout += p - before;
        } else if (start == NULL)
            start = before;
    }
    WT_ERR_NOTFOUND_OK(ret);

    /* Be paranoid - __pack_write should never overflow. */
    WT_ASSERT(session, p <= end);

    if (pout != NULL) {
        outbuf->data = *reallocp;
        outbuf->size = WT_PTRDIFF(pout, *reallocp);
    } else {
        outbuf->data = start;
        outbuf->size = WT_PTRDIFF(p, start);
    }

err:    return (ret);
}
/*
 * __wt_realloc_aligned --
 *     ANSI realloc function that aligns to buffer boundaries, configured
 *     with the "buffer_alignment" key to wiredtiger_open.
 */
int
__wt_realloc_aligned(WT_SESSION_IMPL *session,
    size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp)
{
#if defined(HAVE_POSIX_MEMALIGN)
    int ret;

    /*
     * !!!
     * This function MUST handle a NULL WT_SESSION_IMPL handle.
     */
    if (session != NULL && S2C(session)->buffer_alignment > 0) {
        void *p, *newp;
        size_t bytes_allocated;

        WT_ASSERT(session, bytes_to_allocate != 0);

        /*
         * Sometimes we're allocating memory and we don't care about
         * the final length -- bytes_allocated_ret may be NULL.
         */
        bytes_allocated =
            (bytes_allocated_ret == NULL) ? 0 : *bytes_allocated_ret;
        WT_ASSERT(session, bytes_allocated < bytes_to_allocate);

        p = *(void **)retp;
        WT_ASSERT(session, p == NULL || bytes_allocated != 0);

        if (p == NULL &&
            session != NULL && S2C(session)->stats != NULL)
            WT_CSTAT_INCR(session, memalloc);

        if ((ret = posix_memalign(&newp,
            S2C(session)->buffer_alignment,
            bytes_to_allocate)) != 0)
            WT_RET_MSG(session, ret, "memory allocation");

        if (p != NULL)
            memcpy(newp, p, bytes_allocated);
        __wt_free(session, p);
        p = newp;

        /* Clear the allocated memory (see above). */
        memset((uint8_t *)p + bytes_allocated, 0,
            bytes_to_allocate - bytes_allocated);

        /* Update caller's bytes allocated value. */
        if (bytes_allocated_ret != NULL)
            *bytes_allocated_ret = bytes_to_allocate;

        *(void **)retp = p;
        return (0);
    }
#endif
    /*
     * If there is no posix_memalign function, or no alignment configured,
     * fall back to realloc.
     */
    return (__wt_realloc(
        session, bytes_allocated_ret, bytes_to_allocate, retp));
}
/*
 * __thread_group_resize --
 *     Resize an array of utility threads already holding the lock.
 */
static int
__thread_group_resize(
    WT_SESSION_IMPL *session, WT_THREAD_GROUP *group,
    uint32_t new_min, uint32_t new_max, uint32_t flags)
{
    WT_CONNECTION_IMPL *conn;
    WT_DECL_RET;
    WT_THREAD *thread;
    size_t alloc;
    uint32_t i, session_flags;

    conn = S2C(session);
    session_flags = 0;

    WT_ASSERT(session,
        group->current_threads <= group->alloc &&
        __wt_rwlock_islocked(session, group->lock));

    if (new_min == group->min && new_max == group->max)
        return (0);

    /*
     * Call shrink to reduce the number of thread structures and running
     * threads if required by the change in group size.
     */
    WT_RET(__thread_group_shrink(session, group, new_max));

    /*
     * Only reallocate the thread array if it is the largest ever, since
     * our realloc doesn't support shrinking the allocated size.
     */
    if (group->alloc < new_max) {
        alloc = group->alloc * sizeof(*group->threads);
        WT_RET(__wt_realloc(session, &alloc,
            new_max * sizeof(*group->threads), &group->threads));
        group->alloc = new_max;
    }

    /*
     * Initialize the structures based on the previous group size, not
     * the previous allocated size.
     */
    for (i = group->max; i < new_max; i++) {
        WT_ERR(__wt_calloc_one(session, &thread));
        /*
         * Threads get their own session and lookaside table cursor
         * if the lookaside table is open.  Note that threads are
         * started during recovery, before the lookaside table is
         * created.
         */
        if (LF_ISSET(WT_THREAD_CAN_WAIT))
            session_flags = WT_SESSION_CAN_WAIT;
        if (F_ISSET(conn, WT_CONN_LAS_OPEN))
            FLD_SET(session_flags, WT_SESSION_LOOKASIDE_CURSOR);
        WT_ERR(__wt_open_internal_session(conn, group->name,
            false, session_flags, &thread->session));
        if (LF_ISSET(WT_THREAD_PANIC_FAIL))
            F_SET(thread, WT_THREAD_PANIC_FAIL);
        thread->id = i;
        thread->run_func = group->run_func;
        WT_ASSERT(session, group->threads[i] == NULL);
        group->threads[i] = thread;
    }

    if (group->current_threads < new_min)
        WT_ERR(__thread_group_grow(session, group, new_min));

err:    /*
     * Update the thread group information even on failure to improve our
     * chances of cleaning up properly.
     */
    group->max = new_max;
    group->min = new_min;

    /*
     * An error resizing a thread array is fatal, it should only happen
     * in an out of memory situation.
     */
    if (ret != 0) {
        WT_TRET(__wt_thread_group_destroy(session, group));
        WT_PANIC_RET(session, ret, "Error while resizing thread group");
    }
    return (ret);
}
/*
 * __wt_schema_open_index --
 *     Open one or all indices for a table.
 */
int
__wt_schema_open_index(WT_SESSION_IMPL *session,
    WT_TABLE *table, const char *idxname, size_t len, WT_INDEX **indexp)
{
    WT_CURSOR *cursor;
    WT_DECL_ITEM(tmp);
    WT_DECL_RET;
    WT_INDEX *idx;
    u_int i;
    int cmp, match;
    const char *idxconf, *name, *tablename, *uri;

    /* Check if we've already done the work. */
    if (idxname == NULL && table->idx_complete)
        return (0);

    cursor = NULL;
    idx = NULL;

    /* Build a search key. */
    tablename = table->name;
    (void)WT_PREFIX_SKIP(tablename, "table:");
    WT_ERR(__wt_scr_alloc(session, 512, &tmp));
    WT_ERR(__wt_buf_fmt(session, tmp, "index:%s:", tablename));

    /* Find matching indices. */
    WT_ERR(__wt_metadata_cursor(session, NULL, &cursor));
    cursor->set_key(cursor, tmp->data);
    if ((ret = cursor->search_near(cursor, &cmp)) == 0 && cmp < 0)
        ret = cursor->next(cursor);
    for (i = 0; ret == 0; i++, ret = cursor->next(cursor)) {
        WT_ERR(cursor->get_key(cursor, &uri));
        name = uri;
        if (!WT_PREFIX_SKIP(name, tmp->data))
            break;

        /* Is this the index we are looking for? */
        match = idxname == NULL || WT_STRING_MATCH(name, idxname, len);

        /*
         * Ensure there is space, including if we have to make room for
         * a new entry in the middle of the list.
         */
        if (table->idx_alloc <= sizeof(WT_INDEX *) *
            ((size_t)WT_MAX(i, table->nindices) + 1))
            WT_ERR(__wt_realloc(session, &table->idx_alloc,
                WT_MAX(10 * sizeof(WT_INDEX *),
                2 * table->idx_alloc),
                &table->indices));

        /* Keep the in-memory list in sync with the metadata. */
        cmp = 0;
        while (table->indices[i] != NULL &&
            (cmp = strcmp(uri, table->indices[i]->name)) > 0) {
            /* Index no longer exists, remove it. */
            __wt_free(session, table->indices[i]);
            memmove(&table->indices[i], &table->indices[i + 1],
                (table->nindices - i) * sizeof(WT_INDEX *));
            table->indices[--table->nindices] = NULL;
        }
        if (cmp < 0) {
            /* Make room for a new index. */
            memmove(&table->indices[i + 1], &table->indices[i],
                (table->nindices - i) * sizeof(WT_INDEX *));
            table->indices[i] = NULL;
            ++table->nindices;
        }

        if (!match)
            continue;

        if (table->indices[i] == NULL) {
            WT_ERR(cursor->get_value(cursor, &idxconf));
            WT_ERR(__wt_calloc_def(session, 1, &idx));
            WT_ERR(__wt_strdup(session, uri, &idx->name));
            WT_ERR(__wt_strdup(session, idxconf, &idx->config));
            WT_ERR(__open_index(session, table, idx));

            table->indices[i] = idx;
            idx = NULL;
        }

        /* If we were looking for a single index, we're done. */
        if (indexp != NULL)
            *indexp = table->indices[i];
        if (idxname != NULL)
            break;
    }
    WT_ERR_NOTFOUND_OK(ret);

    /* If we did a full pass, we won't need to do it again. */
    if (idxname == NULL) {
        table->nindices = i;
        table->idx_complete = 1;
    }

err:    __wt_scr_free(&tmp);
    if (idx != NULL)
        __wt_schema_destroy_index(session, idx);
    if (cursor != NULL)
        WT_TRET(cursor->close(cursor));
    return (ret);
}