/**
 * Open a database file, optionally creating it.
 *
 * @param filename path handed to tree_file_open()
 * @param flags    COUCHSTORE_OPEN_FLAG_* bits; RDONLY combined with CREATE
 *                 is rejected
 * @param ops      file operations handed to tree_file_open()
 * @param pDb      receives the new Db on success; not written on failure
 * @return COUCHSTORE_SUCCESS, COUCHSTORE_ERROR_INVALID_ARGUMENTS,
 *         COUCHSTORE_ERROR_ALLOC_FAIL, COUCHSTORE_ERROR_NO_HEADER, or
 *         whatever error the helpers report
 */
LIBCOUCHSTORE_API
couchstore_error_t couchstore_open_db_ex(const char *filename,
                                         couchstore_open_flags flags,
                                         const couch_file_ops *ops,
                                         Db **pDb)
{
    couchstore_error_t errcode = COUCHSTORE_SUCCESS;
    const int read_only = (flags & COUCHSTORE_OPEN_FLAG_RDONLY) != 0;
    const int create = (flags & COUCHSTORE_OPEN_FLAG_CREATE) != 0;
    Db *db;
    int openflags;

    /* A read-only handle can never create the file. */
    if (read_only && create) {
        return COUCHSTORE_ERROR_INVALID_ARGUMENTS;
    }

    db = calloc(1, sizeof(Db));
    if (db == NULL) {
        return COUCHSTORE_ERROR_ALLOC_FAIL;
    }

    openflags = read_only ? O_RDONLY : O_RDWR;
    if (create) {
        openflags |= O_CREAT;
    }

    error_pass(tree_file_open(&db->file, filename, openflags, ops));

    db->file.pos = db->file.ops->goto_eof(db->file.handle);
    if (db->file.pos != 0) {
        /* Non-empty file: locate the existing header. */
        error_pass(find_header(db));
    } else if (read_only) {
        /* Empty file and we are not allowed to write a header. */
        error_pass(COUCHSTORE_ERROR_NO_HEADER);
    } else {
        /* Empty writable file: start it off with a fresh header. */
        error_pass(create_header(db));
    }

    *pDb = db;
    return COUCHSTORE_SUCCESS;

cleanup:
    couchstore_close_db(db);
    return errcode;
}
//Move this node's pointers list to dst node's values list. static couchstore_error_t mr_move_pointers(couchfile_modify_result *src, couchfile_modify_result *dst) { couchstore_error_t errcode = COUCHSTORE_SUCCESS; if (src->pointers_end == src->pointers) { return 0; } nodelist *ptr = src->pointers->next; nodelist *next = ptr; while (ptr != NULL && errcode == 0) { dst->node_len += ptr->data.size + ptr->key.size + 5; dst->count++; next = ptr->next; ptr->next = NULL; dst->values_end->next = ptr; dst->values_end = ptr; ptr = next; error_pass(maybe_flush(dst)); } cleanup: src->pointers->next = next; src->pointers_end = src->pointers; return errcode; }
/**
 * Look up document infos for an array of sequence numbers.
 *
 * Each sequence is encoded with encode_raw48() into a raw_by_seq_key, and the
 * resulting key list is handed to iterate_docinfos() against the by-sequence
 * root of the header.
 *
 * @param db       database handle
 * @param sequence array of numDocs sequence numbers
 * @param numDocs  number of entries in sequence[] (may be 0)
 * @param options  if the RANGES bit is set, the corresponding flag is passed
 *                 to iterate_docinfos()
 * @param callback forwarded to iterate_docinfos()
 * @param ctx      forwarded to iterate_docinfos()
 * @return COUCHSTORE_ERROR_ALLOC_FAIL if the key arrays cannot be allocated,
 *         otherwise the result of iterate_docinfos()
 */
LIBCOUCHSTORE_API
couchstore_error_t couchstore_docinfos_by_sequence(Db *db,
                                                   const uint64_t sequence[],
                                                   unsigned numDocs,
                                                   couchstore_docinfos_options options,
                                                   couchstore_changes_callback_fn callback,
                                                   void *ctx)
{
    couchstore_error_t errcode = COUCHSTORE_SUCCESS;

    // A zero-byte allocation is allowed to return NULL, which would be
    // misreported as an allocation failure for an empty request. Always ask
    // for at least one slot; calloc also checks the size multiplication.
    unsigned slots = numDocs ? numDocs : 1;
    sized_buf *keylist = calloc(slots, sizeof(*keylist));
    raw_by_seq_key *keyvalues = calloc(slots, sizeof(*keyvalues));
    error_unless(keylist && keyvalues, COUCHSTORE_ERROR_ALLOC_FAIL);

    // Create the array of keys:
    unsigned i;
    for (i = 0; i < numDocs; ++i) {
        keyvalues[i].sequence = encode_raw48(sequence[i]);
        keylist[i].buf = (void*) &keyvalues[i];
        keylist[i].size = sizeof(keyvalues[i]);
    }

    error_pass(iterate_docinfos(db, keylist, numDocs,
                                db->header.by_seq_root, seq_ptr_cmp, seq_cmp,
                                callback,
                                (options & RANGES) != 0,
                                ctx));
cleanup:
    free(keylist);
    free(keyvalues);
    return errcode;
}
/**
 * Initialise *file and open filename through the given file operations.
 *
 * The ops table must be version 4 and provide every entry point checked
 * below; otherwise COUCHSTORE_ERROR_INVALID_ARGUMENTS is returned before
 * *file is touched. On the remaining paths *file is zeroed first, then its
 * path (a strdup of filename), ops and handle are filled in.
 *
 * @return COUCHSTORE_SUCCESS, COUCHSTORE_ERROR_INVALID_ARGUMENTS,
 *         COUCHSTORE_ERROR_ALLOC_FAIL, or the error from ops->open()
 */
couchstore_error_t tree_file_open(tree_file* file,
                                  const char *filename,
                                  int openflags,
                                  const couch_file_ops *ops)
{
    couchstore_error_t errcode = COUCHSTORE_SUCCESS;

    /* Reject missing arguments and unsupported or incomplete ops tables. */
    if (filename == NULL || file == NULL || ops == NULL) {
        return COUCHSTORE_ERROR_INVALID_ARGUMENTS;
    }
    if (ops->version != 4) {
        return COUCHSTORE_ERROR_INVALID_ARGUMENTS;
    }
    if (ops->constructor == NULL || ops->open == NULL ||
            ops->close == NULL || ops->pread == NULL ||
            ops->pwrite == NULL || ops->goto_eof == NULL ||
            ops->sync == NULL || ops->destructor == NULL) {
        return COUCHSTORE_ERROR_INVALID_ARGUMENTS;
    }

    memset(file, 0, sizeof(*file));

    file->path = strdup(filename);
    error_unless(file->path, COUCHSTORE_ERROR_ALLOC_FAIL);

    file->ops = couch_get_buffered_file_ops(ops, &file->handle);
    error_unless(file->ops, COUCHSTORE_ERROR_ALLOC_FAIL);

    error_pass(file->ops->open(&file->handle, filename, openflags));

cleanup:
    return errcode;
}
// Attempts to initialize the database from a header at the given file position static couchstore_error_t find_header_at_pos(Db *db, cs_off_t pos) { int errcode = COUCHSTORE_SUCCESS; raw_file_header *header_buf = NULL; uint8_t buf[2]; ssize_t readsize = db->file.ops->pread(db->file.handle, buf, 2, pos); error_unless(readsize == 2, COUCHSTORE_ERROR_READ); if (buf[0] == 0) { return COUCHSTORE_ERROR_NO_HEADER; } else if (buf[0] != 1) { return COUCHSTORE_ERROR_CORRUPT; } int header_len = pread_header(&db->file, pos, (char**)&header_buf); if (header_len < 0) { error_pass(header_len); } db->header.position = pos; db->header.disk_version = decode_raw08(header_buf->version); error_unless(db->header.disk_version == COUCH_DISK_VERSION, COUCHSTORE_ERROR_HEADER_VERSION); db->header.update_seq = decode_raw48(header_buf->update_seq); db->header.purge_seq = decode_raw48(header_buf->purge_seq); db->header.purge_ptr = decode_raw48(header_buf->purge_ptr); error_unless(db->header.purge_ptr <= db->header.position, COUCHSTORE_ERROR_CORRUPT); int seqrootsize = decode_raw16(header_buf->seqrootsize); int idrootsize = decode_raw16(header_buf->idrootsize); int localrootsize = decode_raw16(header_buf->localrootsize); error_unless(header_len == HEADER_BASE_SIZE + seqrootsize + idrootsize + localrootsize, COUCHSTORE_ERROR_CORRUPT); char *root_data = (char*) (header_buf + 1); // i.e. just past *header_buf error_pass(read_db_root(db, &db->header.by_seq_root, root_data, seqrootsize)); root_data += seqrootsize; error_pass(read_db_root(db, &db->header.by_id_root, root_data, idrootsize)); root_data += idrootsize; error_pass(read_db_root(db, &db->header.local_docs_root, root_data, localrootsize)); cleanup: free(header_buf); return errcode; }
/**
 * Allocate a TreeWriter backed either by an existing file or by a temp file.
 *
 * @param unsortedFilePath file opened with mode "r+b" and positioned at its
 *                         end; when NULL, tmpfile() is used instead
 * @param key_compare      key comparison callback; ebin_cmp is used when NULL
 * @param reduce           stored on the writer
 * @param rereduce         stored on the writer
 * @param out_writer       receives the writer on success
 * @return COUCHSTORE_SUCCESS, COUCHSTORE_ERROR_ALLOC_FAIL, or
 *         COUCHSTORE_ERROR_NO_SUCH_FILE when the file cannot be opened
 */
couchstore_error_t TreeWriterOpen(const char* unsortedFilePath,
                                  compare_callback key_compare,
                                  reduce_fn reduce,
                                  reduce_fn rereduce,
                                  TreeWriter** out_writer)
{
    couchstore_error_t errcode = COUCHSTORE_SUCCESS;
    TreeWriter* writer = calloc(1, sizeof(TreeWriter));
    error_unless(writer, COUCHSTORE_ERROR_ALLOC_FAIL);

    if (unsortedFilePath) {
        writer->file = fopen(unsortedFilePath, "r+b");
    } else {
        writer->file = tmpfile();
    }
    if (writer->file == NULL) {
        TreeWriterFree(writer);
        error_pass(COUCHSTORE_ERROR_NO_SUCH_FILE);
    }

    if (unsortedFilePath) {
        // Position at the end in case more items will be added.
        fseek(writer->file, 0, SEEK_END);
    }

    if (key_compare) {
        writer->key_compare = key_compare;
    } else {
        writer->key_compare = ebin_cmp;
    }
    writer->reduce = reduce;
    writer->rereduce = rereduce;
    *out_writer = writer;

cleanup:
    return errcode;
}
/**
 * Build a B-tree in treefile from the key/value records stored in the
 * writer's file.
 *
 * The file is rewound and read as a sequence of records, each laid out as a
 * 16-bit key length, a 32-bit value length (both converted with
 * ntohs/ntohl), then the key bytes and the value bytes. Every record is
 * pushed into a modify-result created by new_btree_modres(); the tree is
 * finished with complete_new_btree().
 *
 * @param writer   writer whose file holds the records
 * @param treefile destination tree file
 * @param out_root receives the value returned by complete_new_btree();
 *                 not written when an earlier step fails
 * @return COUCHSTORE_SUCCESS, COUCHSTORE_ERROR_ALLOC_FAIL,
 *         COUCHSTORE_ERROR_READ, or the error set by complete_new_btree()
 */
couchstore_error_t TreeWriterWrite(TreeWriter* writer,
                                   tree_file* treefile,
                                   node_pointer** out_root)
{
    couchstore_error_t errcode = COUCHSTORE_SUCCESS;
    arena* transient_arena = new_arena(0);
    arena* persistent_arena = new_arena(0);
    error_unless(transient_arena && persistent_arena, COUCHSTORE_ERROR_ALLOC_FAIL);

    rewind(writer->file);

    // Create the structure to write the tree to the db:
    compare_info idcmp;
    sized_buf tmp;
    idcmp.compare = writer->key_compare;
    idcmp.arg = &tmp;

    couchfile_modify_result* target_mr = new_btree_modres(persistent_arena,
                                                          transient_arena,
                                                          treefile,
                                                          &idcmp,
                                                          writer->reduce,
                                                          writer->rereduce,
                                                          DB_CHUNK_THRESHOLD,
                                                          DB_CHUNK_THRESHOLD);
    if (target_mr == NULL) {
        error_pass(COUCHSTORE_ERROR_ALLOC_FAIL);
    }

    // Read all the key/value pairs from the file and add them to the tree:
    uint16_t klen;
    uint32_t vlen;
    sized_buf k, v;
    while (1) {
        // A failed length read ends the loop; ferror() below tells a real
        // I/O error apart from end-of-file.
        // NOTE(review): a record truncated between the two length fields is
        // treated as end of input rather than as an error -- confirm intent.
        if (fread(&klen, sizeof(klen), 1, writer->file) != 1) {
            break;
        }
        if (fread(&vlen, sizeof(vlen), 1, writer->file) != 1) {
            break;
        }
        k.size = ntohs(klen);
        k.buf = arena_alloc(transient_arena, k.size);
        v.size = ntohl(vlen);
        v.buf = arena_alloc(transient_arena, v.size);
        // Never hand fread a NULL destination for a non-empty read.
        error_unless((k.size == 0 || k.buf) && (v.size == 0 || v.buf),
                     COUCHSTORE_ERROR_ALLOC_FAIL);
        if (fread(k.buf, k.size, 1, writer->file) != 1) {
            error_pass(COUCHSTORE_ERROR_READ);
        }
        if (fread(v.buf, v.size, 1, writer->file) != 1) {
            error_pass(COUCHSTORE_ERROR_READ);
        }
        // NOTE(review): any status reported by mr_push_item is not examined
        // here -- confirm whether it can fail.
        mr_push_item(&k, &v, target_mr);
        if (target_mr->count == 0) {
            /* No items queued, we must have just flushed. We can safely rewind the transient arena. */
            arena_free_all(transient_arena);
        }
    }

    // Check for file error. ferror() reports an error as any nonzero value,
    // so the result must not be filtered against EOF.
    if (ferror(writer->file)) {
        error_pass(COUCHSTORE_ERROR_READ);
    }

    // Finish up the tree:
    *out_root = complete_new_btree(target_mr, &errcode);

cleanup:
    delete_arena(transient_arena);
    delete_arena(persistent_arena);
    return errcode;
}
// Apply a batch of document updates to the by-id and by-sequence trees.
//
// For each of the numdocs documents a FETCH followed by an INSERT is queued
// against the by-id tree (in ascending id order). The fetch callback
// (idfetch_update_cb) receives a context through which further by-sequence
// actions can be queued; afterwards one INSERT per document is appended, the
// by-sequence actions are sorted with seq_action_compare and applied. On
// success the header roots are replaced by the new roots (old roots are
// freed when they differ).
//
// Returns COUCHSTORE_SUCCESS, COUCHSTORE_ERROR_ALLOC_FAIL, or the error
// reported by modify_btree(). The header roots are left untouched on error.
static couchstore_error_t update_indexes(Db *db,
                                         sized_buf *seqs,
                                         sized_buf *seqvals,
                                         sized_buf *ids,
                                         sized_buf *idvals,
                                         int numdocs)
{
    couchfile_modify_action *idacts;
    couchfile_modify_action *seqacts;
    const sized_buf **sorted_ids = NULL;
    size_t size;
    fatbuf *actbuf;
    node_pointer *new_id_root;
    node_pointer *new_seq_root;
    couchstore_error_t errcode = COUCHSTORE_SUCCESS;
    couchfile_modify_request seqrq, idrq;
    sized_buf tmpsb;
    int ii;

    /*
    ** Two action list up to numdocs * 2 in size + Compare keys for ids,
    ** and compare keys for removed seqs found from id index +
    ** Max size of a int64 erlang term (for deleted seqs)
    */
    size = 4 * sizeof(couchfile_modify_action) + 2 * sizeof(sized_buf) + 10;

    actbuf = fatbuf_alloc(numdocs * size);
    error_unless(actbuf, COUCHSTORE_ERROR_ALLOC_FAIL);

    idacts = fatbuf_get(actbuf, numdocs * sizeof(couchfile_modify_action) * 2);
    seqacts = fatbuf_get(actbuf, numdocs * sizeof(couchfile_modify_action) * 2);
    error_unless(idacts && seqacts, COUCHSTORE_ERROR_ALLOC_FAIL);

    index_update_ctx fetcharg = {
        seqacts, 0, &seqs, &seqvals, 0, actbuf
    };

    // Sort the array indexes of ids[] by ascending id. Since we can't pass context info to qsort,
    // actually sort an array of pointers to the elements of ids[], rather than the array indexes.
    sorted_ids = malloc(numdocs * sizeof(sized_buf*));
    error_unless(sorted_ids, COUCHSTORE_ERROR_ALLOC_FAIL);
    for (ii = 0; ii < numdocs; ++ii) {
        sorted_ids[ii] = &ids[ii];
    }
    qsort(sorted_ids, numdocs, sizeof(sorted_ids[0]), &ebin_ptr_compare);

    // Assemble idacts[] array, in sorted order by id:
    for (ii = 0; ii < numdocs; ii++) {
        ptrdiff_t isorted = sorted_ids[ii] - ids;   // recover index of ii'th id in sort order

        idacts[ii * 2].type = ACTION_FETCH;
        idacts[ii * 2].value.arg = &fetcharg;
        idacts[ii * 2 + 1].type = ACTION_INSERT;
        idacts[ii * 2 + 1].value.data = &idvals[isorted];

        idacts[ii * 2].key = &ids[isorted];
        idacts[ii * 2 + 1].key = &ids[isorted];
    }

    idrq.cmp.compare = ebin_cmp;
    idrq.cmp.arg = &tmpsb;
    idrq.db = db;
    idrq.actions = idacts;
    idrq.num_actions = numdocs * 2;
    idrq.reduce = by_id_reduce;
    idrq.rereduce = by_id_rereduce;
    idrq.fetch_callback = idfetch_update_cb;
    idrq.compacting = 0;

    new_id_root = modify_btree(&idrq, db->header.by_id_root, &errcode);
    error_pass(errcode);

    // Queue one by-sequence INSERT per document after whatever actions the
    // fetch callback already placed in seqacts[].
    while (fetcharg.valpos < numdocs) {
        seqacts[fetcharg.actpos].type = ACTION_INSERT;
        seqacts[fetcharg.actpos].value.data = &seqvals[fetcharg.valpos];
        seqacts[fetcharg.actpos].key = &seqs[fetcharg.valpos];
        fetcharg.valpos++;
        fetcharg.actpos++;
    }

    qsort(seqacts, fetcharg.actpos, sizeof(couchfile_modify_action),
          seq_action_compare);

    seqrq.cmp.compare = seq_cmp;
    seqrq.cmp.arg = &tmpsb;
    seqrq.actions = seqacts;
    seqrq.num_actions = fetcharg.actpos;
    seqrq.reduce = by_seq_reduce;
    seqrq.rereduce = by_seq_rereduce;
    seqrq.db = db;
    seqrq.compacting = 0;

    new_seq_root = modify_btree(&seqrq, db->header.by_seq_root, &errcode);
    if (errcode != COUCHSTORE_SUCCESS) {
        // The new by-id root is never installed on this path; release it
        // unless it is still the header's own root. Then go through the
        // common cleanup so sorted_ids is released as well (returning
        // directly from here used to leak it).
        if (new_id_root != db->header.by_id_root) {
            free(new_id_root);
        }
        goto cleanup;
    }

    if (db->header.by_id_root != new_id_root) {
        free(db->header.by_id_root);
        db->header.by_id_root = new_id_root;
    }

    if (db->header.by_seq_root != new_seq_root) {
        free(db->header.by_seq_root);
        db->header.by_seq_root = new_seq_root;
    }

cleanup:
    free(sorted_ids);
    fatbuf_free(actbuf);
    return errcode;
}
// Apply actions[start, end) of a modify request to the node referenced by
// nptr (or to an empty leaf when nptr is NULL) and hand the outcome to dst.
//
// A node whose first byte is 1 is treated as a key/value leaf: its items are
// merged with the actions in key order. A node whose first byte is 0 is a
// key/pointer node: actions are routed into the matching child by recursing,
// and untouched children are carried over as-is. Any other first byte yields
// COUCHSTORE_ERROR_CORRUPT.
//
// If nothing was modified and nptr is non-NULL, nptr itself is pushed to dst;
// otherwise dst is marked modified and receives the pointers of the newly
// written nodes.
//
// Returns 0 on success (also when start == end, in which case nothing is
// done), or an error code.
static couchstore_error_t modify_node(couchfile_modify_request *rq,
                                      node_pointer *nptr,
                                      int start, int end,
                                      couchfile_modify_result *dst)
{
    char *nodebuf = NULL;  // FYI, nodebuf is a malloced block, not in the arena
    int bufpos = 1;        // item data starts after the one-byte node type
    int nodebuflen = 0;
    int errcode = 0;
    couchfile_modify_result *local_result = NULL;

    if (start == end) {
        return 0;
    }

    if (nptr) {
        if ((nodebuflen = pread_compressed(rq->db, nptr->pointer, (char **) &nodebuf)) < 0) {
            error_pass(COUCHSTORE_ERROR_READ);
        }
        // The node type is read from nodebuf[0] below, so an empty node
        // cannot be valid and must not be dereferenced.
        error_unless(nodebuflen > 0, COUCHSTORE_ERROR_CORRUPT);
    }

    local_result = make_modres(dst->arena, rq);
    error_unless(local_result, COUCHSTORE_ERROR_ALLOC_FAIL);

    if (nptr == NULL || nodebuf[0] == 1) { //KV Node
        local_result->node_type = KV_NODE;
        while (bufpos < nodebuflen) {
            sized_buf cmp_key, val_buf;
            bufpos += read_kv(nodebuf + bufpos, &cmp_key, &val_buf);
            int advance = 0;
            while (!advance && start < end) {
                advance = 1;
                int cmp_val = rq->cmp.compare(&cmp_key, rq->actions[start].key);

                if (cmp_val < 0) { //Key less than action key
                    mr_push_item(&cmp_key, &val_buf, local_result);
                } else if (cmp_val > 0) { //Key greater than action key
                    switch (rq->actions[start].type) {
                    case ACTION_INSERT:
                        local_result->modified = 1;
                        mr_push_item(rq->actions[start].key, rq->actions[start].value.data, local_result);
                        break;

                    case ACTION_REMOVE:
                        local_result->modified = 1;
                        break;

                    case ACTION_FETCH:
                        if (rq->fetch_callback) {
                            //not found
                            (*rq->fetch_callback)(rq, rq->actions[start].key, NULL, rq->actions[start].value.arg);
                        }
                    }
                    start++;
                    //Do next action on same item in the node, as our action was
                    //not >= it.
                    advance = 0;
                } else if (cmp_val == 0) { //Node key is equal to action key
                    switch (rq->actions[start].type) {
                    case ACTION_INSERT:
                        local_result->modified = 1;
                        mr_push_item(rq->actions[start].key, rq->actions[start].value.data, local_result);
                        break;

                    case ACTION_REMOVE:
                        local_result->modified = 1;
                        break;

                    case ACTION_FETCH:
                        if (rq->fetch_callback) {
                            (*rq->fetch_callback)(rq, rq->actions[start].key, &val_buf, rq->actions[start].value.arg);
                        }
                        //Do next action on same item in the node, as our action was a fetch
                        //and there may be an equivalent insert or remove
                        //following.
                        advance = 0;
                    }
                    start++;
                }
            }
            if (start == end && !advance) {
                //If we've exhausted actions then just keep this key
                mr_push_item(&cmp_key, &val_buf, local_result);
            }
        }
        while (start < end) {
            //We're at the end of a leaf node.
            switch (rq->actions[start].type) {
            case ACTION_INSERT:
                local_result->modified = 1;
                mr_push_item(rq->actions[start].key, rq->actions[start].value.data, local_result);
                break;

            case ACTION_REMOVE:
                local_result->modified = 1;
                break;

            case ACTION_FETCH:
                if (rq->fetch_callback) {
                    //not found
                    (*rq->fetch_callback)(rq, rq->actions[start].key, NULL, rq->actions[start].value.arg);
                }
                break;
            }
            start++;
        }
    } else if (nodebuf[0] == 0) { //KP Node
        local_result->node_type = KP_NODE;
        while (bufpos < nodebuflen && start < end) {
            sized_buf cmp_key, val_buf;
            bufpos += read_kv(nodebuf + bufpos, &cmp_key, &val_buf);
            int cmp_val = rq->cmp.compare(&cmp_key, rq->actions[start].key);
            if (bufpos == nodebuflen) {
                //We're at the last item in the kpnode, must apply all our
                //actions here.
                node_pointer *desc = read_pointer(dst->arena, &cmp_key, val_buf.buf);
                if (!desc) {
                    errcode = COUCHSTORE_ERROR_ALLOC_FAIL;
                    goto cleanup;
                }

                errcode = modify_node(rq, desc, start, end, local_result);
                if (errcode != COUCHSTORE_SUCCESS) {
                    goto cleanup;
                }
                break;
            }

            if (cmp_val < 0) {
                //Key in node item less than action item and not at end
                //position, so just add it and continue.
                node_pointer *add = read_pointer(dst->arena, &cmp_key, val_buf.buf);
                if (!add) {
                    errcode = COUCHSTORE_ERROR_ALLOC_FAIL;
                    goto cleanup;
                }

                errcode = mr_push_pointerinfo(add, local_result);
                if (errcode != COUCHSTORE_SUCCESS) {
                    goto cleanup;
                }
            } else if (cmp_val >= 0) {
                //Found a key in the node greater than the one in the current
                //action. Descend into the pointed node with as many actions as
                //are less than the key here.
                int range_end = start;
                while (range_end < end &&
                        rq->cmp.compare(rq->actions[range_end].key, &cmp_key) <= 0) {
                    range_end++;
                }

                node_pointer *desc = read_pointer(dst->arena, &cmp_key, val_buf.buf);
                if (!desc) {
                    errcode = COUCHSTORE_ERROR_ALLOC_FAIL;
                    goto cleanup;
                }

                errcode = modify_node(rq, desc, start, range_end, local_result);
                start = range_end;
                if (errcode != COUCHSTORE_SUCCESS) {
                    goto cleanup;
                }
            }
        }
        // Actions are exhausted: carry over the remaining children unchanged.
        while (bufpos < nodebuflen) {
            sized_buf cmp_key, val_buf;
            bufpos += read_kv(nodebuf + bufpos, &cmp_key, &val_buf);
            node_pointer *add = read_pointer(dst->arena, &cmp_key, val_buf.buf);
            if (!add) {
                errcode = COUCHSTORE_ERROR_ALLOC_FAIL;
                goto cleanup;
            }

            errcode = mr_push_pointerinfo(add, local_result);
            if (errcode != COUCHSTORE_SUCCESS) {
                goto cleanup;
            }
        }
    } else {
        errcode = COUCHSTORE_ERROR_CORRUPT;
        goto cleanup;
    }

    //If we've done modifications, write out the last leaf node.
    error_pass(flush_mr(local_result));
    if (!local_result->modified && nptr != NULL) {
        //If we didn't do anything, give back the pointer to the original.
        //A failure here must be reported just like the other
        //mr_push_pointerinfo calls above.
        error_pass(mr_push_pointerinfo(nptr, dst));
    } else {
        //Otherwise, give back the pointers to the nodes we've created.
        dst->modified = 1;
        error_pass(mr_move_pointers(local_result, dst));
    }

cleanup:
    free(nodebuf);
    return errcode;
}
// Look up rq->keys[current, end) in the subtree stored at diskpos.
//
// The node is read with pread_compressed(). In a key/pointer node (first
// byte 0) the function recurses into each child whose key is >= the current
// lookup key, wrapping the descent in node_callback calls when one is set.
// In a key/value node (first byte 1) matching items are passed to
// fetch_callback. When the request is not folding, keys left over at the end
// are reported to fetch_callback with a NULL value.
//
// Returns 0 when current == end, the error from pread_compressed() or from a
// callback on failure, and COUCHSTORE_SUCCESS otherwise.
static couchstore_error_t btree_lookup_inner(couchfile_lookup_request *rq,
                                             uint64_t diskpos,
                                             int current,
                                             int end)
{
    int bufpos = 1;   // skip the one-byte node type
    int nodebuflen = 0;

    if (current == end) {
        return 0;
    }

    couchstore_error_t errcode = COUCHSTORE_SUCCESS;
    char *nodebuf = NULL;

    nodebuflen = pread_compressed(rq->file, diskpos, &nodebuf);
    error_unless(nodebuflen >= 0, nodebuflen);  // if negative, it's an error code

    switch (nodebuf[0]) {
    case 0: //KP Node
        while (bufpos < nodebuflen && current < end) {
            sized_buf node_key, node_val;
            bufpos += read_kv(nodebuf + bufpos, &node_key, &node_val);

            // Child covers only keys below the one we want: skip it.
            if (rq->cmp.compare(&node_key, rq->keys[current]) < 0) {
                continue;
            }

            if (rq->fold) {
                rq->in_fold = 1;
            }

            //Descend into the pointed to node.
            //with all keys < item key.
            int range_end = current + 1;
            while (range_end < end &&
                    rq->cmp.compare(&node_key, rq->keys[range_end]) >= 0) {
                range_end++;
            }

            const raw_node_pointer *raw = (const raw_node_pointer*)node_val.buf;
            if (rq->node_callback) {
                uint64_t subtreeSize = decode_raw48(raw->subtreesize);
                sized_buf reduce_value = {node_val.buf + sizeof(raw_node_pointer),
                                          decode_raw16(raw->reduce_value_size)};
                error_pass(rq->node_callback(rq, subtreeSize, &reduce_value));
            }

            uint64_t child_pos = decode_raw48(raw->pointer);
            error_pass(btree_lookup_inner(rq, child_pos, current, range_end));
            if (!rq->in_fold) {
                current = range_end;
            }
            if (rq->node_callback) {
                error_pass(rq->node_callback(rq, 0, NULL));
            }
        }
        break;

    case 1: //KV Node
        while (bufpos < nodebuflen && current < end) {
            sized_buf node_key, node_val;
            bufpos += read_kv(nodebuf + bufpos, &node_key, &node_val);
            int order = rq->cmp.compare(&node_key, rq->keys[current]);

            if (order >= 0 && rq->fold && !rq->in_fold) {
                rq->in_fold = 1;
            } else if (rq->in_fold && (current + 1) < end &&
                       (rq->cmp.compare(&node_key, rq->keys[current + 1])) > 0) {
                //We've hit a key past the end of our range.
                rq->in_fold = 0;
                rq->fold = 0;
                current = end;
            }

            if (order == 0 || (order > 0 && rq->in_fold)) {
                //Found
                error_pass(rq->fetch_callback(rq, &node_key, &node_val));
                if (!rq->in_fold) {
                    current++;
                }
            }
        }
        break;

    default:
        break;
    }

    //Any remaining items are not found.
    for (; current < end && !rq->fold; current++) {
        error_pass(rq->fetch_callback(rq, rq->keys[current], NULL));
    }

cleanup:
    free(nodebuf);
    return errcode;
}