node_pointer* complete_new_btree(couchfile_modify_result* mr, couchstore_error_t *errcode) { *errcode = flush_mr(mr); if(*errcode != COUCHSTORE_SUCCESS) { return NULL; } couchfile_modify_result* targ_mr = make_modres(mr->arena, mr->rq); if (!targ_mr) { *errcode = COUCHSTORE_ERROR_ALLOC_FAIL; return NULL; } targ_mr->modified = 1; targ_mr->node_type = KP_NODE; *errcode = mr_move_pointers(mr, targ_mr); if(*errcode != COUCHSTORE_SUCCESS) { return NULL; } node_pointer* ret_ptr; if(targ_mr->count > 1 || targ_mr->pointers != targ_mr->pointers_end) { ret_ptr = finish_root(mr->rq, targ_mr, errcode); } else { ret_ptr = targ_mr->values_end->pointer; } return copy_node_pointer(ret_ptr); }
couchfile_pointer_info* modify_btree(couchfile_modify_request *rq, couchfile_pointer_info *root, int *errcode) { couchfile_pointer_info* ret_ptr = root; couchfile_modify_result* root_result = make_modres(rq); root_result->node_type = KP_NODE; *errcode = modify_node(rq, root, 0, rq->num_actions, root_result); if(*errcode < 0) { free_modres(root_result); return NULL; } if(root_result->values_end->value.pointer == root) { //If we got the root pointer back, remove it from the list //so we don't try to free it. root_result->values_end->value.mem = NULL; } if(!root_result->modified) { free_modres(root_result); } else { if(root_result->count > 1 || root_result->pointers != root_result->pointers_end) { //The root was split //Write it to disk and return the pointer to it. ret_ptr = finish_root(rq, root_result, errcode); if(*errcode < 0) { ret_ptr = NULL; } } else { ret_ptr = root_result->values_end->value.pointer; root_result->values_end->value.mem = NULL; free_modres(root_result); } } return ret_ptr; }
couchfile_pointer_info* finish_root(couchfile_modify_request* rq, couchfile_modify_result *root_result, int *errcode) { couchfile_pointer_info *ret_ptr = NULL; couchfile_modify_result *collector = make_modres(rq); collector->modified = 1; collector->node_type = KP_NODE; flush_mr(root_result); while(1) { if(root_result->pointers_end == root_result->pointers->next) { //The root result split into exactly one kp_node. //Return the pointer to it. ret_ptr = root_result->pointers_end->value.pointer; root_result->pointers_end->value.mem = NULL; break; } else { //The root result split into more than one kp_node. //Move the pointer list to the value list and write out the new node. *errcode = mr_move_pointers(root_result, collector); if(*errcode < 0) { goto cleanup; } *errcode = flush_mr(collector); if(*errcode < 0) { goto cleanup; } //Swap root_result and collector mr's. couchfile_modify_result *tmp = root_result; root_result = collector; collector = tmp; } } cleanup: free_modres(root_result); free_modres(collector); return ret_ptr; }
node_pointer *modify_btree(couchfile_modify_request *rq, node_pointer *root, couchstore_error_t *errcode) { arena* a = new_arena(0); node_pointer *ret_ptr = root; couchfile_modify_result *root_result = make_modres(a, rq); if (!root_result) { delete_arena(a); *errcode = COUCHSTORE_ERROR_ALLOC_FAIL; return root; } root_result->node_type = KP_NODE; *errcode = modify_node(rq, root, 0, rq->num_actions, root_result); if (*errcode < 0) { delete_arena(a); return NULL; } if (root_result->values_end->pointer == root) { //If we got the root pointer back, remove it from the list //so we don't try to free it. root_result->values_end->pointer = NULL; } if (root_result->modified) { if (root_result->count > 1 || root_result->pointers != root_result->pointers_end) { //The root was split //Write it to disk and return the pointer to it. ret_ptr = finish_root(rq, root_result, errcode); if (*errcode < 0) { ret_ptr = NULL; } } else { ret_ptr = root_result->values_end->pointer; } } if (ret_ptr != root) { ret_ptr = copy_node_pointer(ret_ptr); } delete_arena(a); return ret_ptr; }
static node_pointer *finish_root(couchfile_modify_request *rq, couchfile_modify_result *root_result, couchstore_error_t *errcode) { node_pointer *ret_ptr = NULL; couchfile_modify_result *collector = make_modres(root_result->arena, rq); if (!collector) { *errcode = COUCHSTORE_ERROR_ALLOC_FAIL; return NULL; } collector->modified = 1; collector->node_type = KP_NODE; flush_mr(root_result); while (1) { if (root_result->pointers_end == root_result->pointers->next) { //The root result split into exactly one kp_node. //Return the pointer to it. ret_ptr = root_result->pointers_end->pointer; break; } else { //The root result split into more than one kp_node. //Move the pointer list to the value list and write out the new node. *errcode = mr_move_pointers(root_result, collector); if (*errcode < 0) { goto cleanup; } *errcode = flush_mr(collector); if (*errcode < 0) { goto cleanup; } //Swap root_result and collector mr's. couchfile_modify_result *tmp = root_result; root_result = collector; collector = tmp; } } cleanup: return ret_ptr; }
couchfile_modify_result *new_btree_modres(arena *a, arena *transient_arena, Db* db, compare_info* cmp, reduce_fn reduce, reduce_fn rereduce) { couchfile_modify_request* rq = arena_alloc(a, sizeof(couchfile_modify_request)); rq->cmp = *cmp; rq->db = db; rq->num_actions = 0; rq->fetch_callback = NULL; rq->reduce = reduce; rq->rereduce = rereduce; rq->compacting = 1; couchfile_modify_result* mr = make_modres(a, rq); if (!mr) return NULL; mr->arena_transient = transient_arena; mr->modified = 1; mr->node_type = KV_NODE; return mr; }
static couchstore_error_t modify_node(couchfile_modify_request *rq, node_pointer *nptr, int start, int end, couchfile_modify_result *dst) { char *nodebuf = NULL; // FYI, nodebuf is a malloced block, not in the arena int bufpos = 1; int nodebuflen = 0; int errcode = 0; couchfile_modify_result *local_result = NULL; if (start == end) { return 0; } if (nptr) { if ((nodebuflen = pread_compressed(rq->db, nptr->pointer, (char **) &nodebuf)) < 0) { error_pass(COUCHSTORE_ERROR_READ); } } local_result = make_modres(dst->arena, rq); error_unless(local_result, COUCHSTORE_ERROR_ALLOC_FAIL); if (nptr == NULL || nodebuf[0] == 1) { //KV Node local_result->node_type = KV_NODE; while (bufpos < nodebuflen) { sized_buf cmp_key, val_buf; bufpos += read_kv(nodebuf + bufpos, &cmp_key, &val_buf); int advance = 0; while (!advance && start < end) { advance = 1; int cmp_val = rq->cmp.compare(&cmp_key, rq->actions[start].key); if (cmp_val < 0) { //Key less than action key mr_push_item(&cmp_key, &val_buf, local_result); } else if (cmp_val > 0) { //Key greater than action key switch (rq->actions[start].type) { case ACTION_INSERT: local_result->modified = 1; mr_push_item(rq->actions[start].key, rq->actions[start].value.data, local_result); break; case ACTION_REMOVE: local_result->modified = 1; break; case ACTION_FETCH: if (rq->fetch_callback) { //not found (*rq->fetch_callback)(rq, rq->actions[start].key, NULL, rq->actions[start].value.arg); } } start++; //Do next action on same item in the node, as our action was //not >= it. advance = 0; } else if (cmp_val == 0) { //Node key is equal to action key switch (rq->actions[start].type) { case ACTION_INSERT: local_result->modified = 1; mr_push_item(rq->actions[start].key, rq->actions[start].value.data, local_result); break; case ACTION_REMOVE: local_result->modified = 1; break; case ACTION_FETCH: if (rq->fetch_callback) { (*rq->fetch_callback)(rq, rq->actions[start].key, &val_buf, rq->actions[start].value.arg); } //Do next action on same item in the node, as our action was a fetch //and there may be an equivalent insert or remove //following. advance = 0; } start++; } } if (start == end && !advance) { //If we've exhausted actions then just keep this key mr_push_item(&cmp_key, &val_buf, local_result); } } while (start < end) { //We're at the end of a leaf node. switch (rq->actions[start].type) { case ACTION_INSERT: local_result->modified = 1; mr_push_item(rq->actions[start].key, rq->actions[start].value.data, local_result); break; case ACTION_REMOVE: local_result->modified = 1; break; case ACTION_FETCH: if (rq->fetch_callback) { //not found (*rq->fetch_callback)(rq, rq->actions[start].key, NULL, rq->actions[start].value.arg); } break; } start++; } } else if (nodebuf[0] == 0) { //KP Node local_result->node_type = KP_NODE; while (bufpos < nodebuflen && start < end) { sized_buf cmp_key, val_buf; bufpos += read_kv(nodebuf + bufpos, &cmp_key, &val_buf); int cmp_val = rq->cmp.compare(&cmp_key, rq->actions[start].key); if (bufpos == nodebuflen) { //We're at the last item in the kpnode, must apply all our //actions here. node_pointer *desc = read_pointer(dst->arena, &cmp_key, val_buf.buf); if (!desc) { errcode = COUCHSTORE_ERROR_ALLOC_FAIL; goto cleanup; } errcode = modify_node(rq, desc, start, end, local_result); if (errcode != COUCHSTORE_SUCCESS) { goto cleanup; } break; } if (cmp_val < 0) { //Key in node item less than action item and not at end //position, so just add it and continue. node_pointer *add = read_pointer(dst->arena, &cmp_key, val_buf.buf); if (!add) { errcode = COUCHSTORE_ERROR_ALLOC_FAIL; goto cleanup; } errcode = mr_push_pointerinfo(add, local_result); if (errcode != COUCHSTORE_SUCCESS) { goto cleanup; } } else if (cmp_val >= 0) { //Found a key in the node greater than the one in the current //action. Descend into the pointed node with as many actions as //are less than the key here. int range_end = start; while (range_end < end && rq->cmp.compare(rq->actions[range_end].key, &cmp_key) <= 0) { range_end++; } node_pointer *desc = read_pointer(dst->arena, &cmp_key, val_buf.buf); if (!desc) { errcode = COUCHSTORE_ERROR_ALLOC_FAIL; goto cleanup; } errcode = modify_node(rq, desc, start, range_end, local_result); start = range_end; if (errcode != COUCHSTORE_SUCCESS) { goto cleanup; } } } while (bufpos < nodebuflen) { sized_buf cmp_key, val_buf; bufpos += read_kv(nodebuf + bufpos, &cmp_key, &val_buf); node_pointer *add = read_pointer(dst->arena, &cmp_key, val_buf.buf); if (!add) { errcode = COUCHSTORE_ERROR_ALLOC_FAIL; goto cleanup; } errcode = mr_push_pointerinfo(add, local_result); if (errcode != COUCHSTORE_SUCCESS) { goto cleanup; } } } else { errcode = COUCHSTORE_ERROR_CORRUPT; goto cleanup; } //If we've done modifications, write out the last leaf node. error_pass(flush_mr(local_result)); if (!local_result->modified && nptr != NULL) { //If we didn't do anything, give back the pointer to the original mr_push_pointerinfo(nptr, dst); } else { //Otherwise, give back the pointers to the nodes we've created. dst->modified = 1; error_pass(mr_move_pointers(local_result, dst)); } cleanup: if (nodebuf) { free(nodebuf); } return errcode; }
int modify_node(couchfile_modify_request *rq, couchfile_pointer_info *nptr, int start, int end, couchfile_modify_result *dst) { eterm_buf current_node; int curnode_pos = 0; int read_size = 0; int list_start_pos = 0; int node_len = 0; int node_bound = 0; int errcode = 0; int kpos = 0; char node_type[MAXATOMLEN + 1]; node_type[0] = 0; DBG("Enter modify_node. %d - %d\r\n", start, end); if(start == end) { return 0; } if(nptr == NULL) { current_node = empty_root; } else { if((read_size = pread_bin(rq->fd, nptr->pointer, ¤t_node.buf)) < 0) { return ERROR_READ_FILE; } current_node.size = read_size; DBG("... read node from %d\r\n", nptr->pointer); curnode_pos++; //Skip over 131. } couchfile_modify_result *local_result = make_modres(rq); ei_decode_tuple_header(current_node.buf, &curnode_pos, NULL); if(ei_decode_atom(current_node.buf, &curnode_pos, node_type) < 0) { errcode = ERROR_PARSE; goto cleanup; } list_start_pos = curnode_pos; if(ei_decode_list_header(current_node.buf, &curnode_pos, &node_len) < 0) { errcode = ERROR_PARSE; goto cleanup; } if(strcmp("kv_node", node_type) == 0) { local_result->node_type = KV_NODE; while(start < end) { DBG("act on kvnode item\r\n"); if(node_bound >= node_len) { //We're at the end of a leaf node. DBG(" ... exec action at end!\r\n"); switch(rq->actions[start].type) { case ACTION_INSERT: local_result->modified = 1; mr_push_action(&rq->actions[start], local_result); break; case ACTION_REMOVE: local_result->modified = 1; break; case ACTION_FETCH: if(rq->fetch_callback) { //not found (*rq->fetch_callback)(rq, rq->actions[start].key, NULL); } break; } start++; } else { kpos = find_first_gteq(current_node.buf, list_start_pos, rq->actions[start].cmp_key, &rq->cmp, node_bound); if(kpos < 0) { errcode = ERROR_PARSE; goto cleanup; } //Add items from node_bound up to but not including the current mr_push_kv_range(current_node.buf, list_start_pos, node_bound, rq->cmp.list_pos, local_result); if(rq->cmp.last_cmp_val > 0) // Node key > action key { DBG(" Inserting action before\r\n"); switch(rq->actions[start].type) { case ACTION_INSERT: local_result->modified = 1; mr_push_action(&rq->actions[start], local_result); break; case ACTION_REMOVE: local_result->modified = 1; break; case ACTION_FETCH: if(rq->fetch_callback) { //not found (*rq->fetch_callback)(rq, rq->actions[start].key, NULL); } break; } start++; node_bound = rq->cmp.list_pos; } else if(rq->cmp.last_cmp_val < 0) // Node key < action key { DBG(" -- Continue with this action\r\n"); node_bound = rq->cmp.list_pos + 1; mr_push_kv_range(current_node.buf, list_start_pos, node_bound - 1, node_bound, local_result); } else //Node key == action key { DBG(" Replacing value with action\r\n"); switch(rq->actions[start].type) { case ACTION_INSERT: local_result->modified = 1; mr_push_action(&rq->actions[start], local_result); node_bound = rq->cmp.list_pos + 1; break; case ACTION_REMOVE: local_result->modified = 1; node_bound = rq->cmp.list_pos + 1; break; case ACTION_FETCH: if(rq->fetch_callback) { eterm_buf cb_tmp; int cb_vpos = kpos; ei_decode_tuple_header(current_node.buf, &cb_vpos, NULL); ei_skip_term(current_node.buf, &cb_vpos); cb_tmp.buf = current_node.buf + cb_vpos; cb_tmp.size = cb_vpos; ei_skip_term(current_node.buf, &cb_vpos); cb_tmp.size = cb_vpos - cb_tmp.size; (*rq->fetch_callback)(rq, rq->actions[start].key, &cb_tmp); } node_bound = rq->cmp.list_pos; break; } start++; } } } //Push any items past the end of what we dealt with onto result. if(node_bound < node_len) { mr_push_kv_range(current_node.buf, list_start_pos, node_bound, node_len, local_result); } } else if(strcmp("kp_node", node_type) == 0) { local_result->node_type = KP_NODE; while(start < end) { kpos = find_first_gteq(current_node.buf, list_start_pos, rq->actions[start].cmp_key, &rq->cmp, node_bound); if(kpos < 0) { errcode = ERROR_PARSE; goto cleanup; } if(rq->cmp.list_pos == (node_len - 1)) //got last item in kp_node { //Push all items in node onto mr mr_push_kp_range(current_node.buf, list_start_pos, node_bound, rq->cmp.list_pos, local_result); DBG(" ...descending into final item of kpnode\r\n"); couchfile_pointer_info *desc = read_pointer(current_node.buf, kpos); errcode = modify_node(rq, desc, start, end, local_result); if(local_result->values_end->value.pointer != desc) { free(desc); } if(errcode < 0) { goto cleanup; } node_bound = node_len; break; } else { //Get all actions with key <= the key of the current item in the //kp_node //Push items in node up to but not including current onto mr mr_push_kp_range(current_node.buf, list_start_pos, node_bound, rq->cmp.list_pos - 1, local_result); int range_end = start; while(range_end < end && ((*rq->cmp.compare)(rq->actions[range_end].cmp_key, rq->cmp.last_cmp_key) <= 0)) { range_end++; } DBG(" ...descending into item %d of kpnode\r\n", rq->cmp.list_pos); node_bound = rq->cmp.list_pos + 1; couchfile_pointer_info *desc = read_pointer(current_node.buf, kpos); errcode = modify_node(rq, desc, start, range_end, local_result); if(local_result->values_end->value.pointer != desc) { free(desc); } if(errcode < 0) { goto cleanup; } start = range_end; } } DBG(".. Finished kp node, up to %d\r\n", node_bound); if(node_bound < node_len) { //Processed all the actions but haven't exhausted this kpnode. //push the rest of it onto the mr. mr_push_kp_range(current_node.buf, list_start_pos, node_bound, node_len, local_result); } } else { errcode = ERROR_PARSE; goto cleanup; } //If we've done modifications, write out the last leaf node. errcode = flush_mr(local_result); if(errcode == 0) { if(!local_result->modified && nptr != NULL) { //If we didn't do anything, give back the pointer to the original mr_push_pointerinfo(nptr, dst); } else { //Otherwise, give back the pointers to the nodes we've created. dst->modified = 1; errcode = mr_move_pointers(local_result, dst); } } cleanup: free_modres(local_result); if(current_node.buf != empty_root.buf) { free(current_node.buf); } return errcode; }