/*
 * Removes from the secondary index btree the entries queued in gc_list.
 *
 * Each list element carries an array of (column value, digest) pairs. Pairs
 * are consumed from the end of the array; every pair is re-validated with
 * as_sindex_can_defrag_record() right before it is removed, since the digest
 * may have re-appeared between list creation and this call.
 *
 * n2del   - maximum number of pairs to consume in this call; 0 means the
 *           caller does not want any defrag work done.
 * deleted - incremented by the number of successful btree removals.
 *
 * Returns true if gc_list still holds elements on exit, false otherwise
 * (also false when n2del is 0).
 */
bool
ai_btree_defrag_list(as_sindex_metadata *imd, as_sindex_pmetadata *pimd, cf_ll *gc_list, ulong n2del, ulong *deleted)
{
	// A zero budget means the caller does not want to defrag.
	if (n2del == 0) {
		return false;
	}

	as_namespace *ns = imd->si->ns;
	ulong n_removed = 0;

	// Btree footprint before any removal; the shrinkage is released below.
	ulong mem_before = pimd->ibtr->msize + pimd->ibtr->nsize;

	uint64_t validation_time_ns = 0;
	uint64_t deletion_time_ns = 0;

	// Set when validation asks to skip the iteration or the budget runs out.
	bool stop = false;

	while (!stop && cf_ll_size(gc_list) != 0) {
		ll_sindex_gc_element *node = (ll_sindex_gc_element *)cf_ll_get_head(gc_list);
		objs_to_defrag_arr *dt = node->objs_to_defrag;

		while (dt->num != 0) {
			// Always work on the last remaining pair of the array.
			int last = dt->num - 1;

			SET_TIME_FOR_SINDEX_GC_HIST(validation_time_ns);
			int ret = as_sindex_can_defrag_record(ns, &(dt->acol_digs[last].dig));
			SINDEX_GC_HIST_INSERT_DATA_POINT(sindex_gc_validate_obj_hist, validation_time_ns);
			validation_time_ns = 0;

			if (ret == AS_SINDEX_GC_SKIP_ITERATION) {
				// Leave the pair queued; it is neither consumed nor counted.
				stop = true;
				break;
			}

			if (ret == AS_SINDEX_GC_OK) {
				ai_obj apk;
				init_ai_objFromDigest(&apk, &(dt->acol_digs[last].dig));
				ai_obj *acol = &(dt->acol_digs[last].acol);
				cf_detail(AS_SINDEX, "Defragged %lu %ld", acol->l, *((uint64_t *)&apk.y));

				SET_TIME_FOR_SINDEX_GC_HIST(deletion_time_ns);
				if (reduced_iRem(pimd->ibtr, acol, &apk) == AS_SINDEX_OK) {
					n_removed++;
					// Only successful removals are recorded in the histogram.
					SINDEX_GC_HIST_INSERT_DATA_POINT(sindex_gc_delete_obj_hist, deletion_time_ns);
				}
				deletion_time_ns = 0;
			}

			// The pair is consumed whether or not it was actually removed.
			dt->num -= 1;
			n2del--;

			if (n2del == 0) {
				stop = true;
				break;
			}
		}

		// An element is unlinked only once its array has been fully drained
		// without hitting a stop condition.
		if (!stop) {
			cf_ll_delete(gc_list, (cf_ll_element *)node);
		}
	}

	as_sindex_release_data_memory(imd, (mem_before - pimd->ibtr->msize - pimd->ibtr->nsize));
	*deleted += n_removed;

	return cf_ll_size(gc_list) != 0;
}
/*
 * Appends one digest to the record list recl, which is a linked list of
 * fixed-capacity digest arrays. A new array (and list element) is started
 * when the list is empty or the tail array already holds NUM_DIGS_PER_ARR
 * digests.
 *
 * Digests for which as_sindex_partition_isactive() reports false are
 * silently skipped.
 *
 * Return  0 in case of success (including a skipped digest)
 *        -1 in case of failure (no digest array could be obtained)
 */
static int
btree_addsinglerec(as_sindex_metadata *imd, cf_digest *dig, cf_ll *recl, uint64_t *n_bdigs)
{
	if (!as_sindex_partition_isactive(imd->si->ns, dig)) {
		return 0;
	}

	dig_arr_t *dt = NULL;
	bool need_new_arr = true;

	// Reuse the tail array as long as it still has room.
	if (cf_ll_size(recl) != 0) {
		dt = ((ll_recl_element *)cf_ll_get_tail(recl))->dig_arr;
		need_new_arr = (dt->num == NUM_DIGS_PER_ARR);
	}

	if (need_new_arr) {
		dt = getDigestArray();
		if (dt == NULL) {
			return -1;
		}

		// NOTE(review): the cf_malloc() result is used unchecked - confirm
		// whether cf_malloc() can return NULL in this build.
		ll_recl_element *node = cf_malloc(sizeof(ll_recl_element));
		node->dig_arr = dt;
		cf_ll_append(recl, (cf_ll_element *)node);
	}

	memcpy(&dt->digs[dt->num], dig, CF_DIGEST_KEY_SZ);
	dt->num++;
	*n_bdigs += 1;

	return 0;
}
/*
 * libev I/O watcher callback for async connections.
 *
 * Exactly one event class is handled per invocation, checked in this order:
 * EV_READ, EV_WRITE, EV_ERROR; anything else is logged as unknown.
 */
static void
as_ev_callback(struct ev_loop* loop, ev_io* watcher, int revents)
{
	if (revents & EV_READ) {
		as_event_connection* conn = watcher->data;
		as_event_command* cmd;

		if (! conn->pipeline) {
			cmd = ((as_async_connection*)conn)->cmd;
		}
		else {
			as_pipe_connection* pipe = (as_pipe_connection*)conn;

			if (pipe->writer && cf_ll_size(&pipe->readers) == 0) {
				// Authentication response will only have a writer.
				cmd = pipe->writer;
			}
			else {
				// Next response is at head of reader linked list.
				cf_ll_element* link = cf_ll_get_head(&pipe->readers);

				if (! link) {
					as_log_debug("Pipeline read event ignored");
					return;
				}
				cmd = as_pipe_link_to_command(link);
			}
		}

		as_ev_command_read(cmd);
		return;
	}

	if (revents & EV_WRITE) {
		as_event_connection* conn = watcher->data;
		as_event_command* cmd;

		if (conn->pipeline) {
			cmd = ((as_pipe_connection*)conn)->writer;
		}
		else {
			cmd = ((as_async_connection*)conn)->cmd;
		}

		if (as_ev_write(cmd) != AS_EVENT_WRITE_COMPLETE) {
			// Nothing more to do in this callback until the write completes.
			return;
		}

		// Done with write. Register for read.
		if (cmd->state == AS_ASYNC_STATE_AUTH_WRITE) {
			as_event_set_auth_read_header(cmd);
			as_ev_watch_read(cmd);
		}
		else {
			as_ev_command_read_start(cmd);
		}
		return;
	}

	if (revents & EV_ERROR) {
		as_log_error("Async error occurred: %d", revents);
		return;
	}

	as_log_warn("Unknown event received: %d", revents);
}
/* * Internal function which adds digests to the defrag_list * Mallocs the nodes of defrag_list * Returns : * -1 : Error * number of digests found : success * */ static long build_defrag_list_from_nbtr(as_namespace *ns, ai_obj *acol, bt *nbtr, ulong nofst, ulong *limit, uint64_t * tot_found, cf_ll *gc_list) { int error = -1; btEntry *nbe; // STEP 1: go thru a portion of the nbtr and find to-be-deleted-PKs // TODO: a range query may be smarter then using the Xth Iterator btSIter *nbi = (nofst ? btGetFullXthIter(nbtr, nofst, 1, NULL, 0) : btGetFullRangeIter(nbtr, 1, NULL)); if (!nbi) { return error; } long found = 0; long processed = 0; while ((nbe = btRangeNext(nbi, 1))) { ai_obj *akey = nbe->key; int ret = as_sindex_can_defrag_record(ns, (cf_digest *) (&akey->y)); if (ret == AS_SINDEX_GC_SKIP_ITERATION) { *limit = 0; break; } else if (ret == AS_SINDEX_GC_OK) { bool create = (cf_ll_size(gc_list) == 0) ? true : false; objs_to_defrag_arr *dt; if (!create) { cf_ll_element * ele = cf_ll_get_tail(gc_list); dt = ((ll_sindex_gc_element*)ele)->objs_to_defrag; if (dt->num == SINDEX_GC_NUM_OBJS_PER_ARR) { create = true; } } if (create) { dt = as_sindex_gc_get_defrag_arr(); if (!dt) { *tot_found += found; return -1; } ll_sindex_gc_element * node; node = cf_malloc(sizeof(ll_sindex_gc_element)); node->objs_to_defrag = dt; cf_ll_append(gc_list, (cf_ll_element *)node); } cloneDigestFromai_obj(&(dt->acol_digs[dt->num].dig), akey); ai_objClone(&(dt->acol_digs[dt->num].acol), acol); dt->num += 1; found++; } processed++; (*limit)--; if (*limit == 0) break; } btReleaseRangeIterator(nbi); *tot_found += found; return processed; }
/*
 * Appends one (secondary-index key, digest) pair to the record list recl,
 * which is a linked list of fixed-capacity key arrays. A new array (and list
 * element) is started when the list is empty or the tail array already holds
 * AS_INDEX_KEYS_PER_ARR entries.
 *
 * Only digests belonging to a query-able partition are added: when
 * partitions_pre_reserved is set the can_partition_query[] table decides,
 * otherwise client_replica_maps_is_partition_queryable() is consulted.
 *
 * Return  0 in case of success (including a skipped digest)
 *        -1 in case of failure (no key array could be obtained)
 */
static int
btree_addsinglerec(as_sindex_metadata *imd, ai_obj * key, cf_digest *dig, cf_ll *recl, uint64_t *n_bdigs, bool * can_partition_query, bool partitions_pre_reserved)
{
	// The digests which belong to one of the query-able partitions are
	// eligible to go into recl.
	uint32_t pid = as_partition_getid(dig);
	as_namespace * ns = imd->si->ns;

	bool queryable = partitions_pre_reserved ?
			can_partition_query[pid] :
			client_replica_maps_is_partition_queryable(ns, pid);

	if (! queryable) {
		return 0;
	}

	as_index_keys_arr * keys_arr = NULL;
	bool need_new_arr = true;

	// Reuse the tail array as long as it still has room.
	if (cf_ll_size(recl) != 0) {
		keys_arr = ((as_index_keys_ll_element*)cf_ll_get_tail(recl))->keys_arr;
		need_new_arr = (keys_arr->num == AS_INDEX_KEYS_PER_ARR);
	}

	if (need_new_arr) {
		keys_arr = as_index_get_keys_arr();
		if (keys_arr == NULL) {
			cf_warning(AS_SINDEX, "Fail to allocate sindex key value array");
			return -1;
		}

		// NOTE(review): the cf_malloc() result is used unchecked - confirm
		// whether cf_malloc() can return NULL in this build.
		as_index_keys_ll_element * node = cf_malloc(sizeof(as_index_keys_ll_element));
		node->keys_arr = keys_arr;
		cf_ll_append(recl, (cf_ll_element *)node);
	}

	// Copy the digest (value)
	memcpy(&keys_arr->pindex_digs[keys_arr->num], dig, CF_DIGEST_KEY_SZ);

	// Copy the key
	if (! C_IS_DG(imd->sktype)) {
		keys_arr->sindex_keys[keys_arr->num].key.int_key = key->l;
	}
	else {
		memcpy(&keys_arr->sindex_keys[keys_arr->num].key.str_key, &key->y, CF_DIGEST_KEY_SZ);
	}

	keys_arr->num++;
	*n_bdigs += 1;

	return 0;
}
/*
 * Scans the digests stored in arr, starting at index nofst, and appends the
 * ones that can be defragged to gc_list. Mallocs the nodes of gc_list.
 *
 * ns        - namespace passed to as_sindex_can_defrag_record().
 * acol      - column value cloned next to every digest queued.
 * arr       - packed digests, CF_DIGEST_KEY_SZ bytes each, arr->used entries.
 * nofst     - index of the first entry to look at.
 * limit     - in/out budget, decremented once per entry processed; set to 0
 *             when validation returns AS_SINDEX_GC_SKIP_ITERATION.
 * tot_found - incremented by the number of digests queued by this call, on
 *             both the success path and the array-allocation failure path.
 *
 * Returns :
 *		-1 : Error (no defrag array could be obtained)
 *		number of entries processed : success
 */
static long
build_defrag_list_from_arr(as_namespace *ns, ai_obj *acol, ai_arr *arr, long nofst, long *limit, uint64_t * tot_found, cf_ll *gc_list)
{
	long n_found = 0;
	long n_processed = 0;
	uint64_t validation_time_ns = 0;

	for (int i = nofst; i < arr->used; i++) {
		cf_digest *dig = (cf_digest *) &arr->data[i * CF_DIGEST_KEY_SZ];

		SET_TIME_FOR_SINDEX_GC_HIST(validation_time_ns);
		int ret = as_sindex_can_defrag_record(ns, dig);
		SINDEX_GC_HIST_INSERT_DATA_POINT(sindex_gc_validate_obj_hist, validation_time_ns);
		validation_time_ns = 0;

		if (ret == AS_SINDEX_GC_SKIP_ITERATION) {
			*limit = 0;
			break;
		}

		if (ret == AS_SINDEX_GC_OK) {
			objs_to_defrag_arr *dt = NULL;
			bool need_new_arr = true;

			// Reuse the tail array as long as it still has room.
			if (cf_ll_size(gc_list) != 0) {
				dt = ((ll_sindex_gc_element*)cf_ll_get_tail(gc_list))->objs_to_defrag;
				need_new_arr = (dt->num == SINDEX_GC_NUM_OBJS_PER_ARR);
			}

			if (need_new_arr) {
				dt = as_sindex_gc_get_defrag_arr();
				if (dt == NULL) {
					*tot_found += n_found;
					return -1;
				}

				// NOTE(review): the cf_malloc() result is used unchecked -
				// confirm whether cf_malloc() can return NULL in this build.
				ll_sindex_gc_element * node = cf_malloc(sizeof(ll_sindex_gc_element));
				node->objs_to_defrag = dt;
				cf_ll_append(gc_list, (cf_ll_element *)node);
			}

			memcpy(&(dt->acol_digs[dt->num].dig), dig, CF_DIGEST_KEY_SZ);
			ai_objClone(&(dt->acol_digs[dt->num].acol), acol);

			dt->num += 1;
			n_found++;
		}

		// Every validated entry counts against the budget, queued or not.
		n_processed++;
		*limit -= 1;
		if (*limit == 0) {
			break;
		}
	}

	*tot_found += n_found;
	return n_processed;
}
/*
 * libuv write-completion callback for an async command.
 *
 * On a successful write the command is switched to the read-header state and
 * reading is started on the stream. For pipelined commands the read is only
 * started here when this command is the sole entry in the connection's
 * readers list. A failed write other than UV_ECANCELED is reported through
 * as_event_socket_error(); UV_ECANCELED is ignored.
 */
static void
as_uv_command_write_complete(uv_write_t* req, int status)
{
	if (! as_uv_connection_alive(req->handle)) {
		return;
	}

	as_event_command* cmd = req->data;

	if (status != 0) {
		if (status != UV_ECANCELED) {
			as_error err;
			as_error_update(&err, AEROSPIKE_ERR_ASYNC_CONNECTION, "Socket write failed: %s", uv_strerror(status));
			as_event_socket_error(cmd, &err);
		}
		return;
	}

	// Write succeeded: prepare to read the response header.
	cmd->len = sizeof(as_proto);
	cmd->pos = 0;
	cmd->state = AS_ASYNC_STATE_READ_HEADER;

	if (cmd->pipe_listener != NULL) {
		as_pipe_read_start(cmd);
		as_pipe_connection* conn = (as_pipe_connection*)cmd->conn;

		// There already was an active reader for a previous command.
		if (cf_ll_size(&conn->readers) > 1) {
			return;
		}
	}

	int rc = uv_read_start(req->handle, as_uv_command_buffer, as_uv_command_read);

	if (rc) {
		as_error err;
		as_error_update(&err, AEROSPIKE_ERR_ASYNC_CONNECTION, "uv_read_start failed: %s", uv_strerror(rc));
		as_event_socket_error(cmd, &err);
	}
}
/*
 * libuv read callback for an async command.
 *
 * Accumulates nread bytes into the command's current read window
 * (cmd->pos / cmd->len). Once the window is full:
 *  - in the header state, the proto header is byte-swapped, the body length
 *    is taken from it, the buffer is grown if needed and the command moves
 *    to the body state;
 *  - in the body state, cmd->parse_results() is called. If it returns true,
 *    reading is stopped and, for a pipelined connection that had at least
 *    two readers queued, restarted for the connection. Otherwise the command
 *    is reset to read the next header.
 *
 * Read errors and undersized headers are reported through
 * as_event_socket_error() after stopping the read.
 */
static void
as_uv_command_read(uv_stream_t* stream, ssize_t nread, const uv_buf_t* buf)
{
	if (! as_uv_connection_alive(stream)) {
		return;
	}

	as_event_command* cmd = as_uv_get_command(stream->data);

	if (nread < 0) {
		uv_read_stop(stream);
		as_error err;
		as_error_update(&err, AEROSPIKE_ERR_ASYNC_CONNECTION, "Socket read failed: %zd", nread);
		as_event_socket_error(cmd, &err);
		return;
	}

	cmd->pos += nread;

	if (cmd->pos < cmd->len) {
		// Read not finished.
		return;
	}

	if (cmd->state == AS_ASYNC_STATE_READ_HEADER) {
		as_proto* proto = (as_proto*)cmd->buf;
		as_proto_swap_from_be(proto);
		size_t body_size = proto->sz;

		// Switch to reading the body before validating its length.
		cmd->len = (uint32_t)body_size;
		cmd->pos = 0;
		cmd->state = AS_ASYNC_STATE_READ_BODY;

		if (cmd->len < sizeof(as_msg)) {
			uv_read_stop(stream);
			as_error err;
			as_error_update(&err, AEROSPIKE_ERR_CLIENT, "Invalid record header size: %u", cmd->len);
			as_event_socket_error(cmd, &err);
			return;
		}

		// Replace the buffer when the body does not fit; the old contents
		// are not carried over.
		if (cmd->len > cmd->capacity) {
			if (cmd->free_buf) {
				cf_free(cmd->buf);
			}
			cmd->buf = cf_malloc(body_size);
			cmd->capacity = cmd->len;
			cmd->free_buf = true;
		}
		return;
	}

	// Captured before parse_results() is called - presumably because parsing
	// can complete the command and change the readers list; verify.
	as_pipe_connection* next_conn = NULL;

	if (cmd->pipe_listener != NULL) {
		as_pipe_connection* pipe = (as_pipe_connection*)cmd->conn;

		if (cf_ll_size(&pipe->readers) >= 2) {
			next_conn = pipe;
		}
	}

	if (! cmd->parse_results(cmd)) {
		// Batch, scan, query is not finished.
		cmd->len = sizeof(as_proto);
		cmd->pos = 0;
		cmd->state = AS_ASYNC_STATE_READ_HEADER;
		return;
	}

	uv_read_stop(stream);

	// Register the next reader, if there are readers left.
	if (next_conn == NULL) {
		return;
	}

	stream->data = next_conn;

	int status = uv_read_start(stream, as_uv_command_buffer, as_uv_command_read);

	if (status) {
		as_error err;
		as_error_update(&err, AEROSPIKE_ERR_ASYNC_CONNECTION, "uv_read_start failed: %s", uv_strerror(status));
		as_event_socket_error(cmd, &err);
	}
}