// Claim the first unused bin slot in rd and initialize it with the name given
// as a length-delimited buffer (name, namesz).
//
// - Single-bin namespace: the name is ignored; the one bin is initialized
//   nameless and returned. cf_crash() is called if that bin is already in use.
// - Multi-bin namespace: names of AS_ID_BIN_SZ bytes or more are rejected with
//   a warning and NULL is returned.
//
// Returns the initialized bin, or NULL if the name is too long or no slot in
// rd->bins[0 .. rd->n_bins) is free.
as_bin *
as_bin_create_from_buf(as_storage_rd *rd, byte *name, size_t namesz)
{
	if (rd->ns->single_bin) {
		if (as_bin_inuse(rd->bins)) {
			cf_crash(AS_BIN, "single bin create found bin in use");
		}

		as_bin_init_nameless(rd->bins);

		return rd->bins;
	}

	if (namesz >= AS_ID_BIN_SZ) {
		// namesz is a size_t - %zu is the matching conversion (was %lu).
		cf_warning(AS_BIN, "bin name too long (%zu)", namesz);
		return NULL;
	}

	for (uint16_t i = 0; i < rd->n_bins; i++) {
		as_bin *b = &rd->bins[i];

		if (! as_bin_inuse(b)) {
			as_bin_init_w_len(rd->ns, b, name, namesz);
			return b;
		}
	}

	return NULL;
}
// Does not check bin name length. as_bin * as_bin_create(as_storage_rd *rd, const char *name) { if (rd->ns->single_bin) { if (as_bin_inuse(rd->bins)) { cf_crash(AS_BIN, "single bin create found bin in use"); } as_bin_init_nameless(rd->bins); return rd->bins; } as_bin *b = NULL; for (uint16_t i = 0; i < rd->n_bins; i++) { if (! as_bin_inuse(&rd->bins[i])) { b = &rd->bins[i]; break; } } if (b) { as_bin_init(rd->ns, b, name); } return b; }
// Look up a bin by name, where the name is a length-delimited buffer.
//
// Single-bin namespace: returns the one bin if as_bin_inuse_has(rd), else NULL.
// Otherwise the name is resolved to an id and the in-use prefix of rd->bins is
// searched for that id. Returns NULL if the name has no id or no bin matches.
as_bin *
as_bin_get_from_buf(as_storage_rd *rd, byte *name, size_t namesz)
{
	if (rd->ns->single_bin) {
		if (as_bin_inuse_has(rd)) {
			return rd->bins;
		}

		return NULL;
	}

	uint32_t wanted_id;

	if (! as_bin_get_id_w_len(rd->ns, name, namesz, &wanted_id)) {
		return NULL;
	}

	// The scan stops at the first unused slot.
	for (uint16_t slot = 0;
			slot < rd->n_bins && as_bin_inuse(&rd->bins[slot]); slot++) {
		as_bin *candidate = &rd->bins[slot];

		if ((uint32_t)candidate->id == wanted_id) {
			return candidate;
		}
	}

	return NULL;
}
// Find the position in rd->bins of the bin whose name is given as a
// length-delimited buffer.
//
// Single-bin namespace: 0 if as_bin_inuse_has(rd), else -1.
// Otherwise returns the index of the matching in-use bin, or -1 if the name
// has no id or no in-use bin carries that id.
int32_t
as_bin_get_index(as_storage_rd *rd, byte *name, size_t namesz)
{
	if (rd->ns->single_bin) {
		if (as_bin_inuse_has(rd)) {
			return 0;
		}

		return -1;
	}

	uint32_t wanted_id;

	if (! as_bin_get_id_from_name_buf(rd->ns, name, namesz, &wanted_id)) {
		return -1;
	}

	// The scan stops at the first unused slot.
	for (uint16_t slot = 0;
			slot < rd->n_bins && as_bin_inuse(&rd->bins[slot]); slot++) {
		if ((uint32_t)rd->bins[slot].id == wanted_id) {
			return (int32_t)slot;
		}
	}

	return -1;
}
static uint16_t udf_record_numbins(const as_rec * rec) { int ret = udf_record_param_check(rec, UDF_BIN_NONAME, __FILE__, __LINE__); if (ret) { return 0; } udf_record *urecord = (udf_record *) as_rec_source(rec); if (urecord && (urecord->flag & UDF_RECORD_FLAG_STORAGE_OPEN)) { if (urecord->rd->ns->single_bin) { return 1; } uint16_t i; as_storage_rd *rd = urecord->rd; for (i = 0; i < rd->n_bins; i++) { as_bin *b = &rd->bins[i]; if (! as_bin_inuse(b)) { break; } } return i; } else { cf_warning(AS_UDF, "Error in getting numbins: no record found"); return 0; } }
// Find the position in rd->bins of the bin with the given NUL-terminated name.
//
// Single-bin namespace: 0 if as_bin_inuse_has(rd), else -1.
// Otherwise the name is looked up in the namespace's bin-name vmap; returns
// the index of the in-use bin with that id, or -1 if the name is unknown or no
// in-use bin carries it.
int32_t
as_bin_get_index(as_storage_rd *rd, const char *name)
{
	if (rd->ns->single_bin) {
		if (as_bin_inuse_has(rd)) {
			return 0;
		}

		return -1;
	}

	uint32_t wanted_id;

	if (cf_vmapx_get_index(rd->ns->p_bin_name_vmap, name, &wanted_id) !=
			CF_VMAPX_OK) {
		return -1;
	}

	// The scan stops at the first unused slot.
	for (uint16_t slot = 0;
			slot < rd->n_bins && as_bin_inuse(&rd->bins[slot]); slot++) {
		if ((uint32_t)rd->bins[slot].id == wanted_id) {
			return (int32_t)slot;
		}
	}

	return -1;
}
// - Seems like an as_storage_record method, but leaving it here for now. // - sets rd->bins! // - for data-in-memory, assumes rd->n_bins is already set! bool as_bin_get_and_size_all(as_storage_rd *rd, as_bin *stack_bins) { if (rd->ns->storage_data_in_memory) { rd->bins = rd->ns->single_bin ? as_index_get_single_bin(rd->r) : safe_bins(rd->r); if (! rd->bins) { // i.e. multi-bin when record just created. return true; } if (rd->ns->storage_type != AS_STORAGE_ENGINE_SSD) { // Not interested in calculating flat-sizing info. return true; } // Calculate starting values for rd->n_bins_to_write and // rd->particles_flat_size. uint16_t i; for (i = 0; i < rd->n_bins; i++) { as_bin *b = &rd->bins[i]; if (! as_bin_inuse(b)) { // i.e. single-bin when record just created. break; } size_t flat_size; if (0 != as_particle_get_flat_size(b, &flat_size)) { return false; } rd->particles_flat_size += flat_size; } rd->n_bins_to_write = (uint32_t)i; return true; } // Data NOT in-memory. rd->bins = stack_bins; as_bin_set_all_empty(rd); if (rd->record_on_device && ! rd->ignore_record_on_device) { // Sets rd->n_bins_to_write and rd->particles_flat_size: if (0 != as_storage_particle_read_and_size_all_ssd(rd)) { return false; } } return true; }
/* Internal Function: Workhorse function to send response back to the client
 *                    after UDF execution.
 *
 * caller:
 *      send_success
 *      send_failure
 *
 * Assumption: The call should be set up properly, pointing to the tr.
 *
 * Special Handling: If it is a background scan udf job, do not send any
 *                   response to the client.
 *                   If it is a scan job, do not clean up the fd - it will
 *                   be done by the scan thread after the scan is finished.
 *
 * Parameters:
 *      call               - UDF call; call->transaction supplies namespace and
 *                           generation, call->udf_type selects the handling
 *      key, klen          - name of the single response bin; for background
 *                           scans only compared against "FAILURE"/"SUCCESS"
 *      vtype, val, vlen   - value handed to make_send_bin()
 *
 * Returns 0 on success (including the background-scan case, where nothing is
 * sent), -1 if make_send_bin() fails.
 */
static int
send_response(udf_call *call, const char *key, size_t klen, int vtype, void *val, size_t vlen)
{
	as_transaction * tr = call->transaction;
	as_namespace * ns = tr->rsv.ns;
	uint32_t generation = tr->generation;
	uint sp_sz = 1024 * 16;
	uint32_t void_time = 0;
	uint written_sz = 0;
	bool keep_fd = false;
	as_bin stack_bin;
	as_bin * bin = &stack_bin;

	// space for the stack particles
	uint8_t stack_particle_buf[sp_sz];
	uint8_t * sp_p = stack_particle_buf;

	if (call->udf_type == AS_SCAN_UDF_OP_BACKGROUND) {
		// If we are doing a background UDF scan, do not send any result back
		cf_detail(AS_UDF, "UDF: Background transaction, send no result back. "
				"Parent job id [%"PRIu64"]", ((tscan_job*)(tr->udata.req_udata))->tid);

		// Only bump the job's counters. Comparing 8 bytes includes the
		// terminator, so these are exact matches on a 7-character key.
		if(strncmp(key, "FAILURE", 8) == 0) {
			cf_atomic_int_incr(&((tscan_job*)(tr->udata.req_udata))->n_obj_udf_failed);
		} else if(strncmp(key, "SUCCESS", 8) == 0) {
			cf_atomic_int_incr(&((tscan_job*)(tr->udata.req_udata))->n_obj_udf_success);
		}
		return 0;
	} else if(call->udf_type == AS_SCAN_UDF_OP_UDF) {
		// Do not release fd now, scan will do it at the end of all internal
		// udf transactions
		cf_detail(AS_UDF, "UDF: Internal udf transaction, do not release fd");
		keep_fd = true;
	}

	// Build the single response bin. sp_p is passed by address and is compared
	// against the stack buffer below, i.e. make_send_bin() may repoint it.
	if (0 != make_send_bin(ns, bin, &sp_p, sp_sz, key, klen, vtype, val, vlen)) {
		return(-1);
	}

	// The response call below is going to release the file descriptor; when
	// the fd must be kept, take an extra reference first.
	if (keep_fd && tr->proto_fd_h) cf_rc_reserve(tr->proto_fd_h);

	single_transaction_response(
		tr, ns, NULL/*ops*/, &bin, 1,
		generation, void_time, &written_sz, NULL);

	// clean up.
	// TODO: check: is bin_inuse valid only when data_in_memory?
	// There must be another way to determine if the particle is used?
	if ( as_bin_inuse(bin) ) {
		as_particle_destroy(&stack_bin, ns->storage_data_in_memory);
	}

	// Free the particle buffer only if it no longer points at the stack buffer.
	if (sp_p != stack_particle_buf) {
		cf_free(sp_p);
	}
	return 0;
} // end send_response()
// Log (at info level) every bin slot of rd, in use or not: its index, address,
// in-use state and particle pointer. 'msg' tags each log line.
void
as_bin_all_dump(as_storage_rd *rd, char *msg)
{
	cf_info(AS_BIN, "bin dump: %s: new nbins %d", msg, rd->n_bins);

	uint16_t slot = 0;

	while (slot < rd->n_bins) {
		as_bin *cur = &rd->bins[slot];

		cf_info(AS_BIN, "bin %s: %d: bin %p inuse %d particle %p", msg, slot, cur, as_bin_inuse(cur), cur->particle);

		slot++;
	}
}
// Roll a single-bin record back to its previous bin: destroy whatever particle
// new_bin currently holds (if it is in use), then copy old_bin over it.
void
unwind_dim_single_bin(as_bin* old_bin, as_bin* new_bin)
{
	if (! as_bin_inuse(new_bin)) {
		// Nothing to destroy - just restore.
		as_single_bin_copy(new_bin, old_bin);
		return;
	}

	as_bin_particle_destroy(new_bin, true);
	as_single_bin_copy(new_bin, old_bin);
}
/* as_bin_get_all_versions
 * Collect every in-use bin of the record that carries the given name.
 * Bin pointers are stored in curr_bins; the return value is how many were
 * stored (0 if curr_bins is NULL or the name is unknown). Collection stops
 * once BIN_VERSION_MAX bins have been gathered.
 * NB: You must be holding the value lock for the record! */
int
as_bin_get_all_versions(as_storage_rd *rd, byte *name, size_t namesz, as_bin **curr_bins)
{
	if (!curr_bins) {
		return 0;
	}

	if (rd->ns->single_bin) {
		// no name comparison for single bin namespaces
		if (rd->n_bins != 1 || ! as_bin_inuse(rd->bins)) {
			return 0;
		}

		curr_bins[0] = rd->bins;

		return 1;
	}

	uint32_t wanted_id;

	if (! as_bin_get_id_from_name_buf(rd->ns, name, namesz, &wanted_id)) {
		return 0;
	}

	int n_found = 0;

	// The scan stops at the first unused slot.
	for (uint16_t slot = 0;
			slot < rd->n_bins && as_bin_inuse(&rd->bins[slot]); slot++) {
		as_bin *candidate = &rd->bins[slot];

		if ((uint32_t)candidate->id != wanted_id) {
			continue;
		}

		curr_bins[n_found++] = candidate;

		if (n_found >= BIN_VERSION_MAX) {
			cf_debug(AS_RECORD, "bin: somehow got maximum versions: problem");
			break;
		}
	}

	return n_found;
}
// Does not check bin name length. // Checks bin name quota - use appropriately. as_bin * as_bin_get_or_create(as_storage_rd *rd, const char *name) { if (rd->ns->single_bin) { if (! as_bin_inuse_has(rd)) { as_bin_init_nameless(rd->bins); } return rd->bins; } uint32_t id = (uint32_t)-1; uint16_t i; as_bin *b; if (cf_vmapx_get_index(rd->ns->p_bin_name_vmap, name, &id) == CF_VMAPX_OK) { for (i = 0; i < rd->n_bins; i++) { b = &rd->bins[i]; if (! as_bin_inuse(b)) { break; } if ((uint32_t)b->id == id) { return b; } } } else { if (cf_vmapx_count(rd->ns->p_bin_name_vmap) >= BIN_NAMES_QUOTA) { cf_warning(AS_BIN, "{%s} bin-name quota full - can't add new bin-name %s", rd->ns->name, name); return NULL; } i = as_bin_inuse_count(rd); } if (i >= rd->n_bins) { cf_crash(AS_BIN, "ran out of allocated bins in rd"); } b = &rd->bins[i]; if (id == (uint32_t)-1) { as_bin_init(rd->ns, b, name); } else { as_bin_init_nameless(b); b->id = (uint16_t)id; } return b; }
/* as_bin_create
 * Create a new bin with the specified name in a record.
 *
 * name/namesz give the bin name as a length-delimited buffer; names longer
 * than AS_ID_BIN_SZ - 1 bytes are rejected. In a single-bin namespace the name
 * is not stored.
 *
 * Returns the initialized bin, or NULL if the name is too long, the single
 * bin is already in use, or no slot in rd->bins is free.
 * NB: You must be holding the value lock for the record! */
as_bin *
as_bin_create(as_record *r, as_storage_rd *rd, byte *name, size_t namesz, uint version)
{
	// what policy should we make for too-large bins passed in?
	if (namesz > (AS_ID_BIN_SZ - 1)) {
		// namesz is a size_t - %zu is the matching conversion (was %d).
		cf_warning(AS_RW, "WARNING: too large bin name %zu passed in, internal error", namesz);
		return NULL;
	}

	// The name comes with an explicit length, so print it with a precision
	// instead of relying on a terminator. Done after the length check so the
	// int precision is known to be small.
	cf_detail(AS_BIN, "as_bin_create: %.*s %zu", (int)namesz, name, namesz);

	if (rd->ns->single_bin) {
		if (as_bin_inuse(rd->bins)) {
			cf_warning(AS_RW, "WARNING: cannot allocate more than 1 bin in a single bin namespace");
			return NULL;
		}

		// do not store bin name
		byte c = 0;

		as_bin_init(rd->ns, rd->bins, &c, 0, version);

		return rd->bins;
	}

	// seek for an empty one
	for (uint16_t i = 0; i < rd->n_bins; i++) {
		as_bin *b = &rd->bins[i];

		if (! as_bin_inuse(b)) {
			as_bin_init(rd->ns, b, name, namesz, version);
			return b;
		}
	}

	return NULL;
}
// Destroy the particles of all in-use bins starting at index 'from', then mark
// the bins from 'from' onward empty.
void
as_bin_destroy_from(as_storage_rd *rd, uint16_t from)
{
	uint16_t slot = from;

	// Stops at the first unused slot.
	while (slot < rd->n_bins && as_bin_inuse(&rd->bins[slot])) {
		as_particle_destroy(&rd->bins[slot], rd->ns->storage_data_in_memory);
		slot++;
	}

	as_bin_set_empty_from(rd, from);
}
// Destroy particles in specified bins. void as_record_destroy_bins_from(as_storage_rd *rd, uint16_t from) { for (uint16_t i = from; i < rd->n_bins; i++) { as_bin *b = &rd->bins[i]; if (! as_bin_inuse(b)) { return; // no more used bins - there are never unused bin gaps } as_bin_particle_destroy(b, rd->ns->storage_data_in_memory); as_bin_set_empty(b); } }
// Size reported by the particle's type-specific getter when called with a
// null output buffer; 0 if the bin is not in use. The getter's own return
// value is ignored.
uint32_t
as_bin_get_particle_size(as_bin *b)
{
	uint32_t particle_sz = 0;

	if (as_bin_inuse(b)) {
		as_particle *particle = as_bin_get_particle(b);
		uint8_t particle_type = as_bin_get_particle_type(b);

		(void)g_particle_getter_table[particle_type](particle, 0, &particle_sz);
	}

	return particle_sz;
}
// Count the in-use bins at the front of rd->bins (counting stops at the first
// unused slot). Returns 0 for a NULL rd.
uint16_t
as_bin_inuse_count(as_storage_rd *rd)
{
	// TODO: rd NULL is an error condition, 0 is a valid value, change function semantics
	if (!rd) {
		return 0;
	}

	uint16_t n_inuse = 0;

	while (n_inuse < rd->n_bins && as_bin_inuse(&rd->bins[n_inuse])) {
		n_inuse++;
	}

	return n_inuse;
}
// TODO - old pickle - remove in "six months". // Flatten record's bins into "pickle" format for fabric. uint8_t * as_record_pickle(as_storage_rd *rd, size_t *len_r) { as_namespace *ns = rd->ns; uint32_t sz = 2; // always 2 bytes for number of bins uint16_t n_bins_in_use; for (n_bins_in_use = 0; n_bins_in_use < rd->n_bins; n_bins_in_use++) { as_bin *b = &rd->bins[n_bins_in_use]; if (! as_bin_inuse(b)) { break; } sz += 1; // for bin name length sz += ns->single_bin ? 0 : strlen(as_bin_get_name_from_id(ns, b->id)); // for bin name sz += 1; // was for version - currently not used sz += as_bin_particle_pickled_size(b); } uint8_t *pickle = cf_malloc(sz); uint8_t *buf = pickle; (*(uint16_t *)buf) = cf_swap_to_be16(n_bins_in_use); // number of bins buf += 2; for (uint16_t i = 0; i < n_bins_in_use; i++) { as_bin *b = &rd->bins[i]; // Copy bin name, skipping a byte for name length. uint8_t name_len = (uint8_t)as_bin_memcpy_name(ns, buf + 1, b); *buf++ = name_len; // fill in bin name length buf += name_len; // skip past bin name *buf++ = 0; // was version - currently not used buf += as_bin_particle_to_pickled(b, buf); } *len_r = sz; return pickle; }
// Look up a bin by name (length-delimited buffer) and make sure the name has
// an entry in the namespace's bin-name vmap.
//
// Outputs:
//   *p_reserved - false if the name could not be reserved (name too long,
//                 bin-name quota full, or vmap insertion failed), else true.
//   *p_idx      - the name's vmap index, when the name is or becomes known.
//
// Returns the record's in-use bin with that name, or NULL if the record has no
// such bin (including every case where the name was just added or could not
// be reserved). Single-bin namespaces ignore the name entirely.
as_bin *
as_bin_get_and_reserve_name(as_storage_rd *rd, byte *name, size_t namesz,
		bool *p_reserved, uint32_t *p_idx)
{
	*p_reserved = true;

	if (rd->ns->single_bin) {
		return as_bin_inuse_has(rd) ? rd->bins : NULL;
	}

	// Bound the length before it sizes the stack buffer below. The limit is
	// the same one the bin-create functions apply to names.
	if (namesz >= AS_ID_BIN_SZ) {
		cf_warning(AS_BIN, "{%s} bin name too long (%zu)", rd->ns->name,
				namesz);
		*p_reserved = false;
		return NULL;
	}

	// The vmap API takes NUL-terminated names.
	char zname[namesz + 1];

	memcpy(zname, name, namesz);
	zname[namesz] = 0;

	if (cf_vmapx_get_index(rd->ns->p_bin_name_vmap, zname, p_idx) !=
			CF_VMAPX_OK) {
		// Unknown name - try to add it. Either way the record can't have
		// the bin yet, so NULL is returned.
		if (cf_vmapx_count(rd->ns->p_bin_name_vmap) >= BIN_NAMES_QUOTA) {
			cf_warning(AS_BIN, "{%s} bin-name quota full - can't add new bin-name %s", rd->ns->name, zname);
			*p_reserved = false;
		}
		else {
			cf_vmapx_err result = cf_vmapx_put_unique(rd->ns->p_bin_name_vmap, zname, p_idx);

			// "Name exists" is also accepted as success.
			if (! (result == CF_VMAPX_OK || result == CF_VMAPX_ERR_NAME_EXISTS)) {
				cf_warning(AS_BIN, "{%s} can't add new bin name %s, vmap err %d", rd->ns->name, zname, result);
				*p_reserved = false;
			}
		}

		return NULL;
	}

	// Known name - search the in-use bins for it.
	for (uint16_t i = 0; i < rd->n_bins; i++) {
		as_bin *b = &rd->bins[i];

		if (! as_bin_inuse(b)) {
			break;
		}

		if ((uint32_t)b->id == *p_idx) {
			return b;
		}
	}

	return NULL;
}
// Return the in-use bin of rd whose id equals 'id', or NULL if there is none.
// The search stops at the first unused slot.
as_bin *
as_bin_get_by_id(as_storage_rd *rd, uint32_t id)
{
	for (uint16_t slot = 0;
			slot < rd->n_bins && as_bin_inuse(&rd->bins[slot]); slot++) {
		as_bin *candidate = &rd->bins[slot];

		if ((uint32_t)candidate->id == id) {
			return candidate;
		}
	}

	return NULL;
}
// Hand the record's bin names to 'callback' as one flat buffer of fixed-width
// (AS_ID_BIN_SZ) entries.
//
// Returns 1 if the parameter check fails (callback not invoked), 0 on success,
// and -1 if the record's storage is not open - in which case the callback is
// still invoked, with a single empty name.
static int
udf_record_bin_names(const as_rec *rec, as_rec_bin_names_callback callback, void * udata)
{
	if (udf_record_param_check(rec, UDF_BIN_NONAME, __FILE__, __LINE__)) {
		return 1;
	}

	udf_record *urecord = (udf_record *)as_rec_source(rec);
	char *names_buf = NULL;

	if (! urecord || ! (urecord->flag & UDF_RECORD_FLAG_STORAGE_OPEN)) {
		cf_warning(AS_UDF, "Error in getting bin names: no record found");

		names_buf = alloca(1);
		*names_buf = 0;
		callback(names_buf, 1, AS_ID_BIN_SZ, udata);

		return -1;
	}

	uint16_t n_names;

	if (urecord->rd->ns->single_bin) {
		// One bin with an empty name.
		n_names = 1;
		names_buf = alloca(1);
		*names_buf = 0;
	}
	else {
		n_names = urecord->rd->n_bins;
		names_buf = alloca(n_names * AS_ID_BIN_SZ);

		uint16_t slot = 0;

		while (slot < n_names) {
			as_bin *cur = &urecord->rd->bins[slot];

			if (! as_bin_inuse(cur)) {
				// Only the in-use prefix is reported.
				n_names = slot;
				break;
			}

			const char *bin_name = as_bin_get_name_from_id(urecord->rd->ns, cur->id);

			strcpy(names_buf + (slot * AS_ID_BIN_SZ), bin_name);
			slot++;
		}
	}

	callback(names_buf, n_names, AS_ID_BIN_SZ, udata);

	return 0;
}
// Apply a remote record to a data-in-memory, single-bin record, operating
// directly on the bin embedded in the index.
//
// On failure the previous bin (and, once changed, the index metadata) is
// restored. On success the old bin's particle is destroyed, memory stats are
// adjusted, and *is_delete is set to whether the remote record had no bins.
//
// Returns AS_OK on success; AS_ERR_UNKNOWN if the remote record has more than
// one bin; otherwise the negated result of the failing unpack/write call.
int
record_apply_dim_single_bin(as_remote_record *rr, as_storage_rd *rd, bool *is_delete)
{
	// TODO - old pickle - remove in "six months".
	if (rr->is_old_pickle) {
		return old_record_apply_dim_single_bin(rr, rd, is_delete);
	}

	as_namespace* ns = rr->rsv->ns;
	as_record* r = rd->r;

	rd->n_bins = 1;

	// Set rd->bins!
	as_storage_rd_load_bins(rd, NULL);

	// For memory accounting, note current usage.
	uint64_t memory_bytes = 0;

	// TODO - as_storage_record_get_n_bytes_memory() could check bins in use.
	if (as_bin_inuse(rd->bins)) {
		memory_bytes = as_storage_record_get_n_bytes_memory(rd);
	}

	uint16_t n_new_bins = rr->n_bins;

	if (n_new_bins > 1) {
		cf_warning_digest(AS_RECORD, rr->keyd, "{%s} record replace: single-bin got %u bins ", ns->name, n_new_bins);
		return AS_ERR_UNKNOWN;
	}

	// Keep old bin for unwinding.
	as_bin old_bin;
	as_single_bin_copy(&old_bin, rd->bins);

	// No stack new bin - simpler to operate directly on bin embedded in index.
	as_bin_set_empty(rd->bins);

	int result;

	// Fill the new bins and particles. With zero incoming bins nothing is
	// unpacked and the bin stays empty.
	if (n_new_bins == 1 &&
			(result = as_flat_unpack_remote_bins(rr, rd->bins)) != 0) {
		cf_warning_digest(AS_RECORD, rr->keyd, "{%s} record replace: failed unpickle bin ", ns->name);
		unwind_dim_single_bin(&old_bin, rd->bins);
		return -result;
	}

	// Won't use to flatten, but needed to know if bins are in use. Amazingly,
	// rd->n_bins 0 ok adjusting memory stats. Also, rd->bins already filled.
	rd->n_bins = n_new_bins;

	// Apply changes to metadata in as_index, keeping the old values so they
	// can be restored if the write fails.
	index_metadata old_metadata;
	update_index_metadata(rr, &old_metadata, r);

	// Write the record to storage.
	if ((result = as_record_write_from_pickle(rd)) < 0) {
		cf_warning_digest(AS_RECORD, rr->keyd, "{%s} record replace: failed write ", ns->name);
		unwind_index_metadata(&old_metadata, r);
		unwind_dim_single_bin(&old_bin, rd->bins);
		return -result;
	}

	// Cleanup - destroy old bin, can't unwind after.
	as_bin_particle_destroy(&old_bin, true);

	as_storage_record_adjust_mem_stats(rd, memory_bytes);
	*is_delete = n_new_bins == 0;

	return AS_OK;
}
// TODO - old pickle - remove in "six months". int old_record_apply_dim_single_bin(as_remote_record *rr, as_storage_rd *rd, bool *is_delete) { as_namespace* ns = rr->rsv->ns; as_record* r = rd->r; rd->n_bins = 1; // Set rd->bins! as_storage_rd_load_bins(rd, NULL); // For memory accounting, note current usage. uint64_t memory_bytes = 0; // TODO - as_storage_record_get_n_bytes_memory() could check bins in use. if (as_bin_inuse(rd->bins)) { memory_bytes = as_storage_record_get_n_bytes_memory(rd); } uint16_t n_new_bins = cf_swap_from_be16(*(uint16_t *)rr->pickle); if (n_new_bins > 1) { cf_warning_digest(AS_RECORD, rr->keyd, "{%s} record replace: single-bin got %u bins ", ns->name, n_new_bins); return AS_ERR_UNKNOWN; } // Keep old bin intact for unwinding, clear record bin for incoming. as_bin old_bin; as_single_bin_copy(&old_bin, rd->bins); as_bin_set_empty(rd->bins); int result; // Fill the new bins and particles. if (n_new_bins == 1 && (result = unpickle_bins(rr, rd, NULL)) != 0) { cf_warning_digest(AS_RECORD, rr->keyd, "{%s} record replace: failed unpickle bin ", ns->name); unwind_dim_single_bin(&old_bin, rd->bins); return result; } // Apply changes to metadata in as_index needed for and writing. index_metadata old_metadata; update_index_metadata(rr, &old_metadata, r); // Write the record to storage. if ((result = as_record_write_from_pickle(rd)) < 0) { cf_warning_digest(AS_RECORD, rr->keyd, "{%s} record replace: failed write ", ns->name); unwind_index_metadata(&old_metadata, r); unwind_dim_single_bin(&old_bin, rd->bins); return -result; } // Cleanup - destroy old bin, can't unwind after. as_bin_particle_destroy(&old_bin, true); as_storage_record_adjust_mem_stats(rd, memory_bytes); *is_delete = n_new_bins == 0; return AS_OK; }
/* as_particle_frombuf
 * Set the contents of a bin's particle from a buffer.
 *
 * b               - bin to fill
 * type            - particle type of the incoming value
 * buf, sz         - incoming value, passed to the type's setter
 * stack_particle  - memory the particle is placed in when !data_in_memory
 * data_in_memory  - whether the bin's particle lives in memory; only then is
 *                   the bin's current content examined
 *
 * Returns whatever the type's setter returns, or 0 for
 * AS_PARTICLE_TYPE_NULL (and for a bad type under EXTRA_CHECKS).
 */
as_particle *
as_particle_frombuf(as_bin *b, as_particle_type type, byte *buf, uint32_t sz, uint8_t *stack_particle, bool data_in_memory)
{

#ifdef EXTRA_CHECKS
	// check the incoming type
	if (type < AS_PARTICLE_TYPE_NULL || type >= AS_PARTICLE_TYPE_MAX) {
		cf_info(AS_PARTICLE, "particle set: bad particle type %d, error", (int)type);
		return(NULL);
	}
#endif

	as_particle *retval = 0;

	if (data_in_memory) {
		// we have to deal with these cases
		// current type is integer, new type is integer
		// current type is not integer, new type is integer
		// current type is integer, new type is not integer
		// current type is not integer, new type is not integer
		if (as_bin_is_integer(b)) {
			if (type == AS_PARTICLE_TYPE_INTEGER) {
				// current type is integer, new type is integer
				// just copy the new integer over the existing one.
				return (g_particle_setter_table[type](&b->iparticle, type, buf, sz, data_in_memory));
			}
			else {
				// current type is integer, new type is not integer
				// make this the same case as current type is not integer, new type is not integer
				// cleanup the integer and allocate a pointer.
				b->particle = 0;
			}
		}
		else if (as_bin_inuse(b)) {
			// if it's a completely new type, destruct the old one and create a new one
			// (when the type is unchanged, b->particle is left as is and is
			// handed to the setter below)
			uint8_t bin_particle_type = as_bin_get_particle_type(b);
			if (type != bin_particle_type) {
				g_particle_destructor_table[bin_particle_type](b->particle);
				b->particle = 0;
			}
		}
		else {
			// bin not in use - start from a null particle pointer
			b->particle = 0;
		}
	}

	switch (type) {
		case AS_PARTICLE_TYPE_INTEGER:
			// current type is not integer, new type is integer
			as_bin_state_set(b, AS_BIN_STATE_INUSE_INTEGER);
			// use the iparticle embedded in the bin
			retval = g_particle_setter_table[type](&b->iparticle, type, buf, sz, data_in_memory);
			break;
		case AS_PARTICLE_TYPE_NULL:
			// special case, used to free old particle w/o setting new one
			// NOTE(review): the bin state is not changed on this path.
			break;
		default:
			// current type is not integer, new type is not integer
			if (! data_in_memory) {
				b->particle = (as_particle *)stack_particle;
			}

			if (as_particle_type_hidden(type)) {
				as_bin_state_set(b, AS_BIN_STATE_INUSE_HIDDEN);
			} else {
				as_bin_state_set(b, AS_BIN_STATE_INUSE_OTHER);
			}

			// The setter receives the current particle pointer and its
			// return value becomes the bin's particle.
			b->particle = g_particle_setter_table[type](b->particle, type, buf, sz, data_in_memory);
			retval = b->particle;
			break;
	}

	return(retval);
}
// Serve a read transaction from the local copy of the record.
//
// Flow: find the record, reject doomed records and tombstones, run the
// replication-state check, open storage, optionally verify the key, load the
// bins, select the bins to return (all bins, or per read op), build the
// response, release the record, then send.
//
// Returns TRANS_DONE_SUCCESS once a response has been sent, TRANS_DONE_ERROR
// when read_local_done() was called with an error code, or TRANS_IN_PROGRESS /
// TRANS_WAITING when repl_state_check() defers the transaction (no response
// is sent in that case).
transaction_status
read_local(as_transaction* tr)
{
	as_msg* m = &tr->msgp->msg;
	as_namespace* ns = tr->rsv.ns;

	as_index_ref r_ref;

	if (as_record_get(tr->rsv.tree, &tr->keyd, &r_ref) != 0) {
		read_local_done(tr, NULL, NULL, AS_ERR_NOT_FOUND);
		return TRANS_DONE_ERROR;
	}

	as_record* r = r_ref.r;

	// Check if it's an expired or truncated record.
	if (as_record_is_doomed(r, ns)) {
		read_local_done(tr, &r_ref, NULL, AS_ERR_NOT_FOUND);
		return TRANS_DONE_ERROR;
	}

	int result = repl_state_check(r, tr);

	if (result != 0) {
		// -3 is reported to the origin as unavailable; any other non-zero
		// result only releases the record.
		if (result == -3) {
			read_local_done(tr, &r_ref, NULL, AS_ERR_UNAVAILABLE);
			return TRANS_DONE_ERROR;
		}

		// No response sent to origin.
		as_record_done(&r_ref, ns);
		return result == 1 ? TRANS_IN_PROGRESS : TRANS_WAITING;
	}

	// Check if it's a tombstone.
	if (! as_record_is_live(r)) {
		read_local_done(tr, &r_ref, NULL, AS_ERR_NOT_FOUND);
		return TRANS_DONE_ERROR;
	}

	as_storage_rd rd;

	as_storage_record_open(ns, r, &rd);

	// If configuration permits, allow reads to use page cache.
	rd.read_page_cache = ns->storage_read_page_cache;

	// Check the key if required.
	// Note - for data-not-in-memory "exists" ops, key check is expensive!
	if (as_transaction_has_key(tr) &&
			as_storage_record_get_key(&rd) && ! check_msg_key(m, &rd)) {
		read_local_done(tr, &r_ref, &rd, AS_ERR_KEY_MISMATCH);
		return TRANS_DONE_ERROR;
	}

	// No bins requested - only metadata is needed, so skip loading bins.
	if ((m->info1 & AS_MSG_INFO1_GET_NO_BINS) != 0) {
		tr->generation = r->generation;
		tr->void_time = r->void_time;
		tr->last_update_time = r->last_update_time;

		read_local_done(tr, &r_ref, &rd, AS_OK);
		return TRANS_DONE_SUCCESS;
	}

	if ((result = as_storage_rd_load_n_bins(&rd)) < 0) {
		cf_warning_digest(AS_RW, &tr->keyd, "{%s} read_local: failed as_storage_rd_load_n_bins() ", ns->name);
		read_local_done(tr, &r_ref, &rd, -result);
		return TRANS_DONE_ERROR;
	}

	// Stack space for the bins is only sized when data is not in memory.
	as_bin stack_bins[ns->storage_data_in_memory ? 0 : rd.n_bins];

	if ((result = as_storage_rd_load_bins(&rd, stack_bins)) < 0) {
		cf_warning_digest(AS_RW, &tr->keyd, "{%s} read_local: failed as_storage_rd_load_bins() ", ns->name);
		read_local_done(tr, &r_ref, &rd, -result);
		return TRANS_DONE_ERROR;
	}

	if (! as_bin_inuse_has(&rd)) {
		cf_warning_digest(AS_RW, &tr->keyd, "{%s} read_local: found record with no bins ", ns->name);
		read_local_done(tr, &r_ref, &rd, AS_ERR_UNKNOWN);
		return TRANS_DONE_ERROR;
	}

	// Upper bound on response entries: every bin, or one per op.
	uint32_t bin_count = (m->info1 & AS_MSG_INFO1_GET_ALL) != 0 ?
			rd.n_bins : m->n_ops;

	as_msg_op* ops[bin_count];
	as_msg_op** p_ops = ops;
	as_bin* response_bins[bin_count];
	uint16_t n_bins = 0;

	// Bins produced by CDT reads - destroyed via destroy_stack_bins().
	as_bin result_bins[bin_count];
	uint32_t n_result_bins = 0;

	if ((m->info1 & AS_MSG_INFO1_GET_ALL) != 0) {
		p_ops = NULL;
		n_bins = rd.n_bins;
		as_bin_get_all_p(&rd, response_bins);
	}
	else {
		if (m->n_ops == 0) {
			cf_warning_digest(AS_RW, &tr->keyd, "{%s} read_local: bin op(s) expected, none present ", ns->name);
			read_local_done(tr, &r_ref, &rd, AS_ERR_PARAMETER);
			return TRANS_DONE_ERROR;
		}

		// With respond-all-ops, an op that yields no bin still gets a
		// (NULL-bin) entry in the response.
		bool respond_all_ops = (m->info2 & AS_MSG_INFO2_RESPOND_ALL_OPS) != 0;

		as_msg_op* op = 0;
		int n = 0;

		while ((op = as_msg_op_iterate(m, op, &n)) != NULL) {
			if (op->op == AS_MSG_OP_READ) {
				as_bin* b = as_bin_get_from_buf(&rd, op->name, op->name_sz);

				if (b || respond_all_ops) {
					ops[n_bins] = op;
					response_bins[n_bins++] = b;
				}
			}
			else if (op->op == AS_MSG_OP_CDT_READ) {
				as_bin* b = as_bin_get_from_buf(&rd, op->name, op->name_sz);

				if (b) {
					as_bin* rb = &result_bins[n_result_bins];
					as_bin_set_empty(rb);

					if ((result = as_bin_cdt_read_from_client(b, op, rb)) < 0) {
						cf_warning_digest(AS_RW, &tr->keyd, "{%s} read_local: failed as_bin_cdt_read_from_client() ", ns->name);
						destroy_stack_bins(result_bins, n_result_bins);
						read_local_done(tr, &r_ref, &rd, -result);
						return TRANS_DONE_ERROR;
					}

					// The result slot is only counted if the CDT read
					// actually filled it.
					if (as_bin_inuse(rb)) {
						n_result_bins++;
						ops[n_bins] = op;
						response_bins[n_bins++] = rb;
					}
					else if (respond_all_ops) {
						ops[n_bins] = op;
						response_bins[n_bins++] = NULL;
					}
				}
				else if (respond_all_ops) {
					ops[n_bins] = op;
					response_bins[n_bins++] = NULL;
				}
			}
			else {
				cf_warning_digest(AS_RW, &tr->keyd, "{%s} read_local: unexpected bin op %u ", ns->name, op->op);
				destroy_stack_bins(result_bins, n_result_bins);
				read_local_done(tr, &r_ref, &rd, AS_ERR_PARAMETER);
				return TRANS_DONE_ERROR;
			}
		}
	}

	cf_dyn_buf_define_size(db, 16 * 1024);

	if (tr->origin != FROM_BATCH) {
		// Build the response into db; the builder may return a different
		// buffer than the one passed in, which is_stack records.
		db.used_sz = db.alloc_sz;
		db.buf = (uint8_t*)as_msg_make_response_msg(tr->result_code,
				r->generation, r->void_time, p_ops, response_bins, n_bins, ns,
				(cl_msg*)dyn_bufdb, &db.used_sz, as_transaction_trid(tr));

		db.is_stack = db.buf == dyn_bufdb;
		// Note - not bothering to correct alloc_sz if buf was allocated.
	}
	else {
		tr->generation = r->generation;
		tr->void_time = r->void_time;
		tr->last_update_time = r->last_update_time;

		// Since as_batch_add_result() constructs response directly in shared
		// buffer to avoid extra copies, can't use db.
		send_read_response(tr, p_ops, response_bins, n_bins, NULL);
	}

	destroy_stack_bins(result_bins, n_result_bins);

	as_storage_record_close(&rd);
	as_record_done(&r_ref, ns);

	// Now that we're not under the record lock, send the message we just built.
	if (db.used_sz != 0) {
		send_read_response(tr, NULL, NULL, 0, &db);

		cf_dyn_buf_free(&db);
		tr->from.proto_fd_h = NULL;
	}

	return TRANS_DONE_SUCCESS;
}
int as_msg_make_response_bufbuilder(as_record *r, as_storage_rd *rd, cf_buf_builder **bb_r, bool nobindata, char *nsname, bool use_sets, bool include_key, cf_vector *binlist) { // Sanity checks. Either rd should be there or nobindata and nsname should be present. if (!(rd || (nobindata && nsname))) { cf_detail(AS_PROTO, "Neither storage record nor nobindata is set. Skipping the record."); return 0; } // figure out the size of the entire buffer int set_name_len = 0; const char *set_name = NULL; int ns_len = rd ? strlen(rd->ns->name) : strlen(nsname); if (use_sets && as_index_get_set_id(r) != INVALID_SET_ID) { as_namespace *ns = NULL; if (rd) { ns = rd->ns; } else if (nsname) { ns = as_namespace_get_byname(nsname); } if (!ns) { cf_info(AS_PROTO, "Cannot get namespace, needed to get set information. Skipping record."); return -1; } set_name = as_index_get_set_name(r, ns); if (set_name) { set_name_len = strlen(set_name); } } uint8_t* key = NULL; uint32_t key_size = 0; if (include_key && as_index_is_flag_set(r, AS_INDEX_FLAG_KEY_STORED)) { if (! 
as_storage_record_get_key(rd)) { cf_info(AS_PROTO, "can't get key - skipping record"); return -1; } key = rd->key; key_size = rd->key_size; } uint16_t n_fields = 2; int msg_sz = sizeof(as_msg); msg_sz += sizeof(as_msg_field) + sizeof(cf_digest); msg_sz += sizeof(as_msg_field) + ns_len; if (set_name) { n_fields++; msg_sz += sizeof(as_msg_field) + set_name_len; } if (key) { n_fields++; msg_sz += sizeof(as_msg_field) + key_size; } int list_bins = 0; int in_use_bins = 0; if (rd) { in_use_bins = as_bin_inuse_count(rd); } if (nobindata == false) { if(binlist) { int binlist_sz = cf_vector_size(binlist); for(uint16_t i = 0; i < binlist_sz; i++) { char binname[AS_ID_BIN_SZ]; cf_vector_get(binlist, i, (void*)&binname); cf_debug(AS_PROTO, " Binname projected inside is |%s| \n", binname); as_bin *p_bin = as_bin_get (rd, (uint8_t*)binname, strlen(binname)); if (!p_bin) { cf_debug(AS_PROTO, "To be projected bin |%s| not found \n", binname); continue; } cf_debug(AS_PROTO, "Adding bin |%s| to projected bins |%s| \n", binname); list_bins++; msg_sz += sizeof(as_msg_op); msg_sz += rd->ns->single_bin ? 0 : strlen(binname); uint32_t psz; if (as_bin_is_hidden(p_bin)) { psz = 0; } else { as_particle_tobuf(p_bin, 0, &psz); // get size } msg_sz += psz; } } else { msg_sz += sizeof(as_msg_op) * in_use_bins; // the bin headers for (uint16_t i = 0; i < in_use_bins; i++) { as_bin *p_bin = &rd->bins[i]; msg_sz += rd->ns->single_bin ? 0 : strlen(as_bin_get_name_from_id(rd->ns, p_bin->id)); uint32_t psz; if (as_bin_is_hidden(p_bin)) { psz = 0; } else { as_particle_tobuf(p_bin, 0, &psz); // get size } msg_sz += psz; } } } uint8_t *b; cf_buf_builder_reserve(bb_r, msg_sz, &b); // set up the header uint8_t *buf = b; as_msg *msgp = (as_msg *) buf; msgp->header_sz = sizeof(as_msg); msgp->info1 = (nobindata ? 
AS_MSG_INFO1_GET_NOBINDATA : 0); msgp->info2 = 0; msgp->info3 = 0; msgp->unused = 0; msgp->result_code = 0; msgp->generation = r->generation; msgp->record_ttl = r->void_time; msgp->transaction_ttl = 0; msgp->n_fields = n_fields; if (rd) { if (binlist) msgp->n_ops = list_bins; else msgp->n_ops = in_use_bins; } else { msgp->n_ops = 0; } as_msg_swap_header(msgp); buf += sizeof(as_msg); as_msg_field *mf = (as_msg_field *) buf; mf->field_sz = sizeof(cf_digest) + 1; mf->type = AS_MSG_FIELD_TYPE_DIGEST_RIPE; if (rd) { memcpy(mf->data, &rd->keyd, sizeof(cf_digest)); } else { memcpy(mf->data, &r->key, sizeof(cf_digest)); } as_msg_swap_field(mf); buf += sizeof(as_msg_field) + sizeof(cf_digest); mf = (as_msg_field *) buf; mf->field_sz = ns_len + 1; mf->type = AS_MSG_FIELD_TYPE_NAMESPACE; if (rd) { memcpy(mf->data, rd->ns->name, ns_len); } else { memcpy(mf->data, nsname, ns_len); } as_msg_swap_field(mf); buf += sizeof(as_msg_field) + ns_len; if (set_name) { mf = (as_msg_field *) buf; mf->field_sz = set_name_len + 1; mf->type = AS_MSG_FIELD_TYPE_SET; memcpy(mf->data, set_name, set_name_len); as_msg_swap_field(mf); buf += sizeof(as_msg_field) + set_name_len; } if (key) { mf = (as_msg_field *) buf; mf->field_sz = key_size + 1; mf->type = AS_MSG_FIELD_TYPE_KEY; memcpy(mf->data, key, key_size); as_msg_swap_field(mf); buf += sizeof(as_msg_field) + key_size; } if (nobindata) { goto Out; } if(binlist) { int binlist_sz = cf_vector_size(binlist); for(uint16_t i = 0; i < binlist_sz; i++) { char binname[AS_ID_BIN_SZ]; cf_vector_get(binlist, i, (void*)&binname); cf_debug(AS_PROTO, " Binname projected inside is |%s| \n", binname); as_bin *p_bin = as_bin_get (rd, (uint8_t*)binname, strlen(binname)); if (!p_bin) // should it be checked before ??? 
continue; as_msg_op *op = (as_msg_op *)buf; buf += sizeof(as_msg_op); op->op = AS_MSG_OP_READ; op->name_sz = as_bin_memcpy_name(rd->ns, op->name, p_bin); buf += op->name_sz; // Since there are two variable bits, the size is everything after // the data bytes - and this is only the head, we're patching up // the rest in a minute. op->op_sz = 4 + op->name_sz; if (as_bin_inuse(p_bin)) { op->particle_type = as_particle_type_convert(as_bin_get_particle_type(p_bin)); op->version = as_bin_get_version(p_bin, rd->ns->single_bin); uint32_t psz = msg_sz - (buf - b); // size remaining in buffer, for safety if (as_bin_is_hidden(p_bin)) { op->particle_type = AS_PARTICLE_TYPE_NULL; psz = 0; } else { if (0 != as_particle_tobuf(p_bin, buf, &psz)) { cf_warning(AS_PROTO, "particle to buf: could not copy data!"); } } buf += psz; op->op_sz += psz; } else { cf_debug(AS_PROTO, "Whoops !! bin not in use"); op->particle_type = AS_PARTICLE_TYPE_NULL; } as_msg_swap_op(op); } } else { // over all bins, copy into the buffer for (uint16_t i = 0; i < in_use_bins; i++) { as_msg_op *op = (as_msg_op *)buf; buf += sizeof(as_msg_op); op->op = AS_MSG_OP_READ; op->name_sz = as_bin_memcpy_name(rd->ns, op->name, &rd->bins[i]); buf += op->name_sz; // Since there are two variable bits, the size is everything after // the data bytes - and this is only the head, we're patching up // the rest in a minute. 
op->op_sz = 4 + op->name_sz; if (as_bin_inuse(&rd->bins[i])) { op->particle_type = as_particle_type_convert(as_bin_get_particle_type(&rd->bins[i])); op->version = as_bin_get_version(&rd->bins[i], rd->ns->single_bin); uint32_t psz = msg_sz - (buf - b); // size remaining in buffer, for safety if (as_bin_is_hidden(&rd->bins[i])) { op->particle_type = AS_PARTICLE_TYPE_NULL; psz = 0; } else { if (0 != as_particle_tobuf(&rd->bins[i], buf, &psz)) { cf_warning(AS_PROTO, "particle to buf: could not copy data!"); } } buf += psz; op->op_sz += psz; } else { op->particle_type = AS_PARTICLE_TYPE_NULL; } as_msg_swap_op(op); } } Out: return(0); }
/*
 * Build a complete response message in one contiguous buffer: proto header,
 * as_msg header, optional fields (transaction id, set name), then one READ op
 * per requested bin.
 *
 * result_code, generation, void_time - copied into the as_msg header
 *     (void_time is stored as record_ttl).
 * ops, bins, bin_count - parallel arrays of length bin_count. For index i,
 *     bins[i] supplies name, version and particle when non-NULL; otherwise
 *     ops[i] supplies only the name. If both are NULL a warning is logged and
 *     a nameless, NULL-typed op is emitted for that index.
 * ns - namespace, used for bin-name lookup and the single_bin setting.
 * msgp_in, msg_sz_in - optional caller-provided buffer and its size. The
 *     buffer is used if it is non-NULL and large enough, otherwise a buffer is
 *     obtained from cf_malloc(). On success *msg_sz_in is overwritten with the
 *     total message size.
 * trid - when non-zero, echoed back as an AS_MSG_FIELD_TYPE_TRID field.
 * setname - when non-NULL, sent as an AS_MSG_FIELD_TYPE_SET field (without a
 *     terminating NUL).
 *
 * Returns the buffer holding the message (equal to msgp_in when the caller's
 * buffer was used), or NULL if allocation failed. On allocation failure
 * *msg_sz_in is left untouched.
 */
cl_msg *
as_msg_make_response_msg(uint32_t result_code, uint32_t generation,
		uint32_t void_time, as_msg_op **ops, as_bin **bins, uint16_t bin_count,
		as_namespace *ns, cl_msg *msgp_in, size_t *msg_sz_in, uint64_t trid,
		const char *setname)
{
	int setname_len = 0;

	// Pass 1 - figure out the size of the entire buffer.
	int msg_sz = sizeof(cl_msg);

	msg_sz += sizeof(as_msg_op) * bin_count; // the bin headers

	for (uint16_t i = 0; i < bin_count; i++) {
		if (bins[i]) {
			msg_sz += ns->single_bin ?
					0 : strlen(as_bin_get_name_from_id(ns, bins[i]->id));

			// Stays 0 for hidden bins, and if the size query leaves it unset.
			uint32_t psz = 0;

			if (! as_bin_is_hidden(bins[i])) {
				bool tojson = (as_bin_get_particle_type(bins[i]) ==
						AS_PARTICLE_TYPE_LUA_BLOB);

				_as_particle_tobuf(bins[i], 0, &psz, tojson); // get size
			}

			msg_sz += psz;
		}
		else if (ops[i]) {
			// No bin, only op - name but no particle.
			msg_sz += ops[i]->name_sz;
		}
		else {
			cf_warning(AS_PROTO, "internal error!");
		}
	}

	// If a transaction-id is sent by the client, we send it back in a field.
	if (trid != 0) {
		msg_sz += (sizeof(as_msg_field) + sizeof(trid));
	}

	// If setname is present, we send it as a field. Account for its space.
	if (setname != NULL) {
		setname_len = strlen(setname);
		msg_sz += (sizeof(as_msg_field) + setname_len);
	}

	// Most cases are small messages - use the caller's buffer if we can.
	byte *b;

	if ((0 == msgp_in) || (*msg_sz_in < msg_sz)) {
		b = cf_malloc(msg_sz);

		if (! b) {
			return NULL;
		}
	}
	else {
		b = (byte *)msgp_in;
	}

	*msg_sz_in = msg_sz;

	// Set up the headers.
	byte *buf = b; // current buffer pointer
	cl_msg *msgp = (cl_msg *)buf;

	msgp->proto.version = PROTO_VERSION;
	msgp->proto.type = PROTO_TYPE_AS_MSG;
	msgp->proto.sz = msg_sz - sizeof(as_proto);
	as_proto_swap(&msgp->proto);

	as_msg *m = &msgp->msg;

	m->header_sz = sizeof(as_msg);
	m->info1 = 0;
	m->info2 = 0;
	m->info3 = 0;
	m->unused = 0;
	m->result_code = result_code;
	m->generation = generation;
	m->record_ttl = void_time;
	m->transaction_ttl = 0;
	m->n_ops = bin_count;
	m->n_fields = 0;

	// Count the number of fields that we are going to send back.
	if (trid != 0) {
		m->n_fields++;
	}

	if (setname != NULL) {
		m->n_fields++;
	}

	as_msg_swap_header(m);

	buf += sizeof(cl_msg);

	// Transaction-id field.
	if (trid != 0) {
		as_msg_field *trfield = (as_msg_field *)buf;

		// Allow space for the message field header.
		buf += sizeof(as_msg_field);

		trfield->type = AS_MSG_FIELD_TYPE_TRID;

		// Copy the transaction-id as field data in network byte order
		// (big-endian).
		uint64_t trid_nbo = __cpu_to_be64(trid);

		// NOTE(review): field_sz here is the data length only, whereas the
		// set-name field below uses data length + 1 - confirm which
		// convention as_msg_field consumers expect before changing either.
		trfield->field_sz = sizeof(trid_nbo);
		memcpy(trfield->data, &trid_nbo, sizeof(trid_nbo));
		as_msg_swap_field(trfield);

		// Allow space for the message field data.
		buf += sizeof(trid_nbo);
	}

	// Set-name field.
	if (setname != NULL) {
		as_msg_field *trfield = (as_msg_field *)buf;

		// Allow space for the message field header.
		buf += sizeof(as_msg_field);

		trfield->type = AS_MSG_FIELD_TYPE_SET;
		trfield->field_sz = setname_len + 1;
		memcpy(trfield->data, setname, setname_len);
		as_msg_swap_field(trfield);

		// Allow space for the message field data.
		buf += setname_len;
	}

	// Pass 2 - over all bins, copy into the buffer.
	for (uint16_t i = 0; i < bin_count; i++) {
		as_msg_op *op = (as_msg_op *)buf;

		buf += sizeof(as_msg_op);

		op->op = AS_MSG_OP_READ;

		if (bins[i]) {
			op->version = as_bin_get_version(bins[i], ns->single_bin);
			op->name_sz = as_bin_memcpy_name(ns, op->name, bins[i]);
		}
		else if (ops[i]) {
			op->version = 0;
			memcpy(op->name, ops[i]->name, ops[i]->name_sz);
			op->name_sz = ops[i]->name_sz;
		}
		else {
			// Neither bin nor op - the sizing pass already warned and
			// reserved no name bytes, so emit a nameless op rather than
			// dereferencing a NULL ops[i].
			op->version = 0;
			op->name_sz = 0;
		}

		buf += op->name_sz;

		// Since there are two variable bits, the size is everything after the
		// data bytes - and this is only the head, we're patching up the rest
		// in a minute.
		op->op_sz = 4 + op->name_sz;

		if (bins[i] && as_bin_inuse(bins[i])) {
			op->particle_type =
					as_particle_type_convert(as_bin_get_particle_type(bins[i]));

			// Size remaining in buffer, for safety.
			uint32_t psz = msg_sz - (buf - b);

			if (as_bin_is_hidden(bins[i])) {
				op->particle_type = AS_PARTICLE_TYPE_NULL;
				psz = 0; // hidden bins are sent with no particle data
			}
			else {
				bool tojson = (as_bin_get_particle_type(bins[i]) ==
						AS_PARTICLE_TYPE_LUA_BLOB);

				if (0 != _as_particle_tobuf(bins[i], buf, &psz, tojson)) {
					cf_warning(AS_PROTO, "particle to buf: could not copy data!");
				}
			}

			buf += psz;
			op->op_sz += psz;
		}
		else {
			op->particle_type = AS_PARTICLE_TYPE_NULL;
		}

		as_msg_swap_op(op);
	}

	return (cl_msg *)b;
}
// Does not check bin name length. // Checks bin name quota and bin-level policy - use appropriately. as_bin * as_bin_get_or_create_from_buf(as_storage_rd *rd, byte *name, size_t namesz, bool create_only, bool replace_only, int *p_result) { if (rd->ns->single_bin) { if (! as_bin_inuse_has(rd)) { as_bin_init_nameless(rd->bins); } // Ignored bin-level policy - single-bin needs only record-level policy. return rd->bins; } uint32_t id = (uint32_t)-1; uint16_t i; as_bin *b; if (cf_vmapx_get_index_w_len(rd->ns->p_bin_name_vmap, (const char *)name, namesz, &id) == CF_VMAPX_OK) { for (i = 0; i < rd->n_bins; i++) { b = &rd->bins[i]; if (! as_bin_inuse(b)) { break; } if ((uint32_t)b->id == id) { if (as_bin_is_hidden(b)) { cf_warning(AS_BIN, "cannot manipulate hidden bin directly"); *p_result = AS_PROTO_RESULT_FAIL_INCOMPATIBLE_TYPE; return NULL; } if (create_only) { *p_result = AS_PROTO_RESULT_FAIL_BIN_EXISTS; return NULL; } return b; } } } else { if (cf_vmapx_count(rd->ns->p_bin_name_vmap) >= BIN_NAMES_QUOTA) { char zname[namesz + 1]; memcpy(zname, name, namesz); zname[namesz] = 0; cf_warning(AS_BIN, "{%s} bin-name quota full - can't add new bin-name %s", rd->ns->name, zname); *p_result = AS_PROTO_RESULT_FAIL_BIN_NAME; return NULL; } i = as_bin_inuse_count(rd); } if (replace_only) { *p_result = AS_PROTO_RESULT_FAIL_BIN_NOT_FOUND; return NULL; } if (i >= rd->n_bins) { cf_crash(AS_BIN, "ran out of allocated bins in rd"); } b = &rd->bins[i]; if (id == (uint32_t)-1) { as_bin_init_w_len(rd->ns, b, name, namesz); } else { as_bin_init_nameless(b); b->id = (uint16_t)id; } return b; }