/*
 * Internal Function: allocate a chunk slot and create a new sub-record in it.
 *
 * Parameters:
 * 		lrecord : Parent ldt record
 *
 * Return value:
 * 		The chunk's as_rec, with an additional reference reserved, on success
 * 		NULL if no free slot is available or the record create fails
 *
 * Description:
 * 		1. Derive the sub-record digest from the head record's key
 * 		   (randomized, then stamped with the parent's version).
 * 		2. Find a free chunk slot and set the chunk up with that digest.
 * 		3. Create the record; on failure the chunk is destroyed again.
 *
 * Callers:
 * 		ldt_aerospike_crec_create
 */
as_rec *
ldt_crec_create(ldt_record *lrecord)
{
	// Build the digest for the new sub-record, starting from the head's key.
	udf_record *head = (udf_record *) as_rec_source(lrecord->h_urec);
	cf_digest   sub_digest = head->r_ref->r->key;

	cf_detail(AS_LDT, "ldt_aerospike_crec_create %"PRIx64"", *(uint64_t *)&sub_digest);
	as_ldt_digest_randomizer(head->tr->rsv.ns, &sub_digest);
	as_ldt_subdigest_setversion(&sub_digest, lrecord->version);

	// Claim a slot for the chunk.
	int free_idx = ldt_crec_find_freeslot(lrecord);

	if (free_idx == -1) {
		cf_warning(AS_LDT, "ldt_crec_create: Cannot open more than (%d) records in a single UDF",
				s_max_open_subrecs);
		return NULL;
	}

	cf_detail(AS_LDT, "ldt_crec_create: Popped slot %d", free_idx);

	ldt_chunk *chunk = &lrecord->chunk[free_idx];

	// The slot index is recorded before the chunk is initialized.
	chunk->slot = free_idx;
	ldt_chunk_init (chunk, lrecord);
	ldt_chunk_setup(chunk, lrecord->h_urec, &sub_digest);

	// Create the record behind the chunk.
	int create_rc = as_aerospike_rec_create(lrecord->as, chunk->c_urec_p);

	if (create_rc < 0) {
		// Give the slot back so it can be reused.
		ldt_chunk_destroy(chunk);
		cf_warning(AS_LDT, "ldt_crec_create: Record Create Failed rv=%d ... ", create_rc);
		return NULL;
	}

	cf_debug_digest(AS_LDT, &(chunk->c_urecord.keyd), "Crec Create:Ptr(%p) Digest: ", chunk->c_urec_p);

	// The returned value carries its own reference.
	as_val_reserve(chunk->c_urec_p);

	return chunk->c_urec_p;
}
/*
 * Internal Function: Function to open chunk record
 *
 * Parameters:
 * 		lrecord   : Parent ldt record
 * 		keyd      : Key digest for the record to be opened
 * 		slotp(out): Filled with the chunk slot whenever 0 is returned
 *
 * Return value :
 * 		 0  on success; *slotp holds the slot. This includes the case where
 * 		    the record was already open in this ldt record, in which case
 * 		    *slotp is the slot it already occupies.
 * 		-2  in case free slot cannot be found
 * 		-3  in case record cannot be opened
 *
 * Description:
 * 		1. Look for the digest among the already-open chunks.
 * 		2. Otherwise get an empty chunk slot and set it up.
 * 		3. Read the record into it.
 *
 * Callers:
 * 		ldt_aerospike_crec_open
 */
int
ldt_crec_open(ldt_record *lrecord, cf_digest *keyd, int *slotp)
{
	cf_debug_digest(AS_LDT, keyd, "[ENTER] ldt_crec_open(): Digest: ");

	// 1. Search in opened record
	int slot = ldt_crec_find_digest(lrecord, keyd);
	if (slot != -1) {
		cf_info(AS_LDT, "ldt_aerospike_rec_open : Found already open");
		// Success is reported to the caller, so the out-parameter must be
		// valid too; otherwise the caller indexes chunk[] with a stale value.
		*slotp = slot;
		return 0;
	}

	// 2. Find free slot and setup chunk
	slot = ldt_crec_find_freeslot(lrecord);
	if (slot == -1) {
		cf_warning(AS_LDT, "Cannot open more than (%d) records in a single UDF",
				s_max_open_subrecs);
		return -2;
	}
	cf_detail(AS_LDT, "ldt_crec_open popped slot %d", slot);
	lrecord->chunk[slot].slot = slot;
	ldt_chunk *lchunk = &lrecord->chunk[slot];
	ldt_chunk_init(lchunk, lrecord);
	ldt_chunk_setup(lchunk, lrecord->h_urec, keyd);
	//ldt_chunk_print(lrecord, slot);

	// Open Record
	int rv = udf_record_open((udf_record *)as_rec_source(lchunk->c_urec_p));
	if (rv) {
		// Open the slot for reuse
		ldt_chunk_destroy(&lrecord->chunk[slot]);
		return -3;
	}
	*slotp = slot;
	return 0;
}
/*
 * Main routine to replicate the chunks of LDT objects. The LDT directory rec
 * is not replicated using this function. This function is called for each chunk
 * that got updated as part of the single LDT operation. Note that in a single
 * LDT operation, there can be only few chunks that change. i.e chunks in one
 * path of the tree structure.
 *
 * Assumption:
 * 1. All records should have been closed.
 * 2. Pickled buf for all the record and subrecord which needs shipping should have
 *    been filled.
 *
 * Function:
 *
 * 1. Walk through each sub record and use its pickled buf to create
 *    RW_OP_WRITE. Pack it in the buffer and push it into the RW_MULTI_OP
 *    packet.
 * 2. This function packs entire pickled buf into the message that is one extra
 *    allocation into the multi-op over the fabric. The message hangs from the
 *    wr for the parent record for the retransmit
 *
 * Parameters:
 *    lrecord           : LDT record whose head and sub-records are packed
 *    pickled_buf (out) : set to 0 when the head has no pickled buf, otherwise
 *                        to a cf_malloc'd buffer holding the packed messages
 *                        (only when the total size is non-zero)
 *    pickled_sz  (out) : size counterpart of pickled_buf
 *    pickled_void_time (out) : set to 0 once the buffer has been filled
 *
 * Return value (as far as visible here):
 *    0 when as_partition_getreplica_readall() reports no replica nodes.
 *    Otherwise `ret` is set to -1 (allocation failure), -2 (empty pickled
 *    buf) or -3 (no fabric message) before jumping to `Out`.
 *
 * NOTE(review): the definition is truncated in this view - the `Out:` label
 * that the gotos target, the final return and the closing brace of the
 * function are not present. Whatever cleanup `Out` performs cannot be
 * confirmed from here.
 */
int
ldt_record_pickle(ldt_record *lrecord, uint8_t ** pickled_buf, size_t * pickled_sz, uint32_t * pickled_void_time)
{
	cf_detail(AS_LDT, "Enter: MULTI_OP: Packing LDT record");
	udf_record *h_urecord = as_rec_source(lrecord->h_urec);
	as_transaction *h_tr = h_urecord->tr;

	// Do an early check if we need to replicate to other nodes. In cases like
	// single-replica or single-node we don't need to do any replication.
	cf_node dest_nodes_tmp[AS_CLUSTER_SZ];
	memset(dest_nodes_tmp, 0, sizeof(dest_nodes_tmp));
	int listsz = as_partition_getreplica_readall(h_tr->rsv.ns, h_tr->rsv.pid, dest_nodes_tmp);
	if (listsz == 0) {
		return 0;
	}

	// A head record without a pickled buf is treated as a delete.
	bool is_delete = (h_urecord->pickled_buf) ? false : true;
	int ret = 0;
	int ops = 0;

	// TODO: change hard coded 7 to meaningful constant.
	// NOTE(review): nothing visible here checks `ops` against 7 before
	// writing m[ops]; more than six sub-records with a pickled buf would
	// index past the end of the array - confirm the caller bounds this.
	msg *m[7];
	memset(m, 0, 7 * sizeof(msg *));

	if (is_delete) {
		*pickled_buf = 0;
		*pickled_sz = 0;
	} else {
		// sz accumulates the total packed size; buflen is the per-message
		// size reported by msg_fillbuf() when called with a NULL buffer.
		size_t sz = 0;
		size_t buflen = 0;

		m[ops] = as_fabric_msg_get(M_TYPE_RW);
		if (!m[ops]) {
			ret = -3;
			goto Out;
		}

		if (!is_delete && h_urecord->pickled_buf) {
			cf_detail(AS_LDT, "MULTI_OP: Packing LDT Head Record");
			rw_msg_setup(m[ops], h_tr, &h_tr->keyd, &h_urecord->pickled_buf, h_urecord->pickled_sz, h_urecord->pickled_void_time, &h_urecord->pickled_rec_props, RW_OP_WRITE, h_urecord->ldt_rectype_bits, true);
			buflen = 0;
			msg_fillbuf(m[ops], NULL, &buflen);
			sz += buflen;
			ops++;
		}

		// This macro is a for-loop thru the SR list and a test for valid SR entry
		FOR_EACH_SUBRECORD(i, lrecord) {
			udf_record *c_urecord = &lrecord->chunk[i].c_urecord;
			// is_delete is re-derived per sub-record from its own pickled buf.
			is_delete = (c_urecord->pickled_buf) ? false : true;
			as_transaction *c_tr = c_urecord->tr;

			// Since is_delete is exactly !pickled_buf, this condition can only
			// be true when a pickled buf exists but pickled_sz <= 0.
			if ( ((!c_urecord->pickled_buf) || (c_urecord->pickled_sz <= 0)) && !is_delete ) {
				// NOTE(review): the arguments do not match the format string:
				// a uint64_t pointer is passed for PRIx64, and the
				// pickled_buf / pickled_sz pointer parameters are passed for
				// %p / %d - probably meant to log the sub-record's values.
				cf_warning(AS_RW, "Got an empty pickled buf while trying to " " replicate record with digest %"PRIx64" %p, %d, %d", (uint64_t *)&c_tr->keyd, pickled_buf, pickled_sz, is_delete);
				ret = -2;
				goto Out;
			}

			// if pickled_buf is there then it is a write operation
			if (!is_delete && c_urecord->pickled_buf) {
				cf_detail(AS_LDT, "MULTI_OP: Packing LDT SUB Record");
				m[ops] = as_fabric_msg_get(M_TYPE_RW);
				if (!m[ops]) {
					ret = -3;
					goto Out;
				}
				rw_msg_setup(m[ops], c_tr, &c_tr->keyd, &c_urecord->pickled_buf, c_urecord->pickled_sz, c_urecord->pickled_void_time, &c_urecord->pickled_rec_props, RW_OP_WRITE, c_urecord->ldt_rectype_bits, true);
				buflen = 0;
				msg_fillbuf(m[ops], NULL, &buflen);
				sz += buflen;
				ops++;
			}
		}

		if (sz) {
			uint8_t *buf = cf_malloc(sz);
			if (!buf) {
				// NOTE(review): this assigns the pointer parameter itself, not
				// the caller's value; `*pickled_sz = 0` looks intended.
				pickled_sz = 0;
				*pickled_buf = NULL;
				ret = -1;
				goto Out;
			}
			*pickled_buf = buf;
			*pickled_sz = sz;
			int rsz = sz;
			sz = 0;

			// Second pass: serialize each message back-to-back into buf.
			for (int i = 0; i < ops; i++) {
				// NOTE(review): sz here is total minus the previous message's
				// size, not total minus everything written so far; `ret` keeps
				// only the last msg_fillbuf() result.
				sz = rsz - sz;
				ret = msg_fillbuf(m[i], buf, &sz);
				buf += sz;
			}
			*pickled_void_time = 0;
		}
	}
/** * aerospike::create(record) * Function: udf_aerospike_rec_create * * Parameters: * as - as_aerospike * rec - as_rec * * Return Values: * 1 if record is being read or on a create, it already exists * o/w return value of udf_aerospike__execute_updates * * Description: * Create a new record in local storage. * The record will only be created if it does not exist. * This assumes the record has a digest that is valid for local storage. * * Synchronization : object lock acquired by the transaction thread executing UDF. * Partition reservation takes place just before the transaction starts executing * ( look for as_partition_reserve_udf in thr_tsvc.c ) * * Callers: * lua interfacing function, mod_lua_aerospike_rec_create * The return value of udf_aerospike_rec_create is pushed on to the lua stack * * Notes: * The 'read' and 'exists' flag of udf_record are set to true. */ static int udf_aerospike_rec_create(const as_aerospike * as, const as_rec * rec) { int ret = udf_aerospike_param_check(as, rec, __FILE__, __LINE__); if (ret) { return ret; } udf_record * urecord = (udf_record *) as_rec_source(rec); // make sure record isn't already successfully read if (urecord->flag & UDF_RECORD_FLAG_OPEN) { cf_detail(AS_UDF, "udf_aerospike_rec_create: Record Already Exists"); return 1; } as_transaction *tr = urecord->tr; as_index_ref *r_ref = urecord->r_ref; as_storage_rd *rd = urecord->rd; as_index_tree *tree = tr->rsv.tree; if (urecord->flag & UDF_RECORD_FLAG_IS_SUBRECORD) { tree = tr->rsv.sub_tree; } // make sure we got the record as a create int rv = as_record_get_create(tree, &tr->keyd, r_ref, tr->rsv.ns); cf_detail_digest(AS_UDF, &tr->keyd, "Creating %sRecord", (urecord->flag & UDF_RECORD_FLAG_IS_SUBRECORD) ? "Sub" : ""); // rv 0 means record exists, 1 means create, < 0 means fail // TODO: Verify correct result codes. 
if (rv == 0) { cf_warning(AS_UDF, "udf_aerospike_rec_create: Record Already Exists 2"); as_record_done(r_ref, tr->rsv.ns); bzero(r_ref, sizeof(as_index_ref)); return 1; } else if (rv < 0) { cf_warning(AS_UDF, "udf_aerospike_rec_create: Record Open Failed with rv=%d", rv); return rv; } // Associates the set name with the storage rec and index if(tr->msgp) { // Set the set name to index and close record if the setting the set name // is not successful int rv_set = as_record_set_set_from_msg(r_ref->r, tr->rsv.ns, &tr->msgp->msg); if (rv_set != 0) { cf_warning(AS_UDF, "udf_aerospike_rec_create: Failed to set setname"); as_record_done(r_ref, tr->rsv.ns); // TODO bzero is expensive. Switch to use flag. bzero(r_ref, sizeof(as_index_ref)); return 4; } } urecord->flag |= UDF_RECORD_FLAG_OPEN; cf_detail(AS_UDF, "Open %p %x %"PRIx64"", urecord, urecord->flag, *(uint64_t *)&tr->keyd); as_index *r = r_ref->r; // open up storage as_storage_record_create(urecord->tr->rsv.ns, urecord->r_ref->r, urecord->rd, &urecord->tr->keyd); cf_detail(AS_UDF, "as_storage_record_create: udf_aerospike_rec_create: r %p rd %p", urecord->r_ref->r, urecord->rd); // if multibin storage, we will use urecord->stack_bins, so set the size appropriately if ( ! rd->ns->storage_data_in_memory && ! rd->ns->single_bin ) { rd->n_bins = sizeof(urecord->stack_bins) / sizeof(as_bin); } // side effect: will set the unused bins to properly unused rd->bins = as_bin_get_all(r, rd, urecord->stack_bins); urecord->flag |= UDF_RECORD_FLAG_STORAGE_OPEN; // If the message has a key, apply it to the record. 
as_msg_field* f = as_msg_field_get(&tr->msgp->msg, AS_MSG_FIELD_TYPE_KEY); if (f) { rd->key_size = as_msg_field_get_value_sz(f); rd->key = f->data; } cf_detail(AS_UDF, "Storage Open %p %x %"PRIx64"", urecord, urecord->flag, *(uint64_t *)&tr->keyd); cf_detail(AS_UDF, "udf_aerospike_rec_create: Record created %d", urecord->flag); int rc = udf_aerospike__execute_updates(urecord); if(rc) { // Creating the udf record failed, destroy the as_record if (!as_bin_inuse_has(urecord->rd)) { udf_aerospike_rec_remove(as, rec); } } return rc; }
/** * aerospike::create(record) * Function: udf_aerospike_rec_create * * Parameters: * as - as_aerospike * rec - as_rec * * Return Values: * 1 if record is being read or on a create, it already exists * o/w return value of udf_aerospike__execute_updates * * Description: * Create a new record in local storage. * The record will only be created if it does not exist. * This assumes the record has a digest that is valid for local storage. * * Synchronization : object lock acquired by the transaction thread executing UDF. * Partition reservation takes place just before the transaction starts executing * ( look for as_partition_reserve_udf in thr_tsvc.c ) * * Callers: * lua interfacing function, mod_lua_aerospike_rec_create * The return value of udf_aerospike_rec_create is pushed on to the lua stack * * Notes: * The 'read' and 'exists' flag of udf_record are set to true. */ static int udf_aerospike_rec_create(const as_aerospike * as, const as_rec * rec) { int ret = udf_aerospike_param_check(as, rec, __FILE__, __LINE__); if (ret) { return ret; } udf_record * urecord = (udf_record *) as_rec_source(rec); // make sure record isn't already successfully read if (urecord->flag & UDF_RECORD_FLAG_OPEN) { cf_detail(AS_UDF, "udf_aerospike_rec_create: Record Already Exists"); return 1; } as_transaction *tr = urecord->tr; as_index_ref *r_ref = urecord->r_ref; as_storage_rd *rd = urecord->rd; as_index_tree *tree = tr->rsv.tree; bool is_subrec = false; if (urecord->flag & UDF_RECORD_FLAG_IS_SUBRECORD) { tree = tr->rsv.sub_tree; is_subrec = true; } // make sure we got the record as a create bool is_create = false; int rv = as_record_get_create(tree, &tr->keyd, r_ref, tr->rsv.ns, is_subrec); cf_detail_digest(AS_UDF, &tr->keyd, "Creating %sRecord", (urecord->flag & UDF_RECORD_FLAG_IS_SUBRECORD) ? "Sub" : ""); // rv 0 means record exists, 1 means create, < 0 means fail // TODO: Verify correct result codes. 
if (rv == 1) { is_create = true; } else if (rv == 0) { // If it's an expired record, pretend it's a fresh create. if (as_record_is_expired(r_ref->r)) { as_record_destroy(r_ref->r, tr->rsv.ns); as_record_initialize(r_ref, tr->rsv.ns); cf_atomic_int_incr(&tr->rsv.ns->n_objects); is_create = true; } else { cf_warning(AS_UDF, "udf_aerospike_rec_create: Record Already Exists 2"); as_record_done(r_ref, tr->rsv.ns); // DO NOT change it has special meaning for caller return 1; } } else if (rv < 0) { cf_warning(AS_UDF, "udf_aerospike_rec_create: Record Open Failed with rv=%d", rv); return rv; } // Associates the set name with the storage rec and index if (tr->msgp) { // Set the set name to index and close record if the setting the set name // is not successful int rv_set = as_transaction_has_set(tr) ? as_record_set_set_from_msg(r_ref->r, tr->rsv.ns, &tr->msgp->msg) : 0; if (rv_set != 0) { cf_warning(AS_UDF, "udf_aerospike_rec_create: Failed to set setname"); if (is_create) { as_index_delete(tree, &tr->keyd); } as_record_done(r_ref, tr->rsv.ns); return 4; } } urecord->flag |= UDF_RECORD_FLAG_OPEN; cf_detail(AS_UDF, "Open %p %x %"PRIx64"", urecord, urecord->flag, *(uint64_t *)&tr->keyd); as_index *r = r_ref->r; // open up storage as_storage_record_create(urecord->tr->rsv.ns, urecord->r_ref->r, urecord->rd, &urecord->tr->keyd); cf_detail(AS_UDF, "as_storage_record_create: udf_aerospike_rec_create: r %p rd %p", urecord->r_ref->r, urecord->rd); // If the message has a key, apply it to the record. if (! get_msg_key(tr, rd)) { cf_warning(AS_UDF, "udf_aerospike_rec_create: Can't store key"); if (is_create) { as_index_delete(tree, &tr->keyd); } as_record_done(r_ref, tr->rsv.ns); urecord->flag &= ~UDF_RECORD_FLAG_OPEN; return 4; } // if multibin storage, we will use urecord->stack_bins, so set the size appropriately if ( ! rd->ns->storage_data_in_memory && ! 
rd->ns->single_bin ) { rd->n_bins = sizeof(urecord->stack_bins) / sizeof(as_bin); } // side effect: will set the unused bins to properly unused rd->bins = as_bin_get_all(r, rd, urecord->stack_bins); urecord->flag |= UDF_RECORD_FLAG_STORAGE_OPEN; cf_detail(AS_UDF, "Storage Open %p %x %"PRIx64"", urecord, urecord->flag, *(uint64_t *)&tr->keyd); cf_detail(AS_UDF, "udf_aerospike_rec_create: Record created %d", urecord->flag); int rc = udf_aerospike__execute_updates(urecord); if (rc) { // Creating the udf record failed, destroy the as_record cf_warning(AS_UDF, "udf_aerospike_rec_create: failure executing record updates (%d)", rc); if (!as_bin_inuse_has(urecord->rd)) { udf_aerospike_rec_remove(as, rec); } } return rc; }
static int map_rec_set(const as_rec * r, const char * name, const as_val * value) { as_map * m = (as_map *) as_rec_source(r); return as_map_set(m, (as_val *) as_string_new(strdup(name),true), (as_val *) value); }