/*
 * Function: Open the storage record behind a UDF record and load its bins.
 *
 * Parameters:
 * 		urecord : UDF record whose storage record is to be opened
 *
 * Return value:
 * 		 0 on success
 * 		-1 if the record's bin count exceeds UDF_RECORD_BIN_ULIMIT (the
 * 		   storage record is closed again before returning)
 * 		otherwise the non-zero code returned by as_storage_record_open()
 *
 * Callers:
 * 		udf_record_open
 *
 * Note: urecord, its transaction and its index reference are used without
 *       validation; the caller has to make sure that all protections are
 *       taken and all checks are done.
 *
 * Side effects (on success):
 * 		rd->n_bins and rd->bins are populated
 * 		urecord->starting_memory_bytes is recorded
 * 		UDF_RECORD_FLAG_STORAGE_OPEN is set in urecord->flag
 * 		for sub-records, the parent's subrec_io counter is incremented
 */
int
udf_storage_record_open(udf_record *urecord)
{
	cf_debug_digest(AS_UDF, &urecord->tr->keyd, "[ENTER] Opening record key:");

	as_transaction *txn = urecord->tr;
	as_index *index_rec = urecord->r_ref->r;
	as_storage_rd *storage_rd = urecord->rd;

	int open_rc = as_storage_record_open(txn->rsv.ns, index_rec, storage_rd, &index_rec->key);

	if (open_rc != 0) {
		cf_warning(AS_UDF, "Could not open record !! %d", open_rc);
		return open_rc;
	}

	storage_rd->n_bins = as_bin_get_n_bins(index_rec, storage_rd);

	if (storage_rd->n_bins > UDF_RECORD_BIN_ULIMIT) {
		cf_warning(AS_UDF, "record has too many bins (%d) for UDF processing", storage_rd->n_bins);
		as_storage_record_close(index_rec, storage_rd);
		return -1;
	}

	// Multi-bin storage that is not in memory reads into urecord->stack_bins,
	// so the bin count is set to the capacity of that array.
	if (! (txn->rsv.ns->storage_data_in_memory || txn->rsv.ns->single_bin)) {
		storage_rd->n_bins = sizeof(urecord->stack_bins) / sizeof(as_bin);
	}

	storage_rd->bins = as_bin_get_all(index_rec, storage_rd, urecord->stack_bins);
	urecord->starting_memory_bytes = as_storage_record_get_n_bytes_memory(storage_rd);
	as_storage_record_get_key(storage_rd);

	urecord->flag |= UDF_RECORD_FLAG_STORAGE_OPEN;

	// Nothing below changes the sub-record bit, so evaluate it once.
	const int is_subrec = (urecord->flag & UDF_RECORD_FLAG_IS_SUBRECORD) != 0;

	if (is_subrec) {
		urecord->lrecord->subrec_io++;
	}

	cf_detail_digest(AS_UDF, &txn->keyd, "Storage Open: Rec(%p) flag(%x) Digest:", urecord, urecord->flag );

	if (is_subrec) {
		as_ldt_subrec_storage_validate(storage_rd, "Reading");
	}

	return 0;
}
/*
 * Function: Open a UDF record: look the record up in the index (unless
 *           that was already done) and then open its storage record.
 *
 * Parameters:
 * 		urecord : UDF record
 *
 * Return value :
 * 		 0 in case the record is successfully read, or its storage was
 * 		   already open
 * 		-2 in case the record is found but has expired
 * 		any other non-zero value is passed through from as_record_get()
 * 		   (documented by the original author as -1, record not found) or
 * 		   from udf_storage_record_open()
 *
 * Side effects:
 * 		On a successful lookup UDF_RECORD_FLAG_OPEN and
 * 		UDF_RECORD_FLAG_PREEXISTS are set in urecord->flag.
 * 		On the expired path the index reference is released and
 * 		UDF_RECORD_FLAG_OPEN is cleared.
 *
 * Callers:
 * 		query_agg_istream_read
 * 		ldt_crec_open
 */
int
udf_record_open(udf_record * urecord)
{
	cf_debug_digest(AS_UDF, &urecord->tr->keyd, "[ENTER] Opening record key:");

	if (urecord->flag & UDF_RECORD_FLAG_STORAGE_OPEN) {
		cf_info(AS_UDF, "Record already open");
		return 0;
	}

	as_transaction *tr = urecord->tr;
	as_index_ref *r_ref = urecord->r_ref;
	as_index_tree *tree = tr->rsv.tree;

	// Sub-records live in their own tree.
	if (urecord->flag & UDF_RECORD_FLAG_IS_SUBRECORD) {
		tree = tr->rsv.sub_tree;
	}

	int rec_rv = 0;

	// Only look the record up when the index reference is not held yet.
	if (!(urecord->flag & UDF_RECORD_FLAG_OPEN)) {
		cf_detail(AS_UDF, "Opening %sRecord ", (urecord->flag & UDF_RECORD_FLAG_IS_SUBRECORD) ? "Sub" : "");
		rec_rv = as_record_get(tree, &tr->keyd, r_ref, tr->rsv.ns);
	}

	if (rec_rv != 0) {
		cf_detail_digest(AS_UDF, &urecord->tr->keyd, "udf_record_open: %s rec_get returned with %d", (urecord->flag & UDF_RECORD_FLAG_IS_SUBRECORD) ? "sub" : "", rec_rv);
		return rec_rv;
	}

	// check to see this isn't an expired record waiting to die
	if (as_record_is_expired(r_ref->r)) {
		as_record_done(r_ref, tr->rsv.ns);
		// The index reference is gone now. udf_record_close() releases the
		// reference whenever UDF_RECORD_FLAG_OPEN is set, so the flag must
		// not stay set here or the reference would be released twice.
		urecord->flag &= ~UDF_RECORD_FLAG_OPEN;
		cf_detail(AS_UDF, "udf_record_open: Record has expired cannot read");
		return -2;
	}

	urecord->flag |= UDF_RECORD_FLAG_OPEN;
	urecord->flag |= UDF_RECORD_FLAG_PREEXISTS;
	cf_detail_digest(AS_UDF, &tr->keyd, "Open %p %x Digest:", urecord, urecord->flag);

	return udf_storage_record_open(urecord);
}
/*
 * Internal Function: Create a new chunk (sub) record under an LDT record.
 *
 * Parameters:
 * 		lrecord : Parent ldt record
 *
 * Return value :
 * 		the chunk's as_rec (with an extra reference taken through
 * 		as_val_reserve) in case of success
 * 		NULL if no free chunk slot is available or the record create fails
 *
 * Description:
 * 		1. Derive the sub-record digest from the head record's key.
 * 		2. Claim a free chunk slot and set the chunk up with that digest.
 * 		3. Create the record; on failure the chunk is destroyed again.
 *
 * Callers:
 * 		ldt_aerospike_crec_create
 */
as_rec *
ldt_crec_create(ldt_record *lrecord)
{
	// Build the sub-record digest, starting from the head record's key.
	udf_record *head_urec = (udf_record *) as_rec_source(lrecord->h_urec);
	cf_digest sub_keyd = head_urec->r_ref->r->key;

	cf_detail(AS_LDT, "ldt_aerospike_crec_create %"PRIx64"", *(uint64_t *)&sub_keyd);
	as_ldt_digest_randomizer(head_urec->tr->rsv.ns, &sub_keyd);
	as_ldt_subdigest_setversion(&sub_keyd, lrecord->version);

	// Claim a chunk slot.
	int free_slot = ldt_crec_find_freeslot(lrecord);

	if (-1 == free_slot) {
		cf_warning(AS_LDT, "ldt_crec_create: Cannot open more than (%d) records in a single UDF", s_max_open_subrecs);
		return NULL;
	}

	cf_detail(AS_LDT, "ldt_crec_create: Popped slot %d", free_slot);

	ldt_chunk *chunk = &lrecord->chunk[free_slot];

	chunk->slot = free_slot;
	ldt_chunk_init (chunk, lrecord);
	ldt_chunk_setup(chunk, lrecord->h_urec, &sub_keyd);

	// Create the record itself.
	int create_rv = as_aerospike_rec_create(lrecord->as, chunk->c_urec_p);

	if (create_rv < 0) {
		// Give the slot back before reporting the failure.
		ldt_chunk_destroy(chunk);
		cf_warning(AS_LDT, "ldt_crec_create: Record Create Failed rv=%d ... ", create_rv);
		return NULL;
	}

	cf_debug_digest(AS_LDT, &(chunk->c_urecord.keyd), "Crec Create:Ptr(%p) Digest: ", chunk->c_urec_p);

	// The returned value carries its own reference.
	as_val_reserve(chunk->c_urec_p);

	return chunk->c_urec_p;
}
/*
 * Internal Function: Function to open chunk record
 *
 * Parameters:
 * 		lrecord    : Parent ldt record
 * 		keyd       : Key digest for the record to be opened
 * 		slotp(out) : Filled with the chunk slot whenever 0 is returned
 *
 * Return value :
 * 		 0 in case of success; this includes the case where the record is
 * 		   already open, in which case *slotp is the slot it already uses
 * 		-2 in case free slot cannot be found
 * 		-3 in case record cannot be opened
 *
 * Description:
 * 		1. Look for the digest among the chunks that are already open.
 * 		2. Otherwise get an empty chunk slot and set the chunk up.
 * 		3. Read the record into it; on failure the chunk is destroyed so
 * 		   the slot can be reused.
 *
 * Callers:
 * 		ldt_aerospike_crec_open
 */
int
ldt_crec_open(ldt_record *lrecord, cf_digest *keyd, int *slotp)
{
	cf_debug_digest(AS_LDT, keyd, "[ENTER] ldt_crec_open(): Digest: ");

	// 1. Search in opened record
	int slot = ldt_crec_find_digest(lrecord, keyd);
	if (slot != -1) {
		cf_info(AS_LDT, "ldt_aerospike_rec_open : Found already open");
		// This is a success return, so the out-parameter must be filled;
		// otherwise the caller would index chunk[] with an unset value.
		*slotp = slot;
		return 0;
	}

	// 2. Find free slot and setup chunk
	slot = ldt_crec_find_freeslot(lrecord);
	if (slot == -1) {
		cf_warning(AS_LDT, "Cannot open more than (%d) records in a single UDF", s_max_open_subrecs);
		return -2;
	}
	cf_detail(AS_LDT, "ldt_crec_open popped slot %d", slot);

	ldt_chunk *lchunk = &lrecord->chunk[slot];
	lchunk->slot = slot;
	ldt_chunk_init(lchunk, lrecord);
	ldt_chunk_setup(lchunk, lrecord->h_urec, keyd);

	// 3. Open Record
	int rv = udf_record_open((udf_record *)as_rec_source(lchunk->c_urec_p));
	if (rv) {
		// Open the slot for reuse
		ldt_chunk_destroy(lchunk);
		return -3;
	}

	*slotp = slot;
	return 0;
}
/*
 * Function: Close a UDF record.
 *
 *           If the record is open (UDF_RECORD_FLAG_OPEN), its storage
 *           record is closed, the index reference is released and the
 *           flag is cleared. Independently of that, any particle data
 *           buffer is freed and the record's cache is freed.
 *
 * Parameters:
 * 		urecord : UDF record being operated on
 *
 * Return value : Nothing
 *
 * Callers:
 * 		query_agg_istream_read
 * 		ldt_aerospike_crec_close
 * 		as_query__agg
 * 		udf_record_destroy
 */
void
udf_record_close(udf_record *urecord)
{
	as_transaction *txn = urecord->tr;

	cf_debug_digest(AS_UDF, &txn->keyd, "[ENTER] Closing record key:");

	if ((urecord->flag & UDF_RECORD_FLAG_OPEN) != 0) {
		as_index_ref *index_ref = urecord->r_ref;

		cf_detail(AS_UDF, "Closing %sRecord", (urecord->flag & UDF_RECORD_FLAG_IS_SUBRECORD) ? "Sub" : "");

		// Storage goes first, then the index reference.
		udf_storage_record_close(urecord);
		as_record_done(index_ref, txn->rsv.ns);
		urecord->flag &= ~UDF_RECORD_FLAG_OPEN;

		cf_detail_digest(AS_UDF, &urecord->tr->keyd, "Storage Close:: Rec(%p) Flag(%x) Digest:", urecord, urecord->flag );
	}

	// Replication happens when the main record replicates
	if (urecord->particle_data) {
		cf_free(urecord->particle_data);
		urecord->particle_data = 0;
	}

	udf_record_cache_free(urecord);
}
// Make a request to another node. // // Note: there's a cheat here. 'as_msg' is used in a raw form, and includes // structured data (version - type - nfields - sz ...) which should be made more // wire-protocol-friendly. int as_proxy_divert(cf_node dst, as_transaction *tr, as_namespace *ns, uint64_t cluster_key) { cf_detail(AS_PROXY, "proxy divert"); cf_atomic_int_incr(&g_config.stat_proxy_reqs); if (tr->msgp && (tr->msgp->msg.info1 & AS_MSG_INFO1_XDR)) { cf_atomic_int_incr(&g_config.stat_proxy_reqs_xdr); } as_partition_id pid = as_partition_getid(tr->keyd); if (dst == 0) { // Get the list of replicas. dst = as_partition_getreplica_read(ns, pid); } // Create a fabric message, fill it out. msg *m = as_fabric_msg_get(M_TYPE_PROXY); if (!m) { return -1; } uint32_t tid = cf_atomic32_incr(&g_proxy_tid); msg_set_uint32(m, PROXY_FIELD_OP, PROXY_OP_REQUEST); msg_set_uint32(m, PROXY_FIELD_TID, tid); msg_set_buf(m, PROXY_FIELD_DIGEST, (void *) &tr->keyd, sizeof(cf_digest), MSG_SET_COPY); msg_set_type msettype = tr->batch_shared ? MSG_SET_COPY : MSG_SET_HANDOFF_MALLOC; msg_set_buf(m, PROXY_FIELD_AS_PROTO, (void *) tr->msgp, as_proto_size_get(&tr->msgp->proto), msettype); msg_set_uint64(m, PROXY_FIELD_CLUSTER_KEY, cluster_key); msg_set_uint32(m, PROXY_FIELD_TIMEOUT_MS, tr->msgp->msg.transaction_ttl); tr->msgp = 0; cf_debug_digest(AS_PROXY, &tr->keyd, "proxy_divert: fab_msg %p dst %"PRIx64, m, dst); // Fill out a retransmit structure, insert into the retransmit hash. msg_incr_ref(m); proxy_request pr; pr.start_time = tr->start_time; pr.end_time = (tr->end_time != 0) ? 
tr->end_time : pr.start_time + g_config.transaction_max_ns; pr.fd_h = tr->proto_fd_h; tr->proto_fd_h = 0; pr.fab_msg = m; pr.xmit_ms = cf_getms() + g_config.transaction_retry_ms; pr.retry_interval_ms = g_config.transaction_retry_ms; pr.dest = dst; pr.pid = pid; pr.ns = ns; pr.wr = NULL; pr.batch_shared = tr->batch_shared; pr.batch_index = tr->batch_index; if (0 != shash_put(g_proxy_hash, &tid, &pr)) { cf_debug(AS_PROXY, " shash_put failed, need cleanup code"); return -1; } // Send to the remote node. int rv = as_fabric_send(dst, m, AS_FABRIC_PRIORITY_MEDIUM); if (rv != 0) { cf_debug(AS_PROXY, "as_proxy_divert: returned error %d", rv); as_fabric_msg_put(m); } cf_atomic_int_incr(&g_config.proxy_initiate); return 0; }