Esempio n. 1
0
/* PRINT DIGEST:
 * This has capability is now in the new cf_{info|debug|detail}_digest() calls.
 * printd() will eventually be replaced.
 */
int
printd(cf_digest *d, char *fname, int lineno)
{
	const char hex_chars[] = "0123456789ABCDEF";
	cf_detail(AS_LDT, "[%s:%d] ptnid = %d, Lkbits [%3d:%3d], %c%c%c%c%c%c%c%c|%c%c%c%c%c%c"
			  "%c%c%c%c%c%c%c%c%c%c%c%c%c%c|%c%c%c%c%c%c%c%c%c%c%c%c",
			  fname, lineno,
			  as_partition_getid(*d), d->digest[2], d->digest[3],
			  hex_chars[d->digest[0] >> 4], hex_chars[d->digest[0] & 0xf],
			  hex_chars[d->digest[1] >> 4], hex_chars[d->digest[1] & 0xf],
			  hex_chars[d->digest[2] >> 4], hex_chars[d->digest[2] & 0xf],
			  hex_chars[d->digest[3] >> 4], hex_chars[d->digest[3] & 0xf],
			  hex_chars[d->digest[4] >> 4], hex_chars[d->digest[4] & 0xf],
			  hex_chars[d->digest[5] >> 4], hex_chars[d->digest[5] & 0xf],
			  hex_chars[d->digest[6] >> 4], hex_chars[d->digest[6] & 0xf],
			  hex_chars[d->digest[7] >> 4], hex_chars[d->digest[7] & 0xf],
			  hex_chars[d->digest[8] >> 4], hex_chars[d->digest[8] & 0xf],
			  hex_chars[d->digest[9] >> 4], hex_chars[d->digest[9] & 0xf],
			  hex_chars[d->digest[10] >> 4], hex_chars[d->digest[10] & 0xf],
			  hex_chars[d->digest[11] >> 4], hex_chars[d->digest[11] & 0xf],
			  hex_chars[d->digest[12] >> 4], hex_chars[d->digest[12] & 0xf],
			  hex_chars[d->digest[13] >> 4], hex_chars[d->digest[13] & 0xf],
			  hex_chars[d->digest[14] >> 4], hex_chars[d->digest[14] & 0xf],
			  hex_chars[d->digest[15] >> 4], hex_chars[d->digest[15] & 0xf],
			  hex_chars[d->digest[16] >> 4], hex_chars[d->digest[16] & 0xf],
			  hex_chars[d->digest[17] >> 4], hex_chars[d->digest[17] & 0xf],
			  hex_chars[d->digest[18] >> 4], hex_chars[d->digest[18] & 0xf],
			  hex_chars[d->digest[19] >> 4], hex_chars[d->digest[19] & 0xf]);
	return 0;
}
Esempio n. 2
0
/*
 * Return 0  in case of success
 *        -1 in case of failure
 */
static int
btree_addsinglerec(as_sindex_metadata *imd, ai_obj * key, cf_digest *dig, cf_ll *recl, uint64_t *n_bdigs, 
								bool * can_partition_query, bool partitions_pre_reserved)
{
	// The digests which belongs to one of the query-able partitions are elligible to go into recl
	uint32_t pid =  as_partition_getid(dig);
	as_namespace * ns = imd->si->ns;
	if (partitions_pre_reserved) {
		if (!can_partition_query[pid]) {
			return 0;
		}
	}
	else {
		if (! client_replica_maps_is_partition_queryable(ns, pid)) {
			return 0;
		}
	}

	bool create                     = (cf_ll_size(recl) == 0) ? true : false;
	as_index_keys_arr * keys_arr    = NULL;
	if (!create) {
		cf_ll_element * ele         = cf_ll_get_tail(recl);
		keys_arr                    = ((as_index_keys_ll_element*)ele)->keys_arr;
		if (keys_arr->num == AS_INDEX_KEYS_PER_ARR) {
			create = true;
		}
	}
	if (create) {
		keys_arr                    = as_index_get_keys_arr();
		if (!keys_arr) {
			cf_warning(AS_SINDEX, "Fail to allocate sindex key value array");
			return -1;
		}
		as_index_keys_ll_element * node =  cf_malloc(sizeof(as_index_keys_ll_element));
		node->keys_arr                  = keys_arr;
		cf_ll_append(recl, (cf_ll_element *)node);
	}
	// Copy the digest (value)
	memcpy(&keys_arr->pindex_digs[keys_arr->num], dig, CF_DIGEST_KEY_SZ);

	// Copy the key
	if (C_IS_DG(imd->sktype)) {
		memcpy(&keys_arr->sindex_keys[keys_arr->num].key.str_key, &key->y, CF_DIGEST_KEY_SZ);
	}
	else {
		keys_arr->sindex_keys[keys_arr->num].key.int_key = key->l;
	}

	keys_arr->num++;
	*n_bdigs = *n_bdigs + 1;
	return 0;
}
Esempio n. 3
0
int
as_proxy_shipop(cf_node dst, write_request *wr)
{
	as_partition_id pid = as_partition_getid(wr->keyd);

	if (dst == 0) {
		cf_crash(AS_PROXY, "the destination should never be zero");
	}

	// Create a fabric message, fill it out.
	msg *m = as_fabric_msg_get(M_TYPE_PROXY);
	if (!m)	{
		return -1;
	}

	uint32_t tid = cf_atomic32_incr(&g_proxy_tid);

	msg_set_uint32(m, PROXY_FIELD_OP, PROXY_OP_REQUEST);
	msg_set_uint32(m, PROXY_FIELD_TID, tid);
	msg_set_buf(m, PROXY_FIELD_DIGEST, (void *) &wr->keyd, sizeof(cf_digest), MSG_SET_COPY);
	msg_set_buf(m, PROXY_FIELD_AS_PROTO, (void *) wr->msgp, as_proto_size_get(&wr->msgp->proto), MSG_SET_HANDOFF_MALLOC);
	msg_set_uint64(m, PROXY_FIELD_CLUSTER_KEY, as_paxos_get_cluster_key());
	msg_set_uint32(m, PROXY_FIELD_TIMEOUT_MS, wr->msgp->msg.transaction_ttl);
	wr->msgp = 0;

	// If it is shipped op.
	uint32_t info = 0;
	info |= PROXY_INFO_SHIPPED_OP;
	msg_set_uint32(m, PROXY_FIELD_INFO, info);

	cf_detail_digest(AS_PROXY, &wr->keyd, "SHIPPED_OP %s->WINNER msg %p Proxy Sent to %"PRIx64" %p tid(%d)",
			wr->proxy_msg ? "NONORIG" : "ORIG", m, dst, wr, tid);

	// Fill out a retransmit structure, insert into the retransmit hash.
	msg_incr_ref(m);
	proxy_request pr;
	pr.start_time  = wr->start_time;
	pr.end_time    = (wr->end_time != 0) ? wr->end_time : pr.start_time + g_config.transaction_max_ns;
	cf_rc_reserve(wr);
	pr.wr          = wr;
	pr.fab_msg     = m;
	pr.xmit_ms     = cf_getms() + g_config.transaction_retry_ms;
	pr.retry_interval_ms = g_config.transaction_retry_ms;
	pr.dest        = dst;
	pr.pid         = pid;
	pr.fd_h        = NULL;
	pr.batch_shared = NULL;
	pr.batch_index = 0;

	if (0 != shash_put(g_proxy_hash, &tid, &pr)) {
		cf_info(AS_PROXY, " shash_put failed, need cleanup code");
		return -1;
	}

	// Send to the remote node.
	int rv = as_fabric_send(dst, m, AS_FABRIC_PRIORITY_MEDIUM);
	if (rv != 0) {
		cf_detail(AS_PROXY, "SHIPPED_OP ORIG [Digest %"PRIx64"] Failed with %d", *(uint64_t *)&wr->keyd, rv);
		as_fabric_msg_put(m);
	}

	wr->shipped_op_initiator = true;
	cf_atomic_int_incr(&g_config.ldt_proxy_initiate);

	return 0;
}
Esempio n. 4
0
// Make a request to another node.
//
// Note: there's a cheat here. 'as_msg' is used in a raw form, and includes
// structured data (version - type - nfields - sz ...) which should be made more
// wire-protocol-friendly.
int
as_proxy_divert(cf_node dst, as_transaction *tr, as_namespace *ns, uint64_t cluster_key)
{
	cf_detail(AS_PROXY, "proxy divert");

	cf_atomic_int_incr(&g_config.stat_proxy_reqs);
	if (tr->msgp && (tr->msgp->msg.info1 & AS_MSG_INFO1_XDR)) {
		cf_atomic_int_incr(&g_config.stat_proxy_reqs_xdr);
	}
	as_partition_id pid = as_partition_getid(tr->keyd);

	if (dst == 0) {
		// Get the list of replicas.
		dst = as_partition_getreplica_read(ns, pid);
	}

	// Create a fabric message, fill it out.
	msg *m = as_fabric_msg_get(M_TYPE_PROXY);
	if (!m)	{
		return -1;
	}

	uint32_t tid = cf_atomic32_incr(&g_proxy_tid);

	msg_set_uint32(m, PROXY_FIELD_OP, PROXY_OP_REQUEST);
	msg_set_uint32(m, PROXY_FIELD_TID, tid);
	msg_set_buf(m, PROXY_FIELD_DIGEST, (void *) &tr->keyd, sizeof(cf_digest), MSG_SET_COPY);
	msg_set_type msettype = tr->batch_shared ? MSG_SET_COPY : MSG_SET_HANDOFF_MALLOC;
	msg_set_buf(m, PROXY_FIELD_AS_PROTO, (void *) tr->msgp, as_proto_size_get(&tr->msgp->proto), msettype);
	msg_set_uint64(m, PROXY_FIELD_CLUSTER_KEY, cluster_key);
	msg_set_uint32(m, PROXY_FIELD_TIMEOUT_MS, tr->msgp->msg.transaction_ttl);

	tr->msgp = 0;

	cf_debug_digest(AS_PROXY, &tr->keyd, "proxy_divert: fab_msg %p dst %"PRIx64, m, dst);

	// Fill out a retransmit structure, insert into the retransmit hash.
	msg_incr_ref(m);
	proxy_request pr;
	pr.start_time = tr->start_time;
	pr.end_time = (tr->end_time != 0) ? tr->end_time : pr.start_time + g_config.transaction_max_ns;
	pr.fd_h = tr->proto_fd_h;
	tr->proto_fd_h = 0;
	pr.fab_msg = m;
	pr.xmit_ms = cf_getms() + g_config.transaction_retry_ms;
	pr.retry_interval_ms = g_config.transaction_retry_ms;
	pr.dest = dst;
	pr.pid = pid;
	pr.ns = ns;
	pr.wr = NULL;
	pr.batch_shared = tr->batch_shared;
	pr.batch_index = tr->batch_index;

	if (0 != shash_put(g_proxy_hash, &tid, &pr)) {
		cf_debug(AS_PROXY, " shash_put failed, need cleanup code");
		return -1;
	}

	// Send to the remote node.
	int rv = as_fabric_send(dst, m, AS_FABRIC_PRIORITY_MEDIUM);
	if (rv != 0) {
		cf_debug(AS_PROXY, "as_proxy_divert: returned error %d", rv);
		as_fabric_msg_put(m);
	}

	cf_atomic_int_incr(&g_config.proxy_initiate);

	return 0;
}
// For LDTs only:
void
repl_write_handle_multiop(cf_node node, msg* m)
{
	uint8_t* ns_name;
	size_t ns_name_len;

	if (msg_get_buf(m, RW_FIELD_NAMESPACE, &ns_name, &ns_name_len,
			MSG_GET_DIRECT) != 0) {
		cf_warning(AS_RW, "handle_multiop: no namespace");
		send_multiop_ack(node, m, AS_PROTO_RESULT_FAIL_UNKNOWN);
		return;
	}

	as_namespace* ns = as_namespace_get_bybuf(ns_name, ns_name_len);

	if (! ns) {
		cf_warning(AS_RW, "handle_multiop: invalid namespace");
		send_multiop_ack(node, m, AS_PROTO_RESULT_FAIL_UNKNOWN);
		return;
	}

	cf_digest* keyd;
	size_t sz;

	if (msg_get_buf(m, RW_FIELD_DIGEST, (uint8_t**)&keyd, &sz,
			MSG_GET_DIRECT) != 0) {
		cf_warning(AS_RW, "handle_multiop: no digest");
		send_multiop_ack(node, m, AS_PROTO_RESULT_FAIL_UNKNOWN);
		return;
	}

	// Note - there should be an RW_FIELD_INFO with LDT bit set, but not
	// bothering to get it here since we never use it.

	uint8_t* pickled_buf;
	size_t pickled_sz;

	if (msg_get_buf(m, RW_FIELD_MULTIOP, (uint8_t**)&pickled_buf, &pickled_sz,
			MSG_GET_DIRECT) != 0) {
		cf_warning(AS_RW, "handle_multiop: no buffer");
		send_multiop_ack(node, m, AS_PROTO_RESULT_FAIL_UNKNOWN);
		return;
	}

	as_partition_reservation rsv;

	as_partition_reserve_migrate(ns, as_partition_getid(*keyd), &rsv, NULL);

	if (rsv.state == AS_PARTITION_STATE_ABSENT) {
		as_partition_release(&rsv);
		send_multiop_ack(node, m, AS_PROTO_RESULT_FAIL_CLUSTER_KEY_MISMATCH);
		return;
	}

	ldt_prole_info linfo;
	memset(&linfo, 1, sizeof(ldt_prole_info));

	int offset = 0;

	while (true) {
		const uint8_t* buf = (const uint8_t*)(pickled_buf + offset);
		size_t sz = pickled_sz - offset;

		if (sz == 0) {
			break;
		}

		uint32_t op_msg_len = 0;
		msg_type op_msg_type = 0;

		if (msg_get_initial(&op_msg_len, &op_msg_type, buf, sz) != 0 ||
				op_msg_type != M_TYPE_RW) {
			cf_warning(AS_RW, "handle_multiop: peek multiop msg failed");
			as_partition_release(&rsv);
			send_multiop_ack(node, m, AS_PROTO_RESULT_FAIL_UNKNOWN);
			return;
		}

		msg* op_msg = as_fabric_msg_get(op_msg_type);

		if (! op_msg) {
			cf_warning(AS_RW, "handle_multiop: can't get fabric msg");
			as_partition_release(&rsv);
			send_multiop_ack(node, m, AS_PROTO_RESULT_FAIL_UNKNOWN);
			return;
		}

		if (msg_parse(op_msg, buf, sz) != 0) {
			cf_warning(AS_RW, "handle_multiop: can't parse multiop msg");
			as_fabric_msg_put(op_msg);
			as_partition_release(&rsv);
			send_multiop_ack(node, m, AS_PROTO_RESULT_FAIL_UNKNOWN);
			return;
		}

		offset += op_msg_len;

		if (! handle_multiop_subop(node, op_msg, &rsv, &linfo)) {
			cf_warning(AS_RW, "handle_multiop: write_process_new failed");
			as_fabric_msg_put(op_msg);
			as_partition_release(&rsv);
			send_multiop_ack(node, m, AS_PROTO_RESULT_FAIL_UNKNOWN);
			return;
		}

		as_fabric_msg_put(op_msg);
	}

	as_partition_release(&rsv);
	send_multiop_ack(node, m, AS_PROTO_RESULT_OK);
}
void
repl_write_handle_op(cf_node node, msg* m)
{
	uint8_t* ns_name;
	size_t ns_name_len;

	if (msg_get_buf(m, RW_FIELD_NAMESPACE, &ns_name, &ns_name_len,
			MSG_GET_DIRECT) != 0) {
		cf_warning(AS_RW, "repl_write_handle_op: no namespace");
		send_repl_write_ack(node, m, AS_PROTO_RESULT_FAIL_UNKNOWN);
		return;
	}

	as_namespace* ns = as_namespace_get_bybuf(ns_name, ns_name_len);

	if (! ns) {
		cf_warning(AS_RW, "repl_write_handle_op: invalid namespace");
		send_repl_write_ack(node, m, AS_PROTO_RESULT_FAIL_UNKNOWN);
		return;
	}

	cf_digest* keyd;
	size_t sz;

	if (msg_get_buf(m, RW_FIELD_DIGEST, (uint8_t**)&keyd, &sz,
			MSG_GET_DIRECT) != 0) {
		cf_warning(AS_RW, "repl_write_handle_op: no digest");
		send_repl_write_ack(node, m, AS_PROTO_RESULT_FAIL_UNKNOWN);
		return;
	}

	as_partition_reservation rsv;

	as_partition_reserve_migrate(ns, as_partition_getid(*keyd), &rsv, NULL);

	if (rsv.state == AS_PARTITION_STATE_ABSENT) {
		as_partition_release(&rsv);
		send_repl_write_ack(node, m, AS_PROTO_RESULT_FAIL_CLUSTER_KEY_MISMATCH);
		return;
	}

	uint32_t info = 0;

	msg_get_uint32(m, RW_FIELD_INFO, &info);

	ldt_prole_info linfo;

	if ((info & RW_INFO_LDT) != 0 && ! ldt_get_info(&linfo, m, &rsv)) {
		cf_warning(AS_RW, "repl_write_handle_op: bad ldt info");
		as_partition_release(&rsv);
		send_repl_write_ack(node, m, AS_PROTO_RESULT_FAIL_UNKNOWN);
		return;
	}

	cl_msg* msgp;
	size_t msgp_sz;

	uint8_t* pickled_buf;
	size_t pickled_sz;

	uint32_t result;

	if (msg_get_buf(m, RW_FIELD_AS_MSG, (uint8_t**)&msgp, &msgp_sz,
			MSG_GET_DIRECT) == 0) {
		// <><><><><><>  Delete Operation  <><><><><><>

		// TODO - does this really need to be here? Just to fill linfo?
		if (! ldt_get_prole_version(&rsv, keyd, &linfo, info, NULL, false)) {
			as_partition_release(&rsv);
			send_repl_write_ack(node, m, AS_PROTO_RESULT_OK); // ???
			return;
		}

		result = delete_replica(&rsv, keyd,
				(info & (RW_INFO_LDT_SUBREC | RW_INFO_LDT_ESR)) != 0,
				(info & RW_INFO_NSUP_DELETE) != 0,
				as_msg_is_xdr(&msgp->msg),
				node);
	}
	else if (msg_get_buf(m, RW_FIELD_RECORD, (uint8_t**)&pickled_buf,
			&pickled_sz, MSG_GET_DIRECT) == 0) {
		// <><><><><><>  Write Pickle  <><><><><><>

		as_generation generation;

		if (msg_get_uint32(m, RW_FIELD_GENERATION, &generation) != 0) {
			cf_warning(AS_RW, "repl_write_handle_op: no generation");
			as_partition_release(&rsv);
			send_repl_write_ack(node, m, AS_PROTO_RESULT_FAIL_UNKNOWN);
			return;
		}

		uint32_t void_time;

		if (msg_get_uint32(m, RW_FIELD_VOID_TIME, &void_time) != 0) {
			cf_warning(AS_RW, "repl_write_handle_op: no void-time");
			as_partition_release(&rsv);
			send_repl_write_ack(node, m, AS_PROTO_RESULT_FAIL_UNKNOWN);
			return;
		}

		uint64_t last_update_time = 0;

		// Optional - older versions won't send it.
		msg_get_uint64(m, RW_FIELD_LAST_UPDATE_TIME, &last_update_time);

		as_rec_props rec_props;
		size_t rec_props_size = 0;

		msg_get_buf(m, RW_FIELD_REC_PROPS, &rec_props.p_data, &rec_props_size,
				MSG_GET_DIRECT);
		rec_props.size = (uint32_t)rec_props_size;

		result = write_replica(&rsv, keyd, pickled_buf, pickled_sz, &rec_props,
				generation, void_time, last_update_time, node, info, &linfo);
	}
	else {
		cf_warning(AS_RW, "repl_write_handle_op: no msg or pickle");
		result = AS_PROTO_RESULT_FAIL_UNKNOWN;
	}

	as_partition_release(&rsv);
	send_repl_write_ack(node, m, result);
}
Esempio n. 7
0
// Build response to batch request.
static void
batch_build_response(batch_transaction* btr, cf_buf_builder** bb_r)
{
	as_namespace* ns = btr->ns;
	batch_digests *bmds = btr->digests;
	bool get_data = btr->get_data;
	uint32_t yield_count = 0;

	for (int i = 0; i < bmds->n_digests; i++)
	{
		batch_digest *bmd = &bmds->digest[i];

		if (bmd->done == false) {
			// try to get the key
			as_partition_reservation rsv;
			AS_PARTITION_RESERVATION_INIT(rsv);
			cf_node other_node = 0;
			uint64_t cluster_key;

			if (! *bb_r) {
				*bb_r = cf_buf_builder_create_size(1024 * 4);
			}

			int rv = as_partition_reserve_read(ns, as_partition_getid(bmd->keyd), &rsv, &other_node, &cluster_key);

			if (rv == 0) {
				cf_atomic_int_incr(&g_config.batch_tree_count);

				as_index_ref r_ref;
				r_ref.skip_lock = false;
				int rec_rv = as_record_get(rsv.tree, &bmd->keyd, &r_ref, ns);

				if (rec_rv == 0) {
					as_index *r = r_ref.r;

					// Check to see this isn't an expired record waiting to die.
					if (r->void_time && r->void_time < as_record_void_time_get()) {
						as_msg_make_error_response_bufbuilder(&bmd->keyd, AS_PROTO_RESULT_FAIL_NOTFOUND, bb_r, ns->name);
					}
					else {
						// Make sure it's brought in from storage if necessary.
						as_storage_rd rd;
						if (get_data) {
							as_storage_record_open(ns, r, &rd, &r->key);
							rd.n_bins = as_bin_get_n_bins(r, &rd);
						}

						// Note: this array must stay in scope until the
						// response for this record has been built, since in the
						// get data w/ record on device case, it's copied by
						// reference directly into the record descriptor.
						as_bin stack_bins[!get_data || rd.ns->storage_data_in_memory ? 0 : rd.n_bins];

						if (get_data) {
							// Figure out which bins you want - for now, all.
							rd.bins = as_bin_get_all(r, &rd, stack_bins);
							rd.n_bins = as_bin_inuse_count(&rd);
						}

						as_msg_make_response_bufbuilder(r, (get_data ? &rd : NULL), bb_r, !get_data, (get_data ? NULL : ns->name), true, false, btr->binlist);

						if (get_data) {
							as_storage_record_close(r, &rd);
						}
					}
					as_record_done(&r_ref, ns);
				}
				else {
					// TODO - what about empty records?
					cf_debug(AS_BATCH, "batch_build_response: as_record_get returned %d : key %"PRIx64, rec_rv, *(uint64_t *)&bmd->keyd);
					as_msg_make_error_response_bufbuilder(&bmd->keyd, AS_PROTO_RESULT_FAIL_NOTFOUND, bb_r, ns->name);
				}

				bmd->done = true;

				as_partition_release(&rsv);
				cf_atomic_int_decr(&g_config.batch_tree_count);
			}
			else {
				cf_debug(AS_BATCH, "batch_build_response: partition reserve read failed: rv %d", rv);

				as_msg_make_error_response_bufbuilder(&bmd->keyd, AS_PROTO_RESULT_FAIL_NOTFOUND, bb_r, ns->name);

				if (other_node != 0) {
					bmd->node = other_node;
					cf_debug(AS_BATCH, "other_node is: %p.", other_node);
				} else {
					cf_debug(AS_BATCH, "other_node is NULL.");
				}
			}

			yield_count++;
			if (yield_count % g_config.batch_priority == 0) {
				usleep(1);
			}
		}
	}
}
Esempio n. 8
0
int as_msg_make_response_bufbuilder(as_record *r, as_storage_rd *rd,
		cf_buf_builder **bb_r, bool nobindata, char *nsname, bool use_sets,
		bool include_key, bool skip_empty_records, cf_vector *binlist)
{
	// Sanity checks. Either rd should be there or nobindata and nsname should be present.
	if (!(rd || (nobindata && nsname))) {
		cf_detail(AS_PROTO, "Neither storage record nor nobindata is set. Skipping the record.");
		return 0;
	}

	// figure out the size of the entire buffer
	int         set_name_len = 0;
	const char *set_name     = NULL;
	int         ns_len       = rd ? strlen(rd->ns->name) : strlen(nsname);

	if (use_sets && as_index_get_set_id(r) != INVALID_SET_ID) {
		as_namespace *ns = NULL;

		if (rd) {
			ns = rd->ns;
		} else if (nsname) {
			ns = as_namespace_get_byname(nsname);
		}
		if (!ns) {
			cf_info(AS_PROTO, "Cannot get namespace, needed to get set information. Skipping record.");
			return -1;
		}
		set_name = as_index_get_set_name(r, ns);
		if (set_name) {
			set_name_len = strlen(set_name);
		}
	}

	uint8_t* key = NULL;
	uint32_t key_size = 0;

	if (include_key && as_index_is_flag_set(r, AS_INDEX_FLAG_KEY_STORED)) {
		if (! as_storage_record_get_key(rd)) {
			cf_info(AS_PROTO, "can't get key - skipping record");
			return -1;
		}

		key = rd->key;
		key_size = rd->key_size;
	}

	uint16_t n_fields = 2;
	int msg_sz = sizeof(as_msg);
	msg_sz += sizeof(as_msg_field) + sizeof(cf_digest);
	msg_sz += sizeof(as_msg_field) + ns_len;
	if (set_name) {
		n_fields++;
		msg_sz += sizeof(as_msg_field) + set_name_len;
	}
	if (key) {
		n_fields++;
		msg_sz += sizeof(as_msg_field) + key_size;
	}

	int list_bins   = 0;
	int num_bins = 0;
	if (rd) {
		num_bins = as_bin_inuse_count(rd);
	}
	if (binlist) {
		num_bins = cf_vector_size(binlist);
	}
	
	as_val *ldtBinVal[num_bins];
	as_particle_type ldtBinParticleType[num_bins];

	if (nobindata == false) {
		if (binlist) {
			for(uint16_t i = 0; i < num_bins; i++) {
				char binname[AS_ID_BIN_SZ];
				cf_vector_get(binlist, i, (void*)&binname);
				cf_debug(AS_PROTO, " Binname projected inside is |%s| \n", binname);
				as_bin *p_bin = as_bin_get (rd, (uint8_t*)binname, strlen(binname));
				if (!p_bin)
				{
					cf_debug(AS_PROTO, "To be projected bin |%s| not found \n", binname);
					continue;
				}
				cf_debug(AS_PROTO, "Adding bin |%s| to projected bins |%s| \n", binname);
				msg_sz += sizeof(as_msg_op);
				msg_sz += rd->ns->single_bin ? 0 : strlen(binname);
				uint32_t psz;
				if (as_bin_is_hidden(p_bin)) {
					ldtBinVal[list_bins] = as_llist_scan(rd->ns, rd->ns->partitions[as_partition_getid(rd->keyd)].sub_vp, rd, p_bin);
					if (ldtBinVal[list_bins]) {
						ldtBinParticleType[list_bins] = AS_PARTICLE_TYPE_HIDDEN_LIST;
					}
					as_serializer s;
					as_msgpack_init(&s);
					psz = as_serializer_serialize_getsize(&s, (as_val *)ldtBinVal[list_bins]);
					as_serializer_destroy(&s);
				} else {
					as_particle_tobuf(p_bin, 0, &psz); // get size
				}
				list_bins++;
				msg_sz += psz;
			}

			// Don't return an empty record.
			if (skip_empty_records && list_bins == 0) {
				return 0;
			}
		}
		else {
			msg_sz += sizeof(as_msg_op) * num_bins; // the bin headers
			for (uint16_t i = 0; i < num_bins; i++) {
				as_bin *p_bin = &rd->bins[i];
				msg_sz += rd->ns->single_bin ? 0 : strlen(as_bin_get_name_from_id(rd->ns, p_bin->id));
				uint32_t psz;
				if (as_bin_is_hidden(p_bin)) {
					ldtBinVal[list_bins] = as_llist_scan(rd->ns, rd->ns->partitions[as_partition_getid(rd->keyd)].sub_vp, rd, p_bin);
					if (ldtBinVal[list_bins]) {
						ldtBinParticleType[list_bins] = AS_PARTICLE_TYPE_HIDDEN_LIST;
					}
					as_serializer s;
					as_msgpack_init(&s);
					psz = as_serializer_serialize_getsize(&s, (as_val *)ldtBinVal[list_bins]);
					as_serializer_destroy(&s);
				} else {
					as_particle_tobuf(p_bin, 0, &psz); // get size
				}
				msg_sz += psz;
				list_bins++;
			}
		}
	}

	uint8_t *b;
	cf_buf_builder_reserve(bb_r, msg_sz, &b);

	// set up the header
	uint8_t *buf = b;
	as_msg *msgp = (as_msg *) buf;

	msgp->header_sz = sizeof(as_msg);
	msgp->info1 = (nobindata ? AS_MSG_INFO1_GET_NOBINDATA : 0);
	msgp->info2 = 0;
	msgp->info3 = 0;
	msgp->unused = 0;
	msgp->result_code = 0;
	msgp->generation = r->generation;
	msgp->record_ttl = r->void_time;
	msgp->transaction_ttl = 0;
	msgp->n_fields = n_fields;
	if (rd) {
		if (binlist)
			msgp->n_ops = list_bins;
		else
			msgp->n_ops = num_bins;
	} else {
		msgp->n_ops = 0;
	}
	as_msg_swap_header(msgp);

	buf += sizeof(as_msg);

	as_msg_field *mf = (as_msg_field *) buf;
	mf->field_sz = sizeof(cf_digest) + 1;
	mf->type = AS_MSG_FIELD_TYPE_DIGEST_RIPE;
	if (rd) {
		memcpy(mf->data, &rd->keyd, sizeof(cf_digest));
	} else {
		memcpy(mf->data, &r->key, sizeof(cf_digest));
	}
	as_msg_swap_field(mf);
	buf += sizeof(as_msg_field) + sizeof(cf_digest);

	mf = (as_msg_field *) buf;
	mf->field_sz = ns_len + 1;
	mf->type = AS_MSG_FIELD_TYPE_NAMESPACE;
	if (rd) {
		memcpy(mf->data, rd->ns->name, ns_len);
	} else {
		memcpy(mf->data, nsname, ns_len);
	}
	as_msg_swap_field(mf);
	buf += sizeof(as_msg_field) + ns_len;

	if (set_name) {
		mf = (as_msg_field *) buf;
		mf->field_sz = set_name_len + 1;
		mf->type = AS_MSG_FIELD_TYPE_SET;
		memcpy(mf->data, set_name, set_name_len);
		as_msg_swap_field(mf);
		buf += sizeof(as_msg_field) + set_name_len;
	}

	if (key) {
		mf = (as_msg_field *) buf;
		mf->field_sz = key_size + 1;
		mf->type = AS_MSG_FIELD_TYPE_KEY;
		memcpy(mf->data, key, key_size);
		as_msg_swap_field(mf);
		buf += sizeof(as_msg_field) + key_size;
	}

	if (nobindata) {
		goto Out;
	}
	list_bins = 0;

	if (binlist) {
		for (uint16_t i = 0; i < num_bins; i++) {

			char binname[AS_ID_BIN_SZ];
			cf_vector_get(binlist, i, (void*)&binname);
			cf_debug(AS_PROTO, " Binname projected inside is |%s| \n", binname);
			as_bin *p_bin = as_bin_get (rd, (uint8_t*)binname, strlen(binname));
			if (!p_bin) // should it be checked before ???
				continue;

			as_msg_op *op = (as_msg_op *)buf;
			buf += sizeof(as_msg_op);

			op->op = AS_MSG_OP_READ;

			op->name_sz = as_bin_memcpy_name(rd->ns, op->name, p_bin);
			buf += op->name_sz;

			// Since there are two variable bits, the size is everything after
			// the data bytes - and this is only the head, we're patching up
			// the rest in a minute.
			op->op_sz = 4 + op->name_sz;

			if (as_bin_inuse(p_bin)) {
				op->particle_type = as_particle_type_convert(as_bin_get_particle_type(p_bin));
				op->version = as_bin_get_version(p_bin, rd->ns->single_bin);

				uint32_t psz = msg_sz - (buf - b); // size remaining in buffer, for safety
				if (as_bin_is_hidden(p_bin)) {
					if (!ldtBinVal[list_bins]) {
						op->particle_type = AS_PARTICLE_TYPE_NULL;
						psz = 0;
					} else {
						op->particle_type = ldtBinParticleType[list_bins];
						as_buffer abuf;
						as_buffer_init(&abuf);
						as_serializer s;
						as_msgpack_init(&s);
						as_serializer_serialize(&s, (as_val *)ldtBinVal[list_bins], &abuf);
						psz = abuf.size;
						memcpy(buf, abuf.data, abuf.size);
						as_serializer_destroy(&s);
						as_buffer_destroy(&abuf);
						as_val_destroy(ldtBinVal[list_bins]);
					}
				} else {
					if (0 != as_particle_tobuf(p_bin, buf, &psz)) {
						cf_warning(AS_PROTO, "particle to buf: could not copy data!");
					}
				}
				list_bins++;
				buf += psz;
				op->op_sz += psz;
			}
			else {
				cf_debug(AS_PROTO, "Whoops !! bin not in use");
				op->particle_type = AS_PARTICLE_TYPE_NULL;
			}
			as_msg_swap_op(op);
		}
	}
	else {
		// over all bins, copy into the buffer
		for (uint16_t i = 0; i < num_bins; i++) {

			as_msg_op *op = (as_msg_op *)buf;
			buf += sizeof(as_msg_op);

			op->op = AS_MSG_OP_READ;

			op->name_sz = as_bin_memcpy_name(rd->ns, op->name, &rd->bins[i]);
			buf += op->name_sz;

			// Since there are two variable bits, the size is everything after
			// the data bytes - and this is only the head, we're patching up
			// the rest in a minute.
			op->op_sz = 4 + op->name_sz;

			if (as_bin_inuse(&rd->bins[i])) {
				op->particle_type = as_particle_type_convert(as_bin_get_particle_type(&rd->bins[i]));
				op->version = as_bin_get_version(&rd->bins[i], rd->ns->single_bin);

				uint32_t psz = msg_sz - (buf - b); // size remaining in buffer, for safety
				if (as_bin_is_hidden(&rd->bins[i])) {
					if (!ldtBinVal[list_bins]) {
						op->particle_type = AS_PARTICLE_TYPE_NULL;
						psz = 0;
					} else {
						op->particle_type = ldtBinParticleType[list_bins];
						as_buffer abuf;
						as_buffer_init(&abuf);
						as_serializer s;
						as_msgpack_init(&s);
						as_serializer_serialize(&s, (as_val *)ldtBinVal[list_bins], &abuf);
						psz = abuf.size;
						memcpy(buf, abuf.data, abuf.size);
						as_serializer_destroy(&s);
						as_buffer_destroy(&abuf);
						as_val_destroy(ldtBinVal[list_bins]);
					}
				} else {
					if (0 != as_particle_tobuf(&rd->bins[i], buf, &psz)) {
						cf_warning(AS_PROTO, "particle to buf: could not copy data!");
					}
				}
				list_bins++;
				buf += psz;
				op->op_sz += psz;
			}
			else {
				op->particle_type = AS_PARTICLE_TYPE_NULL;
			}
			as_msg_swap_op(op);
		}
	}
Out:
	return(0);
}