/*
 * Internal Function: udf_aerospike_delbin
 *
 * Parameters:
 * 		r 		- udf_record to be manipulated
 * 		bname 	- name of the bin to be deleted
 *
 * Return value:
 * 		0  on success
 * 	   -1  on failure
 *
 * Description:
 * 		The function deletes the bin with the name
 * 		passed in as parameter. The as_bin_destroy function
 * 		which is called here, only frees the data and
 * 		the bin is marked as not in use. The bin can then be reused later.
 *
 * 		Synchronization : object lock acquired by the transaction thread executing UDF.
 * 		Partition reservation takes place just before the transaction starts executing
 * 		( look for as_partition_reserve_udf in thr_tsvc.c )
 *
 * 		Callers:
 * 		udf_aerospike__apply_update_atomic
 * 		In this function, if it fails at the time of update, the record is set
 * 		to rollback all the updates till this point. The case where it fails in
 * 		rollback is not handled.
 *
 * 		Side Notes:
 * 		i.	write_to_device will be set to true on a successful bin destroy.
 * 		If all the updates from udf_aerospike__apply_update_atomic (including this) are
 * 		successful, the record will be written to disk and reopened so that the rest of
 * 		sets of updates can be applied.
 *
 * 		ii.	If delete from sindex fails, we do not handle it.
 */
static int
udf_aerospike_delbin(udf_record * urecord, const char * bname)
{
	// Check that bname is not completely invalid
	if ( !bname || !bname[0] ) {
		cf_warning(AS_UDF, "delete bin: no bin name supplied");
		return -1;
	}

	size_t          blen    = strlen(bname);
	as_storage_rd  *rd      = urecord->rd;
	as_transaction *tr      = urecord->tr;

	// Check quality of bname -- first check that it is proper length, then
	// check that we're not over quota for bins, then finally make sure that
	// the bin exists.
	if (blen > (AS_ID_BIN_SZ - 1 ) || !as_bin_name_within_quota(rd->ns, (byte *)bname, blen)) {
		// Can't read bin if name too large or over quota
		cf_warning(AS_UDF, "bin name(%s) too big. Bin not added", bname);
		return -1;
	}

	as_bin * b = as_bin_get(rd, (byte *)bname, blen);
	if ( !b ) {
		cf_warning(AS_UDF, "as_bin_get failed: bin name(%s) not found", bname);
		return -1;
	}

	SINDEX_BINS_SETUP(delbin, 1);
	int sindex_ret = AS_SINDEX_OK;

	bool has_sindex = as_sindex_ns_has_sindex(rd->ns);
	if (has_sindex) {
		sindex_ret = as_sindex_sbin_from_bin(rd->ns, as_index_get_set_name(rd->r, rd->ns), b, delbin);
	}

	int32_t i = as_bin_get_index(rd, (byte *)bname, blen);
	if (i != -1) {
		if (has_sindex) {
			tr->flag |= AS_TRANSACTION_FLAG_SINDEX_TOUCHED;
			if (AS_SINDEX_OK ==  sindex_ret) {
				as_sindex_delete_by_sbin(rd->ns, as_index_get_set_name(rd->r, rd->ns), 1, delbin, rd);
				//TODO: Check the error code returned through sindex_ret. (like out of sync )
			}
		}
		as_bin_destroy(rd, i);
	} else {
		cf_warning(AS_UDF, "deleting non-existing bin %s ignored", bname);
	}

	if (has_sindex) {
		as_sindex_sbin_freeall(delbin, 1);
	}

	return 0;
}
/*
 * Internal Function: Read and figure out if the bin is hidden
 *
 * Parameters:
 * 		r    : udf record
 * 		bname: Bin name of the bin which need to be read.
 *
 * Return value :
 * 	 	true:  if hidden
 * 	 	false: o/w or in case bin is not found
 *
 * Description:
 * 		Expectation is the record is already open. No checks are
 * 		performed in this function. Caller needs to make sure the
 * 		record is good to read e.g binname etc.
 *
 * Callers:
 * 		udf_aerospike__apply_update_atomic
 */
bool
udf_record_bin_ishidden(const udf_record *urecord, const char *name)
{
	if (!name) {
		return false;
	}
	as_bin * bb = as_bin_get(urecord->rd, name);

	if ( !bb ) {
		cf_detail(AS_UDF, "udf_record_get: bin not found (%s)", name);
		return false;
	}
	return as_bin_is_hidden(bb);
}
/*
 * Internal Function: Read the bin from storage and convert it
 *                    into as_val and return
 *
 * Parameters:
 * 		r    : udf record
 * 		bname: Bin name of the bin which need to be read.
 *
 * Return value :
 * 	 	value (as_val *) in case of success
 * 		NULL  in case of failure
 *
 * Description:
 * 		Expectation is the record is already open. No checks are
 * 		performed in this function. Caller needs to make sure the
 * 		record is good to read e.g binname etc.
 *
 * 		NB: as_val which is returned is allocated one. It is callers
 * 		    responsibility to free else in case it is passed on to
 * 		    lua ... lua has responsibility of garbage collecting it.
 * 		    Hence this function call incurs and malloc cost.
 *
 * Callers:
 * 		udf_record_get
 */
as_val *
udf_record_storage_get(const udf_record *urecord, const char *name)
{
	if (!name) {
		cf_detail(AS_UDF, "Passed Null bin name to storage get");
		return NULL;
	}
	as_bin * bb = as_bin_get(urecord->rd, name);

	if ( !bb ) {
		cf_detail(AS_UDF, "udf_record_get: bin not found (%s)", name);
		return NULL;
	}

	return as_bin_particle_to_asval(bb);
}
Exemple #4
0
int as_msg_make_response_bufbuilder(as_record *r, as_storage_rd *rd,
		cf_buf_builder **bb_r, bool nobindata, char *nsname, bool use_sets,
		bool include_key, cf_vector *binlist)
{
	// Sanity checks. Either rd should be there or nobindata and nsname should be present.
	if (!(rd || (nobindata && nsname))) {
		cf_detail(AS_PROTO, "Neither storage record nor nobindata is set. Skipping the record.");
		return 0;
	}

	// figure out the size of the entire buffer
	int         set_name_len = 0;
	const char *set_name     = NULL;
	int         ns_len       = rd ? strlen(rd->ns->name) : strlen(nsname);

	if (use_sets && as_index_get_set_id(r) != INVALID_SET_ID) {
		as_namespace *ns = NULL;

		if (rd) {
			ns = rd->ns;
		} else if (nsname) {
			ns = as_namespace_get_byname(nsname);
		}
		if (!ns) {
			cf_info(AS_PROTO, "Cannot get namespace, needed to get set information. Skipping record.");
			return -1;
		}
		set_name = as_index_get_set_name(r, ns);
		if (set_name) {
			set_name_len = strlen(set_name);
		}
	}

	uint8_t* key = NULL;
	uint32_t key_size = 0;

	if (include_key && as_index_is_flag_set(r, AS_INDEX_FLAG_KEY_STORED)) {
		if (! as_storage_record_get_key(rd)) {
			cf_info(AS_PROTO, "can't get key - skipping record");
			return -1;
		}

		key = rd->key;
		key_size = rd->key_size;
	}

	uint16_t n_fields = 2;
	int msg_sz = sizeof(as_msg);
	msg_sz += sizeof(as_msg_field) + sizeof(cf_digest);
	msg_sz += sizeof(as_msg_field) + ns_len;
	if (set_name) {
		n_fields++;
		msg_sz += sizeof(as_msg_field) + set_name_len;
	}
	if (key) {
		n_fields++;
		msg_sz += sizeof(as_msg_field) + key_size;
	}

	int list_bins   = 0;
	int in_use_bins = 0;
	if (rd) {
		in_use_bins = as_bin_inuse_count(rd);
	}

	if (nobindata == false) {
		if(binlist) {
			int binlist_sz = cf_vector_size(binlist);
			for(uint16_t i = 0; i < binlist_sz; i++) {
				char binname[AS_ID_BIN_SZ];
				cf_vector_get(binlist, i, (void*)&binname);
				cf_debug(AS_PROTO, " Binname projected inside is |%s| \n", binname);
				as_bin *p_bin = as_bin_get (rd, (uint8_t*)binname, strlen(binname));
				if (!p_bin)
				{
					cf_debug(AS_PROTO, "To be projected bin |%s| not found \n", binname);
					continue;
				}
				cf_debug(AS_PROTO, "Adding bin |%s| to projected bins |%s| \n", binname);
				list_bins++;
				msg_sz += sizeof(as_msg_op);
				msg_sz += rd->ns->single_bin ? 0 : strlen(binname);
				uint32_t psz;
				if (as_bin_is_hidden(p_bin)) {
					psz = 0;
				} else {
					as_particle_tobuf(p_bin, 0, &psz); // get size
				}
				msg_sz += psz;
			}
		}
		else {
			msg_sz += sizeof(as_msg_op) * in_use_bins; // the bin headers
			for (uint16_t i = 0; i < in_use_bins; i++) {
				as_bin *p_bin = &rd->bins[i];
				msg_sz += rd->ns->single_bin ? 0 : strlen(as_bin_get_name_from_id(rd->ns, p_bin->id));
				uint32_t psz;
				if (as_bin_is_hidden(p_bin)) {
					psz = 0;
				} else {
					as_particle_tobuf(p_bin, 0, &psz); // get size
				}
				msg_sz += psz;
			}
		}
	}

	uint8_t *b;
	cf_buf_builder_reserve(bb_r, msg_sz, &b);

	// set up the header
	uint8_t *buf = b;
	as_msg *msgp = (as_msg *) buf;

	msgp->header_sz = sizeof(as_msg);
	msgp->info1 = (nobindata ? AS_MSG_INFO1_GET_NOBINDATA : 0);
	msgp->info2 = 0;
	msgp->info3 = 0;
	msgp->unused = 0;
	msgp->result_code = 0;
	msgp->generation = r->generation;
	msgp->record_ttl = r->void_time;
	msgp->transaction_ttl = 0;
	msgp->n_fields = n_fields;
	if (rd) {
		if (binlist)
			msgp->n_ops = list_bins;
		else
			msgp->n_ops = in_use_bins;
	} else {
		msgp->n_ops = 0;
	}
	as_msg_swap_header(msgp);

	buf += sizeof(as_msg);

	as_msg_field *mf = (as_msg_field *) buf;
	mf->field_sz = sizeof(cf_digest) + 1;
	mf->type = AS_MSG_FIELD_TYPE_DIGEST_RIPE;
	if (rd) {
		memcpy(mf->data, &rd->keyd, sizeof(cf_digest));
	} else {
		memcpy(mf->data, &r->key, sizeof(cf_digest));
	}
	as_msg_swap_field(mf);
	buf += sizeof(as_msg_field) + sizeof(cf_digest);

	mf = (as_msg_field *) buf;
	mf->field_sz = ns_len + 1;
	mf->type = AS_MSG_FIELD_TYPE_NAMESPACE;
	if (rd) {
		memcpy(mf->data, rd->ns->name, ns_len);
	} else {
		memcpy(mf->data, nsname, ns_len);
	}
	as_msg_swap_field(mf);
	buf += sizeof(as_msg_field) + ns_len;

	if (set_name) {
		mf = (as_msg_field *) buf;
		mf->field_sz = set_name_len + 1;
		mf->type = AS_MSG_FIELD_TYPE_SET;
		memcpy(mf->data, set_name, set_name_len);
		as_msg_swap_field(mf);
		buf += sizeof(as_msg_field) + set_name_len;
	}

	if (key) {
		mf = (as_msg_field *) buf;
		mf->field_sz = key_size + 1;
		mf->type = AS_MSG_FIELD_TYPE_KEY;
		memcpy(mf->data, key, key_size);
		as_msg_swap_field(mf);
		buf += sizeof(as_msg_field) + key_size;
	}

	if (nobindata) {
		goto Out;
	}

	if(binlist) {
		int binlist_sz = cf_vector_size(binlist);
		for(uint16_t i = 0; i < binlist_sz; i++) {

			char binname[AS_ID_BIN_SZ];
			cf_vector_get(binlist, i, (void*)&binname);
			cf_debug(AS_PROTO, " Binname projected inside is |%s| \n", binname);
			as_bin *p_bin = as_bin_get (rd, (uint8_t*)binname, strlen(binname));
			if (!p_bin) // should it be checked before ???
				continue;

			as_msg_op *op = (as_msg_op *)buf;
			buf += sizeof(as_msg_op);

			op->op = AS_MSG_OP_READ;

			op->name_sz = as_bin_memcpy_name(rd->ns, op->name, p_bin);
			buf += op->name_sz;

			// Since there are two variable bits, the size is everything after
			// the data bytes - and this is only the head, we're patching up
			// the rest in a minute.
			op->op_sz = 4 + op->name_sz;

			if (as_bin_inuse(p_bin)) {
				op->particle_type = as_particle_type_convert(as_bin_get_particle_type(p_bin));
				op->version = as_bin_get_version(p_bin, rd->ns->single_bin);

				uint32_t psz = msg_sz - (buf - b); // size remaining in buffer, for safety
				if (as_bin_is_hidden(p_bin)) {
                	op->particle_type = AS_PARTICLE_TYPE_NULL;
					psz = 0;
				} else {
					if (0 != as_particle_tobuf(p_bin, buf, &psz)) {
						cf_warning(AS_PROTO, "particle to buf: could not copy data!");
					}
				}
				buf += psz;
				op->op_sz += psz;
			}
			else {
				cf_debug(AS_PROTO, "Whoops !! bin not in use");
				op->particle_type = AS_PARTICLE_TYPE_NULL;
			}
			as_msg_swap_op(op);
		}
	}
	else {
		// over all bins, copy into the buffer
		for (uint16_t i = 0; i < in_use_bins; i++) {

			as_msg_op *op = (as_msg_op *)buf;
			buf += sizeof(as_msg_op);

			op->op = AS_MSG_OP_READ;

			op->name_sz = as_bin_memcpy_name(rd->ns, op->name, &rd->bins[i]);
			buf += op->name_sz;

			// Since there are two variable bits, the size is everything after
			// the data bytes - and this is only the head, we're patching up
			// the rest in a minute.
			op->op_sz = 4 + op->name_sz;

			if (as_bin_inuse(&rd->bins[i])) {
				op->particle_type = as_particle_type_convert(as_bin_get_particle_type(&rd->bins[i]));
				op->version = as_bin_get_version(&rd->bins[i], rd->ns->single_bin);

				uint32_t psz = msg_sz - (buf - b); // size remaining in buffer, for safety
				if (as_bin_is_hidden(&rd->bins[i])) {
                	op->particle_type = AS_PARTICLE_TYPE_NULL;
					psz = 0;
				} else {
					if (0 != as_particle_tobuf(&rd->bins[i], buf, &psz)) {
						cf_warning(AS_PROTO, "particle to buf: could not copy data!");
					}
				}
				buf += psz;
				op->op_sz += psz;
			}
			else {
				op->particle_type = AS_PARTICLE_TYPE_NULL;
			}
			as_msg_swap_op(op);
		}
	}
Out:
	return(0);
}
Exemple #5
0
// NB: this uses the same logic as the bufbuild function
// as_msg_make_response_bufbuilder() but does not build a buffer and simply
// returns sizing information.  This is required for query runtime memory
// accounting.
// returns -1 in case of error
// otherwise returns resize value
size_t as_msg_response_msgsize(as_record *r, as_storage_rd *rd, bool nobindata,
		char *nsname, bool use_sets, cf_vector *binlist)
{

	// Sanity checks. Either rd should be there or nobindata and nsname should be present.
	if (!(rd || (nobindata && nsname))) {
		cf_detail(AS_PROTO, "Neither storage record nor nobindata is set. Skipping the record.");
		return -1;
	}

	// figure out the size of the entire buffer
	int         set_name_len = 0;
	const char *set_name     = NULL;
	int         ns_len       = rd ? strlen(rd->ns->name) : strlen(nsname);

	if (use_sets && as_index_get_set_id(r) != INVALID_SET_ID) {
		as_namespace *ns = NULL;

		if (rd) {
			ns = rd->ns;
		} else if (nsname) {
			ns = as_namespace_get_byname(nsname);
		}
		if (!ns) {
			cf_info(AS_PROTO, "Cannot get namespace, needed to get set information. Skipping record.");
			return -1;
		}
		set_name = as_index_get_set_name(r, ns);
		if (set_name) {
			set_name_len = strlen(set_name);
		}
	}

	int msg_sz = sizeof(as_msg);
	msg_sz += sizeof(as_msg_field) + sizeof(cf_digest);
	msg_sz += sizeof(as_msg_field) + ns_len;
	if (set_name) {
		msg_sz += sizeof(as_msg_field) + set_name_len;
	}

	int in_use_bins = as_bin_inuse_count(rd);
	int list_bins   = 0;

	if (nobindata == false) {
		if(binlist) {
			int binlist_sz = cf_vector_size(binlist);
			for(uint16_t i = 0; i < binlist_sz; i++) {
				char binname[AS_ID_BIN_SZ];
				cf_vector_get(binlist, i, (void*)&binname);
				cf_debug(AS_PROTO, " Binname projected inside is |%s| \n", binname);
				as_bin *p_bin = as_bin_get (rd, (uint8_t*)binname, strlen(binname));
				if (!p_bin)
				{
					cf_debug(AS_PROTO, "To be projected bin |%s| not found \n", binname);
					continue;
				}
				cf_debug(AS_PROTO, "Adding bin |%s| to projected bins |%s| \n", binname);
				list_bins++;
				msg_sz += sizeof(as_msg_op);
				msg_sz += rd->ns->single_bin ? 0 : strlen(binname);
				uint32_t psz;
				as_particle_tobuf(p_bin, 0, &psz); // get size
				msg_sz += psz;
			}
		}
		else {
			msg_sz += sizeof(as_msg_op) * in_use_bins; // the bin headers
			for (uint16_t i = 0; i < in_use_bins; i++) {
				as_bin *p_bin = &rd->bins[i];
				msg_sz += rd->ns->single_bin ? 0 : strlen(as_bin_get_name_from_id(rd->ns, p_bin->id));
				uint32_t psz;
				as_particle_tobuf(p_bin, 0, &psz); // get size
				msg_sz += psz;
			}
		}
	}
	return msg_sz;
}
/*
 * Internal function: udf_aerospike__apply_update_atomic
 *
 * Parameters:
 * 		rec --	udf_record to be updated
 *
 * Return Values:
 * 		 0 success
 * 		-1 failure
 *
 * Description:
 * 		This function applies all the updates atomically. That is,
 * 		if one of the bin update/delete/create fails, the entire function
 * 		will fail. If the nth update fails, all the n-1 updates are rolled
 * 		back to their initial values
 *
 * 		Special Notes:
 * 		i. The basic checks of bin name being too long or if there is enough space
 * 		on the disk for the bin values is done before allocating space for any
 * 		of the bins.
 *
 * 		ii. If one of the updates to be rolled back is a bin creation,
 * 		udf_aerospike_delbin is called. This will not free up the bin metadata.
 * 		So there will be a small memory mismatch b/w replica (which did not get the
 * 		record at all and hence no memory is accounted) and the master will be seen.
 * 		To avoid such cases, we are doing checks upfront.
 *
 * 		Callers:
 * 		udf_aerospike__execute_updates
 * 		In this function, if udf_aerospike__apply_update_atomic fails, the record
 * 		is not committed to the storage. On success, record is closed which commits to
 * 		the storage and reopened for the next set of udf updates.
 * 		The return value from udf_aerospike__apply_update_atomic is passed on to the
 * 		callers of this function.
 */
int
udf_aerospike__apply_update_atomic(udf_record *urecord)
{
	int rc 					= 0;
	int failindex 			= 0;
	int new_bins 			= 0;	// How many new bins have to be created in this update
	as_storage_rd * rd		= urecord->rd;

	// This will iterate over all the updates and apply them to storage.
	// The items will remain, and be used as cache values. If an error
	// occurred during setbin(), we rollback all the operation which
	// is and return failure
	cf_detail(AS_UDF, "execute updates: %d updates", urecord->nupdates);

	// loop twice to make sure the updates are performed first so in case
	// something wrong it can be rolled back. The deletes will go through
	// successfully generally.

	// In first iteration, just calculate how many new bins need to be created
	for(int i = 0; i < urecord->nupdates; i++ ) {
		if ( urecord->updates[i].dirty ) {
			char *      k = urecord->updates[i].name;
			if ( k != NULL ) {
				if ( !as_bin_get(rd, (uint8_t *)k, strlen(k)) ) {
					new_bins++;
				}
			}
		}
	}
	// Free bins - total bins not in use in the record
	// Delta bins - new bins that need to be created
	int free_bins  = urecord->rd->n_bins - as_bin_inuse_count(urecord->rd);
	int delta_bins = new_bins - free_bins;
	cf_detail(AS_UDF, "Total bins %d, In use bins %d, Free bins %d , New bins %d, Delta bins %d",
			  urecord->rd->n_bins, as_bin_inuse_count(urecord->rd), free_bins, new_bins, delta_bins);

	// Allocate space for all the new bins that need to be created beforehand
	if (delta_bins > 0 && rd->ns->storage_data_in_memory && ! rd->ns->single_bin) {
		as_bin_allocate_bin_space(urecord->r_ref->r, rd, delta_bins);
	}

	bool has_sindex = as_sindex_ns_has_sindex(rd->ns); 
	if (has_sindex) {
		SINDEX_GRLOCK();
	}

	// In second iteration apply updates.
	for(int i = 0; i < urecord->nupdates; i++ ) {
		if ( urecord->updates[i].dirty && rc == 0) {

			char *      k = urecord->updates[i].name;
			as_val *    v = urecord->updates[i].value;
			bool        h = urecord->updates[i].ishidden;
			urecord->updates[i].oldvalue  = NULL;
			urecord->updates[i].washidden = false;

			if ( k != NULL ) {
				if ( v == NULL || v->type == AS_NIL ) {
					// if the value is NIL, then do a delete
					cf_detail(AS_UDF, "execute update: position %d deletes bin %s", i, k);
					urecord->updates[i].oldvalue = udf_record_storage_get(urecord, k);
					urecord->updates[i].washidden = udf_record_bin_ishidden(urecord, k);
					// Only case delete fails if bin is not found that is 
					// as good as delete. Ignore return code !!
					udf_aerospike_delbin(urecord, k);
				}
				else {
					// otherwise, it is a set
					cf_detail(AS_UDF, "execute update: position %d sets bin %s", i, k);
					urecord->updates[i].oldvalue = udf_record_storage_get(urecord, k);
					urecord->updates[i].washidden = udf_record_bin_ishidden(urecord, k);
					rc = udf_aerospike_setbin(urecord, k, v, h);
					if (rc) {
						failindex = i;
						goto Rollback;
					}
				}
			}
		}
	}
	
	if (has_sindex) {
		SINDEX_GUNLOCK();
	}

	for(int i = 0; i < urecord->nupdates; i++ ) {
		if ((urecord->updates[i].dirty)
				&& (urecord->updates[i].oldvalue)) {
			as_val_destroy(urecord->updates[i].oldvalue);
			cf_debug(AS_UDF, "REGULAR as_val_destroy()");
		}
	}
	// Commit successful do miscellaneous task
	// Set updated flag to true
	urecord->flag |= UDF_RECORD_FLAG_HAS_UPDATES;

	// Set up record to be flushed to storage
	urecord->rd->write_to_device = true;

	// Before committing to storage set the rec_type_bits ..
	cf_detail(AS_RW, "TO INDEX              Digest=%"PRIx64" bits %d %p",
		*(uint64_t *)&urecord->tr->keyd.digest[8], urecord->ldt_rectype_bits, urecord);
	as_index_set_flags(rd->r, urecord->ldt_rectype_bits);

	// Clean up cache and start from 0 update again. All the changes
	// made here will if flush from write buffer to storage goes
	// then will never be backed out.
	udf_record_cache_free(urecord);
	return rc;

Rollback:
	cf_debug(AS_UDF, "Rollback Called: FailIndex(%d)", failindex);
	for(int i = 0; i <= failindex; i++) {
		if (urecord->updates[i].dirty) {
			char *      k = urecord->updates[i].name;
			// Pick the oldvalue for rollback
			as_val *    v = urecord->updates[i].oldvalue;
			bool        h = urecord->updates[i].washidden;
			if ( k != NULL ) {
				if ( v == NULL || v->type == AS_NIL ) {
					// if the value is NIL, then do a delete
					cf_detail(AS_UDF, "execute rollback: position %d deletes bin %s", i, k);
					rc = udf_aerospike_delbin(urecord, k);
				}
				else {
					// otherwise, it is a set
					cf_detail(AS_UDF, "execute rollback: position %d sets bin %s", i, k);
					rc = udf_aerospike_setbin(urecord, k, v, h);
					if (rc) {
						cf_warning(AS_UDF, "Rollback failed .. not good ... !!");
					}
				}
			}
			if (v) {
				as_val_destroy(v);
				cf_debug(AS_UDF, "ROLLBACK as_val_destroy()");
			}
		}
	}
	if (has_sindex) {
		SINDEX_GUNLOCK();
	}

	// Do not clean up the cache in case of failure
	return -1;
}
/*
 * Internal function: udf_aerospike_setbin
 *
 * Parameters:
 * 		r 		-- udf_record to be manipulated
 * 		bname 	-- name of the bin to be deleted
 *		val		-- value to be updated with
 *
 * Return value:
 * 		0  on success
 * 	   -1  on failure
 *
 * Description:
 * 		The function sets the bin with the name
 * 		passed in as parameter to the value, passed as the third parameter.
 * 		Before updating the bin, it is checked if the value can fit in the storage
 *
 * 		Synchronization : object lock acquired by the transaction thread executing UDF.
 * 		Partition reservation takes place just before the transaction starts executing
 * 		( look for as_partition_reserve_udf in thr_tsvc.c )
 *
 * 		Callers:
 * 		udf_aerospike__apply_update_atomic
 * 		In this function, if it fails at the time of update, the record is set
 * 		to rollback all the updates till this point. The case where it fails in
 * 		rollback is not handled.
 *
 * 		Side Notes:
 * 		i.	write_to_device will be set to true on a successful bin update.
 * 		If all the updates from udf_aerospike__apply_update_atomic (including this) are
 * 		successful, the record will be written to disk and reopened so that the rest of
 * 		sets of updates can be applied.
 *
 * 		ii.	If put in sindex fails, we do not handle it.
 *
 * 		TODO make sure anything goes into setbin only if the bin value is
 * 		          changed
 */
static const int
udf_aerospike_setbin(udf_record * urecord, const char * bname, const as_val * val, bool is_hidden)
{
	if (bname == NULL || bname[0] == 0 ) {
		cf_warning(AS_UDF, "no bin name supplied");
		return -1;
	}

	uint8_t type = as_val_type(val);
	if (is_hidden &&
			((type != AS_MAP) && (type != AS_LIST))) {
		cf_warning(AS_UDF, "Hidden %d Type Not allowed", type);
		return -3;
	}

	size_t          blen    = strlen(bname);
	as_storage_rd * rd      = urecord->rd;
	as_transaction *tr      = urecord->tr;
	as_index_ref  * index   = urecord->r_ref;

	as_bin * b = as_bin_get(rd, (byte *)bname, blen);

	if ( !b && (blen > (AS_ID_BIN_SZ - 1 )
				|| !as_bin_name_within_quota(rd->ns, (byte *)bname, blen)) ) {
		// Can't write bin
		cf_warning(AS_UDF, "bin name %s too big. Bin not added", bname);
		return -1;
	}
	if ( !b ) {
		// See if there's a free one, the hope is you will always find the bin because
		// you have already allocated bin space before calling this function.
		b = as_bin_create(index->r, rd, (byte *)bname, blen, 0);
		if (!b) {
			cf_warning(AS_UDF, "ERROR: udf_aerospike_setbin: as_bin_create: bin not found, something went really wrong!");
			return -1;
		}
	}

	SINDEX_BINS_SETUP(oldbin, 1);
	SINDEX_BINS_SETUP(newbin, 1);
	bool needs_sindex_delete = false;
	bool needs_sindex_put    = false;
	bool needs_sindex_update = false;
	bool has_sindex          = as_sindex_ns_has_sindex(rd->ns);

	if (has_sindex
			&& (as_sindex_sbin_from_bin(rd->ns,
					as_index_get_set_name(rd->r, rd->ns),
					b, oldbin) == AS_SINDEX_OK)) {
		needs_sindex_delete = true;
	}

	// we know we are doing an update now, make sure there is particle data,
	// set to be 1 wblock size now @TODO!
	uint32_t pbytes = 0;
	int ret = 0;
	if (!rd->ns->storage_data_in_memory && !urecord->particle_data) {
		urecord->particle_data = cf_malloc(rd->ns->storage_write_block_size);
		urecord->cur_particle_data = urecord->particle_data;
		urecord->end_particle_data = urecord->particle_data + rd->ns->storage_write_block_size;
	}

	cf_detail(AS_UDF, "udf_setbin: bin %s type %d ", bname, type );

	switch(type) {
		case AS_STRING: {
			as_string * v   = as_string_fromval(val);
			byte *      s   = (byte *) as_string_tostring(v);
			size_t      l   = as_string_len(v);

			// Save for later.
			// cf_detail(AS_UDF, "udf_setbin: string: binname %s value is %s",bname,s);

			if ( !as_storage_bin_can_fit(rd->ns, l) ) {
				cf_warning(AS_UDF, "string: bin size too big");
				ret = -1;
				break;
			}
			if (rd->ns->storage_data_in_memory) {
				as_particle_frombuf(b, AS_PARTICLE_TYPE_STRING, s, l, NULL, true);
			} else {
				pbytes = l + as_particle_get_base_size(AS_PARTICLE_TYPE_STRING);
				if ((urecord->cur_particle_data + pbytes) < urecord->end_particle_data) {
					as_particle_frombuf(b, AS_PARTICLE_TYPE_STRING, s, l,
										urecord->cur_particle_data,
										rd->ns->storage_data_in_memory);
					urecord->cur_particle_data += pbytes;
				} else {
					cf_warning(AS_UDF, "string: bin data size too big: pbytes %d"
								" pdata %p cur_part+pbytes %p pend %p", pbytes,
								urecord->particle_data, urecord->cur_particle_data + pbytes,
								urecord->end_particle_data);
					ret = -1;
					break;
				}
			}
			break;
		}
		case AS_BYTES: {
			as_bytes *  v   = as_bytes_fromval(val);
			uint8_t *   s   = as_bytes_get(v);
			size_t      l   = as_bytes_size(v);

			if ( !as_storage_bin_can_fit(rd->ns, l) ) {
				cf_warning(AS_UDF, "bytes: bin size too big");
				ret = -1;
				break;
			}
			if (rd->ns->storage_data_in_memory) {
				as_particle_frombuf(b, AS_PARTICLE_TYPE_BLOB, s, l, NULL, true);
			} else {
				pbytes = l + as_particle_get_base_size(AS_PARTICLE_TYPE_BLOB);
				if ((urecord->cur_particle_data + pbytes) < urecord->end_particle_data) {
					as_particle_frombuf(b, AS_PARTICLE_TYPE_BLOB, s, l, urecord->cur_particle_data,
										rd->ns->storage_data_in_memory);
					urecord->cur_particle_data += pbytes;
				} else {
					cf_warning(AS_UDF, "bytes: bin data size too big pbytes %d"
								" pdata %p cur_part+pbytes %p pend %p", pbytes,
								urecord->particle_data, urecord->cur_particle_data + pbytes,
								urecord->end_particle_data);
					ret = -1;
					break;
				}
			}
			break;
		}
		case AS_BOOLEAN: {
			as_boolean *    v   = as_boolean_fromval(val);
			bool            d   = as_boolean_get(v);
			int64_t         i   = __be64_to_cpup((void *)&d);

			if ( !as_storage_bin_can_fit(rd->ns, 8) ) {
				cf_warning(AS_UDF, "bool: bin size too big");
				ret = -1;
				break;
			}
			if (rd->ns->storage_data_in_memory) {
				as_particle_frombuf(b, AS_PARTICLE_TYPE_INTEGER, (uint8_t *) &i, 8, NULL, true);
			} else {
				pbytes = 8 + as_particle_get_base_size(AS_PARTICLE_TYPE_INTEGER);
				if ((urecord->cur_particle_data + pbytes) < urecord->end_particle_data) {
					as_particle_frombuf(b, AS_PARTICLE_TYPE_INTEGER,
										(uint8_t *) &i, 8,
										urecord->cur_particle_data,
										rd->ns->storage_data_in_memory);
					urecord->cur_particle_data += pbytes;
				} else {
					cf_warning(AS_UDF, "bool: bin data size too big: pbytes %d %p %p %p",
								pbytes, urecord->particle_data, urecord->cur_particle_data,
								urecord->end_particle_data);
					ret = -1;
					break;
				}
			}
			break;
		}
		case AS_INTEGER: {
			as_integer *    v   = as_integer_fromval(val);
			int64_t         i   = as_integer_get(v);
			int64_t         j   = __be64_to_cpup((void *)&i);

			if ( !as_storage_bin_can_fit(rd->ns, 8) ) {
				cf_warning(AS_UDF, "int: bin size too big");
				ret = -1;
				break;
			}
			if (rd->ns->storage_data_in_memory) {
				as_particle_frombuf(b, AS_PARTICLE_TYPE_INTEGER, (uint8_t *) &j, 8, NULL, true);
			} else {
				pbytes = 8 + as_particle_get_base_size(AS_PARTICLE_TYPE_INTEGER);
				if ((urecord->cur_particle_data + pbytes) < urecord->end_particle_data) {
					as_particle_frombuf(b, AS_PARTICLE_TYPE_INTEGER,
										(uint8_t *) &j, 8, urecord->cur_particle_data,
										rd->ns->storage_data_in_memory);
					urecord->cur_particle_data += pbytes;
				} else {
					cf_warning(AS_UDF, "int: bin data size too big: pbytes %d %p %p %p",
								pbytes, urecord->particle_data, urecord->cur_particle_data,
								urecord->end_particle_data);
					ret = -1;
					break;
				}
			}
			break;
		}
		// @LDT : Possibly include AS_LDT in this list.  We need the LDT
		// bins to be updated by LDT lua calls, and that path takes us thru here.
		// However, we ALSO need to be able to set the particle type for the
		// bins -- so that requires extra processing here to take the LDT flags
		// and set the appropriate bin flags in the particle data.
		case AS_MAP:
		case AS_LIST: {
			as_buffer buf;
			as_buffer_init(&buf);
			as_serializer s;
			as_msgpack_init(&s);
			int rsp = 0;
			as_serializer_serialize(&s, (as_val *) val, &buf);

			if ( !as_storage_bin_can_fit(rd->ns, buf.size) ) {
				cf_warning(AS_UDF, "map-list: bin size too big");
				ret = -1;
				// Clean Up and jump out.
				as_serializer_destroy(&s);
				as_buffer_destroy(&buf);
				break; // can't continue if value too big.
			}
			uint8_t ptype;
			if(is_hidden) {
				ptype = as_particle_type_convert_to_hidden(to_particle_type(type));
			} else {
				ptype = to_particle_type(type);
			}
			if (rd->ns->storage_data_in_memory) {
				as_particle_frombuf(b, ptype, (uint8_t *) buf.data, buf.size, NULL, true);
			}
			else {
				pbytes = buf.size + as_particle_get_base_size(ptype);
				if ((urecord->cur_particle_data + pbytes) < urecord->end_particle_data) {
					as_particle_frombuf(b, ptype, (uint8_t *) buf.data, buf.size,
										urecord->cur_particle_data,	rd->ns->storage_data_in_memory);
					urecord->cur_particle_data += pbytes;
				} else {
					cf_warning(AS_UDF, "map-list: bin data size too big: pbytes %d %p %p %p",
								pbytes, urecord->particle_data, urecord->cur_particle_data,
								urecord->end_particle_data);
					rsp = -1;
				}
			}
			as_serializer_destroy(&s);
			as_buffer_destroy(&buf);
			if (rsp) {
				ret = rsp;
				break;
			}
			break;
		}
		default: {
			cf_warning(AS_UDF, "unrecognized object type %d, skipping", as_val_type(val) );
			break;
		}

	}

	// If something fail bailout
	if (ret) {
		as_sindex_sbin_freeall(oldbin, 1);
		as_sindex_sbin_freeall(newbin, 1);
		return ret;
	}

	// Update sindex if required
	if (has_sindex) {
		if (as_sindex_sbin_from_bin(rd->ns,
				as_index_get_set_name(rd->r, rd->ns), b, newbin) == AS_SINDEX_OK) {
			if (!as_sindex_sbin_match(newbin, oldbin)) {
				needs_sindex_put    = true;
			} else {
				needs_sindex_update = true;
			}
		}

		if (needs_sindex_update) {
			tr->flag |= AS_TRANSACTION_FLAG_SINDEX_TOUCHED;
			as_sindex_delete_by_sbin(rd->ns,
					as_index_get_set_name(rd->r, rd->ns), 1, oldbin, rd);
			as_sindex_put_by_sbin(rd->ns,
					as_index_get_set_name(rd->r, rd->ns), 1, newbin, rd);
		} else {
			if (needs_sindex_delete) {
				tr->flag |= AS_TRANSACTION_FLAG_SINDEX_TOUCHED;
				as_sindex_delete_by_sbin(rd->ns,
					as_index_get_set_name(rd->r, rd->ns), 1, oldbin, rd);
			}
			if (needs_sindex_put) {
				tr->flag |= AS_TRANSACTION_FLAG_SINDEX_TOUCHED;
				as_sindex_put_by_sbin(rd->ns,
					as_index_get_set_name(rd->r, rd->ns), 1, newbin, rd);
			}
		}
		as_sindex_sbin_freeall(oldbin, 1);
		as_sindex_sbin_freeall(newbin, 1);
	}
	return ret;
} // end udf_aerospike_setbin()
/*
 * Internal Function: udf_aerospike_delbin
 *
 * Parameters:
 * 		r 		- udf_record to be manipulated
 * 		bname 	- name of the bin to be deleted
 *
 * Return value:
 * 		0  on success
 * 	   -1  on failure
 *
 * Description:
 * 		The function deletes the bin with the name
 * 		passed in as parameter. The as_bin_destroy function
 * 		which is called here, only frees the data and
 * 		the bin is marked as not in use. The bin can then be reused later.
 *
 * 		Synchronization : object lock acquired by the transaction thread executing UDF.
 * 		Partition reservation takes place just before the transaction starts executing
 * 		( look for as_partition_reserve_udf in thr_tsvc.c )
 *
 * 		Callers:
 * 		udf_aerospike__apply_update_atomic
 * 		In this function, if it fails at the time of update, the record is set
 * 		to rollback all the updates till this point. The case where it fails in
 * 		rollback is not handled.
 *
 * 		Side Notes:
 * 		i.	write_to_device will be set to true on a successful bin destroy.
 * 		If all the updates from udf_aerospike__apply_update_atomic (including this) are
 * 		successful, the record will be written to disk and reopened so that the rest of
 * 		sets of updates can be applied.
 *
 * 		ii.	If delete from sindex fails, we do not handle it.
 */
static int
udf_aerospike_delbin(udf_record * urecord, const char * bname)
{
	// Check that bname is not completely invalid
	if ( !bname || !bname[0] ) {
		cf_warning(AS_UDF, "udf_aerospike_delbin: Invalid Parameters [No bin name supplied]... Fail");
		return -1;
	}

	as_storage_rd  *rd      = urecord->rd;
	as_transaction *tr      = urecord->tr;

	// Check quality of bname -- check that it is proper length, then make sure
	// that the bin exists.
	if (strlen(bname) >= AS_ID_BIN_SZ) {
		// Can't read bin if name too large.
		cf_warning(AS_UDF, "udf_aerospike_delbin: Invalid Parameters [bin name(%s) too big]... Fail", bname);
		return -1;
	}

	as_bin * b = as_bin_get(rd, bname);
	if ( !b ) {
		cf_debug(AS_UDF, "udf_aerospike_delbin: Invalid Operation [Bin name(%s) not found of delete]... Fail", bname);
		return -1;
	}

	const char * set_name = as_index_get_set_name(rd->r, rd->ns);
	
	bool has_sindex = as_sindex_ns_has_sindex(rd->ns);
	if (has_sindex) {
		SINDEX_GRLOCK();
	}
	SINDEX_BINS_SETUP(sbins, rd->ns->sindex_cnt);
	as_sindex * si_arr[rd->ns->sindex_cnt];
	int si_arr_index = 0;
	int sbins_populated  = 0;
	if (has_sindex) {
		si_arr_index += as_sindex_arr_lookup_by_set_binid_lockfree(rd->ns, set_name, b->id, &si_arr[si_arr_index]);
		sbins_populated += as_sindex_sbins_from_bin(rd->ns, set_name, b, sbins, AS_SINDEX_OP_DELETE);
		SINDEX_GUNLOCK();
	}

	int32_t i = as_bin_get_index(rd, bname);
	if (i != -1) {
		if (has_sindex) {
			if (sbins_populated > 0) {	
				tr->flags |= AS_TRANSACTION_FLAG_SINDEX_TOUCHED;
				as_sindex_update_by_sbin(rd->ns, as_index_get_set_name(rd->r, rd->ns), sbins, sbins_populated, &rd->keyd);
			}
		}
		as_bin_destroy(rd, i);
	} else {
		cf_warning(AS_UDF, "udf_aerospike_delbin: Internal Error [Deleting non-existing bin %s]... Fail", bname);
	}

	if (has_sindex) {
		as_sindex_sbin_freeall(sbins, sbins_populated);
		as_sindex_release_arr(si_arr, si_arr_index);
	}

	return 0;
}
/*
 * Internal function: udf_aerospike__apply_update_atomic
 *
 * Parameters:
 * 		rec --	udf_record to be updated
 *
 * Return Values:
 * 		 0 success
 * 		-1 failure
 *
 * Description:
 * 		This function applies all the updates atomically. That is,
 * 		if one of the bin update/delete/create fails, the entire function
 * 		will fail. If the nth update fails, all the n-1 updates are rolled
 * 		back to their initial values
 *
 * 		Special Notes:
 * 		i. The basic checks of bin name being too long or if there is enough space
 * 		on the disk for the bin values is done before allocating space for any
 * 		of the bins.
 *
 * 		ii. If one of the updates to be rolled back is a bin creation,
 * 		udf_aerospike_delbin is called. This will not free up the bin metadata.
 * 		So there will be a small memory mismatch b/w replica (which did not get the
 * 		record at all and hence no memory is accounted) and the master will be seen.
 * 		To avoid such cases, we are doing checks upfront.
 *
 * 		Callers:
 * 		udf_aerospike__execute_updates
 * 		In this function, if udf_aerospike__apply_update_atomic fails, the record
 * 		is not committed to the storage. On success, record is closed which commits to
 * 		the storage and reopened for the next set of udf updates.
 * 		The return value from udf_aerospike__apply_update_atomic is passed on to the
 * 		callers of this function.
 */
int
udf_aerospike__apply_update_atomic(udf_record *urecord)
{
	int rc						= 0;
	int failmax					= 0;
	int new_bins				= 0;	// How many new bins have to be created in this update
	as_storage_rd * rd			= urecord->rd;
	as_namespace * ns			= rd->ns;
	bool has_sindex				= as_sindex_ns_has_sindex(ns);
	bool is_record_dirty		= false;
	bool is_record_flag_dirty	= false;
	uint8_t old_index_flags		= as_index_get_flags(rd->r);
	uint8_t new_index_flags		= 0;

	// This will iterate over all the updates and apply them to storage.
	// The items will remain, and be used as cache values. If an error
	// occurred during setbin(), we rollback all the operation which
	// is and return failure
	cf_detail(AS_UDF, "execute updates: %d updates", urecord->nupdates);

	// loop twice to make sure the updates are performed first so in case
	// something wrong it can be rolled back. The deletes will go through
	// successfully generally.

	// In first iteration, just calculate how many new bins need to be created
	for(uint32_t i = 0; i < urecord->nupdates; i++ ) {
		if ( urecord->updates[i].dirty ) {
			char *      k = urecord->updates[i].name;
			if ( k != NULL ) {
				if ( !as_bin_get(rd, k) ) {
					new_bins++;
				}
			}
		}
	}
	// Free bins - total bins not in use in the record
	// Delta bins - new bins that need to be created
	int inuse_bins = as_bin_inuse_count(rd);
	int free_bins  = rd->n_bins - inuse_bins;
	int delta_bins = new_bins - free_bins;
	cf_detail(AS_UDF, "Total bins %d, In use bins %d, Free bins %d , New bins %d, Delta bins %d",
			  rd->n_bins, as_bin_inuse_count(urecord->rd), free_bins, new_bins, delta_bins);

	// Check bin usage limit.
	if ((inuse_bins + new_bins > UDF_RECORD_BIN_ULIMIT) ||
			(urecord->flag & UDF_RECORD_FLAG_TOO_MANY_BINS)) {
		cf_warning(AS_UDF, "bin limit of %d for UDF exceeded: %d bins in use, %d bins free, %s%d new bins needed",
				(int)UDF_RECORD_BIN_ULIMIT, inuse_bins, free_bins,
				(urecord->flag & UDF_RECORD_FLAG_TOO_MANY_BINS) ? ">" : "", new_bins);
		goto Rollback;
	}

	// Allocate space for all the new bins that need to be created beforehand
	if (delta_bins > 0 && rd->ns->storage_data_in_memory && ! rd->ns->single_bin) {
		as_bin_allocate_bin_space(urecord->r_ref->r, rd, delta_bins);
	}

	if (!rd->ns->storage_data_in_memory && !urecord->particle_data) {
		// 256 as upper bound on the LDT control bin, we may write version below
		// leave it at the end for its use
		urecord->particle_data = cf_malloc(rd->ns->storage_write_block_size + 256);
		urecord->cur_particle_data = urecord->particle_data;
		urecord->end_particle_data = urecord->particle_data + rd->ns->storage_write_block_size;
	}

	if (has_sindex) {
		SINDEX_GRLOCK();
	}

	// In second iteration apply updates.
	for(uint32_t i = 0; i < urecord->nupdates; i++ ) {
		urecord->updates[i].oldvalue  = NULL;
		urecord->updates[i].washidden = false;
		if ( urecord->updates[i].dirty && rc == 0) {

			char *      k = urecord->updates[i].name;
			as_val *    v = urecord->updates[i].value;
			bool        h = urecord->updates[i].ishidden;

			if ( k != NULL ) {
				if ( v == NULL || v->type == AS_NIL ) {
					// if the value is NIL, then do a delete
					cf_detail(AS_UDF, "execute update: position %d deletes bin %s", i, k);
					urecord->updates[i].oldvalue = udf_record_storage_get(urecord, k);
					urecord->updates[i].washidden = udf_record_bin_ishidden(urecord, k);
					// Only case delete fails if bin is not found that is 
					// as good as delete. Ignore return code !!
					udf_aerospike_delbin(urecord, k);

					if (urecord->dirty != NULL) {
						xdr_fill_dirty_bins(urecord->dirty);
					}
				}
				else {
					// otherwise, it is a set
					cf_detail(AS_UDF, "execute update: position %d sets bin %s", i, k);
					urecord->updates[i].oldvalue = udf_record_storage_get(urecord, k);
					urecord->updates[i].washidden = udf_record_bin_ishidden(urecord, k);
					rc = udf_aerospike_setbin(urecord, i, k, v, h);
					if (rc) {
						if (urecord->updates[i].oldvalue) {
							as_val_destroy(urecord->updates[i].oldvalue);
							urecord->updates[i].oldvalue = NULL;
						} 
						failmax = i;
						goto Rollback;
					}

					if (urecord->dirty != NULL) {
						xdr_add_dirty_bin(ns, urecord->dirty, k, strlen(k));
					}
				}
			}

			is_record_dirty = true;
		}
	}

	if (urecord->ldt_rectype_bit_update) {
		if (urecord->ldt_rectype_bit_update < 0) {
			// ldt_rectype_bit_update is negative in case we want to reset the bits 
			uint8_t rectype_bits = urecord->ldt_rectype_bit_update * -1; 
			new_index_flags = old_index_flags & ~rectype_bits;
		} else { 
			new_index_flags = old_index_flags | urecord->ldt_rectype_bit_update;  
		} 

		if (new_index_flags != old_index_flags) {
			as_index_clear_flags(rd->r, old_index_flags);
			as_index_set_flags(rd->r, new_index_flags);
			is_record_flag_dirty = true;
			cf_detail_digest(AS_RW, &urecord->tr->keyd, "Setting index flags from %d to %d new flag %d", old_index_flags, new_index_flags, as_index_get_flags(rd->r));
		}
	}

	{
		// This is _NOT_ for writing to the storage but for simply performing sizing
		// calculation. If we know the upper bounds of size of rec_props.. we could 
		// avoid this work and check with that much correction ... 
		//
		// See
		//  - udf_rw_post_processing for building rec_props for replication
		//  - udf_record_close for building rec_props for writing it to storage
		size_t  rec_props_data_size = as_storage_record_rec_props_size(rd);
		uint8_t rec_props_data[rec_props_data_size];
		if (rec_props_data_size > 0) {
			as_storage_record_set_rec_props(rd, rec_props_data);
		}

		// Version is set in the end after record size check. Setting version won't change the size of
		// the record. And if it were before size check then this setting of version as well needs to
		// be backed out.
		// TODO: Add backout logic would work till very first create call of LDT end up crossing over
		// record boundary
		if (rd->ns->ldt_enabled && as_ldt_record_is_parent(rd->r)) {
			int rv = as_ldt_parent_storage_set_version(rd, urecord->lrecord->version, urecord->end_particle_data, __FILE__, __LINE__);
			if (rv < 0) {
				cf_warning(AS_LDT, "udf_aerospike__apply_update_atomic: Internal Error "
							" [Failed to set the version on storage rv=%d]... Fail",rv);
				goto Rollback;
			}
			// TODO - if size check below fails, won't write to device -
			// different behavior than write_to_device flag - OK?
			is_record_dirty = true;
		}

		if (! as_storage_record_size_and_check(rd)) {
			cf_warning(AS_UDF, "record failed storage size check, will not be updated");
			failmax = (int)urecord->nupdates;
			goto Rollback;
		}

		if (cf_atomic32_get(rd->ns->stop_writes) == 1) {
			cf_warning(AS_UDF, "UDF failed by stop-writes, record will not be updated");
			failmax = (int)urecord->nupdates;
			goto Rollback;
		}

		if (! as_storage_has_space(rd->ns)) {
			cf_warning(AS_UDF, "drives full, record will not be updated");
			failmax = (int)urecord->nupdates;
			goto Rollback;
		}

		if (! is_valid_ttl(rd->ns, urecord->tr->msgp->msg.record_ttl)) {
			cf_warning(AS_UDF, "invalid ttl %u", urecord->tr->msgp->msg.record_ttl);
			failmax = (int)urecord->nupdates;
			goto Rollback;
		}
	}

	if (has_sindex) {
		SINDEX_GUNLOCK();
	}

	// If there were updates do miscellaneous successful commit
	// tasks
	if (is_record_dirty 
			|| is_record_flag_dirty
			|| (urecord->flag & UDF_RECORD_FLAG_METADATA_UPDATED)) {
		urecord->flag |= UDF_RECORD_FLAG_HAS_UPDATES; // will write to storage
	}
	urecord->ldt_rectype_bit_update = 0;

	// Clean up oldvalue cache and reset dirty. All the changes made 
	// here has made to the particle buffer. Nothing will now be backed out.
	for (uint32_t i = 0; i < urecord->nupdates; i++) {
		udf_record_bin * bin = &urecord->updates[i];
		if (bin->oldvalue != NULL ) {
			as_val_destroy(bin->oldvalue);
			bin->oldvalue = NULL;
		}
		bin->dirty    = false;
	}
	return rc;

Rollback:
	cf_debug(AS_UDF, "Rollback Called: failmax %d", failmax);
	for (int i = 0; i < failmax; i++) {
		if (urecord->updates[i].dirty) {
			char *      k = urecord->updates[i].name;
			// Pick the oldvalue for rollback
			as_val *    v = urecord->updates[i].oldvalue;
			bool        h = urecord->updates[i].washidden;
			if ( k != NULL ) {
				if ( v == NULL || v->type == AS_NIL ) {
					// if the value is NIL, then do a delete
					cf_detail(AS_UDF, "execute rollback: position %d deletes bin %s", i, k);
					rc = udf_aerospike_delbin(urecord, k);
				}
				else {
					// otherwise, it is a set
					cf_detail(AS_UDF, "execute rollback: position %d sets bin %s", i, k);
					rc = udf_aerospike_setbin(urecord, i, k, v, h);
					if (rc) {
						cf_warning(AS_UDF, "Rollback failed .. not good ... !!");
					}
				}
			}
			if (v) {
				as_val_destroy(v);
				cf_debug(AS_UDF, "ROLLBACK as_val_destroy()");
			}
		}
	}

	if (is_record_dirty && urecord->dirty != NULL) {
		xdr_clear_dirty_bins(urecord->dirty);
	}

	if (is_record_flag_dirty) {
		as_index_clear_flags(rd->r, new_index_flags);
		as_index_set_flags(rd->r, old_index_flags);
		is_record_flag_dirty = false;
	}
	urecord->ldt_rectype_bit_update = 0;

	if (has_sindex) {
		SINDEX_GUNLOCK();
	}

	// Reset the flat size in case the stuff is backedout !!! it should not
	// fail in the backout code ...
	if (! as_storage_record_size_and_check(rd)) {
		cf_warning(AS_LDT, "Does not fit even after rollback... it is trouble");
	}

	// Do not clean up the cache in case of failure
	return -1;
}
Exemple #10
0
int as_msg_make_response_bufbuilder(as_record *r, as_storage_rd *rd,
		cf_buf_builder **bb_r, bool nobindata, char *nsname, bool include_ldt_data,
		bool include_key, bool skip_empty_records, cf_vector *binlist)
{
	// Sanity checks. Either rd should be there or nobindata and nsname should be present.
	if (!(rd || (nobindata && nsname))) {
		cf_detail(AS_PROTO, "Neither storage record nor nobindata is set. Skipping the record.");
		return 0;
	}

	// figure out the size of the entire buffer
	int         set_name_len = 0;
	const char *set_name     = NULL;
	int         ns_len       = rd ? strlen(rd->ns->name) : strlen(nsname);

	if (as_index_get_set_id(r) != INVALID_SET_ID) {
		as_namespace *ns = NULL;

		if (rd) {
			ns = rd->ns;
		} else if (nsname) {
			ns = as_namespace_get_byname(nsname);
		}
		if (!ns) {
			cf_info(AS_PROTO, "Cannot get namespace, needed to get set information. Skipping record.");
			return -1;
		}
		set_name = as_index_get_set_name(r, ns);
		if (set_name) {
			set_name_len = strlen(set_name);
		}
	}

	uint8_t* key = NULL;
	uint32_t key_size = 0;

	if (include_key && as_index_is_flag_set(r, AS_INDEX_FLAG_KEY_STORED)) {
		if (! as_storage_record_get_key(rd)) {
			cf_info(AS_PROTO, "can't get key - skipping record");
			return -1;
		}

		key = rd->key;
		key_size = rd->key_size;
	}

	uint16_t n_fields = 2;
	int msg_sz = sizeof(as_msg);
	msg_sz += sizeof(as_msg_field) + sizeof(cf_digest);
	msg_sz += sizeof(as_msg_field) + ns_len;
	if (set_name) {
		n_fields++;
		msg_sz += sizeof(as_msg_field) + set_name_len;
	}
	if (key) {
		n_fields++;
		msg_sz += sizeof(as_msg_field) + key_size;
	}

	int list_bins   = 0;
	int in_use_bins = rd ? (int)as_bin_inuse_count(rd) : 0;
	as_val *ldt_bin_vals[in_use_bins];

	if (! nobindata) {
		if (binlist) {
			int binlist_sz = cf_vector_size(binlist);

			for (uint16_t i = 0; i < binlist_sz; i++) {
				char binname[AS_ID_BIN_SZ];

				cf_vector_get(binlist, i, (void*)&binname);

				as_bin *p_bin = as_bin_get(rd, binname);

				if (! p_bin) {
					continue;
				}

				msg_sz += sizeof(as_msg_op);
				msg_sz += rd->ns->single_bin ? 0 : strlen(binname);

				if (as_bin_is_hidden(p_bin)) {
					if (include_ldt_data) {
						msg_sz += (int)as_ldt_particle_client_value_size(rd, p_bin, &ldt_bin_vals[list_bins]);
					}
					else {
						ldt_bin_vals[list_bins] = NULL;
					}
				}
				else {
					msg_sz += (int)as_bin_particle_client_value_size(p_bin);
				}

				list_bins++;
			}

			// Don't return an empty record.
			if (skip_empty_records && list_bins == 0) {
				return 0;
			}
		}
		else {
			msg_sz += sizeof(as_msg_op) * in_use_bins;

			for (uint16_t i = 0; i < in_use_bins; i++) {
				as_bin *p_bin = &rd->bins[i];

				msg_sz += rd->ns->single_bin ? 0 : strlen(as_bin_get_name_from_id(rd->ns, p_bin->id));

				if (as_bin_is_hidden(p_bin)) {
					if (include_ldt_data) {
						msg_sz += (int)as_ldt_particle_client_value_size(rd, p_bin, &ldt_bin_vals[i]);
					}
					else {
						ldt_bin_vals[i] = NULL;
					}
				}
				else {
					msg_sz += (int)as_bin_particle_client_value_size(p_bin);
				}
			}
		}
	}

	uint8_t *b;
	cf_buf_builder_reserve(bb_r, msg_sz, &b);

	// set up the header
	uint8_t *buf = b;
	as_msg *msgp = (as_msg *) buf;

	msgp->header_sz = sizeof(as_msg);
	msgp->info1 = (nobindata ? AS_MSG_INFO1_GET_NOBINDATA : 0);
	msgp->info2 = 0;
	msgp->info3 = 0;
	msgp->unused = 0;
	msgp->result_code = 0;
	msgp->generation = r->generation;
	msgp->record_ttl = r->void_time;
	msgp->transaction_ttl = 0;
	msgp->n_fields = n_fields;
	if (rd) {
		if (binlist)
			msgp->n_ops = list_bins;
		else
			msgp->n_ops = in_use_bins;
	} else {
		msgp->n_ops = 0;
	}
	as_msg_swap_header(msgp);

	buf += sizeof(as_msg);

	as_msg_field *mf = (as_msg_field *) buf;
	mf->field_sz = sizeof(cf_digest) + 1;
	mf->type = AS_MSG_FIELD_TYPE_DIGEST_RIPE;
	if (rd) {
		memcpy(mf->data, &rd->keyd, sizeof(cf_digest));
	} else {
		memcpy(mf->data, &r->key, sizeof(cf_digest));
	}
	as_msg_swap_field(mf);
	buf += sizeof(as_msg_field) + sizeof(cf_digest);

	mf = (as_msg_field *) buf;
	mf->field_sz = ns_len + 1;
	mf->type = AS_MSG_FIELD_TYPE_NAMESPACE;
	if (rd) {
		memcpy(mf->data, rd->ns->name, ns_len);
	} else {
		memcpy(mf->data, nsname, ns_len);
	}
	as_msg_swap_field(mf);
	buf += sizeof(as_msg_field) + ns_len;

	if (set_name) {
		mf = (as_msg_field *) buf;
		mf->field_sz = set_name_len + 1;
		mf->type = AS_MSG_FIELD_TYPE_SET;
		memcpy(mf->data, set_name, set_name_len);
		as_msg_swap_field(mf);
		buf += sizeof(as_msg_field) + set_name_len;
	}

	if (key) {
		mf = (as_msg_field *) buf;
		mf->field_sz = key_size + 1;
		mf->type = AS_MSG_FIELD_TYPE_KEY;
		memcpy(mf->data, key, key_size);
		as_msg_swap_field(mf);
		buf += sizeof(as_msg_field) + key_size;
	}

	if (nobindata) {
		return 0;
	}

	if (binlist) {
		list_bins = 0;

		int binlist_sz = cf_vector_size(binlist);

		for (uint16_t i = 0; i < binlist_sz; i++) {
			char binname[AS_ID_BIN_SZ];
			cf_vector_get(binlist, i, (void*)&binname);

			as_bin *p_bin = as_bin_get(rd, binname);

			if (! p_bin) {
				continue;
			}

			as_msg_op *op = (as_msg_op *)buf;

			op->op = AS_MSG_OP_READ;
			op->version = 0;
			op->name_sz = as_bin_memcpy_name(rd->ns, op->name, p_bin);
			op->op_sz = 4 + op->name_sz;

			buf += sizeof(as_msg_op) + op->name_sz;

			if (as_bin_is_hidden(p_bin)) {
				buf += as_ldt_particle_to_client(ldt_bin_vals[list_bins], op);
			}
			else {
				buf += as_bin_particle_to_client(p_bin, op);
			}

			list_bins++;

			as_msg_swap_op(op);
		}
	}
	else {
		for (uint16_t i = 0; i < in_use_bins; i++) {
			as_msg_op *op = (as_msg_op *)buf;

			op->op = AS_MSG_OP_READ;
			op->version = 0;
			op->name_sz = as_bin_memcpy_name(rd->ns, op->name, &rd->bins[i]);
			op->op_sz = 4 + op->name_sz;

			buf += sizeof(as_msg_op) + op->name_sz;

			if (as_bin_is_hidden(&rd->bins[i])) {
				buf += as_ldt_particle_to_client(ldt_bin_vals[i], op);
			}
			else {
				buf += as_bin_particle_to_client(&rd->bins[i], op);
			}

			as_msg_swap_op(op);
		}
	}

	return 0;
}