int
ldt_aerospike_crec_close(const as_aerospike * as, const as_rec *crec_p)
{
	cf_detail(AS_LDT, "[ENTER] as(%p) subrec(%p)", as, crec_p );
	if (!as || !crec_p) {
		cf_warning(AS_LDT, "ldt_aerospike_crec_close: Invalid Parameters [as=%p, subrecord=%p]... Fail", as, crec_p);
		return 2;
	}

	// Close of the record is only allowed if the user has not updated
	// it. Other wise it is a group commit
	udf_record *c_urecord = (udf_record *)as_rec_source(crec_p);
	if (!c_urecord) {
		cf_warning(AS_LDT, "ldt_aerospike_crec_close: Internal Error [Malformed Sub Record]... Fail");
		return -1;
	}
	ldt_record  *lrecord  = (ldt_record *)c_urecord->lrecord;
	if (!lrecord) {
		cf_warning(AS_LDT, "ldt_aerospike_crec_close: Internal Error [Invalid Head Record Reference in Sub Record]... Fail");
		return -1;
	}

	ldt_slot *lslotp   = slot_lookup_by_urec(lrecord, crec_p);
	if (!lslotp) {
		cf_warning(AS_LDT, "ldt_aerospike_crec_close: Invalid Operation [Sub Record close called for the record which is not open]... Fail");
		return -1;
	}
	cf_detail(AS_LDT, "ldt_aerospike_crec_close");
	if (c_urecord->flag & UDF_RECORD_FLAG_HAS_UPDATES) {
		cf_debug(AS_LDT, "Cannot close record with update ... it needs group commit");
		return -2;
	}
	udf_record_close(c_urecord);
	udf_record_cache_free(c_urecord);
	slot_destroy(lslotp, lrecord);
	c_urecord->flag &= ~UDF_RECORD_FLAG_ISVALID;
	return 0;
}
Beispiel #2
0
/*
 * Function: Close storage record for udf record. Release
 *           all locks and partition reservation / namespace
 *           reservation etc. if requested.
 *           Also cleans up entire cache (updated from udf)
 *
 * Parameters:
 * 		urec       : UDF record being operated on
 *
 * Return value : Nothing
 *
 * Callers:
 * 		query_agg_istream_read
 * 		ldt_aerospike_crec_close
 * 		as_query__agg
 * 		udf_record_destroy
 */
void
udf_record_close(udf_record *urecord)
{
	as_transaction *tr    = urecord->tr;
	cf_debug_digest(AS_UDF, &tr->keyd, "[ENTER] Closing record key:");

	if (urecord->flag & UDF_RECORD_FLAG_OPEN) {
		as_index_ref   *r_ref = urecord->r_ref;
		cf_detail(AS_UDF, "Closing %sRecord",
				  (urecord->flag & UDF_RECORD_FLAG_IS_SUBRECORD) ? "Sub" : "");
		udf_storage_record_close(urecord);
		as_record_done(r_ref, tr->rsv.ns);
		urecord->flag &= ~UDF_RECORD_FLAG_OPEN;
		cf_detail_digest(AS_UDF, &urecord->tr->keyd,
			"Storage Close:: Rec(%p) Flag(%x) Digest:", urecord, urecord->flag );
	}

	// Replication happens when the main record replicates
	if (urecord->particle_data) {
		cf_free(urecord->particle_data);
		urecord->particle_data = 0;
	}
	udf_record_cache_free(urecord);
}
/*
 * Internal function: udf_aerospike__apply_update_atomic
 *
 * Parameters:
 * 		rec --	udf_record to be updated
 *
 * Return Values:
 * 		 0 success
 * 		-1 failure
 *
 * Description:
 * 		This function applies all the updates atomically. That is,
 * 		if one of the bin update/delete/create fails, the entire function
 * 		will fail. If the nth update fails, all the n-1 updates are rolled
 * 		back to their initial values
 *
 * 		Special Notes:
 * 		i. The basic checks of bin name being too long or if there is enough space
 * 		on the disk for the bin values is done before allocating space for any
 * 		of the bins.
 *
 * 		ii. If one of the updates to be rolled back is a bin creation,
 * 		udf_aerospike_delbin is called. This will not free up the bin metadata.
 * 		So there will be a small memory mismatch b/w replica (which did not get the
 * 		record at all and hence no memory is accounted) and the master will be seen.
 * 		To avoid such cases, we are doing checks upfront.
 *
 * 		Callers:
 * 		udf_aerospike__execute_updates
 * 		In this function, if udf_aerospike__apply_update_atomic fails, the record
 * 		is not committed to the storage. On success, record is closed which commits to
 * 		the storage and reopened for the next set of udf updates.
 * 		The return value from udf_aerospike__apply_update_atomic is passed on to the
 * 		callers of this function.
 */
int
udf_aerospike__apply_update_atomic(udf_record *urecord)
{
	int rc 					= 0;
	int failindex 			= 0;
	int new_bins 			= 0;	// How many new bins have to be created in this update
	as_storage_rd * rd		= urecord->rd;

	// This will iterate over all the updates and apply them to storage.
	// The items will remain, and be used as cache values. If an error
	// occurred during setbin(), we rollback all the operation which
	// is and return failure
	cf_detail(AS_UDF, "execute updates: %d updates", urecord->nupdates);

	// loop twice to make sure the updates are performed first so in case
	// something wrong it can be rolled back. The deletes will go through
	// successfully generally.

	// In first iteration, just calculate how many new bins need to be created
	for(int i = 0; i < urecord->nupdates; i++ ) {
		if ( urecord->updates[i].dirty ) {
			char *      k = urecord->updates[i].name;
			if ( k != NULL ) {
				if ( !as_bin_get(rd, (uint8_t *)k, strlen(k)) ) {
					new_bins++;
				}
			}
		}
	}
	// Free bins - total bins not in use in the record
	// Delta bins - new bins that need to be created
	int free_bins  = urecord->rd->n_bins - as_bin_inuse_count(urecord->rd);
	int delta_bins = new_bins - free_bins;
	cf_detail(AS_UDF, "Total bins %d, In use bins %d, Free bins %d , New bins %d, Delta bins %d",
			  urecord->rd->n_bins, as_bin_inuse_count(urecord->rd), free_bins, new_bins, delta_bins);

	// Allocate space for all the new bins that need to be created beforehand
	if (delta_bins > 0 && rd->ns->storage_data_in_memory && ! rd->ns->single_bin) {
		as_bin_allocate_bin_space(urecord->r_ref->r, rd, delta_bins);
	}

	bool has_sindex = as_sindex_ns_has_sindex(rd->ns); 
	if (has_sindex) {
		SINDEX_GRLOCK();
	}

	// In second iteration apply updates.
	for(int i = 0; i < urecord->nupdates; i++ ) {
		if ( urecord->updates[i].dirty && rc == 0) {

			char *      k = urecord->updates[i].name;
			as_val *    v = urecord->updates[i].value;
			bool        h = urecord->updates[i].ishidden;
			urecord->updates[i].oldvalue  = NULL;
			urecord->updates[i].washidden = false;

			if ( k != NULL ) {
				if ( v == NULL || v->type == AS_NIL ) {
					// if the value is NIL, then do a delete
					cf_detail(AS_UDF, "execute update: position %d deletes bin %s", i, k);
					urecord->updates[i].oldvalue = udf_record_storage_get(urecord, k);
					urecord->updates[i].washidden = udf_record_bin_ishidden(urecord, k);
					// Only case delete fails if bin is not found that is 
					// as good as delete. Ignore return code !!
					udf_aerospike_delbin(urecord, k);
				}
				else {
					// otherwise, it is a set
					cf_detail(AS_UDF, "execute update: position %d sets bin %s", i, k);
					urecord->updates[i].oldvalue = udf_record_storage_get(urecord, k);
					urecord->updates[i].washidden = udf_record_bin_ishidden(urecord, k);
					rc = udf_aerospike_setbin(urecord, k, v, h);
					if (rc) {
						failindex = i;
						goto Rollback;
					}
				}
			}
		}
	}
	
	if (has_sindex) {
		SINDEX_GUNLOCK();
	}

	for(int i = 0; i < urecord->nupdates; i++ ) {
		if ((urecord->updates[i].dirty)
				&& (urecord->updates[i].oldvalue)) {
			as_val_destroy(urecord->updates[i].oldvalue);
			cf_debug(AS_UDF, "REGULAR as_val_destroy()");
		}
	}
	// Commit successful do miscellaneous task
	// Set updated flag to true
	urecord->flag |= UDF_RECORD_FLAG_HAS_UPDATES;

	// Set up record to be flushed to storage
	urecord->rd->write_to_device = true;

	// Before committing to storage set the rec_type_bits ..
	cf_detail(AS_RW, "TO INDEX              Digest=%"PRIx64" bits %d %p",
		*(uint64_t *)&urecord->tr->keyd.digest[8], urecord->ldt_rectype_bits, urecord);
	as_index_set_flags(rd->r, urecord->ldt_rectype_bits);

	// Clean up cache and start from 0 update again. All the changes
	// made here will if flush from write buffer to storage goes
	// then will never be backed out.
	udf_record_cache_free(urecord);
	return rc;

Rollback:
	cf_debug(AS_UDF, "Rollback Called: FailIndex(%d)", failindex);
	for(int i = 0; i <= failindex; i++) {
		if (urecord->updates[i].dirty) {
			char *      k = urecord->updates[i].name;
			// Pick the oldvalue for rollback
			as_val *    v = urecord->updates[i].oldvalue;
			bool        h = urecord->updates[i].washidden;
			if ( k != NULL ) {
				if ( v == NULL || v->type == AS_NIL ) {
					// if the value is NIL, then do a delete
					cf_detail(AS_UDF, "execute rollback: position %d deletes bin %s", i, k);
					rc = udf_aerospike_delbin(urecord, k);
				}
				else {
					// otherwise, it is a set
					cf_detail(AS_UDF, "execute rollback: position %d sets bin %s", i, k);
					rc = udf_aerospike_setbin(urecord, k, v, h);
					if (rc) {
						cf_warning(AS_UDF, "Rollback failed .. not good ... !!");
					}
				}
			}
			if (v) {
				as_val_destroy(v);
				cf_debug(AS_UDF, "ROLLBACK as_val_destroy()");
			}
		}
	}
	if (has_sindex) {
		SINDEX_GUNLOCK();
	}

	// Do not clean up the cache in case of failure
	return -1;
}
Beispiel #4
0
/* Internal Function: Does the post processing for the UDF record after the
 *					  UDF execution. Does the following:
 *		1. Record is closed
 *		2. urecord_op is updated to delete in case there is no bin left in it.
 *		3. record->pickled_buf is populated before the record is close in case
 *		   it was write operation
 *		4. UDF updates cache is cleared
 *
 *	Returns: Nothing
 *
 *	Parameters: urecord          - UDF record to operate on
 *				urecord_op (out) - Populated with the optype
 */
void
udf_rw_post_processing(udf_record *urecord, udf_optype *urecord_op, uint16_t set_id)
{
	as_storage_rd      *rd   = urecord->rd;
	as_transaction     *tr   = urecord->tr;
	as_index_ref    *r_ref   = urecord->r_ref;

	// INIT
	urecord->pickled_buf     = NULL;
	urecord->pickled_sz      = 0;
	urecord->pickled_void_time     = 0;
	as_rec_props_clear(&urecord->pickled_rec_props);
	bool udf_xdr_ship_op = false;

	// TODO: optimize not to allocate buffer if it is single
	// node cluster. No remote to send data to
	// Check if UDF has updates.
	if (urecord->flag & UDF_RECORD_FLAG_HAS_UPDATES) {
		// Check if the record is not deleted after an update
		if ( urecord->flag & UDF_RECORD_FLAG_OPEN) {
			*urecord_op = UDF_OPTYPE_WRITE;
			udf_xdr_ship_op = true;
		} 
		else {
			// If the record has updates and it is not open, 
			// and if it pre-existed it's an update followed by a delete.
			if ( urecord->flag & UDF_RECORD_FLAG_PREEXISTS) {
				*urecord_op = UDF_OPTYPE_DELETE;
				udf_xdr_ship_op = true;
			} 
			// If the record did not pre-exist and is updated
			// and it is not open, then it is create followed by
			// delete essentially no_op.
			else {
				*urecord_op = UDF_OPTYPE_NONE;
			}
		}
	} else if ((urecord->flag & UDF_RECORD_FLAG_PREEXISTS)
			   && !(urecord->flag & UDF_RECORD_FLAG_OPEN)) {
		*urecord_op  = UDF_OPTYPE_DELETE;
		udf_xdr_ship_op = true;
	} else {
		*urecord_op  = UDF_OPTYPE_READ;
	}

	cf_detail(AS_UDF, "FINISH working with LDT Record %p %p %p %p %d", &urecord,
			urecord->tr, urecord->r_ref, urecord->rd,
			(urecord->flag & UDF_RECORD_FLAG_STORAGE_OPEN));

	// If there exists a record reference but no bin of the record is in use,
	// delete the record. remove from the tree. Only LDT_RECORD here not needed
	// for LDT_SUBRECORD (only do it if requested by UDF). All the SUBRECORD of
	// removed LDT_RECORD will be lazily cleaned up by defrag.
	if (!(urecord->flag & UDF_RECORD_FLAG_IS_SUBRECORD)
			&& urecord->flag & UDF_RECORD_FLAG_OPEN
			&& !as_bin_inuse_has(rd)) {
		as_index_delete(tr->rsv.tree, &tr->keyd);
		urecord->starting_memory_bytes = 0;
		*urecord_op                    = UDF_OPTYPE_DELETE;
		udf_xdr_ship_op = true;
	} else if (*urecord_op == UDF_OPTYPE_WRITE)	{
		cf_detail(AS_UDF, "Committing Changes %"PRIx64" n_bins %d", rd->keyd, as_bin_get_n_bins(r_ref->r, rd));

		size_t  rec_props_data_size = as_storage_record_rec_props_size(rd);
		uint8_t rec_props_data[rec_props_data_size];
		if (rec_props_data_size > 0) {
			as_storage_record_set_rec_props(rd, rec_props_data);
		}

		write_local_post_processing(tr, tr->rsv.ns, NULL, &urecord->pickled_buf,
			&urecord->pickled_sz, &urecord->pickled_void_time,
			&urecord->pickled_rec_props, true/*increment_generation*/,
			NULL, r_ref->r, rd, urecord->starting_memory_bytes);

		// Now ok to accommodate a new stored key...
		if (! as_index_is_flag_set(r_ref->r, AS_INDEX_FLAG_KEY_STORED) && rd->key) {
			if (rd->ns->storage_data_in_memory) {
				as_record_allocate_key(r_ref->r, rd->key, rd->key_size);
			}

			as_index_set_flags(r_ref->r, AS_INDEX_FLAG_KEY_STORED);
		}
		// ... or drop a stored key.
		else if (as_index_is_flag_set(r_ref->r, AS_INDEX_FLAG_KEY_STORED) && ! rd->key) {
			if (rd->ns->storage_data_in_memory) {
				as_record_remove_key(r_ref->r);
			}

			as_index_clear_flags(r_ref->r, AS_INDEX_FLAG_KEY_STORED);
		}
	}

	// Collect the record information (for XDR) before closing the record
	as_generation generation = 0;
	if (urecord->flag & UDF_RECORD_FLAG_OPEN) {
		generation = r_ref->r->generation;
		set_id = as_index_get_set_id(r_ref->r);
	}
	// Close the record for all the cases
	udf_record_close(urecord, false);

	// Write to XDR pipe after closing the record, in order to release the record lock as
	// early as possible.
	if (udf_xdr_ship_op == true) {
		if (UDF_OP_IS_WRITE(*urecord_op)) {
			cf_detail(AS_UDF, "UDF write shipping for key %" PRIx64, tr->keyd);
			xdr_write(tr->rsv.ns, tr->keyd, generation, 0, false, set_id);
		} else if (UDF_OP_IS_DELETE(*urecord_op)) {
			cf_detail(AS_UDF, "UDF delete shipping for key %" PRIx64, tr->keyd);
			xdr_write(tr->rsv.ns, tr->keyd, generation, 0, true, set_id);
		}
	}

	// Replication happens when the main record replicates
	if (urecord->particle_data) {
		cf_free(urecord->particle_data);
		urecord->particle_data = 0;
	}
	udf_record_cache_free(urecord);
}