void cl_cluster_scan_shutdown(cl_cluster* asc) {
    // Check whether we ever (lazily) initialized scan machinery.
    if (cf_atomic32_get(asc->scan_initialized) == 0 && ! asc->scan_q) {
        return;
    }

    // This tells the worker threads to stop. We do this (instead of using a
    // "running" flag) to allow the workers to "wait forever" on processing the
    // work dispatch queue, which has minimum impact when the queue is empty.
    // This also means all queued requests get processed when shutting down.
    for (int i = 0; i < NUM_SCAN_THREADS; i++) {
        cl_scan_task task;
        task.asc = NULL;
        cf_queue_push(asc->scan_q, &task);
    }

    for (int i = 0; i < NUM_SCAN_THREADS; i++) {
        pthread_join(asc->scan_threads[i], NULL);
    }

    cf_queue_destroy(asc->scan_q);
    asc->scan_q = NULL;
    cf_atomic32_set(&asc->scan_initialized, 0);
}
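/*
 * Worker-side counterpart (illustrative sketch only - scan_worker_fn and its
 * task handling are assumptions, not code from this file). It shows the shape
 * the shutdown above relies on: workers block forever on scan_q and treat a
 * task with asc == NULL as the signal to exit, after processing any real
 * tasks queued ahead of the sentinel.
 *
 *	static void* scan_worker_fn(void* udata) {
 *		cl_cluster* asc = (cl_cluster*)udata;
 *
 *		while (true) {
 *			cl_scan_task task;
 *
 *			cf_queue_pop(asc->scan_q, &task, CF_QUEUE_FOREVER);
 *
 *			if (! task.asc) {
 *				break;	// shutdown sentinel from cl_cluster_scan_shutdown()
 *			}
 *
 *			// ... execute the scan task ...
 *		}
 *
 *		return NULL;
 *	}
 */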
static void append_set_props(as_set *p_set, cf_dyn_buf *db) {
    cf_dyn_buf_append_string(db, "n_objects=");
    cf_dyn_buf_append_uint64(db, cf_atomic64_get(p_set->num_elements));
    cf_dyn_buf_append_char(db, ':');

    cf_dyn_buf_append_string(db, "n-bytes-memory=");
    cf_dyn_buf_append_uint64(db, cf_atomic64_get(p_set->n_bytes_memory));
    cf_dyn_buf_append_char(db, ':');

    cf_dyn_buf_append_string(db, "stop-writes-count=");
    cf_dyn_buf_append_uint64(db, cf_atomic64_get(p_set->stop_writes_count));
    cf_dyn_buf_append_char(db, ':');

    cf_dyn_buf_append_string(db, "set-enable-xdr=");
    if (cf_atomic32_get(p_set->enable_xdr) == AS_SET_ENABLE_XDR_TRUE) {
        cf_dyn_buf_append_string(db, "true");
    }
    else if (cf_atomic32_get(p_set->enable_xdr) == AS_SET_ENABLE_XDR_FALSE) {
        cf_dyn_buf_append_string(db, "false");
    }
    else if (cf_atomic32_get(p_set->enable_xdr) == AS_SET_ENABLE_XDR_DEFAULT) {
        cf_dyn_buf_append_string(db, "use-default");
    }
    else {
        cf_dyn_buf_append_uint32(db, cf_atomic32_get(p_set->enable_xdr));
    }
    cf_dyn_buf_append_char(db, ':');

    cf_dyn_buf_append_string(db, "disable-eviction=");
    if (IS_SET_EVICTION_DISABLED(p_set)) {
        cf_dyn_buf_append_string(db, "true");
    }
    else {
        cf_dyn_buf_append_string(db, "false");
    }
    cf_dyn_buf_append_char(db, ':');

    cf_dyn_buf_append_string(db, "set-delete=");
    if (IS_SET_DELETED(p_set)) {
        cf_dyn_buf_append_string(db, "true");
    }
    else {
        cf_dyn_buf_append_string(db, "false");
    }
    cf_dyn_buf_append_char(db, ';');
}
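// For illustration, with made-up values, the fragment appended above looks
// like this (colon-separated key=value pairs, ';' terminating the set's entry):
//
//   n_objects=1000:n-bytes-memory=524288:stop-writes-count=0:
//   set-enable-xdr=use-default:disable-eviction=false:set-delete=false;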
static void* async_receiver_fn(void *thdata) {
    int rv = -1;
    bool network_error = false;
    cl_async_work *workitem = NULL;
#if ONEASYNCFD
    cl_async_work *tmpworkitem = NULL;
#endif
    as_msg msg;
    cf_queue *q_to_use = NULL;
    cl_cluster_node *thisnode = NULL;

    uint8_t rd_stack_buf[STACK_BUF_SZ];
    uint8_t *rd_buf = rd_stack_buf;
    size_t rd_buf_sz = 0;

    uint64_t acktrid;
    // uint64_t starttime, endtime;
    int progress_timeout_ms;
    unsigned int thread_id = cf_atomic32_incr(&g_thread_count);

    if (thdata == NULL) {
        q_to_use = g_cl_async_q;
    }
    else {
        thisnode = (cl_cluster_node *)thdata;
        q_to_use = thisnode->asyncwork_q;
    }

    // Infinite loop that keeps picking work items from the queue and tries to
    // read their results off the network.
    while (1) {
        network_error = false;

#if ONEASYNCFD
        if (thisnode->dunned == true) {
            do {
                rv = cf_queue_pop(thisnode->asyncwork_q, &workitem, CF_QUEUE_NOWAIT);
                if (rv == CF_QUEUE_OK) {
                    cl_cluster_node_put(thisnode);
                    free(workitem);
                }
            } while (rv == CF_QUEUE_OK);

            // We want to delete all the workitems of this node.
            shash_reduce_delete(g_cl_async_hashtab, cl_del_node_asyncworkitems, thisnode);
            break;
        }
#endif

        // This call will block if there is no element in the queue.
        cf_queue_pop(q_to_use, &workitem, CF_QUEUE_FOREVER);
        // TODO: What if the node gets dunned while this pop call is blocked?

#if ONEASYNCFD
        //cf_debug("Elements remaining in this node's queue=%d, Hash table size=%d",
        //        cf_queue_sz(thisnode->asyncwork_q), shash_get_size(g_cl_async_hashtab));
#endif

        // If we make no progress in 50ms, move on to the next workitem and
        // revisit this workitem at a later stage.
        progress_timeout_ms = DEFAULT_PROGRESS_TIMEOUT;

        // Read the as_msg, which is the short header.
        rv = cf_socket_read_timeout(workitem->fd, (uint8_t *) &msg, sizeof(as_msg),
                workitem->deadline, progress_timeout_ms);
        if (rv) {
#if DEBUG
            cf_debug("Citrusleaf: error when reading header from server - rv %d fd %d",
                    rv, workitem->fd);
#endif
            if (rv != ETIMEDOUT) {
                cf_error("Citrusleaf: error when reading header from server - rv %d fd %d",
                        rv, workitem->fd);
                network_error = true;
                goto Error;
            }
            else {
                goto Retry;
            }
        }

#ifdef DEBUG_VERBOSE
        dump_buf("read header from cluster", (uint8_t *) &msg, sizeof(cl_msg));
#endif

        cl_proto_swap(&msg.proto);
        cl_msg_swap_header(&msg.m);

        // Second read, for the remainder of the message.
        rd_buf_sz = msg.proto.sz - msg.m.header_sz;
        if (rd_buf_sz > 0) {
            if (rd_buf_sz > sizeof(rd_stack_buf)) {
                rd_buf = malloc(rd_buf_sz);
                if (! rd_buf) {
                    cf_error("malloc fail: trying %zu", rd_buf_sz);
                    rv = -1;
                    goto Error;
                }
            }

            rv = cf_socket_read_timeout(workitem->fd, rd_buf, rd_buf_sz,
                    workitem->deadline, progress_timeout_ms);
            if (rv) {
                // We already read part of the message but failed to read the
                // remaining data (network error or timeout). We cannot re-read,
                // as we already consumed partial data. Declare this an error.
                cf_error("Timeout after reading the header but before reading the body");
                goto Error;
            }

#ifdef DEBUG_VERBOSE
            dump_buf("read body from cluster", rd_buf, rd_buf_sz);
#endif
        }

        rv = CITRUSLEAF_OK;
        goto Ok;

Retry:
        // We are trying to postpone the reading.
        if (workitem->deadline && workitem->deadline < cf_getms()) {
            cf_error("async receiver: out of time : deadline %"PRIu64" now %"PRIu64,
                    workitem->deadline, cf_getms());
            //cf_error("async receiver: Workitem missed the final deadline");
            rv = CITRUSLEAF_FAIL_TIMEOUT;
            goto Error;
        }
        else {
            // We have time. Push the element back onto the queue to be
            // considered later.
            cf_queue_push(q_to_use, &workitem);
        }

        // If we allocated memory in this loop, release it.
        if (rd_buf && (rd_buf != rd_stack_buf)) {
            free(rd_buf);
            // Reset to the stack buffer so the next iteration doesn't reuse a
            // dangling pointer.
            rd_buf = rd_stack_buf;
        }

        cf_atomic_int_incr(&g_async_stats.retries);
        continue;

Error:
        if (network_error == true) {
            /*
             * In case of async work (for XDS), it may be extreme to dun a
             * node on a network error. We just clean things up and retry
             * connecting to the remote cluster. The network error may be a
             * transient one.
             */
        }

#if ONEASYNCFD
        // Do not close the FD.
#else
        // We do not know the state of the FD. It may have pending data to be
        // read, so we cannot reuse it. Close it to be on the safe side.
        cf_error("async receiver: Closing the fd %d because of error", workitem->fd);
        cf_close(workitem->fd);
        workitem->fd = -1;
#endif

        cf_atomic_int_incr(&g_async_stats.dropouts);
        // Continue down with what we do during an Ok.

        // Inform the caller that there is no response from the server for this
        // workitem. No response does not mean the work is not done - it might
        // have completed successfully on the server side; we just didn't get
        // the response for it.
        if (g_fail_cb_fn) {
            g_fail_cb_fn(workitem->udata, rv, workitem->starttime);
        }

Ok:
        // rd_buf may not be there during an error condition.
        if (rd_buf && (rv == CITRUSLEAF_OK)) {
            // As of now, async functionality exists only for the put call.
            // In a put call, we do not get anything back other than the trid
            // field. So, just pass a variable to get back the trid and ignore
            // the others.
            if (0 != cl_parse(&msg.m, rd_buf, rd_buf_sz, NULL, NULL, NULL, &acktrid, NULL)) {
                rv = CITRUSLEAF_FAIL_UNKNOWN;
            }
            else {
                rv = msg.m.result_code;
                if (workitem->trid != acktrid) {
#if ONEASYNCFD
                    // It is likely that we may get the response for a
                    // different trid. Just delete the correct one from the
                    // queue and put the current workitem back in the queue.
                    shash_get(g_cl_async_hashtab, &acktrid, &tmpworkitem);
                    cf_queue_delete(q_to_use, &tmpworkitem, true);
                    cf_queue_push(q_to_use, &workitem);
                    // From now on, workitem is the one for which we got the ack.
                    workitem = tmpworkitem;
#endif
#ifdef DEBUG
                    cf_debug("Got reply for a different trid. Expected=%"PRIu64" Got=%"PRIu64" FD=%d",
                            workitem->trid, acktrid, workitem->fd);
#endif
                }
            }

            if (g_success_cb_fn) {
                g_success_cb_fn(workitem->udata, rv, workitem->starttime);
            }
        }

        // Remember to put the FD back into the pool, if it is reusable.
        if (workitem->fd != -1) {
            cl_cluster_node_fd_put(workitem->node, workitem->fd, true);
        }
        // Also decrement the reference count for this node.
        cl_cluster_node_put(workitem->node);

#if ONEASYNCFD
        // Delete the item from the global hashtable.
        if (shash_delete(g_cl_async_hashtab, &workitem->trid) != SHASH_OK) {
#if DEBUG
            cf_debug("Failure while trying to delete trid=%"PRIu64" from hashtable",
                    workitem->trid);
#endif
        }
#endif

        // Push it back into the free pool. If the attempt fails, free it.
        if (cf_queue_push(g_cl_workitems_freepool_q, &workitem) == -1) {
            free(workitem);
        }

        // If we allocated memory in this loop, release it.
        if (rd_buf && (rd_buf != rd_stack_buf)) {
            free(rd_buf);
            // As above - don't leave rd_buf dangling for the next iteration.
            rd_buf = rd_stack_buf;
        }

        // Kick this thread out if its ID is greater than the configured total.
        if (thread_id > cf_atomic32_get(g_async_num_threads)) {
            cf_atomic32_decr(&g_thread_count);
            return NULL;
        }
    } // the infinite loop

    return NULL;
}
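/*
 * Hedged sketch of caller-side callbacks, mirroring how g_fail_cb_fn and
 * g_success_cb_fn are invoked above with (udata, result code, start time).
 * The function names here are hypothetical; the real typedefs
 * (cl_async_fail_cb / cl_async_success_cb) live in the client header.
 *
 *	static void my_fail_cb(void *udata, int rv, uint64_t starttime) {
 *		// rv is e.g. CITRUSLEAF_FAIL_TIMEOUT. Note the work may still have
 *		// completed server-side - only the response was lost.
 *		cf_error("async op failed: rv %d elapsed %"PRIu64" ms",
 *				rv, cf_getms() - starttime);
 *	}
 *
 *	static void my_success_cb(void *udata, int rv, uint64_t starttime) {
 *		// rv is the server result code (CITRUSLEAF_OK on success).
 *	}
 */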
// Same as do_the_full_monte, but only up to the point where the command is
// sent to the node. Most of the code is duplicated. Bad.
int cl_do_async_monte(cl_cluster *asc, int info1, int info2, const char *ns,
        const char *set, const cl_object *key, const cf_digest *digest,
        cl_bin **values, cl_operator operator, cl_operation **operations,
        int *n_values, uint32_t *cl_gen, const cl_write_parameters *cl_w_p,
        uint64_t *trid, void *udata)
{
    cl_async_work *workitem = NULL;

    uint8_t wr_stack_buf[STACK_BUF_SZ];
    uint8_t *wr_buf = wr_stack_buf;
    size_t wr_buf_sz = sizeof(wr_stack_buf);

    int progress_timeout_ms;
    uint64_t deadline_ms;
    uint64_t starttime, endtime;
    bool network_error;
    int fd = -1;
    int rv = CITRUSLEAF_FAIL_CLIENT;    // Assume that this is a failure.

    // as_msg msg;
    cf_digest d_ret;
    cl_cluster_node *node = 0;

#if ONEASYNCFD
    if (shash_get_size(g_cl_async_hashtab) >= g_async_h_szlimit) {
        //cf_error("Async hashtab is full. Cannot insert any more elements");
        return CITRUSLEAF_FAIL_ASYNCQ_FULL;
    }
#else
    // If the async buffer is at the max limit, do not entertain more requests.
    if (cf_queue_sz(g_cl_async_q) >= cf_atomic32_get(g_async_q_szlimit)) {
        //cf_error("Async buffer is full. Cannot insert any more elements");
        return CITRUSLEAF_FAIL_ASYNCQ_FULL;
    }
#endif

    // Allocate memory for the work item that will be added to the async work
    // list.
    if (cf_queue_sz(g_cl_workitems_freepool_q) > 0) {
        cf_queue_pop(g_cl_workitems_freepool_q, &workitem, CF_QUEUE_FOREVER);
    }
    else {
        workitem = malloc(sizeof(cl_async_work));
        if (workitem == NULL) {
            return CITRUSLEAF_FAIL_CLIENT;
        }
    }

    // Compile the write buffer to be sent to the cluster.
    if (n_values && (values || operations)) {
        cl_compile(info1, info2, 0, ns, set, key, digest, values ? *values : NULL,
                operator, operations ? *operations : NULL, *n_values, &wr_buf,
                &wr_buf_sz, cl_w_p, &d_ret, *trid, NULL, NULL, 0 /*udf_type*/);
    }
    else {
        cl_compile(info1, info2, 0, ns, set, key, digest, 0, 0, 0, 0, &wr_buf,
                &wr_buf_sz, cl_w_p, &d_ret, *trid, NULL, NULL, 0 /*udf_type*/);
    }

    deadline_ms = 0;
    progress_timeout_ms = 0;
    if (cl_w_p && cl_w_p->timeout_ms) {
        deadline_ms = cf_getms() + cl_w_p->timeout_ms;
        // Policy: if asking for a long timeout, give enough time to try twice.
        if (cl_w_p->timeout_ms > 700) {
            progress_timeout_ms = cl_w_p->timeout_ms / 2;
        }
        else {
            progress_timeout_ms = cl_w_p->timeout_ms;
        }
    }
    else {
        progress_timeout_ms = g_async_nw_progress_timeout;
    }

    // Initialize the async work unit.
    workitem->trid = *trid;
    workitem->deadline = deadline_ms;
    workitem->starttime = cf_getms();
    workitem->udata = udata;

    as_msg *msgp;
    // Hate special cases, but we have to clear the verify bit on delete verify.
    if ((info2 & CL_MSG_INFO2_DELETE) && (info1 & CL_MSG_INFO1_VERIFY)) {
        msgp = (as_msg *)wr_buf;
        msgp->m.info1 &= ~CL_MSG_INFO1_VERIFY;
    }

    if (asc->compression_stat.compression_threshold > 0 &&
            wr_buf_sz > (size_t)asc->compression_stat.compression_threshold) {
        // Compression is enabled and the packet size is above the threshold -
        // compress the data.
        uint8_t *compressed_buf = NULL;
        size_t compressed_buf_sz = 0;

        // Construct the packet for compressed data.
        cf_packet_compression(wr_buf, wr_buf_sz, &compressed_buf, &compressed_buf_sz);
        if (compressed_buf) {
            // If the original packet size is > 16k, cl_compile had allocated
            // memory for it - free that memory. cf_packet_compression
            // allocates memory for the compressed packet.
            if (wr_buf != wr_stack_buf) {
                free(wr_buf);
            }
            // Update stats.
            citrusleaf_cluster_put_compression_stat(asc, wr_buf_sz, compressed_buf_sz);
            wr_buf = compressed_buf;
            wr_buf_sz = compressed_buf_sz;
            //memcpy (wr_buf, compressed_buf, compressed_buf_sz);
            //wr_buf_sz = compressed_buf_sz;
            //free (compressed_buf);
        }
        else {
            // Compression failed - continue with the uncompressed packet.
            // Set compression stat.
            citrusleaf_cluster_put_compression_stat(asc, wr_buf_sz, wr_buf_sz);
        }
    }

    int try = 0;

    // Retry the request based on the write_policy.
    do {
        network_error = false;
        try++;
#ifdef DEBUG
        if (try > 1) {
            cf_debug("request retrying try %d tid %zu", try, (uint64_t)pthread_self());
        }
#endif

        // Get an FD from the cluster. First get the probable node for the
        // given digest.
        node = cl_cluster_node_get(asc, ns, &d_ret,
                info2 & CL_MSG_INFO2_WRITE ? true : false);
        if (! node) {
#ifdef DEBUG
            cf_debug("warning: no healthy nodes in cluster, retrying");
#endif
            usleep(10000);  // Sleep for 10ms.
            goto Retry;
        }

        // Now get the dedicated async FD of this node.
        starttime = cf_getms();
        fd = cl_cluster_node_fd_get(node, true);
        endtime = cf_getms();
        if ((endtime - starttime) > 10) {
            cf_debug("Time to get FD for a node (>10ms)=%"PRIu64, (endtime - starttime));
        }
        if (fd == -1) {
#ifdef DEBUG
            cf_debug("warning: node %s has no async file descriptors, retrying transaction (tid %zu)",
                    node->name, (uint64_t)pthread_self());
#endif
            usleep(1000);
            goto Retry;
        }

        // Send the command to the node.
        starttime = cf_getms();
        rv = cf_socket_write_timeout(fd, wr_buf, wr_buf_sz, deadline_ms, progress_timeout_ms);
        endtime = cf_getms();
        if ((endtime - starttime) > 10) {
            cf_debug("Time to write to the socket (>10ms)=%"PRIu64, (endtime - starttime));
        }
        if (rv != 0) {
            cf_debug("Citrusleaf: write timeout or error when writing header to server - %d fd %d errno %d (tid %zu)",
                    rv, fd, errno, (uint64_t)pthread_self());
            if (rv != ETIMEDOUT) {
                network_error = true;
            }
            goto Retry;
        }
        goto Ok;

Retry:
        if (network_error == true) {
            /*
             * In case of async work (for XDS), it may be extreme to dun a
             * node on a network error. We just clean things up and retry
             * connecting to the remote cluster. The network error may be a
             * transient one. As this is a network error, it is better to wait
             * a significant time before retrying.
             */
            sleep(1);   // Sleep for 1 sec.
#if ONEASYNCFD
            // Do not close the FD.
#else
            cf_error("async sender: Closing the fd %d because of network error", fd);
            cf_close(fd);
            fd = -1;
#endif
        }

        if (fd != -1) {
            cf_error("async sender: Closing the fd %d because of retry", fd);
            cf_close(fd);
            fd = -1;
        }

        if (node) {
            cl_cluster_node_put(node);
            node = 0;
        }

        if (deadline_ms && (deadline_ms < cf_getms())) {
#ifdef DEBUG
            cf_debug("async sender: out of time : deadline %"PRIu64" now %"PRIu64,
                    deadline_ms, cf_getms());
#endif
            rv = CITRUSLEAF_FAIL_TIMEOUT;
            goto Error;
        }
    } while ((cl_w_p == 0) || (cl_w_p->w_pol == CL_WRITE_RETRY));

Error:
#ifdef DEBUG
    cf_debug("exiting with failure: network_error %d wpol %d timeleft %d rv %d",
            (int)network_error, (int)(cl_w_p ? cl_w_p->w_pol : 0),
            (int)(deadline_ms - cf_getms()), rv);
#endif

    if (wr_buf != wr_stack_buf) {
        free(wr_buf);
    }

#if ONEASYNCFD
    // Do not close the FD.
#else
    // On a network error, the fd is already closed and set to -1. So we reach
    // this place with a valid FD only in case of timeout.
    if (fd != -1) {
        cf_error("async sender: Closing the fd %d because of timeout", fd);
        cf_close(fd);
    }
#endif

    return rv;

Ok:
    /*
     * We cannot release the node here, as the async FD associated with this
     * node may get closed. We should do it only when we get back the ack for
     * the async command that we just sent.
     */

    // As we sent the command successfully, add it to the async work list.
    workitem->node = node;
    workitem->fd = fd;

    // We are storing only the pointer to the workitem.
#if ONEASYNCFD
    if (shash_put_unique(g_cl_async_hashtab, trid, &workitem) != SHASH_OK) {
        // This should always succeed.
        cf_error("Unable to add unique entry into the hash table");
    }
    cf_queue_push(node->asyncwork_q, &workitem);    // Also put in the node's q.
#else
    cf_queue_push(g_cl_async_q, &workitem);
#endif

    if (wr_buf != wr_stack_buf) {
        free(wr_buf);
    }

    rv = CITRUSLEAF_OK;
    return rv;
}

int citrusleaf_async_reinit(int size_limit, unsigned int num_receiver_threads) {
    // int num_threads;

    if (0 == cf_atomic32_get(g_async_initialized)) {
        cf_error("Async client not initialized - cannot reinit");
        return -1;
    }

    if (num_receiver_threads > MAX_ASYNC_RECEIVER_THREADS) {
        // Limit the thread count to the max value even if the caller asks for
        // more.
        num_receiver_threads = MAX_ASYNC_RECEIVER_THREADS;
    }

    // If the number of threads is increased, create the additional threads.
    if (num_receiver_threads > cf_atomic32_get(g_async_num_threads)) {
        for (unsigned int i = cf_atomic32_get(g_async_num_threads);
                i < num_receiver_threads; i++) {
            pthread_create(&g_async_reciever[i], 0, async_receiver_fn, NULL);
        }
    }

    // Reset the count. When shrinking, the extra async threads will kill
    // themselves once they notice their IDs exceed the new count; when
    // growing, the new threads need the raised count so they don't
    // immediately exit.
    cf_atomic32_set(&g_async_num_threads, num_receiver_threads);

    cf_atomic32_set(&g_async_q_szlimit, size_limit);
    return 0;
}

int citrusleaf_async_init(int size_limit, int num_receiver_threads,
        cl_async_fail_cb fail_cb_fn, cl_async_success_cb success_cb_fn) {
    int i, num_threads;

    // Make sure that we do the initialization only once.
    if (1 == cf_atomic32_incr(&g_async_initialized)) {
        // Start the receiver threads.
        num_threads = num_receiver_threads;
        if (num_threads > MAX_ASYNC_RECEIVER_THREADS) {
            // Limit the thread count to the max value even if the caller asks
            // for more.
            num_threads = MAX_ASYNC_RECEIVER_THREADS;
        }

#if ONEASYNCFD
        g_async_h_szlimit = size_limit * 3;         // Max number of elements in the hash table.
        g_async_h_buckets = g_async_h_szlimit / 10; // Number of buckets in the hash table.

        if (shash_create(&g_cl_async_hashtab, async_trid_hash, sizeof(uint64_t),
                sizeof(cl_async_work *), g_async_h_buckets, SHASH_CR_MT_BIGLOCK) != SHASH_OK) {
            cf_error("Failed to initialize the async work hashtable");
            cf_atomic32_decr(&g_async_initialized);
            return -1;
        }
#else
        // Create the work queue.
        g_async_q_szlimit = size_limit;
        if ((g_cl_async_q = cf_queue_create(sizeof(cl_async_work *), true)) == NULL) {
            cf_error("Failed to initialize the async work queue");
            cf_atomic32_decr(&g_async_initialized);
            return -1;
        }

        for (i = 0; i < num_threads; i++) {
            pthread_create(&g_async_reciever[i], 0, async_receiver_fn, NULL);
        }
        g_async_num_threads = num_threads;
#endif

        if ((g_cl_workitems_freepool_q = cf_queue_create(sizeof(cl_async_work *), true)) == NULL) {
            cf_error("Failed to create memory pool for workitems");
            return -1;
        }

        g_fail_cb_fn = fail_cb_fn;
        g_success_cb_fn = success_cb_fn;

        // Initialize the stats.
        g_async_stats.retries = 0;
        g_async_stats.dropouts = 0;
    }

    return 0;
}
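/*
 * Typical initialization sequence (illustrative only - the queue size and
 * thread counts are arbitrary, and my_fail_cb/my_success_cb are the
 * hypothetical callbacks sketched after async_receiver_fn above):
 *
 *	citrusleaf_async_init(4096, 4, my_fail_cb, my_success_cb);
 *	// ...
 *	// Later, shrink the receiver pool. The extra threads exit on their own,
 *	// because async_receiver_fn compares its thread_id against
 *	// g_async_num_threads at the bottom of its loop.
 *	citrusleaf_async_reinit(4096, 2);
 */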
/*
 * Internal function: udf_aerospike__apply_update_atomic
 *
 * Parameters:
 *		urecord -- udf_record to be updated
 *
 * Return Values:
 *		 0 success
 *		-1 failure
 *
 * Description:
 *		This function applies all the updates atomically. That is, if one of
 *		the bin update/delete/create operations fails, the entire function
 *		fails. If the nth update fails, all n-1 preceding updates are rolled
 *		back to their initial values.
 *
 * Special Notes:
 *		i. The basic checks - bin name too long, enough space on disk for the
 *		bin values - are done before allocating space for any of the bins.
 *
 *		ii. If one of the updates to be rolled back is a bin creation,
 *		udf_aerospike_delbin is called. This does not free the bin metadata,
 *		so a small memory accounting mismatch between the replica (which did
 *		not get the record at all, hence accounts no memory) and the master
 *		would be seen. To avoid such cases, we do the checks upfront.
 *
 * Callers:
 *		udf_aerospike__execute_updates
 *		In that function, if udf_aerospike__apply_update_atomic fails, the
 *		record is not committed to storage. On success, the record is closed,
 *		which commits it to storage, and reopened for the next set of udf
 *		updates. The return value from udf_aerospike__apply_update_atomic is
 *		passed on to the callers of that function.
 */
int udf_aerospike__apply_update_atomic(udf_record *urecord) {
    int rc = 0;
    int failmax = 0;
    int new_bins = 0;   // How many new bins have to be created in this update.
    as_storage_rd *rd = urecord->rd;
    as_namespace *ns = rd->ns;
    bool has_sindex = as_sindex_ns_has_sindex(ns);
    bool is_record_dirty = false;
    bool is_record_flag_dirty = false;
    uint8_t old_index_flags = as_index_get_flags(rd->r);
    uint8_t new_index_flags = 0;

    // This will iterate over all the updates and apply them to storage. The
    // items will remain, and be used as cache values. If an error occurs
    // during setbin(), we roll back all the operations applied so far and
    // return failure.
    cf_detail(AS_UDF, "execute updates: %d updates", urecord->nupdates);

    // Loop twice, so that the updates are counted first and, if something
    // goes wrong while applying them, they can be rolled back. (The deletes
    // generally go through successfully.)

    // In the first iteration, just calculate how many new bins need to be
    // created.
    for (uint32_t i = 0; i < urecord->nupdates; i++) {
        if (urecord->updates[i].dirty) {
            char *k = urecord->updates[i].name;
            if (k != NULL) {
                if (! as_bin_get(rd, k)) {
                    new_bins++;
                }
            }
        }
    }

    // Free bins - total bins not in use in the record.
    // Delta bins - new bins that need to be created.
    int inuse_bins = as_bin_inuse_count(rd);
    int free_bins = rd->n_bins - inuse_bins;
    int delta_bins = new_bins - free_bins;
    cf_detail(AS_UDF, "Total bins %d, In use bins %d, Free bins %d, New bins %d, Delta bins %d",
            rd->n_bins, inuse_bins, free_bins, new_bins, delta_bins);

    // Check the bin usage limit.
    if ((inuse_bins + new_bins > UDF_RECORD_BIN_ULIMIT) ||
            (urecord->flag & UDF_RECORD_FLAG_TOO_MANY_BINS)) {
        cf_warning(AS_UDF, "bin limit of %d for UDF exceeded: %d bins in use, %d bins free, %s%d new bins needed",
                (int)UDF_RECORD_BIN_ULIMIT, inuse_bins, free_bins,
                (urecord->flag & UDF_RECORD_FLAG_TOO_MANY_BINS) ? ">" : "", new_bins);
        goto Rollback;
    }

    // Allocate space beforehand for all the new bins that need to be created.
    if (delta_bins > 0 && rd->ns->storage_data_in_memory && ! rd->ns->single_bin) {
        as_bin_allocate_bin_space(urecord->r_ref->r, rd, delta_bins);
    }
    if (! rd->ns->storage_data_in_memory && ! urecord->particle_data) {
        // 256 as an upper bound on the LDT control bin - we may write the
        // version below. Leave it at the end for its use.
        urecord->particle_data = cf_malloc(rd->ns->storage_write_block_size + 256);
        urecord->cur_particle_data = urecord->particle_data;
        urecord->end_particle_data = urecord->particle_data + rd->ns->storage_write_block_size;
    }

    if (has_sindex) {
        SINDEX_GRLOCK();
    }

    // In the second iteration, apply the updates.
    for (uint32_t i = 0; i < urecord->nupdates; i++) {
        urecord->updates[i].oldvalue = NULL;
        urecord->updates[i].washidden = false;
        if (urecord->updates[i].dirty && rc == 0) {
            char *k = urecord->updates[i].name;
            as_val *v = urecord->updates[i].value;
            bool h = urecord->updates[i].ishidden;

            if (k != NULL) {
                if (v == NULL || v->type == AS_NIL) {
                    // If the value is NIL, then do a delete.
                    cf_detail(AS_UDF, "execute update: position %d deletes bin %s", i, k);
                    urecord->updates[i].oldvalue = udf_record_storage_get(urecord, k);
                    urecord->updates[i].washidden = udf_record_bin_ishidden(urecord, k);
                    // The only case where delete fails is when the bin is not
                    // found, which is as good as a delete. Ignore the return
                    // code!
                    udf_aerospike_delbin(urecord, k);

                    if (urecord->dirty != NULL) {
                        xdr_fill_dirty_bins(urecord->dirty);
                    }
                }
                else {
                    // Otherwise, it is a set.
                    cf_detail(AS_UDF, "execute update: position %d sets bin %s", i, k);
                    urecord->updates[i].oldvalue = udf_record_storage_get(urecord, k);
                    urecord->updates[i].washidden = udf_record_bin_ishidden(urecord, k);
                    rc = udf_aerospike_setbin(urecord, i, k, v, h);
                    if (rc) {
                        if (urecord->updates[i].oldvalue) {
                            as_val_destroy(urecord->updates[i].oldvalue);
                            urecord->updates[i].oldvalue = NULL;
                        }
                        failmax = i;
                        goto Rollback;
                    }

                    if (urecord->dirty != NULL) {
                        xdr_add_dirty_bin(ns, urecord->dirty, k, strlen(k));
                    }
                }
            }

            is_record_dirty = true;
        }
    }

    if (urecord->ldt_rectype_bit_update) {
        if (urecord->ldt_rectype_bit_update < 0) {
            // ldt_rectype_bit_update is negative when we want to reset the
            // bits.
            uint8_t rectype_bits = urecord->ldt_rectype_bit_update * -1;
            new_index_flags = old_index_flags & ~rectype_bits;
        }
        else {
            new_index_flags = old_index_flags | urecord->ldt_rectype_bit_update;
        }

        if (new_index_flags != old_index_flags) {
            as_index_clear_flags(rd->r, old_index_flags);
            as_index_set_flags(rd->r, new_index_flags);
            is_record_flag_dirty = true;
            cf_detail_digest(AS_RW, &urecord->tr->keyd, "Setting index flags from %d to %d new flag %d",
                    old_index_flags, new_index_flags, as_index_get_flags(rd->r));
        }
    }

    {
        // This is _NOT_ for writing to storage, but simply for performing a
        // sizing calculation. If we knew the upper bound of the rec_props
        // size, we could avoid this work and check with that much correction.
        //
        // See
        //  - udf_rw_post_processing for building rec_props for replication
        //  - udf_record_close for building rec_props for writing to storage
        size_t rec_props_data_size = as_storage_record_rec_props_size(rd);
        uint8_t rec_props_data[rec_props_data_size];
        if (rec_props_data_size > 0) {
            as_storage_record_set_rec_props(rd, rec_props_data);
        }

        // The version is set at the end, after the record size check. Setting
        // the version won't change the size of the record. And if it were set
        // before the size check, it too would need to be backed out.
        // TODO: Add backout logic - this works until the very first LDT
        // create call ends up crossing the record boundary.
        if (rd->ns->ldt_enabled && as_ldt_record_is_parent(rd->r)) {
            int rv = as_ldt_parent_storage_set_version(rd, urecord->lrecord->version,
                    urecord->end_particle_data, __FILE__, __LINE__);
            if (rv < 0) {
                cf_warning(AS_LDT, "udf_aerospike__apply_update_atomic: Internal Error "
                        "[Failed to set the version on storage rv=%d]... Fail", rv);
                goto Rollback;
            }
            // TODO - if the size check below fails, we won't write to the
            // device - different behavior than the write_to_device flag - OK?
            is_record_dirty = true;
        }

        if (! as_storage_record_size_and_check(rd)) {
            cf_warning(AS_UDF, "record failed storage size check, will not be updated");
            failmax = (int)urecord->nupdates;
            goto Rollback;
        }

        if (cf_atomic32_get(rd->ns->stop_writes) == 1) {
            cf_warning(AS_UDF, "UDF failed by stop-writes, record will not be updated");
            failmax = (int)urecord->nupdates;
            goto Rollback;
        }

        if (! as_storage_has_space(rd->ns)) {
            cf_warning(AS_UDF, "drives full, record will not be updated");
            failmax = (int)urecord->nupdates;
            goto Rollback;
        }

        if (! is_valid_ttl(rd->ns, urecord->tr->msgp->msg.record_ttl)) {
            cf_warning(AS_UDF, "invalid ttl %u", urecord->tr->msgp->msg.record_ttl);
            failmax = (int)urecord->nupdates;
            goto Rollback;
        }
    }

    if (has_sindex) {
        SINDEX_GUNLOCK();
    }

    // If there were updates, do miscellaneous successful-commit tasks.
    if (is_record_dirty || is_record_flag_dirty ||
            (urecord->flag & UDF_RECORD_FLAG_METADATA_UPDATED)) {
        urecord->flag |= UDF_RECORD_FLAG_HAS_UPDATES;   // Will write to storage.
    }

    urecord->ldt_rectype_bit_update = 0;

    // Clean up the oldvalue cache and reset dirty. All the changes made here
    // have been made to the particle buffer - nothing will be backed out now.
    for (uint32_t i = 0; i < urecord->nupdates; i++) {
        udf_record_bin *bin = &urecord->updates[i];
        if (bin->oldvalue != NULL) {
            as_val_destroy(bin->oldvalue);
            bin->oldvalue = NULL;
        }
        bin->dirty = false;
    }

    return rc;

Rollback:
    cf_debug(AS_UDF, "Rollback Called: failmax %d", failmax);
    for (int i = 0; i < failmax; i++) {
        if (urecord->updates[i].dirty) {
            char *k = urecord->updates[i].name;
            // Pick the oldvalue for rollback.
            as_val *v = urecord->updates[i].oldvalue;
            bool h = urecord->updates[i].washidden;

            if (k != NULL) {
                if (v == NULL || v->type == AS_NIL) {
                    // If the value is NIL, then do a delete.
                    cf_detail(AS_UDF, "execute rollback: position %d deletes bin %s", i, k);
                    rc = udf_aerospike_delbin(urecord, k);
                }
                else {
                    // Otherwise, it is a set.
                    cf_detail(AS_UDF, "execute rollback: position %d sets bin %s", i, k);
                    rc = udf_aerospike_setbin(urecord, i, k, v, h);
                    if (rc) {
                        cf_warning(AS_UDF, "Rollback failed .. not good ... !!");
                    }
                }
            }

            if (v) {
                as_val_destroy(v);
                cf_debug(AS_UDF, "ROLLBACK as_val_destroy()");
            }
        }
    }

    if (is_record_dirty && urecord->dirty != NULL) {
        xdr_clear_dirty_bins(urecord->dirty);
    }

    if (is_record_flag_dirty) {
        as_index_clear_flags(rd->r, new_index_flags);
        as_index_set_flags(rd->r, old_index_flags);
        is_record_flag_dirty = false;
    }

    urecord->ldt_rectype_bit_update = 0;

    if (has_sindex) {
        SINDEX_GUNLOCK();
    }

    // Re-check the flat size after the backout - this should not fail in the
    // backout code.
    if (! as_storage_record_size_and_check(rd)) {
        cf_warning(AS_LDT, "Does not fit even after rollback... this is trouble");
    }

    // Do not clean up the cache in case of failure.
    return -1;
}
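/*
 * The control flow above reduces to a classic two-phase apply-or-rollback
 * pattern (schematic sketch, not code from this file):
 *
 *	count new bins and check limits;       // phase 1: sizing/limit checks only
 *	for (i = 0; i < nupdates; i++) {       // phase 2: apply
 *		save oldvalue[i];
 *		if (apply(update[i]) != 0) { failmax = i; goto Rollback; }
 *	}
 *	post-checks (size, stop-writes, disk space, ttl);  // or Rollback with
 *	return 0;                                          // failmax = nupdates
 *
 * Rollback:
 *	for (i = 0; i < failmax; i++)          // undo only what was applied
 *		restore oldvalue[i];
 *	return -1;
 */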