Ejemplo n.º 1
0
void *
work_fn(void *gcc_is_ass)
{
	// Forever,
	do {
		// Pick a key to use. Look it up to see if anyone else is using it.
		uint32_t	key = rand_64() % g_config.n_keys;
		
		uint32_t    die = rand_64() & 0x03;
		
		if (SHASH_OK == shash_put_unique(g_config.in_progress_hash, &key, 0) ) 
		{
			cl_rv rv;
		
			// Make the key into a string
			char key_s[g_config.key_len+1];
			my_itoa(key_s, key, g_config.key_len);
			
			// Make an cl_object that represents the key
			cl_object key_o;
			citrusleaf_object_init_str(&key_o, key_s);
			

			cf_digest d;
			citrusleaf_calculate_digest(g_config.set, &key_o, &d);

			if (VALUE_UNINIT == g_config.values[key]) {

				// simply set the value to something - can't really check anything because we don't know the state				
				if (0 != write_new_value(key, &key_o, &d)) {
					if (g_config.strict)   					goto Fail;
				}
				atomic_int_add(g_config.key_counter, 1);
			}
			else if (VALUE_DELETED == g_config.values[key]) {
				
				// Shouldn't exist
				cl_bin *cl_v = 0;
				int		cl_v_len;
				rv = citrusleaf_get_all(g_config.asc, g_config.ns, g_config.set, &key_o, &cl_v, &cl_v_len, g_config.timeout_ms, NULL);
				if (rv != CITRUSLEAF_FAIL_NOTFOUND) {
					fprintf(stderr, "Get after delete returned improper value when should be deleted %d key %s digest %"PRIx64"\n",rv,key_s, *(uint64_t *)&d);
					if (g_config.strict)    goto Fail;
				}
				if (cl_v)	free(cl_v);

				atomic_int_add(g_config.read_counter, 1);  // did two ops here						
				
				// write a new value
				if (die < 2) {
					if (0 != write_new_value(key, &key_o, &d)) {
						if (g_config.strict)   goto Fail;
					}
					atomic_int_add(g_config.key_counter, 1);
				}
			}
			// Value is well known. Check to see that it's still right.
			else {			
				
				cl_bin values[1];
				strcpy(values[0].bin_name, g_config.bin);
				citrusleaf_object_init(&values[0].object);
				
				// Make string version of old value for checking
				char new_value_str[g_config.value_len+1];
				my_itoa(new_value_str, g_config.values[key], g_config.value_len); 
				citrusleaf_object_init_str(&values[0].object, new_value_str);
				
				rv = citrusleaf_verify(g_config.asc, g_config.ns, g_config.set, &key_o, values, 1, g_config.timeout_ms, NULL);
				if (rv != 0) {
					fprintf(stderr, "Get returned improper value %d when should be set : key %d digest %"PRIx64"\n",rv,key, *(uint64_t *)&d);
					if (g_config.strict)   goto Fail;
					goto V1;
				}
				
				// test!
				if (values[0].object.type != CL_STR) {
					fprintf(stderr, "read value has wrong type: expect string (3) got %d\n",(int)values[0].object.type);  
					if (g_config.strict)   return((void *)-1);
				}
				else if (strcmp(values[0].object.u.str, new_value_str) != 0) {
					fprintf(stderr, "read value does not match set value.\n");
					fprintf(stderr, "  expecting: %s\n",new_value_str);
					fprintf(stderr, "  got: %s\n",values[0].object.u.str);
					if (g_config.strict)   goto Fail;
				}
				
				citrusleaf_object_free(&values[0].object);
				atomic_int_add(g_config.read_counter, 1);
				
				// Delete, write new value, what's your pleasure?
			V1:				
				if (die < 2) {
					if (0 != write_new_value(key, &key_o, &d)) {
						if (g_config.strict)   return((void *)-1);
					}
				}
				// Delete!
				else if (die == 2) {
					rv = citrusleaf_delete_verify(g_config.asc, g_config.ns, g_config.set, &key_o, 0);
					if (rv != 0) {
						fprintf(stderr, "Delete returned improper value %d, fail: key %d digest %"PRIx64"\n",rv, key, *(uint64_t *)&d);
						if (g_config.strict)   goto Fail;
					}

					cl_bin values[1];
					strcpy(values[0].bin_name, g_config.bin);
					citrusleaf_object_init(&values[0].object);
					
					rv = citrusleaf_get(g_config.asc, g_config.ns, g_config.set, &key_o, values, 1, g_config.timeout_ms, NULL);
					if (rv != CITRUSLEAF_FAIL_NOTFOUND) {
						fprintf(stderr, "Get after delete returned improper value %d digest %"PRIx64"\n",rv, *(uint64_t *)&d);
						if (g_config.strict)   goto Fail;
					}
					
					citrusleaf_object_free(&values[0].object);
					
					g_config.values[key] = VALUE_DELETED;
					atomic_int_add(g_config.read_counter, 1);  // did two ops here
					atomic_int_add(g_config.delete_counter, 1);
					atomic_int_add(g_config.key_counter, -1);
					
				}
				
			}
			
			// remove my lock on this key
			shash_delete(g_config.in_progress_hash, &key);
			

		}		
	} while (1);
	
	
Fail:	
	abort();
	return((void *)-1);
}
Ejemplo n.º 2
0
//Same as do_the_full_monte, but only till the command is sent to the node.
//Most of the code is duplicated. Bad.
int
cl_do_async_monte(cl_cluster *asc, int info1, int info2, const char *ns, const char *set, const cl_object *key,
			const cf_digest *digest, cl_bin **values, cl_operator operator, cl_operation **operations, 
			int *n_values, uint32_t *cl_gen, const cl_write_parameters *cl_w_p, uint64_t *trid, void *udata)

{
	cl_async_work	*workitem = NULL;

	uint8_t		wr_stack_buf[STACK_BUF_SZ];
	uint8_t		*wr_buf = wr_stack_buf;
	size_t		wr_buf_sz = sizeof(wr_stack_buf);
	int        	progress_timeout_ms;
	uint64_t 	deadline_ms;
	uint64_t	starttime, endtime;
	bool 		network_error;
	int 		fd = -1;
	int		rv = CITRUSLEAF_FAIL_CLIENT;	//Assume that this is a failure;

	// as_msg 		msg;
	cf_digest	d_ret;
	cl_cluster_node	*node = 0;

#if ONEASYNCFD
	if (shash_get_size(g_cl_async_hashtab) >= g_async_h_szlimit) {
		//cf_error("Async hashtab is full. Cannot insert any more elements");
		return CITRUSLEAF_FAIL_ASYNCQ_FULL;
	}
#else
	//If the async buffer is at the max limit, do not entertain more requests.
	if (cf_queue_sz(g_cl_async_q) >= cf_atomic32_get(g_async_q_szlimit)) {
		//cf_error("Async buffer is full. Cannot insert any more elements");
		return CITRUSLEAF_FAIL_ASYNCQ_FULL;
	}
#endif

	//Allocate memory for work item that will be added to the async work list

	if (cf_queue_sz(g_cl_workitems_freepool_q) > 0) {
		cf_queue_pop(g_cl_workitems_freepool_q, &workitem, CF_QUEUE_FOREVER);
	} else {
		workitem = malloc(sizeof(cl_async_work));
		if (workitem == NULL) {
			return CITRUSLEAF_FAIL_CLIENT;
		}
	}

	//Compile the write buffer to be sent to the cluster
	if (n_values && ( values || operations) ){
		cl_compile(info1, info2, 0, ns, set, key, digest, values?*values:NULL, operator, operations?*operations:NULL,
				*n_values , &wr_buf, &wr_buf_sz, cl_w_p, &d_ret, *trid,NULL,NULL, 0 /*udf_type*/);
	}else{
		cl_compile(info1, info2, 0, ns, set, key, digest, 0, 0, 0, 0, &wr_buf, &wr_buf_sz, cl_w_p, &d_ret, *trid,NULL,NULL, 0 /*udf_type*/);
	}	

	deadline_ms = 0;
	progress_timeout_ms = 0;
	if (cl_w_p && cl_w_p->timeout_ms) {
		deadline_ms = cf_getms() + cl_w_p->timeout_ms;
		// policy: if asking for a long timeout, give enough time to try twice
		if (cl_w_p->timeout_ms > 700) {
			progress_timeout_ms = cl_w_p->timeout_ms / 2;
		}
		else {
			progress_timeout_ms = cl_w_p->timeout_ms;
		}
	}
	else {
		progress_timeout_ms = g_async_nw_progress_timeout;
	}

	//Initialize the async work unit
	workitem->trid = *trid;
	workitem->deadline = deadline_ms;
	workitem->starttime = cf_getms();
	workitem->udata = udata;

    as_msg *msgp;
    // Hate special cases, but we have to clear the verify bit on delete verify
    if ( (info2 & CL_MSG_INFO2_DELETE) && (info1 & CL_MSG_INFO1_VERIFY))
    {
        msgp = (as_msg *)wr_buf;
        msgp->m.info1 &= ~CL_MSG_INFO1_VERIFY;
    }
    
    if (asc->compression_stat.compression_threshold > 0 
     && wr_buf_sz > (size_t)asc->compression_stat.compression_threshold)
    {
        /* Compression is enabled.
         * Packet size is above threshold.
         * Compress the data
         */
        uint8_t *compressed_buf = NULL;
        size_t compressed_buf_sz = 0;

        // Contstruct packet for compressed data.
        cf_packet_compression (wr_buf, wr_buf_sz, &compressed_buf, &compressed_buf_sz);
        if (compressed_buf)
        {
            // If original packet size is > 16k, cl_compile had allocated memory for it.
            // Free that memory.
            // cf_packet_compression will allocate memory for compressed packet
            if (wr_buf != wr_stack_buf) {
                free(wr_buf);
            }
             // Update stats.
            citrusleaf_cluster_put_compression_stat(asc, wr_buf_sz, compressed_buf_sz);	
            wr_buf =  compressed_buf;
            wr_buf_sz = compressed_buf_sz;
            //memcpy (wr_buf, compressed_buf, compressed_buf_sz);
            //wr_buf_sz = compressed_buf_sz;
            //free (compressed_buf);
        }
        //else compression failed, continue with uncompressed packet
        else
        {
            // Set compression stat
            citrusleaf_cluster_put_compression_stat(asc, wr_buf_sz, wr_buf_sz);	
        }
    }

	int try = 0;
	// retry request based on the write_policy
	do {
		network_error = false;
		try++;
#ifdef DEBUG		
		if (try > 1) {
			cf_debug("request retrying try %d tid %zu", try, (uint64_t)pthread_self());
		}
#endif        

		// Get an FD from a cluster. First get the probable node for the given digest.
		node = cl_cluster_node_get(asc, ns, &d_ret, info2 & CL_MSG_INFO2_WRITE ? true : false);
		if (!node) {
#ifdef DEBUG
			cf_debug("warning: no healthy nodes in cluster, retrying");
#endif
			usleep(10000);	//Sleep for 10ms
			goto Retry;
		}

		// Now get the dedicated async FD of this node
		starttime = cf_getms();
		fd = cl_cluster_node_fd_get(node, true);
		endtime = cf_getms();
		if ((endtime - starttime) > 10) {
			cf_debug("Time to get FD for a node (>10ms)=%"PRIu64, (endtime - starttime));
		}
		if (fd == -1) {
#ifdef DEBUG			
			cf_debug("warning: node %s has no async file descriptors, retrying transaction (tid %zu)",node->name,(uint64_t)pthread_self() );
#endif			
			usleep(1000);
			goto Retry;
		}

		// Send the command to the node
		starttime = cf_getms();
		rv = cf_socket_write_timeout(fd, wr_buf, wr_buf_sz, deadline_ms, progress_timeout_ms);
		endtime = cf_getms();
		if ((endtime - starttime) > 10) {
			cf_debug("Time to write to the socket (>10ms)=%"PRIu64, (endtime - starttime));
		}
		if (rv != 0) {
			cf_debug("Citrusleaf: write timeout or error when writing header to server - %d fd %d errno %d (tid %zu)",
					rv,fd,errno,(uint64_t)pthread_self());
			if (rv != ETIMEDOUT)
				network_error = true;
			goto Retry;
		}
		goto Ok;

Retry:
		if (network_error == true) {
			/* 
			 * In case of Async work (for XDS), it may be extreme to
			 * dun a node in case of network error. We just cleanup
			 * things and retry to connect to the remote cluster.
			 * The network error may be a transient one. As this is a
			 * network error, its is better to wait for some significant
			 * time before retrying.
			 */
			sleep(1);	//Sleep for 1sec
#if ONEASYNCFD
//Do not close the FD
#else
			cf_error("async sender: Closing the fd %d because of network error", fd);
			cf_close(fd);
			fd = -1;
#endif
		}

		if (fd != -1) {
			cf_error("async sender: Closing the fd %d because of retry", fd);
			cf_close(fd);
			fd = -1;
		}

		if (node) {
			cl_cluster_node_put(node); 
			node = 0; 
		}

		if (deadline_ms && (deadline_ms < cf_getms() ) ) {
#ifdef DEBUG            
			cf_debug("async sender: out of time : deadline %"PRIu64" now %"PRIu64, deadline_ms, cf_getms());
#endif            
			rv = CITRUSLEAF_FAIL_TIMEOUT;
			goto Error;
		}
	} while ( (cl_w_p == 0) || (cl_w_p->w_pol == CL_WRITE_RETRY) );

Error:	
#ifdef DEBUG	
	cf_debug("exiting with failure: network_error %d wpol %d timeleft %d rv %d",
			(int)network_error, (int)(cl_w_p ? cl_w_p->w_pol : 0), 
			(int)(deadline_ms - cf_getms() ), rv );
#endif	

	if (wr_buf != wr_stack_buf) {
		free(wr_buf);
	}

#if ONEASYNCFD
	//Do not close the FD
#else
	//If it is a network error, the fd would be closed and set to -1.
	//So, we reach this place with a valid FD in case of timeout.
	if (fd != -1) {
		cf_error("async sender: Closing the fd %d because of timeout", fd);
		cf_close(fd);
	}
#endif

	return(rv);
Ok:
	/*
	 * We cannot release the node here as the asyc FD associated
	 * with this node may get closed. We should do it only when
	 * we got back the ack for the async command that we just did.
	 */

	//As we sent the command successfully, add it to the async work list
	workitem->node = node;
	workitem->fd = fd;
	//We are storing only the pointer to the workitem
#if ONEASYNCFD
	if (shash_put_unique(g_cl_async_hashtab, trid, &workitem) != SHASH_OK) {
		//This should always succeed.
		cf_error("Unable to add unique entry into the hash table");
	}
	cf_queue_push(node->asyncwork_q, &workitem);	//Also put in the node's q
#else
	cf_queue_push(g_cl_async_q, &workitem);
#endif

	if (wr_buf != wr_stack_buf) {
		free(wr_buf);
	}

	rv = CITRUSLEAF_OK;
	return rv;

}

int citrusleaf_async_reinit(int size_limit, unsigned int num_receiver_threads)
{
	// int num_threads;

	if (0 == cf_atomic32_get(g_async_initialized)) {
		cf_error("Async client not initialized cannot reinit");
		return -1;
	}
	
	if (num_receiver_threads > MAX_ASYNC_RECEIVER_THREADS) {
			//Limit the threads to the max value even if caller asks for it
			num_receiver_threads = MAX_ASYNC_RECEIVER_THREADS;
	}

	// If number of thread is increased create more threads
	if (num_receiver_threads > g_async_num_threads) {
		unsigned int i;
		for (i = g_async_num_threads; i < num_receiver_threads; i++) {
			pthread_create(&g_async_reciever[i], 0, async_receiver_fn, NULL);
		}
	}
	else {
		// else just reset the number the async threads will kill themselves
		cf_atomic32_set(&g_async_num_threads, num_receiver_threads);
	}

	cf_atomic32_set(&g_async_q_szlimit , size_limit);
	return ( 0 );

}
int citrusleaf_async_init(int size_limit, int num_receiver_threads, cl_async_fail_cb fail_cb_fn, cl_async_success_cb success_cb_fn)
{
	int i, num_threads;

	//Make sure that we do the initialization only once
	if (1 == cf_atomic32_incr(&g_async_initialized)) {

		// Start the receiver threads
		num_threads = num_receiver_threads;
		if (num_threads > MAX_ASYNC_RECEIVER_THREADS) {
			//Limit the threads to the max value even if caller asks for it
			num_threads = MAX_ASYNC_RECEIVER_THREADS;
		}

#if ONEASYNCFD
		g_async_h_szlimit = size_limit * 3;	//Max number of elements in the hash table
		g_async_h_buckets = g_async_h_szlimit/10;//Number of buckets in the hash table

		if (shash_create(&g_cl_async_hashtab, async_trid_hash, sizeof(uint64_t), sizeof(cl_async_work *),
					g_async_h_buckets, SHASH_CR_MT_BIGLOCK) != SHASH_OK) {
			cf_error("Failed to initialize the async work hastable");
			cf_atomic32_decr(&g_async_initialized);
			return -1;
		}
#else
		// create work queue
		g_async_q_szlimit = size_limit;
		if ((g_cl_async_q = cf_queue_create(sizeof(cl_async_work *), true)) == NULL) {
			cf_error("Failed to initialize the async work queue");
			cf_atomic32_decr(&g_async_initialized);
			return -1;
		}

		for (i=0; i<num_threads; i++) {
			pthread_create(&g_async_reciever[i], 0, async_receiver_fn, NULL);
		}
		g_async_num_threads = num_threads;
#endif

		if ((g_cl_workitems_freepool_q = cf_queue_create(sizeof(cl_async_work *), true)) == NULL) {
			cf_error("Failed to create memory pool for workitems");
			return -1;
		}

		g_fail_cb_fn = fail_cb_fn;
		g_success_cb_fn = success_cb_fn;

		// Initialize the stats
		g_async_stats.retries = 0;
		g_async_stats.dropouts = 0;

	}
	
	return(0);	
}