/**
 *	Scan the records in the specified namespace and set for a single node.
 *
 *	The callback function will be called for each record scanned. When all records have
 *	been scanned, then callback will be called with a NULL value for the record.
 *
 *	~~~~~~~~~~{.c}
 *	char* node_names = NULL;
 *	int n_nodes = 0;
 *	as_cluster_get_node_names(as->cluster, &n_nodes, &node_names);
 *
 *	if (n_nodes <= 0)
 *		return <error>;
 *
 *	as_scan scan;
 *	as_scan_init(&scan, "test", "demo");
 *
 *	if (aerospike_scan_node(&as, &err, NULL, &scan, node_names[0], callback, NULL) != AEROSPIKE_OK ) {
 *		fprintf(stderr, "error(%d) %s at [%s:%d]", err.code, err.message, err.file, err.line);
 *	}
 *
 *	free(node_names);
 *	as_scan_destroy(&scan);
 *	~~~~~~~~~~
 *
 *	@param as			The aerospike instance to use for this operation.
 *	@param err			The as_error to be populated if an error occurs.
 *	@param policy		The policy to use for this operation. If NULL, then the default policy will be used.
 *	@param scan			The scan to execute against the cluster.
 *	@param node_name	The node name to scan.
 *	@param callback		The function to be called for each record scanned.
 *	@param udata		User-data to be passed to the callback.
 *
 *	@return AEROSPIKE_OK on success. Otherwise an error occurred.
 */
as_status aerospike_scan_node(
	aerospike * as, as_error * err, const as_policy_scan * policy,
	const as_scan * scan,  const char* node_name,
	aerospike_scan_foreach_callback callback, void * udata)
{
	as_error_reset(err);
	
	if (! policy) {
		policy = &as->config.policies.scan;
	}

	// Retrieve node.
	as_node* node = as_node_get_by_name(as->cluster, node_name);
	
	if (! node) {
		return as_error_update(err, AEROSPIKE_ERR_PARAM, "Invalid node name: %s", node_name);
	}

	// Create scan command
	uint64_t task_id = cf_get_rand64() / 2;
	as_buffer argbuffer;
	uint16_t n_fields = 0;
	size_t size = as_scan_command_size(scan, &n_fields, &argbuffer);
	uint8_t* cmd = as_command_init(size);
	size = as_scan_command_init(cmd, policy, scan, task_id, n_fields, &argbuffer);
	
	// Initialize task.
	uint32_t error_mutex = 0;
	as_scan_task task;
	task.node = node;
	task.cluster = as->cluster;
	task.policy = policy;
	task.scan = scan;
	task.callback = callback;
	task.udata = udata;
	task.err = err;
	task.complete_q = 0;
	task.error_mutex = &error_mutex;
	task.task_id = task_id;
	task.cmd = cmd;
	task.cmd_size = size;
	
	// Run scan.
	as_status status = as_scan_command_execute(&task);
		
	// Free command memory.
	as_command_free(cmd, size);
	
	// Release node.
	as_node_release(node);
	
	// If completely successful, make the callback that signals completion.
	if (callback && status == AEROSPIKE_OK) {
		callback(NULL, udata);
	}
	return status;
}
static as_status
as_scan_generic(
	aerospike* as, as_error* err, const as_policy_scan* policy, const as_scan* scan,
	aerospike_scan_foreach_callback callback, void* udata, uint64_t* task_id_ptr)
{
	as_error_reset(err);
	
	if (! policy) {
		policy = &as->config.policies.scan;
	}
	
	as_cluster* cluster = as->cluster;
	as_nodes* nodes = as_nodes_reserve(cluster);
	uint32_t n_nodes = nodes->size;
	
	if (n_nodes == 0) {
		as_nodes_release(nodes);
		return as_error_set_message(err, AEROSPIKE_ERR_SERVER, "Scan command failed because cluster is empty.");
	}
	
	// Reserve each node in cluster.
	for (uint32_t i = 0; i < n_nodes; i++) {
		as_node_reserve(nodes->array[i]);
	}
	
	uint64_t task_id;
	if (task_id_ptr) {
		if (*task_id_ptr == 0) {
			*task_id_ptr = cf_get_rand64() / 2;
		}
		task_id = *task_id_ptr;
	}
	else {
		task_id = cf_get_rand64() / 2;
	}

	// Create scan command
	as_buffer argbuffer;
	uint16_t n_fields = 0;
	size_t size = as_scan_command_size(scan, &n_fields, &argbuffer);
	uint8_t* cmd = as_command_init(size);
	size = as_scan_command_init(cmd, policy, scan, task_id, n_fields, &argbuffer);
	
	// Initialize task.
	uint32_t error_mutex = 0;
	as_scan_task task;
	task.cluster = as->cluster;
	task.policy = policy;
	task.scan = scan;
	task.callback = callback;
	task.udata = udata;
	task.err = err;
	task.error_mutex = &error_mutex;
	task.task_id = task_id;
	task.cmd = cmd;
	task.cmd_size = size;
	
	as_status status = AEROSPIKE_OK;
	
	if (scan->concurrent) {
		uint32_t n_wait_nodes = n_nodes;
		task.complete_q = cf_queue_create(sizeof(as_scan_complete_task), true);

		// Run node scans in parallel.
		for (uint32_t i = 0; i < n_nodes; i++) {
			// Stack allocate task for each node.  It should be fine since the task
			// only needs to be valid within this function.
			as_scan_task* task_node = alloca(sizeof(as_scan_task));
			memcpy(task_node, &task, sizeof(as_scan_task));
			task_node->node = nodes->array[i];
			
			int rc = as_thread_pool_queue_task(&cluster->thread_pool, as_scan_worker, task_node);
			
			if (rc) {
				// Thread could not be added. Abort entire scan.
				if (ck_pr_fas_32(task.error_mutex, 1) == 0) {
					status = as_error_update(task.err, AEROSPIKE_ERR_CLIENT, "Failed to add scan thread: %d", rc);
				}
				
				// Reset node count to threads that were run.
				n_wait_nodes = i;
				break;
			}
		}

		// Wait for tasks to complete.
		for (uint32_t i = 0; i < n_wait_nodes; i++) {
			as_scan_complete_task complete;
			cf_queue_pop(task.complete_q, &complete, CF_QUEUE_FOREVER);
			
			if (complete.result != AEROSPIKE_OK && status == AEROSPIKE_OK) {
				status = complete.result;
			}
		}
		
		// Release temporary queue.
		cf_queue_destroy(task.complete_q);
	}
	else {
		task.complete_q = 0;
		
		// Run node scans in series.
		for (uint32_t i = 0; i < n_nodes && status == AEROSPIKE_OK; i++) {
			task.node = nodes->array[i];
			status = as_scan_command_execute(&task);
		}
	}
	
	// Release each node in cluster.
	for (uint32_t i = 0; i < n_nodes; i++) {
		as_node_release(nodes->array[i]);
	}
	
	// Release nodes array.
	as_nodes_release(nodes);

	// Free command memory.
	as_command_free(cmd, size);

	// If user aborts query, command is considered successful.
	if (status == AEROSPIKE_ERR_CLIENT_ABORT) {
		status = AEROSPIKE_OK;
	}

	// If completely successful, make the callback that signals completion.
	if (callback && status == AEROSPIKE_OK) {
		callback(NULL, udata);
	}
	return status;
}
static as_status
as_scan_async(
	aerospike* as, as_error* err, const as_policy_scan* policy, const as_scan* scan, uint64_t* scan_id,
	as_async_scan_listener listener, void* udata, as_event_loop* event_loop,
	as_node** nodes, uint32_t n_nodes
	)
{
	if (! policy) {
		policy = &as->config.policies.scan;
	}
	
	// Assign task id.
	uint64_t task_id;
	if (scan_id) {
		if (*scan_id == 0) {
			*scan_id = as_random_get_uint64();
		}
		task_id = *scan_id;
	}
	else {
		task_id = as_random_get_uint64();
	}
	
	bool daisy_chain = ! (scan->concurrent || n_nodes == 1);

	// Scan will be split up into a command for each node.
	// Allocate scan data shared by each command.
	as_async_scan_executor* executor = cf_malloc(sizeof(as_async_scan_executor));
	as_event_executor* exec = &executor->executor;
	pthread_mutex_init(&exec->lock, NULL);
	exec->commands = cf_malloc(sizeof(as_event_command*) * n_nodes);
	exec->event_loop = as_event_assign(event_loop);
	exec->complete_fn = as_scan_complete_async;
	exec->udata = udata;
	exec->err = NULL;
	exec->ns = NULL;
	exec->cluster_key = 0;
	exec->max_concurrent = daisy_chain ? 1 : n_nodes;
	exec->max = n_nodes;
	exec->count = 0;
	exec->queued = 0;
	exec->notify = true;
	exec->valid = true;
	executor->listener = listener;

	// Create scan command buffer.
	as_buffer argbuffer;
	uint16_t n_fields = 0;
	uint32_t predexp_sz = 0;
	size_t size = as_scan_command_size(scan, &n_fields, &argbuffer, &predexp_sz);
	uint8_t* cmd_buf = as_command_buffer_init(size);
	size = as_scan_command_init(cmd_buf, policy, scan, task_id, n_fields, &argbuffer, predexp_sz);
	
	// Allocate enough memory to cover, then, round up memory size in 8KB increments to allow socket
	// read to reuse buffer.
	size_t s = (sizeof(as_async_scan_command) + size + AS_AUTHENTICATION_MAX_SIZE + 8191) & ~8191;

	// Create all scan commands.
	for (uint32_t i = 0; i < n_nodes; i++) {
		as_event_command* cmd = cf_malloc(s);
		cmd->total_deadline = policy->base.total_timeout;
		cmd->socket_timeout = policy->base.socket_timeout;
		cmd->max_retries = policy->base.max_retries;
		cmd->iteration = 0;
		cmd->replica = AS_POLICY_REPLICA_MASTER;
		cmd->event_loop = exec->event_loop;
		cmd->cluster = as->cluster;
		cmd->node = nodes[i];
		cmd->ns = NULL;
		cmd->partition = NULL;
		cmd->udata = executor;  // Overload udata to be the executor.
		cmd->parse_results = as_scan_parse_records_async;
		cmd->pipe_listener = NULL;
		cmd->buf = ((as_async_scan_command*)cmd)->space;
		cmd->write_len = (uint32_t)size;
		cmd->read_capacity = (uint32_t)(s - size - sizeof(as_async_scan_command));
		cmd->type = AS_ASYNC_TYPE_SCAN;
		cmd->state = AS_ASYNC_STATE_UNREGISTERED;
		cmd->flags = AS_ASYNC_FLAGS_MASTER;
		cmd->flags2 = scan->deserialize_list_map ? AS_ASYNC_FLAGS2_DESERIALIZE : 0;
		memcpy(cmd->buf, cmd_buf, size);
		exec->commands[i] = cmd;
	}

	// Free command buffer.
	as_command_buffer_free(cmd_buf, size);

	if (policy->fail_on_cluster_change && (nodes[0]->features & AS_FEATURES_CLUSTER_STABLE)) {
		// Verify migrations are not in progress.
		return as_query_validate_begin_async(exec, scan->ns, err);
	}

	// Run scan commands.
	for (uint32_t i = 0; i < exec->max_concurrent; i++) {
		exec->queued++;
		as_event_command* cmd = exec->commands[i];
		as_status status = as_event_command_execute(cmd, err);

		if (status != AEROSPIKE_OK) {
			as_event_executor_cancel(exec, i);
			return status;
		}
	}
	return AEROSPIKE_OK;
}