Example #1
0
/**
 * Reserve the next write slot of a packet ring buffer.
 *
 * While the buffer reports itself full, one queued packet is discarded via
 * cb_read() per iteration so that a slot becomes available. The slot at
 * index (start + count) modulo size is then claimed by incrementing the
 * element count.
 *
 * @param b  Ring buffer to reserve a slot in.
 * @return   Pointer to the reserved element inside b->elems.
 */
struct rf_packet *cb_get_slot(struct rf_packet_buffer *b)
{
	struct rf_packet *slot;
	int tail;

	/* Drop packets until the buffer no longer reports full. */
	while (cb_is_full(b))
		cb_read(b);

	/* The first unused position sits right behind the last stored element. */
	tail = (b->start + b->count) % b->size;
	slot = &b->elems[tail];
	b->count++;

	return slot;
}
Example #2
0
/**
 * Interface (extern): Computes the k nearest neighbors for a given set of test points
 * stored in *Xtest and stores the results in two arrays *distances and *indices.
 *
 * The query indices are pushed in batches through FIND_LEAF_IDX_BATCH, appended to the
 * per-leaf circular buffers (which are doubled in size when full) and handed to
 * PROCESS_ALL_BUFFERS until neither unprocessed test indices nor entries in the
 * reinsert queue remain. Afterwards GET_DISTANCES_AND_INDICES is invoked and timing
 * statistics are printed via PRINT(params).
 *
 * Fatal conditions (the estimated test memory exceeding the allowed share of the
 * device memory, or a failed host allocation) are handled by printing a message,
 * calling FREE_OPENCL_DEVICES and terminating the process with exit(EXIT_FAILURE).
 *
 * On return the function has released the per-leaf buffers, its temporary index
 * arrays and tree_record->all_stacks / all_depths / all_idxs (presumably set up by
 * INIT_ARRAYS -- confirm against its definition).
 *
 * @param *Xtest Pointer to the set of query/test points (stored as FLOAT_TYPE)
 * @param nXtest The number of query points
 * @param dXtest The dimension of each query point (not read by this function)
 * @param *distances The distances array (FLOAT_TYPE) used to store the computed distances
 * @param ndistances The number of query points (not read by this function)
 * @param ddistances The number of distance values for each query point (not read by this function)
 * @param *indices Pointer to array storing the indices of the k nearest neighbors for each query point
 * @param nindices The number of query points (not read by this function)
 * @param dindices The number of indices computed for each query point (not read by this function)
 * @param *tree_record Pointer to struct storing all relevant information for model
 * @param *params Pointer to struct containing all relevant parameters
 *
 */
void neighbors_extern(FLOAT_TYPE * Xtest,
		INT_TYPE nXtest,
		INT_TYPE dXtest,
		FLOAT_TYPE *distances,
		INT_TYPE ndistances,
		INT_TYPE ddistances,
		INT_TYPE *indices,
		INT_TYPE nindices,
		INT_TYPE dindices,
		TREE_RECORD *tree_record,
		TREE_PARAMETERS *params) {

	// timer 1: total time spent in this function
	START_MY_TIMER(tree_record->timers + 1);

	UINT_TYPE i, j;
	tree_record->find_leaf_idx_calls = 0;
	tree_record->empty_all_buffers_calls = 0;
	tree_record->Xtest = Xtest;
	tree_record->nXtest = nXtest;
	tree_record->dist_mins_global = distances;
	tree_record->idx_mins_global = indices;

	// refuse to start if the test patterns would exceed the allowed share of device memory
	long device_mem_bytes = tree_record->device_infos.device_mem_bytes;
	double test_mem_bytes = get_test_tmp_mem_device_bytes(tree_record, params);
	PRINT(params)("Memory needed for test patterns: %f (GB)\n", test_mem_bytes / MEM_GB);
	if (test_mem_bytes > device_mem_bytes * params->allowed_test_mem_percent) {
		PRINT(params)("Too much memory used for test patterns and temporary data!\n");
		FREE_OPENCL_DEVICES(tree_record, params);
		exit(EXIT_FAILURE);
	}

	double total_device_bytes = get_total_mem_device_bytes(tree_record, params);
	PRINT(params)("Total memory needed on device: %f (GB)\n", total_device_bytes / MEM_GB);

	// timer 4: setup overhead before the main loop
	START_MY_TIMER(tree_record->timers + 4);

	/* ------------------------------------- OPENCL -------------------------------------- */
	INIT_ARRAYS(tree_record, params);
	/* ------------------------------------- OPENCL -------------------------------------- */

	// initialize leaf buffer for test queries (circular buffers)
	tree_record->buffers = (circular_buffer **) malloc(tree_record->n_leaves * sizeof(circular_buffer*));
	// a NULL result is only an error for a non-empty request (malloc(0) may return NULL)
	if (tree_record->buffers == NULL && tree_record->n_leaves > 0) {
		PRINT(params)("Could not allocate memory for the leaf buffers!\n");
		FREE_OPENCL_DEVICES(tree_record, params);
		exit(EXIT_FAILURE);
	}
	for (i = 0; i < tree_record->n_leaves; i++) {
		tree_record->buffers[i] = (circular_buffer *) malloc(sizeof(circular_buffer));
		if (tree_record->buffers[i] == NULL) {
			PRINT(params)("Could not allocate memory for a leaf buffer!\n");
			FREE_OPENCL_DEVICES(tree_record, params);
			exit(EXIT_FAILURE);
		}
		cb_init(tree_record->buffers[i], tree_record->leaves_initial_buffer_sizes);
	}

	tree_record->buffer_full_warning = 0;

	// initialize queue "input" (we can have at most number_test_patterns in there)
	cb_init(&(tree_record->queue_reinsert), tree_record->nXtest);

	/* ------------------------------------- OPENCL -------------------------------------- */
	START_MY_TIMER(tree_record->timers + 3);
	ALLOCATE_MEMORY_OPENCL_DEVICES(tree_record, params);
	STOP_MY_TIMER(tree_record->timers + 3);
	/* ------------------------------------- OPENCL -------------------------------------- */

	UINT_TYPE iter = 0;
	UINT_TYPE test_printed = 0;

	// allocate space for the indices added in each round; a round never holds more than
	// approx_number_of_avail_buffer_slots entries (see the batch-filling loop below)
	INT_TYPE *all_next_indices = (INT_TYPE *) malloc(
			tree_record->approx_number_of_avail_buffer_slots * sizeof(INT_TYPE));

	// allocate space for all return values (by FIND_LEAF_IDX_BATCH)
	tree_record->leaf_indices_batch_ret_vals = (INT_TYPE *) malloc(
			tree_record->approx_number_of_avail_buffer_slots * sizeof(INT_TYPE));

	if (tree_record->approx_number_of_avail_buffer_slots > 0
			&& (all_next_indices == NULL || tree_record->leaf_indices_batch_ret_vals == NULL)) {
		PRINT(params)("Could not allocate memory for the index batches!\n");
		FREE_OPENCL_DEVICES(tree_record, params);
		exit(EXIT_FAILURE);
	}

	UINT_TYPE num_elts_added;
	tree_record->current_test_index = 0;
	// NOTE(review): never updated in this function, so the value reported below is always 0
	INT_TYPE reinsert_counter = 0;

	PRINT(params)("Starting Querying process via buffer tree...\n");

	STOP_MY_TIMER(tree_record->timers + 4);
	// timer 2: time spent in the main loop
	START_MY_TIMER(tree_record->timers + 2);

	do {

		iter++;

		// try to get elements from both queues until buffers are full
		// (each buffer is either empty or has at least space for leaves_buffer_sizes_threshold elements)
		num_elts_added = 0;

		// add enough elements to the buffers ("batch filling")
		while (num_elts_added < tree_record->approx_number_of_avail_buffer_slots
				&& (tree_record->current_test_index < tree_record->nXtest
						|| !cb_is_empty(&(tree_record->queue_reinsert)))) {

			// the reinsert queue is drained first; fresh test indices are only taken once it is empty
			if (!cb_is_empty(&(tree_record->queue_reinsert))) {
				cb_read(&(tree_record->queue_reinsert), all_next_indices + num_elts_added);
			} else {
				all_next_indices[num_elts_added] = tree_record->current_test_index;
				tree_record->current_test_index++;
			}
			num_elts_added++;
		}

		/* ------------------------------------- OPENCL -------------------------------------- */
		FIND_LEAF_IDX_BATCH(all_next_indices, num_elts_added, tree_record->leaf_indices_batch_ret_vals, tree_record,
				params);
		/* ------------------------------------- OPENCL -------------------------------------- */

		// we have added num_elts_added indices to the all_next_indices array
		for (j = 0; j < num_elts_added; j++) {

			INT_TYPE leaf_idx = tree_record->leaf_indices_batch_ret_vals[j];

			// if not done: add the index to the appropriate buffer
			if (leaf_idx != -1) {

				// enlarge buffer if needed
				if (cb_is_full(tree_record->buffers[leaf_idx])) {
					PRINT(params)("Increasing buffer size ...\n");
					tree_record->buffers[leaf_idx] = cb_double_size(tree_record->buffers[leaf_idx]);
				}

				// add next_indices[j] to buffer leaf_idx
				cb_write(tree_record->buffers[leaf_idx], all_next_indices + j);

				if (cb_get_number_items(tree_record->buffers[leaf_idx]) >= tree_record->leaves_buffer_sizes_threshold) {
					tree_record->buffer_full_warning = 1;
				}

			} // else: traversal of test pattern has reached root: done!
		}

		/* ------------------------------------- OPENCL -------------------------------------- */
		PROCESS_ALL_BUFFERS(tree_record, params);
		/* ------------------------------------- OPENCL -------------------------------------- */

		// report only once that every test index has entered the tree
		if (tree_record->current_test_index == tree_record->nXtest && !test_printed) {
			PRINT(params)("All query indices are in the buffer tree now (buffers or reinsert queue)...\n");
			test_printed = 1;
		}

	} while (tree_record->current_test_index < tree_record->nXtest || !cb_is_empty(&(tree_record->queue_reinsert)));

	STOP_MY_TIMER(tree_record->timers + 2);

	// timer 5: work after the main loop
	START_MY_TIMER(tree_record->timers + 5);
	/* ------------------------------------- OPENCL -------------------------------------- */
	GET_DISTANCES_AND_INDICES(tree_record, params);
	/* ------------------------------------- OPENCL -------------------------------------- */

	// free space generated by testing
	for (i = 0; i < tree_record->n_leaves; i++) {
		cb_free(tree_record->buffers[i]);
	}
	STOP_MY_TIMER(tree_record->timers + 5);
	STOP_MY_TIMER(tree_record->timers + 1);

	PRINT(params)("Buffer full indices (overhead)=%i\n", reinsert_counter);
	PRINT(params)("\nNumber of iterations in while loop: \t\t\t\t\t\t\t%i\n", iter);
	PRINT(params)("Number of empty_all_buffers calls: \t\t\t\t\t\t\t%i\n", tree_record->empty_all_buffers_calls);
	PRINT(params)("Number of find_leaf_idx_calls: \t\t\t\t\t\t\t\t%i\n\n", tree_record->find_leaf_idx_calls);

	PRINT(params)("Elapsed total time for querying: \t\t\t\t\t\t%2.10f\n", GET_MY_TIMER(tree_record->timers + 1));
	PRINT(params)("-----------------------------------------------------------------------------------------------------------------------------\n");
	PRINT(params)("(Overhead)  Elapsed time for BEFORE WHILE: \t\t\t\t\t%2.10f\n",
			GET_MY_TIMER(tree_record->timers + 4));
	PRINT(params)("(Overhead)  -> ALLOCATE_MEMORY_OPENCL_DEVICES: \t\t\t\t\t%2.10f\n",
			GET_MY_TIMER(tree_record->timers + 3));

	PRINT(params)(
			"-----------------------------------------------------------------------------------------------------------------------------\n");
	PRINT(params)("Elapsed time in while-loop: \t\t\t\t\t\t\t%2.10f\n", GET_MY_TIMER(tree_record->timers + 2));
	PRINT(params)("(I)    Elapsed time for PROCESS_ALL_BUFFERS: \t\t\t\t\t%2.10f\n", GET_MY_TIMER(tree_record->timers + 12));
	PRINT(params)("(I.A)  Function: retrieve_indices_from_buffers_gpu: \t\t\t\t%2.10f\n", GET_MY_TIMER(tree_record->timers + 11));
	PRINT(params)("(I.B)  Do brute-force (do_brute.../process_buffers_...chunks_gpu : \t\t%2.10f\n", GET_MY_TIMER(tree_record->timers + 18));
	PRINT(params)("(I.B.1) -> Elapsed time for clEnqueueWriteBuffer (INTERLEAVED): \t\t%2.10f\n", GET_MY_TIMER(tree_record->timers + 19));
	PRINT(params)("(I.B.1) -> Elapsed time for memcpy (INTERLEAVED): \t\t\t\t%2.10f\n", GET_MY_TIMER(tree_record->timers + 21));
	PRINT(params)("(I.B.1) -> Elapsed time for waiting for chunk (in seconds): \t\t\t%2.10f\n", GET_MY_TIMER(tree_record->timers + 22));
	PRINT(params)("(I.B.2) -> Number of copy calls: %i\n", tree_record->counters[0]);

	if (!training_chunks_inactive(tree_record, params)) {
		PRINT(params)("(I.B.4) -> Overhead distributing indices to chunks (in seconds): \t\t%2.10f\n", GET_MY_TIMER(tree_record->timers + 23));
		PRINT(params)("(I.B.5) -> Processing of whole chunk (all three phases, in seconds): \t\t%2.10f\n", GET_MY_TIMER(tree_record->timers + 24));
		PRINT(params)("(I.B.6) -> Processing of chunk before brute (in seconds): \t\t\t%2.10f\n", GET_MY_TIMER(tree_record->timers + 25));
		PRINT(params)("(I.B.7) -> Processing of chunk after brute (in seconds): \t\t\t%2.10f\n", GET_MY_TIMER(tree_record->timers + 26));
		PRINT(params)("(I.B.8) -> Processing of chunk after brute, buffer release (in seconds): \t%2.10f\n", GET_MY_TIMER(tree_record->timers + 27));
		// NOTE(review): this prints counters[0], the same counter as the "copy calls" line
		// above -- confirm whether a different counter index was intended here
		PRINT(params)("(I.B.9) -> Number of release buffer calls: %i\n", tree_record->counters[0]);
	}
	if (USE_GPU) {

		PRINT(params)("(I.B.3)   -> Elapsed time for TEST_SUBSET (in seconds): \t\t\t%2.10f\n", GET_MY_TIMER(tree_record->timers + 13));
		PRINT(params)("(I.B.4)   -> Elapsed time for NN Search (in seconds): \t\t\t\t%2.10f\n", GET_MY_TIMER(tree_record->timers + 14));
		PRINT(params)("(I.B.5)   -> Elapsed time for UPDATE (in seconds): \t\t\t\t%2.10f\n", GET_MY_TIMER(tree_record->timers + 15));
		PRINT(params)("(I.B.6)   -> Elapsed time for OVERHEAD (in seconds): \t\t\t\t%2.10f\n",
				GET_MY_TIMER(tree_record->timers + 12)
				- GET_MY_TIMER(tree_record->timers + 14)
				- GET_MY_TIMER(tree_record->timers + 15)
				- GET_MY_TIMER(tree_record->timers + 13));

	}

	PRINT(params)("(II)   FIND_LEAF_IDX_BATCH : \t\t\t\t\t\t\t%2.10f\n", GET_MY_TIMER(tree_record->timers + 16));
	PRINT(params)("(III) Elapsed time for final brute-force step : \t\t\t\t%2.10f\n\n",
			GET_MY_TIMER(tree_record->timers + 20));

	PRINT(params)("-----------------------------------------------------------------------------------------------------------------------------\n");
	PRINT(params)("(DIFF) While - PROCESS_ALL_BUFFERS - FIND_LEAF_IDX_BATCH: \t\t\t%2.10f\n",
			GET_MY_TIMER(tree_record->timers + 2) - GET_MY_TIMER(tree_record->timers + 12)
					- GET_MY_TIMER(tree_record->timers + 16));
	PRINT(params)("(Overhead)  Elapsed time for AFTER WHILE : \t\t\t\t\t%2.10f\n",
			GET_MY_TIMER(tree_record->timers + 5));
	PRINT(params)("-----------------------------------------------------------------------------------------------------------------------------\n\n");

	PRINT(params)("-----------------------------------------------------------------------------------------------------------------------------\n");
	PRINT(params)("QUERY RUNTIME: %2.10f ", GET_MY_TIMER(tree_record->timers + 1));
	PRINT(params)("PROCESS_ALL_BUFFERS: %2.10f ", GET_MY_TIMER(tree_record->timers + 12));
	PRINT(params)("FIND_LEAF_IDX_BATCH: %2.10f ", GET_MY_TIMER(tree_record->timers + 16));
	PRINT(params)("WHILE_OVERHEAD: %2.10f ",
			GET_MY_TIMER(tree_record->timers + 2) - GET_MY_TIMER(tree_record->timers + 12)
					- GET_MY_TIMER(tree_record->timers + 16));
	PRINT(params)("\n");
	PRINT(params)("-----------------------------------------------------------------------------------------------------------------------------\n");

	// free all allocated memory related to querying
	// (the circular_buffer structs themselves; cb_free was already called on each of them above)
	for (i = 0; i < tree_record->n_leaves; i++) {
		free(tree_record->buffers[i]);
	}
	free(tree_record->buffers);

	// free arrays
	free(tree_record->all_stacks);
	free(tree_record->all_depths);
	free(tree_record->all_idxs);
	free(all_next_indices);
	free(tree_record->leaf_indices_batch_ret_vals);

}