Esempio n. 1
0
struct rf_packet *cb_peek(struct rf_packet_buffer *b)
{
	struct rf_packet *ret = NULL;
	if (!cb_is_empty(b)) {
		ret = &b->elems[b->start];
	}
	return ret;
}
Esempio n. 2
0
struct rf_packet *cb_read(struct rf_packet_buffer *b)
{
	struct rf_packet *ret = NULL;
	if (!cb_is_empty(b)) {
		ret = &b->elems[b->start];
		b->start = (b->start + 1) % b->size;
		b->count--;	
	}
	return ret;
}
int main(int argc, char ** argv)
{
    circular_buffer cb;
    elem_type elem = {0};

    int test_buffer_size = 10; /*arbitrary size*/
    cbinit(&cb, test_buffer_size);
   
    for(elem.value = 0; elem.value < 3*test_buffer_size; elem.value++) {
        cb_write(&cb, &elem);
    }

    while(!cb_is_empty(&cb)) {
        cb_read(&cb, &elem);
        printf("%d\n",elem.value);
    }

    cbfree(&cb);
    return 0;
}
Esempio n. 4
0
/**
 * Processes all buffers on the CPU.
 *
 * @param *tree_record Pointer to struct instance storing the model
 * @param *params Pointer to struct instance storing all model parameters
 */
void process_all_buffers_cpu(TREE_RECORD *tree_record,
		TREE_PARAMETERS *params) {

	INT_TYPE no_input_patterns_left = (tree_record->current_test_index == tree_record->nXtest
			&& cb_is_empty(&(tree_record->queue_reinsert)));

	if (tree_record->buffer_full_warning || no_input_patterns_left) {
		START_MY_TIMER(tree_record->timers + 11);
		tree_record->empty_all_buffers_calls++;
		UINT_TYPE leaf_idx;
		UINT_TYPE i;

		// get the total number of indices removed in this round (needed for space allocation)
		INT_TYPE total_number_test_indices_removed = 0;
		for (leaf_idx = 0; leaf_idx < tree_record->n_leaves; leaf_idx++) {
			if (cb_get_number_items(tree_record->buffers[leaf_idx]) > 0) { // we can always empty ALL buffers, no overhead!
				total_number_test_indices_removed += cb_get_number_items(
						tree_record->buffers[leaf_idx]);
			}
		}

		// intialize arrays that need to be transferred to the GPU
		INT_TYPE *test_indices_removed_from_all_buffers = (INT_TYPE *) malloc(
				total_number_test_indices_removed * sizeof(INT_TYPE));
		INT_TYPE *fr_indices = (INT_TYPE *) malloc(
				total_number_test_indices_removed * sizeof(INT_TYPE));
		INT_TYPE *to_indices = (INT_TYPE *) malloc(
				total_number_test_indices_removed * sizeof(INT_TYPE));
		INT_TYPE number_added_test_elements = 0;
		INT_TYPE number_t_indices_in_buffer;
		for (leaf_idx = 0; leaf_idx < tree_record->n_leaves; leaf_idx++) {
			if (cb_get_number_items(tree_record->buffers[leaf_idx]) > 0) {
				// get number of indices that are in the buffer
				number_t_indices_in_buffer = cb_get_number_items(
						tree_record->buffers[leaf_idx]);
				// read indices from buffer
				cb_read_batch(tree_record->buffers[leaf_idx],
						test_indices_removed_from_all_buffers
								+ number_added_test_elements,
						number_t_indices_in_buffer);
				// we generate copies of data items that are needed by ALL KERNELS (to avoid bank conflicts)
				// Thus, we are transferring more elements as needed to the GPU
				for (i = 0; i < number_t_indices_in_buffer; i++) {
					fr_indices[number_added_test_elements + i] = tree_record->leaves[leaf_idx
							* LEAF_WIDTH]; // fr_idx
					to_indices[number_added_test_elements + i] = tree_record->leaves[leaf_idx
							* LEAF_WIDTH + 1];     // to_idx
					// reinsert test pattern into test queue: can already be done here, we have to wait at the end once...
					cb_add_elt(&(tree_record->queue_reinsert),
							test_indices_removed_from_all_buffers
									+ number_added_test_elements + i);
				}
				// increase number of added elements (needed as offset)
				number_added_test_elements += number_t_indices_in_buffer;

			}
		}

		// all buffers are empty now
		tree_record->buffer_full_warning = 0;
		STOP_MY_TIMER(tree_record->timers + 11);
		START_MY_TIMER(tree_record->timers + 12);
		do_bruteforce_all_leaves_cpu(test_indices_removed_from_all_buffers,
				total_number_test_indices_removed, fr_indices, to_indices,
				tree_record, params);
		STOP_MY_TIMER(tree_record->timers + 12);
		// free memory
		free(test_indices_removed_from_all_buffers);
		free(fr_indices);
		free(to_indices);
		// all buffers are empty now: let's check if enough work is still there for another round!
		INT_TYPE num_elts_in_queue = cb_get_number_items(&(tree_record->queue_reinsert));
		if (tree_record->current_test_index == tree_record->nXtest
				&& num_elts_in_queue < params->bf_remaining_threshold) {
			START_MY_TIMER(tree_record->timers + 12);
			process_queue_via_brute_force_cpu(tree_record, params);
			STOP_MY_TIMER(tree_record->timers + 12);
		}
	}
}
Esempio n. 5
0
/**
 * Interface (extern): Computes the k nearest neighbors for a given set of test points
 * stored in *Xtest and stores the results in two arrays *distances and *indices.
 *
 * @param *Xtest Pointer to the set of query/test points (stored as FLOAT_TYPE)
 * @param nXtest The number of query points
 * @param dXtest The dimension of each query point
 * @param *distances The distances array (FLOAT_TYPE) used to store the computed distances
 * @param ndistances The number of query points
 * @param ddistances The number of distance values for each query point
 * @param *indices Pointer to arrray storing the indices of the k nearest neighbors for each query point
 * @param nindices The number of query points
 * @param dindices The number of indices comptued for each query point
 * @param *tree_record Pointer to struct storing all relevant information for model
 * @param *params Pointer to struct containing all relevant parameters
 *
 */
void neighbors_extern(FLOAT_TYPE * Xtest,
		INT_TYPE nXtest,
		INT_TYPE dXtest,
		FLOAT_TYPE *distances,
		INT_TYPE ndistances,
		INT_TYPE ddistances,
		INT_TYPE *indices,
		INT_TYPE nindices,
		INT_TYPE dindices,
		TREE_RECORD *tree_record,
		TREE_PARAMETERS *params) {

	START_MY_TIMER(tree_record->timers + 1);

	UINT_TYPE i, j;
	tree_record->find_leaf_idx_calls = 0;
	tree_record->empty_all_buffers_calls = 0;
	tree_record->Xtest = Xtest;
	tree_record->nXtest = nXtest;
	tree_record->dist_mins_global = distances;
	tree_record->idx_mins_global = indices;

	long device_mem_bytes = tree_record->device_infos.device_mem_bytes;
	double test_mem_bytes = get_test_tmp_mem_device_bytes(tree_record, params);
	PRINT(params)("Memory needed for test patterns: %f (GB)\n", test_mem_bytes / MEM_GB);
	if (test_mem_bytes > device_mem_bytes * params->allowed_test_mem_percent) {
		PRINT(params)("Too much memory used for test patterns and temporary data!\n");
		FREE_OPENCL_DEVICES(tree_record, params);
		exit(EXIT_FAILURE);
	}

	double total_device_bytes = get_total_mem_device_bytes(tree_record, params);
	PRINT(params)("Total memory needed on device: %f (GB)\n", total_device_bytes / MEM_GB);

	START_MY_TIMER(tree_record->timers + 4);

	/* ------------------------------------- OPENCL -------------------------------------- */
	INIT_ARRAYS(tree_record, params);
	/* ------------------------------------- OPENCL -------------------------------------- */

	// initialize leaf buffer for test queries (circular buffers)
	tree_record->buffers = (circular_buffer **) malloc(tree_record->n_leaves * sizeof(circular_buffer*));
	for (i = 0; i < tree_record->n_leaves; i++) {
		tree_record->buffers[i] = (circular_buffer *) malloc(sizeof(circular_buffer));
		cb_init(tree_record->buffers[i], tree_record->leaves_initial_buffer_sizes);
	}

	tree_record->buffer_full_warning = 0;

	// initialize queue "input" (we can have at most number_test_patterns in there)
	cb_init(&(tree_record->queue_reinsert), tree_record->nXtest);

	/* ------------------------------------- OPENCL -------------------------------------- */
	START_MY_TIMER(tree_record->timers + 3);
	ALLOCATE_MEMORY_OPENCL_DEVICES(tree_record, params);
	STOP_MY_TIMER(tree_record->timers + 3);
	/* ------------------------------------- OPENCL -------------------------------------- */

	UINT_TYPE iter = 0;
	UINT_TYPE test_printed = 0;

	// allocate space for the indices added in each round; we cannot have more than original test patterns ...
	INT_TYPE *all_next_indices = (INT_TYPE *) malloc(
			tree_record->approx_number_of_avail_buffer_slots * sizeof(INT_TYPE));

	// allocate space for all return values (by FIND_LEAF_IDX_BATCH)
	tree_record->leaf_indices_batch_ret_vals = (INT_TYPE *) malloc(
			tree_record->approx_number_of_avail_buffer_slots * sizeof(INT_TYPE));

	UINT_TYPE num_elts_added;
	tree_record->current_test_index = 0;
	INT_TYPE reinsert_counter = 0;

	PRINT(params)("Starting Querying process via buffer tree...\n");

	STOP_MY_TIMER(tree_record->timers + 4);
	START_MY_TIMER(tree_record->timers + 2);

	do {

		iter++;

		// try to get elements from both queues until buffers are full
		// (each buffer is either empty or has at least space for leaves_buffer_sizes_threshold elements)
		num_elts_added = 0;

		// add enough elements to the buffers ("batch filling")
		while (num_elts_added < tree_record->approx_number_of_avail_buffer_slots
				&& (tree_record->current_test_index < tree_record->nXtest
						|| !cb_is_empty(&(tree_record->queue_reinsert)))) {

			// we remove indices from both queues here (add one element from each queue, if not empty)
			if (!cb_is_empty(&(tree_record->queue_reinsert))) {
				cb_read(&(tree_record->queue_reinsert), all_next_indices + num_elts_added);
			} else {
				all_next_indices[num_elts_added] = tree_record->current_test_index;
				tree_record->current_test_index++;
			}
			num_elts_added++;
		}

		/* ------------------------------------- OPENCL -------------------------------------- */
		FIND_LEAF_IDX_BATCH(all_next_indices, num_elts_added, tree_record->leaf_indices_batch_ret_vals, tree_record,
				params);
		/* ------------------------------------- OPENCL -------------------------------------- */

		// we have added num_elts_added indices to the all_next_indices array
		for (j = 0; j < num_elts_added; j++) {

			INT_TYPE leaf_idx = tree_record->leaf_indices_batch_ret_vals[j];

			// if not done: add the index to the appropriate buffer
			if (leaf_idx != -1) {

				// enlarge buffer if needed
				if (cb_is_full(tree_record->buffers[leaf_idx])) {
					PRINT(params)("Increasing buffer size ...\n");
					tree_record->buffers[leaf_idx] = cb_double_size(tree_record->buffers[leaf_idx]);
				}

				// add next_indices[j] to buffer leaf_idx
				cb_write(tree_record->buffers[leaf_idx], all_next_indices + j);

				if (cb_get_number_items(tree_record->buffers[leaf_idx]) >= tree_record->leaves_buffer_sizes_threshold) {
					tree_record->buffer_full_warning = 1;
				}

			} // else: traversal of test pattern has reached root: done!
		}

		/* ------------------------------------- OPENCL -------------------------------------- */
		PROCESS_ALL_BUFFERS(tree_record, params);
		/* ------------------------------------- OPENCL -------------------------------------- */

		if (tree_record->current_test_index == tree_record->nXtest && !test_printed) {
			PRINT(params)("All query indices are in the buffer tree now (buffers or reinsert queue)...\n");
			test_printed = 1;
		}

	} while (tree_record->current_test_index < tree_record->nXtest || !cb_is_empty(&(tree_record->queue_reinsert)));

	STOP_MY_TIMER(tree_record->timers + 2);

	START_MY_TIMER(tree_record->timers + 5);
	/* ------------------------------------- OPENCL -------------------------------------- */
	GET_DISTANCES_AND_INDICES(tree_record, params);
	/* ------------------------------------- OPENCL -------------------------------------- */

	// free space generated by testing
	for (i = 0; i < tree_record->n_leaves; i++) {
		cb_free(tree_record->buffers[i]);
	}
	STOP_MY_TIMER(tree_record->timers + 5);
	STOP_MY_TIMER(tree_record->timers + 1);

	PRINT(params)("Buffer full indices (overhead)=%i\n", reinsert_counter);
	PRINT(params)("\nNumber of iterations in while loop: \t\t\t\t\t\t\t%i\n", iter);
	PRINT(params)("Number of empty_all_buffers calls: \t\t\t\t\t\t\t%i\n", tree_record->empty_all_buffers_calls);
	PRINT(params)("Number of find_leaf_idx_calls: \t\t\t\t\t\t\t\t%i\n\n", tree_record->find_leaf_idx_calls);

	PRINT(params)("Elapsed total time for querying: \t\t\t\t\t\t%2.10f\n", GET_MY_TIMER(tree_record->timers + 1));
	PRINT(params)("-----------------------------------------------------------------------------------------------------------------------------\n");
	PRINT(params)("(Overhead)  Elapsed time for BEFORE WHILE: \t\t\t\t\t%2.10f\n",
			GET_MY_TIMER(tree_record->timers + 4));
	PRINT(params)("(Overhead)  -> ALLOCATE_MEMORY_OPENCL_DEVICES: \t\t\t\t\t%2.10f\n",
			GET_MY_TIMER(tree_record->timers + 3));

	PRINT(params)(
			"-----------------------------------------------------------------------------------------------------------------------------\n");
	PRINT(params)("Elapsed time in while-loop: \t\t\t\t\t\t\t%2.10f\n", GET_MY_TIMER(tree_record->timers + 2));
	PRINT(params)("(I)    Elapsed time for PROCESS_ALL_BUFFERS: \t\t\t\t\t%2.10f\n", GET_MY_TIMER(tree_record->timers + 12));
	PRINT(params)("(I.A)  Function: retrieve_indices_from_buffers_gpu: \t\t\t\t%2.10f\n", GET_MY_TIMER(tree_record->timers + 11));
	PRINT(params)("(I.B)  Do brute-force (do_brute.../process_buffers_...chunks_gpu : \t\t%2.10f\n", GET_MY_TIMER(tree_record->timers + 18));
	PRINT(params)("(I.B.1) -> Elapsed time for clEnqueueWriteBuffer (INTERLEAVED): \t\t%2.10f\n", GET_MY_TIMER(tree_record->timers + 19));
	PRINT(params)("(I.B.1) -> Elapsed time for memcpy (INTERLEAVED): \t\t\t\t%2.10f\n", GET_MY_TIMER(tree_record->timers + 21));
	PRINT(params)("(I.B.1) -> Elapsed time for waiting for chunk (in seconds): \t\t\t%2.10f\n", GET_MY_TIMER(tree_record->timers + 22));
	PRINT(params)("(I.B.2) -> Number of copy calls: %i\n", tree_record->counters[0]);

	if (!training_chunks_inactive(tree_record, params)) {
		PRINT(params)("(I.B.4) -> Overhead distributing indices to chunks (in seconds): \t\t%2.10f\n", GET_MY_TIMER(tree_record->timers + 23));
		PRINT(params)("(I.B.5) -> Processing of whole chunk (all three phases, in seconds): \t\t%2.10f\n", GET_MY_TIMER(tree_record->timers + 24));
		PRINT(params)("(I.B.6) -> Processing of chunk before brute (in seconds): \t\t\t%2.10f\n", GET_MY_TIMER(tree_record->timers + 25));
		PRINT(params)("(I.B.7) -> Processing of chunk after brute (in seconds): \t\t\t%2.10f\n", GET_MY_TIMER(tree_record->timers + 26));
		PRINT(params)("(I.B.8) -> Processing of chunk after brute, buffer release (in seconds): \t%2.10f\n", GET_MY_TIMER(tree_record->timers + 27));
		PRINT(params)("(I.B.9) -> Number of release buffer calls: %i\n", tree_record->counters[0]);
	}
	if (USE_GPU) {

		PRINT(params)("(I.B.3)   -> Elapsed time for TEST_SUBSET (in seconds): \t\t\t%2.10f\n", GET_MY_TIMER(tree_record->timers + 13));
		PRINT(params)("(I.B.4)   -> Elapsed time for NN Search (in seconds): \t\t\t\t%2.10f\n", GET_MY_TIMER(tree_record->timers + 14));
		PRINT(params)("(I.B.5)   -> Elapsed time for UPDATE (in seconds): \t\t\t\t%2.10f\n", GET_MY_TIMER(tree_record->timers + 15));
		PRINT(params)("(I.B.6)   -> Elapsed time for OVERHEAD (in seconds): \t\t\t\t%2.10f\n",
				GET_MY_TIMER(tree_record->timers + 12)
				- GET_MY_TIMER(tree_record->timers + 14)
		    	- GET_MY_TIMER(tree_record->timers + 15)
				- GET_MY_TIMER(tree_record->timers + 13));

	}

	PRINT(params)("(II)   FIND_LEAF_IDX_BATCH : \t\t\t\t\t\t\t%2.10f\n", GET_MY_TIMER(tree_record->timers + 16));
	PRINT(params)("(III) Elapsed time for final brute-force step : \t\t\t\t%2.10f\n\n",
			GET_MY_TIMER(tree_record->timers + 20));

	PRINT(params)("-----------------------------------------------------------------------------------------------------------------------------\n");
	PRINT(params)("(DIFF) While - PROCESS_ALL_BUFFERS - FIND_LEAF_IDX_BATCH: \t\t\t%2.10f\n",
			GET_MY_TIMER(tree_record->timers + 2) - GET_MY_TIMER(tree_record->timers + 12)
					- GET_MY_TIMER(tree_record->timers + 16));
	PRINT(params)("(Overhead)  Elapsed time for AFTER WHILE : \t\t\t\t\t%2.10f\n",
			GET_MY_TIMER(tree_record->timers + 5));
	PRINT(params)("-----------------------------------------------------------------------------------------------------------------------------\n\n");

	PRINT(params)("-----------------------------------------------------------------------------------------------------------------------------\n");
	PRINT(params)("QUERY RUNTIME: %2.10f ", GET_MY_TIMER(tree_record->timers + 1));
	PRINT(params)("PROCESS_ALL_BUFFERS: %2.10f ", GET_MY_TIMER(tree_record->timers + 12));
	PRINT(params)("FIND_LEAF_IDX_BATCH: %2.10f ", GET_MY_TIMER(tree_record->timers + 16));
	PRINT(params)("WHILE_OVERHEAD: %2.10f ",
			GET_MY_TIMER(tree_record->timers + 2) - GET_MY_TIMER(tree_record->timers + 12)
					- GET_MY_TIMER(tree_record->timers + 16));
	PRINT(params)("\n");
	PRINT(params)("-----------------------------------------------------------------------------------------------------------------------------\n");

	// free all allocated memory related to querying
	for (i = 0; i < tree_record->n_leaves; i++) {
		free(tree_record->buffers[i]);
	}
	free(tree_record->buffers);

	// free arrays
	free(tree_record->all_stacks);
	free(tree_record->all_depths);
	free(tree_record->all_idxs);
	free(all_next_indices);
	free(tree_record->leaf_indices_batch_ret_vals);

}