예제 #1
0
파일: base.c 프로젝트: chidcha/bufferkdtree
/**
 * Fits the nearest neighbor model, i.e., builds the kd-tree.
 *
 * The work is delegated to three helpers that are invoked in this order:
 * kd_tree_init_tree_record, kd_tree_generate_training_patterns_indices
 * and kd_tree_build_tree.
 *
 * @param Xtrain         Training data; handed unchanged to kd_tree_init_tree_record
 * @param nXtrain        Handed unchanged to kd_tree_init_tree_record
 *                       (presumably the number of training patterns -- confirm)
 * @param dXtrain        Handed unchanged to kd_tree_init_tree_record
 *                       (presumably the dimensionality of each pattern -- confirm)
 * @param kdtree_record  Tree record that is initialized and then filled by the build steps
 * @param params         Model parameters; params->tree_depth is read here and the
 *                       whole struct is forwarded to kd_tree_build_tree
 */
void fit_extern(FLOAT_TYPE *Xtrain,
		int nXtrain,
		int dXtrain,
		KD_TREE_RECORD *kdtree_record,
		KD_TREE_PARAMETERS *params) {

	// step 1: initialize the record from the requested depth and the training data
	kd_tree_init_tree_record(
			kdtree_record,
			params->tree_depth,
			Xtrain,
			nXtrain,
			dXtrain);

	// step 2: generate the training pattern indices inside the record
	kd_tree_generate_training_patterns_indices(kdtree_record);

	// step 3: build the actual tree
	kd_tree_build_tree(kdtree_record, params);

}
예제 #2
0
파일: base.c 프로젝트: gieseke/bufferkdtree
/**
 * Allocates room for n_elems elements of elem_size bytes each or terminates
 * the process (same exit-on-error policy as the device query in
 * build_bufferkdtree).
 *
 * @param n_elems   Number of elements
 * @param elem_size Size of a single element in bytes
 * @param what      Short description of the buffer, used in the error message
 * @return Pointer returned by malloc; may be NULL only if zero bytes were requested
 */
static void *bufferkdtree_malloc_or_exit(size_t n_elems, size_t elem_size, const char *what) {

	// reject requests whose total byte count does not fit into size_t
	if (elem_size != 0 && n_elems > ((size_t) -1) / elem_size) {
		fprintf(stderr, "Error: Allocation size overflow for %s!\n", what);
		exit(EXIT_FAILURE);
	}

	size_t n_bytes = n_elems * elem_size;
	void *ptr = malloc(n_bytes);

	// malloc(0) may legitimately return NULL, hence only non-empty requests count as failures
	if (ptr == NULL && n_bytes != 0) {
		fprintf(stderr, "Error: Could not allocate memory for %s!\n", what);
		exit(EXIT_FAILURE);
	}

	return ptr;

}

/**
 * Builds a buffer k-d-tree
 *
 * Initializes the first 25 timers of the tree record, queries the device
 * information, derives the tree and buffer sizes from params->tree_depth,
 * increases params->n_train_chunks if the training data would exceed the
 * allowed share of device memory, allocates the host arrays, builds the
 * tree, writes the sorted training patterns and finally initializes the
 * OpenCL devices.
 *
 * The process is terminated via exit(EXIT_FAILURE) if the device information
 * cannot be retrieved or if one of the host arrays cannot be allocated.
 * The arrays allocated here are stored in *tree_record and are not freed by
 * this function.
 *
 * @param *Xtrain Pointer to array of type "FLOAT_TYPE" (either "float" or "double")
 * @param nXtrain Number of rows in *Xtrain (i.e., points/patterns)
 * @param dXtrain Number of columns in *Xtrain (i.e., values stored per point/pattern)
 * @param *tree_record Pointer to struct instance storing the model
 * @param *params Pointer to struct instance storing all model parameters;
 *                n_train_chunks may be modified by this function
 */
void build_bufferkdtree(FLOAT_TYPE * Xtrain,
		INT_TYPE nXtrain,
		INT_TYPE dXtrain,
		TREE_RECORD *tree_record,
		TREE_PARAMETERS *params) {

	int i;
	for (i = 0; i < 25; i++) {
		INIT_MY_TIMER(tree_record->timers + i);
	}

	int err_device = get_device_infos(params->platform_id, params->device_id, &(tree_record->device_infos));

	if (err_device < 0){
		fprintf(stderr, "Error: Could not retrieve device information!\n");
		exit(EXIT_FAILURE);
	}

	// update tree record parameters
	tree_record->dXtrain = dXtrain;
	tree_record->nXtrain = nXtrain;
	tree_record->n_nodes = pow(2, params->tree_depth) - 1;
	tree_record->n_leaves = pow(2, params->tree_depth);
	tree_record->max_visited = 6 * (tree_record->n_leaves - 3 + 1);

	// variable buffer sizes: increase/decrease depending on the tree depth
	if (params->tree_depth > 16) {
		tree_record->leaves_initial_buffer_sizes = 128;
		PRINT(params)("Warning: tree depth %i might be too large (memory consumption)!\n", params->tree_depth);
	} else {
		tree_record->leaves_initial_buffer_sizes = pow(2, 24 - params->tree_depth);
	}

	// memory available on the device (in bytes)
	double device_mem_bytes = (double)tree_record->device_infos.device_mem_bytes;
	// NOTE(review): not read by any statement visible in this function -- confirm before removing
	double device_max_alloc_bytes = (double)tree_record->device_infos.device_max_alloc_bytes;

	// memory needed for storing training data (in bytes)
	double train_mem_bytes = get_raw_train_mem_device_bytes(tree_record, params);
	PRINT(params)("Memory needed for all training patterns: %f (GB)\n", train_mem_bytes / MEM_GB);

	// a single chunk must not exceed the allowed fraction of the device memory
	if (train_mem_bytes / params->n_train_chunks > device_mem_bytes * params->allowed_train_mem_percent_chunk) {
		params->n_train_chunks = (INT_TYPE) ceil(train_mem_bytes / (device_mem_bytes * params->allowed_train_mem_percent_chunk));
		// if set automatically, then use at least 3 chunks (hide computations and data transfer)
		if (params->n_train_chunks < 3){
			params->n_train_chunks = 3;
		}

		PRINT(params)("WARNING: Increasing number of chunks to %i ...\n", params->n_train_chunks);
	}

	// value is in bytes; it is converted to GB only for printing
	double train_chunk_bytes = get_train_mem_with_chunks_device_bytes(tree_record, params);
	if (params->n_train_chunks > 1){
		PRINT(params)("Memory allocated for both chunks: %f (GB)\n", (2*train_chunk_bytes) / MEM_GB);
	}


	// we empty a buffer as soon as it has reached a certain filling status (here: 90%)
	tree_record->leaves_buffer_sizes_threshold = 0.9 * tree_record->leaves_initial_buffer_sizes;

	// the amount of indices removed from both queues (input and reinsert) in each round; has to
	// be reasonably large to provide sufficient work for a call to FIND_LEAF_IDX_BATCH
	tree_record->approx_number_of_avail_buffer_slots = 10 * tree_record->leaves_initial_buffer_sizes;

	PRINT(params)("Number of nodes (internal and leaves) in the top tree: %i\n",
			tree_record->n_nodes + tree_record->n_leaves);
	PRINT(params)("Number of buffers attached to the top tree: %i\n", tree_record->n_leaves);
	PRINT(params)("Buffer sizes (leaf structure): %i\n", tree_record->leaves_initial_buffer_sizes);
	PRINT(params)("Buffer empty thresholds: %i\n", tree_record->leaves_buffer_sizes_threshold);
	PRINT(params)("Indices fetched in each round (to fill buffers): %i\n",
			tree_record->approx_number_of_avail_buffer_slots);

	// array that contains the training patterns and the (original indices):
	// per pattern dXtrain values of type FLOAT_TYPE plus one INT_TYPE index
	tree_record->XtrainI = (void*) bufferkdtree_malloc_or_exit(
			(size_t) tree_record->nXtrain,
			sizeof(FLOAT_TYPE) * tree_record->dXtrain + sizeof(INT_TYPE),
			"training patterns with indices");

	// the nodes and leaves arrays of the buffer kd-tree (host)
	tree_record->nodes = (TREE_NODE *) bufferkdtree_malloc_or_exit(
			(size_t) tree_record->n_nodes, sizeof(TREE_NODE), "tree nodes");
	tree_record->leaves = (FLOAT_TYPE *) bufferkdtree_malloc_or_exit(
			(size_t) tree_record->n_leaves * LEAF_WIDTH, sizeof(FLOAT_TYPE), "tree leaves");

	// create copy of training patterns (along with the original indices)
	kd_tree_generate_training_patterns_indices(tree_record->XtrainI, Xtrain, tree_record->nXtrain,
			tree_record->dXtrain);

	// build kd-tree and store it in nodes (medians) and leaves (fr,to values)
	kd_tree_build_tree(tree_record, params);

	// create copy of sorted training patterns (will be called by brute-force NN search);
	// the size is computed in size_t so that nXtrain * dXtrain cannot overflow INT_TYPE
	tree_record->Xtrain_sorted = (FLOAT_TYPE *) bufferkdtree_malloc_or_exit(
			(size_t) tree_record->nXtrain,
			(size_t) tree_record->dXtrain * sizeof(FLOAT_TYPE),
			"sorted training patterns");

	// create copy of original training indices on host system
	tree_record->Itrain_sorted = (INT_TYPE*) bufferkdtree_malloc_or_exit(
			(size_t) tree_record->nXtrain, sizeof(INT_TYPE), "sorted training indices");

	WRITE_SORTED_TRAINING_PATTERNS(tree_record, params);

	/* ------------------------------------- OPENCL -------------------------------------- */
	INIT_OPENCL_DEVICES(tree_record, params);
	/* ------------------------------------- OPENCL -------------------------------------- */

}