/* --------------------------------------------------------------------------------
 * Fits the nearest neighbor model (builds the kd-tree).
 *
 * Runs three steps in order on the given record:
 *   1. kd_tree_init_tree_record()                   - set up the record from the
 *                                                     tree depth and training data
 *   2. kd_tree_generate_training_patterns_indices() - prepare the pattern indices
 *   3. kd_tree_build_tree()                         - build the tree itself
 *
 * Xtrain        : training patterns (FLOAT_TYPE array)
 * nXtrain       : number of training patterns
 * dXtrain       : dimensionality of each pattern
 * kdtree_record : record that receives the fitted model
 * params        : model parameters (params->tree_depth is read here)
 * -------------------------------------------------------------------------------- */
void fit_extern(FLOAT_TYPE *Xtrain,
                int nXtrain,
                int dXtrain,
                KD_TREE_RECORD *kdtree_record,
                KD_TREE_PARAMETERS *params)
{
    /* step 1: initialise the record with the training data and the requested depth */
    kd_tree_init_tree_record(kdtree_record,
                             params->tree_depth,
                             Xtrain,
                             nXtrain,
                             dXtrain);

    /* step 2: generate the training pattern indices stored alongside the record */
    kd_tree_generate_training_patterns_indices(kdtree_record);

    /* step 3: build the actual tree */
    kd_tree_build_tree(kdtree_record, params);
}
/** * Builds a buffer k-d-tree * * @param *Xtrain Pointer to array of type "FLOAT_TYPE" (either "float" or "double") * @param nXtrain Number of rows in *X (i.e., points/patterns) * @param dXtrain Number of columns in *X (one column per point/pattern) * @param *tree_record Pointer to struct instance storing the model * @param *params Pointer to struct instance storing all model parameters */ void build_bufferkdtree(FLOAT_TYPE * Xtrain, INT_TYPE nXtrain, INT_TYPE dXtrain, TREE_RECORD *tree_record, TREE_PARAMETERS *params) { int i; for (i = 0; i < 25; i++) { INIT_MY_TIMER(tree_record->timers + i); } int err_device = get_device_infos(params->platform_id, params->device_id, &(tree_record->device_infos)); if (err_device < 0){ printf("Error: Could not retrieve device information!"); exit(EXIT_FAILURE); } // update tree record parameters tree_record->dXtrain = dXtrain; tree_record->nXtrain = nXtrain; tree_record->n_nodes = pow(2, params->tree_depth) - 1; tree_record->n_leaves = pow(2, params->tree_depth); tree_record->max_visited = 6 * (tree_record->n_leaves - 3 + 1); // variable buffer sizes: increase/decrease depending on the three depth if (params->tree_depth > 16) { tree_record->leaves_initial_buffer_sizes = 128; PRINT(params)("Warning: tree depth %i might be too large (memory consumption)!", params->tree_depth); } else { tree_record->leaves_initial_buffer_sizes = pow(2, 24 - params->tree_depth); } // memory needed for storing training data (in bytes) double device_mem_bytes = (double)tree_record->device_infos.device_mem_bytes; double device_max_alloc_bytes = (double)tree_record->device_infos.device_max_alloc_bytes; double train_mem_bytes = get_raw_train_mem_device_bytes(tree_record, params); PRINT(params)("Memory needed for all training patterns: %f (GB)\n", train_mem_bytes / MEM_GB); if (train_mem_bytes / params->n_train_chunks > device_mem_bytes * params->allowed_train_mem_percent_chunk) { params->n_train_chunks = (INT_TYPE) ceil(train_mem_bytes / (device_mem_bytes 
* params->allowed_train_mem_percent_chunk)); // if set automatically, then use at least 3 chunks (hide computations and data transfer) if (params->n_train_chunks < 3){ params->n_train_chunks = 3; } PRINT(params)("WARNING: Increasing number of chunks to %i ...\n", params->n_train_chunks); } double train_chunk_gb = get_train_mem_with_chunks_device_bytes(tree_record, params); if (params->n_train_chunks > 1){ PRINT(params)("Memory allocated for both chunks: %f (GB)\n", (2*train_chunk_gb) / MEM_GB); } // we empty a buffer as soon as it has reached a certain filling status (here: 50%) tree_record->leaves_buffer_sizes_threshold = 0.9 * tree_record->leaves_initial_buffer_sizes; // the amount of indices removed from both queues (input and reinsert) in each round; has to // be reasonably large to provide sufficient work for a call to FIND_LEAF_IDX_BATCH tree_record->approx_number_of_avail_buffer_slots = 10 * tree_record->leaves_initial_buffer_sizes; PRINT(params)("Number of nodes (internal and leaves) in the top tree: %i\n", tree_record->n_nodes + tree_record->n_leaves); PRINT(params)("Number of buffers attached to the top tree: %i\n", tree_record->n_leaves); PRINT(params)("Buffer sizes (leaf structure): %i\n", tree_record->leaves_initial_buffer_sizes); PRINT(params)("Buffer empty thresholds: %i\n", tree_record->leaves_buffer_sizes_threshold); PRINT(params)("Indices fetched in each round (to fill buffers): %i\n", tree_record->approx_number_of_avail_buffer_slots); // array that contains the training patterns and the (original indices) tree_record->XtrainI = (void*) malloc( tree_record->nXtrain * (sizeof(FLOAT_TYPE) * tree_record->dXtrain + sizeof(INT_TYPE))); // the nodes and leaves arrays of the buffer kd-tree (host) tree_record->nodes = (TREE_NODE *) malloc(tree_record->n_nodes * sizeof(TREE_NODE)); tree_record->leaves = (FLOAT_TYPE *) malloc(tree_record->n_leaves * LEAF_WIDTH * sizeof(FLOAT_TYPE)); // create copy of training patterns (along with the original indices) 
kd_tree_generate_training_patterns_indices(tree_record->XtrainI, Xtrain, tree_record->nXtrain, tree_record->dXtrain); // build kd-tree and store it in nodes (medians) and leaves (fr,to values) kd_tree_build_tree(tree_record, params); // create copy of sorted training patterns (will be called by brute-force NN search) tree_record->Xtrain_sorted = (FLOAT_TYPE *) malloc(tree_record->nXtrain * tree_record->dXtrain * sizeof(FLOAT_TYPE)); // create copy of original training indices on host system tree_record->Itrain_sorted = (INT_TYPE*) malloc(tree_record->nXtrain * sizeof(INT_TYPE)); WRITE_SORTED_TRAINING_PATTERNS(tree_record, params); /* ------------------------------------- OPENCL -------------------------------------- */ INIT_OPENCL_DEVICES(tree_record, params); /* ------------------------------------- OPENCL -------------------------------------- */ }