void Allocator::defrag() { this->defragged = !this->defragged; for(index_t pointer_id = 0; pointer_id < this->total_blocks; ++pointer_id) { if(get_start_block(pointer_id) != -1) { index_t start_block = get_start_block(pointer_id); size_t required_blocks = get_size_blocks(pointer_id); index_t new_position = find_position(required_blocks); if(new_position == -1) { throw AllocError(AllocErrorType::NoMemory); } move(start_block, new_position, required_blocks, required_blocks); set_start_block(pointer_id, new_position); set_n_blocks(pointer_id, required_blocks); } } }
/// Shrink the allocation identified by `pointer_id` down to `required_blocks`,
/// marking the released tail blocks free in the block map.
/// Assumes required_blocks <= current size — TODO confirm callers enforce this.
void Allocator::shrink(index_t pointer_id, size_t required_blocks)
{
    const index_t first = get_start_block(pointer_id);
    const size_t old_count = get_size_blocks(pointer_id);
    // Free only the trailing portion; the head of the allocation stays put.
    fill_map(first + required_blocks, old_count - required_blocks, false);
    this->hash_map[pointer_id].n_blocks = required_blocks;
}
/// Release the allocation backing `p`: clear its blocks in the map, drop its
/// bookkeeping entry, and invalidate the pointer handle (id set to -1).
void Allocator::free(Pointer &p)
{
    const index_t id = p.get_id();
    fill_map(get_start_block(id), get_size_blocks(id), false);
    remove(id);
    p.set_id(-1);
}
/// Translate a pointer id into a raw address, or nullptr for the
/// invalid id sentinel (-1).
void* Allocator::resolve(index_t pointer_id)
{
    return (pointer_id == -1)
               ? nullptr
               : get_address(get_start_block(pointer_id));
}
/// Render a human-readable listing of every live allocation: its pointer id,
/// starting block, and block count.
/// (A free-region summary and a byte-level hex dump of contents used to be
/// emitted here but are currently disabled.)
std::string Allocator::dump()
{
    std::stringstream out;
    for (index_t id = 0; id < total_blocks; ++id) {
        if (get_start_block(id) == -1)
            continue; // unused pointer slot
        out << "Pointer_id" << id << ":\n"
            << "\t\tstart_block: " << get_start_block(id) << "\n"
            << "\t\tn_blocks: " << get_size_blocks(id) << "\n"
            << "Contains:\n";
    }
    return out.str();
}
/// Reallocate by relocating: find a fresh position big enough for
/// `required_blocks`, move the allocation there, and update bookkeeping.
/// Always returns true on success.
/// @throws AllocError(NoMemory) if no position of the requested size exists.
bool Allocator::realloc_move(Pointer &p, size_t required_blocks)
{
    const index_t id = p.get_id();
    const index_t dest = find_position(required_blocks);
    if (dest == -1)
        throw AllocError(AllocErrorType::NoMemory);

    move(get_start_block(id), dest, get_size_blocks(id), required_blocks);
    set_n_blocks(id, required_blocks);
    set_start_block(id, dest);
    return true;
}
/* Log a summary of one io_activity: direction, size, and per-delta
 * geometry (sectors and blocks), plus a scan of each delta's payload
 * for the marker word "DAMN". */
void log_ioa(const struct io_activity *ioa)
{
	int i;

	LOG("%s, bytes to transfer: %d, delta count: %d",
	    ioa->rw ? "W" : "R", ioa->data_size, ioa->delta_count);

	for (i = 0; i < ioa->delta_count; ++i) {
		struct block_delta *delta = &ioa->deltas[i];
		int hits;

		LOG("bv #%d: len %d, offset %d, %d sectors (%d..%d) or %d blocks (%d..%d)",
		    i, delta->size, delta->offset,
		    get_sectors_count(delta), delta->start_sector,
		    delta->end_sector, get_blocks_count(delta),
		    get_start_block(delta), get_end_block(delta));

		hits = find_word_count("DAMN", delta->data, delta->size);
		if (hits > 0)
			LOG("Found DAMN from deltas %d times", hits);
	}
}
/// Try to grow the allocation in place: succeeds only when enough contiguous
/// free blocks immediately follow the current extent. Returns false (without
/// modifying anything) when in-place growth is impossible.
bool Allocator::realloc_inplace(Pointer &p, size_t required_blocks)
{
    const index_t id = p.get_id();
    const size_t have = get_size_blocks(id);
    const index_t tail = get_start_block(id) + have;

    // Not enough room right after the allocation -> caller must move it.
    if (count_free_blocks(tail) + have < required_blocks)
        return false;

    fill_map(tail, required_blocks - have, true); // claim the extra blocks
    hash_map[id].n_blocks = required_blocks;
    return true;
}
/* Number of blocks spanned by a delta, counting both endpoints. */
inline int get_blocks_count(const struct block_delta *delta)
{
	const int first = get_start_block(delta);
	const int last = get_end_block(delta);

	return last - first + 1;
}
/*
 * Create the "hugefiles" requested by the mke2fs profile: read sizing/
 * alignment/ownership knobs from the profile, compute how many files of how
 * many blocks fit (leaving the configured slack), then create them under the
 * configured directory.
 *
 * Returns 0 on success or an errcode_t / errno-style error.
 *
 * Fix: the two ENOSPC early returns previously leaked fn_buf (allocated just
 * above); they now route through the errout: cleanup path.
 */
errcode_t mk_hugefiles(ext2_filsys fs, const char *device_name)
{
	unsigned long	i;
	ext2_ino_t	dir;
	errcode_t	retval;
	blk64_t		fs_blocks, part_offset = 0;
	unsigned long	align;
	int		d, dsize;
	char		*t;

	if (!get_bool_from_profile(fs_types, "make_hugefiles", 0))
		return 0;

	/* Huge files are extent-mapped; refuse on non-extent filesystems. */
	if (!EXT2_HAS_INCOMPAT_FEATURE(fs->super,
				       EXT3_FEATURE_INCOMPAT_EXTENTS))
		return EXT2_ET_EXTENT_NOT_SUPPORTED;

	/* Ownership / count / size knobs (file-scope variables set here). */
	uid = get_int_from_profile(fs_types, "hugefiles_uid", 0);
	gid = get_int_from_profile(fs_types, "hugefiles_gid", 0);
	fs->umask = get_int_from_profile(fs_types, "hugefiles_umask", 077);
	num_files = get_int_from_profile(fs_types, "num_hugefiles", 0);
	t = get_string_from_profile(fs_types, "hugefiles_slack", "1M");
	num_slack = parse_num_blocks2(t, fs->super->s_log_block_size);
	free(t);
	t = get_string_from_profile(fs_types, "hugefiles_size", "0");
	num_blocks = parse_num_blocks2(t, fs->super->s_log_block_size);
	free(t);
	t = get_string_from_profile(fs_types, "hugefiles_align", "0");
	align = parse_num_blocks2(t, fs->super->s_log_block_size);
	free(t);

	/* Optionally align files to the physical disk rather than the fs. */
	if (get_bool_from_profile(fs_types, "hugefiles_align_disk", 0)) {
		part_offset = get_partition_start(device_name) /
			(fs->blocksize / 512);
		if (part_offset % EXT2FS_CLUSTER_RATIO(fs)) {
			fprintf(stderr,
				_("Partition offset of %llu (%uk) blocks "
				  "not compatible with cluster size %u.\n"),
				part_offset, fs->blocksize,
				EXT2_CLUSTER_SIZE(fs->super));
			exit(1);
		}
	}

	num_blocks = round_up_align(num_blocks, align, 0);
	zero_hugefile = get_bool_from_profile(fs_types, "zero_hugefiles",
					      zero_hugefile);

	t = get_string_from_profile(fs_types, "hugefiles_dir", "/");
	retval = create_directory(fs, t, &dir);
	free(t);
	if (retval)
		return retval;

	/* Build the filename buffer: prefix + zero-padded index digits. */
	fn_prefix = get_string_from_profile(fs_types, "hugefiles_name",
					    "hugefile");
	idx_digits = get_int_from_profile(fs_types, "hugefiles_digits", 5);
	d = int_log10(num_files) + 1;
	if (idx_digits > d)
		d = idx_digits;
	dsize = strlen(fn_prefix) + d + 16;
	fn_buf = malloc(dsize);
	if (!fn_buf) {
		free(fn_prefix);
		return ENOMEM;
	}
	strcpy(fn_buf, fn_prefix);
	fn_numbuf = fn_buf + strlen(fn_prefix);
	free(fn_prefix);

	/* Work out how much space is actually available after slack. */
	fs_blocks = ext2fs_free_blocks_count(fs->super);
	if (fs_blocks < num_slack + align) {
		retval = ENOSPC;	/* was a bare return: leaked fn_buf */
		goto errout;
	}
	fs_blocks -= num_slack + align;
	if (num_blocks && num_blocks > fs_blocks) {
		retval = ENOSPC;	/* was a bare return: leaked fn_buf */
		goto errout;
	}
	if (num_blocks == 0 && num_files == 0)
		num_files = 1;

	/*
	 * Only a per-file size given: derive the file count, iterating once
	 * so metadata overhead is charged against the usable space.
	 */
	if (num_files == 0 && num_blocks) {
		num_files = fs_blocks / num_blocks;
		fs_blocks -= (num_files / 16) + 1;
		fs_blocks -= calc_overhead(fs, num_blocks) * num_files;
		num_files = fs_blocks / num_blocks;
	}

	/* Only a file count given: derive the per-file size the same way. */
	if (num_blocks == 0 && num_files > 1) {
		num_blocks = fs_blocks / num_files;
		fs_blocks -= (num_files / 16) + 1;
		fs_blocks -= calc_overhead(fs, num_blocks) * num_files;
		num_blocks = fs_blocks / num_files;
	}

	num_slack += calc_overhead(fs, num_blocks) * num_files;
	num_slack += (num_files / 16) + 1;	/* space for dir entries */
	goal = get_start_block(fs, num_slack);
	goal = round_up_align(goal, align, part_offset);

	/* Files > 2GB need the large-file ro-compat feature flag. */
	if ((num_blocks ? num_blocks : fs_blocks) >
	    (0x80000000UL / fs->blocksize))
		fs->super->s_feature_ro_compat |=
			EXT2_FEATURE_RO_COMPAT_LARGE_FILE;

	if (!quiet) {
		if (zero_hugefile && verbose)
			printf("%s", _("Huge files will be zero'ed\n"));
		printf(_("Creating %lu huge file(s) "), num_files);
		if (num_blocks)
			printf(_("with %llu blocks each"), num_blocks);
		fputs(": ", stdout);
	}
	if (num_blocks == 0)
		num_blocks = ext2fs_blocks_count(fs->super) - goal;

	for (i = 0; i < num_files; i++) {
		ext2_ino_t ino;

		retval = mk_hugefile(fs, num_blocks, dir, i, &ino);
		if (retval) {
			com_err(program_name, retval,
				_("while creating huge file %lu"), i);
			goto errout;
		}
	}
	if (!quiet)
		fputs(_("done\n"), stdout);
	/* Success falls through: retval is 0 here. */
errout:
	free(fn_buf);
	return retval;
}
/// Build the full IR for a comparison stage: the stage function, the user
/// function, the (optionally tiled / parallelized / timed / progress-tracked)
/// loop nest that drives all NxM comparisons, and the post-loop finish/cleanup
/// blocks. The statement ORDER here is load-bearing: several insert_at calls
/// index into blocks relative to expressions added just above them.
void ComparisonStageIR::build_stage() {
    // Tiling and progress tracking are mutually exclusive.
    assert(!is_tiled() || (is_tiled() && !track_progress()));
    // timer is only allowed for serial loops (just use it to get avg iterations per second or something like that)
    assert(!time_loop() || (time_loop() && !is_parallelized()));
    set_stage_function(create_stage_function());
    set_user_function(create_user_function());
    // stuff before the loop
    // build the return idx
    MVar *loop_start = new MVar(MScalarType::get_long_type()); // don't make a constant b/c it should be updateable
    loop_start->register_for_delete();
    MStatement *set_loop_start = new MStatement(loop_start, MVar::create_constant<long>(0));
    set_loop_start->register_for_delete();
    MStatement *set_result = new MStatement(get_return_idx(), loop_start);
    set_result->register_for_delete();
    set_start_block(new MBlock("start"));
    get_start_block()->register_for_delete();
    get_start_block()->add_expr(set_loop_start);
    get_start_block()->add_expr(set_result);
    // When we don't parallelize, then make the inner loop's index outside of both the loops rather than within
    // the outer loop. This is a hack for llvm because if we have an alloca call within each iteration of the outer loop,
    // we will be "leaking" stack space each time that is called, so moving it outside of the loop prevents that.
    // However, it makes it hard to work with when we then parallelize because the code sees that inner loop index as a
    // free variable that needs to be added to the closure. This is not fun because our index is now a pointer to an index
    // and then we would need to update the index by going through the pointer, etc. Basically, it would cause some hacks on the
    // LLVM side (and unless this becomes something that is needed in the future, I don't want to deal with it).
    // So instead, it is dealt with below. Without parallelization, the inner loop index is initialized outside of the
    // nested loop, and then updated to the correct start right before the inner loop begins execution.
    // When parallelization is turned on, the inner loop index is made INSIDE the outer loop. This is because the
    // parallelized outer loop calls a function every iteration which is the outer loop body, and then within that the
    // inner loop is created. alloca is scoped at the function level, so the inner loop index gets a single alloca
    // in this function call, and then the inner loop is created.
    // This may not be required of other possible back-end languages that we choose, but it will depend on their scoping rules.
    //
    // TL;DR LLVM has function scoping for allocainst, so if we create the inner loop index as so
    // val outer_index...
    // for outer_index...
    //   val inner_index...
    //   for inner_index...
    // every iteration of the outer loop adds space to the stack which isn't released until the function ends. So we want
    // val outer_index...
    // val inner_index...
    // for outer_index...
    //   for inner_index...
    MVar *inner_start = initialize<long>(MScalarType::get_long_type(), 0, get_start_block());
    MBlock *preallocation_block = create_preallocator();
    get_start_block()->add_expr(preallocation_block);
    // Timer is always constructed; only wired into the IR when time_loop() is set.
    MTimer *timer = nullptr;
    timer = new MTimer();
    timer->register_for_delete();
    MFor *outer_loop_skeleton_1 = nullptr;
    MFor *inner_loop_skeleton_1 = nullptr;
    MFor *outer_loop_skeleton_2 = nullptr;
    MFor *inner_loop_skeleton_2 = nullptr;
    MBlock *inner_loop_body = nullptr;
    // think of all comparisons as being in an NxM matrix where N is the left input and M is the right input.
    // N is the outermost iteration
    tile_size_N = MVar::create_constant<long>(2);
    tile_size_M = MVar::create_constant<long>(2);
    MVar *final_loop_bound;
    if (!is_tiled() || !is_tileable()) {
        // No tiling
        // To make sure that the inner loop doesn't get replace with a different bound if parallelizing, copy
        // the bound to a different variable and use that
        MVar *bound_copy = new MVar(MScalarType::get_long_type());
        bound_copy->register_for_delete();
        MStatement *set_copy = new MStatement(bound_copy, get_stage_function()->/*get_args()*/get_loaded_args()[3]);
        set_copy->register_for_delete();
        get_start_block()->add_expr(set_copy);
        // loop components
        MVar *outer_loop_start = initialize<long>(MScalarType::get_long_type(), 0, get_start_block());
        outer_loop_skeleton_1 = create_stage_for_loop(outer_loop_start, MVar::create_constant<long>(1),
                                                      get_stage_function()->/*get_args()*/get_loaded_args()[1],
                                                      false, get_start_block());
        if (is_parallelizable() && is_parallelized()) {
            outer_loop_skeleton_1->set_exec_type(PARALLEL);
        }
        // Inner loop start: 0 when comparing two distinct inputs (full NxM),
        // otherwise outer_index + 1 (upper triangle — skip self/duplicate pairs).
        MVar *_inner_start = nullptr;
        if ((left_input || right_input) && !_force_commutative) {
            _inner_start = initialize<long>(MScalarType::get_long_type(), 0, get_start_block());
        } else {
            MAdd *add = new MAdd(outer_loop_skeleton_1->get_loop_index(), MVar::create_constant<long>(1));
            outer_loop_skeleton_1->get_body_block()->add_expr(add);
            add->register_for_delete();
            _inner_start = add->get_result();
        }
        if (!time_loop()) {
            get_start_block()->add_expr(outer_loop_skeleton_1);
        } else {
            get_start_block()->add_expr(timer);
            timer->get_timer_block()->add_expr(outer_loop_skeleton_1);
        }
        MStatement *set_inner_start = new MStatement(inner_start, _inner_start);
        set_inner_start->register_for_delete();
        outer_loop_skeleton_1->get_body_block()->add_expr(set_inner_start);
        MBlock *temp_block = new MBlock();
        temp_block->register_for_delete();
        inner_loop_skeleton_1 = create_stage_for_loop(inner_start, MVar::create_constant<long>(1), bound_copy,
                                                      true, temp_block);
        // TODO hack, need to add the loop index initialization before the outer loop, but we have to add the outer loop before this since
        // the inner_start depends on the outer loop
        get_start_block()->insert_at(temp_block, get_start_block()->get_exprs().size() - 2); // insert right before the outer loop
        // stuff for calling the user function in the loop
        inner_loop_body = inner_loop_skeleton_1->get_body_block();
    } else if (is_tiled() && is_tileable()) {
        // tiling
        // loop components
        MDiv *_outer_1_bound = new MDiv(get_stage_function()->/*get_args()*/get_loaded_args()[1], tile_size_N);
        _outer_1_bound->register_for_delete();
        MDiv *_inner_1_bound = new MDiv(get_stage_function()->/*get_args()*/get_loaded_args()[3], tile_size_M);
        _inner_1_bound->register_for_delete();
        // compensate for when the number of elements isn't a multiple of the tile size
        MAdd *outer_1_bound = new MAdd(_outer_1_bound->get_result(), MVar::create_constant<long>(1));
        outer_1_bound->register_for_delete();
        MAdd *inner_1_bound = new MAdd(_inner_1_bound->get_result(), MVar::create_constant<long>(1));
        inner_1_bound->register_for_delete();
        get_start_block()->add_expr(_outer_1_bound);
        get_start_block()->add_expr(_inner_1_bound);
        get_start_block()->add_expr(outer_1_bound);
        get_start_block()->add_expr(inner_1_bound);
        MVar *outer_loop_start_1 = initialize<long>(MScalarType::get_long_type(), 0, get_start_block());
        outer_loop_start_1->override_name("outer_loop_start_1");
        MVar *inner_loop_start_1 = initialize<long>(MScalarType::get_long_type(), 0, get_start_block());
        inner_loop_start_1->override_name("inner_loop_start_1");
        MVar *outer_loop_start_2 = initialize<long>(MScalarType::get_long_type(), 0, get_start_block());
        outer_loop_start_2->override_name("outer_loop_start_2");
        MVar *inner_loop_start_2 = initialize<long>(MScalarType::get_long_type(), 0, get_start_block());
        inner_loop_start_2->override_name("inner_loop_start_2");
        // n = 0 to N/tile_size_N + 1
        outer_loop_skeleton_1 = create_stage_for_loop(outer_loop_start_1, MVar::create_constant<long>(1),
                                                      outer_1_bound->get_result(), true, get_start_block());
        outer_loop_skeleton_1->override_name("outer_loop_skeleton1");
        // // if (!time_loop()) {
        // get_start_block()->add_expr(outer_loop_skeleton_1);
        // } else {
        // get_start_block()->add_expr(timer);
        // timer->get_timer_block()->add_expr(outer_loop_skeleton_1);
        // }
        // m = 0 to M/tile_size_M + 1
        inner_loop_skeleton_1 = create_stage_for_loop(inner_loop_start_1, MVar::create_constant<long>(1),
                                                      inner_1_bound->get_result(), true, get_start_block());
        inner_loop_skeleton_1->override_name("inner_loop_skeleton1");
        // nn = 0 to tile_size_N
        outer_loop_skeleton_2 = create_stage_for_loop(outer_loop_start_2, MVar::create_constant<long>(1),
                                                      tile_size_N, true, get_start_block());
        outer_loop_skeleton_2->override_name("outer_loop_skeleton2");
        // mm = 0 to tile_size_M
        inner_loop_skeleton_2 = create_stage_for_loop(inner_loop_start_2, MVar::create_constant<long>(1),
                                                      tile_size_M, true, get_start_block());
        inner_loop_skeleton_2->override_name("inner_loop_skeleton2");
        if (!time_loop()) {
            get_start_block()->add_expr(outer_loop_skeleton_1);
        } else {
            get_start_block()->add_expr(timer);
            timer->get_timer_block()->add_expr(outer_loop_skeleton_1);
        }
        // Nest: outer1 > inner1 > outer2 > inner2; user code runs in inner2's body.
        inner_loop_skeleton_1->get_body_block()->add_expr(outer_loop_skeleton_2);
        outer_loop_skeleton_2->get_body_block()->add_expr(inner_loop_skeleton_2);
        inner_loop_body = inner_loop_skeleton_2->get_body_block();
    }
    // Build the argument-setup block and argument list for the user function call.
    MBlock *user_arg_block;
    std::vector<MVar *> args = create_user_function_inputs(&user_arg_block, outer_loop_skeleton_1,
                                                           outer_loop_skeleton_2, inner_loop_skeleton_1,
                                                           inner_loop_skeleton_2, nullptr, false, nullptr, nullptr,
                                                           get_stage_function()->/*get_args()*/get_loaded_args()[1],
                                                           get_stage_function()->/*get_args()*/get_loaded_args()[3]);
    if (!is_tiled() || !is_tileable()) {
        inner_loop_body->add_expr(user_arg_block);
    } // if tiled, this is already added in the create_user_function_inputs
    inner_loop_body = user_arg_block;
    // Remember where bucket application must later be inserted (before the call).
    int bucket_idx = inner_loop_body->get_exprs().size();
    MFunctionCall *call = call_user_function(get_user_function(), args);
    inner_loop_body->add_expr(call);
    // handle the output of the user call
    MBlock *processed_call = process_user_function_call(call, NULL, false);
    inner_loop_body->add_expr(processed_call);
    // do any other postprocessing needed in the loop before the next iteration
    MBlock *extra = loop_extras();
    inner_loop_body->add_expr(extra);
    if (track_progress() && !is_parallelized()) {
        // still return the original loop bound
        MBlock *temp = new MBlock();
        temp->register_for_delete();
        final_loop_bound = outer_loop_skeleton_1->get_loop_bound();
        outer_loop_skeleton_1->get_body_block()->add_expr(inner_loop_skeleton_1);
        inner_loop_body->insert_at(apply_buckets(args[0], args[1],
                                                 inner_loop_skeleton_2 ? inner_loop_skeleton_2 : inner_loop_skeleton_1),
                                   bucket_idx);
        std::pair<MFor *, MFor *> splits = ProgressTracker::create_progress_tracker(outer_loop_skeleton_1,
                                                                                    inner_loop_skeleton_1,
                                                                                    get_num_tracking_splits(),
                                                                                    temp, true);
        // find the original outer_loop_skeleton_1 in the block and remove it. Then replace with the new one in splits.first
        int idx = 0;
        if (!time_loop()) {
            for (std::vector<MExpr *>::const_iterator iter = get_start_block()->get_exprs().cbegin();
                 iter != get_start_block()->get_exprs().cend(); iter++) {
                if (*iter == outer_loop_skeleton_1) {
                    break;
                }
                idx++;
            }
            get_start_block()->remove_at(idx);
        } else {
            for (std::vector<MExpr *>::const_iterator iter = timer->get_timer_block()->get_exprs().cbegin();
                 iter != timer->get_timer_block()->get_exprs().cend(); iter++) {
                if (*iter == outer_loop_skeleton_1) {
                    break;
                }
                idx++;
            }
            timer->get_timer_block()->remove_at(idx);
        }
        outer_loop_skeleton_1 = splits.first; // do the replacement
        // outer_loop_skeleton_1 added to temp block in the progress tracker function
        if (!time_loop()) {
            get_stage_function()->add_body_block(temp);
        } else {
            timer->get_timer_block()->insert_at(temp, idx);
        }
    } else {
        outer_loop_skeleton_1->get_body_block()->add_expr(inner_loop_skeleton_1);
        final_loop_bound = outer_loop_skeleton_1->get_loop_bound();
        inner_loop_body->insert_at(apply_buckets(args[0], args[1],
                                                 inner_loop_skeleton_2 ? inner_loop_skeleton_2 : inner_loop_skeleton_1),
                                   bucket_idx);
    }
    // modify this loop if it needs to be parallelized
    if (is_parallelizable() && is_parallelized()) {
        parallelize_main_loop(get_start_block(), outer_loop_skeleton_1, inner_loop_skeleton_1);
    }
    // // if (is_tiled() && is_tileable()) {
    // inner_loop_skeleton_1->get_body_block()->add_expr(outer_loop_skeleton_2);
    // outer_loop_skeleton_2->get_body_block()->add_expr(inner_loop_skeleton_2);
    // }
    // postprocessing after the outer loop is done (no postprocessing needed after the inner loop since it just goes back to the outer loop)
    MBlock *after_loop = time_loop() ? timer->get_after_timer_block() : outer_loop_skeleton_1->get_end_block();
    MBlock *finished = finish_stage(nullptr, final_loop_bound);
    MBlock *deletion = delete_fields();
    after_loop->add_expr(deletion);
    after_loop->add_expr(finished);
    get_stage_function()->insert_body_block_at(get_start_block(), 1); // insert before the temp block, which would have been added if doing tracking. Insert after the stage arg loading though.
    // the temp block has the loop now, so it can't come before everything else
}