static void cycle(struct wlc_compositor *compositor) { struct wl_list *l = wlc_space_get_userdata(wlc_compositor_get_focused_space(compositor)); if (!l) return; struct wlc_view *v; uint32_t count = 0; wlc_view_for_each_user(v, l) if (is_tiled(v)) ++count; // Check that we have at least two tiled views // so we don't get in infinite loop. if (count <= 1) return; // Cycle until we hit next tiled view. struct wl_list *p; do { p = l->prev; wl_list_remove(l->prev); wl_list_insert(l, p); } while (!is_tiled(wlc_view_from_user_link(p))); relayout(wlc_compositor_get_focused_space(compositor)); }
static void relayout(struct wlc_space *space) { if (!space) return; struct wl_list *views; if (!(views = wlc_space_get_userdata(space))) return; uint32_t rwidth, rheight; struct wlc_output *output = wlc_space_get_output(space); wlc_output_get_resolution(output, &rwidth, &rheight); struct wlc_view *v; uint32_t count = 0; wlc_view_for_each_user(v, views) if (is_tiled(v)) ++count; bool toggle = false; uint32_t y = 0, height = rheight / (count > 1 ? count - 1 : 1); uint32_t fheight = (rheight > height * (count - 1) ? height + (rheight - height * (count - 1)) : height); wlc_view_for_each_user(v, views) { if (wlc_view_get_state(v) & WLC_BIT_FULLSCREEN) { wlc_view_resize(v, rwidth, rheight); wlc_view_position(v, 0, 0); } if (wlc_view_get_type(v) & WLC_BIT_SPLASH) wlc_view_position(v, rwidth * 0.5 - wlc_view_get_width(v) * 0.5, rheight * 0.5 - wlc_view_get_height(v) * 0.5); struct wlc_view *parent; if (is_managed(v) && !is_or(v) && (parent = wlc_view_get_parent(v))) layout_parent(v, parent, wlc_view_get_width(v), wlc_view_get_height(v)); if (!is_tiled(v)) continue; uint32_t slave = rwidth * loliwm.cut; wlc_view_set_state(v, WLC_BIT_MAXIMIZED, true); wlc_view_resize(v, (count > 1 ? (toggle ? slave : rwidth - slave) : rwidth), (toggle ? (y == 0 ? fheight : height) : rheight)); wlc_view_position(v, (toggle ? rwidth - slave : 0), y); if (toggle) y += (y == 0 ? fheight : height); toggle = true; } }
void gsc_hw_set_in_image_format(struct gsc_ctx *ctx) { struct gsc_dev *dev = ctx->gsc_dev; struct gsc_frame *frame = &ctx->s_frame; u32 i, depth = 0; u32 cfg; cfg = readl(dev->regs + GSC_IN_CON); cfg &= ~(GSC_IN_RGB_TYPE_MASK | GSC_IN_YUV422_1P_ORDER_MASK | GSC_IN_CHROMA_ORDER_MASK | GSC_IN_FORMAT_MASK | GSC_IN_TILE_TYPE_MASK | GSC_IN_TILE_MODE | GSC_IN_CHROM_STRIDE_SEL_MASK); writel(cfg, dev->regs + GSC_IN_CON); if (is_rgb(frame->fmt->pixelformat)) { gsc_hw_set_in_image_rgb(ctx); return; } for (i = 0; i < frame->fmt->num_planes; i++) depth += frame->fmt->depth[i]; switch (frame->fmt->nr_comp) { case 1: cfg |= GSC_IN_YUV422_1P; if (frame->fmt->yorder == GSC_LSB_Y) cfg |= GSC_IN_YUV422_1P_ORDER_LSB_Y; else cfg |= GSC_IN_YUV422_1P_OEDER_LSB_C; if (frame->fmt->corder == GSC_CBCR && !rt_is_flag(GSC_REORDER)) cfg |= GSC_IN_CHROMA_ORDER_CBCR; else cfg |= GSC_IN_CHROMA_ORDER_CRCB; break; case 2: if (depth == 12) cfg |= GSC_IN_YUV420_2P; else cfg |= GSC_IN_YUV422_2P; if (frame->fmt->corder == GSC_CBCR && !rt_is_flag(GSC_REORDER)) cfg |= GSC_IN_CHROMA_ORDER_CBCR; else cfg |= GSC_IN_CHROMA_ORDER_CRCB; break; case 3: if (depth == 12) cfg |= GSC_IN_YUV420_3P; else cfg |= GSC_IN_YUV422_3P; break; }; if (is_AYV12(frame->fmt->pixelformat)) gsc_hw_set_in_chrom_stride(ctx); if (is_tiled(frame->fmt)) cfg |= GSC_IN_TILE_C_16x8 | GSC_IN_TILE_MODE; writel(cfg, dev->regs + GSC_IN_CON); }
void gsc_hw_set_out_image_format(struct gsc_ctx *ctx) { struct gsc_dev *dev = ctx->gsc_dev; struct gsc_frame *frame = &ctx->d_frame; u32 i, depth = 0; u32 cfg; cfg = readl(dev->regs + GSC_OUT_CON); cfg &= ~(GSC_OUT_RGB_TYPE_MASK | GSC_OUT_YUV422_1P_ORDER_MASK | GSC_OUT_CHROMA_ORDER_MASK | GSC_OUT_FORMAT_MASK | GSC_OUT_TILE_TYPE_MASK | GSC_OUT_TILE_MODE); writel(cfg, dev->regs + GSC_OUT_CON); if (is_rgb(frame->fmt->color)) { gsc_hw_set_out_image_rgb(ctx); return; } if (ctx->out_path != GSC_DMA) { cfg |= GSC_OUT_YUV444; goto end_set; } for (i = 0; i < frame->fmt->num_planes; i++) depth += frame->fmt->depth[i]; switch (frame->fmt->nr_comp) { case 1: cfg |= GSC_OUT_YUV422_1P; if (frame->fmt->yorder == GSC_LSB_Y) cfg |= GSC_OUT_YUV422_1P_ORDER_LSB_Y; else cfg |= GSC_OUT_YUV422_1P_OEDER_LSB_C; if (frame->fmt->corder == GSC_CBCR) cfg |= GSC_OUT_CHROMA_ORDER_CBCR; else cfg |= GSC_OUT_CHROMA_ORDER_CRCB; break; case 2: if (depth == 12) cfg |= GSC_OUT_YUV420_2P; else cfg |= GSC_OUT_YUV422_2P; if (frame->fmt->corder == GSC_CBCR) cfg |= GSC_OUT_CHROMA_ORDER_CBCR; else cfg |= GSC_OUT_CHROMA_ORDER_CRCB; break; case 3: cfg |= GSC_OUT_YUV420_3P; break; }; if (is_tiled(frame->fmt)) cfg |= GSC_OUT_TILE_C_16x8 | GSC_OUT_TILE_MODE; end_set: writel(cfg, dev->regs + GSC_OUT_CON); }
/*
 * Make `n` the focused node of desktop `d` on monitor `m`.
 *
 * Resets the split mode to automatic and clears the client's urgency flag.
 * When `is_mapped` is true the borders of the affected windows are redrawn
 * and the X input focus is moved to the client's window. The window is then
 * raised (floating) or pseudo-raised (tiled), the desktop's focus history is
 * updated, and the EWMH active window and the status output are refreshed.
 *
 * Does nothing when `n` is NULL.
 */
void focus_node(monitor_t *m, desktop_t *d, node_t *n, bool is_mapped)
{
    if (n == NULL)
        return;

    PRINTF("focus node %X\n", n->client->window);

    split_mode = MODE_AUTOMATIC;
    n->client->urgent = false;

    if (is_mapped) {
        if (mon != m) {
            desktop_t *cd;

            /* Desktops of the currently selected monitor. */
            for (cd = mon->desk_head; cd != NULL; cd = cd->next)
                window_draw_border(cd->focus, true, false);

            /* Every desktop of the target monitor except `d`. */
            for (cd = m->desk_head; cd != NULL; cd = cd->next) {
                if (cd == d)
                    continue;
                window_draw_border(cd->focus, true, true);
            }

            if (d->focus == n)
                window_draw_border(n, true, true);
        }

        if (d->focus != n) {
            window_draw_border(d->focus, false, true);
            window_draw_border(n, true, true);
        }

        xcb_set_input_focus(dpy, XCB_INPUT_FOCUS_POINTER_ROOT, n->client->window, XCB_CURRENT_TIME);
    }

    if (focus_follows_pointer) {
        xcb_window_t pointed = XCB_NONE;

        get_pointed_window(&pointed);
        if (pointed == n->client->window)
            disable_motion_recorder();
        else
            enable_motion_recorder();
    }

    if (is_tiled(n->client)) {
        window_pseudo_raise(d, n->client->window);
    } else if (!adaptative_raise || !might_cover(d, n)) {
        window_raise(n->client->window);
    }

    /* Remember the previous focus only when the focus actually changes. */
    if (d->focus != n) {
        d->last_focus = d->focus;
        d->focus = n;
    }

    ewmh_update_active_window();
    put_status();
}
/*
 * Focus the leaf of desktop `d` whose client uid is closest to the uid of
 * `n`'s client in the direction given by `dir`.
 *
 * NEAREST_OLDER picks the greatest uid below n's uid, NEAREST_NEWER the
 * smallest uid above it. Only leaves accepted by the `skip` filter are
 * considered. If no leaf qualifies, focus_node() is called with NULL.
 *
 * Does nothing when `n` is NULL.
 */
void nearest_leaf(monitor_t *m, desktop_t *d, node_t *n, nearest_arg_t dir, skip_client_t skip)
{
    if (n == NULL)
        return;

    PUTS("nearest leaf");

    node_t *best = NULL;

    for (node_t *leaf = first_extrema(d->root); leaf != NULL; leaf = next_leaf(leaf)) {
        /* Filter: each test is only evaluated if no earlier one accepted the leaf. */
        bool accepted = (skip == CLIENT_SKIP_NONE);
        if (!accepted && skip == CLIENT_SKIP_TILED)
            accepted = !is_tiled(leaf->client);
        if (!accepted && skip == CLIENT_SKIP_FLOATING)
            accepted = is_tiled(leaf->client);
        if (!accepted && skip == CLIENT_SKIP_CLASS_DIFFER)
            accepted = (strcmp(leaf->client->class_name, n->client->class_name) == 0);
        if (!accepted && skip == CLIENT_SKIP_CLASS_EQUAL)
            accepted = (strcmp(leaf->client->class_name, n->client->class_name) != 0);

        if (!accepted)
            continue;

        bool closer_older = (dir == NEAREST_OLDER
                             && (leaf->client->uid < n->client->uid)
                             && (best == NULL || leaf->client->uid > best->client->uid));
        bool closer_newer = (dir == NEAREST_NEWER
                             && (leaf->client->uid > n->client->uid)
                             && (best == NULL || leaf->client->uid < best->client->uid));

        if (closer_older || closer_newer)
            best = leaf;
    }

    focus_node(m, d, best, true);
}
/*
 * Focus the next (or previous, for CYCLE_PREV) leaf after `n` that is
 * accepted by the `skip` filter, wrapping around at the ends of the tree.
 *
 * The walk stops without changing focus once it arrives back at `n`.
 * Does nothing when `n` is NULL.
 */
void cycle_leaf(monitor_t *m, desktop_t *d, node_t *n, cycle_dir_t dir, skip_client_t skip)
{
    if (n == NULL)
        return;

    PUTS("cycle leaf");

    node_t *cur = n;

    for (;;) {
        /* Step once in the requested direction, wrapping at the end. */
        cur = (dir == CYCLE_PREV ? prev_leaf(cur) : next_leaf(cur));
        if (cur == NULL)
            cur = (dir == CYCLE_PREV ? second_extrema(d->root) : first_extrema(d->root));

        /* Came full circle: nothing else qualifies. */
        if (cur == n)
            return;

        bool tiled = is_tiled(cur->client);
        bool accepted =
            skip == CLIENT_SKIP_NONE
            || (skip == CLIENT_SKIP_TILED && !tiled)
            || (skip == CLIENT_SKIP_FLOATING && tiled)
            || (skip == CLIENT_SKIP_CLASS_DIFFER
                && strcmp(cur->client->class_name, n->client->class_name) == 0)
            || (skip == CLIENT_SKIP_CLASS_EQUAL
                && strcmp(cur->client->class_name, n->client->class_name) != 0);

        if (accepted) {
            focus_node(m, d, cur, true);
            return;
        }
    }
}
/*
 * Start a pointer action (`pac`) on the window currently under the pointer.
 *
 * The pointer position and the located monitor/desktop/node/client are stored
 * in `frozen_pointer` so that later pointer tracking can work relative to
 * this snapshot.
 *
 *  - ACTION_FOCUS focuses (or restacks) the node and ends the action.
 *  - ACTION_MOVE / ACTION_RESIZE_* record the client's rectangle; clients that
 *    are neither tiled nor floating cancel the action. For resizes the grabbed
 *    side or corner is derived from the pointer position and, for tiled
 *    clients, the matching fences and their current split ratios are stored.
 *
 * When no managed window is under the pointer, ACTION_FOCUS focuses the
 * monitor under the pointer (if it differs from `mon`), and the action is
 * cancelled.
 */
void grab_pointer(pointer_action_t pac)
{
    PRINTF("grab pointer %u\n", pac);

    xcb_window_t win = XCB_NONE;
    xcb_point_t pos;
    coordinates_t loc;

    query_pointer(&win, &pos);

    /* Nothing managed under the pointer. */
    if (!locate_window(win, &loc)) {
        if (pac == ACTION_FOCUS) {
            monitor_t *m = monitor_from_point(pos);
            if (m != NULL && m != mon)
                focus_node(m, m->desk, m->desk->focus);
        }
        frozen_pointer->action = ACTION_NONE;
        return;
    }

    client_t *c = loc.node->client;

    /* Snapshot of the grab. */
    frozen_pointer->position = pos;
    frozen_pointer->action = pac;
    frozen_pointer->monitor = loc.monitor;
    frozen_pointer->desktop = loc.desktop;
    frozen_pointer->node = loc.node;
    frozen_pointer->client = c;
    frozen_pointer->window = c->window;
    frozen_pointer->horizontal_fence = NULL;
    frozen_pointer->vertical_fence = NULL;

    switch (pac) {
        case ACTION_FOCUS:
            if (loc.node == mon->desk->focus) {
                if (focus_follows_pointer)
                    stack(loc.node, STACK_ABOVE);
            } else {
                /* Focus without letting the pointer follow the monitor. */
                bool backup = pointer_follows_monitor;
                pointer_follows_monitor = false;
                focus_node(loc.monitor, loc.desktop, loc.node);
                pointer_follows_monitor = backup;
            }
            frozen_pointer->action = ACTION_NONE;
            break;

        case ACTION_MOVE:
        case ACTION_RESIZE_SIDE:
        case ACTION_RESIZE_CORNER:
            if (is_tiled(c)) {
                frozen_pointer->rectangle = c->tiled_rectangle;
                frozen_pointer->is_tiled = true;
            } else if (is_floating(c)) {
                frozen_pointer->rectangle = c->floating_rectangle;
                frozen_pointer->is_tiled = false;
            } else {
                frozen_pointer->action = ACTION_NONE;
                return;
            }

            if (pac == ACTION_RESIZE_SIDE) {
                /* The two diagonals of the rectangle split it into four triangles. */
                float rect_w = frozen_pointer->rectangle.width;
                float rect_h = frozen_pointer->rectangle.height;
                float ratio = rect_w / rect_h;
                float rel_x = pos.x - frozen_pointer->rectangle.x;
                float rel_y = pos.y - frozen_pointer->rectangle.y;
                float diag_a = ratio * rel_y;
                float diag_b = rect_w - diag_a;

                if (rel_x < diag_a)
                    frozen_pointer->side = (rel_x < diag_b ? SIDE_LEFT : SIDE_BOTTOM);
                else
                    frozen_pointer->side = (rel_x < diag_b ? SIDE_TOP : SIDE_RIGHT);

                if (frozen_pointer->is_tiled) {
                    switch (frozen_pointer->side) {
                        case SIDE_TOP:
                            frozen_pointer->horizontal_fence = find_fence(loc.node, DIR_UP);
                            break;
                        case SIDE_RIGHT:
                            frozen_pointer->vertical_fence = find_fence(loc.node, DIR_RIGHT);
                            break;
                        case SIDE_BOTTOM:
                            frozen_pointer->horizontal_fence = find_fence(loc.node, DIR_DOWN);
                            break;
                        case SIDE_LEFT:
                            frozen_pointer->vertical_fence = find_fence(loc.node, DIR_LEFT);
                            break;
                    }
                }
            } else if (pac == ACTION_RESIZE_CORNER) {
                /* The quadrant relative to the rectangle's centre picks the corner. */
                int16_t mid_x = frozen_pointer->rectangle.x + (frozen_pointer->rectangle.width / 2);
                int16_t mid_y = frozen_pointer->rectangle.y + (frozen_pointer->rectangle.height / 2);

                if (pos.x > mid_x)
                    frozen_pointer->corner = (pos.y > mid_y ? CORNER_BOTTOM_RIGHT : CORNER_TOP_RIGHT);
                else
                    frozen_pointer->corner = (pos.y > mid_y ? CORNER_BOTTOM_LEFT : CORNER_TOP_LEFT);

                if (frozen_pointer->is_tiled) {
                    switch (frozen_pointer->corner) {
                        case CORNER_TOP_LEFT:
                            frozen_pointer->horizontal_fence = find_fence(loc.node, DIR_UP);
                            frozen_pointer->vertical_fence = find_fence(loc.node, DIR_LEFT);
                            break;
                        case CORNER_TOP_RIGHT:
                            frozen_pointer->horizontal_fence = find_fence(loc.node, DIR_UP);
                            frozen_pointer->vertical_fence = find_fence(loc.node, DIR_RIGHT);
                            break;
                        case CORNER_BOTTOM_RIGHT:
                            frozen_pointer->horizontal_fence = find_fence(loc.node, DIR_DOWN);
                            frozen_pointer->vertical_fence = find_fence(loc.node, DIR_RIGHT);
                            break;
                        case CORNER_BOTTOM_LEFT:
                            frozen_pointer->horizontal_fence = find_fence(loc.node, DIR_DOWN);
                            frozen_pointer->vertical_fence = find_fence(loc.node, DIR_LEFT);
                            break;
                    }
                }
            }

            /* Remember the split ratios the fences had when the grab started. */
            if (frozen_pointer->is_tiled) {
                if (frozen_pointer->horizontal_fence != NULL)
                    frozen_pointer->horizontal_ratio = frozen_pointer->horizontal_fence->split_ratio;
                if (frozen_pointer->vertical_fence != NULL)
                    frozen_pointer->vertical_ratio = frozen_pointer->vertical_fence->split_ratio;
            }
            break;

        case ACTION_NONE:
            break;
    }
}
/*
 * Apply the effect of a pointer motion to the action stored in
 * `frozen_pointer`.
 *
 * root_x, root_y: current pointer position; the deltas are computed against
 * frozen_pointer->position.
 *
 *  - ACTION_MOVE on a tiled client swaps the node with the tiled node under
 *    the pointer (same monitor) or transfers it to another monitor.
 *  - ACTION_MOVE on a floating client moves the window and transfers the node
 *    when the pointer is over another monitor.
 *  - ACTION_RESIZE_* on a tiled client adjusts the split ratio of the stored
 *    fences (clamped to [0, 1]) and re-arranges.
 *  - ACTION_RESIZE_* on a floating client recomputes the floating rectangle
 *    from the stored rectangle and the deltas (width/height at least 1).
 *
 * Returns immediately when no action is in progress.
 */
void track_pointer(int root_x, int root_y)
{
    if (frozen_pointer->action == ACTION_NONE)
        return;

    /* NOTE: coordinates and sizes are deliberately 16 bit wide here; the
     * assignments below narrow implicitly. */
    int16_t delta_x, delta_y, x = 0, y = 0, w = 1, h = 1;
    uint16_t width, height;

    pointer_action_t pac = frozen_pointer->action;
    monitor_t *m = frozen_pointer->monitor;
    desktop_t *d = frozen_pointer->desktop;
    node_t *n = frozen_pointer->node;
    client_t *c = frozen_pointer->client;
    xcb_window_t win = frozen_pointer->window;
    xcb_rectangle_t rect = frozen_pointer->rectangle;
    node_t *vertical_fence = frozen_pointer->vertical_fence;
    node_t *horizontal_fence = frozen_pointer->horizontal_fence;

    delta_x = root_x - frozen_pointer->position.x;
    delta_y = root_y - frozen_pointer->position.y;

    switch (pac) {
        case ACTION_MOVE:
            if (frozen_pointer->is_tiled) {
                xcb_window_t pwin = XCB_NONE;
                query_pointer(&pwin, NULL);
                /* Still over the grabbed window: nothing to do. */
                if (pwin == win)
                    return;
                coordinates_t loc;
                /* `loc` is only filled in by locate_window() when it is called
                 * and succeeds. */
                bool is_managed = (pwin == XCB_NONE ? false : locate_window(pwin, &loc));
                if (is_managed && is_tiled(loc.node->client) && loc.monitor == m) {
                    swap_nodes(m, d, n, m, d, loc.node);
                    arrange(m, d);
                } else {
                    if (is_managed && loc.monitor == m) {
                        /* Managed but non-tiled window on the same monitor. */
                        return;
                    } else if (!is_managed) {
                        /* No managed window under the pointer: fall back to the
                         * monitor under the pointer and fill in `loc` by hand. */
                        xcb_point_t pt = (xcb_point_t) {root_x, root_y};
                        monitor_t *pmon = monitor_from_point(pt);
                        if (pmon == NULL || pmon == m) {
                            return;
                        } else {
                            loc.monitor = pmon;
                            loc.desktop = pmon->desk;
                        }
                    }
                    bool focused = (n == mon->desk->focus);
                    transfer_node(m, d, n, loc.monitor, loc.desktop, loc.desktop->focus);
                    if (focused)
                        focus_node(loc.monitor, loc.desktop, n);
                    /* Later motions must be evaluated against the new location. */
                    frozen_pointer->monitor = loc.monitor;
                    frozen_pointer->desktop = loc.desktop;
                }
            } else {
                x = rect.x + delta_x;
                y = rect.y + delta_y;
                window_move(win, x, y);
                c->floating_rectangle.x = x;
                c->floating_rectangle.y = y;
                xcb_point_t pt = (xcb_point_t) {root_x, root_y};
                monitor_t *pmon = monitor_from_point(pt);
                if (pmon == NULL || pmon == m)
                    return;
                /* The pointer crossed onto another monitor: move the node there. */
                bool focused = (n == mon->desk->focus);
                transfer_node(m, d, n, pmon, pmon->desk, pmon->desk->focus);
                if (focused)
                    focus_node(pmon, pmon->desk, n);
                frozen_pointer->monitor = pmon;
                frozen_pointer->desktop = pmon->desk;
            }
            break;
        case ACTION_RESIZE_SIDE:
        case ACTION_RESIZE_CORNER:
            if (frozen_pointer->is_tiled) {
                /* New ratio = ratio at grab time + pointer delta relative to
                 * the fence's extent, clamped to [0, 1]. */
                if (vertical_fence != NULL) {
                    double sr = frozen_pointer->vertical_ratio + (double) delta_x / vertical_fence->rectangle.width;
                    sr = MAX(0, sr);
                    sr = MIN(1, sr);
                    vertical_fence->split_ratio = sr;
                }
                if (horizontal_fence != NULL) {
                    double sr = frozen_pointer->horizontal_ratio + (double) delta_y / horizontal_fence->rectangle.height;
                    sr = MAX(0, sr);
                    sr = MIN(1, sr);
                    horizontal_fence->split_ratio = sr;
                }
                /* NOTE(review): this arranges the globally selected `mon`, not
                 * the grab's `m`/`d` - confirm that is intended. */
                arrange(mon, mon->desk);
            } else {
                if (pac == ACTION_RESIZE_SIDE) {
                    /* Only the grabbed side moves; the opposite side stays put. */
                    switch (frozen_pointer->side) {
                        case SIDE_TOP:
                            x = rect.x;
                            y = rect.y + delta_y;
                            w = rect.width;
                            h = rect.height - delta_y;
                            break;
                        case SIDE_RIGHT:
                            x = rect.x;
                            y = rect.y;
                            w = rect.width + delta_x;
                            h = rect.height;
                            break;
                        case SIDE_BOTTOM:
                            x = rect.x;
                            y = rect.y;
                            w = rect.width;
                            h = rect.height + delta_y;
                            break;
                        case SIDE_LEFT:
                            x = rect.x + delta_x;
                            y = rect.y;
                            w = rect.width - delta_x;
                            h = rect.height;
                            break;
                    }
                    /* Never let the window collapse below 1x1. */
                    width = MAX(1, w);
                    height = MAX(1, h);
                    window_move_resize(win, x, y, width, height);
                    c->floating_rectangle = (xcb_rectangle_t) {x, y, width, height};
                    window_draw_border(n, d->focus == n, mon == m);
                } else if (pac == ACTION_RESIZE_CORNER) {
                    /* The grabbed corner moves; the opposite corner stays put. */
                    switch (frozen_pointer->corner) {
                        case CORNER_TOP_LEFT:
                            x = rect.x + delta_x;
                            y = rect.y + delta_y;
                            w = rect.width - delta_x;
                            h = rect.height - delta_y;
                            break;
                        case CORNER_TOP_RIGHT:
                            x = rect.x;
                            y = rect.y + delta_y;
                            w = rect.width + delta_x;
                            h = rect.height - delta_y;
                            break;
                        case CORNER_BOTTOM_LEFT:
                            x = rect.x + delta_x;
                            y = rect.y;
                            w = rect.width - delta_x;
                            h = rect.height + delta_y;
                            break;
                        case CORNER_BOTTOM_RIGHT:
                            x = rect.x;
                            y = rect.y;
                            w = rect.width + delta_x;
                            h = rect.height + delta_y;
                            break;
                    }
                    /* Never let the window collapse below 1x1. */
                    width = MAX(1, w);
                    height = MAX(1, h);
                    window_move_resize(win, x, y, width, height);
                    c->floating_rectangle = (xcb_rectangle_t) {x, y, width, height};
                    window_draw_border(n, d->focus == n, mon == m);
                }
            }
            break;
        case ACTION_FOCUS:
        case ACTION_NONE:
            break;
    }
}
/*
 * Recursively assign rectangles to the subtree rooted at `n` and apply them
 * to the client windows.
 *
 * rect:      area available to `n`.
 * root_rect: area of the whole tree; used for tiled clients when the desktop
 *            is in monocle layout.
 *
 * Internal nodes split `rect` along their split type at `split_ratio`, unless
 * one of the children is vacant, in which case both children receive `rect`.
 * Leaves with a fullscreen client are left untouched. Tiled leaves are shrunk
 * by the window gap and twice the border width (minimum size 1); floating
 * leaves use their own floating rectangle.
 */
void apply_layout(monitor_t *m, desktop_t *d, node_t *n, xcb_rectangle_t rect, xcb_rectangle_t root_rect)
{
    if (n == NULL)
        return;

    n->rectangle = rect;

    /* Internal node: split the area and recurse. */
    if (!is_leaf(n)) {
        xcb_rectangle_t first_rect;
        xcb_rectangle_t second_rect;

        if (n->first_child->vacant || n->second_child->vacant) {
            first_rect = second_rect = rect;
        } else if (n->split_type == TYPE_VERTICAL) {
            unsigned int fence = rect.width * n->split_ratio;
            first_rect = (xcb_rectangle_t) {rect.x, rect.y, fence, rect.height};
            second_rect = (xcb_rectangle_t) {rect.x + fence, rect.y, rect.width - fence, rect.height};
        } else if (n->split_type == TYPE_HORIZONTAL) {
            unsigned int fence = rect.height * n->split_ratio;
            first_rect = (xcb_rectangle_t) {rect.x, rect.y, rect.width, fence};
            second_rect = (xcb_rectangle_t) {rect.x, rect.y + fence, rect.width, rect.height - fence};
        }

        apply_layout(m, d, n->first_child, first_rect, root_rect);
        apply_layout(m, d, n->second_child, second_rect, root_rect);
        return;
    }

    /* Leaf node. */
    if (n->client->fullscreen)
        return;

    /* Keep the outer size of a floating window stable when the border width changes. */
    if (is_floating(n->client) && n->client->border_width != border_width) {
        int ds = 2 * (border_width - n->client->border_width);
        n->client->floating_rectangle.width += ds;
        n->client->floating_rectangle.height += ds;
    }

    n->client->border_width =
        (borderless_monocle && is_tiled(n->client) && d->layout == LAYOUT_MONOCLE)
            ? 0
            : border_width;

    xcb_rectangle_t r;

    if (!is_tiled(n->client)) {
        r = n->client->floating_rectangle;
    } else {
        if (d->layout == LAYOUT_TILED)
            r = rect;
        else if (d->layout == LAYOUT_MONOCLE)
            r = root_rect;

        int wg = (gapless_monocle && d->layout == LAYOUT_MONOCLE ? 0 : window_gap);
        int bleed = wg + 2 * n->client->border_width;

        /* Make room for gap and borders, but never go below one pixel. */
        if (bleed < r.width)
            r.width = r.width - bleed;
        else
            r.width = 1;

        if (bleed < r.height)
            r.height = r.height - bleed;
        else
            r.height = 1;

        n->client->tiled_rectangle = r;
    }

    window_move_resize(n->client->window, r.x, r.y, r.width, r.height);
    window_border_width(n->client->window, n->client->border_width);
    window_draw_border(n, n == d->focus, m == mon);
}
// Builds the IR for a comparison stage: a start block, a nested loop over the
// two inputs (optionally tiled, timed, progress-tracked and/or parallelized)
// whose innermost body calls the user function, and the post-loop
// finish/cleanup blocks.
//
// Preconditions (asserted): tiling cannot be combined with progress tracking,
// and timing cannot be combined with parallelization.
void ComparisonStageIR::build_stage() {
    assert(!is_tiled() || (is_tiled() && !track_progress()));
    // timer is only allowed for serial loops (just use it to get avg iterations per second or something like that)
    assert(!time_loop() || (time_loop() && !is_parallelized()));

    set_stage_function(create_stage_function());
    set_user_function(create_user_function());

    // stuff before the loop
    // build the return idx
    MVar *loop_start = new MVar(MScalarType::get_long_type()); // don't make a constant b/c it should be updateable
    loop_start->register_for_delete();
    MStatement *set_loop_start = new MStatement(loop_start, MVar::create_constant<long>(0));
    set_loop_start->register_for_delete();
    MStatement *set_result = new MStatement(get_return_idx(), loop_start);
    set_result->register_for_delete();
    set_start_block(new MBlock("start"));
    get_start_block()->register_for_delete();
    get_start_block()->add_expr(set_loop_start);
    get_start_block()->add_expr(set_result);

    // When we don't parallelize, then make the inner loop's index outside of both the loops rather than within
    // the outer loop. This is a hack for llvm because if we have an alloca call within each iteration of the outer loop,
    // we will be "leaking" stack space each time that is called, so moving it outside of the loop prevents that.
    // However, it makes it hard to work with when we then parallelize because the code sees that inner loop index as a
    // free variable that needs to be added to the closure. This is not fun because our index is now a pointer to an index
    // and then we would need to update the index by going through the pointer, etc. Basically, it would cause some hacks on the
    // LLVM side (and unless this becomes something that is needed in the future, I don't want to deal with it).
    // So instead, it is dealt with below. Without parallelization, the inner loop index is initialized outside of the
    // nested loop, and then updated to the correct start right before the inner loop begins execution.
    // When parallelization is turned on, the inner loop index is made INSIDE the outer loop. This is because the
    // parallelized outer loop calls a function every iteration which is the outer loop body, and then within that the
    // inner loop is created. alloca is scoped at the function level, so the inner loop index gets a single alloca
    // in this function call, and then the inner loop is created.
    // This may not be required of other possible back-end languages that we choose, but it will depend on their scoping rules.
    //
    // TL;DR LLVM has function scoping for allocainst, so if we create the inner loop index as so
    //   val outer_index...
    //   for outer_index...
    //     val inner_index...
    //     for inner_index...
    // every iteration of the outer loop adds space to the stack which isn't released until the function ends. So we want
    //   val outer_index...
    //   val inner_index...
    //   for outer_index...
    //     for inner_index...
    MVar *inner_start = initialize<long>(MScalarType::get_long_type(), 0, get_start_block());

    MBlock *preallocation_block = create_preallocator();
    get_start_block()->add_expr(preallocation_block);

    // The timer object is always created; it is only inserted into the start
    // block on the time_loop() paths below.
    MTimer *timer = nullptr;
    timer = new MTimer();
    timer->register_for_delete();

    MFor *outer_loop_skeleton_1 = nullptr;
    MFor *inner_loop_skeleton_1 = nullptr;
    MFor *outer_loop_skeleton_2 = nullptr;
    MFor *inner_loop_skeleton_2 = nullptr;
    MBlock *inner_loop_body = nullptr;

    // think of all comparisons as being in an NxM matrix where N is the left input and M is the right input.
    // N is the outermost iteration
    // Tile sizes are hard-coded to 2 in both dimensions.
    tile_size_N = MVar::create_constant<long>(2);
    tile_size_M = MVar::create_constant<long>(2);

    // Assigned in both branches of the track_progress() conditional further down.
    MVar *final_loop_bound;

    if (!is_tiled() || !is_tileable()) { // No tiling
        // To make sure that the inner loop doesn't get replaced with a different bound if parallelizing, copy
        // the bound to a different variable and use that
        MVar *bound_copy = new MVar(MScalarType::get_long_type());
        bound_copy->register_for_delete();
        MStatement *set_copy = new MStatement(bound_copy, get_stage_function()->/*get_args()*/get_loaded_args()[3]);
        set_copy->register_for_delete();
        get_start_block()->add_expr(set_copy);

        // loop components
        MVar *outer_loop_start = initialize<long>(MScalarType::get_long_type(), 0, get_start_block());
        outer_loop_skeleton_1 = create_stage_for_loop(outer_loop_start, MVar::create_constant<long>(1), get_stage_function()->/*get_args()*/get_loaded_args()[1], false, get_start_block());
        if (is_parallelizable() && is_parallelized()) {
            outer_loop_skeleton_1->set_exec_type(PARALLEL);
        }

        // Start of the inner loop: 0 when two inputs are compared and
        // commutativity is not forced, otherwise (outer index + 1).
        MVar *_inner_start = nullptr;
        if ((left_input || right_input) && !_force_commutative) {
            _inner_start = initialize<long>(MScalarType::get_long_type(), 0, get_start_block());
        } else {
            MAdd *add = new MAdd(outer_loop_skeleton_1->get_loop_index(), MVar::create_constant<long>(1));
            outer_loop_skeleton_1->get_body_block()->add_expr(add);
            add->register_for_delete();
            _inner_start = add->get_result();
        }

        if (!time_loop()) {
            get_start_block()->add_expr(outer_loop_skeleton_1);
        } else {
            get_start_block()->add_expr(timer);
            timer->get_timer_block()->add_expr(outer_loop_skeleton_1);
        }

        MStatement *set_inner_start = new MStatement(inner_start, _inner_start);
        set_inner_start->register_for_delete();
        outer_loop_skeleton_1->get_body_block()->add_expr(set_inner_start);

        MBlock *temp_block = new MBlock();
        temp_block->register_for_delete();
        inner_loop_skeleton_1 = create_stage_for_loop(inner_start, MVar::create_constant<long>(1), bound_copy, true, temp_block);
        // TODO hack, need to add the loop index initialization before the outer loop, but we have to add the outer loop before this since
        // the inner_start depends on the outer loop
        get_start_block()->insert_at(temp_block, get_start_block()->get_exprs().size() - 2); // insert right before the outer loop

        // stuff for calling the user function in the loop
        inner_loop_body = inner_loop_skeleton_1->get_body_block();
    } else if (is_tiled() && is_tileable()) { // tiling
        // loop components
        MDiv *_outer_1_bound = new MDiv(get_stage_function()->/*get_args()*/get_loaded_args()[1], tile_size_N);
        _outer_1_bound->register_for_delete();
        MDiv *_inner_1_bound = new MDiv(get_stage_function()->/*get_args()*/get_loaded_args()[3], tile_size_M);
        _inner_1_bound->register_for_delete();
        // compensate for when the number of elements isn't a multiple of the tile size
        MAdd *outer_1_bound = new MAdd(_outer_1_bound->get_result(), MVar::create_constant<long>(1));
        outer_1_bound->register_for_delete();
        MAdd *inner_1_bound = new MAdd(_inner_1_bound->get_result(), MVar::create_constant<long>(1));
        inner_1_bound->register_for_delete();
        get_start_block()->add_expr(_outer_1_bound);
        get_start_block()->add_expr(_inner_1_bound);
        get_start_block()->add_expr(outer_1_bound);
        get_start_block()->add_expr(inner_1_bound);

        MVar *outer_loop_start_1 = initialize<long>(MScalarType::get_long_type(), 0, get_start_block());
        outer_loop_start_1->override_name("outer_loop_start_1");
        MVar *inner_loop_start_1 = initialize<long>(MScalarType::get_long_type(), 0, get_start_block());
        inner_loop_start_1->override_name("inner_loop_start_1");
        MVar *outer_loop_start_2 = initialize<long>(MScalarType::get_long_type(), 0, get_start_block());
        outer_loop_start_2->override_name("outer_loop_start_2");
        MVar *inner_loop_start_2 = initialize<long>(MScalarType::get_long_type(), 0, get_start_block());
        inner_loop_start_2->override_name("inner_loop_start_2");

        // n = 0 to N/tile_size_N + 1
        outer_loop_skeleton_1 = create_stage_for_loop(outer_loop_start_1, MVar::create_constant<long>(1), outer_1_bound->get_result(), true, get_start_block());
        outer_loop_skeleton_1->override_name("outer_loop_skeleton1");
        // Disabled: the outer loop is added to the start block after all four
        // loop skeletons have been created (see below).
        //
        //        if (!time_loop()) {
        //            get_start_block()->add_expr(outer_loop_skeleton_1);
        //        } else {
        //            get_start_block()->add_expr(timer);
        //            timer->get_timer_block()->add_expr(outer_loop_skeleton_1);
        //        }

        // m = 0 to M/tile_size_M + 1
        inner_loop_skeleton_1 = create_stage_for_loop(inner_loop_start_1, MVar::create_constant<long>(1), inner_1_bound->get_result(), true, get_start_block());
        inner_loop_skeleton_1->override_name("inner_loop_skeleton1");

        // nn = 0 to tile_size_N
        outer_loop_skeleton_2 = create_stage_for_loop(outer_loop_start_2, MVar::create_constant<long>(1), tile_size_N, true, get_start_block());
        outer_loop_skeleton_2->override_name("outer_loop_skeleton2");

        // mm = 0 to tile_size_M
        inner_loop_skeleton_2 = create_stage_for_loop(inner_loop_start_2, MVar::create_constant<long>(1), tile_size_M, true, get_start_block());
        inner_loop_skeleton_2->override_name("inner_loop_skeleton2");

        if (!time_loop()) {
            get_start_block()->add_expr(outer_loop_skeleton_1);
        } else {
            get_start_block()->add_expr(timer);
            timer->get_timer_block()->add_expr(outer_loop_skeleton_1);
        }

        // Nesting: inner_1 { outer_2 { inner_2 { body } } }; inner_1 itself is
        // attached to outer_1 further down.
        inner_loop_skeleton_1->get_body_block()->add_expr(outer_loop_skeleton_2);
        outer_loop_skeleton_2->get_body_block()->add_expr(inner_loop_skeleton_2);
        inner_loop_body = inner_loop_skeleton_2->get_body_block();
    }

    MBlock *user_arg_block;
    std::vector<MVar *> args = create_user_function_inputs(&user_arg_block, outer_loop_skeleton_1, outer_loop_skeleton_2, inner_loop_skeleton_1, inner_loop_skeleton_2, nullptr, false, nullptr, nullptr, get_stage_function()->/*get_args()*/get_loaded_args()[1], get_stage_function()->/*get_args()*/get_loaded_args()[3]);
    if (!is_tiled() || !is_tileable()) {
        inner_loop_body->add_expr(user_arg_block);
    } // if tiled, this is already added in the create_user_function_inputs
    // From here on everything is appended to the user-argument block.
    inner_loop_body = user_arg_block;
    // Position at which the bucket application is inserted later (just before the user call).
    int bucket_idx = inner_loop_body->get_exprs().size();
    MFunctionCall *call = call_user_function(get_user_function(), args);
    inner_loop_body->add_expr(call);

    // handle the output of the user call
    MBlock *processed_call = process_user_function_call(call, NULL, false);
    inner_loop_body->add_expr(processed_call);

    // do any other postprocessing needed in the loop before the next iteration
    MBlock *extra = loop_extras();
    inner_loop_body->add_expr(extra);

    if (track_progress() && !is_parallelized()) {
        // still return the original loop bound
        MBlock *temp = new MBlock();
        temp->register_for_delete();
        final_loop_bound = outer_loop_skeleton_1->get_loop_bound();
        outer_loop_skeleton_1->get_body_block()->add_expr(inner_loop_skeleton_1);
        inner_loop_body->insert_at(apply_buckets(args[0], args[1], inner_loop_skeleton_2 ? inner_loop_skeleton_2 : inner_loop_skeleton_1), bucket_idx);
        std::pair<MFor *, MFor *> splits = ProgressTracker::create_progress_tracker(outer_loop_skeleton_1, inner_loop_skeleton_1, get_num_tracking_splits(), temp, true);
        // find the original outer_loop_skeleton_1 in the block and remove it. Then replace with the new one in splits.first
        // NOTE(review): if the loop is not found, idx equals the block size when
        // remove_at() is called - confirm remove_at() tolerates that.
        // NOTE(review): cbegin()/cend() are taken from separate get_exprs()
        // calls; this is only valid if get_exprs() returns a reference - confirm.
        int idx = 0;
        if (!time_loop()) {
            for (std::vector<MExpr *>::const_iterator iter = get_start_block()->get_exprs().cbegin(); iter != get_start_block()->get_exprs().cend(); iter++) {
                if (*iter == outer_loop_skeleton_1) {
                    break;
                }
                idx++;
            }
            get_start_block()->remove_at(idx);
        } else {
            for (std::vector<MExpr *>::const_iterator iter = timer->get_timer_block()->get_exprs().cbegin(); iter != timer->get_timer_block()->get_exprs().cend(); iter++) {
                if (*iter == outer_loop_skeleton_1) {
                    break;
                }
                idx++;
            }
            timer->get_timer_block()->remove_at(idx);
        }
        outer_loop_skeleton_1 = splits.first; // do the replacement
        // outer_loop_skeleton_1 added to temp block in the progress tracker function
        if (!time_loop()) {
            get_stage_function()->add_body_block(temp);
        } else {
            timer->get_timer_block()->insert_at(temp, idx);
        }
    } else {
        outer_loop_skeleton_1->get_body_block()->add_expr(inner_loop_skeleton_1);
        final_loop_bound = outer_loop_skeleton_1->get_loop_bound();
        inner_loop_body->insert_at(apply_buckets(args[0], args[1], inner_loop_skeleton_2 ? inner_loop_skeleton_2 : inner_loop_skeleton_1), bucket_idx);
    }

    // modify this loop if it needs to be parallelized
    if (is_parallelizable() && is_parallelized()) {
        parallelize_main_loop(get_start_block(), outer_loop_skeleton_1, inner_loop_skeleton_1);
    }

    // Disabled: the tiled loop nesting is already set up in the tiling branch above.
    //
    //    if (is_tiled() && is_tileable()) {
    //        inner_loop_skeleton_1->get_body_block()->add_expr(outer_loop_skeleton_2);
    //        outer_loop_skeleton_2->get_body_block()->add_expr(inner_loop_skeleton_2);
    //    }

    // postprocessing after the outer loop is done (no postprocessing needed after the inner loop since it just goes back to the outer loop)
    MBlock *after_loop = time_loop() ? timer->get_after_timer_block() : outer_loop_skeleton_1->get_end_block();
    MBlock *finished = finish_stage(nullptr, final_loop_bound);
    MBlock *deletion = delete_fields();
    after_loop->add_expr(deletion);
    after_loop->add_expr(finished);

    get_stage_function()->insert_body_block_at(get_start_block(), 1); // insert before the temp block, which would have been added if doing tracking. Insert after the stage arg loading though.
    // the temp block has the loop now, so it can't come before everything else
}
// TODO once I get the indexing right, I can fix preallocation so that only the correct number of outputs are preallocated, not just N^2 // TODO Can also fix the number output (does that need to be fixed?) std::vector<MVar *> ComparisonStageIR::create_user_function_inputs(MBlock **mblock, MFor *outer_loop, MFor *outer_tiled_inner, MFor *inner_loop, MFor *inner_tiled_inner, MVar *, bool, MVar *, MVar *, MVar *original_num_inputs_left, MVar *original_num_inputs_right) { // body of the outer MFor passed in is the inner MFor loop std::vector<MVar *> stage_args = get_stage_function()->get_loaded_args();//get_args(); std::vector<MVar *> args; // Think of the indices into the two input arrays as coordinates into a matrix. The outer coordinate is for N, i.e. the row number. // The inner coordinate is for M, i.e. the column number. MVar *final_outer_coordinate; MVar *final_inner_coordinate; // get the outer and inner input elements // if tiled, the computation for the indices is different if (is_tiled() && is_tileable()) { if ((left_input || right_input) && !_force_commutative) { // N x M assert(original_num_inputs_left && original_num_inputs_right); // sanity check MVar *n = outer_loop->get_loop_index(); MVar *m = inner_loop->get_loop_index(); MVar *nn = outer_tiled_inner->get_loop_index(); MVar *mm = inner_tiled_inner->get_loop_index(); // outer = n * tile_size_N + nn final_outer_coordinate = get_element(stage_args[0], n, tile_size_N, nn, outer_tiled_inner->get_body_block(), inner_loop, &args, original_num_inputs_left, nullptr); // inner = m * M + mm final_inner_coordinate = get_element(stage_args[2], m, tile_size_M, mm, inner_tiled_inner->get_body_block(), outer_tiled_inner, &args, original_num_inputs_right, mblock); } else { // (N^2-N)/2 assert(original_num_inputs_left && original_num_inputs_right); // sanity check MVar *n = outer_loop->get_loop_index(); MVar *m = inner_loop->get_loop_index(); MVar *nn = outer_tiled_inner->get_loop_index(); MVar *mm = 
inner_tiled_inner->get_loop_index(); // outer = n * tile_size_N + nn final_outer_coordinate = get_element(stage_args[0], n, tile_size_N, nn, outer_tiled_inner->get_body_block(), inner_loop, &args, original_num_inputs_left, nullptr); // the outer doesn't change with commutativity // this code could almost be handled by get_element, but the conditional is more complex, so I just leave it here for now rather than // trying to refactor it. // inner = m * M + mm int inner_insert_idx = 0; MBlock *linear_inner = new MBlock(); linear_inner->register_for_delete(); MVar *inner_idx = compute_linear_index(m, tile_size_M, mm, linear_inner); inner_tiled_inner->get_body_block()->insert_at(linear_inner, inner_insert_idx++); final_inner_coordinate = inner_idx; // check that the inner index is still in range (< M) and that it is less than the outer idx // TODO this assumes that the integral value of true is 1. In the future, create an MTrue and MFalse type // that allows arithmetic to be done on it. Then I can plug in the actual values when generating the back end code, such as LLVM. MSLT *is_inner_in_range = new MSLT(inner_idx, original_num_inputs_right); is_inner_in_range->register_for_delete(); inner_tiled_inner->get_body_block()->insert_at(is_inner_in_range, inner_insert_idx++); MSLT *is_less_than_outer = new MSLT(inner_idx, final_outer_coordinate); is_less_than_outer->register_for_delete(); is_less_than_outer->override_name("inner_less_than_outer"); inner_tiled_inner->get_body_block()->insert_at(is_less_than_outer, inner_insert_idx++); // since we don't have a compound conditional type (YET), we get the results of the two SLT calls here. // If they sum to 2, then both are true since we assume true == 1. This way, we only need a single if // statement checking the value of the addition. 
MCast *is_inner_in_range_long = new MCast(is_inner_in_range->get_result(), MScalarType::get_long_type()); is_inner_in_range_long->register_for_delete(); inner_tiled_inner->get_body_block()->insert_at(is_inner_in_range_long, inner_insert_idx++); MCast *is_less_than_outer_long = new MCast(is_less_than_outer->get_result(), MScalarType::get_long_type()); is_less_than_outer_long->register_for_delete(); inner_tiled_inner->get_body_block()->insert_at(is_less_than_outer_long, inner_insert_idx++); MAdd *sum_of_conditionals = new MAdd(is_inner_in_range_long->get_casted(), is_less_than_outer_long->get_casted()); sum_of_conditionals->register_for_delete(); inner_tiled_inner->get_body_block()->insert_at(sum_of_conditionals, inner_insert_idx++); MEq *is_in_range_and_less_than = new MEq(sum_of_conditionals->get_result(), MVar::create_constant<long>(2)); is_in_range_and_less_than->register_for_delete(); inner_tiled_inner->get_body_block()->insert_at(is_in_range_and_less_than, inner_insert_idx++); MBlock *inner_is_in_range_and_less_than = new MBlock(); inner_is_in_range_and_less_than->register_for_delete(); MBlock *inner_not_in_range_nor_less_than = new MBlock(); inner_not_in_range_nor_less_than->register_for_delete(); MBlock *dummy_inner = new MBlock(); dummy_inner->register_for_delete(); MIfThenElse *inner_ite = new MIfThenElse(is_in_range_and_less_than->get_result(), inner_is_in_range_and_less_than, inner_not_in_range_nor_less_than, dummy_inner, nullptr); inner_ite->register_for_delete(); inner_tiled_inner->get_body_block()->insert_at(inner_ite, inner_insert_idx++); inner_ite->override_name("inner_ite"); // If in range, get the inner element and then go to the innermost tiled loop. // Since the innermost loop is already in outer_tiled_inner's body, remove it from there (and any other stuff that should only // execute if the we are in range) and then add it to the outer_is_in_range block. 
MIndex *get_inner_input = new MIndex(stage_args[2], inner_idx, create_type<MElementType *>(), "inner_input_element"); get_inner_input->register_for_delete(); inner_is_in_range_and_less_than->add_expr(get_inner_input); args.push_back(get_inner_input->get_result()); inner_is_in_range_and_less_than->add_exprs(inner_tiled_inner->get_body_block()->remove_range(inner_insert_idx++, -1)); // If out of range, continue to the next iteration of the outer_tiled_inner_loop MContinue *to_nn_loop = new MContinue(outer_tiled_inner); to_nn_loop->register_for_delete(); inner_not_in_range_nor_less_than->add_expr(to_nn_loop); *mblock = inner_is_in_range_and_less_than; } } else { // the loop indices are already setup by this point depending on whether we are NxM or N^2 MVar *current_outer_idx = outer_loop->get_loop_index(); MVar *current_inner_idx = inner_loop->get_loop_index(); final_outer_coordinate = current_outer_idx; final_inner_coordinate = current_inner_idx; MIndex *outer_element = new MIndex(stage_args[0], current_outer_idx, create_type<MElementType *>(), "outer_input_element"); outer_element->register_for_delete(); MIndex *inner_element = new MIndex(stage_args[2], current_inner_idx, create_type<MElementType *>(), "inner_input_element"); inner_element->register_for_delete(); outer_loop->get_body_block()->add_expr(outer_element); inner_loop->get_body_block()->add_expr(inner_element); args.push_back(outer_element->get_result()); args.push_back(inner_element->get_result()); *mblock = new MBlock(); (*mblock)->register_for_delete(); } // if this has an output, make the output element // this doesn't care if we are tiled or not. The equations are the same since we appropriately set the coordinates // above based on tiling or not. 
MVar *final_index; if (compareVIO) { // First create "shell" for a new Element* to be passed to the user MVar *new_element = new MVar(create_type<MElementType*>(), "output_element"); new_element->register_for_delete(); // create the statement that will actually initialize the value // compute the current output index if ((left_input || right_input) && !_force_commutative) { // N x M // equation for linearizing the coordinates is: // final_outer_coordinate X original_num_inputs_right + final_inner_coordinate MMul *mul = new MMul(final_outer_coordinate, original_num_inputs_right); mul->register_for_delete(); (*mblock)->add_expr(mul); MAdd *add = new MAdd(mul->get_result(), final_inner_coordinate); add->register_for_delete(); (*mblock)->add_expr(add); final_index = add->get_result(); } else { // N^2 and/or commutative // equation for linearizing the coordinates is: // [final_outer_coordinate^2 - final_outer_coordinate]/2 + final_inner_coordinate // the division term in this equation tells you how many elements have come before you. Then the addition // adds on your position in the current row. // It's not straightforward like the NxM version because we are only doing comparisons between elements // in the lower triangular part of the matrix (excluding the diagonal), so the linear indices from // the NxM version would give non-consecutive indices. This basically takes those indices and compresses // them down from 0 to however many comparisons we do. 
MMul *squared = new MMul(final_outer_coordinate, final_outer_coordinate); squared->register_for_delete(); (*mblock)->add_expr(squared); MSub *sub = new MSub(squared->get_result(), final_outer_coordinate); sub->register_for_delete(); (*mblock)->add_expr(sub); MDiv *div = new MDiv(sub->get_result(), MVar::create_constant<long>(2)); div->register_for_delete(); (*mblock)->add_expr(div); MAdd *add = new MAdd(div->get_result(), final_inner_coordinate); add->register_for_delete(); (*mblock)->add_expr(add); final_index = add->get_result(); } MStatement *set_new_element = new MStatement(new_element, nullptr); // nullptr tells it to create a new value set_new_element->register_for_delete(); set_new_element->add_parameter(final_index); // this is the id of the Element to be created (*mblock)->add_expr(set_new_element); args.push_back(new_element); // now set the Element in the output array MStatementIdx *set = new MStatementIdx(stage_args[4], new_element, final_index); set->register_for_delete(); (*mblock)->add_expr(set); } return args; }