static int debug_triangle(int tilex, int tiley, const union lp_rast_cmd_arg arg, struct tile *tile, char val) { const struct lp_rast_triangle *tri = arg.triangle.tri; unsigned plane_mask = arg.triangle.plane_mask; const struct lp_rast_plane *tri_plane = GET_PLANES(tri); struct lp_rast_plane plane[8]; int x, y; int count = 0; unsigned i, nr_planes = 0; boolean blend = tile->state->variant->key.blend.rt[0].blend_enable; if (tri->inputs.disable) { /* This triangle was partially binned and has been disabled */ return 0; } while (plane_mask) { plane[nr_planes] = tri_plane[u_bit_scan(&plane_mask)]; plane[nr_planes].c = (plane[nr_planes].c + IMUL64(plane[nr_planes].dcdy, tiley) - IMUL64(plane[nr_planes].dcdx, tilex)); nr_planes++; } for(y = 0; y < TILE_SIZE; y++) { for(x = 0; x < TILE_SIZE; x++) { for (i = 0; i < nr_planes; i++) if (plane[i].c <= 0) goto out; plot(tile, x, y, val, blend); count++; out: for (i = 0; i < nr_planes; i++) plane[i].c -= plane[i].dcdx; } for (i = 0; i < nr_planes; i++) { plane[i].c += IMUL64(plane[i].dcdx, TILE_SIZE); plane[i].c += plane[i].dcdy; } } return count; }
void lp_rast_triangle_3_4(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { const struct lp_rast_triangle *tri = arg.triangle.tri; const struct lp_rast_plane *plane = GET_PLANES(tri); unsigned x = (arg.triangle.plane_mask & 0xff) + task->x; unsigned y = (arg.triangle.plane_mask >> 8) + task->y; __m128i p0 = _mm_load_si128((__m128i *)&plane[0]); /* c, dcdx, dcdy, eo */ __m128i p1 = _mm_load_si128((__m128i *)&plane[1]); /* c, dcdx, dcdy, eo */ __m128i p2 = _mm_load_si128((__m128i *)&plane[2]); /* c, dcdx, dcdy, eo */ __m128i zero = _mm_setzero_si128(); __m128i c; __m128i dcdx; __m128i dcdy; __m128i dcdx2; __m128i dcdx3; __m128i span_0; /* 0,dcdx,2dcdx,3dcdx for plane 0 */ __m128i span_1; /* 0,dcdx,2dcdx,3dcdx for plane 1 */ __m128i span_2; /* 0,dcdx,2dcdx,3dcdx for plane 2 */ __m128i unused; transpose4_epi32(&p0, &p1, &p2, &zero, &c, &dcdx, &dcdy, &unused); /* Adjust dcdx; */ dcdx = _mm_sub_epi32(zero, dcdx); c = _mm_add_epi32(c, mm_mullo_epi32(dcdx, _mm_set1_epi32(x))); c = _mm_add_epi32(c, mm_mullo_epi32(dcdy, _mm_set1_epi32(y))); /* Adjust so we can just check the sign bit (< 0 comparison), instead of having to do a less efficient <= 0 comparison */ c = _mm_sub_epi32(c, _mm_set1_epi32(1)); dcdx2 = _mm_add_epi32(dcdx, dcdx); dcdx3 = _mm_add_epi32(dcdx2, dcdx); transpose4_epi32(&zero, &dcdx, &dcdx2, &dcdx3, &span_0, &span_1, &span_2, &unused); { __m128i c0_0 = _mm_add_epi32(SCALAR_EPI32(c, 0), span_0); __m128i c1_0 = _mm_add_epi32(SCALAR_EPI32(c, 1), span_1); __m128i c2_0 = _mm_add_epi32(SCALAR_EPI32(c, 2), span_2); __m128i c_0 = _mm_or_si128(_mm_or_si128(c0_0, c1_0), c2_0); __m128i c0_1 = _mm_add_epi32(c0_0, SCALAR_EPI32(dcdy, 0)); __m128i c1_1 = _mm_add_epi32(c1_0, SCALAR_EPI32(dcdy, 1)); __m128i c2_1 = _mm_add_epi32(c2_0, SCALAR_EPI32(dcdy, 2)); __m128i c_1 = _mm_or_si128(_mm_or_si128(c0_1, c1_1), c2_1); __m128i c_01 = _mm_packs_epi32(c_0, c_1); __m128i c0_2 = _mm_add_epi32(c0_1, SCALAR_EPI32(dcdy, 0)); __m128i c1_2 = _mm_add_epi32(c1_1, SCALAR_EPI32(dcdy, 1)); __m128i c2_2 = _mm_add_epi32(c2_1, SCALAR_EPI32(dcdy, 2)); __m128i c_2 = _mm_or_si128(_mm_or_si128(c0_2, c1_2), c2_2); __m128i c0_3 = _mm_add_epi32(c0_2, SCALAR_EPI32(dcdy, 0)); __m128i c1_3 = _mm_add_epi32(c1_2, SCALAR_EPI32(dcdy, 1)); __m128i c2_3 = _mm_add_epi32(c2_2, SCALAR_EPI32(dcdy, 2)); __m128i c_3 = _mm_or_si128(_mm_or_si128(c0_3, c1_3), c2_3); __m128i c_23 = _mm_packs_epi32(c_2, c_3); __m128i c_0123 = _mm_packs_epi16(c_01, c_23); unsigned mask = _mm_movemask_epi8(c_0123); if (mask != 0xffff) lp_rast_shade_quads_mask(task, &tri->inputs, x, y, 0xffff & ~mask); } }
void lp_rast_triangle_3_16(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { const struct lp_rast_triangle *tri = arg.triangle.tri; const struct lp_rast_plane *plane = GET_PLANES(tri); int x = (arg.triangle.plane_mask & 0xff) + task->x; int y = (arg.triangle.plane_mask >> 8) + task->y; unsigned i, j; struct { unsigned mask:16; unsigned i:8; unsigned j:8; } out[16]; unsigned nr = 0; __m128i p0 = _mm_load_si128((__m128i *)&plane[0]); /* c, dcdx, dcdy, eo */ __m128i p1 = _mm_load_si128((__m128i *)&plane[1]); /* c, dcdx, dcdy, eo */ __m128i p2 = _mm_load_si128((__m128i *)&plane[2]); /* c, dcdx, dcdy, eo */ __m128i zero = _mm_setzero_si128(); __m128i c; __m128i dcdx; __m128i dcdy; __m128i rej4; __m128i dcdx2; __m128i dcdx3; __m128i span_0; /* 0,dcdx,2dcdx,3dcdx for plane 0 */ __m128i span_1; /* 0,dcdx,2dcdx,3dcdx for plane 1 */ __m128i span_2; /* 0,dcdx,2dcdx,3dcdx for plane 2 */ __m128i unused; transpose4_epi32(&p0, &p1, &p2, &zero, &c, &dcdx, &dcdy, &rej4); /* Adjust dcdx; */ dcdx = _mm_sub_epi32(zero, dcdx); c = _mm_add_epi32(c, mm_mullo_epi32(dcdx, _mm_set1_epi32(x))); c = _mm_add_epi32(c, mm_mullo_epi32(dcdy, _mm_set1_epi32(y))); rej4 = _mm_slli_epi32(rej4, 2); /* Adjust so we can just check the sign bit (< 0 comparison), instead of having to do a less efficient <= 0 comparison */ c = _mm_sub_epi32(c, _mm_set1_epi32(1)); rej4 = _mm_add_epi32(rej4, _mm_set1_epi32(1)); dcdx2 = _mm_add_epi32(dcdx, dcdx); dcdx3 = _mm_add_epi32(dcdx2, dcdx); transpose4_epi32(&zero, &dcdx, &dcdx2, &dcdx3, &span_0, &span_1, &span_2, &unused); for (i = 0; i < 4; i++) { __m128i cx = c; for (j = 0; j < 4; j++) { __m128i c4rej = _mm_add_epi32(cx, rej4); __m128i rej_masks = _mm_srai_epi32(c4rej, 31); /* if (is_zero(rej_masks)) */ if (_mm_movemask_epi8(rej_masks) == 0) { __m128i c0_0 = _mm_add_epi32(SCALAR_EPI32(cx, 0), span_0); __m128i c1_0 = _mm_add_epi32(SCALAR_EPI32(cx, 1), span_1); __m128i c2_0 = _mm_add_epi32(SCALAR_EPI32(cx, 2), span_2); __m128i c_0 = _mm_or_si128(_mm_or_si128(c0_0, c1_0), c2_0); __m128i c0_1 = _mm_add_epi32(c0_0, SCALAR_EPI32(dcdy, 0)); __m128i c1_1 = _mm_add_epi32(c1_0, SCALAR_EPI32(dcdy, 1)); __m128i c2_1 = _mm_add_epi32(c2_0, SCALAR_EPI32(dcdy, 2)); __m128i c_1 = _mm_or_si128(_mm_or_si128(c0_1, c1_1), c2_1); __m128i c_01 = _mm_packs_epi32(c_0, c_1); __m128i c0_2 = _mm_add_epi32(c0_1, SCALAR_EPI32(dcdy, 0)); __m128i c1_2 = _mm_add_epi32(c1_1, SCALAR_EPI32(dcdy, 1)); __m128i c2_2 = _mm_add_epi32(c2_1, SCALAR_EPI32(dcdy, 2)); __m128i c_2 = _mm_or_si128(_mm_or_si128(c0_2, c1_2), c2_2); __m128i c0_3 = _mm_add_epi32(c0_2, SCALAR_EPI32(dcdy, 0)); __m128i c1_3 = _mm_add_epi32(c1_2, SCALAR_EPI32(dcdy, 1)); __m128i c2_3 = _mm_add_epi32(c2_2, SCALAR_EPI32(dcdy, 2)); __m128i c_3 = _mm_or_si128(_mm_or_si128(c0_3, c1_3), c2_3); __m128i c_23 = _mm_packs_epi32(c_2, c_3); __m128i c_0123 = _mm_packs_epi16(c_01, c_23); unsigned mask = _mm_movemask_epi8(c_0123); out[nr].i = i; out[nr].j = j; out[nr].mask = mask; if (mask != 0xffff) nr++; } cx = _mm_add_epi32(cx, _mm_slli_epi32(dcdx, 2)); } c = _mm_add_epi32(c, _mm_slli_epi32(dcdy, 2)); } for (i = 0; i < nr; i++) lp_rast_shade_quads_mask(task, &tri->inputs, x + 4 * out[i].j, y + 4 * out[i].i, 0xffff & ~out[i].mask); }
static boolean try_setup_line( struct lp_setup_context *setup, const float (*v1)[4], const float (*v2)[4]) { struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe; struct lp_scene *scene = setup->scene; const struct lp_setup_variant_key *key = &setup->setup.variant->key; struct lp_rast_triangle *line; struct lp_rast_plane *plane; struct lp_line_info info; float width = MAX2(1.0, setup->line_width); const struct u_rect *scissor; struct u_rect bbox, bboxpos; boolean s_planes[4]; unsigned tri_bytes; int x[4]; int y[4]; int i; int nr_planes = 4; unsigned viewport_index = 0; unsigned layer = 0; /* linewidth should be interpreted as integer */ int fixed_width = util_iround(width) * FIXED_ONE; float x_offset=0; float y_offset=0; float x_offset_end=0; float y_offset_end=0; float x1diff; float y1diff; float x2diff; float y2diff; float dx, dy; float area; const float (*pv)[4]; boolean draw_start; boolean draw_end; boolean will_draw_start; boolean will_draw_end; if (0) print_line(setup, v1, v2); if (setup->flatshade_first) { pv = v1; } else { pv = v2; } if (setup->viewport_index_slot > 0) { unsigned *udata = (unsigned*)pv[setup->viewport_index_slot]; viewport_index = lp_clamp_viewport_idx(*udata); } if (setup->layer_slot > 0) { layer = *(unsigned*)pv[setup->layer_slot]; layer = MIN2(layer, scene->fb_max_layer); } dx = v1[0][0] - v2[0][0]; dy = v1[0][1] - v2[0][1]; area = (dx * dx + dy * dy); if (area == 0) { LP_COUNT(nr_culled_tris); return TRUE; } info.oneoverarea = 1.0f / area; info.dx = dx; info.dy = dy; info.v1 = v1; info.v2 = v2; /* X-MAJOR LINE */ if (fabsf(dx) >= fabsf(dy)) { float dydx = dy / dx; x1diff = v1[0][0] - (float) floor(v1[0][0]) - 0.5; y1diff = v1[0][1] - (float) floor(v1[0][1]) - 0.5; x2diff = v2[0][0] - (float) floor(v2[0][0]) - 0.5; y2diff = v2[0][1] - (float) floor(v2[0][1]) - 0.5; if (y2diff==-0.5 && dy<0){ y2diff = 0.5; } /* * Diamond exit rule test for starting point */ if (fabsf(x1diff) + fabsf(y1diff) < 0.5) { draw_start = TRUE; } else if (sign(x1diff) == sign(-dx)) { draw_start = FALSE; } else if (sign(-y1diff) != sign(dy)) { draw_start = TRUE; } else { /* do intersection test */ float yintersect = fracf(v1[0][1]) + x1diff * dydx; draw_start = (yintersect < 1.0 && yintersect > 0.0); } /* * Diamond exit rule test for ending point */ if (fabsf(x2diff) + fabsf(y2diff) < 0.5) { draw_end = FALSE; } else if (sign(x2diff) != sign(-dx)) { draw_end = FALSE; } else if (sign(-y2diff) == sign(dy)) { draw_end = TRUE; } else { /* do intersection test */ float yintersect = fracf(v2[0][1]) + x2diff * dydx; draw_end = (yintersect < 1.0 && yintersect > 0.0); } /* Are we already drawing start/end? */ will_draw_start = sign(-x1diff) != sign(dx); will_draw_end = (sign(x2diff) == sign(-dx)) || x2diff==0; if (dx < 0) { /* if v2 is to the right of v1, swap pointers */ const float (*temp)[4] = v1; v1 = v2; v2 = temp; dx = -dx; dy = -dy; /* Otherwise shift planes appropriately */ if (will_draw_start != draw_start) { x_offset_end = - x1diff - 0.5; y_offset_end = x_offset_end * dydx; } if (will_draw_end != draw_end) { x_offset = - x2diff - 0.5; y_offset = x_offset * dydx; } } else{ /* Otherwise shift planes appropriately */ if (will_draw_start != draw_start) { x_offset = - x1diff + 0.5; y_offset = x_offset * dydx; } if (will_draw_end != draw_end) { x_offset_end = - x2diff + 0.5; y_offset_end = x_offset_end * dydx; } } /* x/y positions in fixed point */ x[0] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset); x[1] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset); x[2] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset); x[3] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset); y[0] = subpixel_snap(v1[0][1] + y_offset - setup->pixel_offset) - fixed_width/2; y[1] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset) - fixed_width/2; y[2] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset) + fixed_width/2; y[3] = subpixel_snap(v1[0][1] + y_offset - setup->pixel_offset) + fixed_width/2; } else { const float dxdy = dx / dy; /* Y-MAJOR LINE */ x1diff = v1[0][0] - (float) floor(v1[0][0]) - 0.5; y1diff = v1[0][1] - (float) floor(v1[0][1]) - 0.5; x2diff = v2[0][0] - (float) floor(v2[0][0]) - 0.5; y2diff = v2[0][1] - (float) floor(v2[0][1]) - 0.5; if (x2diff==-0.5 && dx<0) { x2diff = 0.5; } /* * Diamond exit rule test for starting point */ if (fabsf(x1diff) + fabsf(y1diff) < 0.5) { draw_start = TRUE; } else if (sign(-y1diff) == sign(dy)) { draw_start = FALSE; } else if (sign(x1diff) != sign(-dx)) { draw_start = TRUE; } else { /* do intersection test */ float xintersect = fracf(v1[0][0]) + y1diff * dxdy; draw_start = (xintersect < 1.0 && xintersect > 0.0); } /* * Diamond exit rule test for ending point */ if (fabsf(x2diff) + fabsf(y2diff) < 0.5) { draw_end = FALSE; } else if (sign(-y2diff) != sign(dy) ) { draw_end = FALSE; } else if (sign(x2diff) == sign(-dx) ) { draw_end = TRUE; } else { /* do intersection test */ float xintersect = fracf(v2[0][0]) + y2diff * dxdy; draw_end = (xintersect < 1.0 && xintersect >= 0.0); } /* Are we already drawing start/end? */ will_draw_start = sign(y1diff) == sign(dy); will_draw_end = (sign(-y2diff) == sign(dy)) || y2diff==0; if (dy > 0) { /* if v2 is on top of v1, swap pointers */ const float (*temp)[4] = v1; v1 = v2; v2 = temp; dx = -dx; dy = -dy; /* Otherwise shift planes appropriately */ if (will_draw_start != draw_start) { y_offset_end = - y1diff + 0.5; x_offset_end = y_offset_end * dxdy; } if (will_draw_end != draw_end) { y_offset = - y2diff + 0.5; x_offset = y_offset * dxdy; } } else { /* Otherwise shift planes appropriately */ if (will_draw_start != draw_start) { y_offset = - y1diff - 0.5; x_offset = y_offset * dxdy; } if (will_draw_end != draw_end) { y_offset_end = - y2diff - 0.5; x_offset_end = y_offset_end * dxdy; } } /* x/y positions in fixed point */ x[0] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset) - fixed_width/2; x[1] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset) - fixed_width/2; x[2] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset) + fixed_width/2; x[3] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset) + fixed_width/2; y[0] = subpixel_snap(v1[0][1] + y_offset - setup->pixel_offset); y[1] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset); y[2] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset); y[3] = subpixel_snap(v1[0][1] + y_offset - setup->pixel_offset); } /* Bounding rectangle (in pixels) */ { /* Yes this is necessary to accurately calculate bounding boxes * with the two fill-conventions we support. GL (normally) ends * up needing a bottom-left fill convention, which requires * slightly different rounding. */ int adj = (setup->bottom_edge_rule != 0) ? 1 : 0; bbox.x0 = (MIN4(x[0], x[1], x[2], x[3]) + (FIXED_ONE-1)) >> FIXED_ORDER; bbox.x1 = (MAX4(x[0], x[1], x[2], x[3]) + (FIXED_ONE-1)) >> FIXED_ORDER; bbox.y0 = (MIN4(y[0], y[1], y[2], y[3]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; bbox.y1 = (MAX4(y[0], y[1], y[2], y[3]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; /* Inclusive coordinates: */ bbox.x1--; bbox.y1--; } if (bbox.x1 < bbox.x0 || bbox.y1 < bbox.y0) { if (0) debug_printf("empty bounding box\n"); LP_COUNT(nr_culled_tris); return TRUE; } if (!u_rect_test_intersection(&setup->draw_regions[viewport_index], &bbox)) { if (0) debug_printf("offscreen\n"); LP_COUNT(nr_culled_tris); return TRUE; } bboxpos = bbox; /* Can safely discard negative regions: */ bboxpos.x0 = MAX2(bboxpos.x0, 0); bboxpos.y0 = MAX2(bboxpos.y0, 0); nr_planes = 4; /* * Determine how many scissor planes we need, that is drop scissor * edges if the bounding box of the tri is fully inside that edge. */ if (setup->scissor_test) { /* why not just use draw_regions */ scissor = &setup->scissors[viewport_index]; scissor_planes_needed(s_planes, &bboxpos, scissor); nr_planes += s_planes[0] + s_planes[1] + s_planes[2] + s_planes[3]; } line = lp_setup_alloc_triangle(scene, key->num_inputs, nr_planes, &tri_bytes); if (!line) return FALSE; #ifdef DEBUG line->v[0][0] = v1[0][0]; line->v[1][0] = v2[0][0]; line->v[0][1] = v1[0][1]; line->v[1][1] = v2[0][1]; #endif LP_COUNT(nr_tris); if (lp_context->active_statistics_queries && !llvmpipe_rasterization_disabled(lp_context)) { lp_context->pipeline_statistics.c_primitives++; } /* calculate the deltas */ plane = GET_PLANES(line); plane[0].dcdy = x[0] - x[1]; plane[1].dcdy = x[1] - x[2]; plane[2].dcdy = x[2] - x[3]; plane[3].dcdy = x[3] - x[0]; plane[0].dcdx = y[0] - y[1]; plane[1].dcdx = y[1] - y[2]; plane[2].dcdx = y[2] - y[3]; plane[3].dcdx = y[3] - y[0]; if (draw_will_inject_frontface(lp_context->draw) && setup->face_slot > 0) { line->inputs.frontfacing = v1[setup->face_slot][0]; } else { line->inputs.frontfacing = TRUE; } /* Setup parameter interpolants: */ info.a0 = GET_A0(&line->inputs); info.dadx = GET_DADX(&line->inputs); info.dady = GET_DADY(&line->inputs); info.frontfacing = line->inputs.frontfacing; setup_line_coefficients(setup, &info); line->inputs.disable = FALSE; line->inputs.opaque = FALSE; line->inputs.layer = layer; line->inputs.viewport_index = viewport_index; /* * XXX: this code is mostly identical to the one in lp_setup_tri, except it * uses 4 planes instead of 3. Could share the code (including the sse * assembly, in fact we'd get the 4th plane for free). * The only difference apart from storing the 4th plane would be some * different shuffle for calculating dcdx/dcdy. */ for (i = 0; i < 4; i++) { /* half-edge constants, will be iterated over the whole render * target. */ plane[i].c = IMUL64(plane[i].dcdx, x[i]) - IMUL64(plane[i].dcdy, y[i]); /* correct for top-left vs. bottom-left fill convention. */ if (plane[i].dcdx < 0) { /* both fill conventions want this - adjust for left edges */ plane[i].c++; } else if (plane[i].dcdx == 0) { if (setup->pixel_offset == 0) { /* correct for top-left fill convention: */ if (plane[i].dcdy > 0) plane[i].c++; } else { /* correct for bottom-left fill convention: */ if (plane[i].dcdy < 0) plane[i].c++; } } plane[i].dcdx *= FIXED_ONE; plane[i].dcdy *= FIXED_ONE; /* find trivial reject offsets for each edge for a single-pixel * sized block. These will be scaled up at each recursive level to * match the active blocksize. Scaling in this way works best if * the blocks are square. */ plane[i].eo = 0; if (plane[i].dcdx < 0) plane[i].eo -= plane[i].dcdx; if (plane[i].dcdy > 0) plane[i].eo += plane[i].dcdy; } /* * When rasterizing scissored tris, use the intersection of the * triangle bounding box and the scissor rect to generate the * scissor planes. * * This permits us to cut off the triangle "tails" that are present * in the intermediate recursive levels caused when two of the * triangles edges don't diverge quickly enough to trivially reject * exterior blocks from the triangle. * * It's not really clear if it's worth worrying about these tails, * but since we generate the planes for each scissored tri, it's * free to trim them in this case. * * Note that otherwise, the scissor planes only vary in 'C' value, * and even then only on state-changes. Could alternatively store * these planes elsewhere. * (Or only store the c value together with a bit indicating which * scissor edge this is, so rasterization would treat them differently * (easier to evaluate) to ordinary planes.) */ if (nr_planes > 4) { struct lp_rast_plane *plane_s = &plane[4]; if (s_planes[0]) { plane_s->dcdx = -1 << 8; plane_s->dcdy = 0; plane_s->c = (1-scissor->x0) << 8; plane_s->eo = 1 << 8; plane_s++; } if (s_planes[1]) { plane_s->dcdx = 1 << 8; plane_s->dcdy = 0; plane_s->c = (scissor->x1+1) << 8; plane_s->eo = 0 << 8; plane_s++; } if (s_planes[2]) { plane_s->dcdx = 0; plane_s->dcdy = 1 << 8; plane_s->c = (1-scissor->y0) << 8; plane_s->eo = 1 << 8; plane_s++; } if (s_planes[3]) { plane_s->dcdx = 0; plane_s->dcdy = -1 << 8; plane_s->c = (scissor->y1+1) << 8; plane_s->eo = 0; plane_s++; } assert(plane_s == &plane[nr_planes]); } return lp_setup_bin_triangle(setup, line, &bbox, &bboxpos, nr_planes, viewport_index); }
static boolean try_setup_point( struct lp_setup_context *setup, const float (*v0)[4] ) { /* x/y positions in fixed point */ const struct lp_setup_variant_key *key = &setup->setup.variant->key; const int sizeAttr = setup->psize; const float size = (setup->point_size_per_vertex && sizeAttr > 0) ? v0[sizeAttr][0] : setup->point_size; /* Point size as fixed point integer, remove rounding errors * and gives minimum width for very small points */ int fixed_width = MAX2(FIXED_ONE, (subpixel_snap(size) + FIXED_ONE/2 - 1) & ~(FIXED_ONE-1)); const int x0 = subpixel_snap(v0[0][0] - setup->pixel_offset) - fixed_width/2; const int y0 = subpixel_snap(v0[0][1] - setup->pixel_offset) - fixed_width/2; struct lp_scene *scene = setup->scene; struct lp_rast_triangle *point; unsigned bytes; struct u_rect bbox; unsigned nr_planes = 4; struct point_info info; /* Bounding rectangle (in pixels) */ { /* Yes this is necessary to accurately calculate bounding boxes * with the two fill-conventions we support. GL (normally) ends * up needing a bottom-left fill convention, which requires * slightly different rounding. */ int adj = (setup->pixel_offset != 0) ? 1 : 0; bbox.x0 = (x0 + (FIXED_ONE-1) + adj) >> FIXED_ORDER; bbox.x1 = (x0 + fixed_width + (FIXED_ONE-1) + adj) >> FIXED_ORDER; bbox.y0 = (y0 + (FIXED_ONE-1)) >> FIXED_ORDER; bbox.y1 = (y0 + fixed_width + (FIXED_ONE-1)) >> FIXED_ORDER; /* Inclusive coordinates: */ bbox.x1--; bbox.y1--; } if (!u_rect_test_intersection(&setup->draw_region, &bbox)) { if (0) debug_printf("offscreen\n"); LP_COUNT(nr_culled_tris); return TRUE; } u_rect_find_intersection(&setup->draw_region, &bbox); point = lp_setup_alloc_triangle(scene, key->num_inputs, nr_planes, &bytes); if (!point) return FALSE; #ifdef DEBUG point->v[0][0] = v0[0][0]; point->v[0][1] = v0[0][1]; #endif info.v0 = v0; info.dx01 = 0; info.dx12 = fixed_width; info.dy01 = fixed_width; info.dy12 = 0; info.a0 = GET_A0(&point->inputs); info.dadx = GET_DADX(&point->inputs); info.dady = GET_DADY(&point->inputs); /* Setup parameter interpolants: */ setup_point_coefficients(setup, &info); point->inputs.frontfacing = TRUE; point->inputs.disable = FALSE; point->inputs.opaque = FALSE; { struct lp_rast_plane *plane = GET_PLANES(point); plane[0].dcdx = -1; plane[0].dcdy = 0; plane[0].c = 1-bbox.x0; plane[0].eo = 1; plane[1].dcdx = 1; plane[1].dcdy = 0; plane[1].c = bbox.x1+1; plane[1].eo = 0; plane[2].dcdx = 0; plane[2].dcdy = 1; plane[2].c = 1-bbox.y0; plane[2].eo = 1; plane[3].dcdx = 0; plane[3].dcdy = -1; plane[3].c = bbox.y1+1; plane[3].eo = 0; } return lp_setup_bin_triangle(setup, point, &bbox, nr_planes); }
static boolean try_setup_line( struct lp_setup_context *setup, const float (*v1)[4], const float (*v2)[4]) { struct lp_scene *scene = setup->scene; const struct lp_setup_variant_key *key = &setup->setup.variant->key; struct lp_rast_triangle *line; struct lp_rast_plane *plane; struct lp_line_info info; float width = MAX2(1.0, setup->line_width); struct u_rect bbox; unsigned tri_bytes; int x[4]; int y[4]; int i; int nr_planes = 4; /* linewidth should be interpreted as integer */ int fixed_width = util_iround(width) * FIXED_ONE; float x_offset=0; float y_offset=0; float x_offset_end=0; float y_offset_end=0; float x1diff; float y1diff; float x2diff; float y2diff; float dx, dy; float area; boolean draw_start; boolean draw_end; boolean will_draw_start; boolean will_draw_end; if (0) print_line(setup, v1, v2); if (setup->scissor_test) { nr_planes = 8; } else { nr_planes = 4; } dx = v1[0][0] - v2[0][0]; dy = v1[0][1] - v2[0][1]; area = (dx * dx + dy * dy); if (area == 0) { LP_COUNT(nr_culled_tris); return TRUE; } info.oneoverarea = 1.0f / area; info.dx = dx; info.dy = dy; info.v1 = v1; info.v2 = v2; /* X-MAJOR LINE */ if (fabsf(dx) >= fabsf(dy)) { float dydx = dy / dx; x1diff = v1[0][0] - (float) floor(v1[0][0]) - 0.5; y1diff = v1[0][1] - (float) floor(v1[0][1]) - 0.5; x2diff = v2[0][0] - (float) floor(v2[0][0]) - 0.5; y2diff = v2[0][1] - (float) floor(v2[0][1]) - 0.5; if (y2diff==-0.5 && dy<0){ y2diff = 0.5; } /* * Diamond exit rule test for starting point */ if (fabsf(x1diff) + fabsf(y1diff) < 0.5) { draw_start = TRUE; } else if (sign(x1diff) == sign(-dx)) { draw_start = FALSE; } else if (sign(-y1diff) != sign(dy)) { draw_start = TRUE; } else { /* do intersection test */ float yintersect = fracf(v1[0][1]) + x1diff * dydx; draw_start = (yintersect < 1.0 && yintersect > 0.0); } /* * Diamond exit rule test for ending point */ if (fabsf(x2diff) + fabsf(y2diff) < 0.5) { draw_end = FALSE; } else if (sign(x2diff) != sign(-dx)) { draw_end = FALSE; } else if (sign(-y2diff) == sign(dy)) { draw_end = TRUE; } else { /* do intersection test */ float yintersect = fracf(v2[0][1]) + x2diff * dydx; draw_end = (yintersect < 1.0 && yintersect > 0.0); } /* Are we already drawing start/end? */ will_draw_start = sign(-x1diff) != sign(dx); will_draw_end = (sign(x2diff) == sign(-dx)) || x2diff==0; if (dx < 0) { /* if v2 is to the right of v1, swap pointers */ const float (*temp)[4] = v1; v1 = v2; v2 = temp; dx = -dx; dy = -dy; /* Otherwise shift planes appropriately */ if (will_draw_start != draw_start) { x_offset_end = - x1diff - 0.5; y_offset_end = x_offset_end * dydx; } if (will_draw_end != draw_end) { x_offset = - x2diff - 0.5; y_offset = x_offset * dydx; } } else{ /* Otherwise shift planes appropriately */ if (will_draw_start != draw_start) { x_offset = - x1diff + 0.5; y_offset = x_offset * dydx; } if (will_draw_end != draw_end) { x_offset_end = - x2diff + 0.5; y_offset_end = x_offset_end * dydx; } } /* x/y positions in fixed point */ x[0] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset); x[1] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset); x[2] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset); x[3] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset); y[0] = subpixel_snap(v1[0][1] + y_offset - setup->pixel_offset) - fixed_width/2; y[1] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset) - fixed_width/2; y[2] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset) + fixed_width/2; y[3] = subpixel_snap(v1[0][1] + y_offset - setup->pixel_offset) + fixed_width/2; } else { const float dxdy = dx / dy; /* Y-MAJOR LINE */ x1diff = v1[0][0] - (float) floor(v1[0][0]) - 0.5; y1diff = v1[0][1] - (float) floor(v1[0][1]) - 0.5; x2diff = v2[0][0] - (float) floor(v2[0][0]) - 0.5; y2diff = v2[0][1] - (float) floor(v2[0][1]) - 0.5; if (x2diff==-0.5 && dx<0) { x2diff = 0.5; } /* * Diamond exit rule test for starting point */ if (fabsf(x1diff) + fabsf(y1diff) < 0.5) { draw_start = TRUE; } else if (sign(-y1diff) == sign(dy)) { draw_start = FALSE; } else if (sign(x1diff) != sign(-dx)) { draw_start = TRUE; } else { /* do intersection test */ float xintersect = fracf(v1[0][0]) + y1diff * dxdy; draw_start = (xintersect < 1.0 && xintersect > 0.0); } /* * Diamond exit rule test for ending point */ if (fabsf(x2diff) + fabsf(y2diff) < 0.5) { draw_end = FALSE; } else if (sign(-y2diff) != sign(dy) ) { draw_end = FALSE; } else if (sign(x2diff) == sign(-dx) ) { draw_end = TRUE; } else { /* do intersection test */ float xintersect = fracf(v2[0][0]) + y2diff * dxdy; draw_end = (xintersect < 1.0 && xintersect >= 0.0); } /* Are we already drawing start/end? */ will_draw_start = sign(y1diff) == sign(dy); will_draw_end = (sign(-y2diff) == sign(dy)) || y2diff==0; if (dy > 0) { /* if v2 is on top of v1, swap pointers */ const float (*temp)[4] = v1; v1 = v2; v2 = temp; dx = -dx; dy = -dy; /* Otherwise shift planes appropriately */ if (will_draw_start != draw_start) { y_offset_end = - y1diff + 0.5; x_offset_end = y_offset_end * dxdy; } if (will_draw_end != draw_end) { y_offset = - y2diff + 0.5; x_offset = y_offset * dxdy; } } else { /* Otherwise shift planes appropriately */ if (will_draw_start != draw_start) { y_offset = - y1diff - 0.5; x_offset = y_offset * dxdy; } if (will_draw_end != draw_end) { y_offset_end = - y2diff - 0.5; x_offset_end = y_offset_end * dxdy; } } /* x/y positions in fixed point */ x[0] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset) - fixed_width/2; x[1] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset) - fixed_width/2; x[2] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset) + fixed_width/2; x[3] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset) + fixed_width/2; y[0] = subpixel_snap(v1[0][1] + y_offset - setup->pixel_offset); y[1] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset); y[2] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset); y[3] = subpixel_snap(v1[0][1] + y_offset - setup->pixel_offset); } LP_COUNT(nr_tris); /* Bounding rectangle (in pixels) */ { /* Yes this is necessary to accurately calculate bounding boxes * with the two fill-conventions we support. GL (normally) ends * up needing a bottom-left fill convention, which requires * slightly different rounding. */ int adj = (setup->pixel_offset != 0) ? 1 : 0; bbox.x0 = (MIN4(x[0], x[1], x[2], x[3]) + (FIXED_ONE-1)) >> FIXED_ORDER; bbox.x1 = (MAX4(x[0], x[1], x[2], x[3]) + (FIXED_ONE-1)) >> FIXED_ORDER; bbox.y0 = (MIN4(y[0], y[1], y[2], y[3]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; bbox.y1 = (MAX4(y[0], y[1], y[2], y[3]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; /* Inclusive coordinates: */ bbox.x1--; bbox.y1--; } if (bbox.x1 < bbox.x0 || bbox.y1 < bbox.y0) { if (0) debug_printf("empty bounding box\n"); LP_COUNT(nr_culled_tris); return TRUE; } if (!u_rect_test_intersection(&setup->draw_region, &bbox)) { if (0) debug_printf("offscreen\n"); LP_COUNT(nr_culled_tris); return TRUE; } /* Can safely discard negative regions: */ bbox.x0 = MAX2(bbox.x0, 0); bbox.y0 = MAX2(bbox.y0, 0); line = lp_setup_alloc_triangle(scene, key->num_inputs, nr_planes, &tri_bytes); if (!line) return FALSE; #ifdef DEBUG line->v[0][0] = v1[0][0]; line->v[1][0] = v2[0][0]; line->v[0][1] = v1[0][1]; line->v[1][1] = v2[0][1]; #endif /* calculate the deltas */ plane = GET_PLANES(line); plane[0].dcdy = x[0] - x[1]; plane[1].dcdy = x[1] - x[2]; plane[2].dcdy = x[2] - x[3]; plane[3].dcdy = x[3] - x[0]; plane[0].dcdx = y[0] - y[1]; plane[1].dcdx = y[1] - y[2]; plane[2].dcdx = y[2] - y[3]; plane[3].dcdx = y[3] - y[0]; /* Setup parameter interpolants: */ info.a0 = GET_A0(&line->inputs); info.dadx = GET_DADX(&line->inputs); info.dady = GET_DADY(&line->inputs); setup_line_coefficients(setup, &info); line->inputs.frontfacing = TRUE; line->inputs.disable = FALSE; line->inputs.opaque = FALSE; for (i = 0; i < 4; i++) { /* half-edge constants, will be interated over the whole render * target. */ plane[i].c = plane[i].dcdx * x[i] - plane[i].dcdy * y[i]; /* correct for top-left vs. bottom-left fill convention. * * note that we're overloading gl_rasterization_rules to mean * both (0.5,0.5) pixel centers *and* bottom-left filling * convention. * * GL actually has a top-left filling convention, but GL's * notion of "top" differs from gallium's... * * Also, sometimes (in FBO cases) GL will render upside down * to its usual method, in which case it will probably want * to use the opposite, top-left convention. */ if (plane[i].dcdx < 0) { /* both fill conventions want this - adjust for left edges */ plane[i].c++; } else if (plane[i].dcdx == 0) { if (setup->pixel_offset == 0) { /* correct for top-left fill convention: */ if (plane[i].dcdy > 0) plane[i].c++; } else { /* correct for bottom-left fill convention: */ if (plane[i].dcdy < 0) plane[i].c++; } } plane[i].dcdx *= FIXED_ONE; plane[i].dcdy *= FIXED_ONE; /* find trivial reject offsets for each edge for a single-pixel * sized block. These will be scaled up at each recursive level to * match the active blocksize. Scaling in this way works best if * the blocks are square. */ plane[i].eo = 0; if (plane[i].dcdx < 0) plane[i].eo -= plane[i].dcdx; if (plane[i].dcdy > 0) plane[i].eo += plane[i].dcdy; } /* * When rasterizing scissored tris, use the intersection of the * triangle bounding box and the scissor rect to generate the * scissor planes. * * This permits us to cut off the triangle "tails" that are present * in the intermediate recursive levels caused when two of the * triangles edges don't diverge quickly enough to trivially reject * exterior blocks from the triangle. * * It's not really clear if it's worth worrying about these tails, * but since we generate the planes for each scissored tri, it's * free to trim them in this case. * * Note that otherwise, the scissor planes only vary in 'C' value, * and even then only on state-changes. Could alternatively store * these planes elsewhere. */ if (nr_planes == 8) { const struct u_rect *scissor = &setup->scissor; plane[4].dcdx = -1; plane[4].dcdy = 0; plane[4].c = 1-scissor->x0; plane[4].eo = 1; plane[5].dcdx = 1; plane[5].dcdy = 0; plane[5].c = scissor->x1+1; plane[5].eo = 0; plane[6].dcdx = 0; plane[6].dcdy = 1; plane[6].c = 1-scissor->y0; plane[6].eo = 1; plane[7].dcdx = 0; plane[7].dcdy = -1; plane[7].c = scissor->y1+1; plane[7].eo = 0; } return lp_setup_bin_triangle(setup, line, &bbox, nr_planes); }
void lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { const struct lp_rast_triangle *tri = arg.triangle.tri; const struct lp_rast_plane *plane = GET_PLANES(tri); int x = (arg.triangle.plane_mask & 0xff) + task->x; int y = (arg.triangle.plane_mask >> 8) + task->y; unsigned i, j; struct { unsigned mask:16; unsigned i:8; unsigned j:8; } out[16]; unsigned nr = 0; __m128i p0 = lp_plane_to_m128i(&plane[0]); /* c, dcdx, dcdy, eo */ __m128i p1 = lp_plane_to_m128i(&plane[1]); /* c, dcdx, dcdy, eo */ __m128i p2 = lp_plane_to_m128i(&plane[2]); /* c, dcdx, dcdy, eo */ __m128i zero = vec_splats((unsigned char) 0); __m128i c; __m128i dcdx; __m128i dcdy; __m128i rej4; __m128i dcdx2; __m128i dcdx3; __m128i span_0; /* 0,dcdx,2dcdx,3dcdx for plane 0 */ __m128i span_1; /* 0,dcdx,2dcdx,3dcdx for plane 1 */ __m128i span_2; /* 0,dcdx,2dcdx,3dcdx for plane 2 */ __m128i unused; __m128i vshuf_mask0; __m128i vshuf_mask1; __m128i vshuf_mask2; #ifdef PIPE_ARCH_LITTLE_ENDIAN vshuf_mask0 = (__m128i) vec_splats((unsigned int) 0x03020100); vshuf_mask1 = (__m128i) vec_splats((unsigned int) 0x07060504); vshuf_mask2 = (__m128i) vec_splats((unsigned int) 0x0B0A0908); #else vshuf_mask0 = (__m128i) vec_splats((unsigned int) 0x0C0D0E0F); vshuf_mask1 = (__m128i) vec_splats((unsigned int) 0x08090A0B); vshuf_mask2 = (__m128i) vec_splats((unsigned int) 0x04050607); #endif transpose4_epi32(&p0, &p1, &p2, &zero, &c, &dcdx, &dcdy, &rej4); /* Adjust dcdx; */ dcdx = vec_sub_epi32(zero, dcdx); c = vec_add_epi32(c, vec_mullo_epi32(dcdx, (__m128i) vec_splats(x))); c = vec_add_epi32(c, vec_mullo_epi32(dcdy, (__m128i) vec_splats(y))); rej4 = vec_slli_epi32(rej4, 2); /* * Adjust so we can just check the sign bit (< 0 comparison), * instead of having to do a less efficient <= 0 comparison */ c = vec_sub_epi32(c, (__m128i) vec_splats((unsigned int) 1)); rej4 = vec_add_epi32(rej4, (__m128i) vec_splats((unsigned int) 1)); dcdx2 = vec_add_epi32(dcdx, dcdx); dcdx3 = vec_add_epi32(dcdx2, dcdx); transpose4_epi32(&zero, &dcdx, &dcdx2, &dcdx3, &span_0, &span_1, &span_2, &unused); for (i = 0; i < 4; i++) { __m128i cx = c; for (j = 0; j < 4; j++) { __m128i c4rej = vec_add_epi32(cx, rej4); __m128i rej_masks = vec_srai_epi32(c4rej, 31); /* if (is_zero(rej_masks)) */ if (vec_movemask_epi8(rej_masks) == 0) { __m128i c0_0 = vec_add_epi32(vec_perm(cx, cx, vshuf_mask0), span_0); __m128i c1_0 = vec_add_epi32(vec_perm(cx, cx, vshuf_mask1), span_1); __m128i c2_0 = vec_add_epi32(vec_perm(cx, cx, vshuf_mask2), span_2); __m128i c_0 = vec_or(vec_or(c0_0, c1_0), c2_0); __m128i c0_1 = vec_add_epi32(c0_0, vec_perm(dcdy, dcdy, vshuf_mask0)); __m128i c1_1 = vec_add_epi32(c1_0, vec_perm(dcdy, dcdy, vshuf_mask1)); __m128i c2_1 = vec_add_epi32(c2_0, vec_perm(dcdy, dcdy, vshuf_mask2)); __m128i c_1 = vec_or(vec_or(c0_1, c1_1), c2_1); __m128i c_01 = vec_packs_epi32(c_0, c_1); __m128i c0_2 = vec_add_epi32(c0_1, vec_perm(dcdy, dcdy, vshuf_mask0)); __m128i c1_2 = vec_add_epi32(c1_1, vec_perm(dcdy, dcdy, vshuf_mask1)); __m128i c2_2 = vec_add_epi32(c2_1, vec_perm(dcdy, dcdy, vshuf_mask2)); __m128i c_2 = vec_or(vec_or(c0_2, c1_2), c2_2); __m128i c0_3 = vec_add_epi32(c0_2, vec_perm(dcdy, dcdy, vshuf_mask0)); __m128i c1_3 = vec_add_epi32(c1_2, vec_perm(dcdy, dcdy, vshuf_mask1)); __m128i c2_3 = vec_add_epi32(c2_2, vec_perm(dcdy, dcdy, vshuf_mask2)); __m128i c_3 = vec_or(vec_or(c0_3, c1_3), c2_3); __m128i c_23 = vec_packs_epi32(c_2, c_3); __m128i c_0123 = vec_packs_epi16(c_01, c_23); unsigned mask = vec_movemask_epi8(c_0123); out[nr].i = i; out[nr].j = j; out[nr].mask = mask; if (mask != 0xffff) nr++; } cx = vec_add_epi32(cx, vec_slli_epi32(dcdx, 2)); } c = vec_add_epi32(c, vec_slli_epi32(dcdy, 2)); } for (i = 0; i < nr; i++) lp_rast_shade_quads_mask(task, &tri->inputs, x + 4 * out[i].j, y + 4 * out[i].i, 0xffff & ~out[i].mask); }