/*
 * Copy a list of boxes from a buffer object directly into the pixmap's
 * backing store, without an intermediate staging buffer.
 *
 * read_boxes_inplace__cpu() is tried first.  If it declines, the bo is
 * passed to kgem_bo_submit(), mapped with kgem_bo_map(), and each box is
 * copied with memcpy_blt() using the same coordinates on the source and
 * destination side.
 *
 * @kgem:   buffer manager handed to the kgem_* helpers
 * @pixmap: destination; written through devPrivate.ptr with devKind as the
 *          row pitch
 * @bo:     source buffer object; bo->pitch is used as the source row pitch
 * @box:    array of @n boxes to copy
 * @n:      number of boxes
 *
 * No status is returned.  The function silently copies nothing when @n is
 * not positive, when kgem_bo_can_map() refuses, when kgem_bo_map() returns
 * NULL, or when sigtrap_get() returns non-zero.
 */
static void read_boxes_inplace(struct kgem *kgem,
			       PixmapPtr pixmap, struct kgem_bo *bo,
			       const BoxRec *box, int n)
{
	int bpp = pixmap->drawable.bitsPerPixel;
	void *src, *dst = pixmap->devPrivate.ptr;
	int src_pitch = bo->pitch;
	int dst_pitch = pixmap->devKind;

	/* The copy loop below and the one in read_boxes_inplace__cpu() are
	 * do/while (--n) loops: with n <= 0 they would read box[0] and then
	 * keep counting down through negative values.  Bail out first. */
	if (n <= 0)
		return;

	if (read_boxes_inplace__cpu(kgem, pixmap, bo, box, n))
		return;

	DBG(("%s x %d, tiling=%d\n", __FUNCTION__, n, bo->tiling));

	if (!kgem_bo_can_map(kgem, bo))
		return;

	kgem_bo_submit(kgem, bo);

	src = kgem_bo_map(kgem, bo);
	if (src == NULL)
		return;

	/* A non-zero return means the guarded section must not run; there is
	 * deliberately no sigtrap_put() on this path. */
	if (sigtrap_get())
		return;

	assert(src != dst);
	do {
		DBG(("%s: copying box (%d, %d), (%d, %d)\n",
		     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));

		assert(box->x2 > box->x1);
		assert(box->y2 > box->y1);

		/* Source and destination use the same coordinates, so one
		 * set of bounds checks against the pixmap covers both. */
		assert(box->x1 >= 0);
		assert(box->y1 >= 0);
		assert(box->x2 <= pixmap->drawable.width);
		assert(box->y2 <= pixmap->drawable.height);

		memcpy_blt(src, dst, bpp,
			   src_pitch, dst_pitch,
			   box->x1, box->y1,
			   box->x1, box->y1,
			   box->x2 - box->x1, box->y2 - box->y1);
		box++;
	} while (--n);

	sigtrap_put();
}
/*
 * CPU-mapping variant of the in-place box download.
 *
 * Returns false, having copied nothing, when download_inplace__cpu()
 * rejects the transfer, when kgem_bo_map__cpu() yields no mapping, or when
 * sigtrap_get() returns non-zero.  Otherwise each of the @n boxes is copied
 * from @bo into the pixmap's backing store (same coordinates on both sides)
 * and true is returned.
 *
 * X-tiled buffers are copied with memcpy_from_tiled_x(); any other tiling
 * value that gets here is copied with memcpy_blt().  Y tiling is asserted
 * not to reach this function.
 *
 * The copy loops test the counter only after the first iteration, so the
 * caller has to supply at least one box.
 */
static bool read_boxes_inplace__cpu(struct kgem *kgem,
				    PixmapPtr pixmap, struct kgem_bo *bo,
				    const BoxRec *box, int n)
{
	void *pixels = pixmap->devPrivate.ptr;
	const int depth_bits = pixmap->drawable.bitsPerPixel;
	const int bo_stride = bo->pitch;
	const int pixmap_stride = pixmap->devKind;
	void *mapping;

	/* NOTE(review): this hands over the raw pixel pointer, whereas
	 * sna_read_boxes() passes the PixmapPtr itself to download_inplace().
	 * Confirm which of the two download_inplace__cpu() expects. */
	if (!download_inplace__cpu(kgem, pixels, bo, box, n))
		return false;

	assert(kgem_bo_can_map__cpu(kgem, bo, false));
	assert(bo->tiling != I915_TILING_Y);

	mapping = kgem_bo_map__cpu(kgem, bo);
	if (mapping == NULL)
		return false;

	kgem_bo_sync__cpu_full(kgem, bo, 0);

	/* Non-zero means the guarded copy must be skipped; no sigtrap_put()
	 * is issued on this path. */
	if (sigtrap_get())
		return false;

	if (bo->tiling != I915_TILING_X) {
		/* Linear layout: plain strided copy. */
		for (;;) {
			memcpy_blt(mapping, pixels, depth_bits,
				   bo_stride, pixmap_stride,
				   box->x1, box->y1,
				   box->x1, box->y1,
				   box->x2 - box->x1, box->y2 - box->y1);
			box++;
			if (--n == 0)
				break;
		}
	} else {
		/* X tiling: needs the detiling copy routine. */
		assert(kgem->memcpy_from_tiled_x);
		for (;;) {
			memcpy_from_tiled_x(kgem, mapping, pixels, depth_bits,
					    bo_stride, pixmap_stride,
					    box->x1, box->y1,
					    box->x1, box->y1,
					    box->x2 - box->x1, box->y2 - box->y1);
			box++;
			if (--n == 0)
				break;
		}
	}

	sigtrap_put();
	return true;
}
void sna_read_boxes(struct sna *sna, PixmapPtr dst, struct kgem_bo *src_bo, const BoxRec *box, int nbox) { struct kgem *kgem = &sna->kgem; struct kgem_bo *dst_bo; BoxRec extents; const BoxRec *tmp_box; int tmp_nbox; void *ptr; int src_pitch, cpp, offset; int n, cmd, br13; bool can_blt; DBG(("%s x %d, src=(handle=%d), dst=(size=(%d, %d)\n", __FUNCTION__, nbox, src_bo->handle, dst->drawable.width, dst->drawable.height)); #ifndef NDEBUG for (n = 0; n < nbox; n++) { if (box[n].x1 < 0 || box[n].y1 < 0 || box[n].x2 * dst->drawable.bitsPerPixel/8 > src_bo->pitch || box[n].y2 * src_bo->pitch > kgem_bo_size(src_bo)) { FatalError("source out-of-bounds box[%d]=(%d, %d), (%d, %d), pitch=%d, size=%d\n", n, box[n].x1, box[n].y1, box[n].x2, box[n].y2, src_bo->pitch, kgem_bo_size(src_bo)); } } #endif /* XXX The gpu is faster to perform detiling in bulk, but takes * longer to setup and retrieve the results, with an additional * copy. The long term solution is to use snoopable bo and avoid * this path. */ if (download_inplace(kgem, dst, src_bo, box ,nbox)) { fallback: read_boxes_inplace(kgem, dst, src_bo, box, nbox); return; } can_blt = kgem_bo_can_blt(kgem, src_bo) && (box[0].x2 - box[0].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4); extents = box[0]; for (n = 1; n < nbox; n++) { if (box[n].x1 < extents.x1) extents.x1 = box[n].x1; if (box[n].x2 > extents.x2) extents.x2 = box[n].x2; if (can_blt) can_blt = (box[n].x2 - box[n].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4); if (box[n].y1 < extents.y1) extents.y1 = box[n].y1; if (box[n].y2 > extents.y2) extents.y2 = box[n].y2; } if (kgem_bo_can_map(kgem, src_bo)) { /* Is it worth detiling? */ if ((extents.y2 - extents.y1 - 1) * src_bo->pitch < 4096) goto fallback; } /* Try to avoid switching rings... 
*/ if (!can_blt || kgem->ring == KGEM_RENDER || upload_too_large(sna, extents.x2 - extents.x1, extents.y2 - extents.y1)) { PixmapRec tmp; tmp.drawable.width = extents.x2 - extents.x1; tmp.drawable.height = extents.y2 - extents.y1; tmp.drawable.depth = dst->drawable.depth; tmp.drawable.bitsPerPixel = dst->drawable.bitsPerPixel; tmp.devPrivate.ptr = NULL; assert(tmp.drawable.width); assert(tmp.drawable.height); if (must_tile(sna, tmp.drawable.width, tmp.drawable.height)) { BoxRec tile, stack[64], *clipped, *c; int step; if (n > ARRAY_SIZE(stack)) { clipped = malloc(sizeof(BoxRec) * n); if (clipped == NULL) goto fallback; } else clipped = stack; step = MIN(sna->render.max_3d_size, 8*(MAXSHORT&~63) / dst->drawable.bitsPerPixel); while (step * step * 4 > sna->kgem.max_upload_tile_size) step /= 2; DBG(("%s: tiling download, using %dx%d tiles\n", __FUNCTION__, step, step)); assert(step); for (tile.y1 = extents.y1; tile.y1 < extents.y2; tile.y1 = tile.y2) { int y2 = tile.y1 + step; if (y2 > extents.y2) y2 = extents.y2; tile.y2 = y2; for (tile.x1 = extents.x1; tile.x1 < extents.x2; tile.x1 = tile.x2) { int x2 = tile.x1 + step; if (x2 > extents.x2) x2 = extents.x2; tile.x2 = x2; tmp.drawable.width = tile.x2 - tile.x1; tmp.drawable.height = tile.y2 - tile.y1; c = clipped; for (n = 0; n < nbox; n++) { *c = box[n]; if (!box_intersect(c, &tile)) continue; DBG(("%s: box(%d, %d), (%d, %d),, dst=(%d, %d)\n", __FUNCTION__, c->x1, c->y1, c->x2, c->y2, c->x1 - tile.x1, c->y1 - tile.y1)); c++; } if (c == clipped) continue; dst_bo = kgem_create_buffer_2d(kgem, tmp.drawable.width, tmp.drawable.height, tmp.drawable.bitsPerPixel, KGEM_BUFFER_LAST, &ptr); if (!dst_bo) { if (clipped != stack) free(clipped); goto fallback; } if (!sna->render.copy_boxes(sna, GXcopy, dst, src_bo, 0, 0, &tmp, dst_bo, -tile.x1, -tile.y1, clipped, c-clipped, COPY_LAST)) { kgem_bo_destroy(&sna->kgem, dst_bo); if (clipped != stack) free(clipped); goto fallback; } kgem_bo_submit(&sna->kgem, dst_bo); 
kgem_buffer_read_sync(kgem, dst_bo); if (sigtrap_get() == 0) { while (c-- != clipped) { memcpy_blt(ptr, dst->devPrivate.ptr, tmp.drawable.bitsPerPixel, dst_bo->pitch, dst->devKind, c->x1 - tile.x1, c->y1 - tile.y1, c->x1, c->y1, c->x2 - c->x1, c->y2 - c->y1); } sigtrap_put(); } kgem_bo_destroy(&sna->kgem, dst_bo); } } if (clipped != stack) free(clipped); } else { dst_bo = kgem_create_buffer_2d(kgem, tmp.drawable.width, tmp.drawable.height, tmp.drawable.bitsPerPixel, KGEM_BUFFER_LAST, &ptr); if (!dst_bo) goto fallback; if (!sna->render.copy_boxes(sna, GXcopy, dst, src_bo, 0, 0, &tmp, dst_bo, -extents.x1, -extents.y1, box, nbox, COPY_LAST)) { kgem_bo_destroy(&sna->kgem, dst_bo); goto fallback; } kgem_bo_submit(&sna->kgem, dst_bo); kgem_buffer_read_sync(kgem, dst_bo); if (sigtrap_get() == 0) { for (n = 0; n < nbox; n++) { memcpy_blt(ptr, dst->devPrivate.ptr, tmp.drawable.bitsPerPixel, dst_bo->pitch, dst->devKind, box[n].x1 - extents.x1, box[n].y1 - extents.y1, box[n].x1, box[n].y1, box[n].x2 - box[n].x1, box[n].y2 - box[n].y1); } sigtrap_put(); } kgem_bo_destroy(&sna->kgem, dst_bo); } return; } /* count the total number of bytes to be read and allocate a bo */ cpp = dst->drawable.bitsPerPixel / 8; offset = 0; for (n = 0; n < nbox; n++) { int height = box[n].y2 - box[n].y1; int width = box[n].x2 - box[n].x1; offset += PITCH(width, cpp) * height; } DBG((" read buffer size=%d\n", offset)); dst_bo = kgem_create_buffer(kgem, offset, KGEM_BUFFER_LAST, &ptr); if (!dst_bo) { read_boxes_inplace(kgem, dst, src_bo, box, nbox); return; } cmd = XY_SRC_COPY_BLT_CMD; src_pitch = src_bo->pitch; if (kgem->gen >= 040 && src_bo->tiling) { cmd |= BLT_SRC_TILED; src_pitch >>= 2; }
fastcall static void sna_tiling_composite_spans_done(struct sna *sna, const struct sna_composite_spans_op *op) { struct sna_tile_state *tile = op->base.priv; struct sna_composite_spans_op tmp; int x, y, n, step; bool force_fallback = false; /* Use a small step to accommodate enlargement through tile alignment */ step = sna->render.max_3d_size; if (tile->dst_x & (8*512 / tile->dst->pDrawable->bitsPerPixel - 1) || tile->dst_y & 63) step /= 2; while (step * step * 4 > sna->kgem.max_copy_tile_size) step /= 2; DBG(("%s -- %dx%d, count=%d, step size=%d\n", __FUNCTION__, tile->width, tile->height, tile->rect_count, step)); if (tile->rect_count == 0) goto done; for (y = 0; y < tile->height; y += step) { int height = step; if (y + height > tile->height) height = tile->height - y; for (x = 0; x < tile->width; x += step) { const struct sna_tile_span *r = (void *)tile->rects; int width = step; if (x + width > tile->width) width = tile->width - x; if (!force_fallback && sna->render.composite_spans(sna, tile->op, tile->src, tile->dst, tile->src_x + x, tile->src_y + y, tile->dst_x + x, tile->dst_y + y, width, height, tile->flags, memset(&tmp, 0, sizeof(tmp)))) { for (n = 0; n < tile->rect_count; n++) { BoxRec b; b.x1 = r->box.x1 - tile->dst_x; if (b.x1 < x) b.x1 = x; b.y1 = r->box.y1 - tile->dst_y; if (b.y1 < y) b.y1 = y; b.x2 = r->box.x2 - tile->dst_x; if (b.x2 > x + width) b.x2 = x + width; b.y2 = r->box.y2 - tile->dst_y; if (b.y2 > y + height) b.y2 = y + height; DBG(("%s: rect[%d] = (%d, %d)x(%d,%d), tile=(%d,%d)x(%d, %d), blt=(%d,%d),(%d,%d)\n", __FUNCTION__, n, r->box.x1, r->box.y1, r->box.x2-r->box.x1, r->box.y2-r->box.y1, x, y, width, height, b.x1, b.y1, b.x2, b.y2)); if (b.y2 > b.y1 && b.x2 > b.x1) tmp.box(sna, &tmp, &b, r->opacity); r++; } tmp.done(sna, &tmp); } else { unsigned int flags; DBG(("%s -- falback\n", __FUNCTION__)); if (tile->op <= PictOpSrc) flags = MOVE_WRITE; else flags = MOVE_WRITE | MOVE_READ; if (!sna_drawable_move_to_cpu(tile->dst->pDrawable, flags)) 
goto done; if (tile->dst->alphaMap && !sna_drawable_move_to_cpu(tile->dst->alphaMap->pDrawable, flags)) goto done; if (tile->src->pDrawable && !sna_drawable_move_to_cpu(tile->src->pDrawable, MOVE_READ)) goto done; if (tile->src->alphaMap && !sna_drawable_move_to_cpu(tile->src->alphaMap->pDrawable, MOVE_READ)) goto done; for (n = 0; n < tile->rect_count; n++) { BoxRec b; b.x1 = r->box.x1 - tile->dst_x; if (b.x1 < x) b.x1 = x; b.y1 = r->box.y1 - tile->dst_y; if (b.y1 < y) b.y1 = y; b.x2 = r->box.x2 - tile->dst_x; if (b.x2 > x + width) b.x2 = x + width; b.y2 = r->box.y2 - tile->dst_y; if (b.y2 > y + height) b.y2 = y + height; DBG(("%s: rect[%d] = (%d, %d)x(%d,%d), tile=(%d,%d)x(%d, %d), blt=(%d,%d),(%d,%d)\n", __FUNCTION__, n, r->box.x1, r->box.y1, r->box.x2-r->box.x1, r->box.y2-r->box.y1, x, y, width, height, b.x1, b.y1, b.x2, b.y2)); if (b.y2 > b.y1 && b.x2 > b.x1) { xRenderColor alpha; PicturePtr mask; int error; alpha.red = alpha.green = alpha.blue = 0; alpha.alpha = r->opacity * 0xffff; mask = CreateSolidPicture(0, &alpha, &error); if (!mask) goto done; if (sigtrap_get() == 0) { fbComposite(tile->op, tile->src, mask, tile->dst, tile->src_x + x, tile->src_y + y, 0, 0, tile->dst_x + x, tile->dst_y + y, width, height); sigtrap_put(); } FreePicture(mask, 0); } r++; } force_fallback = true; } } } done: if (tile->rects != tile->rects_embedded) free(tile->rects); free(tile); }
static void sna_tiling_composite_done(struct sna *sna, const struct sna_composite_op *op) { struct sna_tile_state *tile = op->priv; struct sna_composite_op tmp; int x, y, n, step; /* Use a small step to accommodate enlargement through tile alignment */ step = sna->render.max_3d_size; if (tile->dst_x & (8*512 / tile->dst->pDrawable->bitsPerPixel - 1) || tile->dst_y & 63) step /= 2; while (step * step * 4 > sna->kgem.max_copy_tile_size) step /= 2; DBG(("%s -- %dx%d, count=%d, step size=%d\n", __FUNCTION__, tile->width, tile->height, tile->rect_count, step)); if (tile->rect_count == 0) goto done; for (y = 0; y < tile->height; y += step) { int height = step; if (y + height > tile->height) height = tile->height - y; for (x = 0; x < tile->width; x += step) { int width = step; if (x + width > tile->width) width = tile->width - x; if (sna->render.composite(sna, tile->op, tile->src, tile->mask, tile->dst, tile->src_x + x, tile->src_y + y, tile->mask_x + x, tile->mask_y + y, tile->dst_x + x, tile->dst_y + y, width, height, COMPOSITE_PARTIAL, memset(&tmp, 0, sizeof(tmp)))) { for (n = 0; n < tile->rect_count; n++) { const struct sna_composite_rectangles *r = &tile->rects[n]; int x1, x2, dx, y1, y2, dy; x1 = r->dst.x - tile->dst_x, dx = 0; if (x1 < x) dx = x - x1, x1 = x; y1 = r->dst.y - tile->dst_y, dy = 0; if (y1 < y) dy = y - y1, y1 = y; x2 = r->dst.x + r->width - tile->dst_x; if (x2 > x + width) x2 = x + width; y2 = r->dst.y + r->height - tile->dst_y; if (y2 > y + height) y2 = y + height; DBG(("%s: rect[%d] = (%d, %d)x(%d,%d), tile=(%d,%d)x(%d, %d), blt=(%d,%d),(%d,%d), delta=(%d,%d)\n", __FUNCTION__, n, r->dst.x, r->dst.y, r->width, r->height, x, y, width, height, x1, y1, x2, y2, dx, dy)); if (y2 > y1 && x2 > x1) { struct sna_composite_rectangles rr; rr.src.x = dx + r->src.x; rr.src.y = dy + r->src.y; rr.mask.x = dx + r->mask.x; rr.mask.y = dy + r->mask.y; rr.dst.x = dx + r->dst.x; rr.dst.y = dy + r->dst.y; rr.width = x2 - x1; rr.height = y2 - y1; tmp.blt(sna, &tmp, &rr); 
} } tmp.done(sna, &tmp); } else { unsigned int flags; DBG(("%s -- falback\n", __FUNCTION__)); if (tile->op <= PictOpSrc) flags = MOVE_WRITE; else flags = MOVE_WRITE | MOVE_READ; if (!sna_drawable_move_to_cpu(tile->dst->pDrawable, flags)) goto done; if (tile->dst->alphaMap && !sna_drawable_move_to_cpu(tile->dst->alphaMap->pDrawable, flags)) goto done; if (tile->src->pDrawable && !sna_drawable_move_to_cpu(tile->src->pDrawable, MOVE_READ)) goto done; if (tile->src->alphaMap && !sna_drawable_move_to_cpu(tile->src->alphaMap->pDrawable, MOVE_READ)) goto done; if (tile->mask && tile->mask->pDrawable && !sna_drawable_move_to_cpu(tile->mask->pDrawable, MOVE_READ)) goto done; if (tile->mask && tile->mask->alphaMap && !sna_drawable_move_to_cpu(tile->mask->alphaMap->pDrawable, MOVE_READ)) goto done; if (sigtrap_get() == 0) { fbComposite(tile->op, tile->src, tile->mask, tile->dst, tile->src_x + x, tile->src_y + y, tile->mask_x + x, tile->mask_y + y, tile->dst_x + x, tile->dst_y + y, width, height); sigtrap_put(); } } } } done: if (tile->rects != tile->rects_embedded) free(tile->rects); free(tile); }