static void radeonCheckQuery(struct gl_context *ctx, struct gl_query_object *q)
{
	radeon_print(RADEON_STATE, RADEON_TRACE, "%s: query id %d\n", __FUNCTION__, q->Id);

#ifdef DRM_RADEON_GEM_BUSY
	radeonContextPtr radeon = RADEON_CONTEXT(ctx);

	if (radeon->radeonScreen->kernel_mm) {
		struct radeon_query_object *query = (struct radeon_query_object *)q;
		uint32_t domain;

		/* Need to perform a flush, as per ARB_occlusion_query spec */
		if (radeon_bo_is_referenced_by_cs(query->bo, radeon->cmdbuf.cs)) {
			ctx->Driver.Flush(ctx);
		}

		if (radeon_bo_is_busy(query->bo, &domain) == 0) {
			radeonQueryGetResult(ctx, q);
			query->Base.Ready = GL_TRUE;
		}
	} else {
		radeonWaitQuery(ctx, q);
	}
#else
	radeonWaitQuery(ctx, q);
#endif
}
Example #2
void radeonGenerateMipmap(GLcontext* ctx, GLenum target, struct gl_texture_object *texObj)
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	struct radeon_bo *bo;
	GLuint face = _mesa_tex_target_to_face(target);
	radeon_texture_image *baseimage = get_radeon_texture_image(texObj->Image[face][texObj->BaseLevel]);
	bo = !baseimage->mt ? baseimage->bo : baseimage->mt->bo;

	radeon_print(RADEON_TEXTURE, RADEON_TRACE,
		"%s(%p, target %s, tex %p)\n",
		__func__, ctx, _mesa_lookup_enum_by_nr(target),
		texObj);

	if (bo && radeon_bo_is_referenced_by_cs(bo, rmesa->cmdbuf.cs)) {
		radeon_print(RADEON_TEXTURE, RADEON_NORMAL,
			"%s(%p, tex %p) Trying to generate mipmap for texture "
			"in processing by GPU.\n",
			__func__, ctx, texObj);
		radeon_firevertices(rmesa);
	}

	if (_mesa_meta_check_generate_mipmap_fallback(ctx, target, texObj)) {
		radeon_teximage_map(baseimage, GL_FALSE);
		radeon_generate_mipmap(ctx, target, texObj);
		radeon_teximage_unmap(baseimage);
	} else {
		_mesa_meta_GenerateMipmap(ctx, target, texObj);
	}
}
Example #3
static boolean radeon_bo_is_referenced(struct radeon_winsys_cs *rcs,
                                       struct radeon_winsys_cs_handle *_buf)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)_buf;

    return radeon_bo_is_referenced_by_cs(cs, bo);
}
Example #4
/**
 * Map texture memory/buffer into user space.
 * Note: the region of interest parameters are ignored here.
 * \param mapOut  returns start of mapping of region of interest
 * \param rowStrideOut  returns row stride in bytes
 */
static void
radeon_map_texture_image(struct gl_context *ctx,
                         struct gl_texture_image *texImage,
                         GLuint slice,
                         GLuint x, GLuint y, GLuint w, GLuint h,
                         GLbitfield mode,
                         GLubyte **map,
                         GLint *stride)
{
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    radeon_texture_image *image = get_radeon_texture_image(texImage);
    radeon_mipmap_tree *mt = image->mt;
    GLuint texel_size = _mesa_get_format_bytes(texImage->TexFormat);
    GLuint width = texImage->Width;
    GLuint height = texImage->Height;
    struct radeon_bo *bo = !image->mt ? image->bo : image->mt->bo;
    unsigned int bw, bh;
    GLboolean write = (mode & GL_MAP_WRITE_BIT) != 0;

    _mesa_get_format_block_size(texImage->TexFormat, &bw, &bh);
    assert(y % bh == 0);
    y /= bh;
    texel_size /= bw;

    if (bo && radeon_bo_is_referenced_by_cs(bo, rmesa->cmdbuf.cs)) {
        radeon_print(RADEON_TEXTURE, RADEON_VERBOSE,
                     "%s for texture that is "
                     "queued for GPU processing.\n",
                     __func__);
        radeon_firevertices(rmesa);
    }

    if (image->bo) {
        /* TFP case */
        radeon_bo_map(image->bo, write);
        *stride = get_texture_image_row_stride(rmesa, texImage->TexFormat, width, 0, texImage->TexObject->Target);
        *map = bo->ptr;
    } else if (likely(mt)) {
        void *base;
        radeon_mipmap_level *lvl = &image->mt->levels[texImage->Level];

        radeon_bo_map(mt->bo, write);
        base = mt->bo->ptr + lvl->faces[image->base.Base.Face].offset;

        *stride = lvl->rowstride;
        *map = base + (slice * height) * *stride;
    } else {
        /* texture data is in malloc'd memory */

        assert(map);

        *stride = _mesa_format_row_stride(texImage->TexFormat, width);
        *map = image->base.Buffer + (slice * height) * *stride;
    }

    *map += y * *stride + x * texel_size;
}
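For a block-compressed format the adjustment above converts texel coordinates into block coordinates before computing the map offset. Here is a standalone arithmetic sketch for a DXT1-style 4x4 block format (8 bytes per block); the values are picked for illustration, and it assumes the Mesa convention that the per-format byte size of a compressed format is reported per block:

#include <assert.h>
#include <stdio.h>

int main(void)
{
    /* DXT1-style format: 4x4 texel blocks, 8 bytes per block. */
    unsigned bw = 4, bh = 4;
    unsigned texel_size = 8;    /* bytes reported per block */
    unsigned rowstride = 64;    /* bytes per row of blocks (example value) */

    /* Region of interest starts at texel (x, y) = (8, 12), block aligned. */
    unsigned x = 8, y = 12;
    assert(x % bw == 0 && y % bh == 0);

    /* Same adjustment as radeon_map_texture_image(): y becomes a block-row
     * index, texel_size becomes bytes per texel column (8 / 4 = 2). */
    y /= bh;
    texel_size /= bw;

    /* 3 block rows down, 2 blocks (16 bytes) in: 3 * 64 + 8 * 2 = 208. */
    printf("map offset = %u bytes\n", y * rowstride + x * texel_size);
    return 0;
}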
Example #5
/**
 * All glTexSubImage calls go through this function.
 */
static void radeon_texsubimage(GLcontext* ctx, int dims, GLenum target, int level,
		GLint xoffset, GLint yoffset, GLint zoffset,
		GLsizei width, GLsizei height, GLsizei depth,
		GLsizei imageSize,
		GLenum format, GLenum type,
		const GLvoid * pixels,
		const struct gl_pixelstore_attrib *packing,
		struct gl_texture_object *texObj,
		struct gl_texture_image *texImage,
		int compressed)
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	radeonTexObj* t = radeon_tex_obj(texObj);
	radeon_texture_image* image = get_radeon_texture_image(texImage);

	radeon_print(RADEON_TEXTURE, RADEON_NORMAL,
			"%s %dd: texObj %p, texImage %p, face %d, level %d\n",
			__func__, dims, texObj, texImage,
			_mesa_tex_target_to_face(target), level);
	{
		struct radeon_bo *bo;
		bo = !image->mt ? image->bo : image->mt->bo;
		if (bo && radeon_bo_is_referenced_by_cs(bo, rmesa->cmdbuf.cs)) {
			radeon_print(RADEON_TEXTURE, RADEON_VERBOSE,
				"%s Calling texsubimage for texture that is "
				"queued for GPU processing.\n",
				__func__);
			radeon_firevertices(rmesa);
		}
	}


	t->validated = GL_FALSE;
	if (compressed) {
		pixels = _mesa_validate_pbo_compressed_teximage(
			ctx, imageSize, pixels, packing, "glCompressedTexSubImage");
	} else {
		pixels = _mesa_validate_pbo_teximage(ctx, dims,
			width, height, depth, format, type, pixels, packing, "glTexSubImage");
	}

	if (pixels) {
		radeon_store_teximage(ctx, dims,
			xoffset, yoffset, zoffset,
			width, height, depth,
			imageSize, format, type,
			pixels, packing,
			texObj, texImage,
			compressed);
	}

	_mesa_unmap_teximage_pbo(ctx, packing);
}
Example #6
static void radeonWaitQuery(struct gl_context *ctx, struct gl_query_object *q)
{
	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
	struct radeon_query_object *query = (struct radeon_query_object *)q;

	/* If the cmdbuf with packets for this query hasn't been flushed yet, do it now */
	if (radeon_bo_is_referenced_by_cs(query->bo, radeon->cmdbuf.cs))
		ctx->Driver.Flush(ctx);

	radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s: query id %d, bo %p, offset %d\n", __FUNCTION__, q->Id, query->bo, query->curr_offset);

	radeonQueryGetResult(ctx, q);

	query->Base.Ready = GL_TRUE;
}
Example #7
/**
 * Replace data in a subrange of buffer object.  If the data range
 * specified by size + offset extends beyond the end of the buffer or
 * if data is NULL, no copy is performed.
 * Called via glBufferSubDataARB().
 */
static void
radeonBufferSubData(struct gl_context * ctx,
                    GLintptrARB offset,
                    GLsizeiptrARB size,
                    const GLvoid * data,
                    struct gl_buffer_object *obj)
{
    radeonContextPtr radeon = RADEON_CONTEXT(ctx);
    struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj);

    if (radeon_bo_is_referenced_by_cs(radeon_obj->bo, radeon->cmdbuf.cs)) {
        radeon_firevertices(radeon);
    }

    radeon_bo_map(radeon_obj->bo, GL_TRUE);

    memcpy(radeon_obj->bo->ptr + offset, data, size);

    radeon_bo_unmap(radeon_obj->bo);
}
Example #8
static void radeon_bo_set_tiling(struct pb_buffer *_buf,
                                 struct radeon_winsys_cs *rcs,
                                 enum radeon_bo_layout microtiled,
                                 enum radeon_bo_layout macrotiled,
                                 uint32_t pitch)
{
    struct radeon_bo *bo = get_radeon_bo(_buf);
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct drm_radeon_gem_set_tiling args = {};

    /* Tiling determines how DRM treats the buffer data.
     * We must flush CS when changing it if the buffer is referenced. */
    if (cs && radeon_bo_is_referenced_by_cs(cs, bo)) {
        cs->flush_cs(cs->flush_data, 0);
    }

    while (p_atomic_read(&bo->num_active_ioctls)) {
        sched_yield();
    }

    if (microtiled == RADEON_LAYOUT_TILED)
        args.tiling_flags |= RADEON_BO_FLAGS_MICRO_TILE;
    else if (microtiled == RADEON_LAYOUT_SQUARETILED)
        args.tiling_flags |= RADEON_BO_FLAGS_MICRO_TILE_SQUARE;

    if (macrotiled == RADEON_LAYOUT_TILED)
        args.tiling_flags |= RADEON_BO_FLAGS_MACRO_TILE;

    args.handle = bo->handle;
    args.pitch = pitch;

    drmCommandWriteRead(bo->rws->fd,
                        DRM_RADEON_GEM_SET_TILING,
                        &args,
                        sizeof(args));
}
Example #9
/**
 * Validate texture mipmap tree.
 * If individual images are stored in different mipmap trees
 * use the mipmap tree that has the most of the correct data.
 */
int radeon_validate_texture_miptree(struct gl_context * ctx,
				    struct gl_sampler_object *samp,
				    struct gl_texture_object *texObj)
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	radeonTexObj *t = radeon_tex_obj(texObj);
	radeon_mipmap_tree *dst_miptree;

	if (samp == &texObj->Sampler && (t->validated || t->image_override)) {
		return GL_TRUE;
	}

	calculate_min_max_lod(samp, &t->base, &t->minLod, &t->maxLod);

	radeon_print(RADEON_TEXTURE, RADEON_NORMAL,
			"%s: Validating texture %p now, minLod = %d, maxLod = %d\n",
			__func__, texObj, t->minLod, t->maxLod);

	dst_miptree = get_biggest_matching_miptree(t, t->base.BaseLevel, t->base._MaxLevel);

	radeon_miptree_unreference(&t->mt);
	if (!dst_miptree) {
		radeon_try_alloc_miptree(rmesa, t);
		radeon_print(RADEON_TEXTURE, RADEON_NORMAL,
			"%s: No matching miptree found, allocated new one %p\n",
			__func__, t->mt);

	} else {
		radeon_miptree_reference(dst_miptree, &t->mt);
		radeon_print(RADEON_TEXTURE, RADEON_NORMAL,
			"%s: Using miptree %p\n", __func__, t->mt);
	}

	const unsigned faces = _mesa_num_tex_faces(texObj->Target);
	unsigned face, level;
	radeon_texture_image *img;
	/* Validate only the levels that will actually be used during rendering */
	for (face = 0; face < faces; ++face) {
		for (level = t->minLod; level <= t->maxLod; ++level) {
			img = get_radeon_texture_image(texObj->Image[face][level]);

			radeon_print(RADEON_TEXTURE, RADEON_TRACE,
				"Checking image level %d, face %d, mt %p ... ",
				level, face, img->mt);
			
			if (img->mt != t->mt && !img->used_as_render_target) {
				radeon_print(RADEON_TEXTURE, RADEON_TRACE,
					"MIGRATING\n");

				struct radeon_bo *src_bo = (img->mt) ? img->mt->bo : img->bo;
				if (src_bo && radeon_bo_is_referenced_by_cs(src_bo, rmesa->cmdbuf.cs)) {
					radeon_firevertices(rmesa);
				}
				migrate_image_to_miptree(t->mt, img, face, level);
			} else
				radeon_print(RADEON_TEXTURE, RADEON_TRACE, "OK\n");
		}
	}

	t->validated = GL_TRUE;

	return GL_TRUE;
}
Example #10
/**
 * All glTexImage calls go through this function.
 */
static void radeon_teximage(
	GLcontext *ctx, int dims,
	GLenum target, GLint level,
	GLint internalFormat,
	GLint width, GLint height, GLint depth,
	GLsizei imageSize,
	GLenum format, GLenum type, const GLvoid * pixels,
	const struct gl_pixelstore_attrib *packing,
	struct gl_texture_object *texObj,
	struct gl_texture_image *texImage,
	int compressed)
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	radeonTexObj* t = radeon_tex_obj(texObj);
	radeon_texture_image* image = get_radeon_texture_image(texImage);
	GLint postConvWidth = width;
	GLint postConvHeight = height;
	GLuint face = _mesa_tex_target_to_face(target);

	radeon_print(RADEON_TEXTURE, RADEON_NORMAL,
			"%s %dd: texObj %p, texImage %p, face %d, level %d\n",
			__func__, dims, texObj, texImage, face, level);
	{
		struct radeon_bo *bo;
		bo = !image->mt ? image->bo : image->mt->bo;
		if (bo && radeon_bo_is_referenced_by_cs(bo, rmesa->cmdbuf.cs)) {
			radeon_print(RADEON_TEXTURE, RADEON_VERBOSE,
				"%s Calling teximage for texture that is "
				"queued for GPU processing.\n",
				__func__);
			radeon_firevertices(rmesa);
		}
	}


	t->validated = GL_FALSE;

	if (ctx->_ImageTransferState & IMAGE_CONVOLUTION_BIT) {
	       _mesa_adjust_image_for_convolution(ctx, dims, &postConvWidth,
						  &postConvHeight);
	}

	if (!_mesa_is_format_compressed(texImage->TexFormat)) {
		GLuint texelBytes = _mesa_get_format_bytes(texImage->TexFormat);
		/* Minimum pitch of 32 bytes */
		if (postConvWidth * texelBytes < 32) {
			postConvWidth = 32 / texelBytes;
			texImage->RowStride = postConvWidth;
		}
		if (!image->mt) {
			assert(texImage->RowStride == postConvWidth);
		}
	}

	/* Mesa core only clears texImage->Data but not image->mt */
	radeonFreeTexImageData(ctx, texImage);

	if (!t->bo) {
		teximage_assign_miptree(rmesa, texObj, texImage, face, level);
		if (!image->mt) {
			int size = _mesa_format_image_size(texImage->TexFormat,
								texImage->Width,
								texImage->Height,
								texImage->Depth);
			texImage->Data = _mesa_alloc_texmemory(size);
			radeon_print(RADEON_TEXTURE, RADEON_VERBOSE,
					"%s %dd: texObj %p, texImage %p, "
					" no miptree assigned, using local memory %p\n",
					__func__, dims, texObj, texImage, texImage->Data);
		}
	}

	/* Upload texture image; note that the spec allows pixels to be NULL */
	if (compressed) {
		pixels = _mesa_validate_pbo_compressed_teximage(
			ctx, imageSize, pixels, packing, "glCompressedTexImage");
	} else {
		pixels = _mesa_validate_pbo_teximage(
			ctx, dims, width, height, depth,
			format, type, pixels, packing, "glTexImage");
	}

	if (pixels) {
		radeon_store_teximage(ctx, dims,
			0, 0, 0,
			width, height, depth,
			imageSize, format, type,
			pixels, packing,
			texObj, texImage,
			compressed);
	}

	_mesa_unmap_teximage_pbo(ctx, packing);
}
Example #11
static void *radeon_bo_map_internal(struct pb_buffer *_buf,
                                    unsigned flags, void *flush_ctx)
{
    struct radeon_bo *bo = radeon_bo(_buf);
    struct radeon_drm_cs *cs = flush_ctx;
    struct drm_radeon_gem_mmap args = {};
    void *ptr;

    /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */
    if (!(flags & PB_USAGE_UNSYNCHRONIZED)) {
        /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */
        if (flags & PB_USAGE_DONTBLOCK) {
            if (!(flags & PB_USAGE_CPU_WRITE)) {
                /* Mapping for read.
                 *
                 * Since we are mapping for read, we don't need to wait
                 * if the GPU is using the buffer for read too
                 * (neither one is changing it).
                 *
                 * Only check whether the buffer is being used for write. */
                if (radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
                    cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC);
                    return NULL;
                }

                if (radeon_bo_is_busy((struct pb_buffer*)bo,
                                      RADEON_USAGE_WRITE)) {
                    return NULL;
                }
            } else {
                if (radeon_bo_is_referenced_by_cs(cs, bo)) {
                    cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC);
                    return NULL;
                }

                if (radeon_bo_is_busy((struct pb_buffer*)bo,
                                      RADEON_USAGE_READWRITE)) {
                    return NULL;
                }
            }
        } else {
            if (!(flags & PB_USAGE_CPU_WRITE)) {
                /* Mapping for read.
                 *
                 * Since we are mapping for read, we don't need to wait
                 * if the GPU is using the buffer for read too
                 * (neither one is changing it).
                 *
                 * Only check whether the buffer is being used for write. */
                if (radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
                    cs->flush_cs(cs->flush_data, 0);
                }
                radeon_bo_wait((struct pb_buffer*)bo,
                               RADEON_USAGE_WRITE);
            } else {
                /* Mapping for write. */
                if (radeon_bo_is_referenced_by_cs(cs, bo)) {
                    cs->flush_cs(cs->flush_data, 0);
                } else {
                    /* Try to avoid busy-waiting in radeon_bo_wait. */
                    if (p_atomic_read(&bo->num_active_ioctls))
                        radeon_drm_cs_sync_flush(cs);
                }

                radeon_bo_wait((struct pb_buffer*)bo, RADEON_USAGE_READWRITE);
            }
        }
    }

    /* Return the pointer if it's already mapped. */
    if (bo->ptr)
        return bo->ptr;

    /* Map the buffer. */
    pipe_mutex_lock(bo->map_mutex);
    /* Return the pointer if it's already mapped (in case of a race). */
    if (bo->ptr) {
        pipe_mutex_unlock(bo->map_mutex);
        return bo->ptr;
    }
    args.handle = bo->handle;
    args.offset = 0;
    args.size = (uint64_t)bo->size;
    if (drmCommandWriteRead(bo->rws->fd,
                            DRM_RADEON_GEM_MMAP,
                            &args,
                            sizeof(args))) {
        pipe_mutex_unlock(bo->map_mutex);
        fprintf(stderr, "radeon: gem_mmap failed: %p 0x%08X\n",
                bo, bo->handle);
        return NULL;
    }

    ptr = mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
               bo->rws->fd, args.addr_ptr);
    if (ptr == MAP_FAILED) {
        pipe_mutex_unlock(bo->map_mutex);
        fprintf(stderr, "radeon: mmap failed, errno: %i\n", errno);
        return NULL;
    }
    bo->ptr = ptr;
    pipe_mutex_unlock(bo->map_mutex);

    return bo->ptr;
}
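The map path above follows a check / lock / re-check sequence so that threads racing to map the same buffer end up sharing a single mmap. A minimal standalone pthread sketch of that pattern; the names are hypothetical and this is not the winsys code itself:

#include <pthread.h>
#include <stdlib.h>

/* Hypothetical structure illustrating the caching done around bo->ptr. */
struct cached_map {
    void *ptr;                  /* NULL until the first successful map */
    size_t size;
    pthread_mutex_t map_mutex;  /* must be initialized by the creator */
};

static void *do_map(struct cached_map *m)
{
    /* Stand-in for the DRM_RADEON_GEM_MMAP ioctl + mmap() sequence. */
    return malloc(m->size);
}

void *cached_map_get(struct cached_map *m)
{
    /* Fast path without the lock; like the driver, this relies on a plain
     * pointer load racing safely with the store below. */
    if (m->ptr)
        return m->ptr;

    pthread_mutex_lock(&m->map_mutex);
    /* Re-check under the lock: another thread may have mapped it meanwhile. */
    if (!m->ptr)
        m->ptr = do_map(m);
    pthread_mutex_unlock(&m->map_mutex);

    return m->ptr;
}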
Example #12
static Bool
RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
                           int h, char *dst, int dst_pitch)
{
    RINFO_FROM_SCREEN(pSrc->drawable.pScreen);
    struct radeon_exa_pixmap_priv *driver_priv;
    struct radeon_bo *scratch = NULL;
    struct radeon_bo *copy_src;
    unsigned size;
    uint32_t datatype = 0;
    uint32_t src_domain = 0;
    uint32_t src_pitch_offset;
    unsigned bpp = pSrc->drawable.bitsPerPixel;
    uint32_t scratch_pitch = RADEON_ALIGN(w * bpp / 8, 64);
    uint32_t copy_pitch;
    uint32_t swap = RADEON_HOST_DATA_SWAP_NONE;
    int ret;
    Bool flush = FALSE;
    Bool r;

    if (bpp < 8)
	return FALSE;

    driver_priv = exaGetPixmapDriverPrivate(pSrc);
    if (!driver_priv || !driver_priv->bo)
	return FALSE;

#if X_BYTE_ORDER == X_BIG_ENDIAN
    switch (bpp) {
    case 32:
	swap = RADEON_HOST_DATA_SWAP_32BIT;
	break;
    case 16:
	swap = RADEON_HOST_DATA_SWAP_16BIT;
	break;
    }
#endif

    /* If we know the BO won't end up in VRAM anyway, don't bother with a scratch */
    copy_src = driver_priv->bo;
    copy_pitch = pSrc->devKind;
    if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
	if (radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
	    src_domain = radeon_bo_get_src_domain(driver_priv->bo);
	    if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) ==
		(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM))
		src_domain = 0;
	    else /* A write may be scheduled */
		flush = TRUE;
	}

	if (!src_domain)
	    radeon_bo_is_busy(driver_priv->bo, &src_domain);

	if (src_domain & ~(uint32_t)RADEON_GEM_DOMAIN_VRAM)
	    goto copy;
    }
    size = scratch_pitch * h;
    scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
    if (scratch == NULL) {
	goto copy;
    }
    radeon_cs_space_reset_bos(info->cs);
    radeon_add_pixmap(info->cs, pSrc, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
    radeon_cs_space_add_persistent_bo(info->cs, scratch, 0, RADEON_GEM_DOMAIN_GTT);
    ret = radeon_cs_space_check(info->cs);
    if (ret) {
	goto copy;
    }
    RADEONGetDatatypeBpp(pSrc->drawable.bitsPerPixel, &datatype);
    RADEONGetPixmapOffsetPitch(pSrc, &src_pitch_offset);
    RADEON_SWITCH_TO_2D();
    RADEONBlitChunk(pScrn, driver_priv->bo, scratch, datatype, src_pitch_offset,
                    scratch_pitch << 16, x, y, 0, 0, w, h,
                    RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT,
                    RADEON_GEM_DOMAIN_GTT);
    copy_src = scratch;
    copy_pitch = scratch_pitch;
    flush = TRUE;

copy:
    if (flush)
	FLUSH_RING();

    ret = radeon_bo_map(copy_src, 0);
    if (ret) {
	ErrorF("failed to map pixmap: %d\n", ret);
        r = FALSE;
        goto out;
    }
    r = TRUE;
    w *= bpp / 8;
    if (copy_src == driver_priv->bo)
	size = y * copy_pitch + x * bpp / 8;
    else
	size = 0;
    while (h--) {
        RADEONCopySwap((uint8_t*)dst, copy_src->ptr + size, w, swap);
        size += copy_pitch;
        dst += dst_pitch;
    }
    radeon_bo_unmap(copy_src);
out:
    if (scratch)
	radeon_bo_unref(scratch);
    return r;
}
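The domain test in RADEONDownloadFromScreenCS above is plain bit arithmetic on the GEM domain flags: if the queued CS still allows both GTT and VRAM placement, the domain is treated as unknown and the busy query decides; a single-domain result means a write may be scheduled and forces a ring flush; and any placement outside VRAM lets the CPU read the pixmap directly, skipping the blit to a GTT scratch buffer. A standalone sketch of those three outcomes, with the domain bit values from the libdrm radeon headers reproduced here as assumptions:

#include <stdbool.h>
#include <stdio.h>

/* Domain bits as defined in the libdrm radeon headers (assumed values). */
#define RADEON_GEM_DOMAIN_CPU  0x1
#define RADEON_GEM_DOMAIN_GTT  0x2
#define RADEON_GEM_DOMAIN_VRAM 0x4

/* Mirror the decision made for a BO that is still referenced by the CS. */
static void classify(unsigned src_domain)
{
    const unsigned both = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
    bool flush = false;

    if ((src_domain & both) == both)
        src_domain = 0;   /* either placement allowed: domain unknown, ask the busy ioctl */
    else
        flush = true;     /* pinned to one domain: a write may be scheduled, flush the ring */

    /* Any placement outside VRAM means the CPU can read the BO directly,
     * so the blit through a GTT scratch buffer is skipped. */
    bool direct = (src_domain & ~(unsigned)RADEON_GEM_DOMAIN_VRAM) != 0;

    printf("resolved domain 0x%x: flush=%d direct_read=%d\n",
           src_domain, flush, direct);
}

int main(void)
{
    classify(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM); /* 0x6: unknown, no flush */
    classify(RADEON_GEM_DOMAIN_VRAM);                         /* 0x4: flush, copy via scratch */
    classify(RADEON_GEM_DOMAIN_GTT);                          /* 0x2: flush, direct CPU read */
    return 0;
}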
Example #13
static Bool
RADEONUploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
		       char *src, int src_pitch)
{
    ScreenPtr pScreen = pDst->drawable.pScreen;
    RINFO_FROM_SCREEN(pScreen);
    struct radeon_exa_pixmap_priv *driver_priv;
    struct radeon_bo *scratch = NULL;
    struct radeon_bo *copy_dst;
    unsigned char *dst;
    unsigned size;
    uint32_t datatype = 0;
    uint32_t dst_domain;
    uint32_t dst_pitch_offset;
    unsigned bpp = pDst->drawable.bitsPerPixel;
    uint32_t scratch_pitch = RADEON_ALIGN(w * bpp / 8, 64);
    uint32_t copy_pitch;
    uint32_t swap = RADEON_HOST_DATA_SWAP_NONE;
    int ret;
    Bool flush = TRUE;
    Bool r;
    int i;

    if (bpp < 8)
	return FALSE;

    driver_priv = exaGetPixmapDriverPrivate(pDst);
    if (!driver_priv || !driver_priv->bo)
	return FALSE;

#if X_BYTE_ORDER == X_BIG_ENDIAN
    switch (bpp) {
    case 32:
	swap = RADEON_HOST_DATA_SWAP_32BIT;
	break;
    case 16:
	swap = RADEON_HOST_DATA_SWAP_16BIT;
	break;
    }
#endif

    /* If we know the BO won't be busy / in VRAM, don't bother with a scratch */
    copy_dst = driver_priv->bo;
    copy_pitch = pDst->devKind;
    if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
	if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
	    flush = FALSE;
	    if (!radeon_bo_is_busy(driver_priv->bo, &dst_domain) &&
		!(dst_domain & RADEON_GEM_DOMAIN_VRAM))
		goto copy;
	}
	/* use cpu copy for fast fb access */
	if (info->is_fast_fb)
	    goto copy;
    }

    size = scratch_pitch * h;
    scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
    if (scratch == NULL) {
	goto copy;
    }
    radeon_cs_space_reset_bos(info->cs);
    radeon_add_pixmap(info->cs, pDst, 0, RADEON_GEM_DOMAIN_VRAM);
    radeon_cs_space_add_persistent_bo(info->cs, scratch, RADEON_GEM_DOMAIN_GTT, 0);
    ret = radeon_cs_space_check(info->cs);
    if (ret) {
	goto copy;
    }
    copy_dst = scratch;
    copy_pitch = scratch_pitch;
    flush = FALSE;

copy:
    if (flush)
	radeon_cs_flush_indirect(pScrn);

    ret = radeon_bo_map(copy_dst, 0);
    if (ret) {
        r = FALSE;
        goto out;
    }
    r = TRUE;
    size = w * bpp / 8;
    dst = copy_dst->ptr;
    if (copy_dst == driver_priv->bo)
	dst += y * copy_pitch + x * bpp / 8;
    for (i = 0; i < h; i++) {
        RADEONCopySwap(dst + i * copy_pitch, (uint8_t*)src, size, swap);
        src += src_pitch;
    }
    radeon_bo_unmap(copy_dst);

    if (copy_dst == scratch) {
	RADEONGetDatatypeBpp(pDst->drawable.bitsPerPixel, &datatype);
	RADEONGetPixmapOffsetPitch(pDst, &dst_pitch_offset);
	RADEON_SWITCH_TO_2D();
	RADEONBlitChunk(pScrn, scratch, driver_priv->bo, datatype, scratch_pitch << 16,
			dst_pitch_offset, 0, 0, x, y, w, h,
			RADEON_GEM_DOMAIN_GTT, RADEON_GEM_DOMAIN_VRAM);
    }

out:
    if (scratch)
	radeon_bo_unref(scratch);
    return r;
}
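Both EXA transfer paths size their GTT scratch buffer as RADEON_ALIGN(w * bpp / 8, 64), i.e. the byte pitch rounded up to a multiple of 64. A standalone sketch of that arithmetic, assuming RADEON_ALIGN is the usual round-up-to-a-multiple macro:

#include <stdio.h>

/* Assumed definition: round x up to the next multiple of a power-of-two align. */
#define RADEON_ALIGN(x, align) (((x) + (align) - 1) & ~((align) - 1))

int main(void)
{
    int w = 100, bpp = 32, h = 75;   /* example dimensions */
    unsigned scratch_pitch = RADEON_ALIGN(w * bpp / 8, 64);
    unsigned size = scratch_pitch * h;

    /* 400 unaligned row bytes -> 448-byte pitch -> 33600-byte scratch BO. */
    printf("pitch = %u, scratch size = %u\n", scratch_pitch, size);
    return 0;
}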