Exemple #1
0
static void
write_uint8_linear(struct thread *t,
                   const struct sfid_render_cache_args *args,
                   __m256i r, __m256i g, __m256i b, __m256i a)
{
    const int slice_y = args->rt.minimum_array_element * args->rt.qpitch;
    const int x = t->grf[1].uw[4];
    const int y = t->grf[1].uw[5] + slice_y;
    __m256i rgba;

    rgba = _mm256_slli_epi32(a, 8);
    rgba = _mm256_or_si256(rgba, b);
    rgba = _mm256_slli_epi32(rgba, 8);
    rgba = _mm256_or_si256(rgba, g);
    rgba = _mm256_slli_epi32(rgba, 8);
    rgba = _mm256_or_si256(rgba, r);

#define SWIZZLE(x, y, z, w) \
	( ((x) << 0) | ((y) << 2) | ((z) << 4) | ((w) << 6) )

    /* Swizzle two middle pixel pairs so that dword 0-3 and 4-7
     * form linear owords of pixels. */
    rgba = _mm256_permute4x64_epi64(rgba, SWIZZLE(0, 2, 1, 3));
    __m256i mask = _mm256_permute4x64_epi64(t->mask_q1, SWIZZLE(0, 2, 1, 3));

    void *base = args->rt.pixels + x * args->rt.cpp + y * args->rt.stride;

    _mm_maskstore_epi32(base,
                        _mm256_extractf128_si256(mask, 0),
                        _mm256_extractf128_si256(rgba, 0));
    _mm_maskstore_epi32(base + args->rt.stride,
                        _mm256_extractf128_si256(mask, 1),
                        _mm256_extractf128_si256(rgba, 1));
}
Exemple #2
0
bool Container<HouseExt>::Load(HouseClass *pThis, IStream *pStm) {
	HouseExt::ExtData* pData = this->LoadKey(pThis, pStm);

	//ULONG out;
	SWIZZLE(pData->Factory_BuildingType);
	SWIZZLE(pData->Factory_InfantryType);
	SWIZZLE(pData->Factory_VehicleType);
	SWIZZLE(pData->Factory_NavyType);
	SWIZZLE(pData->Factory_AircraftType);

	return pData != nullptr;
}
Exemple #3
0
static void
sfid_render_cache_rt_write_simd8_unorm8_ymajor(struct thread *t,
        const struct sfid_render_cache_args *args)
{
    const int slice_y = args->rt.minimum_array_element * args->rt.qpitch;
    const int x = t->grf[1].uw[4];
    const int y = t->grf[1].uw[5] + slice_y;
    const int cpp = 4;
    struct reg *src = &t->grf[args->src];
    const __m256 scale = _mm256_set1_ps(255.0f);
    const __m256 half =  _mm256_set1_ps(0.5f);
    __m256i r, g, b, a;
    __m256i rgba;

    switch (args->rt.format) {
    case SF_R8G8B8A8_UNORM:
        r = _mm256_cvtps_epi32(_mm256_add_ps(_mm256_mul_ps(src[0].reg, scale), half));
        g = _mm256_cvtps_epi32(_mm256_add_ps(_mm256_mul_ps(src[1].reg, scale), half));
        b = _mm256_cvtps_epi32(_mm256_add_ps(_mm256_mul_ps(src[2].reg, scale), half));
        a = _mm256_cvtps_epi32(_mm256_add_ps(_mm256_mul_ps(src[3].reg, scale), half));
        break;
    case SF_B8G8R8A8_UNORM:
        b = _mm256_cvtps_epi32(_mm256_add_ps(_mm256_mul_ps(src[0].reg, scale), half));
        g = _mm256_cvtps_epi32(_mm256_add_ps(_mm256_mul_ps(src[1].reg, scale), half));
        r = _mm256_cvtps_epi32(_mm256_add_ps(_mm256_mul_ps(src[2].reg, scale), half));
        a = _mm256_cvtps_epi32(_mm256_add_ps(_mm256_mul_ps(src[3].reg, scale), half));
        break;
    default:
        stub("unorm8 ymajor format");
        return;
    }

    rgba = _mm256_slli_epi32(a, 8);
    rgba = _mm256_or_si256(rgba, b);
    rgba = _mm256_slli_epi32(rgba, 8);
    rgba = _mm256_or_si256(rgba, g);
    rgba = _mm256_slli_epi32(rgba, 8);
    rgba = _mm256_or_si256(rgba, r);

    /* Swizzle two middle pixel pairs so that dword 0-3 and 4-7
     * form linear owords of pixels. */
    rgba = _mm256_permute4x64_epi64(rgba, SWIZZLE(0, 2, 1, 3));
    __m256i mask = _mm256_permute4x64_epi64(t->mask_q1, SWIZZLE(0, 2, 1, 3));

    void *base = ymajor_offset(args->rt.pixels, x, y, args->rt.stride, cpp);

    _mm_maskstore_epi32(base,
                        _mm256_extractf128_si256(mask, 0),
                        _mm256_extractf128_si256(rgba, 0));
    _mm_maskstore_epi32(base + 16,
                        _mm256_extractf128_si256(mask, 1),
                        _mm256_extractf128_si256(rgba, 1));
}
Exemple #4
0
static void
sfid_render_cache_rt_write_simd8_bgra_unorm8_xmajor(struct thread *t,
        const struct sfid_render_cache_args *args)
{
    __m256i argb;
    const float scale = 255.0f;
    struct reg src[4];

    memcpy(src, &t->grf[args->src], sizeof(src));

    const int cpp = 4;
    const int slice_y = args->rt.minimum_array_element * args->rt.qpitch;
    const int x = t->grf[1].uw[4];
    const int y = t->grf[1].uw[5] + slice_y;
    void *base = xmajor_offset(args->rt.pixels, x, y, args->rt.stride, cpp);

    if (gt.blend.enable) {
        /* Load unorm8 */
        __m128i lo = _mm_load_si128(base);
        __m128i hi = _mm_load_si128(base + 512);
        __m256i dst_argb = _mm256_inserti128_si256(_mm256_castsi128_si256(lo), hi, 1);
        dst_argb = _mm256_permute4x64_epi64(dst_argb, SWIZZLE(0, 2, 1, 3));

        blend_unorm8_argb(src, dst_argb);
    }

    gamma_correct(args->rt.format, src);

    const __m256i r = to_unorm(src[0].reg, scale);
    const __m256i g = to_unorm(src[1].reg, scale);
    const __m256i b = to_unorm(src[2].reg, scale);
    const __m256i a = to_unorm(src[3].reg, scale);

    argb = _mm256_slli_epi32(a, 8);
    argb = _mm256_or_si256(argb, r);
    argb = _mm256_slli_epi32(argb, 8);
    argb = _mm256_or_si256(argb, g);
    argb = _mm256_slli_epi32(argb, 8);
    argb = _mm256_or_si256(argb, b);

    /* Swizzle two middle pixel pairs so that dword 0-3 and 4-7
     * form linear owords of pixels. */
    argb = _mm256_permute4x64_epi64(argb, SWIZZLE(0, 2, 1, 3));
    __m256i mask = _mm256_permute4x64_epi64(t->mask_q1, SWIZZLE(0, 2, 1, 3));

    _mm_maskstore_epi32(base,
                        _mm256_extractf128_si256(mask, 0),
                        _mm256_extractf128_si256(argb, 0));
    _mm_maskstore_epi32(base + 512,
                        _mm256_extractf128_si256(mask, 1),
                        _mm256_extractf128_si256(argb, 1));
}
Exemple #5
0
static void
sfid_render_cache_rt_write_rep16_bgra_unorm8_xmajor(struct thread *t,
        const struct sfid_render_cache_args *args)
{
    const __m128 scale = _mm_set1_ps(255.0f);
    const __m128 half =  _mm_set1_ps(0.5f);
    struct reg src[1];

    memcpy(src, &t->grf[args->src], sizeof(src));

    if (srgb_format(args->rt.format)) {
        const __m256 inv_gamma = _mm256_set1_ps(1.0f / 2.4f);
        src[0].reg = _ZGVdN8vv_powf(src[0].reg, inv_gamma);
        /* Don't gamma correct alpha */
        src[0].f[3] = t->grf[args->src].f[3];
    }

    __m128 bgra = _mm_shuffle_ps(_mm256_castps256_ps128(src[0].reg),
                                 _mm256_castps256_ps128(src[0].reg),
                                 SWIZZLE(2, 1, 0, 3));

    bgra = _mm_mul_ps(bgra, scale);
    bgra = _mm_add_ps(bgra, half);

    __m128i bgra_i = _mm_cvtps_epi32(bgra);
    bgra_i = _mm_packus_epi32(bgra_i, bgra_i);
    bgra_i = _mm_packus_epi16(bgra_i, bgra_i);

    /* Swizzle two middle mask pairs so that dword 0-3 and 4-7
     * form linear owords of pixels. */
    __m256i mask = _mm256_permute4x64_epi64(t->mask_q1, SWIZZLE(0, 2, 1, 3));

    const int slice_y = args->rt.minimum_array_element * args->rt.qpitch;
    const int x0 = t->grf[1].uw[4];
    const int y0 = t->grf[1].uw[5] + slice_y;
    const int cpp = 4;
    void *base0 = xmajor_offset(args->rt.pixels, x0,  y0, args->rt.stride, cpp);

    _mm_maskstore_epi32(base0, _mm256_extractf128_si256(mask, 0), bgra_i);
    _mm_maskstore_epi32(base0 + 512, _mm256_extractf128_si256(mask, 1), bgra_i);

    const int x1 = t->grf[1].uw[8];
    const int y1 = t->grf[1].uw[9] + slice_y;
    void *base1 = xmajor_offset(args->rt.pixels, x1,  y1, args->rt.stride, 4);

    __m256i mask1 = _mm256_permute4x64_epi64(t->mask_q2, SWIZZLE(0, 2, 1, 3));
    _mm_maskstore_epi32(base1, _mm256_extractf128_si256(mask1, 0), bgra_i);
    _mm_maskstore_epi32(base1 + 512, _mm256_extractf128_si256(mask1, 1), bgra_i);
}
Exemple #6
0
static void
sfid_render_cache_rt_write_simd8_r_uint8_ymajor(struct thread *t,
        const struct sfid_render_cache_args *args)
{
    const int slice_y = args->rt.minimum_array_element * args->rt.qpitch;
    const int x = t->grf[1].uw[4];
    const int y = t->grf[1].uw[5] + slice_y;
    const int cpp = 1;

    void *base = ymajor_offset(args->rt.pixels, x, y, args->rt.stride, cpp);

    struct reg *src = &t->grf[args->src];

    __m256i r32 = _mm256_permute4x64_epi64(src[0].ireg, SWIZZLE(0, 2, 1, 3));

    __m128i lo = _mm256_extractf128_si256(r32, 0);
    __m128i hi = _mm256_extractf128_si256(r32, 1);
    __m128i r16 = _mm_packus_epi32(lo, hi);
    __m128i r8 = _mm_packus_epi16(r16, r16);

    /* FIXME: Needs masking. */
    *(uint32_t *) (base +  0) = _mm_extract_epi32(r8, 0);
    *(uint32_t *) (base + 16) = _mm_extract_epi32(r8, 1);
}
Exemple #7
0
int uwx_search_utable32(
    struct uwx_env *env,
    uint32_t ip,
    uint32_t text_base,
    uint32_t unwind_start,
    uint32_t unwind_end,
    struct uwx_utable_entry *uentry)
{
    int lb;
    int ub;
    int mid;
    int len;
    uint32_t code_start;
    uint32_t code_end;
    uint32_t unwind_info;

    /* Since the unwind table uses segment-relative offsets, convert */
    /* the IP in the current context to a segment-relative offset. */

    ip -= text_base;

    TRACE_T_SEARCH32(ip)

    /* Standard binary search. */
    /* Might modify this to do interpolation in the future. */

    lb = 0;
    ub = (unwind_end - unwind_start) / (3 * WORDSZ);
    mid = 0;
    while (ub > lb) {
	mid = (lb + ub) / 2;
	len = COPYIN_UINFO_4((char *)&code_start,
	    (uintptr_t)(unwind_start+mid*3*WORDSZ));
	len += COPYIN_UINFO_4((char *)&code_end,
	    (uintptr_t)(unwind_start+mid*3*WORDSZ+WORDSZ));
	if (len != 2 * WORDSZ)
	    return UWX_ERR_COPYIN_UTBL;
	if (env->byte_swap) {
	    uwx_swap4(&code_start);
	    uwx_swap4(&code_end);
	}
	TRACE_T_BINSEARCH32(lb, ub, mid, code_start, code_end)
	if (ip >= code_end)
	    lb = mid + 1;
	else if (ip < code_start)
	    ub = mid;
	else
	    break;
    }
    if (ub <= lb)
	return UWX_ERR_NOUENTRY;
    len = COPYIN_UINFO_4((char *)&unwind_info,
	(uintptr_t)(unwind_start+mid*3*WORDSZ+2*WORDSZ));
    if (len != WORDSZ)
	return UWX_ERR_COPYIN_UTBL;
    if (env->byte_swap)
	uwx_swap4(&unwind_info);
    uentry->ptr_size = WORDSZ;
    uentry->code_start = SWIZZLE(text_base + code_start);
    uentry->code_end = SWIZZLE(text_base + code_end);
    uentry->unwind_info = SWIZZLE(text_base + unwind_info);
    return UWX_OK;
}
Exemple #8
0
Fichier : wm.c Projet : krh/ksim
void
dump_surface(const char *filename, uint32_t binding_table_offset, int i)
{
	struct surface s;
	char *linear;
	__m256i alpha;

	get_surface(binding_table_offset, i, &s);

	int png_format;
	switch (s.format) {
	case SF_R8G8B8X8_UNORM:
	case SF_R8G8B8A8_UNORM:
	case SF_R8G8B8X8_UNORM_SRGB:
	case SF_R8G8B8A8_UNORM_SRGB:
		png_format = PNG_FORMAT_RGBA;
		break;
	case SF_B8G8R8A8_UNORM:
	case SF_B8G8R8X8_UNORM:
	case SF_B8G8R8A8_UNORM_SRGB:
	case SF_B8G8R8X8_UNORM_SRGB:
		png_format = PNG_FORMAT_BGRA;
		break;
	default:
		stub("image format");
		return;
	}

	switch (s.format) {
	case SF_R8G8B8X8_UNORM:
	case SF_B8G8R8X8_UNORM:
	case SF_R8G8B8X8_UNORM_SRGB:
	case SF_B8G8R8X8_UNORM_SRGB:
		alpha = _mm256_set1_epi32(0xff000000);
		break;
	default:
		alpha = _mm256_set1_epi32(0);
		break;
	}

	switch (s.tile_mode) {
	case LINEAR:
		linear = s.pixels;
		break;
	case XMAJOR:
		linear = detile_xmajor(&s, alpha);
		break;
	case YMAJOR:
		linear = detile_ymajor(&s, alpha);
		break;
	default:
		linear = s.pixels;
		stub("detile wmajor");
		break;
	}

	FILE *f = fopen(filename, "wb");
	ksim_assert(f != NULL);

	png_image pi = {
		.version = PNG_IMAGE_VERSION,
		.width = s.width,
		.height = s.height,
		.format = png_format
	};

	ksim_assert(png_image_write_to_stdio(&pi, f, 0, linear, s.stride, NULL));

	fclose(f);

	if (linear != s.pixels)
		free(linear);
}

static void
depth_test(struct primitive *p, struct dispatch *d)
{
	uint32_t cpp = depth_format_size(gt.depth.format);

	struct reg w_unorm;
	struct reg d24x8, cmp, d_f;

	void *base = ymajor_offset(p->depth.buffer, d->x, d->y, gt.depth.stride, cpp);

	if (gt.depth.test_enable) {
		const __m256 inv_scale = _mm256_set1_ps(1.0f / 16777215.0f);
		switch (gt.depth.format) {
		case D32_FLOAT:
			d_f.reg = _mm256_load_ps(base);
			break;
		case D24_UNORM_X8_UINT:
			d24x8.ireg = _mm256_load_si256(base);
			d_f.reg = _mm256_mul_ps(_mm256_cvtepi32_ps(d24x8.ireg),
						inv_scale);
			break;
		case D16_UNORM:
			stub("D16_UNORM");
		default:
			ksim_unreachable("invalid depth format");
		}

		/* Swizzle two middle pixel pairs so that dword 0-3 and 4-7
		 * match the shader dispatch subspan orderingg. */
		d_f.ireg = _mm256_permute4x64_epi64(d_f.ireg, SWIZZLE(0, 2, 1, 3));

		switch (gt.depth.test_function) {
		case COMPAREFUNCTION_ALWAYS:
			cmp.reg = _mm256_cmp_ps(d_f.reg, d->w.reg, _CMP_TRUE_US);
			break;
		case COMPAREFUNCTION_NEVER:
			cmp.reg = _mm256_cmp_ps(d_f.reg, d->w.reg, _CMP_FALSE_OS);
			break;
		case COMPAREFUNCTION_LESS:
			cmp.reg = _mm256_cmp_ps(d_f.reg, d->w.reg, _CMP_LT_OS);
			break;
		case COMPAREFUNCTION_EQUAL:
			cmp.reg = _mm256_cmp_ps(d_f.reg, d->w.reg, _CMP_EQ_OS);
			break;
		case COMPAREFUNCTION_LEQUAL:
			cmp.reg = _mm256_cmp_ps(d_f.reg, d->w.reg, _CMP_LE_OS);
			break;
		case COMPAREFUNCTION_GREATER:
			cmp.reg = _mm256_cmp_ps(d_f.reg, d->w.reg, _CMP_GT_OS);
			break;
		case COMPAREFUNCTION_NOTEQUAL:
			cmp.reg = _mm256_cmp_ps(d_f.reg, d->w.reg, _CMP_NEQ_OS);
			break;
		case COMPAREFUNCTION_GEQUAL:
			cmp.reg = _mm256_cmp_ps(d_f.reg, d->w.reg, _CMP_GE_OS);
			break;
		}
		d->mask.ireg = _mm256_and_si256(cmp.ireg, d->mask.ireg);
	}

	if (gt.depth.write_enable) {
		const __m256 scale = _mm256_set1_ps(16777215.0f);
		const __m256 half =  _mm256_set1_ps(0.5f);

		struct reg w;
		w.ireg = _mm256_permute4x64_epi64(d->w.ireg, SWIZZLE(0, 2, 1, 3));
		__m256i m = _mm256_permute4x64_epi64(d->mask.ireg,
						     SWIZZLE(0, 2, 1, 3));

		switch (gt.depth.format) {
		case D32_FLOAT:
			_mm256_maskstore_ps(base, m, w.reg);
			break;
		case D24_UNORM_X8_UINT:
			w_unorm.ireg = _mm256_cvtps_epi32(_mm256_add_ps(_mm256_mul_ps(w.reg, scale), half));
			_mm256_maskstore_epi32(base, m, w_unorm.ireg);
			break;
		case D16_UNORM:
			stub("D16_UNORM");
		default:
			ksim_unreachable("invalid depth format");
		}

	}
}
Exemple #9
0
bool GLDriver::checkActiveTextures()
{
   std::vector<uint8_t> untiledImage, untiledMipmap;
   gx2::GX2Surface surface;

   for (auto i = 0; i < latte::MaxTextures; ++i) {
      auto resourceOffset = (latte::SQ_PS_TEX_RESOURCE_0 + i) * 7;
      auto sq_tex_resource_word0 = getRegister<latte::SQ_TEX_RESOURCE_WORD0_N>(latte::Register::SQ_TEX_RESOURCE_WORD0_0 + 4 * resourceOffset);
      auto sq_tex_resource_word1 = getRegister<latte::SQ_TEX_RESOURCE_WORD1_N>(latte::Register::SQ_TEX_RESOURCE_WORD1_0 + 4 * resourceOffset);
      auto sq_tex_resource_word2 = getRegister<latte::SQ_TEX_RESOURCE_WORD2_N>(latte::Register::SQ_TEX_RESOURCE_WORD2_0 + 4 * resourceOffset);
      auto sq_tex_resource_word3 = getRegister<latte::SQ_TEX_RESOURCE_WORD3_N>(latte::Register::SQ_TEX_RESOURCE_WORD3_0 + 4 * resourceOffset);
      auto sq_tex_resource_word4 = getRegister<latte::SQ_TEX_RESOURCE_WORD4_N>(latte::Register::SQ_TEX_RESOURCE_WORD4_0 + 4 * resourceOffset);
      auto sq_tex_resource_word5 = getRegister<latte::SQ_TEX_RESOURCE_WORD5_N>(latte::Register::SQ_TEX_RESOURCE_WORD5_0 + 4 * resourceOffset);
      auto sq_tex_resource_word6 = getRegister<latte::SQ_TEX_RESOURCE_WORD6_N>(latte::Register::SQ_TEX_RESOURCE_WORD6_0 + 4 * resourceOffset);
      auto baseAddress = sq_tex_resource_word2.BASE_ADDRESS() << 8;

      if (!baseAddress) {
         continue;
      }

      if (baseAddress == mPixelTextureCache[i].baseAddress
       && sq_tex_resource_word0.value == mPixelTextureCache[i].word0
       && sq_tex_resource_word1.value == mPixelTextureCache[i].word1
       && sq_tex_resource_word2.value == mPixelTextureCache[i].word2
       && sq_tex_resource_word3.value == mPixelTextureCache[i].word3
       && sq_tex_resource_word4.value == mPixelTextureCache[i].word4
       && sq_tex_resource_word5.value == mPixelTextureCache[i].word5
       && sq_tex_resource_word6.value == mPixelTextureCache[i].word6) {
         continue;  // No change in sampler state
      }

      mPixelTextureCache[i].baseAddress = baseAddress;
      mPixelTextureCache[i].word0 = sq_tex_resource_word0.value;
      mPixelTextureCache[i].word1 = sq_tex_resource_word1.value;
      mPixelTextureCache[i].word2 = sq_tex_resource_word2.value;
      mPixelTextureCache[i].word3 = sq_tex_resource_word3.value;
      mPixelTextureCache[i].word4 = sq_tex_resource_word4.value;
      mPixelTextureCache[i].word5 = sq_tex_resource_word5.value;
      mPixelTextureCache[i].word6 = sq_tex_resource_word6.value;

      // Decode resource registers
      auto pitch = (sq_tex_resource_word0.PITCH() + 1) * 8;
      auto width = sq_tex_resource_word0.TEX_WIDTH() + 1;
      auto height = sq_tex_resource_word1.TEX_HEIGHT() + 1;
      auto depth = sq_tex_resource_word1.TEX_DEPTH() + 1;

      auto format = sq_tex_resource_word1.DATA_FORMAT();
      auto tileMode = sq_tex_resource_word0.TILE_MODE();
      auto numFormat = sq_tex_resource_word4.NUM_FORMAT_ALL();
      auto formatComp = sq_tex_resource_word4.FORMAT_COMP_X();
      auto degamma = sq_tex_resource_word4.FORCE_DEGAMMA();
      auto dim = sq_tex_resource_word0.DIM();

      auto buffer = getSurfaceBuffer(baseAddress, width, height, depth, dim, format, numFormat, formatComp, degamma, sq_tex_resource_word0.TILE_TYPE());

      if (buffer->dirtyAsTexture) {
         auto swizzle = sq_tex_resource_word2.SWIZZLE() << 8;

         // Rebuild a GX2Surface
         std::memset(&surface, 0, sizeof(gx2::GX2Surface));

         surface.dim = static_cast<gx2::GX2SurfaceDim>(dim);
         surface.width = width;
         surface.height = height;

         if (surface.dim == gx2::GX2SurfaceDim::TextureCube) {
            surface.depth = depth * 6;
         } else if (surface.dim == gx2::GX2SurfaceDim::Texture3D ||
            surface.dim == gx2::GX2SurfaceDim::Texture2DMSAAArray ||
            surface.dim == gx2::GX2SurfaceDim::Texture2DArray ||
            surface.dim == gx2::GX2SurfaceDim::Texture1DArray) {
            surface.depth = depth;
         } else {
            surface.depth = 1;
         }

         surface.mipLevels = 1;
         surface.format = getSurfaceFormat(format, numFormat, formatComp, degamma);

         surface.aa = gx2::GX2AAMode::Mode1X;
         surface.use = gx2::GX2SurfaceUse::Texture;

         if (sq_tex_resource_word0.TILE_TYPE()) {
            surface.use |= gx2::GX2SurfaceUse::DepthBuffer;
         }

         surface.tileMode = static_cast<gx2::GX2TileMode>(tileMode);
         surface.swizzle = swizzle;

         // Update the sizing information for the surface
         GX2CalcSurfaceSizeAndAlignment(&surface);

         // Align address
         baseAddress &= ~(surface.alignment - 1);

         surface.image = make_virtual_ptr<uint8_t>(baseAddress);
         surface.mipmaps = nullptr;

         // Calculate a new memory CRC
         uint64_t newHash[2] = { 0 };
         MurmurHash3_x64_128(surface.image, surface.imageSize, 0, newHash);

         // If the CPU memory has changed, we should re-upload this.  This hashing is
         //  also means that if the application temporarily uses one of its buffers as
         //  a color buffer, we are able to accurately handle this.  Providing they are
         //  not updating the memory at the same time.
         if (newHash[0] != buffer->cpuMemHash[0] || newHash[1] != buffer->cpuMemHash[1]) {
            buffer->cpuMemHash[0] = newHash[0];
            buffer->cpuMemHash[1] = newHash[1];

            // Untile
            gx2::internal::convertTiling(&surface, untiledImage, untiledMipmap);

            // Create texture
            auto compressed = isCompressedFormat(format);
            auto target = getTextureTarget(dim);
            auto textureDataType = gl::GL_INVALID_ENUM;
            auto textureFormat = getTextureFormat(format);
            auto size = untiledImage.size();

            if (compressed) {
               textureDataType = getCompressedTextureDataType(format, degamma);
            } else {
               textureDataType = getTextureDataType(format, formatComp);
            }

            if (textureDataType == gl::GL_INVALID_ENUM || textureFormat == gl::GL_INVALID_ENUM) {
               decaf_abort(fmt::format("Texture with unsupported format {}", surface.format.value()));
            }

            switch (dim) {
            case latte::SQ_TEX_DIM_1D:
               if (compressed) {
                  gl::glCompressedTextureSubImage1D(buffer->object,
                                                    0, /* level */
                                                    0, /* xoffset */
                                                    width,
                                                    textureDataType,
                                                    gsl::narrow_cast<gl::GLsizei>(size),
                                                    untiledImage.data());
               } else {
                  gl::glTextureSubImage1D(buffer->object,
                                          0, /* level */
                                          0, /* xoffset */
                                          width,
                                          textureFormat,
                                          textureDataType,
                                          untiledImage.data());
               }
               break;
            case latte::SQ_TEX_DIM_2D:
               if (compressed) {
                  gl::glCompressedTextureSubImage2D(buffer->object,
                                                    0, /* level */
                                                    0, 0, /* xoffset, yoffset */
                                                    width,
                                                    height,
                                                    textureDataType,
                                                    gsl::narrow_cast<gl::GLsizei>(size),
                                                    untiledImage.data());
               } else {
                  gl::glTextureSubImage2D(buffer->object,
                                          0, /* level */
                                          0, 0, /* xoffset, yoffset */
                                          width, height,
                                          textureFormat,
                                          textureDataType,
                                          untiledImage.data());
               }
               break;
            case latte::SQ_TEX_DIM_3D:
               if (compressed) {
                  gl::glCompressedTextureSubImage3D(buffer->object,
                                                    0, /* level */
                                                    0, 0, 0, /* xoffset, yoffset, zoffset */
                                                    width, height, depth,
                                                    textureDataType,
                                                    gsl::narrow_cast<gl::GLsizei>(size),
                                                    untiledImage.data());
               } else {
                  gl::glTextureSubImage3D(buffer->object,
                                          0, /* level */
                                          0, 0, 0, /* xoffset, yoffset, zoffset */
                                          width, height, depth,
                                          textureFormat,
                                          textureDataType,
                                          untiledImage.data());
               }
               break;
            case latte::SQ_TEX_DIM_CUBEMAP:
               decaf_check(surface.depth == 6);
            case latte::SQ_TEX_DIM_2D_ARRAY:
               if (compressed) {
                  gl::glCompressedTextureSubImage3D(buffer->object,
                                                    0, /* level */
                                                    0, 0, 0, /* xoffset, yoffset, zoffset */
                                                    width, height, surface.depth,
                                                    textureDataType,
                                                    gsl::narrow_cast<gl::GLsizei>(size),
                                                    untiledImage.data());
               } else {
                  gl::glTextureSubImage3D(buffer->object,
                                          0, /* level */
                                          0, 0, 0, /* xoffset, yoffset, zoffset */
                                          width, height, surface.depth,
                                          textureFormat,
                                          textureDataType,
                                          untiledImage.data());
               }
               break;
            default:
               decaf_abort(fmt::format("Unsupported texture dim: {}", sq_tex_resource_word0.DIM()));
            }
         }

         buffer->dirtyAsTexture = false;
         buffer->state = SurfaceUseState::CpuWritten;
      }

      // Setup texture swizzle
      auto dst_sel_x = getTextureSwizzle(sq_tex_resource_word4.DST_SEL_X());
      auto dst_sel_y = getTextureSwizzle(sq_tex_resource_word4.DST_SEL_Y());
      auto dst_sel_z = getTextureSwizzle(sq_tex_resource_word4.DST_SEL_Z());
      auto dst_sel_w = getTextureSwizzle(sq_tex_resource_word4.DST_SEL_W());

      gl::GLint textureSwizzle[] = {
         static_cast<gl::GLint>(dst_sel_x),
         static_cast<gl::GLint>(dst_sel_y),
         static_cast<gl::GLint>(dst_sel_z),
         static_cast<gl::GLint>(dst_sel_w),
      };

      gl::glTextureParameteriv(buffer->object, gl::GL_TEXTURE_SWIZZLE_RGBA, textureSwizzle);
      gl::glBindTextureUnit(i, buffer->object);
   }

   return true;
}