/* Write one SIMD8 group of 8-bit RGBA pixels to a linearly tiled (untiled)
 * render target.  Each of r, g, b, a carries one 8-bit channel value per
 * pixel in the low byte of each dword lane; stores honor the per-pixel
 * execution mask in t->mask_q1. */
static void
write_uint8_linear(struct thread *t,
		   const struct sfid_render_cache_args *args,
		   __m256i r, __m256i g, __m256i b, __m256i a)
{
	/* Row offset of the selected array slice within the surface. */
	const int slice_y = args->rt.minimum_array_element * args->rt.qpitch;
	/* Pixel coordinates of the subspan come from the thread payload in g1. */
	const int x = t->grf[1].uw[4];
	const int y = t->grf[1].uw[5] + slice_y;
	__m256i rgba;

	/* Pack the four channels into one dword per pixel:
	 * a<<24 | b<<16 | g<<8 | r. */
	rgba = _mm256_slli_epi32(a, 8);
	rgba = _mm256_or_si256(rgba, b);
	rgba = _mm256_slli_epi32(rgba, 8);
	rgba = _mm256_or_si256(rgba, g);
	rgba = _mm256_slli_epi32(rgba, 8);
	rgba = _mm256_or_si256(rgba, r);

/* Build an immediate for 4-element permutes/shuffles: two bits per lane. */
#define SWIZZLE(x, y, z, w) \
	( ((x) << 0) | ((y) << 2) | ((z) << 4) | ((w) << 6) )

	/* Swizzle two middle pixel pairs so that dword 0-3 and 4-7
	 * form linear owords of pixels. */
	rgba = _mm256_permute4x64_epi64(rgba, SWIZZLE(0, 2, 1, 3));
	/* The mask must be swizzled the same way so it stays aligned
	 * with the pixels it guards. */
	__m256i mask = _mm256_permute4x64_epi64(t->mask_q1, SWIZZLE(0, 2, 1, 3));

	void *base = args->rt.pixels + x * args->rt.cpp + y * args->rt.stride;

	/* Store four pixels per row over two rows; maskstore suppresses
	 * writes for pixels whose mask dword is clear. */
	_mm_maskstore_epi32(base,
			    _mm256_extractf128_si256(mask, 0),
			    _mm256_extractf128_si256(rgba, 0));
	_mm_maskstore_epi32(base + args->rt.stride,
			    _mm256_extractf128_si256(mask, 1),
			    _mm256_extractf128_si256(rgba, 1));
}
/// @brief Deserialize the HouseExt extension data attached to pThis.
/// @param pThis The house instance whose extension data is being loaded.
/// @param pStm  Source stream for the saved-game data.
/// @return true when LoadKey produced valid extension data, false otherwise.
bool Container<HouseExt>::Load(HouseClass *pThis, IStream *pStm) {
	HouseExt::ExtData* pData = this->LoadKey(pThis, pStm);

	// LoadKey can fail and return nullptr; the original code dereferenced
	// pData in the SWIZZLE calls before the null check, which would crash
	// on a failed load.  Bail out first.
	if (pData == nullptr) {
		return false;
	}

	// Register the saved pointers for post-load pointer fix-up
	// (SWIZZLE is presumably the save/load pointer-swizzle macro --
	// confirm against the project's serialization framework).
	SWIZZLE(pData->Factory_BuildingType);
	SWIZZLE(pData->Factory_InfantryType);
	SWIZZLE(pData->Factory_VehicleType);
	SWIZZLE(pData->Factory_NavyType);
	SWIZZLE(pData->Factory_AircraftType);

	return true;
}
/* SIMD8 render-target write for 8-bit unorm formats on a Y-major tiled
 * surface.  Converts the four float channels in the source GRFs to unorm8
 * (scale by 255, +0.5 for round-to-nearest), packs one dword per pixel and
 * stores two masked owords. */
static void
sfid_render_cache_rt_write_simd8_unorm8_ymajor(struct thread *t,
					       const struct sfid_render_cache_args *args)
{
	const int slice_y = args->rt.minimum_array_element * args->rt.qpitch;
	const int x = t->grf[1].uw[4];
	const int y = t->grf[1].uw[5] + slice_y;
	const int cpp = 4;

	struct reg *src = &t->grf[args->src];
	const __m256 scale = _mm256_set1_ps(255.0f);
	const __m256 half = _mm256_set1_ps(0.5f);
	__m256i r, g, b, a;
	__m256i rgba;

	/* The two supported formats differ only in which source register
	 * feeds the red and blue bytes. */
	switch (args->rt.format) {
	case SF_R8G8B8A8_UNORM:
		r = _mm256_cvtps_epi32(_mm256_add_ps(_mm256_mul_ps(src[0].reg, scale), half));
		g = _mm256_cvtps_epi32(_mm256_add_ps(_mm256_mul_ps(src[1].reg, scale), half));
		b = _mm256_cvtps_epi32(_mm256_add_ps(_mm256_mul_ps(src[2].reg, scale), half));
		a = _mm256_cvtps_epi32(_mm256_add_ps(_mm256_mul_ps(src[3].reg, scale), half));
		break;
	case SF_B8G8R8A8_UNORM:
		b = _mm256_cvtps_epi32(_mm256_add_ps(_mm256_mul_ps(src[0].reg, scale), half));
		g = _mm256_cvtps_epi32(_mm256_add_ps(_mm256_mul_ps(src[1].reg, scale), half));
		r = _mm256_cvtps_epi32(_mm256_add_ps(_mm256_mul_ps(src[2].reg, scale), half));
		a = _mm256_cvtps_epi32(_mm256_add_ps(_mm256_mul_ps(src[3].reg, scale), half));
		break;
	default:
		stub("unorm8 ymajor format");
		return;
	}

	/* Pack to a<<24 | b<<16 | g<<8 | r per pixel. */
	rgba = _mm256_slli_epi32(a, 8);
	rgba = _mm256_or_si256(rgba, b);
	rgba = _mm256_slli_epi32(rgba, 8);
	rgba = _mm256_or_si256(rgba, g);
	rgba = _mm256_slli_epi32(rgba, 8);
	rgba = _mm256_or_si256(rgba, r);

	/* Swizzle two middle pixel pairs so that dword 0-3 and 4-7
	 * form linear owords of pixels. */
	rgba = _mm256_permute4x64_epi64(rgba, SWIZZLE(0, 2, 1, 3));
	__m256i mask = _mm256_permute4x64_epi64(t->mask_q1, SWIZZLE(0, 2, 1, 3));

	void *base = ymajor_offset(args->rt.pixels, x, y, args->rt.stride, cpp);

	/* Within a Y-major tile the second oword of this subspan sits 16
	 * bytes after the first, so both stores share one base address. */
	_mm_maskstore_epi32(base,
			    _mm256_extractf128_si256(mask, 0),
			    _mm256_extractf128_si256(rgba, 0));
	_mm_maskstore_epi32(base + 16,
			    _mm256_extractf128_si256(mask, 1),
			    _mm256_extractf128_si256(rgba, 1));
}
/* SIMD8 render-target write for BGRA unorm8 on an X-major tiled surface,
 * with optional fixed-function blending and gamma correction. */
static void
sfid_render_cache_rt_write_simd8_bgra_unorm8_xmajor(struct thread *t,
						    const struct sfid_render_cache_args *args)
{
	__m256i argb;
	const float scale = 255.0f;
	struct reg src[4];

	/* Copy the source channels so blending/gamma can rewrite them
	 * without touching the thread's GRF state. */
	memcpy(src, &t->grf[args->src], sizeof(src));

	const int cpp = 4;
	const int slice_y = args->rt.minimum_array_element * args->rt.qpitch;
	const int x = t->grf[1].uw[4];
	const int y = t->grf[1].uw[5] + slice_y;

	void *base = xmajor_offset(args->rt.pixels, x, y, args->rt.stride, cpp);

	if (gt.blend.enable) {
		/* Load unorm8 destination pixels: the two owords of the
		 * subspan are 512 bytes apart (adjacent rows of an X-major
		 * tile, presumably 512 bytes wide -- matches the store
		 * offsets below). */
		__m128i lo = _mm_load_si128(base);
		__m128i hi = _mm_load_si128(base + 512);
		__m256i dst_argb = _mm256_inserti128_si256(_mm256_castsi128_si256(lo), hi, 1);
		/* Bring destination into the same shader subspan order as src. */
		dst_argb = _mm256_permute4x64_epi64(dst_argb, SWIZZLE(0, 2, 1, 3));

		blend_unorm8_argb(src, dst_argb);
	}

	gamma_correct(args->rt.format, src);

	const __m256i r = to_unorm(src[0].reg, scale);
	const __m256i g = to_unorm(src[1].reg, scale);
	const __m256i b = to_unorm(src[2].reg, scale);
	const __m256i a = to_unorm(src[3].reg, scale);

	/* Pack to a<<24 | r<<16 | g<<8 | b per pixel (BGRA byte order). */
	argb = _mm256_slli_epi32(a, 8);
	argb = _mm256_or_si256(argb, r);
	argb = _mm256_slli_epi32(argb, 8);
	argb = _mm256_or_si256(argb, g);
	argb = _mm256_slli_epi32(argb, 8);
	argb = _mm256_or_si256(argb, b);

	/* Swizzle two middle pixel pairs so that dword 0-3 and 4-7
	 * form linear owords of pixels. */
	argb = _mm256_permute4x64_epi64(argb, SWIZZLE(0, 2, 1, 3));
	__m256i mask = _mm256_permute4x64_epi64(t->mask_q1, SWIZZLE(0, 2, 1, 3));

	_mm_maskstore_epi32(base,
			    _mm256_extractf128_si256(mask, 0),
			    _mm256_extractf128_si256(argb, 0));
	_mm_maskstore_epi32(base + 512,
			    _mm256_extractf128_si256(mask, 1),
			    _mm256_extractf128_si256(argb, 1));
}
static void sfid_render_cache_rt_write_rep16_bgra_unorm8_xmajor(struct thread *t, const struct sfid_render_cache_args *args) { const __m128 scale = _mm_set1_ps(255.0f); const __m128 half = _mm_set1_ps(0.5f); struct reg src[1]; memcpy(src, &t->grf[args->src], sizeof(src)); if (srgb_format(args->rt.format)) { const __m256 inv_gamma = _mm256_set1_ps(1.0f / 2.4f); src[0].reg = _ZGVdN8vv_powf(src[0].reg, inv_gamma); /* Don't gamma correct alpha */ src[0].f[3] = t->grf[args->src].f[3]; } __m128 bgra = _mm_shuffle_ps(_mm256_castps256_ps128(src[0].reg), _mm256_castps256_ps128(src[0].reg), SWIZZLE(2, 1, 0, 3)); bgra = _mm_mul_ps(bgra, scale); bgra = _mm_add_ps(bgra, half); __m128i bgra_i = _mm_cvtps_epi32(bgra); bgra_i = _mm_packus_epi32(bgra_i, bgra_i); bgra_i = _mm_packus_epi16(bgra_i, bgra_i); /* Swizzle two middle mask pairs so that dword 0-3 and 4-7 * form linear owords of pixels. */ __m256i mask = _mm256_permute4x64_epi64(t->mask_q1, SWIZZLE(0, 2, 1, 3)); const int slice_y = args->rt.minimum_array_element * args->rt.qpitch; const int x0 = t->grf[1].uw[4]; const int y0 = t->grf[1].uw[5] + slice_y; const int cpp = 4; void *base0 = xmajor_offset(args->rt.pixels, x0, y0, args->rt.stride, cpp); _mm_maskstore_epi32(base0, _mm256_extractf128_si256(mask, 0), bgra_i); _mm_maskstore_epi32(base0 + 512, _mm256_extractf128_si256(mask, 1), bgra_i); const int x1 = t->grf[1].uw[8]; const int y1 = t->grf[1].uw[9] + slice_y; void *base1 = xmajor_offset(args->rt.pixels, x1, y1, args->rt.stride, 4); __m256i mask1 = _mm256_permute4x64_epi64(t->mask_q2, SWIZZLE(0, 2, 1, 3)); _mm_maskstore_epi32(base1, _mm256_extractf128_si256(mask1, 0), bgra_i); _mm_maskstore_epi32(base1 + 512, _mm256_extractf128_si256(mask1, 1), bgra_i); }
static void sfid_render_cache_rt_write_simd8_r_uint8_ymajor(struct thread *t, const struct sfid_render_cache_args *args) { const int slice_y = args->rt.minimum_array_element * args->rt.qpitch; const int x = t->grf[1].uw[4]; const int y = t->grf[1].uw[5] + slice_y; const int cpp = 1; void *base = ymajor_offset(args->rt.pixels, x, y, args->rt.stride, cpp); struct reg *src = &t->grf[args->src]; __m256i r32 = _mm256_permute4x64_epi64(src[0].ireg, SWIZZLE(0, 2, 1, 3)); __m128i lo = _mm256_extractf128_si256(r32, 0); __m128i hi = _mm256_extractf128_si256(r32, 1); __m128i r16 = _mm_packus_epi32(lo, hi); __m128i r8 = _mm_packus_epi16(r16, r16); /* FIXME: Needs masking. */ *(uint32_t *) (base + 0) = _mm_extract_epi32(r8, 0); *(uint32_t *) (base + 16) = _mm_extract_epi32(r8, 1); }
int uwx_search_utable32( struct uwx_env *env, uint32_t ip, uint32_t text_base, uint32_t unwind_start, uint32_t unwind_end, struct uwx_utable_entry *uentry) { int lb; int ub; int mid; int len; uint32_t code_start; uint32_t code_end; uint32_t unwind_info; /* Since the unwind table uses segment-relative offsets, convert */ /* the IP in the current context to a segment-relative offset. */ ip -= text_base; TRACE_T_SEARCH32(ip) /* Standard binary search. */ /* Might modify this to do interpolation in the future. */ lb = 0; ub = (unwind_end - unwind_start) / (3 * WORDSZ); mid = 0; while (ub > lb) { mid = (lb + ub) / 2; len = COPYIN_UINFO_4((char *)&code_start, (uintptr_t)(unwind_start+mid*3*WORDSZ)); len += COPYIN_UINFO_4((char *)&code_end, (uintptr_t)(unwind_start+mid*3*WORDSZ+WORDSZ)); if (len != 2 * WORDSZ) return UWX_ERR_COPYIN_UTBL; if (env->byte_swap) { uwx_swap4(&code_start); uwx_swap4(&code_end); } TRACE_T_BINSEARCH32(lb, ub, mid, code_start, code_end) if (ip >= code_end) lb = mid + 1; else if (ip < code_start) ub = mid; else break; } if (ub <= lb) return UWX_ERR_NOUENTRY; len = COPYIN_UINFO_4((char *)&unwind_info, (uintptr_t)(unwind_start+mid*3*WORDSZ+2*WORDSZ)); if (len != WORDSZ) return UWX_ERR_COPYIN_UTBL; if (env->byte_swap) uwx_swap4(&unwind_info); uentry->ptr_size = WORDSZ; uentry->code_start = SWIZZLE(text_base + code_start); uentry->code_end = SWIZZLE(text_base + code_end); uentry->unwind_info = SWIZZLE(text_base + unwind_info); return UWX_OK; }
/* Dump surface i from the given binding table to a PNG file.  Only
 * 8-bit RGBA/BGRA variants are supported; tiled surfaces are detiled
 * into a temporary linear buffer first. */
void
dump_surface(const char *filename, uint32_t binding_table_offset, int i)
{
	struct surface s;
	char *linear;
	__m256i alpha;

	get_surface(binding_table_offset, i, &s);

	/* Map the surface format to libpng's channel ordering. */
	int png_format;
	switch (s.format) {
	case SF_R8G8B8X8_UNORM:
	case SF_R8G8B8A8_UNORM:
	case SF_R8G8B8X8_UNORM_SRGB:
	case SF_R8G8B8A8_UNORM_SRGB:
		png_format = PNG_FORMAT_RGBA;
		break;
	case SF_B8G8R8A8_UNORM:
	case SF_B8G8R8X8_UNORM:
	case SF_B8G8R8A8_UNORM_SRGB:
	case SF_B8G8R8X8_UNORM_SRGB:
		png_format = PNG_FORMAT_BGRA;
		break;
	default:
		stub("image format");
		return;
	}

	/* X-channel formats have undefined alpha; force it opaque during
	 * detiling so the PNG comes out fully visible. */
	switch (s.format) {
	case SF_R8G8B8X8_UNORM:
	case SF_B8G8R8X8_UNORM:
	case SF_R8G8B8X8_UNORM_SRGB:
	case SF_B8G8R8X8_UNORM_SRGB:
		alpha = _mm256_set1_epi32(0xff000000);
		break;
	default:
		alpha = _mm256_set1_epi32(0);
		break;
	}

	switch (s.tile_mode) {
	case LINEAR:
		linear = s.pixels;
		break;
	case XMAJOR:
		linear = detile_xmajor(&s, alpha);
		break;
	case YMAJOR:
		linear = detile_ymajor(&s, alpha);
		break;
	default:
		/* W-major not implemented; dump raw tiled bytes instead. */
		linear = s.pixels;
		stub("detile wmajor");
		break;
	}

	FILE *f = fopen(filename, "wb");
	ksim_assert(f != NULL);

	png_image pi = {
		.version = PNG_IMAGE_VERSION,
		.width = s.width,
		.height = s.height,
		.format = png_format
	};

	/* NOTE(review): the write call lives inside ksim_assert -- if
	 * ksim_assert ever compiles away in release builds the file is
	 * never written; confirm ksim_assert is always active. */
	ksim_assert(png_image_write_to_stdio(&pi, f, 0, linear, s.stride, NULL));

	fclose(f);
	if (linear != s.pixels)
		free(linear);
}

/* Per-subspan depth test and depth write against a Y-major depth buffer.
 * Updates d->mask with the comparison result when testing is enabled and
 * stores the new depth (masked) when writing is enabled. */
static void
depth_test(struct primitive *p, struct dispatch *d)
{
	uint32_t cpp = depth_format_size(gt.depth.format);
	struct reg w_unorm;
	struct reg d24x8, cmp, d_f;

	void *base = ymajor_offset(p->depth.buffer, d->x, d->y, gt.depth.stride, cpp);

	if (gt.depth.test_enable) {
		/* 1/(2^24-1): rescale D24 unorm values back to [0,1] floats. */
		const __m256 inv_scale = _mm256_set1_ps(1.0f / 16777215.0f);

		switch (gt.depth.format) {
		case D32_FLOAT:
			d_f.reg = _mm256_load_ps(base);
			break;
		case D24_UNORM_X8_UINT:
			d24x8.ireg = _mm256_load_si256(base);
			d_f.reg = _mm256_mul_ps(_mm256_cvtepi32_ps(d24x8.ireg), inv_scale);
			break;
		case D16_UNORM:
			stub("D16_UNORM");
			/* falls through to unreachable: D16 not implemented */
		default:
			ksim_unreachable("invalid depth format");
		}

		/* Swizzle two middle pixel pairs so that dword 0-3 and 4-7
		 * match the shader dispatch subspan ordering. */
		d_f.ireg = _mm256_permute4x64_epi64(d_f.ireg, SWIZZLE(0, 2, 1, 3));

		switch (gt.depth.test_function) {
		case COMPAREFUNCTION_ALWAYS:
			cmp.reg = _mm256_cmp_ps(d_f.reg, d->w.reg, _CMP_TRUE_US);
			break;
		case COMPAREFUNCTION_NEVER:
			cmp.reg = _mm256_cmp_ps(d_f.reg, d->w.reg, _CMP_FALSE_OS);
			break;
		case COMPAREFUNCTION_LESS:
			cmp.reg = _mm256_cmp_ps(d_f.reg, d->w.reg, _CMP_LT_OS);
			break;
		case COMPAREFUNCTION_EQUAL:
			cmp.reg = _mm256_cmp_ps(d_f.reg, d->w.reg, _CMP_EQ_OS);
			break;
		case COMPAREFUNCTION_LEQUAL:
			cmp.reg = _mm256_cmp_ps(d_f.reg, d->w.reg, _CMP_LE_OS);
			break;
		case COMPAREFUNCTION_GREATER:
			cmp.reg = _mm256_cmp_ps(d_f.reg, d->w.reg, _CMP_GT_OS);
			break;
		case COMPAREFUNCTION_NOTEQUAL:
			cmp.reg = _mm256_cmp_ps(d_f.reg, d->w.reg, _CMP_NEQ_OS);
			break;
		case COMPAREFUNCTION_GEQUAL:
			cmp.reg = _mm256_cmp_ps(d_f.reg, d->w.reg, _CMP_GE_OS);
			break;
		}

		/* Pixels that fail the depth test drop out of the mask. */
		d->mask.ireg = _mm256_and_si256(cmp.ireg, d->mask.ireg);
	}

	if (gt.depth.write_enable) {
		/* 2^24-1: convert float depth to D24 unorm with rounding. */
		const __m256 scale = _mm256_set1_ps(16777215.0f);
		const __m256 half = _mm256_set1_ps(0.5f);
		struct reg w;

		/* Swizzle depth and mask back into buffer memory order. */
		w.ireg = _mm256_permute4x64_epi64(d->w.ireg, SWIZZLE(0, 2, 1, 3));
		__m256i m = _mm256_permute4x64_epi64(d->mask.ireg, SWIZZLE(0, 2, 1, 3));

		switch (gt.depth.format) {
		case D32_FLOAT:
			_mm256_maskstore_ps(base, m, w.reg);
			break;
		case D24_UNORM_X8_UINT:
			w_unorm.ireg = _mm256_cvtps_epi32(_mm256_add_ps(_mm256_mul_ps(w.reg, scale), half));
			_mm256_maskstore_epi32(base, m, w_unorm.ireg);
			break;
		case D16_UNORM:
			stub("D16_UNORM");
			/* falls through to unreachable: D16 not implemented */
		default:
			ksim_unreachable("invalid depth format");
		}
	}
}
/// @brief Sync GPU pixel-shader texture registers to GL texture objects.
/// For each texture slot, reads the seven SQ_TEX_RESOURCE words, skips
/// slots whose register state is unchanged since last check, re-uploads
/// texture data whose backing CPU memory hash changed, and applies the
/// per-texture channel swizzle before binding.
/// @return always true.
bool GLDriver::checkActiveTextures()
{
   std::vector<uint8_t> untiledImage, untiledMipmap;
   gx2::GX2Surface surface;

   for (auto i = 0; i < latte::MaxTextures; ++i) {
      // Each texture resource is described by 7 consecutive registers.
      auto resourceOffset = (latte::SQ_PS_TEX_RESOURCE_0 + i) * 7;
      auto sq_tex_resource_word0 = getRegister<latte::SQ_TEX_RESOURCE_WORD0_N>(latte::Register::SQ_TEX_RESOURCE_WORD0_0 + 4 * resourceOffset);
      auto sq_tex_resource_word1 = getRegister<latte::SQ_TEX_RESOURCE_WORD1_N>(latte::Register::SQ_TEX_RESOURCE_WORD1_0 + 4 * resourceOffset);
      auto sq_tex_resource_word2 = getRegister<latte::SQ_TEX_RESOURCE_WORD2_N>(latte::Register::SQ_TEX_RESOURCE_WORD2_0 + 4 * resourceOffset);
      auto sq_tex_resource_word3 = getRegister<latte::SQ_TEX_RESOURCE_WORD3_N>(latte::Register::SQ_TEX_RESOURCE_WORD3_0 + 4 * resourceOffset);
      auto sq_tex_resource_word4 = getRegister<latte::SQ_TEX_RESOURCE_WORD4_N>(latte::Register::SQ_TEX_RESOURCE_WORD4_0 + 4 * resourceOffset);
      auto sq_tex_resource_word5 = getRegister<latte::SQ_TEX_RESOURCE_WORD5_N>(latte::Register::SQ_TEX_RESOURCE_WORD5_0 + 4 * resourceOffset);
      auto sq_tex_resource_word6 = getRegister<latte::SQ_TEX_RESOURCE_WORD6_N>(latte::Register::SQ_TEX_RESOURCE_WORD6_0 + 4 * resourceOffset);

      // BASE_ADDRESS is stored in 256-byte units; zero means slot unused.
      auto baseAddress = sq_tex_resource_word2.BASE_ADDRESS() << 8;

      if (!baseAddress) {
         continue;
      }

      if (baseAddress == mPixelTextureCache[i].baseAddress
       && sq_tex_resource_word0.value == mPixelTextureCache[i].word0
       && sq_tex_resource_word1.value == mPixelTextureCache[i].word1
       && sq_tex_resource_word2.value == mPixelTextureCache[i].word2
       && sq_tex_resource_word3.value == mPixelTextureCache[i].word3
       && sq_tex_resource_word4.value == mPixelTextureCache[i].word4
       && sq_tex_resource_word5.value == mPixelTextureCache[i].word5
       && sq_tex_resource_word6.value == mPixelTextureCache[i].word6) {
         continue; // No change in sampler state
      }

      // Remember the current register state for the next change check.
      mPixelTextureCache[i].baseAddress = baseAddress;
      mPixelTextureCache[i].word0 = sq_tex_resource_word0.value;
      mPixelTextureCache[i].word1 = sq_tex_resource_word1.value;
      mPixelTextureCache[i].word2 = sq_tex_resource_word2.value;
      mPixelTextureCache[i].word3 = sq_tex_resource_word3.value;
      mPixelTextureCache[i].word4 = sq_tex_resource_word4.value;
      mPixelTextureCache[i].word5 = sq_tex_resource_word5.value;
      mPixelTextureCache[i].word6 = sq_tex_resource_word6.value;

      // Decode resource registers
      auto pitch = (sq_tex_resource_word0.PITCH() + 1) * 8;
      auto width = sq_tex_resource_word0.TEX_WIDTH() + 1;
      auto height = sq_tex_resource_word1.TEX_HEIGHT() + 1;
      auto depth = sq_tex_resource_word1.TEX_DEPTH() + 1;
      auto format = sq_tex_resource_word1.DATA_FORMAT();
      auto tileMode = sq_tex_resource_word0.TILE_MODE();
      auto numFormat = sq_tex_resource_word4.NUM_FORMAT_ALL();
      auto formatComp = sq_tex_resource_word4.FORMAT_COMP_X();
      auto degamma = sq_tex_resource_word4.FORCE_DEGAMMA();
      auto dim = sq_tex_resource_word0.DIM();

      auto buffer = getSurfaceBuffer(baseAddress, width, height, depth, dim, format, numFormat, formatComp, degamma, sq_tex_resource_word0.TILE_TYPE());

      if (buffer->dirtyAsTexture) {
         auto swizzle = sq_tex_resource_word2.SWIZZLE() << 8;

         // Rebuild a GX2Surface
         std::memset(&surface, 0, sizeof(gx2::GX2Surface));
         surface.dim = static_cast<gx2::GX2SurfaceDim>(dim);
         surface.width = width;
         surface.height = height;

         // Cube maps store six faces per layer; array/3D dims keep their
         // register depth; everything else is a single layer.
         if (surface.dim == gx2::GX2SurfaceDim::TextureCube) {
            surface.depth = depth * 6;
         } else if (surface.dim == gx2::GX2SurfaceDim::Texture3D ||
                    surface.dim == gx2::GX2SurfaceDim::Texture2DMSAAArray ||
                    surface.dim == gx2::GX2SurfaceDim::Texture2DArray ||
                    surface.dim == gx2::GX2SurfaceDim::Texture1DArray) {
            surface.depth = depth;
         } else {
            surface.depth = 1;
         }

         surface.mipLevels = 1;
         surface.format = getSurfaceFormat(format, numFormat, formatComp, degamma);
         surface.aa = gx2::GX2AAMode::Mode1X;
         surface.use = gx2::GX2SurfaceUse::Texture;

         if (sq_tex_resource_word0.TILE_TYPE()) {
            surface.use |= gx2::GX2SurfaceUse::DepthBuffer;
         }

         surface.tileMode = static_cast<gx2::GX2TileMode>(tileMode);
         surface.swizzle = swizzle;

         // Update the sizing information for the surface
         GX2CalcSurfaceSizeAndAlignment(&surface);

         // Align address
         baseAddress &= ~(surface.alignment - 1);

         surface.image = make_virtual_ptr<uint8_t>(baseAddress);
         surface.mipmaps = nullptr;

         // Calculate a new memory CRC
         uint64_t newHash[2] = { 0 };
         MurmurHash3_x64_128(surface.image, surface.imageSize, 0, newHash);

         // If the CPU memory has changed, we should re-upload this. This
         // hashing also means that if the application temporarily uses one
         // of its buffers as a color buffer, we are able to accurately
         // handle this. Providing they are not updating the memory at the
         // same time.
         if (newHash[0] != buffer->cpuMemHash[0] || newHash[1] != buffer->cpuMemHash[1]) {
            buffer->cpuMemHash[0] = newHash[0];
            buffer->cpuMemHash[1] = newHash[1];

            // Untile
            gx2::internal::convertTiling(&surface, untiledImage, untiledMipmap);

            // Create texture
            auto compressed = isCompressedFormat(format);
            auto target = getTextureTarget(dim);
            auto textureDataType = gl::GL_INVALID_ENUM;
            auto textureFormat = getTextureFormat(format);
            auto size = untiledImage.size();

            if (compressed) {
               textureDataType = getCompressedTextureDataType(format, degamma);
            } else {
               textureDataType = getTextureDataType(format, formatComp);
            }

            if (textureDataType == gl::GL_INVALID_ENUM || textureFormat == gl::GL_INVALID_ENUM) {
               decaf_abort(fmt::format("Texture with unsupported format {}", surface.format.value()));
            }

            switch (dim) {
            case latte::SQ_TEX_DIM_1D:
               if (compressed) {
                  gl::glCompressedTextureSubImage1D(buffer->object,
                                                    0, /* level */
                                                    0, /* xoffset */
                                                    width,
                                                    textureDataType,
                                                    gsl::narrow_cast<gl::GLsizei>(size),
                                                    untiledImage.data());
               } else {
                  gl::glTextureSubImage1D(buffer->object,
                                          0, /* level */
                                          0, /* xoffset */
                                          width,
                                          textureFormat,
                                          textureDataType,
                                          untiledImage.data());
               }
               break;
            case latte::SQ_TEX_DIM_2D:
               if (compressed) {
                  gl::glCompressedTextureSubImage2D(buffer->object,
                                                    0, /* level */
                                                    0, 0, /* xoffset, yoffset */
                                                    width,
                                                    height,
                                                    textureDataType,
                                                    gsl::narrow_cast<gl::GLsizei>(size),
                                                    untiledImage.data());
               } else {
                  gl::glTextureSubImage2D(buffer->object,
                                          0, /* level */
                                          0, 0, /* xoffset, yoffset */
                                          width, height,
                                          textureFormat,
                                          textureDataType,
                                          untiledImage.data());
               }
               break;
            case latte::SQ_TEX_DIM_3D:
               if (compressed) {
                  gl::glCompressedTextureSubImage3D(buffer->object,
                                                    0, /* level */
                                                    0, 0, 0, /* xoffset, yoffset, zoffset */
                                                    width, height, depth,
                                                    textureDataType,
                                                    gsl::narrow_cast<gl::GLsizei>(size),
                                                    untiledImage.data());
               } else {
                  gl::glTextureSubImage3D(buffer->object,
                                          0, /* level */
                                          0, 0, 0, /* xoffset, yoffset, zoffset */
                                          width, height, depth,
                                          textureFormat,
                                          textureDataType,
                                          untiledImage.data());
               }
               break;
            case latte::SQ_TEX_DIM_CUBEMAP:
               decaf_check(surface.depth == 6);
               // deliberate fallthrough: a cube map uploads as a 6-layer array
            case latte::SQ_TEX_DIM_2D_ARRAY:
               if (compressed) {
                  gl::glCompressedTextureSubImage3D(buffer->object,
                                                    0, /* level */
                                                    0, 0, 0, /* xoffset, yoffset, zoffset */
                                                    width, height, surface.depth,
                                                    textureDataType,
                                                    gsl::narrow_cast<gl::GLsizei>(size),
                                                    untiledImage.data());
               } else {
                  gl::glTextureSubImage3D(buffer->object,
                                          0, /* level */
                                          0, 0, 0, /* xoffset, yoffset, zoffset */
                                          width, height, surface.depth,
                                          textureFormat,
                                          textureDataType,
                                          untiledImage.data());
               }
               break;
            default:
               decaf_abort(fmt::format("Unsupported texture dim: {}", sq_tex_resource_word0.DIM()));
            }
         }

         buffer->dirtyAsTexture = false;
         buffer->state = SurfaceUseState::CpuWritten;
      }

      // Setup texture swizzle
      auto dst_sel_x = getTextureSwizzle(sq_tex_resource_word4.DST_SEL_X());
      auto dst_sel_y = getTextureSwizzle(sq_tex_resource_word4.DST_SEL_Y());
      auto dst_sel_z = getTextureSwizzle(sq_tex_resource_word4.DST_SEL_Z());
      auto dst_sel_w = getTextureSwizzle(sq_tex_resource_word4.DST_SEL_W());

      gl::GLint textureSwizzle[] = {
         static_cast<gl::GLint>(dst_sel_x),
         static_cast<gl::GLint>(dst_sel_y),
         static_cast<gl::GLint>(dst_sel_z),
         static_cast<gl::GLint>(dst_sel_w),
      };

      gl::glTextureParameteriv(buffer->object, gl::GL_TEXTURE_SWIZZLE_RGBA, textureSwizzle);
      gl::glBindTextureUnit(i, buffer->object);
   }

   return true;
}