// The audio thread. void DSound::SoundLoop() { Common::SetCurrentThreadName("Audio thread - dsound"); currentPos = 0; lastPos = 0; dsBuffer->Play(0, 0, DSBPLAY_LOOPING); while (!threadData) { // No blocking inside the csection dsBuffer->GetCurrentPosition((DWORD*)¤tPos, 0); int numBytesToRender = FIX128(ModBufferSize(currentPos - lastPos)); if (numBytesToRender >= 256) { if (numBytesToRender > sizeof(realtimeBuffer)) PanicAlert("soundThread: too big render call"); m_mixer->Mix(realtimeBuffer, numBytesToRender / 4); WriteDataToBuffer(lastPos, (char*)realtimeBuffer, numBytesToRender); lastPos = ModBufferSize(lastPos + numBytesToRender); } soundSyncEvent.Wait(); } }
// Returns the pixel shader used to encode the given texture copy format,
// compiling and caching it on first use.
//
// Returns s_encodingPrograms[0] (after alerting) for an out-of-range format,
// and nullptr when the shader for this format could not be generated or
// compiled. A failure is remembered in s_encodingProgramsFailed so it is only
// reported once.
LPDIRECT3DPIXELSHADER9 GetOrCreateEncodingShader(u32 format)
{
	// 'format' indexes both tables below, so NUM_ENCODING_PROGRAMS itself is
	// already out of range.
	// NOTE(review): assumes both tables hold exactly NUM_ENCODING_PROGRAMS
	// entries - confirm against their declarations.
	if (format >= NUM_ENCODING_PROGRAMS)
	{
		PanicAlert("Unknown texture copy format: 0x%x\n", format);
		return s_encodingPrograms[0];
	}

	if (!s_encodingPrograms[format])
	{
		if (s_encodingProgramsFailed[format])
		{
			// we already failed to create a shader for this format,
			// so instead of re-trying and showing the same error message every frame, just return.
			return nullptr;
		}

		const char* shader = TextureConversionShaderLegacy::GenerateEncodingShader(format);
		if (!shader)
		{
			// Without source text there is nothing to compile (and strlen
			// below would dereference a null pointer).
			ERROR_LOG(VIDEO, "Failed to create encoding fragment program");
			s_encodingProgramsFailed[format] = true;
			return nullptr;
		}

#if defined(_DEBUG) || defined(DEBUGFAST)
		if (g_ActiveConfig.iLog & CONF_SAVESHADERS)
		{
			static int counter = 0;
			char szTemp[MAX_PATH];
			sprintf(szTemp, "%senc_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++);

			SaveData(szTemp, shader);
		}
#endif

		s_encodingPrograms[format] = D3D::CompileAndCreatePixelShader(shader, (int)strlen(shader));
		if (!s_encodingPrograms[format])
		{
			ERROR_LOG(VIDEO, "Failed to create encoding fragment program");
			s_encodingProgramsFailed[format] = true;
		}
	}

	return s_encodingPrograms[format];
}
void FPURegCache::MapReg(const int i, bool doLoad, bool makeDirty) { pendingFlush = true; _assert_msg_(JIT, !regs[i].location.IsImm(), "WTF - load - imm"); if (!regs[i].away) { // Reg is at home in the memory register file. Let's pull it out. X64Reg xr = GetFreeXReg(); _assert_msg_(JIT, xr >= 0 && xr < NUM_X_FPREGS, "WTF - load - invalid reg"); xregs[xr].mipsReg = i; xregs[xr].dirty = makeDirty; OpArg newloc = ::Gen::R(xr); if (doLoad) { if (!regs[i].location.IsImm() && (regs[i].location.offset & 0x3)) { PanicAlert("WARNING - misaligned fp register location %i", i); } emit->MOVSS(xr, regs[i].location); } regs[i].location = newloc; regs[i].away = true; } else { // There are no immediates in the FPR reg file, so we already had this in a register. Make dirty as necessary. xregs[RX(i)].dirty |= makeDirty; _assert_msg_(JIT, regs[i].location.IsSimpleReg(), "not loaded and not simple."); } }
// Waits, under s_video_buffer_lock, until s_video_buffer_seen_ptr equals the
// write pointer sampled on entry, then compacts the aux FIFO and (optionally)
// the video buffer back to the start of their storage.
//
// Does nothing unless g_use_deterministic_gpu_thread and GpuRunningState are
// both set, and returns early if GpuRunningState is cleared while waiting.
//
//   reason            - not used by this function body.
//   may_move_read_ptr - when true, the unread video data is moved to the start
//                       of s_video_buffer and all video buffer pointers are
//                       rebased; an alert is raised if the aux FIFO still has
//                       pending data at that point.
void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr)
{
	if (g_use_deterministic_gpu_thread && GpuRunningState)
	{
		std::unique_lock<std::mutex> lk(s_video_buffer_lock);
		// Snapshot of the write pointer; the wait below completes once the
		// seen pointer reaches it (or GpuRunningState is cleared).
		u8* write_ptr = s_video_buffer_write_ptr;
		s_video_buffer_cond.wait(lk, [&]() {
			return !GpuRunningState || s_video_buffer_seen_ptr == write_ptr;
		});
		if (!GpuRunningState)
			return;

		// Opportunistically reset FIFOs so we don't wrap around.
		if (may_move_read_ptr && s_fifo_aux_write_ptr != s_fifo_aux_read_ptr)
			PanicAlert("aux fifo not synced (%p, %p)", s_fifo_aux_write_ptr, s_fifo_aux_read_ptr);

		// Slide the unread aux FIFO bytes to the front of s_fifo_aux_data and
		// rebase both aux pointers by the same distance. Note that this runs
		// regardless of may_move_read_ptr.
		memmove(s_fifo_aux_data, s_fifo_aux_read_ptr, s_fifo_aux_write_ptr - s_fifo_aux_read_ptr);
		s_fifo_aux_write_ptr -= (s_fifo_aux_read_ptr - s_fifo_aux_data);
		s_fifo_aux_read_ptr = s_fifo_aux_data;

		if (may_move_read_ptr)
		{
			// what's left over in the buffer
			size_t size = write_ptr - s_video_buffer_pp_read_ptr;

			memmove(s_video_buffer, s_video_buffer_pp_read_ptr, size);
			// This change always decreases the pointers. We write seen_ptr
			// after write_ptr here, and read it before in RunGpuLoop, so
			// 'write_ptr > seen_ptr' there cannot become spuriously true.
			s_video_buffer_write_ptr = write_ptr = s_video_buffer + size;
			s_video_buffer_pp_read_ptr = s_video_buffer;
			s_video_buffer_read_ptr = s_video_buffer;
			s_video_buffer_seen_ptr = write_ptr;
		}
	}
}
// Reads an attribute ID list from a big-endian buffer.
//
// Three header bytes (sequence tag, sequence size, element type) are consumed
// first. If the element type is SDP_UINT32, two 16-bit values are read into
// _startID and _endID; otherwise a single 16-bit value is read and used for
// both. Returns the number of bytes consumed.
static int ParseAttribList(u8* pAttribIDList, u16& _startID, u16& _endID)
{
	CBigEndianBuffer attribList(pAttribIDList);
	u32 attribOffset = 0;

	const u8 sequence = attribList.Read8(attribOffset++);
	const u8 seqSize = attribList.Read8(attribOffset++);
	const u8 typeID = attribList.Read8(attribOffset++);

	if (MAX_LOGLEVEL >= LogTypes::LOG_LEVELS::LDEBUG)
	{
		_dbg_assert_(WII_IPC_WIIMOTE, sequence == SDP_SEQ8);
		(void)seqSize;
	}

	// Both layouts begin with the 16-bit start ID.
	_startID = attribList.Read16(attribOffset);
	attribOffset += 2;

	if (typeID == SDP_UINT32)
	{
		_endID = attribList.Read16(attribOffset);
		attribOffset += 2;
	}
	else
	{
		// Single attribute: the range collapses to one ID.
		_endID = _startID;
		WARN_LOG(WII_IPC_WIIMOTE, "Read just a single attrib - not tested");
		PanicAlert("Read just a single attrib - not tested");
	}

	return attribOffset;
}
void CWII_IPC_HLE_WiiMote::SDPSendServiceAttributeResponse(u16 cid, u16 TransactionID, u32 ServiceHandle, u16 startAttrID, u16 endAttrID, u16 MaximumAttributeByteCount, u8* pContinuationState) { if (ServiceHandle != 0x10000) { ERROR_LOG(WII_IPC_WIIMOTE, "Unknown service handle %x" , ServiceHandle); PanicAlert("Unknown service handle %x" , ServiceHandle); } // _dbg_assert_(WII_IPC_WIIMOTE, ServiceHandle == 0x10000); u32 contState = ParseCont(pContinuationState); u32 packetSize = 0; const u8* pPacket = GetAttribPacket(ServiceHandle, contState, packetSize); // generate package u8 DataFrame[1000]; CBigEndianBuffer buffer(DataFrame); int Offset = 0; l2cap_hdr_t* pHeader = (l2cap_hdr_t*)&DataFrame[Offset]; Offset += sizeof(l2cap_hdr_t); pHeader->dcid = cid; buffer.Write8 (Offset, 0x05); Offset++; buffer.Write16(Offset, TransactionID); Offset += 2; // transaction ID memcpy(buffer.GetPointer(Offset), pPacket, packetSize); Offset += packetSize; pHeader->length = (u16)(Offset - sizeof(l2cap_hdr_t)); m_pHost->SendACLPacket(GetConnectionHandle(), DataFrame, pHeader->length + sizeof(l2cap_hdr_t)); // Debugger::PrintDataBuffer(LogTypes::WIIMOTE, DataFrame, pHeader->length + sizeof(l2cap_hdr_t), "test response: "); }
void GLVertexFormat::Initialize(const PortableVertexDeclaration &_vtx_decl) { this->vtx_decl = _vtx_decl; vertex_stride = vtx_decl.stride; // We will not allow vertex components causing uneven strides. if (vertex_stride & 3) PanicAlert("Uneven vertex stride: %i", vertex_stride); VertexManager *vm = (OGL::VertexManager*)g_vertex_manager; glGenVertexArrays(1, &VAO); glBindVertexArray(VAO); // the element buffer is bound directly to the vao, so we must it set for every vao glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, vm->m_index_buffers); glBindBuffer(GL_ARRAY_BUFFER, vm->m_vertex_buffers); SetPointer(SHADER_POSITION_ATTRIB, vertex_stride, vtx_decl.position); for (int i = 0; i < 3; i++) { SetPointer(SHADER_NORM0_ATTRIB+i, vertex_stride, vtx_decl.normals[i]); } for (int i = 0; i < 2; i++) { SetPointer(SHADER_COLOR0_ATTRIB+i, vertex_stride, vtx_decl.colors[i]); } for (int i = 0; i < 8; i++) { SetPointer(SHADER_TEXTURE0_ATTRIB+i, vertex_stride, vtx_decl.texcoords[i]); } SetPointer(SHADER_POSMTX_ATTRIB, vertex_stride, vtx_decl.posmtx); vm->m_last_vao = VAO; }
void* AllocateAlignedMemory(size_t size,size_t alignment) { #ifdef _WIN32 void* ptr = _aligned_malloc(size,alignment); #else void* ptr = NULL; #ifdef ANDROID ptr = memalign(alignment, size); #elif defined(__SYMBIAN32__) // On Symbian, we will want to create an RChunk. ptr = malloc(size); #else posix_memalign(&ptr, alignment, size); #endif #endif // printf("Mapped memory at %p (size %ld)\n", ptr, // (unsigned long)size); if (ptr == NULL) PanicAlert("Failed to allocate aligned memory"); return ptr; }
bool DumpDSPCode(const u8 *code_be, int size_in_bytes, u32 crc) { const std::string binFile = StringFromFormat("%sDSP_UC_%08X.bin", File::GetUserPath(D_DUMPDSP_IDX).c_str(), crc); const std::string txtFile = StringFromFormat("%sDSP_UC_%08X.txt", File::GetUserPath(D_DUMPDSP_IDX).c_str(), crc); File::IOFile pFile(binFile, "wb"); if (pFile) { pFile.WriteBytes(code_be, size_in_bytes); pFile.Close(); } else { PanicAlert("Can't open file (%s) to dump UCode!!", binFile.c_str()); return false; } // Load the binary back in. std::vector<u16> code; LoadBinary(binFile, code); AssemblerSettings settings; settings.show_hex = true; settings.show_pc = true; settings.ext_separator = '\''; settings.decode_names = true; settings.decode_registers = true; std::string text; DSPDisassembler disasm(settings); if (!disasm.Disassemble(0, code, 0x0000, text)) return false; return File::WriteStringToFile(text, txtFile); }
// Creates a vertex loader for the given vertex description/attribute table.
//
// An architecture-specific loader is tried first where one is compiled in;
// the generic VertexLoader is the fallback. A candidate that reports
// !IsInitialized() is deleted. Returns nullptr (after alerting) when no
// candidate initialized.
VertexLoaderBase* VertexLoaderBase::CreateVertexLoader(const TVtxDesc& vtx_desc, const VAT& vtx_attr)
{
	// Hands back the candidate if it initialized, otherwise frees it.
	const auto keep_if_initialized = [](VertexLoaderBase* candidate) -> VertexLoaderBase* {
		if (candidate->IsInitialized())
			return candidate;
		delete candidate;
		return nullptr;
	};

//#define COMPARE_VERTEXLOADERS

#if defined(COMPARE_VERTEXLOADERS) && defined(_M_X86_64)
	// first try: Any new VertexLoader vs the old one
	if (VertexLoaderBase* const tester = keep_if_initialized(new VertexLoaderTester(
			new VertexLoader(vtx_desc, vtx_attr),    // the software one
			new VertexLoaderX64(vtx_desc, vtx_attr), // the new one to compare
			vtx_desc, vtx_attr)))
		return tester;
#elif defined(_M_X86_64)
	if (VertexLoaderBase* const x64_loader = keep_if_initialized(new VertexLoaderX64(vtx_desc, vtx_attr)))
		return x64_loader;
#elif defined(_M_ARM_64)
	if (VertexLoaderBase* const arm64_loader = keep_if_initialized(new VertexLoaderARM64(vtx_desc, vtx_attr)))
		return arm64_loader;
#endif

	// last try: The old VertexLoader
	if (VertexLoaderBase* const generic_loader = keep_if_initialized(new VertexLoader(vtx_desc, vtx_attr)))
		return generic_loader;

	PanicAlert("No Vertex Loader found.");
	return nullptr;
}
// Returns the D3D11 rasterizer state object for 'state', creating and caching
// it (keyed by state.hex) on first use. Access to the cache is serialized by
// m_lock. If creation fails an alert is shown and the resulting null pointer
// is cached and returned.
ID3D11RasterizerState* StateCache::Get(RasterizationState state)
{
	std::lock_guard<std::mutex> guard(m_lock);

	const auto cached = m_raster.find(state.hex);
	if (cached != m_raster.end())
		return cached->second;

	// Indexed by state.cullmode.
	static constexpr std::array<D3D11_CULL_MODE, 4> cull_modes = {
		{D3D11_CULL_NONE, D3D11_CULL_BACK, D3D11_CULL_FRONT, D3D11_CULL_BACK}};

	D3D11_RASTERIZER_DESC desc = {};
	desc.FillMode = D3D11_FILL_SOLID;
	desc.CullMode = cull_modes[state.cullmode];
	desc.ScissorEnable = TRUE;

	ID3D11RasterizerState* created = nullptr;
	const HRESULT result = D3D::device->CreateRasterizerState(&desc, &created);
	if (FAILED(result))
	{
		PanicAlert("Failed to create rasterizer state at %s %d\n", __FILE__, __LINE__);
	}

	D3D::SetDebugObjectName(created, "rasterizer state used to emulate the GX pipeline");
	m_raster.emplace(state.hex, created);
	return created;
}
// Emits IR for addex: GPR[RD] = GPR[RA] + GPR[RB] + carry, and stores the
// resulting carry-out. The OE form is not implemented and only raises an alert.
void JitILBase::addex(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITIntegerOff);

	// First addition: RA + RB. An unsigned sum smaller than an operand means it wrapped.
	IREmitter::InstLoc ra = ibuild.EmitLoadGReg(inst.RA);
	IREmitter::InstLoc rb = ibuild.EmitLoadGReg(inst.RB);
	IREmitter::InstLoc partial_sum = ibuild.EmitAdd(ra, rb);
	IREmitter::InstLoc carry_from_operands = ibuild.EmitICmpUlt(partial_sum, ra);

	// Second addition: add the incoming carry and check for a wrap again.
	IREmitter::InstLoc carry_in = ibuild.EmitLoadCarry();
	IREmitter::InstLoc total = ibuild.EmitAdd(partial_sum, carry_in);
	IREmitter::InstLoc carry_from_carry_in = ibuild.EmitICmpUlt(total, partial_sum);

	// Either addition wrapping produces a carry-out.
	IREmitter::InstLoc carry_out = ibuild.EmitOr(carry_from_operands, carry_from_carry_in);

	ibuild.EmitStoreGReg(total, inst.RD);
	ibuild.EmitStoreCarry(carry_out);

	if (inst.OE)
	{
		PanicAlert("OE: addex");
	}

	if (inst.Rc)
	{
		ComputeRC(ibuild, total);
	}
}
// Emits IR for subfex: GPR[RD] = ~GPR[RA] + GPR[RB] + carry, and stores the
// resulting carry-out. The OE form is not implemented and only raises an alert.
void JitILBase::subfex(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITIntegerOff);

	if (inst.OE)
	{
		PanicAlert("OE: subfex");
	}

	// addend = ~RA + carry
	IREmitter::InstLoc ra = ibuild.EmitLoadGReg(inst.RA);
	IREmitter::InstLoc carry_in = ibuild.EmitLoadCarry();
	IREmitter::InstLoc not_ra = ibuild.EmitXor(ra, ibuild.EmitIntConst(-1));
	IREmitter::InstLoc addend = ibuild.EmitAdd(not_ra, carry_in);

	// ~RA + carry wrapped around to zero: addend == 0 while carry was set.
	IREmitter::InstLoc addend_is_zero = ibuild.EmitICmpEq(addend, ibuild.EmitIntConst(0));
	IREmitter::InstLoc carry_from_addend = ibuild.EmitAnd(addend_is_zero, carry_in);

	IREmitter::InstLoc rb = ibuild.EmitLoadGReg(inst.RB);
	IREmitter::InstLoc result = ibuild.EmitAdd(rb, addend);
	ibuild.EmitStoreGReg(result, inst.RD);

	// RB + addend wrapped when RB is (unsigned) greater than the result.
	IREmitter::InstLoc carry_out = ibuild.EmitOr(carry_from_addend, ibuild.EmitICmpUgt(rb, result));
	ibuild.EmitStoreCarry(carry_out);

	if (inst.Rc)
	{
		ComputeRC(ibuild, result);
	}
}
// Creates a D3D11 2D texture (array) and wraps it in a new D3DTexture2D.
//
// CPU access flags follow from 'usage': staging textures are readable and
// writable, dynamic textures are writable, everything else has no CPU access.
// Returns nullptr (after alerting) if the device fails to create the texture.
D3DTexture2D* D3DTexture2D::Create(unsigned int width, unsigned int height, D3D11_BIND_FLAG bind, D3D11_USAGE usage, DXGI_FORMAT fmt, unsigned int levels, unsigned int slices, D3D11_SUBRESOURCE_DATA* data)
{
	D3D11_CPU_ACCESS_FLAG cpuflags;
	switch (usage)
	{
	case D3D11_USAGE_STAGING:
		cpuflags = (D3D11_CPU_ACCESS_FLAG)((int)D3D11_CPU_ACCESS_WRITE|(int)D3D11_CPU_ACCESS_READ);
		break;
	case D3D11_USAGE_DYNAMIC:
		cpuflags = D3D11_CPU_ACCESS_WRITE;
		break;
	default:
		cpuflags = (D3D11_CPU_ACCESS_FLAG)0;
		break;
	}

	D3D11_TEXTURE2D_DESC texdesc = CD3D11_TEXTURE2D_DESC(fmt, width, height, slices, levels, bind, usage, cpuflags);

	ID3D11Texture2D* pTexture = nullptr;
	const HRESULT hr = D3D::device->CreateTexture2D(&texdesc, data, &pTexture);
	if (FAILED(hr))
	{
		PanicAlert("Failed to create texture at %s, line %d: hr=%#x\n", __FILE__, __LINE__, hr);
		return nullptr;
	}

	// The local reference is released once the wrapper has been constructed.
	D3DTexture2D* const wrapper = new D3DTexture2D(pTexture, bind);
	SAFE_RELEASE(pTexture);
	return wrapper;
}
void WaveFileWriter::AddStereoSamplesBE(const short *sample_data, int count) { if (!file) PanicAlertT("WaveFileWriter - file not open."); if (count > BUF_SIZE * 2) PanicAlert("WaveFileWriter - buffer too small (count = %i).", count); if (skip_silence) { bool all_zero = true; for (int i = 0; i < count * 2; i++) if (sample_data[i]) all_zero = false; if (all_zero) return; } for (int i = 0; i < count * 2; i++) conv_buffer[i] = Common::swap16((u16)sample_data[i]); file.WriteBytes(conv_buffer, count * 4); audio_size += count * 4; }
void WriteProfileResults(const std::string& filename) { ProfileStats prof_stats; GetProfileResults(&prof_stats); File::IOFile f(filename, "w"); if (!f) { PanicAlert("Failed to open %s", filename.c_str()); return; } fprintf(f.GetHandle(), "origAddr\tblkName\trunCount\tcost\ttimeCost\tpercent\ttimePercent\tOvAlli" "nBlkTime(ms)\tblkCodeSize\n"); for (auto& stat : prof_stats.block_stats) { std::string name = g_symbolDB.GetDescription(stat.addr); double percent = 100.0 * (double)stat.cost / (double)prof_stats.cost_sum; double timePercent = 100.0 * (double)stat.tick_counter / (double)prof_stats.timecost_sum; fprintf(f.GetHandle(), "%08x\t%s\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%.2f\t%.2f\t%.2f\t%i\n", stat.addr, name.c_str(), stat.run_count, stat.cost, stat.tick_counter, percent, timePercent, (double)stat.tick_counter * 1000.0 / (double)prof_stats.countsPerSec, stat.block_size); } }
void TextureCache::TCacheEntry::Load(unsigned int width, unsigned int height, unsigned int expanded_width, unsigned int level) { if (pcfmt != PC_TEX_FMT_DXT1) { glActiveTexture(GL_TEXTURE0+9); glBindTexture(GL_TEXTURE_2D_ARRAY, texture); if (expanded_width != width) glPixelStorei(GL_UNPACK_ROW_LENGTH, expanded_width); glTexImage3D(GL_TEXTURE_2D_ARRAY, level, gl_iformat, width, height, 1, 0, gl_format, gl_type, temp); if (expanded_width != width) glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); } else { PanicAlert("PC_TEX_FMT_DXT1 support disabled"); //glCompressedTexImage2D(GL_TEXTURE_2D, 0, GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, //width, height, 0, expanded_width * expanded_height/2, temp); } TextureCache::SetStage(); }
// Generates the vertex shader for the current xfmem/bpmem state.
//
// Everything is emitted through out.Write(); alongside the text, the settings
// that influenced generation are recorded in the uid data obtained from
// out.GetUidData() (or in a throw-away dummy when that returns nullptr).
//
//   out        - generator sink; T is a template parameter whose declaration
//                lies outside this block.
//   components - VB_HAS_* bit mask of the vertex components that are present.
//   api_type   - selects GLSL-style (API_OPENGL) or HLSL-style output.
//
// When out.GetBuffer() is non-null, the last byte of 'text' is used as a
// canary to detect the generated source overrunning the buffer.
static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_type)
{
	// Non-uid template parameters will write to the dummy data (=> gets optimized out)
	vertex_shader_uid_data dummy_data;
	vertex_shader_uid_data* uid_data = out.template GetUidData<vertex_shader_uid_data>();
	if (uid_data == nullptr)
		uid_data = &dummy_data;

	out.SetBuffer(text);
	const bool is_writing_shadercode = (out.GetBuffer() != nullptr);

	if (is_writing_shadercode)
		text[sizeof(text) - 1] = 0x7C; // canary

	_assert_(bpmem.genMode.numtexgens == xfmem.numTexGen.numTexGens);
	_assert_(bpmem.genMode.numcolchans == xfmem.numChan.numColorChans);

	if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS))
	{
		// Add functions to do shifts on scalars and ivecs.
		// This is included in the vertex shader for lighting shader generation.
		out.Write("int ilshift(int a, int b) { return a << b; }\n"
		          "int irshift(int a, int b) { return a >> b; }\n"

		          "int2 ilshift(int2 a, int2 b) { return int2(a.x << b.x, a.y << b.y); }\n"
		          "int2 ilshift(int2 a, int b) { return int2(a.x << b, a.y << b); }\n"
		          "int2 irshift(int2 a, int2 b) { return int2(a.x >> b.x, a.y >> b.y); }\n"
		          "int2 irshift(int2 a, int b) { return int2(a.x >> b, a.y >> b); }\n"

		          "int3 ilshift(int3 a, int3 b) { return int3(a.x << b.x, a.y << b.y, a.z << b.z); }\n"
		          "int3 ilshift(int3 a, int b) { return int3(a.x << b, a.y << b, a.z << b); }\n"
		          "int3 irshift(int3 a, int3 b) { return int3(a.x >> b.x, a.y >> b.y, a.z >> b.z); }\n"
		          "int3 irshift(int3 a, int b) { return int3(a.x >> b, a.y >> b, a.z >> b); }\n"

		          "int4 ilshift(int4 a, int4 b) { return int4(a.x << b.x, a.y << b.y, a.z << b.z, a.w << b.w); }\n"
		          "int4 ilshift(int4 a, int b) { return int4(a.x << b, a.y << b, a.z << b, a.w << b); }\n"
		          "int4 irshift(int4 a, int4 b) { return int4(a.x >> b.x, a.y >> b.y, a.z >> b.z, a.w >> b.w); }\n"
		          "int4 irshift(int4 a, int b) { return int4(a.x >> b, a.y >> b, a.z >> b, a.w >> b); }\n\n");
	}

	out.Write("%s", s_lighting_struct);

	// uniforms
	if (api_type == API_OPENGL)
		out.Write("layout(std140%s) uniform VSBlock {\n", g_ActiveConfig.backend_info.bSupportsBindingLayout ? ", binding = 2" : "");
	else
		out.Write("cbuffer VSBlock {\n");
	out.Write(s_shader_uniforms);
	out.Write("};\n");

	out.Write("struct VS_OUTPUT {\n");
	GenerateVSOutputMembers<T>(out, api_type);
	out.Write("};\n");

	uid_data->numTexGens = xfmem.numTexGen.numTexGens;
	uid_data->components = components;
	uid_data->pixel_lighting = g_ActiveConfig.bEnablePixelLighting;

	// Shader inputs/outputs and the entry point header.
	if (api_type == API_OPENGL)
	{
		out.Write("in float4 rawpos; // ATTR%d,\n", SHADER_POSITION_ATTRIB);
		if (components & VB_HAS_POSMTXIDX)
			out.Write("in int posmtx; // ATTR%d,\n", SHADER_POSMTX_ATTRIB);
		if (components & VB_HAS_NRM0)
			out.Write("in float3 rawnorm0; // ATTR%d,\n", SHADER_NORM0_ATTRIB);
		if (components & VB_HAS_NRM1)
			out.Write("in float3 rawnorm1; // ATTR%d,\n", SHADER_NORM1_ATTRIB);
		if (components & VB_HAS_NRM2)
			out.Write("in float3 rawnorm2; // ATTR%d,\n", SHADER_NORM2_ATTRIB);

		if (components & VB_HAS_COL0)
			out.Write("in float4 color0; // ATTR%d,\n", SHADER_COLOR0_ATTRIB);
		if (components & VB_HAS_COL1)
			out.Write("in float4 color1; // ATTR%d,\n", SHADER_COLOR1_ATTRIB);

		for (int i = 0; i < 8; ++i)
		{
			// A texcoord with a matrix index is declared as float3, otherwise float2.
			u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0<<i));
			if ((components & (VB_HAS_UV0<<i)) || hastexmtx)
				out.Write("in float%d tex%d; // ATTR%d,\n", hastexmtx ? 3 : 2, i, SHADER_TEXTURE0_ATTRIB + i);
		}

		if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
		{
			out.Write("out VertexData {\n");
			GenerateVSOutputMembers<T>(out, api_type, g_ActiveConfig.backend_info.bSupportsBindingLayout ? "centroid" : "centroid out");
			out.Write("} vs;\n");
		}
		else
		{
			// Let's set up attributes
			for (size_t i = 0; i < 8; ++i)
			{
				if (i < xfmem.numTexGen.numTexGens)
				{
					// NOTE(review): i is a size_t but is formatted with %d - confirm how Write() handles that.
					out.Write("centroid out float3 uv%d;\n", i);
				}
			}
			out.Write("centroid out float4 clipPos;\n");
			if (g_ActiveConfig.bEnablePixelLighting)
			{
				out.Write("centroid out float3 Normal;\n");
				out.Write("centroid out float3 WorldPos;\n");
			}
			out.Write("centroid out float4 colors_0;\n");
			out.Write("centroid out float4 colors_1;\n");
		}

		out.Write("void main()\n{\n");
	}
	else // D3D
	{
		out.Write("VS_OUTPUT main(\n");

		// inputs
		if (components & VB_HAS_NRM0)
			out.Write(" float3 rawnorm0 : NORMAL0,\n");
		if (components & VB_HAS_NRM1)
			out.Write(" float3 rawnorm1 : NORMAL1,\n");
		if (components & VB_HAS_NRM2)
			out.Write(" float3 rawnorm2 : NORMAL2,\n");
		if (components & VB_HAS_COL0)
			out.Write(" float4 color0 : COLOR0,\n");
		if (components & VB_HAS_COL1)
			out.Write(" float4 color1 : COLOR1,\n");
		for (int i = 0; i < 8; ++i)
		{
			u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0<<i));
			if ((components & (VB_HAS_UV0<<i)) || hastexmtx)
				out.Write(" float%d tex%d : TEXCOORD%d,\n", hastexmtx ? 3 : 2, i, i);
		}
		if (components & VB_HAS_POSMTXIDX)
			out.Write(" int posmtx : BLENDINDICES,\n");
		out.Write(" float4 rawpos : POSITION) {\n");
	}

	out.Write("VS_OUTPUT o;\n");

	// transforms
	if (components & VB_HAS_POSMTXIDX)
	{
		// Per-vertex matrix index: normally indexes the matrix arrays with
		// posmtx; the driver-bug path below uses fixed indices 0..2 instead.
		if (is_writing_shadercode && (DriverDetails::HasBug(DriverDetails::BUG_NODYNUBOACCESS) && !DriverDetails::HasBug(DriverDetails::BUG_ANNIHILATEDUBOS)))
		{
			// This'll cause issues, but it can't be helped
			out.Write("float4 pos = float4(dot(" I_TRANSFORMMATRICES"[0], rawpos), dot(" I_TRANSFORMMATRICES"[1], rawpos), dot(" I_TRANSFORMMATRICES"[2], rawpos), 1);\n");
			if (components & VB_HAS_NRMALL)
				out.Write("float3 N0 = " I_NORMALMATRICES"[0].xyz, N1 = " I_NORMALMATRICES"[1].xyz, N2 = " I_NORMALMATRICES"[2].xyz;\n");
		}
		else
		{
			out.Write("float4 pos = float4(dot(" I_TRANSFORMMATRICES"[posmtx], rawpos), dot(" I_TRANSFORMMATRICES"[posmtx+1], rawpos), dot(" I_TRANSFORMMATRICES"[posmtx+2], rawpos), 1);\n");

			if (components & VB_HAS_NRMALL)
			{
				out.Write("int normidx = posmtx >= 32 ? (posmtx-32) : posmtx;\n");
				out.Write("float3 N0 = " I_NORMALMATRICES"[normidx].xyz, N1 = " I_NORMALMATRICES"[normidx+1].xyz, N2 = " I_NORMALMATRICES"[normidx+2].xyz;\n");
			}
		}

		if (components & VB_HAS_NRM0)
			out.Write("float3 _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, rawnorm0)));\n");
		if (components & VB_HAS_NRM1)
			out.Write("float3 _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n");
		if (components & VB_HAS_NRM2)
			out.Write("float3 _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n");
	}
	else
	{
		// No per-vertex matrix index: rows 0..2 of the pos/normal matrix
		// transform the position, rows 3..5 the normals.
		out.Write("float4 pos = float4(dot(" I_POSNORMALMATRIX"[0], rawpos), dot(" I_POSNORMALMATRIX"[1], rawpos), dot(" I_POSNORMALMATRIX"[2], rawpos), 1.0);\n");
		if (components & VB_HAS_NRM0)
			out.Write("float3 _norm0 = normalize(float3(dot(" I_POSNORMALMATRIX"[3].xyz, rawnorm0), dot(" I_POSNORMALMATRIX"[4].xyz, rawnorm0), dot(" I_POSNORMALMATRIX"[5].xyz, rawnorm0)));\n");
		if (components & VB_HAS_NRM1)
			out.Write("float3 _norm1 = float3(dot(" I_POSNORMALMATRIX"[3].xyz, rawnorm1), dot(" I_POSNORMALMATRIX"[4].xyz, rawnorm1), dot(" I_POSNORMALMATRIX"[5].xyz, rawnorm1));\n");
		if (components & VB_HAS_NRM2)
			out.Write("float3 _norm2 = float3(dot(" I_POSNORMALMATRIX"[3].xyz, rawnorm2), dot(" I_POSNORMALMATRIX"[4].xyz, rawnorm2), dot(" I_POSNORMALMATRIX"[5].xyz, rawnorm2));\n");
	}

	// _norm0 is referenced later on, so it is always declared.
	if (!(components & VB_HAS_NRM0))
		out.Write("float3 _norm0 = float3(0.0, 0.0, 0.0);\n");

	out.Write("o.pos = float4(dot(" I_PROJECTION"[0], pos), dot(" I_PROJECTION"[1], pos), dot(" I_PROJECTION"[2], pos), dot(" I_PROJECTION"[3], pos));\n");

	out.Write("int4 lacc;\n"
	          "float3 ldir, h, cosAttn, distAttn;\n"
	          "float dist, dist2, attn;\n");

	uid_data->numColorChans = xfmem.numChan.numColorChans;
	if (xfmem.numChan.numColorChans == 0)
	{
		if (components & VB_HAS_COL0)
			out.Write("o.colors_0 = color0;\n");
		else
			out.Write("o.colors_0 = float4(1.0, 1.0, 1.0, 1.0);\n");
	}

	GenerateLightingShader<T>(out, uid_data->lighting, components, "color", "o.colors_");

	if (xfmem.numChan.numColorChans < 2)
	{
		if (components & VB_HAS_COL1)
			out.Write("o.colors_1 = color1;\n");
		else
			out.Write("o.colors_1 = o.colors_0;\n");
	}

	// special case if only pos and tex coord 0 and tex coord input is AB11
	// donko - this has caused problems in some games. removed for now.
	bool texGenSpecialCase = false;
	/*bool texGenSpecialCase =
		((g_main_cp_state.vtx_desc.Hex & 0x60600L) == g_main_cp_state.vtx_desc.Hex) && // only pos and tex coord 0
		(g_main_cp_state.vtx_desc.Tex0Coord != NOT_PRESENT) &&
		(xfmem.texcoords[0].texmtxinfo.inputform == XF_TEXINPUT_AB11);
		*/

	// transform texcoords
	out.Write("float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n");
	for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i)
	{
		TexMtxInfo& texinfo = xfmem.texMtxInfo[i];

		// Each texgen gets its own scope in the generated shader.
		out.Write("{\n");
		out.Write("coord = float4(0.0, 0.0, 1.0, 1.0);\n");
		uid_data->texMtxInfo[i].sourcerow = xfmem.texMtxInfo[i].sourcerow;
		// Select the source vector for this texgen.
		switch (texinfo.sourcerow)
		{
		case XF_SRCGEOM_INROW:
			_assert_(texinfo.inputform == XF_TEXINPUT_ABC1);
			out.Write("coord = rawpos;\n"); // pos.w is 1
			break;
		case XF_SRCNORMAL_INROW:
			if (components & VB_HAS_NRM0)
			{
				_assert_(texinfo.inputform == XF_TEXINPUT_ABC1);
				out.Write("coord = float4(rawnorm0.xyz, 1.0);\n");
			}
			break;
		case XF_SRCCOLORS_INROW:
			_assert_(texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC0 || texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC1);
			break;
		case XF_SRCBINORMAL_T_INROW:
			if (components & VB_HAS_NRM1)
			{
				_assert_(texinfo.inputform == XF_TEXINPUT_ABC1);
				out.Write("coord = float4(rawnorm1.xyz, 1.0);\n");
			}
			break;
		case XF_SRCBINORMAL_B_INROW:
			if (components & VB_HAS_NRM2)
			{
				_assert_(texinfo.inputform == XF_TEXINPUT_ABC1);
				out.Write("coord = float4(rawnorm2.xyz, 1.0);\n");
			}
			break;
		default:
			_assert_(texinfo.sourcerow <= XF_SRCTEX7_INROW);
			if (components & (VB_HAS_UV0 << (texinfo.sourcerow - XF_SRCTEX0_INROW)))
				out.Write("coord = float4(tex%d.x, tex%d.y, 1.0, 1.0);\n", texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW);
			break;
		}

		// first transformation
		uid_data->texMtxInfo[i].texgentype = xfmem.texMtxInfo[i].texgentype;
		switch (texinfo.texgentype)
		{
		case XF_TEXGEN_EMBOSS_MAP: // calculate tex coords into bump map

			if (components & (VB_HAS_NRM1|VB_HAS_NRM2))
			{
				// transform the light dir into tangent space
				uid_data->texMtxInfo[i].embosslightshift = xfmem.texMtxInfo[i].embosslightshift;
				uid_data->texMtxInfo[i].embosssourceshift = xfmem.texMtxInfo[i].embosssourceshift;
				out.Write("ldir = normalize(" LIGHT_POS".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(texinfo.embosslightshift));
				out.Write("o.tex%d.xyz = o.tex%d.xyz + float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0);\n", i, texinfo.embosssourceshift);
			}
			else
			{
				_assert_(0); // should have normals
				uid_data->texMtxInfo[i].embosssourceshift = xfmem.texMtxInfo[i].embosssourceshift;
				out.Write("o.tex%d.xyz = o.tex%d.xyz;\n", i, texinfo.embosssourceshift);
			}

			break;
		case XF_TEXGEN_COLOR_STRGBC0:
			_assert_(texinfo.sourcerow == XF_SRCCOLORS_INROW);
			out.Write("o.tex%d.xyz = float3(o.colors_0.x, o.colors_0.y, 1);\n", i);
			break;
		case XF_TEXGEN_COLOR_STRGBC1:
			_assert_(texinfo.sourcerow == XF_SRCCOLORS_INROW);
			out.Write("o.tex%d.xyz = float3(o.colors_1.x, o.colors_1.y, 1);\n", i);
			break;
		case XF_TEXGEN_REGULAR:
		default:
			uid_data->texMtxInfo_n_projection |= xfmem.texMtxInfo[i].projection << i;
			if (components & (VB_HAS_TEXMTXIDX0<<i))
			{
				// The matrix index is carried in the z component of the texcoord input.
				out.Write("int tmp = int(tex%d.z);\n", i);
				if (texinfo.projection == XF_TEXPROJ_STQ)
					out.Write("o.tex%d.xyz = float3(dot(coord, " I_TRANSFORMMATRICES"[tmp]), dot(coord, " I_TRANSFORMMATRICES"[tmp+1]), dot(coord, " I_TRANSFORMMATRICES"[tmp+2]));\n", i);
				else
					out.Write("o.tex%d.xyz = float3(dot(coord, " I_TRANSFORMMATRICES"[tmp]), dot(coord, " I_TRANSFORMMATRICES"[tmp+1]), 1);\n", i);
			}
			else
			{
				if (texinfo.projection == XF_TEXPROJ_STQ)
					out.Write("o.tex%d.xyz = float3(dot(coord, " I_TEXMATRICES"[%d]), dot(coord, " I_TEXMATRICES"[%d]), dot(coord, " I_TEXMATRICES"[%d]));\n", i, 3*i, 3*i+1, 3*i+2);
				else
					out.Write("o.tex%d.xyz = float3(dot(coord, " I_TEXMATRICES"[%d]), dot(coord, " I_TEXMATRICES"[%d]), 1);\n", i, 3*i, 3*i+1);
			}
			break;
		}

		uid_data->dualTexTrans_enabled = xfmem.dualTexTrans.enabled;
		// CHECKME: does this only work for regular tex gen types?
		if (xfmem.dualTexTrans.enabled && texinfo.texgentype == XF_TEXGEN_REGULAR)
		{
			const PostMtxInfo& postInfo = xfmem.postMtxInfo[i];

			uid_data->postMtxInfo[i].index = xfmem.postMtxInfo[i].index;
			int postidx = postInfo.index;
			// The three post-transform rows are fetched with indices wrapped to 0..63.
			out.Write("float4 P0 = " I_POSTTRANSFORMMATRICES"[%d];\n"
			          "float4 P1 = " I_POSTTRANSFORMMATRICES"[%d];\n"
			          "float4 P2 = " I_POSTTRANSFORMMATRICES"[%d];\n",
			          postidx & 0x3f, (postidx + 1) & 0x3f, (postidx + 2) & 0x3f);

			if (texGenSpecialCase)
			{
				// no normalization
				// q of input is 1
				// q of output is unknown

				// multiply by postmatrix
				out.Write("o.tex%d.xyz = float3(dot(P0.xy, o.tex%d.xy) + P0.z + P0.w, dot(P1.xy, o.tex%d.xy) + P1.z + P1.w, 0.0);\n", i, i, i);
			}
			else
			{
				uid_data->postMtxInfo[i].normalize = xfmem.postMtxInfo[i].normalize;
				if (postInfo.normalize)
					out.Write("o.tex%d.xyz = normalize(o.tex%d.xyz);\n", i, i);

				// multiply by postmatrix
				out.Write("o.tex%d.xyz = float3(dot(P0.xyz, o.tex%d.xyz) + P0.w, dot(P1.xyz, o.tex%d.xyz) + P1.w, dot(P2.xyz, o.tex%d.xyz) + P2.w);\n", i, i, i, i);
			}
		}

		out.Write("}\n");
	}

	// clipPos/w needs to be done in pixel shader, not here
	out.Write("o.clipPos = o.pos;\n");

	if (g_ActiveConfig.bEnablePixelLighting)
	{
		out.Write("o.Normal = _norm0;\n");
		out.Write("o.WorldPos = pos.xyz;\n");

		if (components & VB_HAS_COL0)
			out.Write("o.colors_0 = color0;\n");

		if (components & VB_HAS_COL1)
			out.Write("o.colors_1 = color1;\n");
	}

	//write the true depth value, if the game uses depth textures pixel shaders will override with the correct values
	//if not early z culling will improve speed
	if (g_ActiveConfig.backend_info.bSupportsClipControl)
	{
		out.Write("o.pos.z = -o.pos.z;\n");
	}
	else if (api_type == API_D3D)
	{
		out.Write("o.pos.z = -o.pos.z;\n");
	}
	else // OGL
	{
		// this results in a scale from -1..0 to -1..1 after perspective
		// divide
		out.Write("o.pos.z = o.pos.z * -2.0 - o.pos.w;\n");

		// the next steps of the OGL pipeline are:
		// (x_c,y_c,z_c,w_c) = o.pos //switch to OGL spec terminology
		// clipping to -w_c <= (x_c,y_c,z_c) <= w_c
		// (x_d,y_d,z_d) = (x_c,y_c,z_c)/w_c//perspective divide
		// z_w = (f-n)/2*z_d + (n+f)/2
		// z_w now contains the value to go to the 0..1 depth buffer

		//trying to get the correct semantic while not using glDepthRange
		//seems to get rather complicated
	}

	// The console GPU places the pixel center at 7/12 in screen space unless
	// antialiasing is enabled, while D3D and OpenGL place it at 0.5. This results
	// in some primitives being placed one pixel too far to the bottom-right,
	// which in turn can be critical if it happens for clear quads.
	// Hence, we compensate for this pixel center difference so that primitives
	// get rasterized correctly.
	out.Write("o.pos.xy = o.pos.xy - o.pos.w * " I_PIXELCENTERCORRECTION".xy;\n");

	// Hand the results over to the next stage and close the entry point.
	if (api_type == API_OPENGL)
	{
		if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
		{
			AssignVSOutputMembers(out, "vs", "o");
		}
		else
		{
			// TODO: Pass interface blocks between shader stages even if geometry shaders
			// are not supported, however that will require at least OpenGL 3.2 support.
			for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i)
				out.Write("uv%d.xyz = o.tex%d;\n", i, i);
			out.Write("clipPos = o.clipPos;\n");
			if (g_ActiveConfig.bEnablePixelLighting)
			{
				out.Write("Normal = o.Normal;\n");
				out.Write("WorldPos = o.WorldPos;\n");
			}
			out.Write("colors_0 = o.colors_0;\n");
			out.Write("colors_1 = o.colors_1;\n");
		}

		out.Write("gl_Position = o.pos;\n");
	}
	else // D3D
	{
		out.Write("return o;\n");
	}
	out.Write("}\n");

	// If the canary byte was overwritten, the generated text did not fit into 'text'.
	if (is_writing_shadercode)
	{
		if (text[sizeof(text) - 1] != 0x7C)
			PanicAlert("VertexShader generator - buffer too small, canary has been eaten!");
	}
}
// Emits the pixel shader that emulates the currently configured TEV pipeline and/or records
// the state that influences it into the shader uid.
//
// out          - generator sink; everything is emitted through out.Write(), constant usage is
//                reported through out.SetConstantsUsed(), and uid storage (if any) is obtained
//                through out.GetUidData().
// dstAlphaMode - selects how the destination alpha is produced (plain, alpha pass or
//                dual-source blending).
// ApiType      - API_OPENGL selects GLSL-style declarations, anything else the D3D flavour.
// components   - forwarded to the lighting shader generator when per-pixel lighting is enabled.
//
// The statement order matters: it is both the order of the emitted shader text and the order
// in which uid fields are filled in.
static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components)
{
	// Non-uid template parameters will write to the dummy data (=> gets optimized out)
	pixel_shader_uid_data dummy_data;
	pixel_shader_uid_data* uid_data = out.template GetUidData<pixel_shader_uid_data>();
	if (uid_data == nullptr)
		uid_data = &dummy_data;

	out.SetBuffer(text);
	const bool is_writing_shadercode = (out.GetBuffer() != nullptr);

	// The last byte of the text buffer is used as an overflow detector; it is verified again
	// at the very end of this function.
	if (is_writing_shadercode)
		text[sizeof(text) - 1] = 0x7C; // canary

	unsigned int numStages = bpmem.genMode.numtevstages + 1;
	unsigned int numTexgen = bpmem.genMode.numtexgens;

	out.Write("//Pixel Shader for TEV stages\n");
	out.Write("//%i TEV stages, %i texgens, %i IND stages\n", numStages, numTexgen, bpmem.genMode.numindstages);

	uid_data->dstAlphaMode = dstAlphaMode;
	uid_data->genMode_numindstages = bpmem.genMode.numindstages;
	uid_data->genMode_numtevstages = bpmem.genMode.numtevstages;
	uid_data->genMode_numtexgens = bpmem.genMode.numtexgens;

	// dot product for integer vectors
	out.Write("int idot(int3 x, int3 y)\n"
	          "{\n"
	          "\tint3 tmp = x * y;\n"
	          "\treturn tmp.x + tmp.y + tmp.z;\n"
	          "}\n");

	out.Write("int idot(int4 x, int4 y)\n"
	          "{\n"
	          "\tint4 tmp = x * y;\n"
	          "\treturn tmp.x + tmp.y + tmp.z + tmp.w;\n"
	          "}\n\n");

	// rounding + casting to integer at once in a single function
	out.Write("int iround(float x) { return int (round(x)); }\n"
	          "int2 iround(float2 x) { return int2(round(x)); }\n"
	          "int3 iround(float3 x) { return int3(round(x)); }\n"
	          "int4 iround(float4 x) { return int4(round(x)); }\n\n");

	// truncation + casting to integer at once in a single function
	out.Write("int itrunc(float x) { return int (trunc(x)); }\n"
	          "int2 itrunc(float2 x) { return int2(trunc(x)); }\n"
	          "int3 itrunc(float3 x) { return int3(trunc(x)); }\n"
	          "int4 itrunc(float4 x) { return int4(trunc(x)); }\n\n");

	if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS))
	{
		// Add functions to do shifts on scalars and ivecs.
		// These functions all have the same name to enable them to be used no matter what code is generated.
		// For example: tev color op code uses .rgb as a swizzle, but alpha code only uses .a.
		out.Write("int ilshift(int a, int b) { return a << b; }\n"
		          "int irshift(int a, int b) { return a >> b; }\n"

		          "int2 ilshift(int2 a, int2 b) { return int2(a.x << b.x, a.y << b.y); }\n"
		          "int2 ilshift(int2 a, int b) { return int2(a.x << b, a.y << b); }\n"
		          "int2 irshift(int2 a, int2 b) { return int2(a.x >> b.x, a.y >> b.y); }\n"
		          "int2 irshift(int2 a, int b) { return int2(a.x >> b, a.y >> b); }\n"

		          "int3 ilshift(int3 a, int3 b) { return int3(a.x << b.x, a.y << b.y, a.z << b.z); }\n"
		          "int3 ilshift(int3 a, int b) { return int3(a.x << b, a.y << b, a.z << b); }\n"
		          "int3 irshift(int3 a, int3 b) { return int3(a.x >> b.x, a.y >> b.y, a.z >> b.z); }\n"
		          "int3 irshift(int3 a, int b) { return int3(a.x >> b, a.y >> b, a.z >> b); }\n"

		          "int4 ilshift(int4 a, int4 b) { return int4(a.x << b.x, a.y << b.y, a.z << b.z, a.w << b.w); }\n"
		          "int4 ilshift(int4 a, int b) { return int4(a.x << b, a.y << b, a.z << b, a.w << b); }\n"
		          "int4 irshift(int4 a, int4 b) { return int4(a.x >> b.x, a.y >> b.y, a.z >> b.z, a.w >> b.w); }\n"
		          "int4 irshift(int4 a, int b) { return int4(a.x >> b, a.y >> b, a.z >> b, a.w >> b); }\n\n");
	}

	if (ApiType == API_OPENGL)
	{
		// Declare samplers
		for (int i = 0; i < 8; ++i)
			out.Write("SAMPLER_BINDING(%d) uniform sampler2DArray samp%d;\n", i, i);
	}
	else // D3D
	{
		// Declare samplers
		for (int i = 0; i < 8; ++i)
			out.Write("sampler samp%d : register(s%d);\n", i, i);

		out.Write("\n");
		for (int i = 0; i < 8; ++i)
			out.Write("Texture2DArray Tex%d : register(t%d);\n", i, i);
	}
	out.Write("\n");

	// Pixel shader constant block; the member list is shared between both API flavours,
	// only the block header differs.
	if (ApiType == API_OPENGL)
	{
		out.Write("layout(std140%s) uniform PSBlock {\n", g_ActiveConfig.backend_info.bSupportsBindingLayout ? ", binding = 1" : "");
	}
	else
	{
		out.Write("cbuffer PSBlock : register(b0) {\n");
	}
	out.Write(
		"\tint4 " I_COLORS"[4];\n"
		"\tint4 " I_KCOLORS"[4];\n"
		"\tint4 " I_ALPHA";\n"
		"\tfloat4 " I_TEXDIMS"[8];\n"
		"\tint4 " I_ZBIAS"[2];\n"
		"\tint4 " I_INDTEXSCALE"[2];\n"
		"\tint4 " I_INDTEXMTX"[6];\n"
		"\tint4 " I_FOGCOLOR";\n"
		"\tint4 " I_FOGI";\n"
		"\tfloat4 " I_FOGF"[2];\n"
		"\tfloat4 " I_ZSLOPE";\n"
		"\tfloat4 " I_EFBSCALE";\n"
		"};\n");

	// Per-pixel lighting additionally declares the lighting struct and the VSBlock constant
	// block inside the pixel shader.
	if (g_ActiveConfig.bEnablePixelLighting)
	{
		out.Write("%s", s_lighting_struct);
		if (ApiType == API_OPENGL)
		{
			out.Write("layout(std140%s) uniform VSBlock {\n", g_ActiveConfig.backend_info.bSupportsBindingLayout ? ", binding = 2" : "");
		}
		else
		{
			out.Write("cbuffer VSBlock : register(b1) {\n");
		}
		// NOTE(review): s_shader_uniforms is passed as the format string here, unlike
		// s_lighting_struct above which goes through "%s" - confirm it never contains '%'.
		out.Write(s_shader_uniforms);
		out.Write("};\n");
	}

	// Storage used by the bounding box code emitted at the end of the shader.
	if (g_ActiveConfig.backend_info.bSupportsBBox)
	{
		if (ApiType == API_OPENGL)
		{
			out.Write(
				"layout(std140, binding = 3) buffer BBox {\n"
				"\tint4 bbox_data;\n"
				"};\n"
			);
		}
		else
		{
			out.Write(
				"globallycoherent RWBuffer<int> bbox_data : register(u2);\n"
			);
		}
	}

	out.Write("struct VS_OUTPUT {\n");
	GenerateVSOutputMembers<T>(out, ApiType);
	out.Write("};\n");

	const bool forced_early_z = g_ActiveConfig.backend_info.bSupportsEarlyZ && bpmem.UseEarlyDepthTest()
		&& (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED)
		// We can't allow early_ztest for zfreeze because depth is overridden per-pixel.
		// This means it's impossible for zcomploc to be emulated on a zfrozen polygon.
		&& !(bpmem.zmode.testenable && bpmem.genMode.zfreeze);
	// True when the shader has to write its own depth value (the "depth" output).
	const bool per_pixel_depth = (bpmem.ztex2.op != ZTEXTURE_DISABLE && bpmem.UseLateDepthTest())
		|| (!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z)
		|| (bpmem.zmode.testenable && bpmem.genMode.zfreeze);

	if (forced_early_z)
	{
		// Zcomploc (aka early_ztest) is a way to control whether depth test is done before
		// or after texturing and alpha test. PC graphics APIs used to provide no way to emulate
		// this feature properly until 2012: Depth tests were always done after alpha testing.
		// Most importantly, it was not possible to write to the depth buffer without also writing
		// a color value (unless color writing was disabled altogether).

		// OpenGL has a flag which allows the driver to still update the depth buffer if alpha
		// test fails. The driver isn't required to do this, but I (degasus) assume all of them do
		// because it's the much faster code path for the GPU.
		// D3D11 also has a way to force the driver to enable early-z, so we're fine here.
		if(ApiType == API_OPENGL)
		{
			out.Write("layout(early_fragment_tests) in;\n");
		}
		else
		{
			out.Write("[earlydepthstencil]\n");
		}
	}
	else if (bpmem.UseEarlyDepthTest() && (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED) && is_writing_shadercode)
	{
		// Early z was requested but could not be forced; tell the user once per process.
		static bool warn_once = true;
		if (warn_once)
			WARN_LOG(VIDEO, "Early z test enabled but not possible to emulate with current configuration. Make sure to enable fast depth calculations. If this message still shows up your hardware isn't able to emulate the feature properly (a GPU with D3D 11.0 / OGL 4.2 support is required).");
		warn_once = false;
	}

	if (ApiType == API_OPENGL)
	{
		out.Write("out vec4 ocol0;\n");
		if (dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND)
			out.Write("out vec4 ocol1;\n");

		if (per_pixel_depth)
			out.Write("#define depth gl_FragDepth\n");

		// We use the flag "centroid" to fix some MSAA rendering bugs. With MSAA, the
		// pixel shader will be executed for each pixel which has at least one passed sample.
		// So there may be rendered pixels where the center of the pixel isn't in the primitive.
		// As the pixel shader usually renders at the center of the pixel, this position may be
		// outside the primitive. This will lead to sampling outside the texture, sign changes, ...
		// As a workaround, we interpolate at the centroid of the coveraged pixel, which
		// is always inside the primitive.
		// Without MSAA, this flag is defined to have no effect.
		uid_data->stereo = g_ActiveConfig.iStereoMode > 0;
		if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
		{
			// Inputs arrive through an interface block.
			out.Write("in VertexData {\n");
			GenerateVSOutputMembers<T>(out, ApiType, g_ActiveConfig.backend_info.bSupportsBindingLayout ? "centroid" : "centroid in");

			if (g_ActiveConfig.iStereoMode > 0)
				out.Write("\tflat int layer;\n");

			out.Write("};\n");
		}
		else
		{
			// Inputs arrive as individual varyings.
			out.Write("centroid in float4 colors_0;\n");
			out.Write("centroid in float4 colors_1;\n");
			// compute window position if needed because binding semantic WPOS is not widely supported
			// Let's set up attributes
			for (unsigned int i = 0; i < numTexgen; ++i)
			{
				out.Write("centroid in float3 uv%d;\n", i);
			}
			out.Write("centroid in float4 clipPos;\n");
			if (g_ActiveConfig.bEnablePixelLighting)
			{
				out.Write("centroid in float3 Normal;\n");
				out.Write("centroid in float3 WorldPos;\n");
			}
		}

		out.Write("void main()\n{\n");

		if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
		{
			// Alias the interface block members to the uvN names used by the rest of the shader.
			for (unsigned int i = 0; i < numTexgen; ++i)
				out.Write("\tfloat3 uv%d = tex%d;\n", i, i);
		}

		out.Write("\tfloat4 rawpos = gl_FragCoord;\n");
	}
	else // D3D
	{
		out.Write("void main(\n");
		out.Write(" out float4 ocol0 : SV_Target0,%s%s\n in float4 rawpos : SV_Position,\n",
			dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "\n out float4 ocol1 : SV_Target1," : "",
			per_pixel_depth ? "\n out float depth : SV_Depth," : "");

		out.Write(" in centroid float4 colors_0 : COLOR0,\n");
		out.Write(" in centroid float4 colors_1 : COLOR1\n");

		// compute window position if needed because binding semantic WPOS is not widely supported
		for (unsigned int i = 0; i < numTexgen; ++i)
			out.Write(",\n in centroid float3 uv%d : TEXCOORD%d", i, i);
		out.Write(",\n in centroid float4 clipPos : TEXCOORD%d", numTexgen);
		if (g_ActiveConfig.bEnablePixelLighting)
		{
			out.Write(",\n in centroid float3 Normal : TEXCOORD%d", numTexgen + 1);
			out.Write(",\n in centroid float3 WorldPos : TEXCOORD%d", numTexgen + 2);
		}
		uid_data->stereo = g_ActiveConfig.iStereoMode > 0;
		if (g_ActiveConfig.iStereoMode > 0)
			out.Write(",\n in uint layer : SV_RenderTargetArrayIndex\n");
		out.Write(" ) {\n");
	}

	// Shader-local working registers used by the generated TEV stage code.
	out.Write("\tint4 c0 = " I_COLORS"[1], c1 = " I_COLORS"[2], c2 = " I_COLORS"[3], prev = " I_COLORS"[0];\n"
	          "\tint4 rastemp = int4(0, 0, 0, 0), textemp = int4(0, 0, 0, 0), konsttemp = int4(0, 0, 0, 0);\n"
	          "\tint3 comp16 = int3(1, 256, 0), comp24 = int3(1, 256, 256*256);\n"
	          "\tint alphabump=0;\n"
	          "\tint3 tevcoord=int3(0, 0, 0);\n"
	          "\tint2 wrappedcoord=int2(0,0), tempcoord=int2(0,0);\n"
	          "\tint4 tevin_a=int4(0,0,0,0),tevin_b=int4(0,0,0,0),tevin_c=int4(0,0,0,0),tevin_d=int4(0,0,0,0);\n\n"); // tev combiner inputs

	// On GLSL, input variables must not be assigned to.
	// This is why we declare these variables locally instead.
	out.Write("\tfloat4 col0 = colors_0;\n");
	out.Write("\tfloat4 col1 = colors_1;\n");

	if (g_ActiveConfig.bEnablePixelLighting)
	{
		out.Write("\tfloat3 _norm0 = normalize(Normal.xyz);\n\n");
		out.Write("\tfloat3 pos = WorldPos;\n");

		out.Write("\tint4 lacc;\n"
		          "\tfloat3 ldir, h, cosAttn, distAttn;\n"
		          "\tfloat dist, dist2, attn;\n");

		// TODO: Our current constant usage code isn't able to handle more than one buffer.
		// So we can't mark the VS constant as used here. But keep them here as reference.
		//out.SetConstantsUsed(C_PLIGHT_COLORS, C_PLIGHT_COLORS+7); // TODO: Can be optimized further
		//out.SetConstantsUsed(C_PLIGHTS, C_PLIGHTS+31); // TODO: Can be optimized further
		//out.SetConstantsUsed(C_PMATERIALS, C_PMATERIALS+3);
		uid_data->components = components;
		GenerateLightingShader<T>(out, uid_data->lighting, components, "colors_", "col");
	}

	// HACK to handle cases where the tex gen is not enabled
	if (numTexgen == 0)
	{
		out.Write("\tint2 fixpoint_uv0 = int2(0, 0);\n\n");
	}
	else
	{
		out.SetConstantsUsed(C_TEXDIMS, C_TEXDIMS+numTexgen-1);
		for (unsigned int i = 0; i < numTexgen; ++i)
		{
			out.Write("\tint2 fixpoint_uv%d = itrunc(", i);
			// optional perspective divides
			uid_data->texMtxInfo_n_projection |= xfmem.texMtxInfo[i].projection << i;
			if (xfmem.texMtxInfo[i].projection == XF_TEXPROJ_STQ)
			{
				// Guard against a division by zero in the generated code.
				out.Write("(uv%d.z == 0.0 ? uv%d.xy : uv%d.xy / uv%d.z)", i, i, i, i);
			}
			else
			{
				out.Write("uv%d.xy", i);
			}

			out.Write(" * " I_TEXDIMS"[%d].zw * 128.0);\n", i);
			// TODO: S24 overflows here?
		}
	}

	// indirect texture map lookup
	// First collect (as a bitmask) which indirect stages are referenced by an active TEV stage.
	int nIndirectStagesUsed = 0;
	if (bpmem.genMode.numindstages > 0)
	{
		for (unsigned int i = 0; i < numStages; ++i)
		{
			if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages)
				nIndirectStagesUsed |= 1 << bpmem.tevind[i].bt;
		}
	}

	uid_data->nIndirectStagesUsed = nIndirectStagesUsed;
	for (u32 i = 0; i < bpmem.genMode.numindstages; ++i)
	{
		if (nIndirectStagesUsed & (1 << i))
		{
			unsigned int texcoord = bpmem.tevindref.getTexCoord(i);
			unsigned int texmap = bpmem.tevindref.getTexMap(i);

			uid_data->SetTevindrefValues(i, texcoord, texmap);
			if (texcoord < numTexgen)
			{
				// Two indirect stages share one I_INDTEXSCALE entry: even stages use .xy, odd ones .zw.
				out.SetConstantsUsed(C_INDTEXSCALE+i/2,C_INDTEXSCALE+i/2);
				if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS))
					out.Write("\ttempcoord = irshift(fixpoint_uv%d, " I_INDTEXSCALE"[%d].%s);\n", texcoord, i / 2, (i & 1) ? "zw" : "xy");
				else
					out.Write("\ttempcoord = fixpoint_uv%d >> " I_INDTEXSCALE"[%d].%s;\n", texcoord, i / 2, (i & 1) ? "zw" : "xy");
			}
			else
				out.Write("\ttempcoord = int2(0, 0);\n");

			out.Write("\tint3 iindtex%d = ", i);
			SampleTexture<T>(out, "(float2(tempcoord)/128.0)", "abg", texmap, ApiType);
		}
	}

	// Uid fields for BuildSwapModeTable are set in WriteStage
	// Each table row is a NUL-terminated four character swizzle built from two tevksel entries.
	char swapModeTable[4][5];
	const char* swapColors = "rgba";
	for (int i = 0; i < 4; i++)
	{
		swapModeTable[i][0] = swapColors[bpmem.tevksel[i*2].swap1];
		swapModeTable[i][1] = swapColors[bpmem.tevksel[i*2].swap2];
		swapModeTable[i][2] = swapColors[bpmem.tevksel[i*2+1].swap1];
		swapModeTable[i][3] = swapColors[bpmem.tevksel[i*2+1].swap2];
		swapModeTable[i][4] = '\0';
	}

	for (unsigned int i = 0; i < numStages; i++)
		WriteStage<T>(out, uid_data, i, ApiType, swapModeTable); // build the equation for this stage

	// Byte offset of member "elem" inside the object "str".
#define MY_STRUCT_OFFSET(str,elem) ((u32)((u64)&(str).elem-(u64)&(str)))
	// With pixel lighting the whole uid struct is significant; otherwise only the part up to
	// (and excluding) the first unused stage hash.
	bool enable_pl = g_ActiveConfig.bEnablePixelLighting;
	uid_data->num_values = (enable_pl) ? sizeof(*uid_data) : MY_STRUCT_OFFSET(*uid_data,stagehash[numStages]);

	if (numStages)
	{
		// The results of the last texenv stage are put onto the screen,
		// regardless of the used destination register
		if (bpmem.combiners[numStages - 1].colorC.dest != 0)
		{
			out.Write("\tprev.rgb = %s;\n", tevCOutputTable[bpmem.combiners[numStages - 1].colorC.dest]);
		}
		if (bpmem.combiners[numStages - 1].alphaC.dest != 0)
		{
			out.Write("\tprev.a = %s;\n", tevAOutputTable[bpmem.combiners[numStages - 1].alphaC.dest]);
		}
	}
	out.Write("\tprev = prev & 255;\n");

	AlphaTest::TEST_RESULT Pretest = bpmem.alpha_test.TestResult();
	uid_data->Pretest = Pretest;

	// NOTE: Fragment may not be discarded if alpha test always fails and early depth test is enabled
	// (in this case we need to write a depth value if depth test passes regardless of the alpha testing result)
	if (Pretest == AlphaTest::UNDETERMINED || (Pretest == AlphaTest::FAIL && bpmem.UseLateDepthTest()))
		WriteAlphaTest<T>(out, uid_data, ApiType, dstAlphaMode, per_pixel_depth);

	// Compute the integer depth value (zCoord) of this fragment.
	if (bpmem.genMode.zfreeze)
	{
		out.SetConstantsUsed(C_ZSLOPE, C_ZSLOPE);
		out.SetConstantsUsed(C_EFBSCALE, C_EFBSCALE);

		out.Write("\tfloat2 screenpos = rawpos.xy * " I_EFBSCALE".xy;\n");

		// OpenGL has reversed vertical screenspace coordinates
		if (ApiType == API_OPENGL)
			out.Write("\tscreenpos.y = %i - screenpos.y;\n", EFB_HEIGHT);

		out.Write("\tint zCoord = int(" I_ZSLOPE".z + " I_ZSLOPE".x * screenpos.x + " I_ZSLOPE".y * screenpos.y);\n");
	}
	else if (!g_ActiveConfig.bFastDepthCalc)
	{
		// FastDepth means to trust the depth generated in perspective division.
		// It should be correct, but it seems not to be as accurate as required. TODO: Find out why!
		// For disabled FastDepth we just calculate the depth value again.
		// The performance impact of this additional calculation doesn't matter, but it prevents
		// the host GPU driver from performing any early depth test optimizations.
		out.SetConstantsUsed(C_ZBIAS+1, C_ZBIAS+1);
		// the screen space depth value = far z + (clip z / clip w) * z range
		out.Write("\tint zCoord = " I_ZBIAS"[1].x + int((clipPos.z / clipPos.w) * float(" I_ZBIAS"[1].y));\n");
	}
	else
	{
		out.Write("\tint zCoord = int(rawpos.z * 16777216.0);\n");
	}
	out.Write("\tzCoord = clamp(zCoord, " I_ZBIAS"[1].x - " I_ZBIAS"[1].y, " I_ZBIAS"[1].x);\n");

	// depth texture can safely be ignored if the result won't be written to the depth buffer (early_ztest) and isn't used for fog either
	const bool skip_ztexture = !per_pixel_depth && !bpmem.fog.c_proj_fsel.fsel;

	uid_data->ztex_op = bpmem.ztex2.op;
	uid_data->per_pixel_depth = per_pixel_depth;
	uid_data->forced_early_z = forced_early_z;
	uid_data->fast_depth_calc = g_ActiveConfig.bFastDepthCalc;
	uid_data->early_ztest = bpmem.UseEarlyDepthTest();
	uid_data->fog_fsel = bpmem.fog.c_proj_fsel.fsel;
	uid_data->zfreeze = bpmem.genMode.zfreeze;

	// Note: z-textures are not written to depth buffer if early depth test is used
	if (per_pixel_depth && bpmem.UseEarlyDepthTest())
	{
		out.Write("\tdepth = float(zCoord) / 16777216.0;\n");
	}

	// Note: depth texture output is only written to depth buffer if late depth test is used
	// theoretical final depth value is used for fog calculation, though, so we have to emulate ztextures anyway
	if (bpmem.ztex2.op != ZTEXTURE_DISABLE && !skip_ztexture)
	{
		// use the texture input of the last texture stage (textemp), hopefully this has been read and is in correct format...
		out.SetConstantsUsed(C_ZBIAS, C_ZBIAS+1);
		out.Write("\tzCoord = idot(" I_ZBIAS"[0].xyzw, textemp.xyzw) + " I_ZBIAS"[1].w %s;\n",
			(bpmem.ztex2.op == ZTEXTURE_ADD) ? "+ zCoord" : "");
		out.Write("\tzCoord = zCoord & 0xFFFFFF;\n");
	}

	if (per_pixel_depth && bpmem.UseLateDepthTest())
	{
		out.Write("\tdepth = float(zCoord) / 16777216.0;\n");
	}

	if (dstAlphaMode == DSTALPHA_ALPHA_PASS)
	{
		// The alpha pass writes the constant alpha and emits no fog code.
		out.SetConstantsUsed(C_ALPHA, C_ALPHA);
		out.Write("\tocol0 = float4(float3(prev.rgb), float(" I_ALPHA".a)) / 255.0;\n");
	}
	else
	{
		WriteFog<T>(out, uid_data);
		out.Write("\tocol0 = float4(prev) / 255.0;\n");
	}

	// Use dual-source color blending to perform dst alpha in a single pass
	if (dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND)
	{
		out.SetConstantsUsed(C_ALPHA, C_ALPHA);

		// Colors will be blended against the alpha from ocol1 and
		// the alpha from ocol0 will be written to the framebuffer.
		out.Write("\tocol1 = float4(prev) / 255.0;\n");
		out.Write("\tocol0.a = float(" I_ALPHA".a) / 255.0;\n");
	}

	// Grow the bounding box by this fragment's position. The plain comparison in front of each
	// atomic skips the atomic operation when it would not change the stored value.
	if (g_ActiveConfig.backend_info.bSupportsBBox && BoundingBox::active)
	{
		uid_data->bounding_box = true;
		const char* atomic_op = ApiType == API_OPENGL ? "atomic" : "Interlocked";
		out.Write(
			"\tif(bbox_data[0] > int(rawpos.x)) %sMin(bbox_data[0], int(rawpos.x));\n"
			"\tif(bbox_data[1] < int(rawpos.x)) %sMax(bbox_data[1], int(rawpos.x));\n"
			"\tif(bbox_data[2] > int(rawpos.y)) %sMin(bbox_data[2], int(rawpos.y));\n"
			"\tif(bbox_data[3] < int(rawpos.y)) %sMax(bbox_data[3], int(rawpos.y));\n",
			atomic_op, atomic_op, atomic_op, atomic_op);
	}

	out.Write("}\n");

	// If the canary byte was overwritten the generated text did not fit into the buffer.
	if (is_writing_shadercode)
	{
		if (text[sizeof(text) - 1] != 0x7C)
			PanicAlert("PixelShader generator - buffer too small, canary has been eaten!");
	}
}
void InitBackendInfo() { HRESULT hr = DX11::D3D::LoadDXGI(); if (SUCCEEDED(hr)) hr = DX11::D3D::LoadD3D(); if (FAILED(hr)) { DX11::D3D::UnloadDXGI(); return; } g_Config.backend_info.APIType = API_D3D; g_Config.backend_info.bUseRGBATextures = true; // the GX formats barely match any D3D11 formats g_Config.backend_info.bUseMinimalMipCount = true; g_Config.backend_info.bSupportsExclusiveFullscreen = true; g_Config.backend_info.bSupportsDualSourceBlend = true; g_Config.backend_info.bSupportsPrimitiveRestart = true; g_Config.backend_info.bSupportsOversizedViewports = false; IDXGIFactory* factory; IDXGIAdapter* ad; hr = DX11::PCreateDXGIFactory(__uuidof(IDXGIFactory), (void**)&factory); if (FAILED(hr)) PanicAlert("Failed to create IDXGIFactory object"); // adapters g_Config.backend_info.Adapters.clear(); g_Config.backend_info.AAModes.clear(); while (factory->EnumAdapters((UINT)g_Config.backend_info.Adapters.size(), &ad) != DXGI_ERROR_NOT_FOUND) { const size_t adapter_index = g_Config.backend_info.Adapters.size(); DXGI_ADAPTER_DESC desc; ad->GetDesc(&desc); // TODO: These don't get updated on adapter change, yet if (adapter_index == g_Config.iAdapter) { std::string samples; std::vector<DXGI_SAMPLE_DESC> modes = DX11::D3D::EnumAAModes(ad); for (unsigned int i = 0; i < modes.size(); ++i) { if (i == 0) samples = _trans("None"); else if (modes[i].Quality) samples = StringFromFormat(_trans("%d samples (quality level %d)"), modes[i].Count, modes[i].Quality); else samples = StringFromFormat(_trans("%d samples"), modes[i].Count); g_Config.backend_info.AAModes.push_back(samples); } // Requires the earlydepthstencil attribute (only available in shader model 5) g_Config.backend_info.bSupportsEarlyZ = (DX11::D3D::GetFeatureLevel(ad) == D3D_FEATURE_LEVEL_11_0); } g_Config.backend_info.Adapters.push_back(UTF16ToUTF8(desc.Description)); ad->Release(); } factory->Release(); // Clear ppshaders string vector g_Config.backend_info.PPShaders.clear(); DX11::D3D::UnloadDXGI(); 
DX11::D3D::UnloadD3D(); }
bool StreamBuffer::ReserveMemory(size_t num_bytes, size_t alignment, bool allow_reuse /* = true */, bool allow_growth /* = true */, bool reallocate_if_full /* = false */) { size_t required_bytes = num_bytes + alignment; // Check for sane allocations if (required_bytes > m_maximum_size) { PanicAlert("Attempting to allocate %u bytes from a %u byte stream buffer", static_cast<uint32_t>(num_bytes), static_cast<uint32_t>(m_maximum_size)); return false; } // Is the GPU behind or up to date with our current offset? if (m_current_offset >= m_current_gpu_position) { size_t remaining_bytes = m_current_size - m_current_offset; if (required_bytes <= remaining_bytes) { // Place at the current position, after the GPU position. m_current_offset = Util::AlignBufferOffset(m_current_offset, alignment); m_last_allocation_size = num_bytes; return true; } // Check for space at the start of the buffer // We use < here because we don't want to have the case of m_current_offset == // m_current_gpu_position. That would mean the code above would assume the // GPU has caught up to us, which it hasn't. if (allow_reuse && required_bytes < m_current_gpu_position) { // Reset offset to zero, since we're allocating behind the gpu now m_current_offset = 0; m_last_allocation_size = num_bytes; return true; } } // Is the GPU ahead of our current offset? if (m_current_offset < m_current_gpu_position) { // We have from m_current_offset..m_current_gpu_position space to use. size_t remaining_bytes = m_current_gpu_position - m_current_offset; if (required_bytes < remaining_bytes) { // Place at the current position, since this is still behind the GPU. m_current_offset = Util::AlignBufferOffset(m_current_offset, alignment); m_last_allocation_size = num_bytes; return true; } } // Try to grow the buffer up to the maximum size before waiting. // Double each time until the maximum size is reached. 
if (allow_growth && m_current_size < m_maximum_size) { size_t new_size = std::min(std::max(num_bytes, m_current_size * 2), m_maximum_size); if (ResizeBuffer(new_size)) { // Allocating from the start of the buffer. m_last_allocation_size = new_size; return true; } } // Can we find a fence to wait on that will give us enough memory? if (allow_reuse && WaitForClearSpace(required_bytes)) { _assert_(m_current_offset == m_current_gpu_position || (m_current_offset + required_bytes) < m_current_gpu_position); m_current_offset = Util::AlignBufferOffset(m_current_offset, alignment); m_last_allocation_size = num_bytes; return true; } // If we are not allowed to execute in our current state (e.g. in the middle of a render pass), // as a last resort, reallocate the buffer. This will incur a performance hit and is not // encouraged. if (reallocate_if_full && ResizeBuffer(m_current_size)) { m_last_allocation_size = num_bytes; return true; } // We tried everything we could, and still couldn't get anything. If we're not at a point // where the state is known and can be resumed, this is probably a fatal error. return false; }
// Executes one /dev/fs ioctl.
//
// _Parameter     - the ioctl number (IOCTL_*).
// _BufferIn      - emulated address of the input buffer; _BufferInSize is its size.
// _BufferOut     - emulated address of the output buffer; _BufferOutSize is its size.
//
// Returns FS_RESULT_OK on success or an error code. Commands that fall out of the switch
// (IOCTL_SHUTDOWN and unknown commands) return FS_RESULT_FATAL.
//
// Several commands share the same input layout: u32 owner id, u16 group id, 64 byte file
// name, followed by permission/attribute bytes.
s32 CWII_IPC_HLE_Device_fs::ExecuteCommand(u32 _Parameter, u32 _BufferIn, u32 _BufferInSize, u32 _BufferOut, u32 _BufferOutSize)
{
	switch(_Parameter)
	{
	case IOCTL_GET_STATS:
		{
			if (_BufferOutSize < 0x1c)
				return -1017;

			WARN_LOG(WII_IPC_FILEIO, "FS: GET STATS - returning static values for now");

			NANDStat fs;

			//TODO: scrape the real amounts from somewhere...
			fs.BlockSize = 0x4000;
			fs.FreeUserBlocks = 0x5DEC;
			fs.UsedUserBlocks = 0x1DD4;
			fs.FreeSysBlocks = 0x10;
			fs.UsedSysBlocks = 0x02F0;
			fs.Free_INodes = 0x146B;
			fs.Used_Inodes = 0x0394;

			*(NANDStat*)Memory::GetPointer(_BufferOut) = fs;

			return FS_RESULT_OK;
		}
		break;

	case IOCTL_CREATE_DIR:
		{
			_dbg_assert_(WII_IPC_FILEIO, _BufferOutSize == 0);
			u32 Addr = _BufferIn;

			u32 OwnerID = Memory::Read_U32(Addr); Addr += 4;
			u16 GroupID = Memory::Read_U16(Addr); Addr += 2;
			std::string DirName(HLE_IPC_BuildFilename((const char*)Memory::GetPointer(Addr), 64)); Addr += 64;
			Addr += 9; // owner attribs, permission
			u8 Attribs = Memory::Read_U8(Addr);

			INFO_LOG(WII_IPC_FILEIO, "FS: CREATE_DIR %s, OwnerID %#x, GroupID %#x, Attributes %#x", DirName.c_str(), OwnerID, GroupID, Attribs);

			// The trailing separator makes CreateFullPath treat the last component as a directory.
			DirName += DIR_SEP;
			File::CreateFullPath(DirName);
			_dbg_assert_msg_(WII_IPC_FILEIO, File::IsDirectory(DirName), "FS: CREATE_DIR %s failed", DirName.c_str());

			return FS_RESULT_OK;
		}
		break;

	case IOCTL_SET_ATTR:
		{
			u32 Addr = _BufferIn;

			u32 OwnerID = Memory::Read_U32(Addr); Addr += 4;
			u16 GroupID = Memory::Read_U16(Addr); Addr += 2;
			// The file name follows the owner and group ids, so it has to be read from Addr
			// (as CREATE_DIR and CREATE_FILE do) and not from the start of the input buffer.
			std::string Filename = HLE_IPC_BuildFilename((const char*)Memory::GetPointer(Addr), 64); Addr += 64;
			u8 OwnerPerm = Memory::Read_U8(Addr); Addr += 1;
			u8 GroupPerm = Memory::Read_U8(Addr); Addr += 1;
			u8 OtherPerm = Memory::Read_U8(Addr); Addr += 1;
			u8 Attributes = Memory::Read_U8(Addr); Addr += 1;

			// The attributes are only logged; nothing is stored.
			INFO_LOG(WII_IPC_FILEIO, "FS: SetAttrib %s", Filename.c_str());
			DEBUG_LOG(WII_IPC_FILEIO, " OwnerID: 0x%08x", OwnerID);
			DEBUG_LOG(WII_IPC_FILEIO, " GroupID: 0x%04x", GroupID);
			DEBUG_LOG(WII_IPC_FILEIO, " OwnerPerm: 0x%02x", OwnerPerm);
			DEBUG_LOG(WII_IPC_FILEIO, " GroupPerm: 0x%02x", GroupPerm);
			DEBUG_LOG(WII_IPC_FILEIO, " OtherPerm: 0x%02x", OtherPerm);
			DEBUG_LOG(WII_IPC_FILEIO, " Attributes: 0x%02x", Attributes);

			return FS_RESULT_OK;
		}
		break;

	case IOCTL_GET_ATTR:
		{
			_dbg_assert_msg_(WII_IPC_FILEIO, _BufferOutSize == 76,
				" GET_ATTR needs an 76 bytes large output buffer but it is %i bytes large",
				_BufferOutSize);

			u32 OwnerID = 0;
			u16 GroupID = 0x3031; // this is also known as makercd, 01 (0x3031) for nintendo and 08 (0x3038) for MH3 etc
			// For this command the input buffer starts directly with the file name.
			std::string Filename = HLE_IPC_BuildFilename((const char*)Memory::GetPointer(_BufferIn), 64);
			u8 OwnerPerm = 0x3;   // read/write
			u8 GroupPerm = 0x3;   // read/write
			u8 OtherPerm = 0x3;   // read/write
			u8 Attributes = 0x00; // no attributes
			if (File::IsDirectory(Filename))
			{
				INFO_LOG(WII_IPC_FILEIO, "FS: GET_ATTR Directory %s - all permission flags are set", Filename.c_str());
			}
			else
			{
				if (File::Exists(Filename))
				{
					INFO_LOG(WII_IPC_FILEIO, "FS: GET_ATTR %s - all permission flags are set", Filename.c_str());
				}
				else
				{
					INFO_LOG(WII_IPC_FILEIO, "FS: GET_ATTR unknown %s", Filename.c_str());
					return FS_FILE_NOT_EXIST;
				}
			}

			// write answer to buffer
			if (_BufferOutSize == 76)
			{
				u32 Addr = _BufferOut;
				Memory::Write_U32(OwnerID, Addr); Addr += 4;
				Memory::Write_U16(GroupID, Addr); Addr += 2;
				// Echo the requested name back verbatim.
				memcpy(Memory::GetPointer(Addr), Memory::GetPointer(_BufferIn), 64); Addr += 64;
				Memory::Write_U8(OwnerPerm, Addr); Addr += 1;
				Memory::Write_U8(GroupPerm, Addr); Addr += 1;
				Memory::Write_U8(OtherPerm, Addr); Addr += 1;
				Memory::Write_U8(Attributes, Addr); Addr += 1;
			}

			return FS_RESULT_OK;
		}
		break;

	case IOCTL_DELETE_FILE:
		{
			_dbg_assert_(WII_IPC_FILEIO, _BufferOutSize == 0);

			std::string Filename = HLE_IPC_BuildFilename((const char*)Memory::GetPointer(_BufferIn), 64);

			// Try it as a file first, then as a directory. Failure is only logged; the
			// command still reports success.
			if (File::Delete(Filename))
			{
				INFO_LOG(WII_IPC_FILEIO, "FS: DeleteFile %s", Filename.c_str());
			}
			else if (File::DeleteDir(Filename))
			{
				INFO_LOG(WII_IPC_FILEIO, "FS: DeleteDir %s", Filename.c_str());
			}
			else
			{
				WARN_LOG(WII_IPC_FILEIO, "FS: DeleteFile %s - failed!!!", Filename.c_str());
			}

			return FS_RESULT_OK;
		}
		break;

	case IOCTL_RENAME_FILE:
		{
			_dbg_assert_(WII_IPC_FILEIO, _BufferOutSize == 0);
			// Input: 64 byte source name directly followed by the 64 byte destination name.
			int Offset = 0;

			std::string Filename = HLE_IPC_BuildFilename((const char*)Memory::GetPointer(_BufferIn+Offset), 64);
			Offset += 64;

			std::string FilenameRename = HLE_IPC_BuildFilename((const char*)Memory::GetPointer(_BufferIn+Offset), 64);
			Offset += 64;

			// try to make the basis directory
			File::CreateFullPath(FilenameRename);

			// if there is already a file, delete it
			if (File::Exists(Filename) && File::Exists(FilenameRename))
			{
				File::Delete(FilenameRename);
			}

			// finally try to rename the file
			if (File::Rename(Filename, FilenameRename))
			{
				INFO_LOG(WII_IPC_FILEIO, "FS: Rename %s to %s", Filename.c_str(), FilenameRename.c_str());
			}
			else
			{
				ERROR_LOG(WII_IPC_FILEIO, "FS: Rename %s to %s - failed", Filename.c_str(), FilenameRename.c_str());
				return FS_FILE_NOT_EXIST;
			}

			return FS_RESULT_OK;
		}
		break;

	case IOCTL_CREATE_FILE:
		{
			_dbg_assert_(WII_IPC_FILEIO, _BufferOutSize == 0);

			u32 Addr = _BufferIn;
			u32 OwnerID = Memory::Read_U32(Addr); Addr += 4;
			u16 GroupID = Memory::Read_U16(Addr); Addr += 2;
			std::string Filename(HLE_IPC_BuildFilename((const char*)Memory::GetPointer(Addr), 64)); Addr += 64;
			u8 OwnerPerm = Memory::Read_U8(Addr); Addr++;
			u8 GroupPerm = Memory::Read_U8(Addr); Addr++;
			u8 OtherPerm = Memory::Read_U8(Addr); Addr++;
			u8 Attributes = Memory::Read_U8(Addr); Addr++;

			INFO_LOG(WII_IPC_FILEIO, "FS: CreateFile %s", Filename.c_str());
			DEBUG_LOG(WII_IPC_FILEIO, " OwnerID: 0x%08x", OwnerID);
			DEBUG_LOG(WII_IPC_FILEIO, " GroupID: 0x%04x", GroupID);
			DEBUG_LOG(WII_IPC_FILEIO, " OwnerPerm: 0x%02x", OwnerPerm);
			DEBUG_LOG(WII_IPC_FILEIO, " GroupPerm: 0x%02x", GroupPerm);
			DEBUG_LOG(WII_IPC_FILEIO, " OtherPerm: 0x%02x", OtherPerm);
			DEBUG_LOG(WII_IPC_FILEIO, " Attributes: 0x%02x", Attributes);

			// check if the file already exist
			if (File::Exists(Filename))
			{
				WARN_LOG(WII_IPC_FILEIO, "\tresult = FS_RESULT_EXISTS");
				return FS_FILE_EXIST;
			}

			// create the file
			File::CreateFullPath(Filename); // just to be sure
			bool Result = File::CreateEmptyFile(Filename);
			if (!Result)
			{
				ERROR_LOG(WII_IPC_FILEIO, "CWII_IPC_HLE_Device_fs: couldn't create new file");
				PanicAlert("CWII_IPC_HLE_Device_fs: couldn't create new file");
				return FS_RESULT_FATAL;
			}

			INFO_LOG(WII_IPC_FILEIO, "\tresult = FS_RESULT_OK");
			return FS_RESULT_OK;
		}
		break;

	case IOCTL_SHUTDOWN:
		{
			INFO_LOG(WII_IPC_FILEIO, "Wii called Shutdown()");
			// TODO: stop emulation
		}
		break;

	default:
		ERROR_LOG(WII_IPC_FILEIO, "CWII_IPC_HLE_Device_fs::IOCtl: ni 0x%x", _Parameter);
		PanicAlert("CWII_IPC_HLE_Device_fs::IOCtl: ni 0x%x", _Parameter);
		break;
	}

	return FS_RESULT_FATAL;
}
// Handles the vectored /dev/fs commands (IOCTLV_READ_DIR and IOCTLV_GETUSAGE).
//
// _CommandAddress - emulated address of the request; the result code is written to
//                   _CommandAddress + 4.
//
// Always returns true.
bool CWII_IPC_HLE_Device_fs::IOCtlV(u32 _CommandAddress)
{
	u32 ReturnValue = FS_RESULT_OK;
	SIOCtlVBuffer CommandBuffer(_CommandAddress);

	// Prepare the out buffer(s) with zeros as a safety precaution
	// to avoid returning bad values
	for(u32 i = 0; i < CommandBuffer.NumberPayloadBuffer; i++)
	{
		Memory::Memset(CommandBuffer.PayloadBuffer[i].m_Address, 0,
			CommandBuffer.PayloadBuffer[i].m_Size);
	}

	switch(CommandBuffer.Parameter)
	{
	case IOCTLV_READ_DIR:
		{
			// the wii uses this function to define the type (dir or file)
			std::string DirName(HLE_IPC_BuildFilename((const char*)Memory::GetPointer(
				CommandBuffer.InBuffer[0].m_Address), CommandBuffer.InBuffer[0].m_Size));

			INFO_LOG(WII_IPC_FILEIO, "FS: IOCTL_READ_DIR %s", DirName.c_str());

			if (!File::Exists(DirName))
			{
				WARN_LOG(WII_IPC_FILEIO, "FS: Search not found: %s", DirName.c_str());
				ReturnValue = FS_FILE_NOT_EXIST;
				break;
			}
			else if (!File::IsDirectory(DirName))
			{
				// It's not a directory, so error.
				// Games don't usually seem to care WHICH error they get,
				// but the system menu does.
				WARN_LOG(WII_IPC_FILEIO, "\tNot a directory - return FS_RESULT_FATAL");
				ReturnValue = FS_RESULT_FATAL;
				break;
			}

			// make a file search
			CFileSearch::XStringVector Directories;
			Directories.push_back(DirName);

			CFileSearch::XStringVector Extensions;
			Extensions.push_back("*.*");

			CFileSearch FileSearch(Extensions, Directories);
			const auto& foundFiles = FileSearch.GetFileNames();

			if ((CommandBuffer.InBuffer.size() == 1) && (CommandBuffer.PayloadBuffer.size() == 1))
			{
				// One input and one output vector: only the number of entries is requested.
				size_t numFile = foundFiles.size();
				INFO_LOG(WII_IPC_FILEIO, "\t%lu files found", (unsigned long)numFile);

				Memory::Write_U32((u32)numFile, CommandBuffer.PayloadBuffer[0].m_Address);
			}
			else if ((CommandBuffer.InBuffer.size() >= 2) && (CommandBuffer.PayloadBuffer.size() >= 2))
			{
				// Full listing: InBuffer[0] is the path, InBuffer[1] the maximum number of
				// entries, PayloadBuffer[0] receives the NUL-separated names and
				// PayloadBuffer[1] the number of names written.
				// (InBuffer[0] holds the path string, so the limit must not be read from it.)
				u32 MaxEntries = Memory::Read_U32(CommandBuffer.InBuffer[1].m_Address);

				memset(Memory::GetPointer(CommandBuffer.PayloadBuffer[0].m_Address), 0, CommandBuffer.PayloadBuffer[0].m_Size);

				size_t numFiles = 0;
				// Space left in the name list; never write past the guest-supplied buffer.
				size_t bytesLeft = CommandBuffer.PayloadBuffer[0].m_Size;
				char* pFilename = (char*)Memory::GetPointer((u32)(CommandBuffer.PayloadBuffer[0].m_Address));

				for (size_t i = 0; i < foundFiles.size(); i++)
				{
					if (i >= MaxEntries)
						break;

					std::string name, ext;
					SplitPath(foundFiles[i], NULL, &name, &ext);
					std::string FileName = name + ext;

					// Decode entities of invalid file system characters so that
					// games (such as HP:HBP) will be able to find what they expect.
					for (const Common::replace_t& r : replacements)
					{
						for (size_t j = 0; (j = FileName.find(r.second, j)) != FileName.npos; ++j)
							FileName.replace(j, r.second.length(), 1, r.first);
					}

					// Name plus its terminating NUL.
					const size_t entrySize = FileName.length() + 1;
					if (entrySize > bytesLeft)
					{
						WARN_LOG(WII_IPC_FILEIO, "\tOutput buffer too small, directory listing truncated");
						break;
					}

					memcpy(pFilename, FileName.c_str(), entrySize);
					pFilename += entrySize;
					bytesLeft -= entrySize;
					numFiles++;

					INFO_LOG(WII_IPC_FILEIO, "\tFound: %s", FileName.c_str());
				}

				Memory::Write_U32((u32)numFiles, CommandBuffer.PayloadBuffer[1].m_Address);
			}
			else
			{
				// Neither of the two known request shapes; there are not enough vectors to
				// answer safely.
				WARN_LOG(WII_IPC_FILEIO, "\tUnexpected number of buffers - return FS_RESULT_FATAL");
				ReturnValue = FS_RESULT_FATAL;
				break;
			}

			ReturnValue = FS_RESULT_OK;
		}
		break;

	case IOCTLV_GETUSAGE:
		{
			_dbg_assert_(WII_IPC_FILEIO, CommandBuffer.PayloadBuffer.size() == 2);
			_dbg_assert_(WII_IPC_FILEIO, CommandBuffer.PayloadBuffer[0].m_Size == 4);
			_dbg_assert_(WII_IPC_FILEIO, CommandBuffer.PayloadBuffer[1].m_Size == 4);

			// this command sucks because it asks of the number of used
			// fsBlocks and inodes
			// It should be correct, but don't count on it...
			const char *relativepath = (const char*)Memory::GetPointer(CommandBuffer.InBuffer[0].m_Address);
			std::string path(HLE_IPC_BuildFilename(relativepath, CommandBuffer.InBuffer[0].m_Size));
			u32 fsBlocks = 0;
			u32 iNodes = 0;

			INFO_LOG(WII_IPC_FILEIO, "IOCTL_GETUSAGE %s", path.c_str());
			if (File::IsDirectory(path))
			{
				// LPFaint99: After I found that setting the number of inodes to the number of children + 1 for the directory itself
				// I decided to compare with sneek which has the following 2 special cases which are
				// Copyright (C) 2009-2011 crediar http://code.google.com/p/sneek/
				// The compared length of 16 includes the terminating NUL of the literal, so
				// only an exact match of the whole path hits these cases.
				if ((memcmp(relativepath, "/title/00010001", 16 ) == 0 ) ||
					(memcmp(relativepath, "/title/00010005", 16) == 0 ))
				{
					fsBlocks = 23; // size is size/0x4000
					iNodes = 42; // empty folders return a FileCount of 1
				}
				else
				{
					File::FSTEntry parentDir;
					// add one for the folder itself, allows some games to create their save files
					// R8XE52 (Jurassic: The Hunted), STEETR (Tetris Party Deluxe) now create their saves with this change
					iNodes = 1 + File::ScanDirectoryTree(path, parentDir);

					u64 totalSize = ComputeTotalFileSize(parentDir); // "Real" size, to be converted to nand blocks

					fsBlocks = (u32)(totalSize / (16 * 1024)); // one block is 16kb
				}
				ReturnValue = FS_RESULT_OK;

				INFO_LOG(WII_IPC_FILEIO, "FS: fsBlock: %i, iNodes: %i", fsBlocks, iNodes);
			}
			else
			{
				// A missing directory is reported as empty usage, not as an error.
				fsBlocks = 0;
				iNodes = 0;
				ReturnValue = FS_RESULT_OK;
				WARN_LOG(WII_IPC_FILEIO, "FS: fsBlock failed, cannot find directory: %s", path.c_str());
			}

			Memory::Write_U32(fsBlocks, CommandBuffer.PayloadBuffer[0].m_Address);
			Memory::Write_U32(iNodes, CommandBuffer.PayloadBuffer[1].m_Address);
		}
		break;

	default:
		PanicAlert("CWII_IPC_HLE_Device_fs::IOCtlV: %i", CommandBuffer.Parameter);
		break;
	}

	Memory::Write_U32(ReturnValue, _CommandAddress+4);

	return true;
}
void VideoBackend::InitBackendInfo() { HRESULT hr = DX11::D3D::LoadDXGI(); if (SUCCEEDED(hr)) hr = DX11::D3D::LoadD3D(); if (FAILED(hr)) { DX11::D3D::UnloadDXGI(); return; } g_Config.backend_info.api_type = APIType::D3D; g_Config.backend_info.MaxTextureSize = D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION; g_Config.backend_info.bUsesLowerLeftOrigin = false; g_Config.backend_info.bSupportsExclusiveFullscreen = true; g_Config.backend_info.bSupportsDualSourceBlend = true; g_Config.backend_info.bSupportsPrimitiveRestart = true; g_Config.backend_info.bSupportsOversizedViewports = false; g_Config.backend_info.bSupportsGeometryShaders = true; g_Config.backend_info.bSupportsComputeShaders = false; g_Config.backend_info.bSupports3DVision = true; g_Config.backend_info.bSupportsPostProcessing = true; g_Config.backend_info.bSupportsPaletteConversion = true; g_Config.backend_info.bSupportsClipControl = true; g_Config.backend_info.bSupportsDepthClamp = true; g_Config.backend_info.bSupportsReversedDepthRange = false; g_Config.backend_info.bSupportsLogicOp = true; g_Config.backend_info.bSupportsMultithreading = false; g_Config.backend_info.bSupportsGPUTextureDecoding = true; g_Config.backend_info.bSupportsST3CTextures = false; g_Config.backend_info.bSupportsCopyToVram = true; g_Config.backend_info.bSupportsLargePoints = false; g_Config.backend_info.bSupportsPartialDepthCopies = false; g_Config.backend_info.bSupportsBitfield = false; g_Config.backend_info.bSupportsDynamicSamplerIndexing = false; g_Config.backend_info.bSupportsBPTCTextures = false; g_Config.backend_info.bSupportsFramebufferFetch = false; g_Config.backend_info.bSupportsBackgroundCompiling = true; IDXGIFactory2* factory; IDXGIAdapter* ad; hr = DX11::PCreateDXGIFactory(__uuidof(IDXGIFactory2), (void**)&factory); if (FAILED(hr)) PanicAlert("Failed to create IDXGIFactory object"); // adapters g_Config.backend_info.Adapters.clear(); g_Config.backend_info.AAModes.clear(); while 
(factory->EnumAdapters((UINT)g_Config.backend_info.Adapters.size(), &ad) != DXGI_ERROR_NOT_FOUND) { const size_t adapter_index = g_Config.backend_info.Adapters.size(); DXGI_ADAPTER_DESC desc; ad->GetDesc(&desc); // TODO: These don't get updated on adapter change, yet if (adapter_index == g_Config.iAdapter) { std::vector<DXGI_SAMPLE_DESC> modes = DX11::D3D::EnumAAModes(ad); // First iteration will be 1. This equals no AA. for (unsigned int i = 0; i < modes.size(); ++i) { g_Config.backend_info.AAModes.push_back(modes[i].Count); } D3D_FEATURE_LEVEL feature_level = D3D::GetFeatureLevel(ad); bool shader_model_5_supported = feature_level >= D3D_FEATURE_LEVEL_11_0; g_Config.backend_info.MaxTextureSize = D3D::GetMaxTextureSize(feature_level); // Requires the earlydepthstencil attribute (only available in shader model 5) g_Config.backend_info.bSupportsEarlyZ = shader_model_5_supported; // Requires full UAV functionality (only available in shader model 5) g_Config.backend_info.bSupportsBBox = g_Config.backend_info.bSupportsFragmentStoresAndAtomics = shader_model_5_supported; // Requires the instance attribute (only available in shader model 5) g_Config.backend_info.bSupportsGSInstancing = shader_model_5_supported; // Sample shading requires shader model 5 g_Config.backend_info.bSupportsSSAA = shader_model_5_supported; } g_Config.backend_info.Adapters.push_back(UTF16ToUTF8(desc.Description)); ad->Release(); } factory->Release(); DX11::D3D::UnloadDXGI(); DX11::D3D::UnloadD3D(); }
// Recreates the EFB framebuffer: the current one is destroyed first, then a
// new one is created. A panic alert is raised if the creation step reports
// failure.
void FramebufferManager::ResizeEFBTextures()
{
  DestroyEFBFramebuffer();

  if (CreateEFBFramebuffer())
  {
    // Creation succeeded, nothing more to do.
    return;
  }

  PanicAlert("Failed to create EFB textures");
}
// Generates the vertex shader source text and/or the vertex shader UID for
// the given vertex format and XF register state.
//
// T, Write_Code and api_type are not declared inside this block; presumably
// they are template parameters declared on a line that is not part of this
// excerpt (TODO confirm). All shader text is emitted only under
// `if (Write_Code)`, while the uid_data fields are filled in unconditionally.
//
// Parameters:
//   out              - sink for the generated text (out.Write) and for the UID
//                      (out.GetUidData / ClearUID / CalculateUIDHash).
//   components       - VB_HAS_* bitmask describing which vertex attributes
//                      are present.
//   xfr              - XF register state (texgen, color channel, dual texture
//                      transform and post matrix configuration).
//   bpm              - BP register state; in this function it is only read by
//                      the debug assertions that cross-check it against xfr.
//   use_integer_math - forwarded to GenerateLightingShader; when lighting is
//                      emitted here it also adds an "int4 ilacc" declaration.
inline void GenerateVertexShader(T& out, u32 components, const XFMemory &xfr, const BPMemory &bpm, bool use_integer_math)
{
	// Non-uid template parameters will write to the dummy data (=> gets optimized out)
	bool uidPresent = (&out.template GetUidData<vertex_shader_uid_data>() != NULL);
	vertex_shader_uid_data dummy_data;
	vertex_shader_uid_data& uid_data = uidPresent ? out.template GetUidData<vertex_shader_uid_data>() : dummy_data;
	if (uidPresent)
	{
		out.ClearUID();
	}
	if (Write_Code)
	{
		// Debug-only sanity checks: BP and XF state must agree on the number
		// of texgens and color channels.
		_dbg_assert_log_(VIDEO, bpm.genMode.numtexgens == xfr.numTexGen.numTexGens, "numTexGens mismatch bpmem: %u xfmem: %u", bpm.genMode.numtexgens.Value(), xfr.numTexGen.numTexGens);
		_dbg_assert_log_(VIDEO, bpm.genMode.numcolchans == xfr.numChan.numColorChans, "numColorChans mismatch bpmem: %u xfmem: %u", bpm.genMode.numcolchans.Value(), xfr.numChan.numColorChans);
	}
	uid_data.numTexGens = xfr.numTexGen.numTexGens;
	uid_data.components = components;
	bool lightingEnabled = xfr.numChan.numColorChans > 0;
	bool enable_pl = g_ActiveConfig.PixelLightingEnabled(xfr, components);
	// Lighting is computed in the vertex shader when it is enabled and not
	// handled per pixel...
	bool needLightShader = lightingEnabled && !enable_pl;
	for (unsigned int i = 0; i < xfr.numTexGen.numTexGens; ++i)
	{
		// ...or when any texgen is color based: those texgens read
		// o.colors_0 / o.colors_1 further down.
		const TexMtxInfo& texinfo = xfr.texMtxInfo[i];
		needLightShader = needLightShader || texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC0 || texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC1;
	}
	uid_data.pixel_lighting = enable_pl;
	uid_data.numColorChans = xfr.numChan.numColorChans;
	// The multisampling state is only recorded in the UID for non-D3D9 APIs.
	if (!(api_type & API_D3D9))
	{
		uid_data.msaa = g_ActiveConfig.iMultisamples > 1;
		uid_data.ssaa = g_ActiveConfig.iMultisamples > 1 && g_ActiveConfig.bSSAA;
	}
	char * buffer = nullptr;
	if (Write_Code)
	{
		// Use the buffer already attached to `out`, falling back to `text`
		// (defined outside this block) when none is attached.
		buffer = out.GetBuffer();
		if (buffer == nullptr)
		{
			buffer = text;
			out.SetBuffer(text);
		}
		// The last byte of the buffer is a sentinel; it is checked again at
		// the end of the function to detect an overflowing shader.
		buffer[VERTEXSHADERGEN_BUFFERSIZE - 1] = 0x7C; // canary
		// uniforms
		if (api_type == API_OPENGL)
			out.Write("layout(std140%s) uniform VSBlock {\n", g_ActiveConfig.backend_info.bSupportsBindingLayout ? ", binding = 2" : "");
		else if (api_type == API_D3D11)
			out.Write("cbuffer VSBlock : register(b0) {\n");
		DeclareUniform<T, api_type>(out, C_PROJECTION, "float4", I_PROJECTION"[4]");
		DeclareUniform<T, api_type>(out, C_DEPTHPARAMS, "float4", I_DEPTHPARAMS);
		DeclareUniform<T, api_type>(out, C_MATERIALS, "float4", I_MATERIALS"[4]");
		DeclareUniform<T, api_type>(out, C_LIGHTS, "float4", I_LIGHTS"[40]");
		DeclareUniform<T, api_type>(out, C_TEXMATRICES, "float4", I_TEXMATRICES"[24]");
		DeclareUniform<T, api_type>(out, C_TRANSFORMMATRICES, "float4", I_TRANSFORMMATRICES"[64]");
		DeclareUniform<T, api_type>(out, C_NORMALMATRICES, "float4", I_NORMALMATRICES"[32]");
		DeclareUniform<T, api_type>(out, C_POSTTRANSFORMMATRICES, "float4", I_POSTTRANSFORMMATRICES"[64]");
		DeclareUniform<T, api_type>(out, C_PLOFFSETPARAMS, "float4", I_PLOFFSETPARAMS"[13]");
		// Close the uniform block opened above (OpenGL / D3D11 only).
		if (api_type == API_OPENGL || api_type == API_D3D11)
			out.Write("};\n");
		out.Write("struct VS_OUTPUT {\n");
		GenerateVSOutputMembers<T, api_type>(out, enable_pl, xfr);
		out.Write("};\n");
		if (api_type == API_OPENGL)
		{
			// GLSL: vertex inputs are declared as globals; only the
			// attributes present in `components` are declared.
			out.Write("in float4 rawpos; // ATTR%d,\n", SHADER_POSITION_ATTRIB);
			out.Write("in float fposmtx; // ATTR%d,\n", SHADER_POSMTX_ATTRIB);
			if (components & VB_HAS_NRM0)
				out.Write("in float3 rawnorm0; // ATTR%d,\n", SHADER_NORM0_ATTRIB);
			if (components & VB_HAS_NRM1)
				out.Write("in float3 rawnorm1; // ATTR%d,\n", SHADER_NORM1_ATTRIB);
			if (components & VB_HAS_NRM2)
				out.Write("in float3 rawnorm2; // ATTR%d,\n", SHADER_NORM2_ATTRIB);
			if (components & VB_HAS_COL0)
				out.Write("in float4 color0; // ATTR%d,\n", SHADER_COLOR0_ATTRIB);
			if (components & VB_HAS_COL1)
				out.Write("in float4 color1; // ATTR%d,\n", SHADER_COLOR1_ATTRIB);
			for (int i = 0; i < 8; ++i)
			{
				// A texcoord with a per-vertex matrix index is declared as
				// float3 (the index is read from .z below), otherwise float2.
				u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0 << i));
				if ((components & (VB_HAS_UV0 << i)) || hastexmtx)
					out.Write("in float%d tex%d; // ATTR%d,\n", hastexmtx ? 3 : 2, i, SHADER_TEXTURE0_ATTRIB + i);
			}
			if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
			{
				// With geometry shader support the outputs go into an
				// interface block named "vs".
				out.Write("out VertexData {\n");
				GenerateVSOutputMembers<T, api_type>(out, enable_pl, xfr, GetInterpolationQualifier(api_type, false, true));
				out.Write("} vs;\n");
			}
			else
			{
				const char* optCentroid = GetInterpolationQualifier(api_type);
				// Let's set up attributes
				if (xfr.numTexGen.numTexGens < 7)
				{
					for (int i = 0; i < 8; ++i)
						out.Write("%s out float3 uv%d_2;\n", optCentroid, i);
					out.Write("%s out float4 clipPos_2;\n", optCentroid);
					if (enable_pl)
						out.Write("%s out float4 Normal_2;\n", optCentroid);
				}
				else
				{
					// wpos is in w of first 4 texcoords
					if (enable_pl)
					{
						for (int i = 0; i < 8; ++i)
							out.Write("%s out float4 uv%d_2;\n", optCentroid, i);
					}
					else
					{
						for (unsigned int i = 0; i < xfr.numTexGen.numTexGens; ++i)
							out.Write("%s out float%d uv%d_2;\n", optCentroid, i < 4 ? 4 : 3, i);
					}
				}
				out.Write("%s out float4 colors_0;\n", optCentroid);
				out.Write("%s out float4 colors_1;\n", optCentroid);
			}
			out.Write("void main()\n{\n");
		}
		else
		{
			// HLSL: vertex inputs are parameters of main().
			out.Write("VS_OUTPUT main(\n");
			// inputs
			if (components & VB_HAS_NRM0)
				out.Write(" float3 rawnorm0 : NORMAL0,\n");
			if (components & VB_HAS_NRM1)
				out.Write(" float3 rawnorm1 : NORMAL1,\n");
			if (components & VB_HAS_NRM2)
				out.Write(" float3 rawnorm2 : NORMAL2,\n");
			if (components & VB_HAS_COL0)
				out.Write(" float4 color0 : COLOR0,\n");
			if (components & VB_HAS_COL1)
				out.Write(" float4 color1 : COLOR1,\n");
			for (int i = 0; i < 8; ++i)
			{
				u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0 << i));
				if ((components & (VB_HAS_UV0 << i)) || hastexmtx)
					out.Write(" float%d tex%d : TEXCOORD%d,\n", hastexmtx ? 3 : 2, i, i);
			}
			out.Write(" float4 blend_indices : BLENDINDICES,\n");
			out.Write(" float4 rawpos : POSITION) {\n");
		}
		out.Write("VS_OUTPUT o;\n");
		if (api_type & API_D3D9)
		{
			out.Write("int4 indices = D3DCOLORtoUBYTE4(blend_indices);\n");
		}
		// transforms
		// The position matrix index comes from a different input per API.
		if (api_type & API_D3D9)
		{
			out.Write("int posmtx = indices.x;\n");
		}
		else if (api_type == API_D3D11)
		{
			out.Write("int posmtx = blend_indices.x * 255.0;\n");
		}
		else
		{
			out.Write("int posmtx = int(fposmtx);\n");
		}
		out.Write("float4 pos = float4(dot(" I_TRANSFORMMATRICES"[posmtx], rawpos), dot(" I_TRANSFORMMATRICES"[posmtx+1], rawpos), dot(" I_TRANSFORMMATRICES"[posmtx+2], rawpos), 1);\n");
		if (components & VB_HAS_NRMALL)
		{
			out.Write("int normidx = posmtx >= 32 ? (posmtx-32) : posmtx;\n");
			out.Write("float3 N0 = " I_NORMALMATRICES"[normidx].xyz, N1 = " I_NORMALMATRICES"[normidx+1].xyz, N2 = " I_NORMALMATRICES"[normidx+2].xyz;\n");
		}
		// Only the primary normal is normalized in the emitted code.
		if (components & VB_HAS_NRM0)
			out.Write("float3 _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, rawnorm0)));\n");
		if (components & VB_HAS_NRM1)
			out.Write("float3 _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n");
		if (components & VB_HAS_NRM2)
			out.Write("float3 _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n");
		// _norm0 is always declared; it is zero when the vertex has no normal.
		if (!(components & VB_HAS_NRM0))
			out.Write("float3 _norm0 = float3(0.0, 0.0, 0.0);\n");
		out.Write("o.pos = float4(dot(" I_PROJECTION"[0], pos), dot(" I_PROJECTION"[1], pos), dot(" I_PROJECTION"[2], pos), dot(" I_PROJECTION"[3], pos));\n");
		if (api_type & API_D3D9)
		{
			//Write Pos offset for Point/Line Rendering
			out.Write("o.pos.xy = o.pos.xy + " I_PLOFFSETPARAMS"[indices.z].xy * o.pos.w;\n");
		}
		if (needLightShader)
		{
			// Temporaries declared ahead of the GenerateLightingShader call
			// below (presumably used by the code it emits).
			out.Write("float4 mat, lacc;\n"
				"float3 ldir, h;\n"
				"float dist, dist2, attn;\n");
			if (use_integer_math)
			{
				out.Write("int4 ilacc;\n");
			}
		}
		if (!lightingEnabled)
		{
			// No color channels: pass vertex colors through, defaulting to
			// white for color 0 and to color 0 for color 1.
			if (components & VB_HAS_COL0)
				out.Write("o.colors_0 = color0;\n");
			else
				out.Write("o.colors_0 = float4(1.0, 1.0, 1.0, 1.0);\n");
			if (components & VB_HAS_COL1)
				out.Write("o.colors_1 = color1;\n");
			else
				out.Write("o.colors_1 = o.colors_0;\n");
		}
	}
	// Called regardless of Write_Code; it receives uid_data.lighting as well
	// as `out`.
	if (needLightShader)
		GenerateLightingShader<T, Write_Code>(out, uid_data.lighting, components, I_MATERIALS, I_LIGHTS, "color", "o.colors_", xfr, use_integer_math);
	// special case if only pos and tex coord 0 and tex coord input is AB11
	// donko - this has caused problems in some games. removed for now.
	bool texGenSpecialCase = false;
	/*bool texGenSpecialCase =
		((g_main_cp_state.vtx_desc.Hex & 0x60600L) == g_main_cp_state.vtx_desc.Hex) && // only pos and tex coord 0
		(g_main_cp_state.vtx_desc.Tex0Coord != NOT_PRESENT) &&
		(xfr.texcoords[0].texmtxinfo.inputform == XF_TEXINPUT_AB11);
	*/
	if (Write_Code)
	{
		if (xfr.numChan.numColorChans < 2 && needLightShader)
		{
			// With fewer than two color channels, color 1 is filled in here.
			if (components & VB_HAS_COL1)
				out.Write("o.colors_1 = color1;\n");
			else
				out.Write("o.colors_1 = o.colors_0;\n");
		}
		// transform texcoords
		out.Write("float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n");
	}
	for (unsigned int i = 0; i < xfr.numTexGen.numTexGens; ++i)
	{
		const TexMtxInfo& texinfo = xfr.texMtxInfo[i];
		uid_data.texMtxInfo[i].sourcerow = xfr.texMtxInfo[i].sourcerow;
		if (Write_Code)
		{
			// Each texgen is emitted inside its own { } scope (closed at the
			// end of this loop body) so names like "tmp" and "P0" can repeat.
			out.Write("{\n");
			out.Write("coord = float4(0.0, 0.0, 1.0, 1.0);\n");
			// Select the source vector for this texgen.
			switch (texinfo.sourcerow)
			{
			case XF_SRCGEOM_INROW:
				_dbg_assert_log_(VIDEO, texinfo.inputform == XF_TEXINPUT_ABC1, "Incorrect inputform sourcerow: XF_SRCGEOM_INROW inputform: %u", texinfo.inputform);
				out.Write("coord = rawpos;\n"); // pos.w is 1
				break;
			case XF_SRCNORMAL_INROW:
				if (components & VB_HAS_NRM0)
				{
					_dbg_assert_log_(VIDEO, texinfo.inputform == XF_TEXINPUT_ABC1, "Incorrect inputform sourcerow: XF_SRCNORMAL_INROW inputform: %u", texinfo.inputform);
					out.Write("coord = float4(rawnorm0.xyz, 1.0);\n");
				}
				break;
			case XF_SRCCOLORS_INROW:
				// No coord is written here; the color texgen cases below
				// read o.colors_N directly.
				_dbg_assert_log_(VIDEO, texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC0 || texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC1, "texgentype missmatch: %u", texinfo.texgentype);
				break;
			case XF_SRCBINORMAL_T_INROW:
				if (components & VB_HAS_NRM1)
				{
					_dbg_assert_log_(VIDEO, texinfo.inputform == XF_TEXINPUT_ABC1, "Incorrect inputform sourcerow: XF_SRCBINORMAL_T_INROW inputform: %u", texinfo.inputform);
					out.Write("coord = float4(rawnorm1.xyz, 1.0);\n");
				}
				break;
			case XF_SRCBINORMAL_B_INROW:
				if (components & VB_HAS_NRM2)
				{
					_dbg_assert_log_(VIDEO, texinfo.inputform == XF_TEXINPUT_ABC1, "Incorrect inputform sourcerow: XF_SRCBINORMAL_B_INROW inputform: %u", texinfo.inputform);
					out.Write("coord = float4(rawnorm2.xyz, 1.0);\n");
				}
				break;
			default:
				// Remaining source rows are treated as texture coordinate
				// inputs, indexed relative to XF_SRCTEX0_INROW.
				_dbg_assert_log_(VIDEO, texinfo.sourcerow <= XF_SRCTEX7_INROW, "sourcerow missmatch: %u", texinfo.sourcerow);
				if (components & (VB_HAS_UV0 << (texinfo.sourcerow - XF_SRCTEX0_INROW)))
					out.Write("coord = float4(tex%d.x, tex%d.y, 1.0, 1.0);\n", texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW);
				break;
			}
		}
		// first transformation
		uid_data.texMtxInfo[i].texgentype = xfr.texMtxInfo[i].texgentype;
		switch (texinfo.texgentype)
		{
		case XF_TEXGEN_EMBOSS_MAP: // calculate tex coords into bump map
			if (components & (VB_HAS_NRM1 | VB_HAS_NRM2))
			{
				// transform the light dir into tangent space
				uid_data.texMtxInfo[i].embosslightshift = xfr.texMtxInfo[i].embosslightshift;
				uid_data.texMtxInfo[i].embosssourceshift = xfr.texMtxInfo[i].embosssourceshift;
				if (Write_Code)
				{
					out.Write("float3 eldir = normalize(" LIGHT_POS".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(I_LIGHTS, texinfo.embosslightshift));
					out.Write("o.tex%d.xyz = o.tex%d.xyz + float3(dot(eldir, _norm1), dot(eldir, _norm2), 0.0);\n", i, texinfo.embosssourceshift);
				}
			}
			else
			{
				// The following assert was triggered in House of the Dead Overkill and Star Wars Rogue Squadron 2
				//
				// NOTE(review): the code emitted below depends on
				// embosssourceshift, so it is recorded in the UID here too.
				uid_data.texMtxInfo[i].embosssourceshift = xfr.texMtxInfo[i].embosssourceshift;
				if (Write_Code)
				{
					_dbg_assert_log_(VIDEO, 0, "vertex normals spected");
					out.Write("o.tex%d.xyz = o.tex%d.xyz;\n", i, texinfo.embosssourceshift);
				}
			}
			break;
		case XF_TEXGEN_COLOR_STRGBC0:
			if (Write_Code)
			{
				_dbg_assert_log_(VIDEO, texinfo.sourcerow == XF_SRCCOLORS_INROW, "sourcerow missmatch spected: XF_SRCCOLORS_INROW found: %u", texinfo.sourcerow);
				out.Write("o.tex%d.xyz = float3(o.colors_0.x, o.colors_0.y, 1);\n", i);
			}
			break;
		case XF_TEXGEN_COLOR_STRGBC1:
			if (Write_Code)
			{
				_dbg_assert_log_(VIDEO, texinfo.sourcerow == XF_SRCCOLORS_INROW, "sourcerow missmatch spected: XF_SRCCOLORS_INROW found: %u", texinfo.sourcerow);
				out.Write("o.tex%d.xyz = float3(o.colors_1.x, o.colors_1.y, 1);\n", i);
			}
			break;
		case XF_TEXGEN_REGULAR:
		default:
			// One projection bit per texgen is packed into the UID.
			uid_data.texMtxInfo_n_projection |= xfr.texMtxInfo[i].projection << i;
			if (Write_Code)
			{
				if (components & (VB_HAS_TEXMTXIDX0 << i))
				{
					// Per-vertex matrix index: rows come from the transform
					// matrix array, starting at the index stored in tex.z.
					out.Write("int tmp = int(tex%d.z);\n", i);
					if (texinfo.projection == XF_TEXPROJ_STQ)
						out.Write("o.tex%d.xyz = float3(dot(coord, " I_TRANSFORMMATRICES"[tmp]), dot(coord, " I_TRANSFORMMATRICES"[tmp+1]), dot(coord, " I_TRANSFORMMATRICES"[tmp+2]));\n", i);
					else
						out.Write("o.tex%d.xyz = float3(dot(coord, " I_TRANSFORMMATRICES"[tmp]), dot(coord, " I_TRANSFORMMATRICES"[tmp+1]), 1);\n", i);
				}
				else
				{
					// Fixed matrix: texgen i uses rows 3*i .. 3*i+2 of the
					// texture matrix array.
					if (texinfo.projection == XF_TEXPROJ_STQ)
						out.Write("o.tex%d.xyz = float3(dot(coord, " I_TEXMATRICES"[%d]), dot(coord, " I_TEXMATRICES"[%d]), dot(coord, " I_TEXMATRICES"[%d]));\n", i, 3 * i, 3 * i + 1, 3 * i + 2);
					else
						out.Write("o.tex%d.xyz = float3(dot(coord, " I_TEXMATRICES"[%d]), dot(coord, " I_TEXMATRICES"[%d]), 1);\n", i, 3 * i, 3 * i + 1);
				}
			}
			break;
		}
		uid_data.dualTexTrans_enabled = xfr.dualTexTrans.enabled;
		// CHECKME: does this only work for regular tex gen types?
		if (xfr.dualTexTrans.enabled && texinfo.texgentype == XF_TEXGEN_REGULAR)
		{
			const PostMtxInfo& postInfo = xfr.postMtxInfo[i];
			uid_data.postMtxInfo[i].index = xfr.postMtxInfo[i].index;
			int postidx = postInfo.index;
			if (Write_Code)
			{
				// The three row indices are masked with 0x3f, so they wrap
				// within 0..63.
				out.Write("float4 P0 = " I_POSTTRANSFORMMATRICES"[%d];\n"
					"float4 P1 = " I_POSTTRANSFORMMATRICES"[%d];\n"
					"float4 P2 = " I_POSTTRANSFORMMATRICES"[%d];\n",
					postidx & 0x3f, (postidx + 1) & 0x3f, (postidx + 2) & 0x3f);
			}
			// texGenSpecialCase is hard-coded to false above, so only the
			// else branch is currently taken.
			if (texGenSpecialCase)
			{
				// no normalization
				// q of input is 1
				// q of output is unknown
				// multiply by postmatrix
				if (Write_Code)
					out.Write("o.tex%d.xyz = float3(dot(P0.xy, o.tex%d.xy) + P0.z + P0.w, dot(P1.xy, o.tex%d.xy) + P1.z + P1.w, 0.0);\n", i, i, i);
			}
			else
			{
				uid_data.postMtxInfo[i].normalize = xfr.postMtxInfo[i].normalize;
				if (Write_Code)
				{
					if (postInfo.normalize)
						out.Write("o.tex%d.xyz = normalize(o.tex%d.xyz);\n", i, i);
					// multiply by postmatrix
					out.Write("o.tex%d.xyz = float3(dot(P0.xyz, o.tex%d.xyz) + P0.w, dot(P1.xyz, o.tex%d.xyz) + P1.w, dot(P2.xyz, o.tex%d.xyz) + P2.w);\n", i, i, i, i);
				}
			}
		}
		// Close the per-texgen scope opened at the top of the loop body.
		if (Write_Code)
			out.Write("}\n");
	}
	if (Write_Code)
	{
		// clipPos/w needs to be done in pixel shader, not here
		if (xfr.numTexGen.numTexGens < 7)
		{
			out.Write("o.clipPos%s = float4(pos.x,pos.y,o.pos.z,o.pos.w);\n", (api_type == API_OPENGL) ? "_2" : "");
		}
		else
		{
			// With 7 or more texgens the clip position is packed into the
			// w components of tex0..tex3 instead.
			out.Write("o.tex0.w = pos.x;\n");
			out.Write("o.tex1.w = pos.y;\n");
			out.Write("o.tex2.w = o.pos.z;\n");
			out.Write("o.tex3.w = o.pos.w;\n");
		}
		if (enable_pl)
		{
			// Per-pixel lighting: export the normal and pos.z, and pass the
			// vertex colors through unlit.
			if (xfr.numTexGen.numTexGens < 7)
			{
				out.Write("o.Normal%s = float4(_norm0.x,_norm0.y,_norm0.z,pos.z);\n", (api_type == API_OPENGL) ? "_2" : "");
			}
			else
			{
				out.Write("o.tex4.w = _norm0.x;\n");
				out.Write("o.tex5.w = _norm0.y;\n");
				out.Write("o.tex6.w = _norm0.z;\n");
				if (xfr.numTexGen.numTexGens < 8)
					out.Write("o.tex7 = pos.xyzz;\n");
				else
					out.Write("o.tex7.w = pos.z;\n");
			}
			if (components & VB_HAS_COL0)
				out.Write("o.colors_0 = color0;\n");
			else
				out.Write("o.colors_0 = float4(1.0, 1.0, 1.0, 1.0);\n");
			if (components & VB_HAS_COL1)
				out.Write("o.colors_1 = color1;\n");
			else
				out.Write("o.colors_1 = o.colors_0;\n");
		}
		//write the true depth value, if the game uses depth textures pixel shaders will override with the correct values
		//if not early z culling will improve speed
		if (g_ActiveConfig.backend_info.bSupportsClipControl)
		{
			out.Write("o.pos.z = -o.pos.z;\n");
		}
		else if (api_type & API_D3D9 || api_type == API_D3D11)
		{
			out.Write("o.pos.z = -((" I_DEPTHPARAMS".x - 1.0) * o.pos.w + o.pos.z * " I_DEPTHPARAMS".y);\n");
		}
		else
		{
			// this results in a scale from -1..0 to -1..1 after perspective
			// divide
			out.Write("o.pos.z = o.pos.z * -2.0 - o.pos.w;\n");
			// the next steps of the OGL pipeline are:
			// (x_c,y_c,z_c,w_c) = o.pos //switch to OGL spec terminology
			// clipping to -w_c <= (x_c,y_c,z_c) <= w_c
			// (x_d,y_d,z_d) = (x_c,y_c,z_c)/w_c//perspective divide
			// z_w = (f-n)/2*z_d + (n+f)/2
			// z_w now contains the value to go to the 0..1 depth buffer
			//trying to get the correct semantic while not using glDepthRange
			//seems to get rather complicated
		}
		// The console GPU places the pixel center at 7/12 in screen space unless
		// antialiasing is enabled, while D3D11 and OpenGL place it at 0.5, and D3D9 at 0. This results
		// in some primitives being placed one pixel too far to the bottom-right,
		// which in turn can be critical if it happens for clear quads.
		// Hence, we compensate for this pixel center difference so that primitives
		// get rasterized correctly.
		out.Write("o.pos.xy = o.pos.xy + o.pos.w * " I_DEPTHPARAMS".zw;\n");
		if (api_type & API_D3D9)
		{
			// Write Texture Offsets for Point/Line Rendering
			for (unsigned int i = 0; i < xfr.numTexGen.numTexGens; ++i)
			{
				// texOffsetMemberSelector is defined outside this block; it
				// supplies the swizzle string for texgen i.
				out.Write("o.tex%d.xy = o.tex%d.xy + (" I_PLOFFSETPARAMS"[indices.w].zw * " I_PLOFFSETPARAMS"[indices.y + %d].%s );\n", i, i, ((i / 4) + 1), texOffsetMemberSelector[i % 4]);
			}
		}
		if (api_type == API_OPENGL)
		{
			// GLSL epilogue: copy the local VS_OUTPUT into the declared
			// outputs, then set gl_Position.
			if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
			{
				AssignVSOutputMembers<T, api_type>(out, "vs", "o", enable_pl, xfr);
			}
			else
			{
				if (xfr.numTexGen.numTexGens < 7)
				{
					// All eight uvN_2 outputs were declared above, so the
					// unused ones are written with zeros.
					for (unsigned int i = 0; i < 8; ++i)
					{
						if (i < xfr.numTexGen.numTexGens)
							out.Write(" uv%d_2.xyz = o.tex%d;\n", i, i);
						else
							out.Write(" uv%d_2.xyz = float3(0.0, 0.0, 0.0);\n", i);
					}
					out.Write(" clipPos_2 = o.clipPos;\n");
					if (enable_pl)
						out.Write(" Normal_2 = o.Normal;\n");
				}
				else
				{
					// clip position is in w of first 4 texcoords
					if (enable_pl)
					{
						for (int i = 0; i < 8; ++i)
							out.Write(" uv%d_2 = o.tex%d;\n", i, i);
					}
					else
					{
						for (unsigned int i = 0; i < xfr.numTexGen.numTexGens; ++i)
							out.Write(" uv%d_2%s = o.tex%d;\n", i, i < 4 ? ".xyzw" : ".xyz", i);
					}
				}
				out.Write("colors_0 = o.colors_0;\n");
				out.Write("colors_1 = o.colors_1;\n");
			}
			out.Write("gl_Position = o.pos;\n");
			out.Write("}\n");
		}
		else
		{
			out.Write("return o;\n}\n");
		}
		// If the sentinel byte written at the start was overwritten, the
		// generated shader did not fit in the buffer.
		if (buffer[VERTEXSHADERGEN_BUFFERSIZE - 1] != 0x7C)
			PanicAlert("VertexShader generator - buffer too small, canary has been eaten!");
	}
	if (uidPresent)
	{
		out.CalculateUIDHash();
	}
}
void VideoBackend::InitBackendInfo() { HRESULT hr = D3D::LoadDXGI(); if (FAILED(hr)) return; hr = D3D::LoadD3D(); if (FAILED(hr)) { D3D::UnloadDXGI(); return; } g_Config.backend_info.api_type = APIType::D3D; g_Config.backend_info.bSupportsExclusiveFullscreen = false; g_Config.backend_info.bSupportsDualSourceBlend = true; g_Config.backend_info.bSupportsPrimitiveRestart = true; g_Config.backend_info.bSupportsOversizedViewports = false; g_Config.backend_info.bSupportsGeometryShaders = true; g_Config.backend_info.bSupports3DVision = true; g_Config.backend_info.bSupportsPostProcessing = false; g_Config.backend_info.bSupportsPaletteConversion = true; g_Config.backend_info.bSupportsClipControl = true; g_Config.backend_info.bSupportsDepthClamp = true; g_Config.backend_info.bSupportsReversedDepthRange = false; g_Config.backend_info.bSupportsMultithreading = false; g_Config.backend_info.bSupportsInternalResolutionFrameDumps = false; IDXGIFactory* factory; IDXGIAdapter* ad; hr = create_dxgi_factory(__uuidof(IDXGIFactory), (void**)&factory); if (FAILED(hr)) { PanicAlert("Failed to create IDXGIFactory object"); D3D::UnloadD3D(); D3D::UnloadDXGI(); return; } // adapters g_Config.backend_info.Adapters.clear(); g_Config.backend_info.AAModes.clear(); while (factory->EnumAdapters((UINT)g_Config.backend_info.Adapters.size(), &ad) != DXGI_ERROR_NOT_FOUND) { const size_t adapter_index = g_Config.backend_info.Adapters.size(); DXGI_ADAPTER_DESC desc; ad->GetDesc(&desc); // TODO: These don't get updated on adapter change, yet if (adapter_index == g_Config.iAdapter) { ID3D12Device* temp_device; hr = d3d12_create_device(ad, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&temp_device)); if (SUCCEEDED(hr)) { std::string samples; std::vector<DXGI_SAMPLE_DESC> modes = D3D::EnumAAModes(temp_device); // First iteration will be 1. This equals no AA. 
for (unsigned int i = 0; i < modes.size(); ++i) { g_Config.backend_info.AAModes.push_back(modes[i].Count); } // Requires the earlydepthstencil attribute (only available in shader model 5) g_Config.backend_info.bSupportsEarlyZ = true; // Requires full UAV functionality (only available in shader model 5) g_Config.backend_info.bSupportsBBox = true; // Requires the instance attribute (only available in shader model 5) g_Config.backend_info.bSupportsGSInstancing = true; // Sample shading requires shader model 5 g_Config.backend_info.bSupportsSSAA = true; temp_device->Release(); } } g_Config.backend_info.Adapters.push_back(UTF16ToUTF8(desc.Description)); ad->Release(); } factory->Release(); // Clear ppshaders string vector g_Config.backend_info.PPShaders.clear(); g_Config.backend_info.AnaglyphShaders.clear(); D3D::UnloadD3D(); D3D::UnloadDXGI(); }
inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, u32 components) { // Non-uid template parameters will write to the dummy data (=> gets optimized out) pixel_shader_uid_data dummy_data; pixel_shader_uid_data& uid_data = (&out.template GetUidData<pixel_shader_uid_data>() != NULL) ? out.template GetUidData<pixel_shader_uid_data>() : dummy_data; #ifndef ANDROID locale_t locale; locale_t old_locale; #endif if (Write_Code) { out.SetBuffer(text); #ifndef ANDROID locale = newlocale(LC_NUMERIC_MASK, "C", NULL); // New locale for compilation old_locale = uselocale(locale); // Apply the locale for this thread #endif text[sizeof(text) - 1] = 0x7C; // canary } unsigned int numStages = bpmem.genMode.numtevstages + 1; unsigned int numTexgen = bpmem.genMode.numtexgens; const bool forced_early_z = g_ActiveConfig.backend_info.bSupportsEarlyZ && bpmem.UseEarlyDepthTest(); const bool per_pixel_depth = (bpmem.ztex2.op != ZTEXTURE_DISABLE && bpmem.UseLateDepthTest()) || (!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z && dstAlphaMode != DSTALPHA_NULL); bool lightingEnabled = xfregs.numChan.numColorChans > 0; bool enable_pl = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting && lightingEnabled; uid_data.pixel_lighting = enable_pl; uid_data.dstAlphaMode = dstAlphaMode; uid_data.genMode_numindstages = bpmem.genMode.numindstages; uid_data.genMode_numtevstages = bpmem.genMode.numtevstages; uid_data.genMode_numtexgens = bpmem.genMode.numtexgens; if (ApiType == API_D3D11) { uid_data.tex_pcformat.samp0 = TextureCache::getStagePCelementCount(0); uid_data.tex_pcformat.samp1 = TextureCache::getStagePCelementCount(1); uid_data.tex_pcformat.samp2 = TextureCache::getStagePCelementCount(2); uid_data.tex_pcformat.samp3 = TextureCache::getStagePCelementCount(3); uid_data.tex_pcformat.samp4 = TextureCache::getStagePCelementCount(4); uid_data.tex_pcformat.samp5 = TextureCache::getStagePCelementCount(5); 
uid_data.tex_pcformat.samp6 = TextureCache::getStagePCelementCount(6); uid_data.tex_pcformat.samp7 = TextureCache::getStagePCelementCount(7); } if (Write_Code) { InitializeRegisterState(); out.Write("//Pixel Shader for TEV stages\n"); out.Write("//%i TEV stages, %i texgens, %i IND stages\n", numStages, numTexgen, bpmem.genMode.numindstages); out.Write(headerUtil); if (ApiType == API_OPENGL) { // Declare samplers for (u32 i = 0; i < 8; ++i) out.Write("uniform sampler2D samp%d;\n", i); } else { // Declare samplers for (u32 i = 0; i < 8; ++i) out.Write("%s samp%d : register(s%d);\n", (ApiType == API_D3D11) ? "sampler" : "uniform sampler2D", i, i); if (ApiType == API_D3D11) { out.Write("\n"); for (u32 i = 0; i < 8; ++i) { out.Write("Texture2D Tex%d : register(t%d);\n", i, i); } } } out.Write("\n"); if (g_ActiveConfig.backend_info.bSupportsGLSLUBO) out.Write("layout(std140) uniform PSBlock {\n"); DeclareUniform<T, ApiType>(out, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_COLORS, "float4", I_COLORS "[4]"); DeclareUniform<T, ApiType>(out, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_KCOLORS, "float4", I_KCOLORS "[4]"); DeclareUniform<T, ApiType>(out, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_ALPHA, "float4", I_ALPHA); DeclareUniform<T, ApiType>(out, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_TEXDIMS, "float4", I_TEXDIMS "[8]"); DeclareUniform<T, ApiType>(out, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_ZBIAS, "float4", I_ZBIAS "[2]"); DeclareUniform<T, ApiType>(out, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_INDTEXSCALE, "float4", I_INDTEXSCALE "[2]"); DeclareUniform<T, ApiType>(out, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_INDTEXMTX, "float4", I_INDTEXMTX "[6]"); DeclareUniform<T, ApiType>(out, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_FOG, "float4", I_FOG "[3]"); if (enable_pl) { DeclareUniform<T, ApiType>(out, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_PLIGHTS, "float4", I_PLIGHTS "[40]"); DeclareUniform<T, ApiType>(out, 
g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_PMATERIALS, "float4", I_PMATERIALS "[4]"); } if (g_ActiveConfig.backend_info.bSupportsGLSLUBO) out.Write("};\n"); if (ApiType == API_OPENGL) { out.Write("out vec4 ocol0;\n"); if (dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND) out.Write("out vec4 ocol1;\n"); if (per_pixel_depth) out.Write("#define depth gl_FragDepth\n"); out.Write("VARYIN float4 colors_02;\n"); out.Write("VARYIN float4 colors_12;\n"); // compute window position if needed because binding semantic WPOS is not widely supported // Let's set up attributes if (numTexgen < 7) { for (u32 i = 0; i < numTexgen; ++i) { out.Write("VARYIN float3 uv%d_2;\n", i); } out.Write("VARYIN float4 clipPos_2;\n"); if (enable_pl) { out.Write("VARYIN float4 Normal_2;\n"); } } else { // wpos is in w of first 4 texcoords if (enable_pl) { for (u32 i = 0; i < 8; ++i) { out.Write("VARYIN float4 uv%d_2;\n", i); } } else { for (u32 i = 0; i < numTexgen; ++i) { out.Write("VARYIN float%d uv%d_2;\n", i < 4 ? 4 : 3, i); } } out.Write("float4 clipPos;\n"); } if (forced_early_z) { // HACK: This doesn't force the driver to write to depth buffer if alpha test fails. // It just allows it, but it seems that all drivers do. out.Write("layout(early_fragment_tests) in;\n"); } out.Write("void main()\n{\n"); } else { if (forced_early_z) { out.Write("[earlydepthstencil]\n"); } out.Write("void main(\n"); if (ApiType != API_D3D11) { out.Write(" out float4 ocol0 : COLOR0,%s%s\n in float4 rawpos : %s,\n", dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "\n out float4 ocol1 : COLOR1," : "", per_pixel_depth ? "\n out float depth : DEPTH," : "", ApiType == API_D3D9_SM20 ? "POSITION" : "VPOS"); } else { out.Write(" out float4 ocol0 : SV_Target0,%s%s\n in float4 rawpos : SV_Position,\n", dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "\n out float4 ocol1 : SV_Target1," : "", per_pixel_depth ? 
"\n out float depth : SV_Depth," : ""); } // "centroid" attribute is only supported by D3D11 const char* optCentroid = (ApiType == API_D3D11 ? "centroid" : ""); out.Write(" in %s float4 colors_0 : COLOR0,\n", optCentroid); out.Write(" in %s float4 colors_1 : COLOR1", optCentroid); // compute window position if needed because binding semantic WPOS is not widely supported if (numTexgen < 7) { for (unsigned int i = 0; i < numTexgen; ++i) out.Write(",\n in %s float3 uv%d : TEXCOORD%d", optCentroid, i, i); out.Write(",\n in %s float4 clipPos : TEXCOORD%d", optCentroid, numTexgen); if (enable_pl) out.Write(",\n in %s float4 Normal : TEXCOORD%d", optCentroid, numTexgen + 1); out.Write(" ) {\n"); } else { // wpos is in w of first 4 texcoords if (enable_pl) { for (u32 i = 0; i < 8; ++i) out.Write(",\n in float4 uv%d : TEXCOORD%d", i, i); } else { for (u32 i = 0; i < numTexgen; ++i) out.Write(",\n in float%d uv%d : TEXCOORD%d", i < 4 ? 4 : 3, i, i); } out.Write(" ) {\n"); out.Write("float4 clipPos = float4(0.0,0.0,0.0,0.0);"); } } if (dstAlphaMode == DSTALPHA_NULL) { out.Write("ocol0 = float4(0.0,0.0,0.0,0.0);\n"); out.Write("}\n"); if (text[sizeof(text) - 1] != 0x7C) PanicAlert("PixelShader generator - buffer too small, canary has been eaten!"); #ifndef ANDROID uselocale(old_locale); // restore locale freelocale(locale); #endif return; } out.Write("float4 c0 = " I_COLORS "[1], c1 = " I_COLORS "[2], c2 = " I_COLORS "[3], prev = " I_COLORS "[0];\n" "float4 tex_t = float4(0.0,0.0,0.0,0.0), ras_t = float4(0.0,0.0,0.0,0.0), konst_t = float4(0.0,0.0,0.0,0.0);\n" "float3 c16 = float3(1.0,256.0,0.0), c24 = float3(1.0,256.0,256.0*256.0);\n" "float a_bump=0.0;\n" "float3 tevcoord=float3(0.0,0.0,0.0);\n" "float2 wrappedcoord=float2(0.0,0.0), t_coord=float2(0.0,0.0),ittmpexp=float2(0.0,0.0);\n" "float4 tin_a = float4(0.0,0.0,0.0,0.0), tin_b = float4(0.0,0.0,0.0,0.0), tin_c = float4(0.0,0.0,0.0,0.0), tin_d = float4(0.0,0.0,0.0,0.0);\n\n"); if (ApiType == API_OPENGL) { // On Mali, global 
variables must be initialized as constants. // This is why we initialize these variables locally instead. out.Write("float4 rawpos = gl_FragCoord;\n"); out.Write("float4 colors_0 = colors_02;\n"); out.Write("float4 colors_1 = colors_12;\n"); // compute window position if needed because binding semantic WPOS is not widely supported // Let's set up attributes if (numTexgen < 7) { if (numTexgen) { for (u32 i = 0; i < numTexgen; ++i) { out.Write("float3 uv%d = uv%d_2;\n", i, i); } } out.Write("float4 clipPos = clipPos_2;\n"); if (enable_pl) { out.Write("float4 Normal = Normal_2;\n"); } } else { // wpos is in w of first 4 texcoords if (enable_pl) { for (u32 i = 0; i < 8; ++i) { out.Write("float4 uv%d = uv%d_2;\n", i, i); } } else { for (u32 i = 0; i < numTexgen; ++i) { out.Write("float%d uv%d = uv%d_2;\n", i < 4 ? 4 : 3, i, i); } } } } } if (enable_pl) { if (Write_Code) { if (xfregs.numChan.numColorChans > 0) { if (numTexgen < 7) { out.Write("float3 _norm0 = normalize(Normal.xyz);\n\n"); out.Write("float3 pos = float3(clipPos.x,clipPos.y,Normal.w);\n"); } else { out.Write("float3 _norm0 = normalize(float3(uv4.w,uv5.w,uv6.w));\n\n"); out.Write("float3 pos = float3(uv0.w,uv1.w,uv7.w);\n"); } out.Write("float4 mat, lacc;\n" "float3 ldir, h;\n" "float dist, dist2, attn;\n"); } } uid_data.components = components; GenerateLightingShader<T, Write_Code>(out, uid_data.lighting, components, I_PMATERIALS, I_PLIGHTS, "colors_", "colors_"); } if (Write_Code) { if (numTexgen < 7) out.Write("clipPos = float4(rawpos.x, rawpos.y, clipPos.z, clipPos.w);\n"); else out.Write("clipPos = float4(rawpos.x, rawpos.y, uv2.w, uv3.w);\n"); } // HACK to handle cases where the tex gen is not enabled if (numTexgen == 0) { if (Write_Code) out.Write("float3 uv0 = float3(0.0,0.0,0.0);\n"); } else { for (unsigned int i = 0; i < numTexgen; ++i) { // optional perspective divides uid_data.texMtxInfo_n_projection |= xfregs.texMtxInfo[i].projection << i; if (Write_Code) { if (xfregs.texMtxInfo[i].projection 
== XF_TEXPROJ_STQ) { out.Write("if (uv%d.z != 0.0)", i); out.Write("\tuv%d.xy = uv%d.xy / uv%d.z;\n", i, i, i); } out.Write("uv%d.xy = round(128.0 * uv%d.xy * " I_TEXDIMS"[%d].zw);\n", i, i, i); } } } // indirect texture map lookup int nIndirectStagesUsed = 0; if (bpmem.genMode.numindstages > 0) { for (unsigned int i = 0; i < numStages; ++i) { if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages) nIndirectStagesUsed |= 1 << bpmem.tevind[i].bt; } } uid_data.nIndirectStagesUsed = nIndirectStagesUsed; for (u32 i = 0; i < bpmem.genMode.numindstages; ++i) { if (nIndirectStagesUsed & (1 << i)) { unsigned int texcoord = bpmem.tevindref.getTexCoord(i); unsigned int texmap = bpmem.tevindref.getTexMap(i); uid_data.SetTevindrefValues(i, texcoord, texmap); if (Write_Code) { if (texcoord < numTexgen) { out.Write("t_coord = BSH(uv%d.xy , " I_INDTEXSCALE"[%d].%s);\n", texcoord, i / 2, (i & 1) ? "zw" : "xy"); } else { out.Write("t_coord = float2(0.0,0.0);\n"); } out.Write("float3 indtex%d = ", i); SampleTexture<T, Write_Code, ApiType>(out, "(t_coord/128.0)", "abg", texmap); } } } // Uid fields for BuildSwapModeTable are set in WriteStage char swapModeTable[4][5]; const char* swapColors = "rgba"; for (int i = 0; i < 4; i++) { swapModeTable[i][0] = swapColors[bpmem.tevksel[i * 2].swap1]; swapModeTable[i][1] = swapColors[bpmem.tevksel[i * 2].swap2]; swapModeTable[i][2] = swapColors[bpmem.tevksel[i * 2 + 1].swap1]; swapModeTable[i][3] = swapColors[bpmem.tevksel[i * 2 + 1].swap2]; swapModeTable[i][4] = '\0'; } for (unsigned int i = 0; i < numStages; i++) WriteStage<T, Write_Code, ApiType>(out, uid_data, i, swapModeTable); // build the equation for this stage if (Write_Code) { u32 colorCdest = bpmem.combiners[numStages - 1].colorC.dest; u32 alphaCdest = bpmem.combiners[numStages - 1].alphaC.dest; if (numStages) { // The results of the last texenv stage are put onto the screen, // regardless of the used destination register if (colorCdest != 0) { 
out.Write("prev.rgb = %s;\n", tevCOutputTable[colorCdest]); } if (alphaCdest != 0) { out.Write("prev.a = %s;\n", tevAOutputTable[alphaCdest]); } } // emulation of unsigned 8 overflow if (TevOverflowState[tevCOutputSourceMap[colorCdest]] || TevOverflowState[tevAOutputSourceMap[alphaCdest]]) out.Write("prev = CHK_O_U8(prev);\n"); } AlphaTest::TEST_RESULT Pretest = bpmem.alpha_test.TestResult(); uid_data.Pretest = Pretest; // NOTE: Fragment may not be discarded if alpha test always fails and early depth test is enabled // (in this case we need to write a depth value if depth test passes regardless of the alpha testing result) if (Pretest != AlphaTest::PASS) WriteAlphaTest<T, Write_Code, ApiType>(out, uid_data, dstAlphaMode, per_pixel_depth); // D3D9 doesn't support readback of depth in pixel shader, so we always have to calculate it again. // This shouldn't be a performance issue as the written depth is usually still from perspective division // but this isn't true for z-textures, so there will be depth issues between enabled and disabled z-textures fragments if ((ApiType == API_OPENGL || ApiType == API_D3D11) && g_ActiveConfig.bFastDepthCalc) { if (Write_Code) { out.Write("float zCoord = rawpos.z;\n"); } } else { // the screen space depth value = far z + (clip z / clip w) * z range if (Write_Code) { out.Write("float zCoord = " I_ZBIAS "[1].x + (clipPos.z / clipPos.w) * " I_ZBIAS "[1].y;\n"); } } // depth texture can safely be ignored if the result won't be written to the depth buffer (early_ztest) and isn't used for fog either const bool skip_ztexture = !per_pixel_depth && !bpmem.fog.c_proj_fsel.fsel; uid_data.ztex_op = bpmem.ztex2.op; uid_data.per_pixel_depth = per_pixel_depth; uid_data.forced_early_z = forced_early_z; uid_data.fast_depth_calc = g_ActiveConfig.bFastDepthCalc; uid_data.early_ztest = bpmem.UseEarlyDepthTest(); uid_data.fog_fsel = bpmem.fog.c_proj_fsel.fsel; // Note: z-textures are not written to depth buffer if early depth test is used if 
(per_pixel_depth && bpmem.UseEarlyDepthTest() && Write_Code) out.Write("depth = zCoord;\n"); // Note: depth texture output is only written to depth buffer if late depth test is used // theoretical final depth value is used for fog calculation, though, so we have to emulate ztextures anyway if (bpmem.ztex2.op != ZTEXTURE_DISABLE && !skip_ztexture) { // use the texture input of the last texture stage (tex_t), hopefully this has been read and is in correct format... if (Write_Code) { out.Write("zCoord = dot(" I_ZBIAS"[0].xyzw, tex_t.xyzw * (1.0/255.0)) + " I_ZBIAS "[1].w %s;\n", (bpmem.ztex2.op == ZTEXTURE_ADD) ? "+ zCoord" : ""); // U24 overflow emulation disabled because problems caused by float rounding //out.Write("zCoord = CHK_O_U24(zCoord);\n"); } } if (per_pixel_depth && bpmem.UseLateDepthTest() && Write_Code) out.Write("depth = zCoord;\n"); if (dstAlphaMode == DSTALPHA_ALPHA_PASS) { if (Write_Code) out.Write("ocol0 = float4(prev.rgb," I_ALPHA ".a) * (1.0/255.0);\n"); } else { WriteFog<T, Write_Code>(out, uid_data); if (Write_Code) out.Write("ocol0 = prev * (1.0/255.0);\n"); } // Use dual-source color blending to perform dst alpha in a single pass if (dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND) { if (Write_Code) { if (ApiType & API_D3D9) { // alpha component must be 0 or the shader will not compile (Direct3D 9Ex restriction) // Colors will be blended against the color from ocol1 in D3D 9... out.Write("ocol1 = float4(prev.a, prev.a, prev.a, 0.0) * (1.0/255.0);\n"); } else { // Colors will be blended against the alpha from ocol1... out.Write("ocol1 = prev * (1.0/255.0);\n"); } // ...and the alpha from ocol0 will be written to the framebuffer. out.Write("ocol0.a = " I_ALPHA ".a*(1.0/255.0);\n"); } } if (Write_Code) { out.Write("}\n"); if (text[sizeof(text) - 1] != 0x7C) PanicAlert("PixelShader generator - buffer too small, canary has been eaten!"); #ifndef ANDROID uselocale(old_locale); // restore locale freelocale(locale); #endif } }
// Body of the Mach exception-handling thread. Blocks on `port`, hands each
// received fault to JitInterface::HandleFault, and replies with either the
// thread state to resume with or a failure code. Returns when the port
// reports no more senders or when an unexpected message arrives.
static void ExceptionThread(mach_port_t port)
{
	Common::SetCurrentThreadName("Mach exception thread");

	// Layouts of the incoming request and the outgoing reply. They are packed
	// to 4 bytes so the in-memory layout matches the message on the wire.
#pragma pack(4)
	struct ExceptionRequest
	{
		mach_msg_header_t Head;
		NDR_record_t NDR;
		exception_type_t exception;
		mach_msg_type_number_t codeCnt;
		int64_t code[2];
		int flavor;
		mach_msg_type_number_t old_stateCnt;
		natural_t old_state[x86_THREAD_STATE64_COUNT];
		mach_msg_trailer_t trailer;
	};
	struct ExceptionReply
	{
		mach_msg_header_t Head;
		NDR_record_t NDR;
		kern_return_t RetCode;
		int flavor;
		mach_msg_type_number_t new_stateCnt;
		natural_t new_state[x86_THREAD_STATE64_COUNT];
	};
#pragma pack()

	ExceptionRequest request;
	ExceptionReply reply;
	// Fill both buffers with a recognizable byte pattern before first use.
	memset(&request, 0xee, sizeof(request));
	memset(&reply, 0xee, sizeof(reply));

	// Nothing to send on the first pass; later passes send the reply built
	// for the previous request in the same call that receives the next one.
	mach_msg_header_t* pending_reply = nullptr;
	mach_msg_size_t pending_reply_size = 0;
	mach_msg_option_t msg_options = MACH_RCV_MSG;

	for (;;)
	{
		// If this isn't the first run, send the reply message. Then, receive
		// a message: either a mach_exception_raise_state RPC due to
		// thread_set_exception_ports, or MACH_NOTIFY_NO_SENDERS due to
		// mach_port_request_notification.
		CheckKR("mach_msg_overwrite",
		        mach_msg_overwrite(pending_reply, msg_options, pending_reply_size,
		                           sizeof(request), port, MACH_MSG_TIMEOUT_NONE,
		                           MACH_PORT_NULL, &request.Head, 0));

		const auto received_id = request.Head.msgh_id;

		if (received_id == MACH_NOTIFY_NO_SENDERS)
		{
			// the other thread exited
			mach_port_destroy(mach_task_self(), port);
			return;
		}

		// NOTE(review): 2406 is presumably the mach_exception_raise_state
		// request ID mentioned above -- confirm against mach_exc.defs.
		if (received_id != 2406)
		{
			PanicAlert("unknown message received");
			return;
		}

		if (request.flavor != x86_THREAD_STATE64)
		{
			PanicAlert("unknown flavor %d (expected %d)", request.flavor, x86_THREAD_STATE64);
			return;
		}

		// The handler works directly on the state carried by the request, so
		// any changes it makes are what gets copied into the reply below.
		x86_thread_state64_t* thread_state =
			reinterpret_cast<x86_thread_state64_t*>(request.old_state);
		const bool handled =
			JitInterface::HandleFault(static_cast<uintptr_t>(request.code[1]), thread_state);

		// Set up the reply.
		reply.Head.msgh_bits = MACH_MSGH_BITS(MACH_MSGH_BITS_REMOTE(request.Head.msgh_bits), 0);
		reply.Head.msgh_remote_port = request.Head.msgh_remote_port;
		reply.Head.msgh_local_port = MACH_PORT_NULL;
		// NOTE(review): request ID + 100 looks like the MIG reply-ID convention -- confirm.
		reply.Head.msgh_id = request.Head.msgh_id + 100;
		reply.NDR = request.NDR;

		if (!handled)
		{
			// Pass the exception to the next handler (debugger or crash).
			reply.RetCode = KERN_FAILURE;
			reply.flavor = 0;
			reply.new_stateCnt = 0;
		}
		else
		{
			reply.RetCode = KERN_SUCCESS;
			reply.flavor = x86_THREAD_STATE64;
			reply.new_stateCnt = x86_THREAD_STATE64_COUNT;
			memcpy(reply.new_state, request.old_state,
			       x86_THREAD_STATE64_COUNT * sizeof(natural_t));
		}

		// Only the populated part of new_state is sent.
		reply.Head.msgh_size =
			offsetof(ExceptionReply, new_state) + reply.new_stateCnt * sizeof(natural_t);

		pending_reply = &reply.Head;
		pending_reply_size = reply.Head.msgh_size;
		msg_options |= MACH_SEND_MSG;
	}
}
// Registers every DSP / ARAM / audio-DMA MMIO handler on `mmio`.
//
// mmio: mapping that receives the handlers.
// base: base address OR'd with each register offset.
//
// The 16-bit handlers are registered first. The final loop then registers a
// 32-bit handler for every 4-byte slot in [0, 0x1000) that is split into two
// 16-bit accesses.
void RegisterMMIO(MMIO::Mapping* mmio, u32 base)
{
	// Declare all the boilerplate direct MMIOs.
	struct DirectRegister
	{
		u32 offset;
		u16* storage;
		bool mask_low_five_bits;  // omitted in an initializer => false
	};
	DirectRegister direct_registers[] = {
		{ AR_INFO, &g_ARAM_Info.Hex },
		{ AR_MODE, &g_AR_MODE },
		{ AR_REFRESH, &g_AR_REFRESH },
		{ AR_DMA_MMADDR_H, MMIO::Utils::HighPart(&g_arDMA.MMAddr) },
		{ AR_DMA_MMADDR_L, MMIO::Utils::LowPart(&g_arDMA.MMAddr), true },
		{ AR_DMA_ARADDR_H, MMIO::Utils::HighPart(&g_arDMA.ARAddr) },
		{ AR_DMA_ARADDR_L, MMIO::Utils::LowPart(&g_arDMA.ARAddr), true },
		{ AR_DMA_CNT_H, MMIO::Utils::HighPart(&g_arDMA.Cnt.Hex) },
		// AR_DMA_CNT_L triggers DMA
		{ AUDIO_DMA_START_HI, MMIO::Utils::HighPart(&g_audioDMA.SourceAddress) },
		{ AUDIO_DMA_START_LO, MMIO::Utils::LowPart(&g_audioDMA.SourceAddress) },
	};

	for (auto& reg : direct_registers)
	{
		// Flagged registers use a write mask that drops the low five bits.
		u16 write_mask = 0xFFFF;
		if (reg.mask_low_five_bits)
			write_mask = 0xFFE0;

		mmio->Register(base | reg.offset,
			MMIO::DirectRead<u16>(reg.storage),
			MMIO::DirectWrite<u16>(reg.storage, write_mask)
		);
	}

	// DSP mail MMIOs call DSP emulator functions to get results or write data.
	mmio->Register(base | DSP_MAIL_TO_DSP_HI,
		MMIO::ComplexRead<u16>([](u32) {
			// Under LLE, run the DSP for one mail slice before reading if
			// more than a slice is pending.
			if (dsp_slice > DSP_MAIL_SLICE && dsp_is_lle)
			{
				dsp_emulator->DSP_Update(DSP_MAIL_SLICE);
				dsp_slice -= DSP_MAIL_SLICE;
			}
			return dsp_emulator->DSP_ReadMailBoxHigh(true);
		}),
		MMIO::ComplexWrite<u16>([](u32, u16 value) {
			dsp_emulator->DSP_WriteMailBoxHigh(true, value);
		})
	);
	mmio->Register(base | DSP_MAIL_TO_DSP_LO,
		MMIO::ComplexRead<u16>([](u32) {
			return dsp_emulator->DSP_ReadMailBoxLow(true);
		}),
		MMIO::ComplexWrite<u16>([](u32, u16 value) {
			dsp_emulator->DSP_WriteMailBoxLow(true, value);
		})
	);
	mmio->Register(base | DSP_MAIL_FROM_DSP_HI,
		MMIO::ComplexRead<u16>([](u32) {
			// Same one-slice update as the TO_DSP high read above.
			if (dsp_slice > DSP_MAIL_SLICE && dsp_is_lle)
			{
				dsp_emulator->DSP_Update(DSP_MAIL_SLICE);
				dsp_slice -= DSP_MAIL_SLICE;
			}
			return dsp_emulator->DSP_ReadMailBoxHigh(false);
		}),
		MMIO::InvalidWrite<u16>()
	);
	mmio->Register(base | DSP_MAIL_FROM_DSP_LO,
		MMIO::ComplexRead<u16>([](u32) {
			return dsp_emulator->DSP_ReadMailBoxLow(false);
		}),
		MMIO::InvalidWrite<u16>()
	);

	// DSP control register: bits under DSP_CONTROL_MASK come from the DSP
	// emulator, the remaining bits from g_dspState.
	mmio->Register(base | DSP_CONTROL,
		MMIO::ComplexRead<u16>([](u32) {
			const auto host_bits = g_dspState.DSPControl.Hex & ~DSP_CONTROL_MASK;
			const auto emulator_bits = dsp_emulator->DSP_ReadControlRegister() & DSP_CONTROL_MASK;
			return host_bits | emulator_bits;
		}),
		MMIO::ComplexWrite<u16>([](u32, u16 value) {
			UDSPControl incoming;
			incoming.Hex = (value & ~DSP_CONTROL_MASK) |
			               (dsp_emulator->DSP_WriteControlRegister(value) & DSP_CONTROL_MASK);

			// Not really sure if this is correct, but it works...
			// Kind of a hack because DSP_CONTROL_MASK should make this bit
			// only viewable to dsp emulator
			if (value & 1 /*DSPReset*/)
			{
				g_audioDMA.AudioDMAControl.Hex = 0;
			}

			auto& control = g_dspState.DSPControl;

			// Update DSP related flags
			control.DSPReset = incoming.DSPReset;
			control.DSPAssertInt = incoming.DSPAssertInt;
			control.DSPHalt = incoming.DSPHalt;
			control.DSPInit = incoming.DSPInit;

			// Interrupt (mask)
			control.AID_mask = incoming.AID_mask;
			control.ARAM_mask = incoming.ARAM_mask;
			control.DSP_mask = incoming.DSP_mask;

			// Interrupt: a set bit in the written value clears the stored flag.
			if (incoming.AID)
			{
				control.AID = 0;
			}
			if (incoming.ARAM)
			{
				control.ARAM = 0;
			}
			if (incoming.DSP)
			{
				control.DSP = 0;
			}

			// unknown
			control.unk3 = incoming.unk3;
			control.pad = incoming.pad;
			if (control.pad != 0)
			{
				PanicAlert("DSPInterface (w) g_dspState.DSPControl (CC00500A) gets a value with junk in the padding %08x", value);
			}

			UpdateInterrupts();
		})
	);

	// ARAM MMIO controlling the DMA start.
	mmio->Register(base | AR_DMA_CNT_L,
		MMIO::DirectRead<u16>(MMIO::Utils::LowPart(&g_arDMA.Cnt.Hex)),
		MMIO::ComplexWrite<u16>([](u32, u16 value) {
			// Keep the upper half, replace the lower half with the written
			// value minus its low five bits, then start the transfer.
			g_arDMA.Cnt.Hex = (g_arDMA.Cnt.Hex & 0xFFFF0000) | (value & ~31);
			Do_ARAM_DMA();
		})
	);

	// Audio DMA MMIO controlling the DMA start.
	mmio->Register(base | AUDIO_DMA_CONTROL_LEN,
		MMIO::DirectRead<u16>(&g_audioDMA.AudioDMAControl.Hex),
		MMIO::ComplexWrite<u16>([](u32, u16 value) {
			g_audioDMA.AudioDMAControl.Hex = value;
			g_audioDMA.ReadAddress = g_audioDMA.SourceAddress;
			g_audioDMA.BlocksLeft = g_audioDMA.AudioDMAControl.NumBlocks;
		})
	);

	// Audio DMA blocks remaining is invalid to write to, and requires logic on
	// the read side.
	mmio->Register(base | AUDIO_DMA_BLOCKS_LEFT,
		MMIO::ComplexRead<u16>([](u32) {
			// Reads as one less than the stored count, with a floor of zero.
			return (g_audioDMA.BlocksLeft > 0 ? g_audioDMA.BlocksLeft - 1 : 0);
		}),
		MMIO::InvalidWrite<u16>()
	);

	// 32 bit reads/writes are a combination of two 16 bit accesses.
	for (int offset = 0; offset < 0x1000; offset += 4)
	{
		const u32 first_half = base | offset;
		const u32 second_half = base | (offset + 2);
		mmio->Register(first_half,
			MMIO::ReadToSmaller<u32>(mmio, first_half, second_half),
			MMIO::WriteToSmaller<u32>(mmio, first_half, second_half)
		);
	}
}