void MipsJit::Compile(u32 em_address) { PROFILE_THIS_SCOPE("jitc"); if (GetSpaceLeft() < 0x10000 || blocks.IsFull()) { ClearCache(); } int block_num = blocks.AllocateBlock(em_address); JitBlock *b = blocks.GetBlock(block_num); DoJit(em_address, b); blocks.FinalizeBlock(block_num, jo.enableBlocklink); bool cleanSlate = false; if (js.hasSetRounding && !js.lastSetRounding) { WARN_LOG(JIT, "Detected rounding mode usage, rebuilding jit with checks"); // Won't loop, since hasSetRounding is only ever set to 1. js.lastSetRounding = js.hasSetRounding; cleanSlate = true; } if (cleanSlate) { // Our assumptions are all wrong so it's clean-slate time. ClearCache(); Compile(em_address); } }
Shader::Shader(const char *code, uint32_t shaderType, bool useHWTransform, const ShaderID &shaderID) : failed_(false), useHWTransform_(useHWTransform), id_(shaderID) { PROFILE_THIS_SCOPE("shadercomp"); source_ = code; #ifdef SHADERLOG OutputDebugStringUTF8(code); #endif shader = glCreateShader(shaderType); glShaderSource(shader, 1, &code, 0); glCompileShader(shader); GLint success; glGetShaderiv(shader, GL_COMPILE_STATUS, &success); if (!success) { #define MAX_INFO_LOG_SIZE 2048 GLchar infoLog[MAX_INFO_LOG_SIZE]; GLsizei len; glGetShaderInfoLog(shader, MAX_INFO_LOG_SIZE, &len, infoLog); infoLog[len] = '\0'; #ifdef ANDROID ELOG("Error in shader compilation! %s\n", infoLog); ELOG("Shader source:\n%s\n", (const char *)code); #endif ERROR_LOG(G3D, "Error in shader compilation!\n"); ERROR_LOG(G3D, "Info log: %s\n", infoLog); ERROR_LOG(G3D, "Shader source:\n%s\n", (const char *)code); Reporting::ReportMessage("Error in shader compilation: info: %s / code: %s", infoLog, (const char *)code); #ifdef SHADERLOG OutputDebugStringUTF8(infoLog); #endif failed_ = true; shader = 0; } else { DEBUG_LOG(G3D, "Compiled shader:\n%s\n", (const char *)code); } }
void hleLagSync(u64 userdata, int cyclesLate) { // The goal here is to prevent network, audio, and input lag from the real world. // Our normal timing is very "stop and go". This is efficient, but causes real world lag. // This event (optionally) runs every 1ms to sync with the real world. PROFILE_THIS_SCOPE("timing"); if (!FrameTimingThrottled()) { lagSyncScheduled = false; return; } float scale = 1.0f; if (PSP_CoreParameter().fpsLimit == FPS_LIMIT_CUSTOM) { // 0 is handled in FrameTimingThrottled(). scale = 60.0f / g_Config.iFpsLimit; } const double goal = lastLagSync + (scale / 1000.0f); time_update(); // Don't lag too long ever, if they leave it paused. while (time_now_d() < goal && goal < time_now_d() + 0.01) { #ifndef _WIN32 const double left = goal - time_now_d(); usleep((long)(left * 1000000)); #endif time_update(); } const int emuOver = (int)cyclesToUs(cyclesLate); const int over = (int)((time_now_d() - goal) * 1000000); ScheduleLagSync(over - emuOver); }
void IRJit::RunLoopUntil(u64 globalticks) { PROFILE_THIS_SCOPE("jit"); // ApplyRoundingMode(true); // IR Dispatcher while (true) { // RestoreRoundingMode(true); CoreTiming::Advance(); // ApplyRoundingMode(true); if (coreState != 0) { break; } while (mips_->downcount >= 0) { u32 inst = Memory::ReadUnchecked_U32(mips_->pc); u32 opcode = inst & 0xFF000000; if (opcode == MIPS_EMUHACK_OPCODE) { u32 data = inst & 0xFFFFFF; IRBlock *block = blocks_.GetBlock(data); mips_->pc = IRInterpret(mips_, block->GetInstructions(), block->GetConstants(), block->GetNumInstructions()); } else { // RestoreRoundingMode(true); Compile(mips_->pc); // ApplyRoundingMode(true); } } } // RestoreRoundingMode(true); }
// Maybe should write this in ASM... void GPU_Vulkan::FastRunLoop(DisplayList &list) { PROFILE_THIS_SCOPE("gpuloop"); const CommandInfo *cmdInfo = cmdInfo_; int dc = downcount; for (; dc > 0; --dc) { // We know that display list PCs have the upper nibble == 0 - no need to mask the pointer const u32 op = *(const u32 *)(Memory::base + list.pc); const u32 cmd = op >> 24; const CommandInfo info = cmdInfo[cmd]; const u8 cmdFlags = info.flags; // If we stashed the cmdFlags in the top bits of the cmdmem, we could get away with one table lookup instead of two const u32 diff = op ^ gstate.cmdmem[cmd]; // Inlined CheckFlushOp here to get rid of the dumpThisFrame_ check. if ((cmdFlags & FLAG_FLUSHBEFORE) || (diff && (cmdFlags & FLAG_FLUSHBEFOREONCHANGE))) { drawEngine_.Flush(curCmd_); } gstate.cmdmem[cmd] = op; // TODO: no need to write if diff==0... if ((cmdFlags & FLAG_EXECUTE) || (diff && (cmdFlags & FLAG_EXECUTEONCHANGE))) { downcount = dc; (this->*info.func)(op, diff); dc = downcount; } else if (diff) { uint64_t dirty = info.flags >> 8; if (dirty) gstate_c.Dirty(dirty); } list.pc += 4; } downcount = 0; }
// TODO: All this setup is so expensive that we'll need dirty flags, or simply do it in the command writes where we detect dirty by xoring. Silly to do all this work on every drawcall. void DrawEngineGLES::ApplyDrawState(int prim) { if (gstate_c.IsDirty(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS) && !gstate.isModeClear() && gstate.isTextureMapEnabled()) { textureCache_->SetTexture(); gstate_c.Clean(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS); if (gstate_c.needShaderTexClamp) { // We will rarely need to set this, so let's do it every time on use rather than in runloop. // Most of the time non-framebuffer textures will be used which can be clamped themselves. gstate_c.Dirty(DIRTY_TEXCLAMP); } } // Start profiling here to skip SetTexture which is already accounted for PROFILE_THIS_SCOPE("applydrawstate"); bool useBufferedRendering = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE; gstate_c.allowShaderBlend = !g_Config.bDisableSlowFramebufEffects; // Do the large chunks of state conversion. We might be able to hide these two behind a dirty-flag each, // to avoid recomputing heavy stuff unnecessarily every draw call. GenericBlendState blendState; ConvertBlendState(blendState, gstate_c.allowShaderBlend); if (blendState.applyShaderBlending) { if (ApplyShaderBlending()) { // We may still want to do something about stencil -> alpha. ApplyStencilReplaceAndLogicOp(blendState.replaceAlphaWithStencil, blendState); } else { // Until next time, force it off. ResetShaderBlending(); gstate_c.allowShaderBlend = false; } } else if (blendState.resetShaderBlending) { ResetShaderBlending(); } if (blendState.enabled) { glstate.blend.enable(); glstate.blendEquationSeparate.set(glBlendEqLookup[(size_t)blendState.eqColor], glBlendEqLookup[(size_t)blendState.eqAlpha]); glstate.blendFuncSeparate.set( glBlendFactorLookup[(size_t)blendState.srcColor], glBlendFactorLookup[(size_t)blendState.dstColor], glBlendFactorLookup[(size_t)blendState.srcAlpha], glBlendFactorLookup[(size_t)blendState.dstAlpha]); if (blendState.dirtyShaderBlend) { gstate_c.Dirty(DIRTY_SHADERBLEND); } if (blendState.useBlendColor) { uint32_t color = blendState.blendColor; const float col[4] = { (float)((color & 0xFF) >> 0) * (1.0f / 255.0f), (float)((color & 0xFF00) >> 8) * (1.0f / 255.0f), (float)((color & 0xFF0000) >> 16) * (1.0f / 255.0f), (float)((color & 0xFF000000) >> 24) * (1.0f / 255.0f), }; glstate.blendColor.set(col); }
void SoftGPU::FastRunLoop(DisplayList &list) { PROFILE_THIS_SCOPE("soft_runloop"); for (; downcount > 0; --downcount) { u32 op = Memory::ReadUnchecked_U32(list.pc); u32 cmd = op >> 24; u32 diff = op ^ gstate.cmdmem[cmd]; gstate.cmdmem[cmd] = op; ExecuteOp(op, diff); list.pc += 4; } }
void IRJit::Compile(u32 em_address) { PROFILE_THIS_SCOPE("jitc"); int block_num = blocks_.AllocateBlock(em_address); IRBlock *b = blocks_.GetBlock(block_num); std::vector<IRInst> instructions; std::vector<u32> constants; u32 mipsBytes; frontend_.DoJit(em_address, instructions, constants, mipsBytes); b->SetInstructions(instructions, constants); b->SetOriginalSize(mipsBytes); b->Finalize(block_num); // Overwrites the first instruction if (frontend_.CheckRounding()) { // Our assumptions are all wrong so it's clean-slate time. ClearCache(); Compile(em_address); } }
VulkanFragmentShader::VulkanFragmentShader(VulkanContext *vulkan, FShaderID id, const char *code, bool useHWTransform) : vulkan_(vulkan), id_(id), failed_(false), useHWTransform_(useHWTransform), module_(0) { PROFILE_THIS_SCOPE("shadercomp"); source_ = code; std::string errorMessage; std::vector<uint32_t> spirv; #ifdef SHADERLOG OutputDebugStringA(code); #endif bool success = GLSLtoSPV(VK_SHADER_STAGE_FRAGMENT_BIT, code, spirv, &errorMessage); if (!errorMessage.empty()) { if (success) { ERROR_LOG(G3D, "Warnings in shader compilation!"); } else { ERROR_LOG(G3D, "Error in shader compilation!"); } ERROR_LOG(G3D, "Messages: %s", errorMessage.c_str()); ERROR_LOG(G3D, "Shader source:\n%s", code); #ifdef SHADERLOG OutputDebugStringA("Messages:\n"); OutputDebugStringA(errorMessage.c_str()); OutputDebugStringA(code); #endif Reporting::ReportMessage("Vulkan error in shader compilation: info: %s / code: %s", errorMessage.c_str(), code); } else { success = vulkan_->CreateShaderModule(spirv, &module_); #ifdef SHADERLOG OutputDebugStringA("OK\n"); #endif } if (!success) { failed_ = true; return; } else { DEBUG_LOG(G3D, "Compiled shader:\n%s\n", (const char *)code); } }
static void DoFrameIdleTiming() { PROFILE_THIS_SCOPE("timing"); if (!FrameTimingThrottled() || !g_Config.bEnableSound || wasPaused) { return; } time_update(); double dist = time_now_d() - lastFrameTime; // Ignore if the distance is just crazy. May mean wrap or pause. if (dist < 0.0 || dist >= 15 * timePerVblank) { return; } float scaledVblank = timePerVblank; if (PSP_CoreParameter().fpsLimit == FPS_LIMIT_CUSTOM) { // 0 is handled in FrameTimingThrottled(). scaledVblank *= 60.0f / g_Config.iFpsLimit; } // If we have over at least a vblank of spare time, maintain at least 30fps in delay. // This prevents fast forward during loading screens. const double thresh = lastFrameTime + (numVBlanksSinceFlip - 1) * scaledVblank; if (numVBlanksSinceFlip >= 2 && time_now_d() < thresh) { // Give a little extra wiggle room in case the next vblank does more work. const double goal = lastFrameTime + numVBlanksSinceFlip * scaledVblank - 0.001; while (time_now_d() < goal) { #ifdef _WIN32 sleep_ms(1); #else const double left = goal - time_now_d(); usleep((long)(left * 1000000)); #endif time_update(); } } }
VulkanPipeline *PipelineManagerVulkan::GetOrCreatePipeline(VkPipelineLayout layout, VkRenderPass renderPass, const VulkanPipelineRasterStateKey &rasterKey, const DecVtxFormat *decFmt, VulkanVertexShader *vs, VulkanFragmentShader *fs, bool useHwTransform) { VulkanPipelineKey key{}; _assert_msg_(G3D, renderPass, "Can't create a pipeline with a null renderpass"); key.raster = rasterKey; key.renderPass = renderPass; key.useHWTransform = useHwTransform; key.vShader = vs->GetModule(); key.fShader = fs->GetModule(); key.vtxDecId = useHwTransform ? decFmt->id : 0; auto iter = pipelines_.Get(key); if (iter) return iter; PROFILE_THIS_SCOPE("pipelinebuild"); VulkanPipeline *pipeline = CreateVulkanPipeline( vulkan_->GetDevice(), pipelineCache_, layout, renderPass, rasterKey, decFmt, vs, fs, useHwTransform, lineWidth_); // Even if the result is nullptr, insert it so we don't try to create it repeatedly. pipelines_.Insert(key, pipeline); return pipeline; }
void MipsJit::RunLoopUntil(u64 globalticks) { PROFILE_THIS_SCOPE("jit"); ((void (*)())enterCode)(); }
// Let's collect all the throttling and frameskipping logic here. static void DoFrameTiming(bool &throttle, bool &skipFrame, float timestep) { PROFILE_THIS_SCOPE("timing"); int fpsLimiter = PSP_CoreParameter().fpsLimit; throttle = FrameTimingThrottled(); skipFrame = false; // Check if the frameskipping code should be enabled. If neither throttling or frameskipping is on, // we have nothing to do here. bool doFrameSkip = g_Config.iFrameSkip != 0; if (!throttle && g_Config.bFrameSkipUnthrottle) { doFrameSkip = true; skipFrame = true; if (numSkippedFrames >= 7) { skipFrame = false; } return; } if (!throttle && !doFrameSkip) return; time_update(); float scaledTimestep = timestep; if (fpsLimiter == FPS_LIMIT_CUSTOM && g_Config.iFpsLimit != 0) { scaledTimestep *= 60.0f / g_Config.iFpsLimit; } if (lastFrameTime == 0.0 || wasPaused) { nextFrameTime = time_now_d() + scaledTimestep; if (wasPaused) wasPaused = false; } else { // Advance lastFrameTime by a constant amount each frame, // but don't let it get too far behind as things can get very jumpy. const double maxFallBehindFrames = 5.5; nextFrameTime = std::max(lastFrameTime + scaledTimestep, time_now_d() - maxFallBehindFrames * scaledTimestep); } curFrameTime = time_now_d(); // Auto-frameskip automatically if speed limit is set differently than the default. bool useAutoFrameskip = g_Config.bAutoFrameSkip && g_Config.iRenderingMode != FB_NON_BUFFERED_MODE; if (g_Config.bAutoFrameSkip || (g_Config.iFrameSkip == 0 && fpsLimiter == FPS_LIMIT_CUSTOM && g_Config.iFpsLimit > 60)) { // autoframeskip // Argh, we are falling behind! Let's skip a frame and see if we catch up. if (curFrameTime > nextFrameTime && doFrameSkip) { skipFrame = true; } } else if (g_Config.iFrameSkip >= 1) { // fixed frameskip if (numSkippedFrames >= g_Config.iFrameSkip) skipFrame = false; else skipFrame = true; } if (curFrameTime < nextFrameTime && throttle) { // If time gap is huge just jump (somebody unthrottled) if (nextFrameTime - curFrameTime > 2*scaledTimestep) { nextFrameTime = curFrameTime; } else { // Wait until we've caught up. while (time_now_d() < nextFrameTime) { #ifdef _WIN32 sleep_ms(1); // Sleep for 1ms on this thread #else const double left = nextFrameTime - curFrameTime; usleep((long)(left * 1000000)); #endif time_update(); } } curFrameTime = time_now_d(); } lastFrameTime = nextFrameTime; }
void DrawEngineCommon::SubmitSpline(const void *control_points, const void *indices, int tess_u, int tess_v, int count_u, int count_v, int type_u, int type_v, GEPatchPrimType prim_type, bool computeNormals, bool patchFacing, u32 vertType) { PROFILE_THIS_SCOPE("spline"); DispatchFlush(); // TODO: Verify correct functionality with < 4. if (count_u < 4 || count_v < 4) return; u16 index_lower_bound = 0; u16 index_upper_bound = count_u * count_v - 1; bool indices_16bit = (vertType & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT; const u8* indices8 = (const u8*)indices; const u16* indices16 = (const u16*)indices; if (indices) GetIndexBounds(indices, count_u*count_v, vertType, &index_lower_bound, &index_upper_bound); // Simplify away bones and morph before proceeding SimpleVertex *simplified_control_points = (SimpleVertex *)(decoded + 65536 * 12); u8 *temp_buffer = decoded + 65536 * 18; u32 origVertType = vertType; vertType = NormalizeVertices((u8 *)simplified_control_points, temp_buffer, (u8 *)control_points, index_lower_bound, index_upper_bound, vertType); VertexDecoder *vdecoder = GetVertexDecoder(vertType); int vertexSize = vdecoder->VertexSize(); if (vertexSize != sizeof(SimpleVertex)) { ERROR_LOG(G3D, "Something went really wrong, vertex size: %i vs %i", vertexSize, (int)sizeof(SimpleVertex)); } // TODO: Do something less idiotic to manage this buffer SimpleVertex **points = new SimpleVertex *[count_u * count_v]; // Make an array of pointers to the control points, to get rid of indices. for (int idx = 0; idx < count_u * count_v; idx++) { if (indices) points[idx] = simplified_control_points + (indices_16bit ? indices16[idx] : indices8[idx]); else points[idx] = simplified_control_points + idx; } int count = 0; u8 *dest = splineBuffer; SplinePatchLocal patch; patch.tess_u = tess_u; patch.tess_v = tess_v; patch.type_u = type_u; patch.type_v = type_v; patch.count_u = count_u; patch.count_v = count_v; patch.points = points; patch.computeNormals = computeNormals; patch.primType = prim_type; patch.patchFacing = patchFacing; int maxVertexCount = SPLINE_BUFFER_SIZE / vertexSize; TesselateSplinePatch(dest, quadIndices_, count, patch, origVertType, maxVertexCount); delete[] points; u32 vertTypeWithIndex16 = (vertType & ~GE_VTYPE_IDX_MASK) | GE_VTYPE_IDX_16BIT; UVScale prevUVScale; if (g_Config.bPrescaleUV) { // We scaled during Normalize already so let's turn it off when drawing. prevUVScale = gstate_c.uv; gstate_c.uv.uScale = 1.0f; gstate_c.uv.vScale = 1.0f; gstate_c.uv.uOff = 0; gstate_c.uv.vOff = 0; } int bytesRead; DispatchSubmitPrim(splineBuffer, quadIndices_, primType[prim_type], count, vertTypeWithIndex16, &bytesRead); DispatchFlush(); if (g_Config.bPrescaleUV) { gstate_c.uv = prevUVScale; } }
void DrawEngineCommon::SubmitBezier(const void *control_points, const void *indices, int tess_u, int tess_v, int count_u, int count_v, GEPatchPrimType prim_type, bool computeNormals, bool patchFacing, u32 vertType, int *bytesRead) { PROFILE_THIS_SCOPE("bezier"); DispatchFlush(); u16 index_lower_bound = 0; u16 index_upper_bound = count_u * count_v - 1; IndexConverter idxConv(vertType, indices); if (indices) GetIndexBounds(indices, count_u*count_v, vertType, &index_lower_bound, &index_upper_bound); VertexDecoder *origVDecoder = GetVertexDecoder((vertType & 0xFFFFFF) | (gstate.getUVGenMode() << 24)); *bytesRead = count_u * count_v * origVDecoder->VertexSize(); // Real hardware seems to draw nothing when given < 4 either U or V. // This would result in num_patches_u / num_patches_v being 0. if (count_u < 4 || count_v < 4) { return; } // Simplify away bones and morph before proceeding // There are normally not a lot of control points so just splitting decoded should be reasonably safe, although not great. SimpleVertex *simplified_control_points = (SimpleVertex *)(decoded + 65536 * 12); u8 *temp_buffer = decoded + 65536 * 18; u32 origVertType = vertType; vertType = NormalizeVertices((u8 *)simplified_control_points, temp_buffer, (u8 *)control_points, index_lower_bound, index_upper_bound, vertType); VertexDecoder *vdecoder = GetVertexDecoder(vertType); int vertexSize = vdecoder->VertexSize(); if (vertexSize != sizeof(SimpleVertex)) { ERROR_LOG(G3D, "Something went really wrong, vertex size: %i vs %i", vertexSize, (int)sizeof(SimpleVertex)); } float *pos = (float*)(decoded + 65536 * 18); // Size 4 float float *tex = pos + count_u * count_v * 4; // Size 4 float float *col = tex + count_u * count_v * 4; // Size 4 float const bool hasColor = (origVertType & GE_VTYPE_COL_MASK) != 0; const bool hasTexCoords = (origVertType & GE_VTYPE_TC_MASK) != 0; // Bezier patches share less control points than spline patches. Otherwise they are pretty much the same (except bezier don't support the open/close thing) int num_patches_u = (count_u - 1) / 3; int num_patches_v = (count_v - 1) / 3; BezierPatch *patches = nullptr; if (g_Config.bHardwareTessellation && g_Config.bHardwareTransform && !g_Config.bSoftwareRendering) { int posStride, texStride, colStride; tessDataTransfer->PrepareBuffers(pos, tex, col, posStride, texStride, colStride, count_u * count_v, hasColor, hasTexCoords); float *p = pos; float *t = tex; float *c = col; for (int idx = 0; idx < count_u * count_v; idx++) { SimpleVertex *point = simplified_control_points + (indices ? idxConv.convert(idx) : idx); memcpy(p, point->pos.AsArray(), 3 * sizeof(float)); p += posStride; if (hasTexCoords) { memcpy(t, point->uv, 2 * sizeof(float)); t += texStride; } if (hasColor) { memcpy(c, Vec4f::FromRGBA(point->color_32).AsArray(), 4 * sizeof(float)); c += colStride; } } if (!hasColor) { SimpleVertex *point = simplified_control_points + (indices ? idxConv.convert(0) : 0); memcpy(col, Vec4f::FromRGBA(point->color_32).AsArray(), 4 * sizeof(float)); } } else { patches = new BezierPatch[num_patches_u * num_patches_v]; for (int patch_u = 0; patch_u < num_patches_u; patch_u++) { for (int patch_v = 0; patch_v < num_patches_v; patch_v++) { BezierPatch& patch = patches[patch_u + patch_v * num_patches_u]; for (int point = 0; point < 16; ++point) { int idx = (patch_u * 3 + point % 4) + (patch_v * 3 + point / 4) * count_u; patch.points[point] = simplified_control_points + (indices ? idxConv.convert(idx) : idx); } patch.u_index = patch_u * 3; patch.v_index = patch_v * 3; patch.index = patch_v * num_patches_u + patch_u; patch.primType = prim_type; patch.computeNormals = computeNormals; patch.patchFacing = patchFacing; } } } int count = 0; u8 *dest = splineBuffer; // We shouldn't really split up into separate 4x4 patches, instead we should do something that works // like the splines, so we subdivide across the whole "mega-patch". // If specified as 0, uses 1. if (tess_u < 1) { tess_u = 1; } if (tess_v < 1) { tess_v = 1; } u16 *inds = quadIndices_; if (g_Config.bHardwareTessellation && g_Config.bHardwareTransform && !g_Config.bSoftwareRendering) { tessDataTransfer->SendDataToShader(pos, tex, col, count_u * count_v, hasColor, hasTexCoords); TessellateBezierPatchHardware(dest, inds, count, tess_u, tess_v, prim_type); numPatches = num_patches_u * num_patches_v; } else { int maxVertices = SPLINE_BUFFER_SIZE / vertexSize; // Downsample until it fits, in case crazy tessellation factors are sent. while ((tess_u + 1) * (tess_v + 1) * num_patches_u * num_patches_v > maxVertices) { tess_u /= 2; tess_v /= 2; } for (int patch_idx = 0; patch_idx < num_patches_u*num_patches_v; ++patch_idx) { const BezierPatch &patch = patches[patch_idx]; TessellateBezierPatch(dest, inds, count, tess_u, tess_v, patch, origVertType); } delete[] patches; } u32 vertTypeWithIndex16 = (vertType & ~GE_VTYPE_IDX_MASK) | GE_VTYPE_IDX_16BIT; UVScale prevUVScale; if (origVertType & GE_VTYPE_TC_MASK) { // We scaled during Normalize already so let's turn it off when drawing. prevUVScale = gstate_c.uv; gstate_c.uv.uScale = 1.0f; gstate_c.uv.vScale = 1.0f; gstate_c.uv.uOff = 0; gstate_c.uv.vOff = 0; } uint32_t vertTypeID = GetVertTypeID(vertTypeWithIndex16, gstate.getUVGenMode()); int generatedBytesRead; DispatchSubmitPrim(splineBuffer, quadIndices_, primType[prim_type], count, vertTypeID, &generatedBytesRead); DispatchFlush(); if (origVertType & GE_VTYPE_TC_MASK) { gstate_c.uv = prevUVScale; } }
void DrawEngineCommon::SubmitSpline(const void *control_points, const void *indices, int tess_u, int tess_v, int count_u, int count_v, int type_u, int type_v, GEPatchPrimType prim_type, bool computeNormals, bool patchFacing, u32 vertType, int *bytesRead) { PROFILE_THIS_SCOPE("spline"); DispatchFlush(); u16 index_lower_bound = 0; u16 index_upper_bound = count_u * count_v - 1; IndexConverter idxConv(vertType, indices); if (indices) GetIndexBounds(indices, count_u * count_v, vertType, &index_lower_bound, &index_upper_bound); VertexDecoder *origVDecoder = GetVertexDecoder((vertType & 0xFFFFFF) | (gstate.getUVGenMode() << 24)); *bytesRead = count_u * count_v * origVDecoder->VertexSize(); // Real hardware seems to draw nothing when given < 4 either U or V. if (count_u < 4 || count_v < 4) { return; } // Simplify away bones and morph before proceeding SimpleVertex *simplified_control_points = (SimpleVertex *)(decoded + 65536 * 12); u8 *temp_buffer = decoded + 65536 * 18; u32 origVertType = vertType; vertType = NormalizeVertices((u8 *)simplified_control_points, temp_buffer, (u8 *)control_points, index_lower_bound, index_upper_bound, vertType); VertexDecoder *vdecoder = GetVertexDecoder(vertType); int vertexSize = vdecoder->VertexSize(); if (vertexSize != sizeof(SimpleVertex)) { ERROR_LOG(G3D, "Something went really wrong, vertex size: %i vs %i", vertexSize, (int)sizeof(SimpleVertex)); } // TODO: Do something less idiotic to manage this buffer SimpleVertex **points = new SimpleVertex *[count_u * count_v]; // Make an array of pointers to the control points, to get rid of indices. for (int idx = 0; idx < count_u * count_v; idx++) { points[idx] = simplified_control_points + (indices ? idxConv.convert(idx) : idx); } int count = 0; u8 *dest = splineBuffer; SplinePatchLocal patch; patch.tess_u = tess_u; patch.tess_v = tess_v; patch.type_u = type_u; patch.type_v = type_v; patch.count_u = count_u; patch.count_v = count_v; patch.points = points; patch.computeNormals = computeNormals; patch.primType = prim_type; patch.patchFacing = patchFacing; if (g_Config.bHardwareTessellation && g_Config.bHardwareTransform && !g_Config.bSoftwareRendering) { float *pos = (float*)(decoded + 65536 * 18); // Size 4 float float *tex = pos + count_u * count_v * 4; // Size 4 float float *col = tex + count_u * count_v * 4; // Size 4 float const bool hasColor = (origVertType & GE_VTYPE_COL_MASK) != 0; const bool hasTexCoords = (origVertType & GE_VTYPE_TC_MASK) != 0; int posStride, texStride, colStride; tessDataTransfer->PrepareBuffers(pos, tex, col, posStride, texStride, colStride, count_u * count_v, hasColor, hasTexCoords); float *p = pos; float *t = tex; float *c = col; for (int idx = 0; idx < count_u * count_v; idx++) { memcpy(p, points[idx]->pos.AsArray(), 3 * sizeof(float)); p += posStride; if (hasTexCoords) { memcpy(t, points[idx]->uv, 2 * sizeof(float)); t += texStride; } if (hasColor) { memcpy(c, Vec4f::FromRGBA(points[idx]->color_32).AsArray(), 4 * sizeof(float)); c += colStride; } } if (!hasColor) memcpy(col, Vec4f::FromRGBA(points[0]->color_32).AsArray(), 4 * sizeof(float)); tessDataTransfer->SendDataToShader(pos, tex, col, count_u * count_v, hasColor, hasTexCoords); TessellateSplinePatchHardware(dest, quadIndices_, count, patch); numPatches = (count_u - 3) * (count_v - 3); } else { int maxVertexCount = SPLINE_BUFFER_SIZE / vertexSize; TessellateSplinePatch(dest, quadIndices_, count, patch, origVertType, maxVertexCount); } delete[] points; u32 vertTypeWithIndex16 = (vertType & ~GE_VTYPE_IDX_MASK) | GE_VTYPE_IDX_16BIT; UVScale prevUVScale; if ((origVertType & GE_VTYPE_TC_MASK) != 0) { // We scaled during Normalize already so let's turn it off when drawing. prevUVScale = gstate_c.uv; gstate_c.uv.uScale = 1.0f; gstate_c.uv.vScale = 1.0f; gstate_c.uv.uOff = 0.0f; gstate_c.uv.vOff = 0.0f; } uint32_t vertTypeID = GetVertTypeID(vertTypeWithIndex16, gstate.getUVGenMode()); int generatedBytesRead; DispatchSubmitPrim(splineBuffer, quadIndices_, primType[prim_type], count, vertTypeID, &generatedBytesRead); DispatchFlush(); if ((origVertType & GE_VTYPE_TC_MASK) != 0) { gstate_c.uv = prevUVScale; } }
LinkedShader::LinkedShader(ShaderID VSID, Shader *vs, ShaderID FSID, Shader *fs, bool useHWTransform) : useHWTransform_(useHWTransform), program(0), dirtyUniforms(0) { PROFILE_THIS_SCOPE("shaderlink"); program = glCreateProgram(); vs_ = vs; glAttachShader(program, vs->shader); glAttachShader(program, fs->shader); // Bind attribute locations to fixed locations so that they're // the same in all shaders. We use this later to minimize the calls to // glEnableVertexAttribArray and glDisableVertexAttribArray. glBindAttribLocation(program, ATTR_POSITION, "position"); glBindAttribLocation(program, ATTR_TEXCOORD, "texcoord"); glBindAttribLocation(program, ATTR_NORMAL, "normal"); glBindAttribLocation(program, ATTR_W1, "w1"); glBindAttribLocation(program, ATTR_W2, "w2"); glBindAttribLocation(program, ATTR_COLOR0, "color0"); glBindAttribLocation(program, ATTR_COLOR1, "color1"); #if !defined(USING_GLES2) if (gstate_c.featureFlags & GPU_SUPPORTS_DUALSOURCE_BLEND) { // Dual source alpha glBindFragDataLocationIndexed(program, 0, 0, "fragColor0"); glBindFragDataLocationIndexed(program, 0, 1, "fragColor1"); } else if (gl_extensions.VersionGEThan(3, 3, 0)) { glBindFragDataLocation(program, 0, "fragColor0"); } #elif !defined(IOS) if (gl_extensions.GLES3) { if (gstate_c.featureFlags & GPU_SUPPORTS_DUALSOURCE_BLEND) { glBindFragDataLocationIndexedEXT(program, 0, 0, "fragColor0"); glBindFragDataLocationIndexedEXT(program, 0, 1, "fragColor1"); } } #endif glLinkProgram(program); GLint linkStatus = GL_FALSE; glGetProgramiv(program, GL_LINK_STATUS, &linkStatus); if (linkStatus != GL_TRUE) { GLint bufLength = 0; glGetProgramiv(program, GL_INFO_LOG_LENGTH, &bufLength); if (bufLength) { char* buf = new char[bufLength]; glGetProgramInfoLog(program, bufLength, NULL, buf); #ifdef ANDROID ELOG("Could not link program:\n %s", buf); #endif ERROR_LOG(G3D, "Could not link program:\n %s", buf); ERROR_LOG(G3D, "VS desc:\n%s\n", vs->GetShaderString(SHADER_STRING_SHORT_DESC, VSID).c_str()); ERROR_LOG(G3D, "FS desc:\n%s\n", fs->GetShaderString(SHADER_STRING_SHORT_DESC, FSID).c_str()); std::string vs_source = vs->GetShaderString(SHADER_STRING_SOURCE_CODE, VSID); std::string fs_source = fs->GetShaderString(SHADER_STRING_SOURCE_CODE, FSID); ERROR_LOG(G3D, "VS:\n%s\n", vs_source.c_str()); ERROR_LOG(G3D, "FS:\n%s\n", fs_source.c_str()); Reporting::ReportMessage("Error in shader program link: info: %s / fs: %s / vs: %s", buf, fs_source.c_str(), vs_source.c_str()); #ifdef SHADERLOG OutputDebugStringUTF8(buf); OutputDebugStringUTF8(vs_source.c_str()); OutputDebugStringUTF8(fs_source.c_str()); #endif delete [] buf; // we're dead! } // Prevent a buffer overflow. numBones = 0; return; } INFO_LOG(G3D, "Linked shader: vs %i fs %i", (int)vs->shader, (int)fs->shader); u_tex = glGetUniformLocation(program, "tex"); u_proj = glGetUniformLocation(program, "u_proj"); u_proj_through = glGetUniformLocation(program, "u_proj_through"); u_texenv = glGetUniformLocation(program, "u_texenv"); u_fogcolor = glGetUniformLocation(program, "u_fogcolor"); u_fogcoef = glGetUniformLocation(program, "u_fogcoef"); u_alphacolorref = glGetUniformLocation(program, "u_alphacolorref"); u_alphacolormask = glGetUniformLocation(program, "u_alphacolormask"); u_stencilReplaceValue = glGetUniformLocation(program, "u_stencilReplaceValue"); u_testtex = glGetUniformLocation(program, "testtex"); u_fbotex = glGetUniformLocation(program, "fbotex"); u_blendFixA = glGetUniformLocation(program, "u_blendFixA"); u_blendFixB = glGetUniformLocation(program, "u_blendFixB"); u_fbotexSize = glGetUniformLocation(program, "u_fbotexSize"); // Transform u_view = glGetUniformLocation(program, "u_view"); u_world = glGetUniformLocation(program, "u_world"); u_texmtx = glGetUniformLocation(program, "u_texmtx"); if (VSID.Bit(VS_BIT_ENABLE_BONES)) numBones = TranslateNumBones(VSID.Bits(VS_BIT_BONES, 3) + 1); else numBones = 0; u_depthRange = glGetUniformLocation(program, "u_depthRange"); #ifdef USE_BONE_ARRAY u_bone = glGetUniformLocation(program, "u_bone"); #else for (int i = 0; i < 8; i++) { char name[10]; sprintf(name, "u_bone%i", i); u_bone[i] = glGetUniformLocation(program, name); } #endif // Lighting, texturing u_ambient = glGetUniformLocation(program, "u_ambient"); u_matambientalpha = glGetUniformLocation(program, "u_matambientalpha"); u_matdiffuse = glGetUniformLocation(program, "u_matdiffuse"); u_matspecular = glGetUniformLocation(program, "u_matspecular"); u_matemissive = glGetUniformLocation(program, "u_matemissive"); u_uvscaleoffset = glGetUniformLocation(program, "u_uvscaleoffset"); u_texclamp = glGetUniformLocation(program, "u_texclamp"); u_texclampoff = glGetUniformLocation(program, "u_texclampoff"); for (int i = 0; i < 4; i++) { char temp[64]; sprintf(temp, "u_lightpos%i", i); u_lightpos[i] = glGetUniformLocation(program, temp); sprintf(temp, "u_lightdir%i", i); u_lightdir[i] = glGetUniformLocation(program, temp); sprintf(temp, "u_lightatt%i", i); u_lightatt[i] = glGetUniformLocation(program, temp); sprintf(temp, "u_lightangle%i", i); u_lightangle[i] = glGetUniformLocation(program, temp); sprintf(temp, "u_lightspotCoef%i", i); u_lightspotCoef[i] = glGetUniformLocation(program, temp); sprintf(temp, "u_lightambient%i", i); u_lightambient[i] = glGetUniformLocation(program, temp); sprintf(temp, "u_lightdiffuse%i", i); u_lightdiffuse[i] = glGetUniformLocation(program, temp); sprintf(temp, "u_lightspecular%i", i); u_lightspecular[i] = glGetUniformLocation(program, temp); } attrMask = 0; if (-1 != glGetAttribLocation(program, "position")) attrMask |= 1 << ATTR_POSITION; if (-1 != glGetAttribLocation(program, "texcoord")) attrMask |= 1 << ATTR_TEXCOORD; if (-1 != glGetAttribLocation(program, "normal")) attrMask |= 1 << ATTR_NORMAL; if (-1 != glGetAttribLocation(program, "w1")) attrMask |= 1 << ATTR_W1; if (-1 != glGetAttribLocation(program, "w2")) attrMask |= 1 << ATTR_W2; if (-1 != glGetAttribLocation(program, "color0")) attrMask |= 1 << ATTR_COLOR0; if (-1 != glGetAttribLocation(program, "color1")) attrMask |= 1 << ATTR_COLOR1; availableUniforms = 0; if (u_proj != -1) availableUniforms |= DIRTY_PROJMATRIX; if (u_proj_through != -1) availableUniforms |= DIRTY_PROJTHROUGHMATRIX; if (u_texenv != -1) availableUniforms |= DIRTY_TEXENV; if (u_alphacolorref != -1) availableUniforms |= DIRTY_ALPHACOLORREF; if (u_alphacolormask != -1) availableUniforms |= DIRTY_ALPHACOLORMASK; if (u_fogcolor != -1) availableUniforms |= DIRTY_FOGCOLOR; if (u_fogcoef != -1) availableUniforms |= DIRTY_FOGCOEF; if (u_texenv != -1) availableUniforms |= DIRTY_TEXENV; if (u_uvscaleoffset != -1) availableUniforms |= DIRTY_UVSCALEOFFSET; if (u_texclamp != -1) availableUniforms |= DIRTY_TEXCLAMP; if (u_world != -1) availableUniforms |= DIRTY_WORLDMATRIX; if (u_view != -1) availableUniforms |= DIRTY_VIEWMATRIX; if (u_texmtx != -1) availableUniforms |= DIRTY_TEXMATRIX; if (u_stencilReplaceValue != -1) availableUniforms |= DIRTY_STENCILREPLACEVALUE; if (u_blendFixA != -1 || u_blendFixB != -1 || u_fbotexSize != -1) availableUniforms |= DIRTY_SHADERBLEND; if (u_depthRange != -1) availableUniforms |= DIRTY_DEPTHRANGE; // Looping up to numBones lets us avoid checking u_bone[i] #ifdef USE_BONE_ARRAY if (u_bone != -1) { for (int i = 0; i < numBones; i++) { availableUniforms |= DIRTY_BONEMATRIX0 << i; } } #else for (int i = 0; i < numBones; i++) { if (u_bone[i] != -1) availableUniforms |= DIRTY_BONEMATRIX0 << i; } #endif if (u_ambient != -1) availableUniforms |= DIRTY_AMBIENT; if (u_matambientalpha != -1) availableUniforms |= DIRTY_MATAMBIENTALPHA; if (u_matdiffuse != -1) availableUniforms |= DIRTY_MATDIFFUSE; if (u_matemissive != -1) availableUniforms |= DIRTY_MATEMISSIVE; if (u_matspecular != -1) availableUniforms |= DIRTY_MATSPECULAR; for (int i = 0; i < 4; i++) { if (u_lightdir[i] != -1 || u_lightspecular[i] != -1 || u_lightpos[i] != -1) availableUniforms |= DIRTY_LIGHT0 << i; } glUseProgram(program); // Default uniform values glUniform1i(u_tex, 0); glUniform1i(u_fbotex, 1); glUniform1i(u_testtex, 2); // The rest, use the "dirty" mechanism. dirtyUniforms = DIRTY_ALL; }
void DrawEngineCommon::SubmitBezier(const void *control_points, const void *indices, int tess_u, int tess_v, int count_u, int count_v, GEPatchPrimType prim_type, bool computeNormals, bool patchFacing, u32 vertType) { PROFILE_THIS_SCOPE("bezier"); DispatchFlush(); // TODO: Verify correct functionality with < 4. if (count_u < 4 || count_v < 4) return; u16 index_lower_bound = 0; u16 index_upper_bound = count_u * count_v - 1; bool indices_16bit = (vertType & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT; const u8* indices8 = (const u8*)indices; const u16* indices16 = (const u16*)indices; if (indices) GetIndexBounds(indices, count_u*count_v, vertType, &index_lower_bound, &index_upper_bound); // Simplify away bones and morph before proceeding // There are normally not a lot of control points so just splitting decoded should be reasonably safe, although not great. SimpleVertex *simplified_control_points = (SimpleVertex *)(decoded + 65536 * 12); u8 *temp_buffer = decoded + 65536 * 18; u32 origVertType = vertType; vertType = NormalizeVertices((u8 *)simplified_control_points, temp_buffer, (u8 *)control_points, index_lower_bound, index_upper_bound, vertType); VertexDecoder *vdecoder = GetVertexDecoder(vertType); int vertexSize = vdecoder->VertexSize(); if (vertexSize != sizeof(SimpleVertex)) { ERROR_LOG(G3D, "Something went really wrong, vertex size: %i vs %i", vertexSize, (int)sizeof(SimpleVertex)); } // Bezier patches share less control points than spline patches. Otherwise they are pretty much the same (except bezier don't support the open/close thing) int num_patches_u = (count_u - 1) / 3; int num_patches_v = (count_v - 1) / 3; BezierPatch* patches = new BezierPatch[num_patches_u * num_patches_v]; for (int patch_u = 0; patch_u < num_patches_u; patch_u++) { for (int patch_v = 0; patch_v < num_patches_v; patch_v++) { BezierPatch& patch = patches[patch_u + patch_v * num_patches_u]; for (int point = 0; point < 16; ++point) { int idx = (patch_u * 3 + point % 4) + (patch_v * 3 + point / 4) * count_u; if (indices) patch.points[point] = simplified_control_points + (indices_16bit ? indices16[idx] : indices8[idx]); else patch.points[point] = simplified_control_points + idx; } patch.u_index = patch_u * 3; patch.v_index = patch_v * 3; patch.index = patch_v * num_patches_u + patch_u; patch.primType = prim_type; patch.computeNormals = computeNormals; patch.patchFacing = patchFacing; } } int count = 0; u8 *dest = splineBuffer; // Simple approximation of the real tesselation factor. // We shouldn't really split up into separate 4x4 patches, instead we should do something that works // like the splines, so we subdivide across the whole "mega-patch". if (num_patches_u == 0) num_patches_u = 1; if (num_patches_v == 0) num_patches_v = 1; if (tess_u < 4) tess_u = 4; if (tess_v < 4) tess_v = 4; u16 *inds = quadIndices_; int maxVertices = SPLINE_BUFFER_SIZE / vertexSize; for (int patch_idx = 0; patch_idx < num_patches_u*num_patches_v; ++patch_idx) { BezierPatch& patch = patches[patch_idx]; TesselateBezierPatch(dest, inds, count, tess_u, tess_v, patch, origVertType, maxVertices); } delete[] patches; u32 vertTypeWithIndex16 = (vertType & ~GE_VTYPE_IDX_MASK) | GE_VTYPE_IDX_16BIT; UVScale prevUVScale; if (g_Config.bPrescaleUV) { // We scaled during Normalize already so let's turn it off when drawing. prevUVScale = gstate_c.uv; gstate_c.uv.uScale = 1.0f; gstate_c.uv.vScale = 1.0f; gstate_c.uv.uOff = 0; gstate_c.uv.vOff = 0; } int bytesRead; DispatchSubmitPrim(splineBuffer, quadIndices_, primType[prim_type], count, vertTypeWithIndex16, &bytesRead); DispatchFlush(); if (g_Config.bPrescaleUV) { gstate_c.uv = prevUVScale; } }