Пример #1
0
bool VertexShaderCache::InsertByteCode(const VertexShaderUid &uid, const u8 *bytecode, int bytecodelen, bool activate) {
	// Compile the byte code into a D3D9 shader object (may be NULL on failure).
	LPDIRECT3DVERTEXSHADER9 shader = D3D::CreateVertexShaderFromByteCode(bytecode, bytecodelen);

	// Cache the result unconditionally, so even a failed compile is recorded
	// and the same UID is not retried every time.
	VSCacheEntry entry;
	entry.shader = shader;
	vshaders[uid] = entry;
	last_entry = &vshaders[uid];

	if (shader == nullptr)
		return false;

	INCSTAT(stats.numVertexShadersCreated);
	SETSTAT(stats.numVertexShadersAlive, (int)vshaders.size());

	// Only report success when the caller asked us to bind the new shader.
	if (!activate)
		return false;

	D3D::SetVertexShader(shader);
	return true;
}
Пример #2
0
// Decodes one draw command worth of vertices. Returns the number of source
// bytes consumed, 0 for an empty draw, or -1 when not enough data is buffered.
int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bool skip_drawing, bool is_preprocess)
{
	// Nothing to do for an empty draw command.
	if (count == 0)
		return 0;

	VertexLoaderBase* loader = RefreshLoader(vtx_attr_group, is_preprocess);

	// Total number of input bytes this command consumes.
	const int needed_bytes = count * loader->m_VertexSize;
	if ((int)src.size() < needed_bytes)
		return -1;  // Not buffered yet; caller retries later.

	// Skipped frames and the preprocess pass only need the command size.
	if (skip_drawing || is_preprocess)
		return needed_bytes;

	// A change in native vertex format or enabled components forces a flush
	// of everything batched so far.
	const bool format_changed =
			loader->m_native_vertex_format != s_current_vtx_fmt ||
			loader->m_native_components != g_current_components;
	if (format_changed)
		VertexManagerBase::Flush();
	s_current_vtx_fmt = loader->m_native_vertex_format;
	g_current_components = loader->m_native_components;

	// if cull mode is CULL_ALL, tell VertexManager to skip triangles and quads.
	// They still need to go through vertex loading, because we need to calculate a zfreeze reference slope.
	const bool cull_everything = bpmem.genMode.cullmode == GenMode::CULL_ALL && primitive < 5;

	DataReader dst = VertexManagerBase::PrepareForAdditionalData(
			primitive, count, loader->m_native_vtx_decl.stride, cull_everything);

	count = loader->RunVertices(src, dst, count);

	IndexGenerator::AddIndices(primitive, count);
	VertexManagerBase::FlushData(count, loader->m_native_vtx_decl.stride);

	ADDSTAT(stats.thisFrame.numPrims, count);
	INCSTAT(stats.thisFrame.numPrimitiveJoins);
	return needed_bytes;
}
Пример #3
0
bool VertexShaderCache::InsertByteCode(const VertexShaderUid& uid, D3DBlob* bcodeblob)
{
  // Build the shader object first; on failure nothing is cached.
  ID3D11VertexShader* shader = D3D::CreateVertexShaderFromByteCode(bcodeblob);
  if (!shader)
    return false;

  // TODO: Somehow make the debug name a bit more specific
  D3D::SetDebugObjectName((ID3D11DeviceChild*)shader, "a vertex shader of VertexShaderCache");

  // Record the compiled shader (and keep its byte code) in the cache table.
  VSCacheEntry entry;
  entry.shader = shader;
  entry.SetByteCode(bcodeblob);

  vshaders[uid] = entry;
  last_entry = &vshaders[uid];

  INCSTAT(stats.numVertexShadersCreated);
  SETSTAT(stats.numVertexShadersAlive, (int)vshaders.size());
  return true;
}
Пример #4
0
bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size, bool skip_drawing)
{
	if (!count)
		return true;

	CPState* state = &g_main_cp_state;

	VertexLoader* loader = RefreshLoader(vtx_attr_group, state);

	size_t size = count * loader->GetVertexSize();
	if (buf_size < size)
		return false;

	if (skip_drawing || (bpmem.genMode.cullmode == GenMode::CULL_ALL && primitive < 5))
	{
		// if cull mode is CULL_ALL, ignore triangles and quads
		DataSkip((u32)size);
		return true;
	}

	NativeVertexFormat* native = loader->GetNativeVertexFormat();

	// If the native vertex format changed, force a flush.
	if (native != s_current_vtx_fmt)
		VertexManager::Flush();
	s_current_vtx_fmt = native;

	VertexManager::PrepareForAdditionalData(primitive, count,
			loader->GetNativeVertexDeclaration().stride);

	loader->RunVertices(state->vtx_attr[vtx_attr_group], primitive, count);

	IndexGenerator::AddIndices(primitive, count);

	ADDSTAT(stats.thisFrame.numPrims, count);
	INCSTAT(stats.thisFrame.numPrimitiveJoins);
	return true;
}
Пример #5
0
void VertexManager::Draw(u32 stride)
{
  const u32 index_size = IndexGenerator::GetIndexLen();
  const u32 max_index = IndexGenerator::GetNumVerts();
  GLenum primitive_mode = 0;

  // Select the GL topology; points and lines also disable back-face culling.
  switch (m_current_primitive_type)
  {
  case PRIMITIVE_POINTS:
    glDisable(GL_CULL_FACE);
    primitive_mode = GL_POINTS;
    break;
  case PRIMITIVE_LINES:
    glDisable(GL_CULL_FACE);
    primitive_mode = GL_LINES;
    break;
  case PRIMITIVE_TRIANGLES:
    // With primitive restart support triangles are batched as strips,
    // otherwise as plain triangle lists.
    if (g_ActiveConfig.backend_info.bSupportsPrimitiveRestart)
      primitive_mode = GL_TRIANGLE_STRIP;
    else
      primitive_mode = GL_TRIANGLES;
    break;
  }

  if (g_ogl_config.bSupportsGLBaseVertex)
  {
    glDrawRangeElementsBaseVertex(primitive_mode, 0, max_index, index_size, GL_UNSIGNED_SHORT,
                                  (u8*)nullptr + s_index_offset, (GLint)s_baseVertex);
  }
  else
  {
    glDrawRangeElements(primitive_mode, 0, max_index, index_size, GL_UNSIGNED_SHORT,
                        (u8*)nullptr + s_index_offset);
  }

  INCSTAT(stats.thisFrame.numDrawCalls);

  // Points/lines disabled culling above; restore the configured mode.
  if (m_current_primitive_type != PRIMITIVE_TRIANGLES)
    static_cast<Renderer*>(g_renderer.get())->SetGenerationMode();
}
Пример #6
0
bool PixelShaderCache::InsertByteCode(const PIXELSHADERUID &uid, const u8 *bytecode, int bytecodelen, bool activate)
{
	// Compile the byte code into a D3D9 shader object (may be NULL on failure).
	LPDIRECT3DPIXELSHADER9 shader = D3D::CreatePixelShaderFromByteCode(bytecode, bytecodelen);

	// Cache the result even on failure so a bad UID is not recompiled again.
	PSCacheEntry newentry;
	newentry.shader = shader;
	PixelShaders[uid] = newentry;
	last_entry = &PixelShaders[uid];

	if (shader == nullptr)
	{
		// INCSTAT(stats.numPixelShadersFailed);
		return false;
	}

	INCSTAT(stats.numPixelShadersCreated);
	SETSTAT(stats.numPixelShadersAlive, PixelShaders.size());

	// Bind the new shader only when requested.
	if (activate)
		D3D::SetPixelShader(shader);
	return true;
}
Пример #7
0
// Converts one draw command worth of vertices into the native vertex buffer.
// On success, readsize/writesize receive the consumed source bytes and the
// produced destination bytes. Returns false when not enough data is buffered.
bool ConvertVertices(VertexLoaderParameters &parameters, u32 &readsize, u32 &writesize)
{
	if (parameters.needloaderrefresh)
		UpdateLoader(parameters);

	// Fetch the loader for this VAT group, falling back when the specialized
	// implementation cannot run in the current environment.
	auto loader = g_main_cp_state.vertex_loaders[parameters.vtx_attr_group];
	if (!loader->EnvironmentIsSupported())
		loader = loader->GetFallback();

	readsize = parameters.count * loader->m_VertexSize;
	if (parameters.buf_size < readsize)
		return false;  // Not buffered yet; caller retries later.

	if (parameters.skip_draw)
		return true;

	// Lookup pointers for any vertex arrays.
	UpdateVertexArrayPointers();

	// Flush if our vertex format is different from the currently set.
	NativeVertexFormat *nativefmt = loader->m_native_vertex_format;
	if (s_current_vtx_fmt != nullptr && s_current_vtx_fmt != nativefmt)
		VertexManagerBase::Flush();
	s_current_vtx_fmt = nativefmt;
	g_current_components = loader->m_native_components;

	VertexManagerBase::PrepareForAdditionalData(parameters.primitive, parameters.count, loader->m_native_stride);
	parameters.destination = VertexManagerBase::s_pCurBufferPointer;

	const s32 finalcount = loader->RunVertices(parameters);
	writesize = loader->m_native_stride * finalcount;
	IndexGenerator::AddIndices(parameters.primitive, finalcount);

	ADDSTAT(stats.thisFrame.numPrims, finalcount);
	INCSTAT(stats.thisFrame.numPrimitiveJoins);
	return true;
}
Пример #8
0
// Compiles the given byte code into a D3D11 pixel shader and records it in
// the cache. Returns false when shader creation fails (nothing is cached).
bool PixelShaderCache::InsertByteCode(const PixelShaderUid &uid, const void* bytecode, unsigned int bytecodelen)
{
	ID3D11PixelShader* shader = D3D::CreatePixelShaderFromByteCode(bytecode, bytecodelen);
	if (shader == nullptr)
	{
		// INCSTAT(stats.numPixelShadersFailed);
		return false;
	}
	// BUGFIX: removed a second `if (!shader)` check that followed the cache
	// insertion below -- it was unreachable after the early return above.

	// TODO: Somehow make the debug name a bit more specific
	D3D::SetDebugObjectName((ID3D11DeviceChild*)shader, "a pixel shader of PixelShaderCache");

	// Make an entry in the table
	PSCacheEntry newentry;
	newentry.shader = shader;
	PixelShaders[uid] = newentry;
	last_entry = &PixelShaders[uid];

	INCSTAT(stats.numPixelShadersCreated);
	SETSTAT(stats.numPixelShadersAlive, PixelShaders.size());
	return true;
}
Пример #9
0
// Decodes and executes GX FIFO commands from 'reader' until the buffered data
// runs out or a command's operands are not fully available yet.
// NOTE(review): 'sizeCheck' and 'is_preprocess' are not declared in this
// snippet -- presumably compile-time/template parameters of the enclosing
// definition, and the 'end:' label lives in the truncated tail; confirm
// against the full file.
u8* OpcodeDecoder_Run(DataReader& reader, u32* cycles)
{
	u32 totalCycles = 0;  // estimated GPU cycles consumed by decoded commands
	u8* opcodeStart;      // first byte of the command currently being decoded
	while (true)
	{
		opcodeStart = reader.GetReadPosition();
		// Stop cleanly when no more data is buffered.
		if (!reader.size())
			goto end;

		u8 cmd_byte = reader.Read<u8>();
		// Bytes remaining after the command byte, used for operand size checks.
		size_t distance = reader.size();

		switch (cmd_byte)
		{
		case GX_NOP:
		{
			totalCycles += GX_NOP_CYCLES; // Hm, this means that we scan over nop streams pretty slowly...
		}
		break;
		case GX_UNKNOWN_RESET:
		{
			totalCycles += GX_NOP_CYCLES; // Datel software uses this command
			DEBUG_LOG(VIDEO, "GX Reset?: %08x", cmd_byte);
		}
		break;
		case GX_LOAD_CP_REG:
		{
			// Bail out (to retry later) if the operands aren't buffered yet.
			if (sizeCheck && distance < GX_LOAD_CP_REG_SIZE)
				goto end;
			totalCycles += GX_LOAD_CP_REG_CYCLES;
			u8 sub_cmd = reader.Read<u8>();
			u32 value = reader.Read<u32>();
			LoadCPReg<is_preprocess>(sub_cmd, value);
			if (!is_preprocess)
				INCSTAT(stats.thisFrame.numCPLoads);
		}
		break;
		case GX_LOAD_XF_REG:
		{
			if (sizeCheck && distance < GX_LOAD_XF_REG_SIZE)
				goto end;
			u32 Cmd2 = reader.Read<u32>();
			distance -= GX_LOAD_XF_REG_SIZE;
			// Transfer size in 32-bit words is encoded in bits 16-19, plus one.
			int transfer_size = ((Cmd2 >> 16) & 15) + 1;
			if (sizeCheck && distance < (transfer_size * sizeof(u32)))
				goto end;
			totalCycles += GX_LOAD_XF_REG_BASE_CYCLES + GX_LOAD_XF_REG_TRANSFER_CYCLES * transfer_size;
			if (is_preprocess)
			{
				// Preprocessing only needs to advance past the payload.
				reader.ReadSkip(transfer_size * sizeof(u32));
			}
			else
			{
				u32 xf_address = Cmd2 & 0xFFFF;
				LoadXFReg(transfer_size, xf_address);
				INCSTAT(stats.thisFrame.numXFLoads);
			}
		}
		break;
		case GX_LOAD_INDX_A: //used for position matrices
		case GX_LOAD_INDX_B: //used for normal matrices
		case GX_LOAD_INDX_C: //used for postmatrices
		case GX_LOAD_INDX_D: //used for lights
		{
			if (sizeCheck && distance < GX_LOAD_INDX_SIZE)
				goto end;
			totalCycles += GX_LOAD_INDX_CYCLES;
			// Maps the four opcodes onto reference arrays 0xC..0xF.
			const s32 ref_array = (cmd_byte >> 3) + 8;
			if (is_preprocess)
				PreprocessIndexedXF(reader.Read<u32>(), ref_array);
			else
				LoadIndexedXF(reader.Read<u32>(), ref_array);
		}
		break;
		case GX_CMD_CALL_DL:
		{
			if (sizeCheck && distance < GX_CMD_CALL_DL_SIZE)
				goto end;
			u32 address = reader.Read<u32>();
			u32 count = reader.Read<u32>();
			if (is_preprocess)
				InterpretDisplayListPreprocess(address, count);
			else
				totalCycles += GX_CMD_CALL_DL_BASE_CYCLES + InterpretDisplayList(address, count);
		}
		break;
		case GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics registers after that
		{
			totalCycles += GX_CMD_UNKNOWN_METRICS_CYCLES;
			DEBUG_LOG(VIDEO, "GX 0x44: %08x", cmd_byte);
		}
		break;
		case GX_CMD_INVL_VC: // Invalidate Vertex Cache
		{
			totalCycles += GX_CMD_INVL_VC_CYCLES;
			DEBUG_LOG(VIDEO, "Invalidate (vertex cache?)");
		}
		break;
		case GX_LOAD_BP_REG:
		{
			if (sizeCheck && distance < GX_LOAD_BP_REG_SIZE)
				goto end;
			totalCycles += GX_LOAD_BP_REG_CYCLES;
			u32 bp_cmd = reader.Read<u32>();
			if (is_preprocess)
			{
				LoadBPRegPreprocess(bp_cmd);
			}
			else
			{
				LoadBPReg(bp_cmd);
				INCSTAT(stats.thisFrame.numBPLoads);
			}
		}
		break;
		// draw primitives
		default:
			if ((cmd_byte & GX_DRAW_PRIMITIVES) == 0x80)
			{
				// load vertices
				if (sizeCheck && distance < GX_DRAW_PRIMITIVES_SIZE)
					goto end;

				u32 count = reader.Read<u16>();
				distance -= GX_DRAW_PRIMITIVES_SIZE;
				if (count)
				{
					// Use the CP state matching the pass we are running.
					CPState& state = is_preprocess ? g_preprocess_cp_state : g_main_cp_state;
					VertexLoaderParameters parameters;
					parameters.count = count;
					parameters.buf_size = distance;
					parameters.primitive = (cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT;
					u32 vtx_attr_group = cmd_byte & GX_VAT_MASK;
					parameters.vtx_attr_group = vtx_attr_group;
					parameters.needloaderrefresh = (state.attr_dirty & (1u << vtx_attr_group)) != 0;
					parameters.skip_draw = g_bSkipCurrentFrame;
					parameters.VtxDesc = &state.vtx_desc;
					parameters.VtxAttr = &state.vtx_attr[vtx_attr_group];
					parameters.source = reader.GetReadPosition();
					state.attr_dirty &= ~(1 << vtx_attr_group);
					u32 readsize = 0;
					if (is_preprocess)
					{
						// Preprocess pass: only compute the data size and skip it.
						u32 components = 0;
						VertexLoaderManager::GetVertexSizeAndComponents(parameters, readsize, components);
						readsize *= count;
						if (distance >= readsize)
						{
							totalCycles += GX_NOP_CYCLES + GX_DRAW_PRIMITIVES_CYCLES * parameters.count;
							reader.ReadSkip(readsize);
						}
						else
						{
							goto end;
						}
					}
					else
					{
						u32 writesize = 0;
						if (VertexLoaderManager::ConvertVertices(parameters, readsize, writesize))
						{
							totalCycles += GX_NOP_CYCLES + GX_DRAW_PRIMITIVES_CYCLES * parameters.count;
							reader.ReadSkip(readsize);
							VertexManagerBase::s_pCurBufferPointer += writesize;
						}
						else
						{
							// Vertex data not fully buffered; retry later.
							goto end;
						}
					}
				}
				else
				{
					totalCycles += GX_NOP_CYCLES;
				}
			}
			else
			{
				// Unknown opcode: report it once, then keep consuming bytes.
				if (!s_bFifoErrorSeen)
					UnknownOpcode(cmd_byte, opcodeStart, is_preprocess);
				ERROR_LOG(VIDEO, "FIFO: Unknown Opcode(0x%02x @ %p, preprocessing = %s)", cmd_byte, opcodeStart, is_preprocess ? "yes" : "no");
				s_bFifoErrorSeen = true;
				totalCycles += 1;
			}
			break;
		}
Пример #10
0
// Decodes a single GX command starting at the current read position of the
// global video data stream. Returns the estimated cycle cost, or 0 when the
// command's operands are not fully buffered before 'end'.
// NOTE(review): 'sizeCheck' is not declared in this snippet -- presumably a
// compile-time/template parameter of the enclosing definition; the returned-0
// retry path presumably rewinds the read position in the caller -- confirm.
inline u32 Decode(const u8* end)
{
	const u8 *opcodeStart = g_VideoData.GetReadPosition();
	// Nothing between the cursor and 'end': nothing to decode.
	if (opcodeStart == end)
		return 0;

	u8 cmd_byte = g_VideoData.Read<u8>();
	// Bytes remaining after the command byte, for operand size checks.
	size_t distance = (size_t)(end - g_VideoData.GetReadPosition());
	u32 cycles;

	switch (cmd_byte)
	{
	case GX_NOP:
	{
		cycles = GX_NOP_CYCLES; // Hm, this means that we scan over nop streams pretty slowly...
	}
	break;
	case GX_UNKNOWN_RESET:
	{
		cycles = GX_NOP_CYCLES; // Datel software uses this command
		DEBUG_LOG(VIDEO, "GX Reset?: %08x", cmd_byte);
	}
	break;
	case GX_LOAD_CP_REG:
	{
		if (sizeCheck && distance < GX_LOAD_CP_REG_SIZE)
			return 0;
		cycles = GX_LOAD_CP_REG_CYCLES;
		u8 sub_cmd = g_VideoData.Read<u8>();
		u32 value = g_VideoData.Read<u32>();
		LoadCPReg(sub_cmd, value);
		INCSTAT(stats.thisFrame.numCPLoads);
	}
	break;
	case GX_LOAD_XF_REG:
	{
		if (sizeCheck && distance < GX_LOAD_XF_REG_SIZE)
			return 0;
		u32 Cmd2 = g_VideoData.Read<u32>();
		distance -= GX_LOAD_XF_REG_SIZE;
		// Transfer size in 32-bit words is encoded in bits 16-19, plus one.
		int transfer_size = ((Cmd2 >> 16) & 15) + 1;
		if (sizeCheck && (distance < (transfer_size * sizeof(u32))))
			return 0;
		cycles = GX_LOAD_XF_REG_BASE_CYCLES + GX_LOAD_XF_REG_TRANSFER_CYCLES * transfer_size;
		u32 xf_address = Cmd2 & 0xFFFF;
		LoadXFReg(transfer_size, xf_address);

		INCSTAT(stats.thisFrame.numXFLoads);
	}
	break;
	case GX_LOAD_INDX_A: //used for position matrices
	{
		if (sizeCheck && distance < GX_LOAD_INDX_A_SIZE)
			return 0;
		cycles = GX_LOAD_INDX_A_CYCLES;
		LoadIndexedXF(g_VideoData.Read<u32>(), 0xC);
	}
	break;
	case GX_LOAD_INDX_B: //used for normal matrices
	{
		if (sizeCheck && distance < GX_LOAD_INDX_B_SIZE)
			return 0;
		cycles = GX_LOAD_INDX_B_CYCLES;
		LoadIndexedXF(g_VideoData.Read<u32>(), 0xD);
	}
	break;
	case GX_LOAD_INDX_C: //used for postmatrices
	{
		if (sizeCheck && distance < GX_LOAD_INDX_C_SIZE)
			return 0;
		cycles = GX_LOAD_INDX_C_CYCLES;
		LoadIndexedXF(g_VideoData.Read<u32>(), 0xE);
	}
	break;
	case GX_LOAD_INDX_D: //used for lights
	{
		if (sizeCheck && distance < GX_LOAD_INDX_D_SIZE)
			return 0;
		cycles = GX_LOAD_INDX_D_CYCLES;
		LoadIndexedXF(g_VideoData.Read<u32>(), 0xF);
	}
	break;
	case GX_CMD_CALL_DL:
	{
		if (sizeCheck && distance < GX_CMD_CALL_DL_SIZE)
			return 0;
		u32 address = g_VideoData.Read<u32>();
		u32 count = g_VideoData.Read<u32>();
		cycles = GX_CMD_CALL_DL_BASE_CYCLES + InterpretDisplayList(address, count);
	}
	break;
	case GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics registers after that
	{
		cycles = GX_CMD_UNKNOWN_METRICS_CYCLES;
		DEBUG_LOG(VIDEO, "GX 0x44: %08x", cmd_byte);
	}
	break;
	case GX_CMD_INVL_VC: // Invalidate Vertex Cache
	{
		cycles = GX_CMD_INVL_VC_CYCLES;
		DEBUG_LOG(VIDEO, "Invalidate (vertex cache?)");
	}
	break;
	case GX_LOAD_BP_REG:
	{
		if (sizeCheck && distance < GX_LOAD_BP_REG_SIZE)
			return 0;
		cycles = GX_LOAD_BP_REG_CYCLES;
		u32 bp_cmd = g_VideoData.Read<u32>();
		LoadBPReg(bp_cmd);
		INCSTAT(stats.thisFrame.numBPLoads);
	}
	break;
		// draw primitives
	default:
		if ((cmd_byte & GX_DRAW_PRIMITIVES) == 0x80)
		{
			// load vertices
			if (sizeCheck && distance < GX_DRAW_PRIMITIVES_SIZE)
				return 0;

			u32 count = g_VideoData.Read<u16>();
			distance -= GX_DRAW_PRIMITIVES_SIZE;
			if (count)
			{
				VertexLoaderParameters parameters;
				parameters.count = count;
				parameters.buf_size = distance;
				parameters.primitive = (cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT;
				parameters.vtx_attr_group = cmd_byte & GX_VAT_MASK;
				parameters.needloaderrefresh = (g_main_cp_state.attr_dirty & (1 << parameters.vtx_attr_group)) != 0;
				parameters.skip_draw = g_bSkipCurrentFrame;
				parameters.VtxDesc = &g_main_cp_state.vtx_desc;
				parameters.VtxAttr = &g_main_cp_state.vtx_attr[parameters.vtx_attr_group];
				parameters.source = g_VideoData.GetReadPosition();
				g_main_cp_state.attr_dirty &= ~(1 << parameters.vtx_attr_group);
				u32 readsize = 0;
				u32 writesize = 0;
				if (VertexLoaderManager::ConvertVertices(parameters, readsize, writesize))
				{
					cycles = GX_NOP_CYCLES + GX_DRAW_PRIMITIVES_CYCLES * parameters.count;
					g_VideoData.ReadSkip(readsize);
					VertexManagerBase::s_pCurBufferPointer += writesize;
				}
				else
				{
					// Vertex data not fully buffered; retry later.
					return 0;
				}
			}
			else
			{
				cycles = GX_NOP_CYCLES;
			}
		}
		else
		{
			if (!s_bFifoErrorSeen
Пример #11
0
// Decodes and executes GX FIFO commands from 'src' until the buffered data
// runs out. 'in_display_list' guards against recursive display-list calls.
// NOTE(review): 'is_preprocess' is not declared in this snippet -- presumably
// a compile-time/template parameter of the enclosing definition, and the
// 'end:' label lives in the truncated tail; confirm against the full file.
u8* OpcodeDecoder_Run(DataReader src, u32* cycles, bool in_display_list)
{
	u32 totalCycles = 0;  // estimated GPU cycles consumed by decoded commands
	u8* opcodeStart;      // first byte of the command currently being decoded
	while (true)
	{
		opcodeStart = src.GetPointer();

		// Stop cleanly when no more data is buffered.
		if (!src.size())
			goto end;

		u8 cmd_byte = src.Read<u8>();
		int refarray;
		switch (cmd_byte)
		{
		case GX_NOP:
			totalCycles += 6; // Hm, this means that we scan over nop streams pretty slowly...
			break;

		case GX_UNKNOWN_RESET:
			totalCycles += 6; // Datel software uses this command
			DEBUG_LOG(VIDEO, "GX Reset?: %08x", cmd_byte);
			break;

		case GX_LOAD_CP_REG:
			{
				// 1-byte sub-command + 4-byte value must be buffered.
				if (src.size() < 1 + 4)
					goto end;
				totalCycles += 12;
				u8 sub_cmd = src.Read<u8>();
				u32 value =  src.Read<u32>();
				LoadCPReg(sub_cmd, value, is_preprocess);
				if (!is_preprocess)
					INCSTAT(stats.thisFrame.numCPLoads);
			}
			break;

		case GX_LOAD_XF_REG:
			{
				if (src.size() < 4)
					goto end;
				u32 Cmd2 =  src.Read<u32>();
				// Transfer size in 32-bit words is encoded in bits 16-19, plus one.
				int transfer_size = ((Cmd2 >> 16) & 15) + 1;
				if (src.size() < transfer_size * sizeof(u32))
					goto end;
				totalCycles += 18 + 6 * transfer_size;
				if (!is_preprocess)
				{
					u32 xf_address = Cmd2 & 0xFFFF;
					LoadXFReg(transfer_size, xf_address, src);

					INCSTAT(stats.thisFrame.numXFLoads);
				}
				// Advance past the payload in either pass.
				src.Skip<u32>(transfer_size);
			}
			break;

		case GX_LOAD_INDX_A: //used for position matrices
			refarray = 0xC;
			goto load_indx;
		case GX_LOAD_INDX_B: //used for normal matrices
			refarray = 0xD;
			goto load_indx;
		case GX_LOAD_INDX_C: //used for postmatrices
			refarray = 0xE;
			goto load_indx;
		case GX_LOAD_INDX_D: //used for lights
			refarray = 0xF;
			goto load_indx;
		load_indx:
			if (src.size() < 4)
				goto end;
			totalCycles += 6;
			if (is_preprocess)
				PreprocessIndexedXF(src.Read<u32>(), refarray);
			else
				LoadIndexedXF(src.Read<u32>(), refarray);
			break;

		case GX_CMD_CALL_DL:
			{
				if (src.size() < 8)
					goto end;
				u32 address = src.Read<u32>();
				u32 count = src.Read<u32>();

				if (in_display_list)
				{
					totalCycles += 6;
					WARN_LOG(VIDEO,"recursive display list detected");
				}
				else
				{
					if (is_preprocess)
						InterpretDisplayListPreprocess(address, count);
					else
						totalCycles += 6 + InterpretDisplayList(address, count);
				}
			}
			break;

		case GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics registers after that
			totalCycles += 6;
			DEBUG_LOG(VIDEO, "GX 0x44: %08x", cmd_byte);
			break;

		case GX_CMD_INVL_VC: // Invalidate Vertex Cache
			totalCycles += 6;
			DEBUG_LOG(VIDEO, "Invalidate (vertex cache?)");
			break;

		case GX_LOAD_BP_REG:
			// In skipped_frame case: We have to let BP writes through because they set
			// tokens and stuff.  TODO: Call a much simplified LoadBPReg instead.
			{
				if (src.size() < 4)
					goto end;
				totalCycles += 12;
				u32 bp_cmd = src.Read<u32>();
				if (is_preprocess)
				{
					LoadBPRegPreprocess(bp_cmd);
				}
				else
				{
					LoadBPReg(bp_cmd);
					INCSTAT(stats.thisFrame.numBPLoads);
				}
			}
			break;

		// draw primitives
		default:
			if ((cmd_byte & 0xC0) == 0x80)
			{
				// load vertices
				if (src.size() < 2)
					goto end;
				u16 num_vertices = src.Read<u16>();
				// Negative return means the vertex data is not fully buffered.
				int bytes = VertexLoaderManager::RunVertices(
					cmd_byte & GX_VAT_MASK,   // Vertex loader index (0 - 7)
					(cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT,
					num_vertices,
					src,
					Fifo::g_bSkipCurrentFrame,
					is_preprocess);

				if (bytes < 0)
					goto end;

				src.Skip(bytes);

				// 4 GPU ticks per vertex, 3 CPU ticks per GPU tick
				totalCycles += num_vertices * 4 * 3 + 6;
			}
			else
			{
				// Unknown opcode: report it once, then keep consuming bytes.
				if (!s_bFifoErrorSeen)
					UnknownOpcode(cmd_byte, opcodeStart, is_preprocess);
				ERROR_LOG(VIDEO, "FIFO: Unknown Opcode(0x%02x @ %p, preprocessing = %s)", cmd_byte, opcodeStart, is_preprocess ? "yes" : "no");
				s_bFifoErrorSeen = true;
				totalCycles += 1;
			}
			break;
		}

		// Display lists get added directly into the FIFO stream
		if (!is_preprocess && g_bRecordFifoData && cmd_byte != GX_CMD_CALL_DL)
		{
			u8* opcodeEnd;
			opcodeEnd = src.GetPointer();
			FifoRecorder::GetInstance().WriteGPCommand(opcodeStart, u32(opcodeEnd - opcodeStart));
		}
	}
Пример #12
0
// Reacts to a geometry shader UID change: looks up (or creates) the cache
// entry and, if this UID has never been compiled, queues an asynchronous
// compile job whose result handler stores the byte code and updates stats.
void ShaderCache::HandleGSUIDChange(
	const GeometryShaderUid &gs_uid,
	bool on_gpu_thread)
{
	// Pass-through geometry shaders need no compilation at all.
	if (gs_uid.GetUidData().IsPassthrough())
	{
		s_last_geometry_shader_bytecode = &s_pass_entry;
		return;
	}

	// Look up/create the cache slot under the lock; the entry pointer stays
	// valid afterwards because the cache owns it.
	s_shaders_lock.lock();
	ByteCodeCacheEntry* entry = &gs_bytecode_cache->GetOrAdd(gs_uid);
	s_shaders_lock.unlock();
	if (on_gpu_thread)
	{
		s_last_geometry_shader_bytecode = entry;
	}

	// test_and_set() returns the previous flag value: true means another
	// caller already queued (or finished) compilation of this UID.
	if (entry->m_initialized.test_and_set())
	{
		return;
	}

	// Need to compile a new shader
	ShaderCompilerWorkUnit *wunit = s_compiler->NewUnit(GEOMETRYSHADERGEN_BUFFERSIZE);
	wunit->GenerateCodeHandler = [gs_uid](ShaderCompilerWorkUnit* wunit)
	{
		ShaderCode code;
		code.SetBuffer(wunit->code.data());
		GenerateGeometryShaderCode(code, gs_uid.GetUidData(), API_D3D11);
		wunit->codesize = (u32)code.BufferSize();
	};

	wunit->entrypoint = "main";
	wunit->flags = D3DCOMPILE_SKIP_VALIDATION | D3DCOMPILE_OPTIMIZATION_LEVEL3;
	wunit->target = D3D::GeometryShaderVersionString();

	wunit->ResultHandler = [gs_uid, entry](ShaderCompilerWorkUnit* wunit)
	{
		if (SUCCEEDED(wunit->cresult))
		{
			D3DBlob* shaderBuffer = new D3DBlob(wunit->shaderbytecode);
			s_gs_disk_cache.Append(gs_uid, shaderBuffer->Data(), shaderBuffer->Size());
			PushByteCode(entry, shaderBuffer);
			wunit->shaderbytecode->Release();
			wunit->shaderbytecode = nullptr;
			// BUGFIX: was counting the pixel shader cache (ps_bytecode_cache)
			// for the geometry shader statistic.
			SETSTAT(stats.numGeometryShadersAlive, static_cast<int>(gs_bytecode_cache->size()));
			INCSTAT(stats.numGeometryShadersCreated);
		}
		else
		{
			// Dump the generated source and the compiler error for diagnosis.
			static int num_failures = 0;
			std::string filename = StringFromFormat("%sbad_gs_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++);
			std::ofstream file;
			OpenFStream(file, filename, std::ios_base::out);
			file << ((const char *)wunit->code.data());
			file << ((const char *)wunit->error->GetBufferPointer());
			file.close();

			// BUGFIX: passing a std::string through printf-style varargs for
			// %s is undefined behavior; pass a C string instead.
			PanicAlert("Failed to compile geometry shader!\nThis usually happens when trying to use Dolphin with an outdated GPU or integrated GPU like the Intel GMA series.\n\nIf you're sure this is Dolphin's error anyway, post the contents of %s along with this error message at the forums.\n\nDebug info (%s):\n%s",
				filename.c_str(),
				D3D::GeometryShaderVersionString(),
				(char*)wunit->error->GetBufferPointer());
		}
	};
	s_compiler->CompileShaderAsync(wunit);
}
Пример #13
0
// Issues the indexed draw calls for the batched triangle, line and point
// geometry accumulated by IndexGenerator, using the currently active
// vertex/index buffers. Lines and points are expanded by dedicated geometry
// shaders (m_lineShader / m_pointShader) and are drawn with culling disabled.
void VertexManager::Draw(UINT stride)
{
	D3D::context->IASetVertexBuffers(0, 1, &m_vertexBuffers[m_activeVertexBuffer], &stride, &m_vertexDrawOffset);
	D3D::context->IASetIndexBuffer(m_indexBuffers[m_activeIndexBuffer], DXGI_FORMAT_R16_UINT, 0);
	
	// Triangles first: plain indexed draw, no geometry shader involved.
	if (IndexGenerator::GetNumTriangles() > 0)
	{
		D3D::context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
		D3D::context->DrawIndexed(IndexGenerator::GetTriangleindexLen(), m_triangleDrawIndex, 0);
		INCSTAT(stats.thisFrame.numIndexedDrawCalls);
	}
	// Disable culling for lines and points
	if (IndexGenerator::GetNumLines() > 0 || IndexGenerator::GetNumPoints() > 0)
		((DX11::Renderer*)g_renderer)->ApplyCullDisable();
	if (IndexGenerator::GetNumLines() > 0)
	{
		// GX line width is in 1/6th-pixel units; viewport extents feed the
		// geometry shader that expands each line into a screen-space quad.
		float lineWidth = float(bpmem.lineptwidth.linesize) / 6.f;
		float texOffset = LINE_PT_TEX_OFFSETS[bpmem.lineptwidth.lineoff];
		float vpWidth = 2.0f * xfregs.viewport.wd;
		float vpHeight = -2.0f * xfregs.viewport.ht;

		bool texOffsetEnable[8];

		// Per-texcoord flag: whether GX applies a texture-coordinate offset
		// along the line.
		for (int i = 0; i < 8; ++i)
			texOffsetEnable[i] = bpmem.texcoords[i].s.line_offset;

		if (m_lineShader.SetShader(g_nativeVertexFmt->m_components, lineWidth,
			texOffset, vpWidth, vpHeight, texOffsetEnable))
		{
			D3D::context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_LINELIST);
			D3D::context->DrawIndexed(IndexGenerator::GetLineindexLen(), m_lineDrawIndex, 0);
			INCSTAT(stats.thisFrame.numIndexedDrawCalls);

			// Unbind the line-expansion geometry shader again.
			D3D::context->GSSetShader(NULL, NULL, 0);
		}
	}
	if (IndexGenerator::GetNumPoints() > 0)
	{
		// Same scheme as lines: point size in 1/6th-pixel units, expanded to
		// quads by the point geometry shader.
		float pointSize = float(bpmem.lineptwidth.pointsize) / 6.f;
		float texOffset = LINE_PT_TEX_OFFSETS[bpmem.lineptwidth.pointoff];
		float vpWidth = 2.0f * xfregs.viewport.wd;
		float vpHeight = -2.0f * xfregs.viewport.ht;

		bool texOffsetEnable[8];

		for (int i = 0; i < 8; ++i)
			texOffsetEnable[i] = bpmem.texcoords[i].s.point_offset;

		if (m_pointShader.SetShader(g_nativeVertexFmt->m_components, pointSize,
			texOffset, vpWidth, vpHeight, texOffsetEnable))
		{
			D3D::context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_POINTLIST);
			D3D::context->DrawIndexed(IndexGenerator::GetPointindexLen(), m_pointDrawIndex, 0);
			INCSTAT(stats.thisFrame.numIndexedDrawCalls);

			D3D::context->GSSetShader(NULL, NULL, 0);
		}
	}
	// Restore the cull state we disabled for lines/points above.
	if (IndexGenerator::GetNumLines() > 0 || IndexGenerator::GetNumPoints() > 0)
		((DX11::Renderer*)g_renderer)->RestoreCull();
}
Пример #14
0
// Fetches (or creates) the texture cache entry for the given TEV sampler
// stage. Reads the texture parameters from the BP registers, hashes the
// source texture data (and the palette, for paletted formats), then either
// reuses a matching cached entry, palette-converts a cached EFB copy, or
// decodes and uploads the texture from RAM/TMEM — optionally replaced by a
// custom hires texture. Returns nullptr for a zero texture address or an
// invalid TLUT format.
TextureCache::TCacheEntryBase* TextureCache::Load(const u32 stage)
{
	// Four texture units share one FourTexUnits register block; 'id' selects
	// the unit within the block.
	const FourTexUnits &tex = bpmem.tex[stage >> 2];
	const u32 id = stage & 3;
	const u32 address = (tex.texImage3[id].image_base/* & 0x1FFFFF*/) << 5;
	u32 width = tex.texImage0[id].width + 1;
	u32 height = tex.texImage0[id].height + 1;
	const int texformat = tex.texImage0[id].format;
	const u32 tlutaddr = tex.texTlut[id].tmem_offset << 9;
	const u32 tlutfmt = tex.texTlut[id].tlut_format;
	const bool use_mipmaps = (tex.texMode0[id].min_filter & 3) != 0;
	u32 tex_levels = use_mipmaps ? ((tex.texMode1[id].max_lod + 0xf) / 0x10 + 1) : 1;
	const bool from_tmem = tex.texImage1[id].image_type != 0;

	if (0 == address)
		return nullptr;

	// TexelSizeInNibbles(format) * width * height / 16;
	const unsigned int bsw = TexDecoder_GetBlockWidthInTexels(texformat);
	const unsigned int bsh = TexDecoder_GetBlockHeightInTexels(texformat);

	// Dimensions rounded up to the format's block size, as stored in memory.
	unsigned int expandedWidth = ROUND_UP(width, bsw);
	unsigned int expandedHeight = ROUND_UP(height, bsh);
	const unsigned int nativeW = width;
	const unsigned int nativeH = height;

	// Hash assigned to texcache entry (also used to generate filenames used for texture dumping and custom texture lookup)
	u64 base_hash = TEXHASH_INVALID;
	u64 full_hash = TEXHASH_INVALID;

	u32 full_format = texformat;

	const bool isPaletteTexture = (texformat == GX_TF_C4 || texformat == GX_TF_C8 || texformat == GX_TF_C14X2);

	// Reject invalid tlut format.
	if (isPaletteTexture && tlutfmt > GX_TL_RGB5A3)
		return nullptr;

	// For paletted textures the TLUT format is folded into the cache format
	// key, since the same texel data looks different under different TLUTs.
	if (isPaletteTexture)
		full_format = texformat | (tlutfmt << 16);

	const u32 texture_size = TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, texformat);
	u32 additional_mips_size = 0; // not including level 0, which is texture_size

	// GPUs don't like when the specified mipmap count would require more than one 1x1-sized LOD in the mipmap chain
	// e.g. 64x64 with 7 LODs would have the mipmap chain 64x64,32x32,16x16,8x8,4x4,2x2,1x1,0x0, so we limit the mipmap count to 6 there
	tex_levels = std::min<u32>(IntLog2(std::max(width, height)) + 1, tex_levels);

	for (u32 level = 1; level != tex_levels; ++level)
	{
		// We still need to calculate the original size of the mips
		const u32 expanded_mip_width = ROUND_UP(CalculateLevelSize(width, level), bsw);
		const u32 expanded_mip_height = ROUND_UP(CalculateLevelSize(height, level), bsh);

		additional_mips_size += TexDecoder_GetTextureSizeInBytes(expanded_mip_width, expanded_mip_height, texformat);
	}

	// If we are recording a FifoLog, keep track of what memory we read.
	// FifiRecorder does it's own memory modification tracking independant of the texture hashing below.
	if (g_bRecordFifoData && !from_tmem)
		FifoRecorder::GetInstance().UseMemory(address, texture_size + additional_mips_size, MemoryUpdate::TEXTURE_MAP);

	// Source pointer: preloaded TMEM bank, or main RAM.
	const u8* src_data;
	if (from_tmem)
		src_data = &texMem[bpmem.tex[stage / 4].texImage1[stage % 4].tmem_even * TMEM_LINE_SIZE];
	else
		src_data = Memory::GetPointer(address);

	// TODO: This doesn't hash GB tiles for preloaded RGBA8 textures (instead, it's hashing more data from the low tmem bank than it should)
	base_hash = GetHash64(src_data, texture_size, g_ActiveConfig.iSafeTextureCache_ColorSamples);
	u32 palette_size = 0;
	if (isPaletteTexture)
	{
		// full_hash additionally covers the palette, so a TLUT change is
		// detected as a texture change.
		palette_size = TexDecoder_GetPaletteSize(texformat);
		full_hash = base_hash ^ GetHash64(&texMem[tlutaddr], palette_size, g_ActiveConfig.iSafeTextureCache_ColorSamples);
	}
	else
	{
		full_hash = base_hash;
	}

	// Search the texture cache for textures by address
	//
	// Find all texture cache entries for the current texture address, and decide whether to use one of
	// them, or to create a new one
	//
	// In most cases, the fastest way is to use only one texture cache entry for the same address. Usually,
	// when a texture changes, the old version of the texture is unlikely to be used again. If there were
	// new cache entries created for normal texture updates, there would be a slowdown due to a huge amount
	// of unused cache entries. Also thanks to texture pooling, overwriting an existing cache entry is
	// faster than creating a new one from scratch.
	//
	// Some games use the same address for different textures though. If the same cache entry was used in
	// this case, it would be constantly overwritten, and effectively there wouldn't be any caching for
	// those textures. Examples for this are Metroid Prime and Castlevania 3. Metroid Prime has multiple
	// sets of fonts on each other stored in a single texture and uses the palette to make different
	// characters visible or invisible. In Castlevania 3 some textures are used for 2 different things or
	// at least in 2 different ways(size 1024x1024 vs 1024x256).
	//
	// To determine whether to use multiple cache entries or a single entry, use the following heuristic:
	// If the same texture address is used several times during the same frame, assume the address is used
	// for different purposes and allow creating an additional cache entry. If there's at least one entry
	// that hasn't been used for the same frame, then overwrite it, in order to keep the cache as small as
	// possible. If the current texture is found in the cache, use that entry.
	//
	// For efb copies, the entry created in CopyRenderTargetToTexture always has to be used, or else it was
	// done in vain.
	std::pair<TexCache::iterator, TexCache::iterator> iter_range = textures_by_address.equal_range((u64)address);
	TexCache::iterator iter = iter_range.first;
	TexCache::iterator oldest_entry = iter;
	int temp_frameCount = 0x7fffffff;
	TexCache::iterator unconverted_copy = textures_by_address.end();

	while (iter != iter_range.second)
	{
		TCacheEntryBase* entry = iter->second;
		// Only consider EFB copies whose dimensions match and that are not
		// strided; strided EFB copies are not meant to be used directly.
		if (entry->IsEfbCopy() && entry->native_width == nativeW && entry->native_height == nativeH &&
			entry->memory_stride == entry->CacheLinesPerRow() * 32)
		{
			// EFB copies have slightly different rules as EFB copy formats have different
			// meanings from texture formats.
			if ((base_hash == entry->hash && (!isPaletteTexture || g_Config.backend_info.bSupportsPaletteConversion)) ||
				IsPlayingBackFifologWithBrokenEFBCopies)
			{
				// TODO: We should check format/width/height/levels for EFB copies. Checking
				// format is complicated because EFB copy formats don't exactly match
				// texture formats. I'm not sure what effect checking width/height/levels
				// would have.
				if (!isPaletteTexture || !g_Config.backend_info.bSupportsPaletteConversion)
					return ReturnEntry(stage, entry);

				// Note that we found an unconverted EFB copy, then continue.  We'll
				// perform the conversion later.  Currently, we only convert EFB copies to
				// palette textures; we could do other conversions if it proved to be
				// beneficial.
				unconverted_copy = iter;
			}
			else
			{
				// Aggressively prune EFB copies: if it isn't useful here, it will probably
				// never be useful again.  It's theoretically possible for a game to do
				// something weird where the copy could become useful in the future, but in
				// practice it doesn't happen.
				iter = FreeTexture(iter);
				continue;
			}
		}
		else
		{
			// For normal textures, all texture parameters need to match
			if (entry->hash == full_hash && entry->format == full_format && entry->native_levels >= tex_levels &&
				entry->native_width == nativeW && entry->native_height == nativeH)
			{
				entry = DoPartialTextureUpdates(iter);

				return ReturnEntry(stage, entry);
			}
		}

		// Find the texture which hasn't been used for the longest time. Count paletted
		// textures as the same texture here, when the texture itself is the same. This
		// improves the performance a lot in some games that use paletted textures.
		// Example: Sonic the Fighters (inside Sonic Gems Collection)
		// Skip EFB copies here, so they can be used for partial texture updates
		if (entry->frameCount != FRAMECOUNT_INVALID && entry->frameCount < temp_frameCount &&
			!entry->IsEfbCopy() && !(isPaletteTexture && entry->base_hash == base_hash))
		{
			temp_frameCount = entry->frameCount;
			oldest_entry = iter;
		}
		++iter;
	}

	if (unconverted_copy != textures_by_address.end())
	{
		// Perform palette decoding.
		TCacheEntryBase *entry = unconverted_copy->second;

		TCacheEntryConfig config;
		config.rendertarget = true;
		config.width = entry->config.width;
		config.height = entry->config.height;
		config.layers = FramebufferManagerBase::GetEFBLayers();
		TCacheEntryBase *decoded_entry = AllocateTexture(config);

		decoded_entry->SetGeneralParameters(address, texture_size, full_format);
		decoded_entry->SetDimensions(entry->native_width, entry->native_height, 1);
		decoded_entry->SetHashes(base_hash, full_hash);
		decoded_entry->frameCount = FRAMECOUNT_INVALID;
		decoded_entry->is_efb_copy = false;

		// GPU-side conversion of the EFB copy through the current palette.
		g_texture_cache->ConvertTexture(decoded_entry, entry, &texMem[tlutaddr], (TlutFormat)tlutfmt);
		textures_by_address.emplace((u64)address, decoded_entry);
		return ReturnEntry(stage, decoded_entry);
	}

	// Search the texture cache for normal textures by hash
	//
	// If the texture was fully hashed, the address does not need to match. Identical duplicate textures cause unnecessary slowdowns
	// Example: Tales of Symphonia (GC) uses over 500 small textures in menus, but only around 70 different ones
	if (g_ActiveConfig.iSafeTextureCache_ColorSamples == 0 ||
		std::max(texture_size, palette_size) <= (u32)g_ActiveConfig.iSafeTextureCache_ColorSamples * 8)
	{
		iter_range = textures_by_hash.equal_range(full_hash);
		iter = iter_range.first;
		while (iter != iter_range.second)
		{
			TCacheEntryBase* entry = iter->second;
			// All parameters, except the address, need to match here
			if (entry->format == full_format && entry->native_levels >= tex_levels &&
				entry->native_width == nativeW && entry->native_height == nativeH)
			{
				entry = DoPartialTextureUpdates(iter);

				return ReturnEntry(stage, entry);
			}
			++iter;
		}
	}

	// If at least one entry was not used for the same frame, overwrite the oldest one
	if (temp_frameCount != 0x7fffffff)
	{
		// pool this texture and make a new one later
		FreeTexture(oldest_entry);
	}

	// Custom (hires) texture replacement: if found, its level-0 data replaces
	// the decoded game texture and may change width/height.
	std::shared_ptr<HiresTexture> hires_tex;
	if (g_ActiveConfig.bHiresTextures)
	{
		hires_tex = HiresTexture::Search(
			src_data, texture_size,
			&texMem[tlutaddr], palette_size,
			width, height,
			texformat, use_mipmaps
		);

		if (hires_tex)
		{
			auto& l = hires_tex->m_levels[0];
			if (l.width != width || l.height != height)
			{
				width = l.width;
				height = l.height;
			}
			expandedWidth = l.width;
			expandedHeight = l.height;
			CheckTempSize(l.data_size);
			memcpy(temp, l.data, l.data_size);
		}
	}

	if (!hires_tex)
	{
		// Decode level 0 into the temp buffer. Preloaded RGBA8 textures are
		// split across the even/odd TMEM banks and need the special path.
		if (!(texformat == GX_TF_RGBA8 && from_tmem))
		{
			const u8* tlut = &texMem[tlutaddr];
			TexDecoder_Decode(temp, src_data, expandedWidth, expandedHeight, texformat, tlut, (TlutFormat)tlutfmt);
		}
		else
		{
			u8* src_data_gb = &texMem[bpmem.tex[stage / 4].texImage2[stage % 4].tmem_odd * TMEM_LINE_SIZE];
			TexDecoder_DecodeRGBA8FromTmem(temp, src_data, src_data_gb, expandedWidth, expandedHeight);
		}
	}

	// how many levels the allocated texture shall have
	const u32 texLevels = hires_tex ? (u32)hires_tex->m_levels.size() : tex_levels;

	// create the entry/texture
	TCacheEntryConfig config;
	config.width = width;
	config.height = height;
	config.levels = texLevels;

	TCacheEntryBase* entry = AllocateTexture(config);
	GFX_DEBUGGER_PAUSE_AT(NEXT_NEW_TEXTURE, true);

	iter = textures_by_address.emplace((u64)address, entry);
	if (g_ActiveConfig.iSafeTextureCache_ColorSamples == 0 ||
		std::max(texture_size, palette_size) <= (u32)g_ActiveConfig.iSafeTextureCache_ColorSamples * 8)
	{
		entry->textures_by_hash_iter = textures_by_hash.emplace(full_hash, entry);
	}

	entry->SetGeneralParameters(address, texture_size, full_format);
	entry->SetDimensions(nativeW, nativeH, tex_levels);
	entry->SetHashes(base_hash, full_hash);
	entry->is_efb_copy = false;
	entry->is_custom_tex = hires_tex != nullptr;

	// load texture
	entry->Load(width, height, expandedWidth, 0);

	std::string basename = "";
	if (g_ActiveConfig.bDumpTextures && !hires_tex)
	{
		basename = HiresTexture::GenBaseName(
			src_data, texture_size,
			&texMem[tlutaddr], palette_size,
			width, height,
			texformat, use_mipmaps,
			true
		);
		DumpTexture(entry, basename, 0);
	}

	if (hires_tex)
	{
		// Upload remaining custom mip levels as-is.
		for (u32 level = 1; level != texLevels; ++level)
		{
			auto& l = hires_tex->m_levels[level];
			CheckTempSize(l.data_size);
			memcpy(temp, l.data, l.data_size);
			entry->Load(l.width, l.height, l.width, level);
		}
	}
	else
	{
		// load mips - TODO: Loading mipmaps from tmem is untested!
		src_data += texture_size;

		const u8* ptr_even = nullptr;
		const u8* ptr_odd = nullptr;
		if (from_tmem)
		{
			ptr_even = &texMem[bpmem.tex[stage / 4].texImage1[stage % 4].tmem_even * TMEM_LINE_SIZE + texture_size];
			ptr_odd = &texMem[bpmem.tex[stage / 4].texImage2[stage % 4].tmem_odd * TMEM_LINE_SIZE];
		}

		for (u32 level = 1; level != texLevels; ++level)
		{
			const u32 mip_width = CalculateLevelSize(width, level);
			const u32 mip_height = CalculateLevelSize(height, level);
			const u32 expanded_mip_width = ROUND_UP(mip_width, bsw);
			const u32 expanded_mip_height = ROUND_UP(mip_height, bsh);

			// In TMEM mode, odd levels live in the odd bank and even levels in
			// the even bank; note the reference so the pointer advances per bank.
			const u8*& mip_src_data = from_tmem
				? ((level % 2) ? ptr_odd : ptr_even)
				: src_data;
			const u8* tlut = &texMem[tlutaddr];
			TexDecoder_Decode(temp, mip_src_data, expanded_mip_width, expanded_mip_height, texformat, tlut, (TlutFormat)tlutfmt);
			mip_src_data += TexDecoder_GetTextureSizeInBytes(expanded_mip_width, expanded_mip_height, texformat);

			entry->Load(mip_width, mip_height, expanded_mip_width, level);

			if (g_ActiveConfig.bDumpTextures)
				DumpTexture(entry, basename, level);
		}
	}

	INCSTAT(stats.numTexturesUploaded);
	SETSTAT(stats.numTexturesAlive, textures_by_address.size());

	entry = DoPartialTextureUpdates(iter);

	return ReturnEntry(stage, entry);
}
Пример #15
0
// Older texture-cache lookup/upload path. Hashes the source texture (mixing
// in the TLUT hash for paletted formats), tries to reuse the cached entry for
// the derived texID, otherwise decodes the texture (or a custom replacement)
// and creates/reloads the cache entry, including native or custom mip levels.
// Returns nullptr for a zero texture address.
TextureCache::TCacheEntryBase* TextureCache::Load(unsigned int const stage,
	u32 const address, unsigned int width, unsigned int height, int const texformat,
	unsigned int const tlutaddr, int const tlutfmt, bool const use_mipmaps, unsigned int maxlevel, bool const from_tmem)
{
	if (0 == address)
		return nullptr;

	// TexelSizeInNibbles(format) * width * height / 16;
	// Block sizes minus one, used as masks to round up to block alignment.
	const unsigned int bsw = TexDecoder_GetBlockWidthInTexels(texformat) - 1;
	const unsigned int bsh = TexDecoder_GetBlockHeightInTexels(texformat) - 1;

	unsigned int expandedWidth  = (width  + bsw) & (~bsw);
	unsigned int expandedHeight = (height + bsh) & (~bsh);
	const unsigned int nativeW = width;
	const unsigned int nativeH = height;

	u32 texID = address;
	// Hash assigned to texcache entry (also used to generate filenames used for texture dumping and custom texture lookup)
	u64 tex_hash = TEXHASH_INVALID;
	u64 tlut_hash = TEXHASH_INVALID;

	u32 full_format = texformat;
	PC_TexFormat pcfmt = PC_TEX_FMT_NONE;

	const bool isPaletteTexture = (texformat == GX_TF_C4 || texformat == GX_TF_C8 || texformat == GX_TF_C14X2);
	// Fold the TLUT format into the cache format key for paletted textures.
	if (isPaletteTexture)
		full_format = texformat | (tlutfmt << 16);

	const u32 texture_size = TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, texformat);

	// Source pointer: preloaded TMEM bank, or main RAM.
	const u8* src_data;
	if (from_tmem)
		src_data = &texMem[bpmem.tex[stage / 4].texImage1[stage % 4].tmem_even * TMEM_LINE_SIZE];
	else
		src_data = Memory::GetPointer(address);

	// TODO: This doesn't hash GB tiles for preloaded RGBA8 textures (instead, it's hashing more data from the low tmem bank than it should)
	tex_hash = GetHash64(src_data, texture_size, g_ActiveConfig.iSafeTextureCache_ColorSamples);
	if (isPaletteTexture)
	{
		const u32 palette_size = TexDecoder_GetPaletteSize(texformat);
		tlut_hash = GetHash64(&texMem[tlutaddr], palette_size, g_ActiveConfig.iSafeTextureCache_ColorSamples);

		// NOTE: For non-paletted textures, texID is equal to the texture address.
		//       A paletted texture, however, may have multiple texIDs assigned though depending on the currently used tlut.
		//       This (changing texID depending on the tlut_hash) is a trick to get around
		//       an issue with Metroid Prime's fonts (it has multiple sets of fonts on each other
		//       stored in a single texture and uses the palette to make different characters
		//       visible or invisible. Thus, unless we want to recreate the textures for every drawn character,
		//       we must make sure that a paletted texture gets assigned multiple IDs for each tlut used.
		//
		// TODO: Because texID isn't always the same as the address now, CopyRenderTargetToTexture might be broken now
		texID ^= ((u32)tlut_hash) ^(u32)(tlut_hash >> 32);
		tex_hash ^= tlut_hash;
	}

	// D3D doesn't like when the specified mipmap count would require more than one 1x1-sized LOD in the mipmap chain
	// e.g. 64x64 with 7 LODs would have the mipmap chain 64x64,32x32,16x16,8x8,4x4,2x2,1x1,1x1, so we limit the mipmap count to 6 there
	while (g_ActiveConfig.backend_info.bUseMinimalMipCount && std::max(expandedWidth, expandedHeight) >> maxlevel == 0)
		--maxlevel;

	TCacheEntryBase *entry = textures[texID];
	if (entry)
	{
		// 1. Calculate reference hash:
		// calculated from RAM texture data for normal textures. Hashes for paletted textures are modified by tlut_hash. 0 for virtual EFB copies.
		if (g_ActiveConfig.bCopyEFBToTexture && entry->IsEfbCopy())
			tex_hash = TEXHASH_INVALID;

		// 2. a) For EFB copies, only the hash and the texture address need to match
		if (entry->IsEfbCopy() && tex_hash == entry->hash && address == entry->addr)
		{
			entry->type = TCET_EC_VRAM;

			// TODO: Print a warning if the format changes! In this case,
			// we could reinterpret the internal texture object data to the new pixel format
			// (similar to what is already being done in Renderer::ReinterpretPixelFormat())
			return ReturnEntry(stage, entry);
		}

		// 2. b) For normal textures, all texture parameters need to match
		if (address == entry->addr && tex_hash == entry->hash && full_format == entry->format &&
			entry->num_mipmaps > maxlevel && entry->native_width == nativeW && entry->native_height == nativeH)
		{
			return ReturnEntry(stage, entry);
		}

		// 3. If we reach this line, we'll have to upload the new texture data to VRAM.
		//    If we're lucky, the texture parameters didn't change and we can reuse the internal texture object instead of destroying and recreating it.
		//
		// TODO: Don't we need to force texture decoding to RGBA8 for dynamic EFB copies?
		// TODO: Actually, it should be enough if the internal texture format matches...
		if ((entry->type == TCET_NORMAL &&
		     width == entry->virtual_width &&
		     height == entry->virtual_height &&
		     full_format == entry->format &&
		     entry->num_mipmaps > maxlevel) ||
		    (entry->type == TCET_EC_DYNAMIC &&
		     entry->native_width == width &&
		     entry->native_height == height))
		{
			// reuse the texture
		}
		else
		{
			// delete the texture and make a new one
			delete entry;
			entry = nullptr;
		}
	}

	bool using_custom_texture = false;

	if (g_ActiveConfig.bHiresTextures)
	{
		// This function may modify width/height.
		pcfmt = LoadCustomTexture(tex_hash, texformat, 0, width, height);
		if (pcfmt != PC_TEX_FMT_NONE)
		{
			if (expandedWidth != width || expandedHeight != height)
			{
				expandedWidth = width;
				expandedHeight = height;

				// If we thought we could reuse the texture before, make sure to pool it now!
				if (entry)
				{
					delete entry;
					entry = nullptr;
				}
			}
			using_custom_texture = true;
		}
	}

	if (!using_custom_texture)
	{
		// Decode level 0 into the temp buffer. Preloaded RGBA8 textures are
		// split across the even/odd TMEM banks and need the special path.
		if (!(texformat == GX_TF_RGBA8 && from_tmem))
		{
			pcfmt = TexDecoder_Decode(temp, src_data, expandedWidth,
						expandedHeight, texformat, tlutaddr, tlutfmt, g_ActiveConfig.backend_info.bUseRGBATextures);
		}
		else
		{
			u8* src_data_gb = &texMem[bpmem.tex[stage/4].texImage2[stage%4].tmem_odd * TMEM_LINE_SIZE];
			pcfmt = TexDecoder_DecodeRGBA8FromTmem(temp, src_data, src_data_gb, expandedWidth, expandedHeight);
		}
	}

	u32 texLevels = use_mipmaps ? (maxlevel + 1) : 1;
	const bool using_custom_lods = using_custom_texture && CheckForCustomTextureLODs(tex_hash, texformat, texLevels);
	// Only load native mips if their dimensions fit to our virtual texture dimensions
	const bool use_native_mips = use_mipmaps && !using_custom_lods && (width == nativeW && height == nativeH);
	texLevels = (use_native_mips || using_custom_lods) ? texLevels : 1; // TODO: Should be forced to 1 for non-pow2 textures (e.g. efb copies with automatically adjusted IR)

	// create the entry/texture
	if (nullptr == entry)
	{
		textures[texID] = entry = g_texture_cache->CreateTexture(width, height, expandedWidth, texLevels, pcfmt);

		// Sometimes, we can get around recreating a texture if only the number of mip levels changes
		// e.g. if our texture cache entry got too many mipmap levels we can limit the number of used levels by setting the appropriate render states
		// Thus, we don't update this member for every Load, but just whenever the texture gets recreated

		// TODO: This is the wrong value. We should be storing the number of levels our actual texture has.
		// But that will currently make the above "existing entry" tests fail as "texLevels" is not calculated until after.
		// Currently, we might try to reuse a texture which appears to have more levels than actual, maybe..
		entry->num_mipmaps = maxlevel + 1;
		entry->type = TCET_NORMAL;

		GFX_DEBUGGER_PAUSE_AT(NEXT_NEW_TEXTURE, true);
	}
	else
	{
		// load texture (CreateTexture also loads level 0)
		entry->Load(width, height, expandedWidth, 0);
	}

	entry->SetGeneralParameters(address, texture_size, full_format, entry->num_mipmaps);
	entry->SetDimensions(nativeW, nativeH, width, height);
	entry->hash = tex_hash;

	// EFB copies become "dynamic" when RAM copies are enabled, so their data
	// can be re-uploaded from RAM on later loads.
	if (entry->IsEfbCopy() && !g_ActiveConfig.bCopyEFBToTexture)
		entry->type = TCET_EC_DYNAMIC;
	else
		entry->type = TCET_NORMAL;

	if (g_ActiveConfig.bDumpTextures && !using_custom_texture)
		DumpTexture(entry, 0);

	u32 level = 1;
	// load mips - TODO: Loading mipmaps from tmem is untested!
	if (pcfmt != PC_TEX_FMT_NONE)
	{
		if (use_native_mips)
		{
			src_data += texture_size;

			const u8* ptr_even = nullptr;
			const u8* ptr_odd = nullptr;
			if (from_tmem)
			{
				ptr_even = &texMem[bpmem.tex[stage/4].texImage1[stage%4].tmem_even * TMEM_LINE_SIZE + texture_size];
				ptr_odd = &texMem[bpmem.tex[stage/4].texImage2[stage%4].tmem_odd * TMEM_LINE_SIZE];
			}

			for (; level != texLevels; ++level)
			{
				const u32 mip_width = CalculateLevelSize(width, level);
				const u32 mip_height = CalculateLevelSize(height, level);
				const u32 expanded_mip_width = (mip_width + bsw) & (~bsw);
				const u32 expanded_mip_height = (mip_height + bsh) & (~bsh);

				// In TMEM mode, odd levels live in the odd bank and even levels
				// in the even bank; the reference advances the per-bank pointer.
				const u8*& mip_src_data = from_tmem
					? ((level % 2) ? ptr_odd : ptr_even)
					: src_data;
				TexDecoder_Decode(temp, mip_src_data, expanded_mip_width, expanded_mip_height, texformat, tlutaddr, tlutfmt, g_ActiveConfig.backend_info.bUseRGBATextures);
				mip_src_data += TexDecoder_GetTextureSizeInBytes(expanded_mip_width, expanded_mip_height, texformat);

				entry->Load(mip_width, mip_height, expanded_mip_width, level);

				if (g_ActiveConfig.bDumpTextures)
					DumpTexture(entry, level);
			}
		}
		else if (using_custom_lods)
		{
			// Custom replacement textures ship their own mip levels.
			for (; level != texLevels; ++level)
			{
				unsigned int mip_width = CalculateLevelSize(width, level);
				unsigned int mip_height = CalculateLevelSize(height, level);

				LoadCustomTexture(tex_hash, texformat, level, mip_width, mip_height);
				entry->Load(mip_width, mip_height, mip_width, level);
			}
		}
	}

	INCSTAT(stats.numTexturesCreated);
	SETSTAT(stats.numTexturesAlive, textures.size());

	return ReturnEntry(stage, entry);
}
Пример #16
0
// Binds (and if necessary generates and compiles) the shader program matching
// the given destination-alpha mode, vertex components, and primitive type.
// Lookup order: currently bound program, then the program cache, then a fresh
// compile. Returns the bound SHADER, or nullptr if compilation failed.
SHADER* ProgramShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components, u32 primitive_type)
{
	SHADERUID uid;
	GetShaderId(&uid, dstAlphaMode, components, primitive_type);

	// Fast path: the requested program is the one already bound.
	if (last_entry && uid == last_uid)
	{
		GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
		last_entry->shader.Bind();
		return &last_entry->shader;
	}

	last_uid = uid;

	// Cache hit: rebind the previously compiled program.
	PCache::iterator cached = pshaders.find(uid);
	if (cached != pshaders.end())
	{
		last_entry = &cached->second;

		GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
		last_entry->shader.Bind();
		return &last_entry->shader;
	}

	// Cache miss: insert a fresh entry and generate the shader sources.
	PCacheEntry& fresh = pshaders[uid];
	last_entry = &fresh;
	fresh.in_cache = 0;

	VertexShaderCode vcode;
	PixelShaderCode pcode;
	ShaderCode gcode;
	GenerateVertexShaderCode(vcode, components, API_OPENGL);
	GeneratePixelShaderCode(pcode, dstAlphaMode, API_OPENGL, components);
	// Only emit a geometry shader when the backend supports it and the uid
	// is not a simple passthrough.
	if (g_ActiveConfig.backend_info.bSupportsGeometryShaders && !uid.guid.GetUidData()->IsPassthrough())
		GenerateGeometryShaderCode(gcode, primitive_type, API_OPENGL);

	if (g_ActiveConfig.bEnableShaderDebugging)
	{
		// Keep the generated source around for the shader debugger.
		fresh.shader.strvprog = vcode.GetBuffer();
		fresh.shader.strpprog = pcode.GetBuffer();
		fresh.shader.strgprog = gcode.GetBuffer();
	}

#if defined(_DEBUG) || defined(DEBUGFAST)
	// Optionally dump the generated sources to the user's dump directory.
	if (g_ActiveConfig.iLog & CONF_SAVESHADERS)
	{
		static int counter = 0;
		std::string dump_path = StringFromFormat("%svs_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++);
		SaveData(dump_path, vcode.GetBuffer());

		dump_path = StringFromFormat("%sps_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++);
		SaveData(dump_path, pcode.GetBuffer());

		if (gcode.GetBuffer() != nullptr)
		{
			dump_path = StringFromFormat("%sgs_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++);
			SaveData(dump_path, gcode.GetBuffer());
		}
	}
#endif

	if (!CompileShader(fresh.shader, vcode.GetBuffer(), pcode.GetBuffer(), gcode.GetBuffer()))
	{
		GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
		return nullptr;
	}

	INCSTAT(stats.numPixelShadersCreated);
	SETSTAT(stats.numPixelShadersAlive, pshaders.size());
	GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);

	last_entry->shader.Bind();
	return &last_entry->shader;
}
Пример #17
0
// Issues the indexed draw call for the current batch, dispatching on the
// current primitive type. Lines and points bind an expansion geometry shader
// and are drawn with culling disabled; the cull state is restored afterwards.
void VertexManager::Draw(u32 stride)
{
	u32 components = VertexLoaderManager::GetCurrentVertexFormat()->m_components;
	u32 indices = IndexGenerator::GetIndexLen();

	u32 zero = 0;
	// The same buffer holds both vertices and indices; vertex data starts at
	// offset 0, draw offsets are applied via baseVertex/startIndex below.
	D3D::context->IASetVertexBuffers(0, 1, &m_buffers[m_currentBuffer], &stride, &zero);
	D3D::context->IASetIndexBuffer(m_buffers[m_currentBuffer], DXGI_FORMAT_R16_UINT, 0);

	u32 baseVertex = m_vertexDrawOffset / stride;
	u32 startIndex = m_indexDrawOffset / sizeof(u16);

	if (current_primitive_type == PRIMITIVE_TRIANGLES)
	{
		// Triangles are emitted as strips by the index generator.
		D3D::context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
		D3D::context->DrawIndexed(indices, startIndex, baseVertex);
		INCSTAT(stats.thisFrame.numDrawCalls);
	}
	else if (current_primitive_type == PRIMITIVE_LINES)
	{
		// GX line width is in 1/6th-pixel units; viewport extents feed the
		// geometry shader that expands each line into a screen-space quad.
		float lineWidth = float(bpmem.lineptwidth.linesize) / 6.f;
		float texOffset = LINE_PT_TEX_OFFSETS[bpmem.lineptwidth.lineoff];
		float vpWidth = 2.0f * xfmem.viewport.wd;
		float vpHeight = -2.0f * xfmem.viewport.ht;

		bool texOffsetEnable[8];

		// Per-texcoord flag: whether GX applies a texture-coordinate offset
		// along the line.
		for (int i = 0; i < 8; ++i)
			texOffsetEnable[i] = bpmem.texcoords[i].s.line_offset;

		if (m_lineShader.SetShader(components, lineWidth,
			texOffset, vpWidth, vpHeight, texOffsetEnable))
		{
			((DX11::Renderer*)g_renderer)->ApplyCullDisable(); // Disable culling for lines and points
			D3D::context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_LINELIST);
			D3D::context->DrawIndexed(indices, startIndex, baseVertex);
			INCSTAT(stats.thisFrame.numDrawCalls);

			// Unbind the expansion geometry shader and restore cull state.
			D3D::context->GSSetShader(nullptr, nullptr, 0);
			((DX11::Renderer*)g_renderer)->RestoreCull();
		}
	}
	else //if (current_primitive_type == PRIMITIVE_POINTS)
	{
		// Same scheme as lines: point size in 1/6th-pixel units, expanded to
		// quads by the point geometry shader.
		float pointSize = float(bpmem.lineptwidth.pointsize) / 6.f;
		float texOffset = LINE_PT_TEX_OFFSETS[bpmem.lineptwidth.pointoff];
		float vpWidth = 2.0f * xfmem.viewport.wd;
		float vpHeight = -2.0f * xfmem.viewport.ht;

		bool texOffsetEnable[8];

		for (int i = 0; i < 8; ++i)
			texOffsetEnable[i] = bpmem.texcoords[i].s.point_offset;

		if (m_pointShader.SetShader(components, pointSize,
			texOffset, vpWidth, vpHeight, texOffsetEnable))
		{
			((DX11::Renderer*)g_renderer)->ApplyCullDisable(); // Disable culling for lines and points
			D3D::context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_POINTLIST);
			D3D::context->DrawIndexed(indices, startIndex, baseVertex);
			INCSTAT(stats.thisFrame.numDrawCalls);

			D3D::context->GSSetShader(nullptr, nullptr, 0);
			((DX11::Renderer*)g_renderer)->RestoreCull();
		}
	}
}
Пример #18
0
// Binds (and if necessary generates and compiles) the shader program matching
// the given destination-alpha mode and vertex components. Lookup order:
// currently bound program, then the program cache, then a fresh compile.
// Returns the bound SHADER, or NULL if compilation failed.
SHADER* ProgramShaderCache::SetShader ( DSTALPHA_MODE dstAlphaMode, u32 components )
{
	SHADERUID uid;
	GetShaderId(&uid, dstAlphaMode, components);

	// Fast path: the requested program is the one already bound.
	if (last_entry && uid == last_uid)
	{
		GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
		last_entry->shader.Bind();
		return &last_entry->shader;
	}

	last_uid = uid;

	// Cache hit: rebind the previously compiled program.
	PCache::iterator cached = pshaders.find(uid);
	if (cached != pshaders.end())
	{
		last_entry = &cached->second;

		GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
		last_entry->shader.Bind();
		return &last_entry->shader;
	}

	// Cache miss: insert a fresh entry and generate the shader sources.
	PCacheEntry& fresh = pshaders[uid];
	last_entry = &fresh;
	fresh.in_cache = 0;

	VertexShaderCode vcode;
	PixelShaderCode pcode;
	GenerateVertexShaderCode(vcode, components, API_OPENGL);
	GeneratePixelShaderCode(pcode, dstAlphaMode, API_OPENGL, components);

	if (g_ActiveConfig.bEnableShaderDebugging)
	{
		// Keep the generated source around for the shader debugger.
		fresh.shader.strvprog = vcode.GetBuffer();
		fresh.shader.strpprog = pcode.GetBuffer();
	}

#if defined(_DEBUG) || defined(DEBUGFAST)
	// Optionally dump the generated sources to the user's dump directory.
	if (g_ActiveConfig.iLog & CONF_SAVESHADERS)
	{
		static int counter = 0;
		char dump_path[MAX_PATH];
		sprintf(dump_path, "%svs_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++);
		SaveData(dump_path, vcode.GetBuffer());
		sprintf(dump_path, "%sps_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++);
		SaveData(dump_path, pcode.GetBuffer());
	}
#endif

	if (!CompileShader(fresh.shader, vcode.GetBuffer(), pcode.GetBuffer()))
	{
		GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
		return NULL;
	}

	INCSTAT(stats.numPixelShadersCreated);
	SETSTAT(stats.numPixelShadersAlive, pshaders.size());
	GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);

	last_entry->shader.Bind();
	return &last_entry->shader;
}
Пример #19
0
/*
 * FUNCTION:		clnp_er_input
 *
 * PURPOSE:			Process an ER pdu: map its reason code onto a
 *					protocol-control command and notify the transport layer.
 *
 * RETURNS:			nothing
 *
 * SIDE EFFECTS:	frees the mbuf chain m
 *
 * NOTES:
 */
void
clnp_er_input(
	struct mbuf    *m,	/* ptr to packet itself */
	struct iso_addr *src,	/* ptr to src of er */
	u_int           reason)	/* reason code of er */
{
	int             cmd = -1;	/* PRC_* command; -1 means none */

#ifdef ARGO_DEBUG
	if (argo_debug[D_CTLINPUT]) {
		printf("clnp_er_input: m %p, src %s, reason x%x\n",
		    m, clnp_iso_addrp(src), reason);
	}
#endif

	INCSTAT(cns_er_inhist[clnp_er_index(reason)]);

	/*
	 * Translate the ER reason code into the corresponding
	 * protocol-control command. Reasons that share a command
	 * are grouped together.
	 */
	switch (reason) {
	case GEN_BADCSUM:
	case GEN_HDRSYNTAX:
	case GEN_INCOMPLETE:
	case GEN_DUPOPT:
	case DISC_UNSUPPOPT:
	case DISC_UNSUPPVERS:
	case DISC_UNSUPPSECURE:
	case DISC_UNSUPPSRCRT:
	case DISC_UNSUPPRECRT:
		cmd = PRC_PARAMPROB;
		break;

	case GEN_CONGEST:
		cmd = PRC_QUENCH;
		break;

	case GEN_SEGNEEDED:
		cmd = PRC_MSGSIZE;
		break;

	case ADDR_DESTUNREACH:
		cmd = PRC_UNREACH_HOST;
		break;

	case ADDR_DESTUNKNOWN:
		cmd = PRC_UNREACH_PROTOCOL;
		break;

	case SRCRT_UNSPECERR:
	case SRCRT_SYNTAX:
	case SRCRT_UNKNOWNADDR:
	case SRCRT_BADPATH:
		cmd = PRC_UNREACH_SRCFAIL;
		break;

	case TTL_EXPTRANSIT:
		cmd = PRC_TIMXCEED_INTRANS;
		break;

	case TTL_EXPREASS:
	case REASS_INTERFERE:
		cmd = PRC_TIMXCEED_REASS;
		break;

	default:
		/* GEN_NOREAS, GEN_PROTOERR: nothing to report upward. */
		break;
	}

	/*
	 *	tpclnp_ctlinput1 is called directly so that we don't
	 *	have to build an iso_sockaddr out of src.
	 */
	if (cmd >= 0)
		tpclnp_ctlinput1(cmd, src);

	m_freem(m);
}