Example #1
0
/*
==================
Process_Fragment_4x4

Shades a single 4x4 tile and records its depth.

	w_seed        : one seed value per edge (two edges are consumed here)
	i_tile_in     : index of the 4x4 tile, relative to i_buffer_in
	i_buffer_in   : base buffer index the tile index is added to
	coverage_mask : forwarded unchanged to pixel_shader
	raster_output : supplies reject_table[0][edge][row], the per-row offsets
	shader_input  : forwarded to pixel_shader; receives the depth / mask updates

After pixel_shader returns, shader_input.z_max is written to the 4x4 depth
tile that contains i_buffer, and the bits of the enclosing 16x16 and 64x64
tiles are set in tile_mask_16x16 / tile_mask_64x64.
==================
*/
void Process_Fragment_4x4(

	__int32 w_seed[2],
	__int32 i_tile_in,
	__int32 i_buffer_in,
	const unsigned __int32 coverage_mask,
	raster_output_& raster_output,
	shader_input_& shader_input
) {

	// A 4x4 tile spans 16 consecutive buffer entries.
	const __int32 i_buffer = i_buffer_in + (i_tile_in * 4 * 4);

	// NOTE(review): only bazza[0] and bazza[1] are filled in below; bazza[2] is
	// handed to pixel_shader uninitialised. Confirm pixel_shader never reads it.
	__m128i bazza[3][4];

	for (__int32 i_edge = 0; i_edge < 2; i_edge++) {

		// The same seed is combined with each of the four row entries of the table.
		__m128i seed = set_all(w_seed[i_edge]);

		for (__int32 i_row = 0; i_row < 4; i_row++) {
			bazza[i_edge][i_row] = seed + load_u(raster_output.reject_table[0][i_edge][i_row]);
		}
	}

	pixel_shader(i_buffer, coverage_mask, bazza, shader_input);

	// Read z_max only after pixel_shader has run, as shader_input is passed to it.
	shader_input.depth_tiles_4x4[i_buffer / (4 * 4)] = shader_input.z_max;

	// Flag the enclosing coarser tiles.
	shader_input.tile_mask_16x16 |= one_bit_64 << (i_buffer / (16 * 16));
	shader_input.tile_mask_64x64 |= one_bit_64 << (i_buffer / (64 * 64));
}
Example #2
0
void ShaderLoad::LoadData(const uint8_t* data, size_t size,
                          YCommon::YContainers::MemBuffer* buffer) {
  YASSERT(YEngineData::VerifyShaderBuffer(flatbuffers::Verifier(data, size)),
          "Invalid Shader Data.");

  shader_ids = nullptr;
  num_shader_ids = 0;

  const YEngineData::Shader* shader_data = YEngineData::GetShader(data);
  const char* shader_name = shader_data->name()->c_str();

  const auto variants = shader_data->variants();
  num_shader_ids = variants->size();
  shader_ids = static_cast<YRenderer::ShaderID*>(
      buffer->Allocate(sizeof(*shader_ids) * num_shader_ids));
  YASSERT(shader_ids, "Out of memory - could not load shader data.");

  YRenderer::ShaderID* shader_id_iter = shader_ids;
  for (auto variant_iter = variants->begin();
       variant_iter != variants->end();
       ++variant_iter) {
    const char* variant_name = variant_iter->name()->c_str();
    *shader_id_iter++ = YRenderer::Renderer::CreateShader(
        shader_name, variant_name,
        variant_iter->vertex_shader()->Data(),
        variant_iter->vertex_shader()->size(),
        variant_iter->pixel_shader()->Data(),
        variant_iter->pixel_shader()->size());
  }
}
Example #3
0
/*
==================
Process_Fragments

Shades every fragment list held in raster_output and then refreshes the
coarser depth tiles that were flagged while shading.

	raster_output : fragment lists produced earlier (TRIVIAL_ACCEPT_64x64,
	                TRIVIAL_ACCEPT_16x16, TRIVIAL_ACCEPT_4x4 and the
	                "complete" list), each entry packing the buffer index in
	                the upper bits and the coverage mask in the low 16 bits
	shader_input  : shading state; its tile masks and depth tiles are updated

Order of work:
	1. clear tile_mask_16x16 / tile_mask_64x64
	2. dispatch the three trivial-accept lists to Process_Fragment_64x64 /
	   Process_Fragment_16x16 / Process_Fragment_4x4
	3. shade the "complete" fragments directly through pixel_shader, writing
	   z_max into the 4x4 depth tile and flagging the enclosing coarser tiles
	4. for every flagged 16x16 tile, reduce its sixteen 4x4 depth values into
	   depth_tiles_16x16; then likewise reduce 16x16 values into
	   depth_tiles_64x64 for every flagged 64x64 tile

Both tile masks are zero again when the function returns.
==================
*/
void Process_Fragments(

	raster_output_& raster_output,
	shader_input_& shader_input
) {

	shader_input.tile_mask_16x16 = 0x0;
	shader_input.tile_mask_64x64 = 0x0;

	//===============================================================================================
	// Trivially accepted 64x64 blocks.
	{
		const __int32 n_fragments = raster_output.n_fragments[raster_output_::TRIVIAL_ACCEPT_64x64];
		for (__int32 i_fragment = 0; i_fragment < n_fragments; i_fragment++) {

			raster_fragment_& raster_fragment = raster_output.raster_fragment[raster_output_::TRIVIAL_ACCEPT_64x64][i_fragment];

			// Upper bits: buffer index. Low 16 bits: coverage mask.
			const __int32 i_buffer = raster_fragment.buffer_mask_packed >> 16;
			const unsigned __int32 coverage_mask = raster_fragment.buffer_mask_packed & 0xffff;

			Process_Fragment_64x64(

				raster_fragment.w,
				i_buffer,
				coverage_mask,
				raster_output,
				shader_input
			);
		}
	}
	//===============================================================================================
	// Trivially accepted 16x16 blocks.
	{
		const __int32 n_fragments = raster_output.n_fragments[raster_output_::TRIVIAL_ACCEPT_16x16];
		for (__int32 i_fragment = 0; i_fragment < n_fragments; i_fragment++) {

			raster_fragment_& raster_fragment = raster_output.raster_fragment[raster_output_::TRIVIAL_ACCEPT_16x16][i_fragment];

			const __int32 i_buffer = raster_fragment.buffer_mask_packed >> 16;
			const unsigned __int32 coverage_mask = raster_fragment.buffer_mask_packed & 0xffff;

			Process_Fragment_16x16(

				raster_fragment.w,
				0,
				i_buffer,
				coverage_mask,
				raster_output,
				shader_input
			);
		}
	}
	//===============================================================================================
	// Trivially accepted 4x4 blocks.
	{

		const __int32 n_fragments = raster_output.n_fragments[raster_output_::TRIVIAL_ACCEPT_4x4];
		for (__int32 i_fragment = 0; i_fragment < n_fragments; i_fragment++) {

			raster_fragment_& raster_fragment = raster_output.raster_fragment[raster_output_::TRIVIAL_ACCEPT_4x4][i_fragment];
			const __int32 i_buffer = raster_fragment.buffer_mask_packed >> 16;
			const unsigned __int32 coverage_mask = raster_fragment.buffer_mask_packed & 0xffff;
			Process_Fragment_4x4(raster_fragment.w, 0, i_buffer, coverage_mask, raster_output, shader_input);
		}
	}
	//===============================================================================================
	// Disabled: the PARTIAL_ACCEPT_4x4 list is not shaded from here. The original
	// loop is kept below for reference.
	{
		//const __int32 start = raster_output_::MAX_FRAGMENTS - 1;
		//const __int32 end = raster_output.n_fragments[raster_output_::PARTIAL_ACCEPT_4x4];
		//for (__int32 i_fragment = start; i_fragment > end; i_fragment--) {


		//	raster_fragment_& raster_fragment = raster_output.raster_fragment[raster_output_::PARTIAL_ACCEPT_4x4][i_fragment];
		//	const __int32 i_buffer = raster_fragment.buffer_mask_packed >> 16;
		//	const unsigned __int32 coverage_mask = raster_fragment.buffer_mask_packed & 0xffff;
		//	Process_Fragment_4x4(raster_fragment.w, 0, i_buffer, coverage_mask, raster_output, shader_input);
		//}
	}
	//===============================================================================================
	// "Complete" fragments carry their own bazza values, so they go straight to
	// pixel_shader.
	{
		const __int32 n_fragments = raster_output.n_fragments_COMPLETE;
		__int32 n_depth_fragments = 0;
		for (__int32 i_fragment = 0; i_fragment < n_fragments; i_fragment++) {

			raster_fragment_complete_& raster_fragment = raster_output.raster_fragment_complete[i_fragment];
			const __int32 i_buffer = raster_fragment.buffer_mask_packed >> 16;
			const unsigned __int32 coverage_mask = raster_fragment.buffer_mask_packed & 0xffff;

			pixel_shader(i_buffer, coverage_mask, raster_fragment.bazza, shader_input);

			// Record the tile depth and flag the enclosing coarser tiles for the
			// reduction passes below.
			const __int32 i_buffer_depth_4x4 = i_buffer / (4 * 4);
			const __int32 i_buffer_depth_16x16 = i_buffer / (16 * 16);
			const __int32 i_buffer_depth_64x64 = i_buffer / (64 * 64);
			shader_input.depth_tiles_4x4[i_buffer_depth_4x4] = shader_input.z_max;
			shader_input.tile_mask_16x16 |= one_bit_64 << i_buffer_depth_16x16;
			shader_input.tile_mask_64x64 |= one_bit_64 << i_buffer_depth_64x64;
		}
	}
	//===============================================================================================
	// 4x4 -> 16x16 depth reduction, one pass per flagged 16x16 tile.
	{
		//printf_s(" %llu ", shader_input.tile_mask_16x16);

		// Drain the mask lowest bit first; a non-zero mask guarantees that the
		// bit scan finds a bit.
		while (shader_input.tile_mask_16x16 != 0) {

			unsigned long i_tile_16x16;
			_BitScanForward64(&i_tile_16x16, shader_input.tile_mask_16x16);
			shader_input.tile_mask_16x16 ^= one_bit_64 << i_tile_16x16;

			// Each 16x16 tile owns sixteen consecutive 4x4 depth entries.
			const __int32 i_tile_4x4 = i_tile_16x16 * (4 * 4);

			// Combine the four groups of four entries into one vector.
			__m128 z_max = load_u(shader_input.depth_tiles_4x4 + i_tile_4x4 + (0 * 4));
			for (__int32 i_row = 1; i_row < 4; i_row++) {
				z_max = min_vec(load_u(shader_input.depth_tiles_4x4 + i_tile_4x4 + (i_row * 4)), z_max);
			}

			// Fold the vector against three successive rotations of itself.
			// NOTE(review): assumes min_vec is a lane-wise minimum and rotate_left
			// shifts lanes by one, so every lane ends up holding the overall
			// minimum - confirm against the helper definitions.
			__m128 z_out = z_max;
			for (__int32 i_rotation = 0; i_rotation < 3; i_rotation++) {
				z_max = rotate_left(z_max);
				z_out = min_vec(z_max, z_out);
			}

			shader_input.depth_tiles_16x16[i_tile_16x16] = store_s(z_out);
		}
	}
	// 16x16 -> 64x64 depth reduction, one pass per flagged 64x64 tile.
	{
		//printf_s(" %llu ", n_tiles);

		while (shader_input.tile_mask_64x64 != 0) {

			unsigned long i_tile_64x64;
			_BitScanForward64(&i_tile_64x64, shader_input.tile_mask_64x64);
			shader_input.tile_mask_64x64 ^= one_bit_64 << i_tile_64x64;

			// Each 64x64 tile owns sixteen consecutive 16x16 depth entries.
			const __int32 i_tile_16x16 = i_tile_64x64 * (4 * 4);

			__m128 z_max = load_u(shader_input.depth_tiles_16x16 + i_tile_16x16 + (0 * 4));
			for (__int32 i_row = 1; i_row < 4; i_row++) {
				z_max = min_vec(load_u(shader_input.depth_tiles_16x16 + i_tile_16x16 + (i_row * 4)), z_max);
			}

			// Same fold as in the 4x4 -> 16x16 pass.
			__m128 z_out = z_max;
			for (__int32 i_rotation = 0; i_rotation < 3; i_rotation++) {
				z_max = rotate_left(z_max);
				z_out = min_vec(z_max, z_out);
			}

			shader_input.depth_tiles_64x64[i_tile_64x64] = store_s(z_out);
		}
	}
}