// block dimensions : widthStride, heightStride
// texture dims : width, height, x offset, y offset
// Writes the opening part of an EFB-copy encoding pixel shader through WRITE(p, ...):
//   1. the declaration of the int4 `position` input,
//   2. three colour helper functions (RGBA8ToRGB8, RGBA8ToRGBA6, RGBA8ToRGB565),
//   3. the per-API sampler/output declarations and the opening of main(),
//   4. the statements that turn the output pixel coordinate (uv1) into a source
//      coordinate (sampleUv / uv0), plus `sample_offset`.
// The generated main() is left open: no closing brace is written by this function.
//
// p       - output cursor handed to every WRITE call.
// format  - EFB copy format; selects the block width/height and the sample count.
// ApiType - OpenGL and Vulkan get their own GLSL prologue; every other value gets the
//           D3D (HLSL) prologue.
static void WriteSwizzler(char*& p, EFBCopyFormat format, APIType ApiType)
{
  // position.xy: left, top of the source rectangle within the source texture
  // position.z:  width of the destination rectangle
  // position.w:  scale_factor (1 or 2)
  if (ApiType != APIType::Vulkan)
  {
    WRITE(p, "uniform int4 position;\n");
  }
  else
  {
    // On Vulkan the value comes in through a push-constant block; main() copies it into a
    // local named `position` further down.
    WRITE(p, "layout(std140, push_constant) uniform PCBlock { int4 position; } PC;\n");
  }

  // The EFB format has no alpha channel here, so alpha in the copy is forced to 1.
  WRITE(p, "float4 RGBA8ToRGB8(float4 src)\n"
           "{\n"
           "  return float4(src.xyz, 1.0);\n"
           "}\n");

  // Drops the low two bits of every channel and renormalizes against 63.
  WRITE(p, "float4 RGBA8ToRGBA6(float4 src)\n"
           "{\n"
           "  int4 val = int4(src * 255.0) >> 2;\n"
           "  return float4(val) / 63.0;\n"
           "}\n");

  // Keeps 5/6/5 bits of red/green/blue and sets alpha to 1.
  WRITE(p, "float4 RGBA8ToRGB565(float4 src)\n"
           "{\n"
           "  int4 val = int4(src * 255.0);\n"
           "  val = int4(val.r >> 3, val.g >> 2, val.b >> 3, 1);\n"
           "  return float4(val) / float4(31.0, 63.0, 31.0, 1.0);\n"
           "}\n");

  const int block_width = TexDecoder_GetEFBCopyBlockWidthInTexels(format);
  const int block_height = TexDecoder_GetEFBCopyBlockHeightInTexels(format);
  int sample_count = GetEncodedSampleCount(format);

  // Resource declarations and the start of main(), per backend.
  switch (ApiType)
  {
  case APIType::OpenGL:
    WRITE(p, "#define samp0 samp9\n"
             "SAMPLER_BINDING(9) uniform sampler2DArray samp0;\n");
    WRITE(p, "FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n"
             "void main()\n"
             "{\n"
             "  int2 sampleUv;\n"
             "  int2 uv1 = int2(gl_FragCoord.xy);\n");
    break;

  case APIType::Vulkan:
    WRITE(p, "SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n"
             "FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n");
    WRITE(p, "void main()\n"
             "{\n"
             "  int2 sampleUv;\n"
             "  int2 uv1 = int2(gl_FragCoord.xy);\n"
             "  int4 position = PC.position;\n");
    break;

  default:  // D3D
    WRITE(p, "sampler samp0 : register(s0);\n"
             "Texture2DArray Tex0 : register(t0);\n");
    WRITE(p, "void main(\n"
             "  out float4 ocol0 : SV_Target, in float4 rawpos : SV_Position)\n"
             "{\n"
             "  int2 sampleUv;\n"
             "  int2 uv1 = int2(rawpos.xy);\n");
    break;
  }

  // NOTE(review): the shift amounts below come from IntLog2, so block sizes and sample
  // counts are presumably powers of two - confirm against the format tables.
  const int texels_per_block = block_height * block_width;

  // The block position is derived with the sample count as reported for the format,
  // i.e. before the samples == 1 adjustment below.
  WRITE(p, "  int x_block_position = (uv1.x >> %d) << %d;\n",
        IntLog2(texels_per_block / sample_count), IntLog2(block_width));
  WRITE(p, "  int y_block_position = uv1.y << %d;\n", IntLog2(block_height));

  if (sample_count == 1)
  {
    // With samples == 1, we write out pairs of blocks; one A8R8, one G8B8.
    WRITE(p, "  bool first = (uv1.x & %d) == 0;\n", texels_per_block / 2);
    sample_count = 2;
  }

  const int outputs_per_block = texels_per_block / sample_count;
  const int outputs_per_row = block_width / sample_count;

  WRITE(p, "  int offset_in_block = uv1.x & %d;\n", outputs_per_block - 1);
  WRITE(p, "  int y_offset_in_block = offset_in_block >> %d;\n", IntLog2(outputs_per_row));
  WRITE(p, "  int x_offset_in_block = (offset_in_block & %d) << %d;\n", outputs_per_row - 1,
        IntLog2(sample_count));

  WRITE(p, "  sampleUv.x = x_block_position + x_offset_in_block;\n"
           "  sampleUv.y = y_block_position + y_offset_in_block;\n");

  // Generated steps, in order:
  //   - start from sampleUv, the sample position in (int)gx_coords
  //   - move to the center of the pixel
  //   - scale by two if needed (this also moves to pixel borders so that linear filtering
  //     will average adjacent pixels)
  //   - move to the copied rect
  WRITE(p, "  float2 uv0 = float2(sampleUv);\n"
           "  uv0 += float2(0.5, 0.5);\n"
           "  uv0 *= float(position.w);\n"
           "  uv0 += float2(position.xy);\n");

  // Normalize to [0:1].
  WRITE(p, "  uv0 /= float2(%d, %d);\n", EFB_WIDTH, EFB_HEIGHT);

  // OpenGL has to flip up and down.
  if (ApiType == APIType::OpenGL)
  {
    WRITE(p, "  uv0.y = 1.0-uv0.y;\n");
  }

  WRITE(p, "  float sample_offset = float(position.w) / float(%d);\n", EFB_WIDTH);
}
// block dimensions : widthStride, heightStride
// texture dims : width, height, x offset, y offset
// Writes the opening part of an EFB-copy encoding pixel shader through WRITE(p, ...).
// WriteHeader and WriteSampleFunction are invoked first; this function then emits the
// start of main() and the statements that turn the output pixel coordinate (uv1) into a
// source coordinate (sampleUv / uv0), plus `pixel_size`.
// The generated main() is left open: no closing brace is written by this function.
//
// p       - output cursor handed to every WRITE call and to the two helpers.
// params  - copy parameters; only forwarded to WriteSampleFunction here.
// format  - EFB copy format; selects the block width/height and the sample count.
// ApiType - OpenGL and Vulkan share the GLSL entry point; every other value gets the
//           D3D (HLSL) signature.
//
// NOTE(review): the generated code reads `position` and `y_scale`, which this function
// never declares - presumably WriteHeader emits them; confirm there.
static void WriteSwizzler(char*& p, const EFBCopyParams& params, EFBCopyFormat format,
                          APIType ApiType)
{
  WriteHeader(p, ApiType);
  WriteSampleFunction(p, params, ApiType);

  const bool is_opengl = ApiType == APIType::OpenGL;
  const bool uses_glsl_entry = is_opengl || ApiType == APIType::Vulkan;

  if (!uses_glsl_entry)
  {
    // D3D
    WRITE(p, "void main(\n"
             "  in float3 v_tex0 : TEXCOORD0,\n"
             "  in float4 rawpos : SV_Position,\n"
             "  out float4 ocol0 : SV_Target)\n");
    WRITE(p, "{\n"
             "  int2 sampleUv;\n"
             "  int2 uv1 = int2(rawpos.xy);\n");
  }
  else
  {
    WRITE(p, "void main()\n"
             "{\n"
             "  int2 sampleUv;\n"
             "  int2 uv1 = int2(gl_FragCoord.xy);\n");
  }

  const int block_width = TexDecoder_GetEFBCopyBlockWidthInTexels(format);
  const int block_height = TexDecoder_GetEFBCopyBlockHeightInTexels(format);
  int sample_count = GetEncodedSampleCount(format);

  // NOTE(review): the shift amounts below come from IntLog2, so block sizes and sample
  // counts are presumably powers of two - confirm against the format tables.
  const int texels_per_block = block_height * block_width;

  // The block position is derived with the sample count as reported for the format,
  // i.e. before the samples == 1 adjustment below.
  WRITE(p, "  int x_block_position = (uv1.x >> %d) << %d;\n",
        IntLog2(texels_per_block / sample_count), IntLog2(block_width));
  WRITE(p, "  int y_block_position = uv1.y << %d;\n", IntLog2(block_height));

  if (sample_count == 1)
  {
    // With samples == 1, we write out pairs of blocks; one A8R8, one G8B8.
    WRITE(p, "  bool first = (uv1.x & %d) == 0;\n", texels_per_block / 2);
    sample_count = 2;
  }

  const int outputs_per_block = texels_per_block / sample_count;
  const int outputs_per_row = block_width / sample_count;

  WRITE(p, "  int offset_in_block = uv1.x & %d;\n", outputs_per_block - 1);
  WRITE(p, "  int y_offset_in_block = offset_in_block >> %d;\n", IntLog2(outputs_per_row));
  WRITE(p, "  int x_offset_in_block = (offset_in_block & %d) << %d;\n", outputs_per_row - 1,
        IntLog2(sample_count));

  WRITE(p, "  sampleUv.x = x_block_position + x_offset_in_block;\n"
           "  sampleUv.y = y_block_position + y_offset_in_block;\n");

  // Generated steps, in order:
  //   - start from sampleUv, the sample position in (int)gx_coords
  //   - move to the center of the pixel
  //   - scale by two if needed (this also moves to pixel borders so that linear filtering
  //     will average adjacent pixels)
  //   - move to the copied rect
  WRITE(p, "  float2 uv0 = float2(sampleUv);\n"
           "  uv0 += float2(0.5, 0.5);\n"
           "  uv0 *= float(position.w);\n"
           "  uv0 += float2(position.xy);\n");

  // Normalize to [0:1].
  WRITE(p, "  uv0 /= float2(%d, %d);\n", EFB_WIDTH, EFB_HEIGHT);

  // Apply the y scaling.
  WRITE(p, "  uv0 /= float2(1, y_scale);\n");

  // OpenGL has to flip up and down.
  if (is_opengl)
  {
    WRITE(p, "  uv0.y = 1.0-uv0.y;\n");
  }

  WRITE(p, "  float2 pixel_size = float2(position.w, position.w) / float2(%d, %d);\n", EFB_WIDTH,
        EFB_HEIGHT);
}