//
// RosKmAdapter::SubmitCommand
//
// Accepts one DMA buffer submission: on VC4 builds it first fixes up the
// buffer's self-references (non-paging submissions only), then hands the
// submission to QueueDmaBuffer and signals m_workerThreadEvent.
//
// Parameters:
//   pSubmitCommand - submission arguments; supplies the paging flag, the
//                    driver private data (ROSDMABUFINFO), the DMA buffer
//                    physical address and the context handle.
//
// Returns:
//   Always STATUS_SUCCESS.
//
NTSTATUS
RosKmAdapter::SubmitCommand(
    IN_CONST_PDXGKARG_SUBMITCOMMAND     pSubmitCommand)
{
#if VC4

    //
    // Paging submissions are left untouched; everything else has its
    // DMA buffer self-references rewritten before being queued.
    //
    if (!pSubmitCommand->Flags.Paging)
    {
        ROSDMABUFINFO  *pBufInfo = (ROSDMABUFINFO *)pSubmitCommand->pDmaBufferPrivateData;
        BYTE * const    pBufBase = pBufInfo->m_pDmaBuffer;

        //
        // Keep the DMA buffer physical address in the private data; it is
        // needed for a fully pre-patched DMA buffer
        //
        pBufInfo->m_DmaBufferPhysicalAddress = pSubmitCommand->DmaBufferPhysicalAddress;

        const UINT apertureAddress =
            GetAperturePhysicalAddress(pSubmitCommand->DmaBufferPhysicalAddress.LowPart);

        //
        // Each self-reference entry names a location inside the DMA buffer
        // (PatchOffset) that receives the address of the DMA buffer itself,
        // biased by the bus address offset and by AllocationOffset.
        //
        for (UINT selfRefIndex = 0;
             selfRefIndex < pBufInfo->m_DmaBufState.m_NumDmaBufSelfRef;
             ++selfRefIndex)
        {
            D3DDDI_PATCHLOCATIONLIST   *pSelfRef = &pBufInfo->m_DmaBufSelfRef[selfRefIndex];
            UINT                       *pPatchSlot = (UINT *)(pBufBase + pSelfRef->PatchOffset);

            *pPatchSlot = apertureAddress + m_busAddressOffset + pSelfRef->AllocationOffset;
        }
    }

#endif

    //
    // NOTE: the context is NULL for paging operations. It is fetched here
    //       but not otherwise used.
    //
    RosKmContext *pContext = (RosKmContext *)pSubmitCommand->hContext;
    (void)pContext;

    QueueDmaBuffer(pSubmitCommand);

    //
    // Signal the event that wakes up the worker thread for the GPU node
    //
    KeSetEvent(&m_workerThreadEvent, 0, FALSE);

    return STATUS_SUCCESS;
}
//
// RosKmdRapAdapter::ProcessRenderBuffer
//
// Processes one DMA buffer submission.
//
//  - Software command buffer (m_bSwCommandBuffer set): the range
//    [m_StartOffset, m_EndOffset) of the DMA buffer is walked as an array of
//    GpuCommand entries and each one is executed on the CPU.
//  - Otherwise, on VC4 builds with m_flags.m_isVC4 set: a Rendering Control
//    List is generated, caches are invalidated/cleared, and the Binning and
//    Rendering Control Lists are submitted to the GPU. On non-VC4 builds this
//    branch compiles to nothing.
//
// Parameters:
//   pDmaBufSubmission - the submission to process; supplies the DMA buffer
//                       info and the start/end offsets within the buffer.
//
void
RosKmdRapAdapter::ProcessRenderBuffer(
    ROSDMABUFSUBMISSION * pDmaBufSubmission)
{
    ROSDMABUFINFO * pDmaBufInfo = pDmaBufSubmission->m_pDmaBufInfo;

    if (pDmaBufInfo->m_DmaBufState.m_bSwCommandBuffer)
    {
        // The submitted range must hold a whole number of GpuCommand entries
        NT_ASSERT(0 == (pDmaBufSubmission->m_EndOffset - pDmaBufSubmission->m_StartOffset) % sizeof(GpuCommand));

        // [pGpuCommand, pEndofCommand) is the half-open range of commands to execute
        GpuCommand * pGpuCommand = (GpuCommand *)(pDmaBufInfo->m_pDmaBuffer + pDmaBufSubmission->m_StartOffset);
        GpuCommand * pEndofCommand = (GpuCommand *)(pDmaBufInfo->m_pDmaBuffer + pDmaBufSubmission->m_EndOffset);

        for (; pGpuCommand < pEndofCommand; pGpuCommand++)
        {
            switch (pGpuCommand->m_commandId)
            {
            // Header and Nop entries require no work
            case Header:
            case Nop:
                break;
            case ResourceCopy:
            {
                //
                // CPU copy of m_sizeBytes bytes; the source and destination
                // GPU addresses are used as byte offsets from
                // RosKmdGlobal::s_pVideoMemory.
                //
                // NOTE(review): RtlCopyMemory requires non-overlapping
                // ranges, and no bounds check on the offsets/size is visible
                // here - confirm both are guaranteed by the producer of the
                // command.
                //
                RtlCopyMemory(
                    ((BYTE *)RosKmdGlobal::s_pVideoMemory) + pGpuCommand->m_resourceCopy.m_dstGpuAddress.QuadPart,
                    ((BYTE *)RosKmdGlobal::s_pVideoMemory) + pGpuCommand->m_resourceCopy.m_srcGpuAddress.QuadPart,
                    pGpuCommand->m_resourceCopy.m_sizeBytes);
            }
            break;
            // Any other command id is silently ignored
            default:
                break;
            }
        }
    }
    else
    {
        //
        // Submit HW command buffer to the GPU
        //

#if VC4
        if (m_flags.m_isVC4)
        {
            //
            // TODO[indyz]:
            //
            // 1. Submit the Binning and Rendering Control list simultaneously
            //    and use semaphore for synchronization
            // 2. Enable interrupt to signal end of frame
            //

            //
            // Generate the Rendering Control List; the returned length is
            // used below to compute the end address of the list
            //
            UINT    renderingControlListLength;
            renderingControlListLength = GenerateRenderingControlList(pDmaBufInfo);

            //
            // NOTE(review): regL2CACTL and regSLCACTL are declared inside
            // this "#if 1" block but are used again after the control lists
            // are submitted, so switching it to "#if 0" would not compile.
            //
#if 1

            //
            // TODO[indyz]: Decide the best way to handle the cache
            //
            KeInvalidateAllCaches();

            //
            // Flush the VC4 GPU caches
            //

            // L2 cache control register: only the L2CCLR field is set
            V3D_REG_L2CACTL regL2CACTL = { 0 };

            regL2CACTL.L2CCLR = 1;

            m_pVC4RegFile->V3D_L2CACTL = regL2CACTL.Value;

            // Slices cache control register: all four bits of each field are set
            V3D_REG_SLCACTL regSLCACTL = { 0 };

            regSLCACTL.ICCS0123 = 0xF;
            regSLCACTL.UCCS0123 = 0xF;
            regSLCACTL.T0CCS0123 = 0xF;
            regSLCACTL.T1CCS0123 = 0xF;

            m_pVC4RegFile->V3D_SLCACTL = regSLCACTL.Value;

#endif

            //
            // Submit the Binning Control List from UMD to the GPU
            //
            // Only the low 32 bits of the physical address are used below,
            // and the DMA buffer is expected to be no larger than kPageSize.
            //
            NT_ASSERT(pDmaBufInfo->m_DmaBufferPhysicalAddress.HighPart == 0);
            NT_ASSERT(pDmaBufInfo->m_DmaBufferSize <= kPageSize);

            UINT dmaBufBaseAddress;

            // Address of the DMA buffer: aperture physical address plus m_busAddressOffset
            dmaBufBaseAddress = GetAperturePhysicalAddress(pDmaBufInfo->m_DmaBufferPhysicalAddress.LowPart);
            dmaBufBaseAddress += m_busAddressOffset;

            //
            // Skip the command buffer header (one GpuCommand) at the beginning
            //
            // NOTE(review): the first argument presumably selects the
            // binning (true) vs. rendering (false) control list - confirm
            // against SubmitControlList.
            //
            SubmitControlList(
                true,
                dmaBufBaseAddress + pDmaBufSubmission->m_StartOffset + sizeof(GpuCommand),
                dmaBufBaseAddress + pDmaBufSubmission->m_EndOffset);

            //
            // Submit the Rendering Control List to the GPU; the range is
            // [list address, list address + renderingControlListLength)
            // with m_busAddressOffset applied
            //
            SubmitControlList(
                false,
                m_renderingControlListPhysicalAddress + m_busAddressOffset,
                m_renderingControlListPhysicalAddress + m_busAddressOffset + renderingControlListLength);

            MoveToNextBinnerRenderMemChunk(renderingControlListLength);

            //
            // Flush the VC4 GPU caches again by rewriting the same register
            // values that were written before the submission
            //

            m_pVC4RegFile->V3D_L2CACTL = regL2CACTL.Value;
            m_pVC4RegFile->V3D_SLCACTL = regSLCACTL.Value;
        }
#endif  // VC4
    }
}