static void __gen9_force_wake_get(struct drm_i915_private *dev_priv,
				  int fw_engine)
{
	/* Check for Render Engine */
	if (FORCEWAKE_RENDER & fw_engine) {
		if (wait_for_atomic((__raw_i915_read32(dev_priv, FORCEWAKE_ACK_RENDER_GEN9) &
				     FORCEWAKE_KERNEL) == 0,
				    FORCEWAKE_ACK_TIMEOUT_MS))
			DRM_ERROR("Timed out: Render forcewake old ack to clear.\n");

		__raw_i915_write32(dev_priv, FORCEWAKE_RENDER_GEN9,
				   _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL));

		if (wait_for_atomic((__raw_i915_read32(dev_priv, FORCEWAKE_ACK_RENDER_GEN9) &
				     FORCEWAKE_KERNEL),
				    FORCEWAKE_ACK_TIMEOUT_MS))
			DRM_ERROR("Timed out: waiting for Render to ack.\n");
	}

	/* Check for Media Engine */
	if (FORCEWAKE_MEDIA & fw_engine) {
		if (wait_for_atomic((__raw_i915_read32(dev_priv, FORCEWAKE_ACK_MEDIA_GEN9) &
				     FORCEWAKE_KERNEL) == 0,
				    FORCEWAKE_ACK_TIMEOUT_MS))
			DRM_ERROR("Timed out: Media forcewake old ack to clear.\n");

		__raw_i915_write32(dev_priv, FORCEWAKE_MEDIA_GEN9,
				   _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL));

		if (wait_for_atomic((__raw_i915_read32(dev_priv, FORCEWAKE_ACK_MEDIA_GEN9) &
				     FORCEWAKE_KERNEL),
				    FORCEWAKE_ACK_TIMEOUT_MS))
			DRM_ERROR("Timed out: waiting for Media to ack.\n");
	}

	/* Check for Blitter Engine */
	if (FORCEWAKE_BLITTER & fw_engine) {
		if (wait_for_atomic((__raw_i915_read32(dev_priv, FORCEWAKE_ACK_BLITTER_GEN9) &
				     FORCEWAKE_KERNEL) == 0,
				    FORCEWAKE_ACK_TIMEOUT_MS))
			DRM_ERROR("Timed out: Blitter forcewake old ack to clear.\n");

		__raw_i915_write32(dev_priv, FORCEWAKE_BLITTER_GEN9,
				   _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL));

		if (wait_for_atomic((__raw_i915_read32(dev_priv, FORCEWAKE_ACK_BLITTER_GEN9) &
				     FORCEWAKE_KERNEL),
				    FORCEWAKE_ACK_TIMEOUT_MS))
			DRM_ERROR("Timed out: waiting for Blitter to ack.\n");
	}
}
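/*
 * Background for the masked-bit writes used throughout these snippets: on
 * these GPUs many registers treat the upper 16 bits of a write as a per-bit
 * write-enable mask, so a single MMIO write can set or clear individual bits
 * without a read-modify-write cycle. A simplified sketch of the i915 helper
 * macros (the real definitions in i915_reg.h also add build-time mask
 * checks):
 */
#define __MASKED_FIELD(mask, value)	((mask) << 16 | (value))
#define _MASKED_FIELD(mask, value)	__MASKED_FIELD(mask, value)
#define _MASKED_BIT_ENABLE(a)		({ typeof(a) _a = (a); _MASKED_FIELD(_a, _a); })
#define _MASKED_BIT_DISABLE(a)		(_MASKED_FIELD((a), 0))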
static void gen9_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
	/* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
	I915_WRITE(GEN9_CSFE_CHICKEN1_RCS,
		   _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE));

	/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
	I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
		   GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);

	/* WaDisableKillLogic:bxt,skl,kbl */
	if (!IS_COFFEELAKE(dev_priv))
		I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
			   ECOCHK_DIS_TLB);

	if (HAS_LLC(dev_priv)) {
		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
		 *
		 * Must match Display Engine. See
		 * WaCompressedResourceDisplayNewHashMode.
		 */
		I915_WRITE(MMCD_MISC_CTRL,
			   I915_READ(MMCD_MISC_CTRL) |
			   MMCD_PCLA |
			   MMCD_HOTSPOT_EN);
	}

	/* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
		   BDW_DISABLE_HDC_INVALIDATION);

	/* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
	if (IS_GEN9_LP(dev_priv)) {
		u32 val = I915_READ(GEN8_L3SQCREG1);

		val &= ~L3_PRIO_CREDITS_MASK;
		val |= L3_GENERAL_PRIO_CREDITS(62) | L3_HIGH_PRIO_CREDITS(2);
		I915_WRITE(GEN8_L3SQCREG1, val);
	}

	/* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
	I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
		   GEN8_LQSC_FLUSH_COHERENT_LINES);

	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
	I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
		   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
}
static void direct_interrupts_to_guc(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;
	int irqs;
	u32 tmp;

	/* tell all command streamers to forward interrupts (but not vblank)
	 * to GuC
	 */
	irqs = _MASKED_BIT_ENABLE(GFX_INTERRUPT_STEERING);
	for_each_engine(engine, dev_priv)
		I915_WRITE(RING_MODE_GEN7(engine), irqs);

	/* route USER_INTERRUPT to Host, all others are sent to GuC. */
	irqs = GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT |
	       GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
	/* These three registers have the same bit definitions */
	I915_WRITE(GUC_BCS_RCS_IER, ~irqs);
	I915_WRITE(GUC_VCS2_VCS1_IER, ~irqs);
	I915_WRITE(GUC_WD_VECS_IER, ~irqs);

	/*
	 * If the GuC has routed PM interrupts to itself, keep every
	 * interrupt the GuC has left unmasked, but do not keep the
	 * redirect bit itself.
	 */
	tmp = I915_READ(GEN6_PMINTRMSK);
	if (tmp & GEN8_PMINTR_REDIRECT_TO_NON_DISP) {
		dev_priv->rps.pm_intr_keep |=
			~(tmp & ~GEN8_PMINTR_REDIRECT_TO_NON_DISP);
		dev_priv->rps.pm_intr_keep &= ~GEN8_PMINTR_REDIRECT_TO_NON_DISP;
	}
}
/*
 * Transfer the firmware image to RAM for execution by the microcontroller.
 *
 * Architecturally, the DMA engine is bidirectional, and can potentially even
 * transfer between GTT locations. This functionality is left out of the API
 * for now as there is no need for it.
 */
static int guc_xfer_ucode(struct intel_guc *guc)
{
	struct drm_i915_private *dev_priv = guc_to_i915(guc);
	struct intel_uc_fw *guc_fw = &guc->fw;
	unsigned long offset;

	/*
	 * The header plus uCode will be copied to WOPCM via DMA, excluding any
	 * other components
	 */
	I915_WRITE(DMA_COPY_SIZE, guc_fw->header_size + guc_fw->ucode_size);

	/* Set the source address for the new blob */
	offset = intel_uc_fw_ggtt_offset(guc_fw) + guc_fw->header_offset;
	I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset));
	I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF);

	/*
	 * Set the DMA destination. Current uCode expects the code to be
	 * loaded at 8k; locations below this are used for the stack.
	 */
	I915_WRITE(DMA_ADDR_1_LOW, 0x2000);
	I915_WRITE(DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM);

	/* Finally start the DMA */
	I915_WRITE(DMA_CTRL, _MASKED_BIT_ENABLE(UOS_MOVE | START_DMA));

	return guc_wait_ucode(guc);
}
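/*
 * For reference, the address split above relies on the kernel's 32-bit split
 * helpers; a sketch of their definitions from include/linux/kernel.h is
 * shown below. The "& 0xFFFF" applied to the result then truncates the high
 * half, since the GGTT offset programmed into DMA_ADDR_0_HIGH fits in well
 * under 48 bits.
 */
#define upper_32_bits(n)	((u32)(((n) >> 16) >> 16))
#define lower_32_bits(n)	((u32)((n) & 0xffffffff))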
static void __gen6_gt_force_wake_mt_get(struct drm_i915_private *dev_priv,
					int fw_engine)
{
	u32 forcewake_ack;

	if (IS_HASWELL(dev_priv->dev) || IS_GEN8(dev_priv->dev))
		forcewake_ack = FORCEWAKE_ACK_HSW;
	else
		forcewake_ack = FORCEWAKE_MT_ACK;

	if (wait_for_atomic((__raw_i915_read32(dev_priv, forcewake_ack) &
			     FORCEWAKE_KERNEL) == 0,
			    FORCEWAKE_ACK_TIMEOUT_MS))
		DRM_ERROR("Timed out waiting for forcewake old ack to clear.\n");

	__raw_i915_write32(dev_priv, FORCEWAKE_MT,
			   _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL));

	/* something from same cacheline, but !FORCEWAKE_MT */
	__raw_posting_read(dev_priv, ECOBUS);

	if (wait_for_atomic((__raw_i915_read32(dev_priv, forcewake_ack) &
			     FORCEWAKE_KERNEL),
			    FORCEWAKE_ACK_TIMEOUT_MS))
		DRM_ERROR("Timed out waiting for forcewake to ack request.\n");

	/* WaRsForcewakeWaitTC0:ivb,hsw */
	if (INTEL_INFO(dev_priv->dev)->gen < 8)
		__gen6_gt_wait_for_thread_c0(dev_priv);
}
static inline void nugpgpu_ring_render_stop(struct nugpgpu_private *gpu_priv)
{
	RING_WRITE_MODE(RING, _MASKED_BIT_ENABLE(STOP_RING));
	if (wait_for((RING_READ_MODE(RING) & MODE_IDLE) != 0, 10000))
		printk(LOG_ERR "MI_MODE failed to stop\n" LOG_END);
	else
		printk(LOG_INFO "MI_MODE ring stopped successfully\n" LOG_END);
}
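/*
 * For symmetry, a hypothetical restart helper (not part of the original
 * driver): because MI_MODE is a masked register, clearing only STOP_RING is
 * enough to let the ring resume fetching, leaving all other mode bits alone.
 */
static inline void nugpgpu_ring_render_start(struct nugpgpu_private *gpu_priv)
{
	/* clear only the STOP_RING bit; other MI_MODE bits are untouched */
	RING_WRITE_MODE(RING, _MASKED_BIT_DISABLE(STOP_RING));
}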
/*
 * Transfer the firmware image to RAM for execution by the microcontroller.
 *
 * Architecturally, the DMA engine is bidirectional, and can potentially even
 * transfer between GTT locations. This functionality is left out of the API
 * for now as there is no need for it.
 *
 * Note that GuC needs the CSS header plus uKernel code to be copied by the
 * DMA engine in one operation, whereas the RSA signature is loaded via MMIO.
 */
static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv,
			      struct i915_vma *vma)
{
	struct intel_uc_fw *guc_fw = &dev_priv->guc.fw;
	unsigned long offset;
	struct sg_table *sg = vma->pages;
	u32 status, rsa[UOS_RSA_SCRATCH_MAX_COUNT];
	int i, ret = 0;

	/* where RSA signature starts */
	offset = guc_fw->rsa_offset;

	/* Copy RSA signature from the fw image to HW for verification */
	sg_pcopy_to_buffer(sg->sgl, sg->nents, rsa, sizeof(rsa), offset);
	for (i = 0; i < UOS_RSA_SCRATCH_MAX_COUNT; i++)
		I915_WRITE(UOS_RSA_SCRATCH(i), rsa[i]);

	/* The header plus uCode will be copied to WOPCM via DMA, excluding any
	 * other components
	 */
	I915_WRITE(DMA_COPY_SIZE, guc_fw->header_size + guc_fw->ucode_size);

	/* Set the source address for the new blob */
	offset = guc_ggtt_offset(vma) + guc_fw->header_offset;
	I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset));
	I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF);

	/*
	 * Set the DMA destination. Current uCode expects the code to be
	 * loaded at 8k; locations below this are used for the stack.
	 */
	I915_WRITE(DMA_ADDR_1_LOW, 0x2000);
	I915_WRITE(DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM);

	/* Finally start the DMA */
	I915_WRITE(DMA_CTRL, _MASKED_BIT_ENABLE(UOS_MOVE | START_DMA));

	/*
	 * Wait for the DMA to complete & the GuC to start up.
	 * NB: Docs recommend not using the interrupt for completion.
	 * Measurements indicate this should take no more than 20ms, so a
	 * timeout here indicates that the GuC has failed and is unusable.
	 * (Higher levels of the driver will attempt to fall back to
	 * execlist mode if this happens.)
	 */
	ret = wait_for(guc_ucode_response(dev_priv, &status), 100);

	DRM_DEBUG_DRIVER("DMA status 0x%x, GuC status 0x%x\n",
			 I915_READ(DMA_CTRL), status);

	if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) {
		DRM_ERROR("GuC firmware signature verification failed\n");
		ret = -ENOEXEC;
	}

	DRM_DEBUG_DRIVER("returning %d\n", ret);

	return ret;
}
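/*
 * guc_ucode_response(), polled above, is not shown in this snippet. A sketch
 * modeled on the historical i915 helper: it samples GUC_STATUS and reports
 * whether the uKernel has reached a booted state. The exact status values
 * checked have varied between firmware generations, so treat the condition
 * below as illustrative rather than authoritative.
 */
static inline bool guc_ucode_response(struct drm_i915_private *dev_priv,
				      u32 *status)
{
	u32 val = I915_READ(GUC_STATUS);
	u32 uk_val = val & GS_UKERNEL_MASK;

	*status = val;
	return (uk_val == GS_UKERNEL_READY) ||
	       ((val & GS_MIA_CORE_STATE) && (uk_val == GS_UKERNEL_LAPIC_DONE));
}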
static void __vlv_force_wake_get(struct drm_i915_private *dev_priv,
				 int fw_engine)
{
	/* Check for Render Engine */
	if (FORCEWAKE_RENDER & fw_engine) {
		if (wait_for_atomic((__raw_i915_read32(dev_priv, FORCEWAKE_ACK_VLV) &
				     FORCEWAKE_KERNEL) == 0,
				    FORCEWAKE_ACK_TIMEOUT_MS))
			DRM_ERROR("Timed out: Render forcewake old ack to clear.\n");

		__raw_i915_write32(dev_priv, FORCEWAKE_VLV,
				   _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL));

		if (wait_for_atomic((__raw_i915_read32(dev_priv, FORCEWAKE_ACK_VLV) &
				     FORCEWAKE_KERNEL),
				    FORCEWAKE_ACK_TIMEOUT_MS))
			DRM_ERROR("Timed out: waiting for Render to ack.\n");
	}

	/* Check for Media Engine */
	if (FORCEWAKE_MEDIA & fw_engine) {
		if (wait_for_atomic((__raw_i915_read32(dev_priv, FORCEWAKE_ACK_MEDIA_VLV) &
				     FORCEWAKE_KERNEL) == 0,
				    FORCEWAKE_ACK_TIMEOUT_MS))
			DRM_ERROR("Timed out: Media forcewake old ack to clear.\n");

		__raw_i915_write32(dev_priv, FORCEWAKE_MEDIA_VLV,
				   _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL));

		if (wait_for_atomic((__raw_i915_read32(dev_priv, FORCEWAKE_ACK_MEDIA_VLV) &
				     FORCEWAKE_KERNEL),
				    FORCEWAKE_ACK_TIMEOUT_MS))
			DRM_ERROR("Timed out: waiting for media to ack.\n");
	}

	/* WaRsForcewakeWaitTC0:vlv */
	__gen6_gt_wait_for_thread_c0(dev_priv);
}
static inline void flushtlb(struct nugpgpu_private *gpu_priv)
{
	RING_WRITE_INSTPM(RING, _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
						   INSTPM_SYNC_FLUSH));
	if (wait_for((RING_READ_INSTPM(RING) & INSTPM_SYNC_FLUSH) == 0, 1000))
		printk(LOG_ERR "Wait for SyncFlush timed out\n" LOG_END);
	else
		printk(LOG_INFO "TLB invalidated successfully\n" LOG_END);
}
static void bxt_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
	gen9_gt_workarounds_apply(dev_priv);

	/* WaDisablePooledEuLoadBalancingFix:bxt */
	I915_WRITE(FF_SLICE_CS_CHICKEN2,
		   _MASKED_BIT_ENABLE(GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE));

	/* WaInPlaceDecompressionHang:bxt */
	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
		   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}
static int gen8_ppgtt_enable(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring;
	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
	int i, j, ret;

	/* bit of a hack to find the actual last used pd */
	int used_pd = ppgtt->num_pd_entries / GEN8_PDES_PER_PAGE;

	for_each_ring(ring, dev_priv, j) {
		I915_WRITE(RING_MODE_GEN7(ring),
			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
	}

	/* Load the page-directory pointers into each ring, last used first */
	for (i = used_pd - 1; i >= 0; i--) {
		dma_addr_t addr = ppgtt->pd_dma_addr[i];

		for_each_ring(ring, dev_priv, j) {
			ret = gen8_write_pdp(ring, i, addr);
			if (ret)
				goto err_out;
		}
	}
	return 0;

err_out:
	for_each_ring(ring, dev_priv, j)
		I915_WRITE(RING_MODE_GEN7(ring),
			   _MASKED_BIT_DISABLE(GFX_PPGTT_ENABLE));
	return ret;
}
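/*
 * gen8_write_pdp(), used above, is not part of this snippet. A sketch
 * modeled on the i915 code of the same era: each page-directory pointer is
 * loaded into the ring's PDP registers via a pair of MI_LOAD_REGISTER_IMM
 * commands emitted on that ring.
 */
static int gen8_write_pdp(struct intel_ring_buffer *ring,
			  unsigned entry, dma_addr_t addr)
{
	int ret;

	BUG_ON(entry >= 4);

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	/* upper then lower dword of the page-directory address */
	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
	intel_ring_emit(ring, GEN8_RING_PDP_UDW(ring, entry));
	intel_ring_emit(ring, (u32)(addr >> 32));
	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
	intel_ring_emit(ring, GEN8_RING_PDP_LDW(ring, entry));
	intel_ring_emit(ring, (u32)(addr));
	intel_ring_advance(ring);

	return 0;
}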
static void vlv_force_wake_get(struct drm_i915_private *dev_priv)
{
	if (wait_for_atomic((__raw_i915_read32(dev_priv, FORCEWAKE_ACK_VLV) &
			     FORCEWAKE_KERNEL) == 0,
			    FORCEWAKE_ACK_TIMEOUT_MS))
		DRM_ERROR("Timed out waiting for forcewake old ack to clear.\n");

	__raw_i915_write32(dev_priv, FORCEWAKE_VLV,
			   _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL));
	__raw_i915_write32(dev_priv, FORCEWAKE_MEDIA_VLV,
			   _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL));

	if (wait_for_atomic((__raw_i915_read32(dev_priv, FORCEWAKE_ACK_VLV) &
			     FORCEWAKE_KERNEL),
			    FORCEWAKE_ACK_TIMEOUT_MS))
		DRM_ERROR("Timed out waiting for GT to ack forcewake request.\n");

	if (wait_for_atomic((__raw_i915_read32(dev_priv, FORCEWAKE_ACK_MEDIA_VLV) &
			     FORCEWAKE_KERNEL),
			    FORCEWAKE_ACK_TIMEOUT_MS))
		DRM_ERROR("Timed out waiting for media to ack forcewake request.\n");

	/* WaRsForcewakeWaitTC0:vlv */
	__gen6_gt_wait_for_thread_c0(dev_priv);
}
void __gen6_gt_force_wake_mt_get(struct drm_i915_private *dev_priv)
{
	int count;

	count = 0;
	while (count++ < 50 && (I915_READ_NOTRACE(FORCEWAKE_MT_ACK) & 1))
		udelay(10);

	I915_WRITE_NOTRACE(FORCEWAKE_MT, _MASKED_BIT_ENABLE(1));
	POSTING_READ(FORCEWAKE_MT);

	count = 0;
	while (count++ < 50 && (I915_READ_NOTRACE(FORCEWAKE_MT_ACK) & 1) == 0)
		udelay(10);
}
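/*
 * A minimal sketch of the matching release path, assuming the conventions of
 * the same driver era: the masked-bit interface clears the kernel's
 * forcewake request and a posting read flushes the write. (Upstream versions
 * of the put side also check the GT FIFO debug register; that is omitted
 * here.)
 */
void __gen6_gt_force_wake_mt_put(struct drm_i915_private *dev_priv)
{
	I915_WRITE_NOTRACE(FORCEWAKE_MT, _MASKED_BIT_DISABLE(1));
	POSTING_READ(FORCEWAKE_MT);
}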
static void cnl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
	/* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
	if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
		I915_WRITE(GAMT_CHKN_BIT_REG,
			   I915_READ(GAMT_CHKN_BIT_REG) |
			   GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);

	/* WaInPlaceDecompressionHang:cnl */
	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
		   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

	/* WaEnablePreemptionGranularityControlByUMD:cnl */
	I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
		   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
}
static void guc_interrupts_capture(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int irqs;
	u32 tmp;

	/* tell all command streamers to forward interrupts (but not vblank)
	 * to GuC
	 */
	irqs = _MASKED_BIT_ENABLE(GFX_INTERRUPT_STEERING);
	for_each_engine(engine, dev_priv, id)
		I915_WRITE(RING_MODE_GEN7(engine), irqs);

	/* route USER_INTERRUPT to Host, all others are sent to GuC. */
	irqs = GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT |
	       GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
	/* These three registers have the same bit definitions */
	I915_WRITE(GUC_BCS_RCS_IER, ~irqs);
	I915_WRITE(GUC_VCS2_VCS1_IER, ~irqs);
	I915_WRITE(GUC_WD_VECS_IER, ~irqs);

	/*
	 * The REDIRECT_TO_GUC bit of the PMINTRMSK register directs all
	 * (unmasked) PM interrupts to the GuC. All other bits of this
	 * register *disable* generation of a specific interrupt.
	 *
	 * 'pm_intr_keep' indicates bits that are NOT to be set when
	 * writing to the PM interrupt mask register, i.e. interrupts
	 * that must not be disabled.
	 *
	 * If the GuC is handling these interrupts, then we must not let
	 * the PM code disable ANY interrupt that the GuC is expecting.
	 * So for each ENABLED (0) bit in this register, we must SET the
	 * bit in pm_intr_keep so that it's left enabled for the GuC.
	 *
	 * OTOH the REDIRECT_TO_GUC bit is initially SET in pm_intr_keep
	 * (so interrupts go to the DISPLAY unit at first); but here we
	 * need to CLEAR that bit, which will result in the register bit
	 * being left SET!
	 */
	tmp = I915_READ(GEN6_PMINTRMSK);
	if (tmp & GEN8_PMINTR_REDIRECT_TO_GUC) {
		dev_priv->rps.pm_intr_keep |= ~tmp;
		dev_priv->rps.pm_intr_keep &= ~GEN8_PMINTR_REDIRECT_TO_GUC;
	}
}
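/*
 * Every later write to GEN6_PMINTRMSK must honour pm_intr_keep, or the host
 * could mask an interrupt the GuC relies on. A minimal sketch of that guard,
 * modeled on the i915 helper gen6_sanitize_rps_pm_mask() (shown for
 * illustration, not verbatim):
 */
static u32 gen6_sanitize_rps_pm_mask(struct drm_i915_private *dev_priv,
				     u32 mask)
{
	/* bits held in pm_intr_keep must remain 0 (enabled) in PMINTRMSK */
	return mask & ~dev_priv->rps.pm_intr_keep;
}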
static void bdw_forcewake_get(void __iomem *mmio)
{
	WR(_MASKED_BIT_DISABLE(0xffff), FORCEWAKE_MT);

	RD(ECOBUS);

	if (wait_for((RD(FORCEWAKE_ACK_HSW) & FORCEWAKE_KERNEL) == 0, 50))
		gvt_err("fail to wait forcewake idle\n");

	WR(_MASKED_BIT_ENABLE(FORCEWAKE_KERNEL), FORCEWAKE_MT);

	if (wait_for((RD(FORCEWAKE_ACK_HSW) & FORCEWAKE_KERNEL), 50))
		gvt_err("fail to wait forcewake ack\n");

	if (wait_for((RD(GEN6_GT_THREAD_STATUS_REG) &
		      GEN6_GT_THREAD_STATUS_CORE_MASK) == 0, 50))
		gvt_err("fail to wait c0 wake up\n");
}
static void direct_interrupts_to_guc(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *ring;
	int i, irqs;

	/* tell all command streamers to forward interrupts and vblank to GuC */
	irqs = _MASKED_FIELD(GFX_FORWARD_VBLANK_MASK, GFX_FORWARD_VBLANK_ALWAYS);
	irqs |= _MASKED_BIT_ENABLE(GFX_INTERRUPT_STEERING);
	for_each_ring(ring, dev_priv, i)
		I915_WRITE(RING_MODE_GEN7(ring), irqs);

	/* route USER_INTERRUPT to Host, all others are sent to GuC. */
	irqs = GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT |
	       GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
	/* These three registers have the same bit definitions */
	I915_WRITE(GUC_BCS_RCS_IER, ~irqs);
	I915_WRITE(GUC_VCS2_VCS1_IER, ~irqs);
	I915_WRITE(GUC_WD_VECS_IER, ~irqs);
}
/**
 * huc_fw_xfer() - DMA's the firmware
 * @huc_fw: the firmware descriptor
 * @vma: the firmware image (bound into the GGTT)
 *
 * Transfer the firmware image to RAM for execution by the microcontroller.
 *
 * Return: 0 on success, non-zero on failure
 */
static int huc_fw_xfer(struct intel_uc_fw *huc_fw, struct i915_vma *vma)
{
	struct intel_huc *huc = container_of(huc_fw, struct intel_huc, fw);
	struct drm_i915_private *dev_priv = huc_to_i915(huc);
	unsigned long offset = 0;
	u32 size;
	int ret;

	GEM_BUG_ON(huc_fw->type != INTEL_UC_FW_TYPE_HUC);

	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);

	/* Set the source address for the uCode */
	offset = intel_guc_ggtt_offset(&dev_priv->guc, vma) +
		 huc_fw->header_offset;
	I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset));
	I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF);

	/* Hardware doesn't look at destination address for HuC. Set it to 0,
	 * but still program the correct address space.
	 */
	I915_WRITE(DMA_ADDR_1_LOW, 0);
	I915_WRITE(DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM);

	size = huc_fw->header_size + huc_fw->ucode_size;
	I915_WRITE(DMA_COPY_SIZE, size);

	/* Start the DMA */
	I915_WRITE(DMA_CTRL, _MASKED_BIT_ENABLE(HUC_UKERNEL | START_DMA));

	/* Wait for DMA to finish */
	ret = intel_wait_for_register_fw(dev_priv, DMA_CTRL, START_DMA, 0, 100);

	DRM_DEBUG_DRIVER("HuC DMA transfer wait over with ret %d\n", ret);

	/* Disable the bits once DMA is over */
	I915_WRITE(DMA_CTRL, _MASKED_BIT_DISABLE(HUC_UKERNEL));

	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);

	return ret;
}
/*
 * Transfer the firmware image to RAM for execution by the microcontroller.
 *
 * Architecturally, the DMA engine is bidirectional, and can potentially even
 * transfer between GTT locations. This functionality is left out of the API
 * for now as there is no need for it.
 */
static int guc_xfer_ucode(struct intel_guc *guc, struct i915_vma *vma)
{
	struct drm_i915_private *dev_priv = guc_to_i915(guc);
	struct intel_uc_fw *guc_fw = &guc->fw;
	unsigned long offset;
	u32 status;
	int ret;

	/*
	 * The header plus uCode will be copied to WOPCM via DMA, excluding any
	 * other components
	 */
	I915_WRITE(DMA_COPY_SIZE, guc_fw->header_size + guc_fw->ucode_size);

	/* Set the source address for the new blob */
	offset = guc_ggtt_offset(vma) + guc_fw->header_offset;
	I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset));
	I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF);

	/*
	 * Set the DMA destination. Current uCode expects the code to be
	 * loaded at 8k; locations below this are used for the stack.
	 */
	I915_WRITE(DMA_ADDR_1_LOW, 0x2000);
	I915_WRITE(DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM);

	/* Finally start the DMA */
	I915_WRITE(DMA_CTRL, _MASKED_BIT_ENABLE(UOS_MOVE | START_DMA));

	/* Wait for DMA to finish */
	ret = __intel_wait_for_register_fw(dev_priv, DMA_CTRL, START_DMA, 0,
					   2, 100, &status);
	DRM_DEBUG_DRIVER("GuC DMA status %#x\n", status);

	return ret;
}
static void icl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
	/* This is not a Wa. Enable for better image quality */
	I915_WRITE(_3D_CHICKEN3,
		   _MASKED_BIT_ENABLE(_3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE));

	/* WaInPlaceDecompressionHang:icl */
	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
		   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

	/* WaPipelineFlushCoherentLines:icl */
	I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
		   GEN8_LQSC_FLUSH_COHERENT_LINES);

	/* Wa_1405543622:icl
	 * Formerly known as WaGAPZPriorityScheme
	 */
	I915_WRITE(GEN8_GARBCNTL, I915_READ(GEN8_GARBCNTL) |
		   GEN11_ARBITRATION_PRIO_ORDER_MASK);

	/* Wa_1604223664:icl
	 * Formerly known as WaL3BankAddressHashing
	 */
	I915_WRITE(GEN8_GARBCNTL,
		   (I915_READ(GEN8_GARBCNTL) & ~GEN11_HASH_CTRL_EXCL_MASK) |
		   GEN11_HASH_CTRL_EXCL_BIT0);
	I915_WRITE(GEN11_GLBLINVL,
		   (I915_READ(GEN11_GLBLINVL) & ~GEN11_BANK_HASH_ADDR_EXCL_MASK) |
		   GEN11_BANK_HASH_ADDR_EXCL_BIT0);

	/* WaModifyGamTlbPartitioning:icl */
	I915_WRITE(GEN11_GACB_PERF_CTRL,
		   (I915_READ(GEN11_GACB_PERF_CTRL) & ~GEN11_HASH_CTRL_MASK) |
		   GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);

	/* Wa_1405733216:icl
	 * Formerly known as WaDisableCleanEvicts
	 */
	I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
		   GEN11_LQSC_CLEAN_EVICT_DISABLE);

	/* Wa_1405766107:icl
	 * Formerly known as WaCL2SFHalfMaxAlloc
	 */
	I915_WRITE(GEN11_LSN_UNSLCVC, I915_READ(GEN11_LSN_UNSLCVC) |
		   GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
		   GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);

	/* Wa_220166154:icl
	 * Formerly known as WaDisCtxReload
	 */
	I915_WRITE(GAMW_ECO_DEV_RW_IA_REG, I915_READ(GAMW_ECO_DEV_RW_IA_REG) |
		   GAMW_ECO_DEV_CTX_RELOAD_DISABLE);

	/* Wa_1405779004:icl (pre-prod) */
	if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_A0))
		I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE,
			   I915_READ(SLICE_UNIT_LEVEL_CLKGATE) |
			   MSCUNIT_CLKGATE_DIS);

	/* Wa_1406680159:icl */
	I915_WRITE(SUBSLICE_UNIT_LEVEL_CLKGATE,
		   I915_READ(SUBSLICE_UNIT_LEVEL_CLKGATE) |
		   GWUNIT_CLKGATE_DIS);

	/* Wa_1604302699:icl */
	I915_WRITE(GEN10_L3_CHICKEN_MODE_REGISTER,
		   I915_READ(GEN10_L3_CHICKEN_MODE_REGISTER) |
		   GEN11_I2M_WRITE_DISABLE);

	/* Wa_1406838659:icl (pre-prod) */
	if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_B0))
		I915_WRITE(INF_UNIT_LEVEL_CLKGATE,
			   I915_READ(INF_UNIT_LEVEL_CLKGATE) |
			   CGPSF_CLKGATE_DIS);

	/* WaForwardProgressSoftReset:icl */
	I915_WRITE(GEN10_SCRATCH_LNCF2,
		   I915_READ(GEN10_SCRATCH_LNCF2) |
		   PMFLUSHDONE_LNICRSDROP |
		   PMFLUSH_GAPL3UNBLOCK |
		   PMFLUSHDONE_LNEBLK);
}
int nugpgpu_ringbuffer_render_init(struct nugpgpu_private *gpu_priv)
{
	int ret;
	u32 head;

	printk(LOG_INFO "nugpgpu_ringbuffer_render_init\n" LOG_END);
	TRACE_IN

	RING->mmio_base = RENDER_RING_BASE;
	RING->size = PAGE_SIZE * RING_PAGES;

	/* Allocate the status page. */
	ret = allocate_object(gpu_priv, &RING->status_obj, 1);
	if (ret) {
		printk(LOG_ERR "Failed to allocate the status page\n" LOG_END);
		return 1;
	}

	RING->gva_status = nugpgpu_gtt_insert(gpu_priv,
					      RING->status_obj.pg_list,
					      NUGPGPU_CACHE_LLC);
	if (RING->gva_status == (unsigned int)-1) {
		printk(LOG_ERR "Failed to insert the status page in gtt\n" LOG_END);
		return 1;
	}
	printk(LOG_INFO "RING->gva_status : 0x%x\n" LOG_END,
	       (unsigned int)RING->gva_status);

	RING->page_status = kmap(sg_page(RING->status_obj.pg_list->sgl));
	if (RING->page_status == NULL) {
		printk(LOG_ERR "Failed to map page_status\n" LOG_END);
		return 1;
	}
	memset(RING->page_status, 0, PAGE_SIZE);
	printk(LOG_INFO "RING->page_status : 0x%lx\n" LOG_END,
	       (unsigned long)RING->page_status);

	/* Allocate the ringbuffer object. */
	ret = allocate_object(gpu_priv, &RING->ringbuf_obj, RING_PAGES);
	if (ret) {
		printk(LOG_ERR "Failed to allocate the ringbuffer object\n" LOG_END);
		return 1;
	}

	RING->gva_ringbuffer = nugpgpu_gtt_insert(gpu_priv,
						  RING->ringbuf_obj.pg_list,
						  NUGPGPU_CACHE_LLC);
	if (RING->gva_ringbuffer == (unsigned int)-1) {
		printk(LOG_ERR "Failed to insert the ringbuffer in gtt\n" LOG_END);
		return 1;
	}
	printk(LOG_INFO "RING->gva_ringbuffer : 0x%x\n" LOG_END,
	       (unsigned int)RING->gva_ringbuffer);

	RING->page_ringbuffer = kmap(sg_page(RING->ringbuf_obj.pg_list->sgl));
	if (RING->page_ringbuffer == NULL) {
		printk(LOG_ERR "Failed to map page_ringbuffer\n" LOG_END);
		return 1;
	}

	RING->virtual_start = ioremap_wc(gpu_priv->gtt.mappable_base + PAGE_SIZE,
					 RING->size);
	if (RING->virtual_start == NULL) {
		printk(LOG_ERR "Problem while mapping virtual start ioremap_wc\n" LOG_END);
		return 1;
	}

	printk(LOG_INFO "Allocated the ringbuffer\n" LOG_END);

	/* Initialize the ring now. */
	gpu_forcewake_get(gpu_priv);

	/* Write the status page register. */
	printk(LOG_INFO "writing status page register\n" LOG_END);
	NUGPGPU_WRITE(RENDER_HWS_PGA_GEN7, RING->gva_status);
	NUGPGPU_READ(RENDER_HWS_PGA_GEN7);

	flushtlb(gpu_priv);

	/* Stop the ring and clear its registers. */
	printk(LOG_INFO "stopping ring\n" LOG_END);
	RING_WRITE_CTL(RING, 0);
	RING_WRITE_HEAD(RING, 0);
	RING_WRITE_TAIL(RING, 0);

	/* The doc says this read-back enforces ordering between the writes */
	head = RING_READ_HEAD(RING) & RING_HEAD_ADDR;
	if (head != 0) {
		printk(LOG_ERR "failed to set head to zero\n" LOG_END);
		RING_WRITE_HEAD(RING, 0);
		if (RING_READ_HEAD(RING) & RING_HEAD_ADDR) {
			printk(LOG_ERR "failed to set ring head to zero "
			       "ctl %08x head %08x tail %08x start %08x\n" LOG_END,
			       RING_READ_CTL(RING),
			       RING_READ_HEAD(RING),
			       RING_READ_TAIL(RING),
			       RING_READ_START(RING));
		}
	}

	/* Enforce ordering by reading HEAD back (the i915 driver does the
	 * same).
	 */
	RING_READ_HEAD(RING);

	/* Comment taken directly from the i915 driver:
	 * Initialize the ring. This must happen _after_ we've cleared the ring
	 * registers with the above sequence (the readback of the HEAD registers
	 * also enforces ordering), otherwise the hw might lose the new ring
	 * register values.
	 */
	RING_WRITE_START(RING, RING->gva_ringbuffer);
	RING_WRITE_CTL(RING, ((RING->size - PAGE_SIZE) & RING_NR_PAGES) |
			     RING_VALID);

	/* If the head is still not zero, the ring is dead. */
	if (wait_for((RING_READ_CTL(RING) & RING_VALID) != 0 &&
		     RING_READ_START(RING) == RING->gva_ringbuffer &&
		     (RING_READ_HEAD(RING) & RING_HEAD_ADDR) == 0, 50)) {
		printk(LOG_ERR "failed to start ring\n" LOG_END);
		return -EIO;
	}

	RING->head = RING_READ_HEAD(RING);
	RING->tail = RING_READ_TAIL(RING) & RING_TAIL_ADDR;
	RING->space = ring_space(RING);
	printk(LOG_INFO "ring->space = %d\n" LOG_END, RING->space);

	gpu_forcewake_put(gpu_priv);

	RING_WRITE_MODE(RING, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
	RING_WRITE_MODE(RING, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
	RING_WRITE_MODE_GEN7(RING,
			     _MASKED_BIT_DISABLE(GFX_TLB_INVALIDATE_ALWAYS) |
			     _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
	RING_WRITE_INSTPM(RING, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));

	dword_check(gpu_priv, RING, temp);

	TRACE_OUT
	return 0;
}
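/*
 * ring_space() is not shown in this snippet. A hypothetical sketch of what
 * it presumably computes, modeled on the i915 ring-space calculation; the
 * struct name and the reserved-slack constant (8 bytes, room for one pending
 * dword pair) are assumptions, not taken from the original driver:
 */
static inline int ring_space(struct nugpgpu_ring *ring)
{
	int space = (ring->head & RING_HEAD_ADDR) - (ring->tail + 8);

	/* free space wraps around when the head is behind the tail */
	if (space < 0)
		space += ring->size;
	return space;
}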