static void gen7_upload_urb(struct brw_context *brw) { struct intel_context *intel = &brw->intel; const int push_size_kB = intel->is_haswell && intel->gt == 3 ? 32 : 16; /* Total space for entries is URB size - 16kB for push constants */ int handle_region_size = (brw->urb.size - push_size_kB) * 1024; /* bytes */ /* CACHE_NEW_VS_PROG */ unsigned vs_size = MAX2(brw->vs.prog_data->urb_entry_size, 1); int nr_vs_entries = handle_region_size / (vs_size * 64); if (nr_vs_entries > brw->urb.max_vs_entries) nr_vs_entries = brw->urb.max_vs_entries; /* According to volume 2a, nr_vs_entries must be a multiple of 8. */ brw->urb.nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, 8); /* URB Starting Addresses are specified in multiples of 8kB. */ brw->urb.vs_start = push_size_kB / 8; /* skip over push constants */ assert(brw->urb.nr_vs_entries % 8 == 0); assert(brw->urb.nr_gs_entries % 8 == 0); /* GS requirement */ assert(!brw->gs.prog_active); gen7_emit_vs_workaround_flush(intel); gen7_emit_urb_state(brw, brw->urb.nr_vs_entries, vs_size, brw->urb.vs_start); }
/* 3DSTATE_URB_VS * 3DSTATE_URB_HS * 3DSTATE_URB_DS * 3DSTATE_URB_GS * * If the 3DSTATE_URB_VS is emitted, than the others must be also. * From the Ivybridge PRM, Volume 2 Part 1, section 1.7.1 3DSTATE_URB_VS: * * 3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be * programmed in order for the programming of this state to be * valid. */ static void gen7_blorp_emit_urb_config(struct brw_context *brw) { unsigned urb_size = (brw->is_haswell && brw->gt == 3) ? 32 : 16; gen7_emit_push_constant_state(brw, urb_size / 2 /* vs_size */, 0 /* hs_size */, 0 /* ds_size */, 0 /* gs_size */, urb_size / 2 /* fs_size */); /* The minimum valid number of VS entries is 32. See 3DSTATE_URB_VS, Dword * 1.15:0 "VS Number of URB Entries". */ gen7_emit_urb_state(brw, 32 /* num_vs_entries */, 2 /* vs_size */, 2 /* vs_start */, 0 /* num_hs_entries */, 1 /* hs_size */, 2 /* hs_start */, 0 /* num_ds_entries */, 1 /* ds_size */, 2 /* ds_start */, 0 /* num_gs_entries */, 1 /* gs_size */, 2 /* gs_start */); }
/* 3DSTATE_URB_VS * 3DSTATE_URB_HS * 3DSTATE_URB_DS * 3DSTATE_URB_GS * * If the 3DSTATE_URB_VS is emitted, than the others must be also. From the * BSpec, Volume 2a "3D Pipeline Overview", Section 1.7.1 3DSTATE_URB_VS: * 3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be * programmed in order for the programming of this state to be * valid. */ static void gen7_blorp_emit_urb_config(struct brw_context *brw, const brw_blorp_params *params) { /* The minimum valid value is 32. See 3DSTATE_URB_VS, * Dword 1.15:0 "VS Number of URB Entries". */ int num_vs_entries = 32; int vs_size = 2; int vs_start = 2; /* skip over push constants */ gen7_emit_urb_state(brw, num_vs_entries, vs_size, vs_start); }
/* 3DSTATE_URB_VS * 3DSTATE_URB_HS * 3DSTATE_URB_DS * 3DSTATE_URB_GS * * If the 3DSTATE_URB_VS is emitted, than the others must be also. * From the Ivybridge PRM, Volume 2 Part 1, section 1.7.1 3DSTATE_URB_VS: * * 3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be * programmed in order for the programming of this state to be * valid. */ void gen7_blorp_emit_urb_config(struct brw_context *brw) { /* URB allocations must be done in 8k chunks. */ const unsigned chunk_size_bytes = 8192; const unsigned urb_size = (brw->gen >= 8 || (brw->is_haswell && brw->gt == 3)) ? 32 : 16; const unsigned push_constant_bytes = 1024 * urb_size; const unsigned push_constant_chunks = push_constant_bytes / chunk_size_bytes; const unsigned vs_size = 1; const unsigned vs_start = push_constant_chunks; const unsigned vs_chunks = DIV_ROUND_UP(brw->urb.min_vs_entries * vs_size * 64, chunk_size_bytes); if (gen7_blorp_skip_urb_config(brw)) return; brw->ctx.NewDriverState |= BRW_NEW_URB_SIZE; gen7_emit_push_constant_state(brw, urb_size / 2 /* vs_size */, 0 /* hs_size */, 0 /* ds_size */, 0 /* gs_size */, urb_size / 2 /* fs_size */); gen7_emit_urb_state(brw, brw->urb.min_vs_entries /* num_vs_entries */, vs_size, vs_start, 0 /* num_hs_entries */, 1 /* hs_size */, vs_start + vs_chunks /* hs_start */, 0 /* num_ds_entries */, 1 /* ds_size */, vs_start + vs_chunks /* ds_start */, 0 /* num_gs_entries */, 1 /* gs_size */, vs_start + vs_chunks /* gs_start */); }
static void gen7_upload_urb(struct brw_context *brw) { const int push_size_kB = (brw->gen >= 8 || (brw->is_haswell && brw->gt == 3)) ? 32 : 16; /* BRW_NEW_VS_PROG_DATA */ unsigned vs_size = MAX2(brw->vs.prog_data->base.urb_entry_size, 1); unsigned vs_entry_size_bytes = vs_size * 64; /* BRW_NEW_GEOMETRY_PROGRAM, BRW_NEW_GS_PROG_DATA */ bool gs_present = brw->geometry_program; unsigned gs_size = gs_present ? brw->gs.prog_data->base.urb_entry_size : 1; unsigned gs_entry_size_bytes = gs_size * 64; /* If we're just switching between programs with the same URB requirements, * skip the rest of the logic. */ if (!(brw->ctx.NewDriverState & BRW_NEW_CONTEXT) && brw->urb.vsize == vs_size && brw->urb.gs_present == gs_present && brw->urb.gsize == gs_size) { return; } brw->urb.vsize = vs_size; brw->urb.gs_present = gs_present; brw->urb.gsize = gs_size; /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS): * * VS Number of URB Entries must be divisible by 8 if the VS URB Entry * Allocation Size is less than 9 512-bit URB entries. * * Similar text exists for GS. */ unsigned vs_granularity = (vs_size < 9) ? 8 : 1; unsigned gs_granularity = (gs_size < 9) ? 8 : 1; /* URB allocations must be done in 8k chunks. */ unsigned chunk_size_bytes = 8192; /* Determine the size of the URB in chunks. */ unsigned urb_chunks = brw->urb.size * 1024 / chunk_size_bytes; /* Reserve space for push constants */ unsigned push_constant_bytes = 1024 * push_size_kB; unsigned push_constant_chunks = push_constant_bytes / chunk_size_bytes; /* Initially, assign each stage the minimum amount of URB space it needs, * and make a note of how much additional space it "wants" (the amount of * additional space it could actually make use of). */ /* VS has a lower limit on the number of URB entries */ unsigned vs_chunks = ALIGN(brw->urb.min_vs_entries * vs_entry_size_bytes, chunk_size_bytes) / chunk_size_bytes; unsigned vs_wants = ALIGN(brw->urb.max_vs_entries * vs_entry_size_bytes, chunk_size_bytes) / chunk_size_bytes - vs_chunks; unsigned gs_chunks = 0; unsigned gs_wants = 0; if (gs_present) { /* There are two constraints on the minimum amount of URB space we can * allocate: * * (1) We need room for at least 2 URB entries, since we always operate * the GS in DUAL_OBJECT mode. * * (2) We can't allocate less than nr_gs_entries_granularity. */ gs_chunks = ALIGN(MAX2(gs_granularity, 2) * gs_entry_size_bytes, chunk_size_bytes) / chunk_size_bytes; gs_wants = ALIGN(brw->urb.max_gs_entries * gs_entry_size_bytes, chunk_size_bytes) / chunk_size_bytes - gs_chunks; } /* There should always be enough URB space to satisfy the minimum * requirements of each stage. */ unsigned total_needs = push_constant_chunks + vs_chunks + gs_chunks; assert(total_needs <= urb_chunks); /* Mete out remaining space (if any) in proportion to "wants". */ unsigned total_wants = vs_wants + gs_wants; unsigned remaining_space = urb_chunks - total_needs; if (remaining_space > total_wants) remaining_space = total_wants; if (remaining_space > 0) { unsigned vs_additional = (unsigned) roundf(vs_wants * (((float) remaining_space) / total_wants)); vs_chunks += vs_additional; remaining_space -= vs_additional; gs_chunks += remaining_space; } /* Sanity check that we haven't over-allocated. */ assert(push_constant_chunks + vs_chunks + gs_chunks <= urb_chunks); /* Finally, compute the number of entries that can fit in the space * allocated to each stage. */ unsigned nr_vs_entries = vs_chunks * chunk_size_bytes / vs_entry_size_bytes; unsigned nr_gs_entries = gs_chunks * chunk_size_bytes / gs_entry_size_bytes; /* Since we rounded up when computing *_wants, this may be slightly more * than the maximum allowed amount, so correct for that. */ nr_vs_entries = MIN2(nr_vs_entries, brw->urb.max_vs_entries); nr_gs_entries = MIN2(nr_gs_entries, brw->urb.max_gs_entries); /* Ensure that we program a multiple of the granularity. */ nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, vs_granularity); nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, gs_granularity); /* Finally, sanity check to make sure we have at least the minimum number * of entries needed for each stage. */ assert(nr_vs_entries >= brw->urb.min_vs_entries); if (gs_present) assert(nr_gs_entries >= 2); /* Gen7 doesn't actually use brw->urb.nr_{vs,gs}_entries, but it seems * better to put reasonable data in there rather than leave them * uninitialized. */ brw->urb.nr_vs_entries = nr_vs_entries; brw->urb.nr_gs_entries = nr_gs_entries; /* Lay out the URB in the following order: * - push constants * - VS * - GS */ brw->urb.vs_start = push_constant_chunks; brw->urb.gs_start = push_constant_chunks + vs_chunks; if (brw->gen == 7 && !brw->is_haswell && !brw->is_baytrail) gen7_emit_vs_workaround_flush(brw); gen7_emit_urb_state(brw, brw->urb.nr_vs_entries, vs_size, brw->urb.vs_start, brw->urb.nr_gs_entries, gs_size, brw->urb.gs_start); }