static VkResult
compute_pipeline_create(
    VkDevice                                    _device,
    struct anv_pipeline_cache *                 cache,
    const VkComputePipelineCreateInfo*          pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkPipeline*                                 pPipeline)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   const struct anv_physical_device *physical_device =
      &device->instance->physicalDevice;
   const struct gen_device_info *devinfo = &physical_device->info;
   struct anv_pipeline *pipeline;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO);

   pipeline = vk_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8,
                        VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pipeline == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   pipeline->device = device;
   pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout);

   pipeline->blend_state.map = NULL;

   result = anv_reloc_list_init(&pipeline->batch_relocs,
                                pAllocator ? pAllocator : &device->alloc);
   if (result != VK_SUCCESS) {
      vk_free2(&device->alloc, pAllocator, pipeline);
      return result;
   }
   pipeline->batch.next = pipeline->batch.start = pipeline->batch_data;
   pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data);
   pipeline->batch.relocs = &pipeline->batch_relocs;

   /* When we free the pipeline, we detect stages based on the NULL status
    * of various prog_data pointers.  Make them NULL by default.
    */
   memset(pipeline->shaders, 0, sizeof(pipeline->shaders));

   pipeline->vs_simd8 = NO_KERNEL;
   pipeline->vs_vec4 = NO_KERNEL;
   pipeline->gs_kernel = NO_KERNEL;

   pipeline->active_stages = 0;
   pipeline->needs_data_cache = false;

   assert(pCreateInfo->stage.stage == VK_SHADER_STAGE_COMPUTE_BIT);
   ANV_FROM_HANDLE(anv_shader_module, module, pCreateInfo->stage.module);
   result = anv_pipeline_compile_cs(pipeline, cache, pCreateInfo, module,
                                    pCreateInfo->stage.pName,
                                    pCreateInfo->stage.pSpecializationInfo);
   if (result != VK_SUCCESS) {
      vk_free2(&device->alloc, pAllocator, pipeline);
      return result;
   }

   const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);

   anv_pipeline_setup_l3_config(pipeline, cs_prog_data->base.total_shared > 0);

   /* Compute the execution mask for the last SIMD thread of a workgroup:
    * when the workgroup size is not a multiple of the dispatch width, only
    * the leftover channels may run.
    */
   uint32_t group_size = cs_prog_data->local_size[0] *
      cs_prog_data->local_size[1] * cs_prog_data->local_size[2];
   uint32_t remainder = group_size & (cs_prog_data->simd_size - 1);

   if (remainder > 0)
      pipeline->cs_right_mask = ~0u >> (32 - remainder);
   else
      pipeline->cs_right_mask = ~0u >> (32 - cs_prog_data->simd_size);
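/* A minimal worked example of the right-mask computation above.  The helper
 * name and the sample numbers are illustrative only, not part of the driver.
 * With local_size = {10, 2, 1} and simd_size = 16, group_size is 20, so the
 * last SIMD16 thread covers only 20 & 15 = 4 invocations and the mask comes
 * out as 0x0000000f; when group_size is an exact multiple of simd_size, the
 * full-width mask is used instead.
 */
static uint32_t
example_cs_right_mask(uint32_t group_size, uint32_t simd_size)
{
   uint32_t remainder = group_size & (simd_size - 1);  /* 20 & 15 = 4 */
   if (remainder > 0)
      return ~0u >> (32 - remainder);                  /* 0x0000000f */
   else
      return ~0u >> (32 - simd_size);                  /* 0x0000ffff at SIMD16 */
}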
VkResult genX(compute_pipeline_create)(
    VkDevice                                    _device,
    struct anv_pipeline_cache *                 cache,
    const VkComputePipelineCreateInfo*          pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkPipeline*                                 pPipeline)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_pipeline *pipeline;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO);

   pipeline = anv_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8,
                         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pipeline == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   pipeline->device = device;
   pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout);

   pipeline->blend_state.map = NULL;

   result = anv_reloc_list_init(&pipeline->batch_relocs,
                                pAllocator ? pAllocator : &device->alloc);
   if (result != VK_SUCCESS) {
      anv_free2(&device->alloc, pAllocator, pipeline);
      return result;
   }
   pipeline->batch.next = pipeline->batch.start = pipeline->batch_data;
   pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data);
   pipeline->batch.relocs = &pipeline->batch_relocs;

   /* When we free the pipeline, we detect stages based on the NULL status
    * of various prog_data pointers.  Make them NULL by default.
    */
   memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
   memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start));
   memset(pipeline->bindings, 0, sizeof(pipeline->bindings));

   pipeline->vs_simd8 = NO_KERNEL;
   pipeline->vs_vec4 = NO_KERNEL;
   pipeline->gs_kernel = NO_KERNEL;

   pipeline->active_stages = 0;
   pipeline->total_scratch = 0;

   assert(pCreateInfo->stage.stage == VK_SHADER_STAGE_COMPUTE_BIT);
   ANV_FROM_HANDLE(anv_shader_module, module, pCreateInfo->stage.module);
   result = anv_pipeline_compile_cs(pipeline, cache, pCreateInfo, module,
                                    pCreateInfo->stage.pName,
                                    pCreateInfo->stage.pSpecializationInfo);
   if (result != VK_SUCCESS) {
      anv_free2(&device->alloc, pAllocator, pipeline);
      return result;
   }

   pipeline->use_repclear = false;

   const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
   const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;

   /* Push constants are uploaded in whole 32-byte (8-dword) GRF registers:
    * the uniform dwords plus the per-thread local-invocation-ID payload are
    * packed together and padded up to a register boundary.  For example,
    * nr_params = 4 and local_invocation_id_regs = 3 (24 local-ID dwords)
    * give (4 + 24) * 4 = 112 bytes, aligned to 128 bytes = 4 registers.
    */
   unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
   unsigned push_constant_data_size =
      (prog_data->nr_params + local_id_dwords) * 4;
   unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32);
   unsigned push_constant_regs = reg_aligned_constant_size / 32;

   uint32_t group_size = cs_prog_data->local_size[0] *
      cs_prog_data->local_size[1] * cs_prog_data->local_size[2];
   pipeline->cs_thread_width_max =
      DIV_ROUND_UP(group_size, cs_prog_data->simd_size);
   uint32_t remainder = group_size & (cs_prog_data->simd_size - 1);

   if (remainder > 0)
      pipeline->cs_right_mask = ~0u >> (32 - remainder);
   else