VkResult anv_CreateSwapchainKHR(
    VkDevice                                     _device,
    const VkSwapchainCreateInfoKHR*              pCreateInfo,
    const VkAllocationCallbacks*                 pAllocator,
    VkSwapchainKHR*                              pSwapchain)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, pCreateInfo->surface);
   struct anv_wsi_interface *iface =
      device->instance->physicalDevice.wsi[surface->platform];
   struct anv_swapchain *swapchain;

   VkResult result = iface->create_swapchain(surface, device, pCreateInfo,
                                             pAllocator, &swapchain);
   if (result != VK_SUCCESS)
      return result;

   /* Remember which allocator the swapchain was created with so the same
    * one is used for the per-frame fences and for destruction.
    */
   if (pAllocator)
      swapchain->alloc = *pAllocator;
   else
      swapchain->alloc = device->alloc;

   for (unsigned i = 0; i < ARRAY_SIZE(swapchain->fences); i++)
      swapchain->fences[i] = VK_NULL_HANDLE;

   *pSwapchain = anv_swapchain_to_handle(swapchain);

   return VK_SUCCESS;
}
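/*
 * For reference: a sketch of the per-platform dispatch table implied by the
 * call sites in this file (create_swapchain here, plus get_support,
 * get_capabilities, get_formats and get_present_modes below).  The name and
 * exact member layout are assumptions; any extra upstream members are
 * omitted.
 */
struct anv_wsi_interface_sketch {
   VkResult (*get_support)(VkIcdSurfaceBase *surface,
                           struct anv_physical_device *device,
                           uint32_t queueFamilyIndex,
                           VkBool32 *pSupported);
   VkResult (*get_capabilities)(VkIcdSurfaceBase *surface,
                                struct anv_physical_device *device,
                                VkSurfaceCapabilitiesKHR *caps);
   VkResult (*get_formats)(VkIcdSurfaceBase *surface,
                           struct anv_physical_device *device,
                           uint32_t *pSurfaceFormatCount,
                           VkSurfaceFormatKHR *pSurfaceFormats);
   VkResult (*get_present_modes)(VkIcdSurfaceBase *surface,
                                 struct anv_physical_device *device,
                                 uint32_t *pPresentModeCount,
                                 VkPresentModeKHR *pPresentModes);
   VkResult (*create_swapchain)(VkIcdSurfaceBase *surface,
                                struct anv_device *device,
                                const VkSwapchainCreateInfoKHR *pCreateInfo,
                                const VkAllocationCallbacks *pAllocator,
                                struct anv_swapchain **swapchain);
};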
void anv_DestroySurfaceKHR(
    VkInstance                                   _instance,
    VkSurfaceKHR                                 _surface,
    const VkAllocationCallbacks*                 pAllocator)
{
   ANV_FROM_HANDLE(anv_instance, instance, _instance);
   ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);

   anv_free2(&instance->alloc, pAllocator, surface);
}
VkResult anv_GetPhysicalDeviceSurfaceCapabilitiesKHR(
    VkPhysicalDevice                             physicalDevice,
    VkSurfaceKHR                                 _surface,
    VkSurfaceCapabilitiesKHR*                    pSurfaceCapabilities)
{
   ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);
   ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);
   struct anv_wsi_interface *iface = device->wsi[surface->platform];

   return iface->get_capabilities(surface, device, pSurfaceCapabilities);
}
void anv_DestroyQueryPool(
    VkDevice                                    _device,
    VkQueryPool                                 _pool,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_query_pool, pool, _pool);

   anv_gem_munmap(pool->bo.map, pool->bo.size);
   anv_gem_close(device, pool->bo.gem_handle);
   vk_free2(&device->alloc, pAllocator, pool);
}
VkResult anv_GetPhysicalDeviceSurfaceSupportKHR(
    VkPhysicalDevice                            physicalDevice,
    uint32_t                                    queueFamilyIndex,
    VkSurfaceKHR                                _surface,
    VkBool32*                                   pSupported)
{
   ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);
   ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);
   struct anv_wsi_interface *iface = device->wsi[surface->platform];

   return iface->get_support(surface, device, queueFamilyIndex, pSupported);
}
VkResult anv_GetPhysicalDeviceSurfacePresentModesKHR(
    VkPhysicalDevice                            physicalDevice,
    VkSurfaceKHR                                _surface,
    uint32_t*                                   pPresentModeCount,
    VkPresentModeKHR*                           pPresentModes)
{
   ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);
   ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);
   struct anv_wsi_interface *iface = device->wsi[surface->platform];

   return iface->get_present_modes(surface, device, pPresentModeCount,
                                   pPresentModes);
}
VkResult anv_GetPhysicalDeviceSurfaceFormatsKHR(
    VkPhysicalDevice                            physicalDevice,
    VkSurfaceKHR                                _surface,
    uint32_t*                                   pSurfaceFormatCount,
    VkSurfaceFormatKHR*                         pSurfaceFormats)
{
   ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);
   ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);
   struct anv_wsi_interface *iface = device->wsi[surface->platform];

   return iface->get_formats(surface, device, pSurfaceFormatCount,
                             pSurfaceFormats);
}
VkBool32 anv_GetPhysicalDeviceWaylandPresentationSupportKHR(
    VkPhysicalDevice                            physicalDevice,
    uint32_t                                    queueFamilyIndex,
    struct wl_display*                          display)
{
   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);

   return wsi_wl_get_presentation_support(&physical_device->wsi_device,
                                          display);
}
VkResult anv_GetSwapchainImagesKHR(
    VkDevice                                     device,
    VkSwapchainKHR                               _swapchain,
    uint32_t*                                    pSwapchainImageCount,
    VkImage*                                     pSwapchainImages)
{
   ANV_FROM_HANDLE(anv_swapchain, swapchain, _swapchain);

   return swapchain->get_images(swapchain, pSwapchainImageCount,
                                pSwapchainImages);
}
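/*
 * Client-side usage sketch (standard Vulkan two-call idiom, not driver
 * code): query the image count with a NULL array, then fetch the handles.
 * The helper name and max_images clamp are illustrative only; when the
 * caller's array is too small, vkGetSwapchainImagesKHR returns
 * VK_INCOMPLETE.
 */
static VkResult
example_get_swapchain_images(VkDevice dev, VkSwapchainKHR swapchain,
                             uint32_t *count, VkImage images[],
                             uint32_t max_images)
{
   /* First call: just ask how many images the swapchain holds. */
   VkResult result = vkGetSwapchainImagesKHR(dev, swapchain, count, NULL);
   if (result != VK_SUCCESS)
      return result;

   if (*count > max_images)
      *count = max_images;

   /* Second call: fill the caller-provided array. */
   return vkGetSwapchainImagesKHR(dev, swapchain, count, images);
}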
void anv_GetPhysicalDeviceFormatProperties(
    VkPhysicalDevice                            physicalDevice,
    VkFormat                                    format,
    VkFormatProperties*                         pFormatProperties)
{
   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);

   anv_physical_device_get_format_properties(physical_device, format,
                                             pFormatProperties);
}
VkResult anv_QueuePresentKHR(
    VkQueue                                  _queue,
    const VkPresentInfoKHR*                  pPresentInfo)
{
   ANV_FROM_HANDLE(anv_queue, queue, _queue);
   VkResult result;

   for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) {
      ANV_FROM_HANDLE(anv_swapchain, swapchain, pPresentInfo->pSwapchains[i]);

      assert(swapchain->device == queue->device);

      if (swapchain->fences[0] == VK_NULL_HANDLE) {
         result = anv_CreateFence(anv_device_to_handle(queue->device),
            &(VkFenceCreateInfo) {
               .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
               .flags = 0,
            }, &swapchain->alloc, &swapchain->fences[0]);
         if (result != VK_SUCCESS)
            return result;
      } else {
         /* Reconstructed tail (the original excerpt breaks off here):
          * reuse the existing fence by resetting it, signal it with an
          * empty submit, then hand the image to the platform backend.
          */
         anv_ResetFences(anv_device_to_handle(queue->device),
                         1, &swapchain->fences[0]);
      }

      anv_QueueSubmit(_queue, 0, NULL, swapchain->fences[0]);

      result = swapchain->queue_present(swapchain, queue,
                                        pPresentInfo->pImageIndices[i]);
      if (result != VK_SUCCESS)
         return result;
   }

   return VK_SUCCESS;
}
VkResult anv_AcquireNextImageKHR(
    VkDevice                                     device,
    VkSwapchainKHR                               _swapchain,
    uint64_t                                     timeout,
    VkSemaphore                                  semaphore,
    VkFence                                      fence,
    uint32_t*                                    pImageIndex)
{
   ANV_FROM_HANDLE(anv_swapchain, swapchain, _swapchain);

   /* Note: the fence argument is not forwarded to the platform backend. */
   return swapchain->acquire_next_image(swapchain, timeout, semaphore,
                                        pImageIndex);
}
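/*
 * Client-side usage sketch of the acquire/present pair handled above
 * (standard Vulkan, not driver code).  The helper name and the omitted
 * rendering step are illustrative assumptions.
 */
static VkResult
example_present_one_frame(VkDevice dev, VkQueue queue,
                          VkSwapchainKHR swapchain,
                          VkSemaphore image_ready)
{
   uint32_t image_index;
   VkResult result =
      vkAcquireNextImageKHR(dev, swapchain, UINT64_MAX, image_ready,
                            VK_NULL_HANDLE, &image_index);
   if (result != VK_SUCCESS && result != VK_SUBOPTIMAL_KHR)
      return result;

   /* ... record and submit rendering that waits on image_ready ... */

   const VkPresentInfoKHR present_info = {
      .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
      .swapchainCount = 1,
      .pSwapchains = &swapchain,
      .pImageIndices = &image_index,
   };
   return vkQueuePresentKHR(queue, &present_info);
}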
void anv_DestroySwapchainKHR(
    VkDevice                                     device,
    VkSwapchainKHR                               _swapchain,
    const VkAllocationCallbacks*                 pAllocator)
{
   ANV_FROM_HANDLE(anv_swapchain, swapchain, _swapchain);

   for (unsigned i = 0; i < ARRAY_SIZE(swapchain->fences); i++) {
      if (swapchain->fences[i] != VK_NULL_HANDLE)
         anv_DestroyFence(device, swapchain->fences[i], pAllocator);
   }

   swapchain->destroy(swapchain, pAllocator);
}
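/*
 * Sketch of the swapchain base object implied by the call sites above
 * (get_images, acquire_next_image, queue_present in anv_QueuePresentKHR,
 * destroy, and the fences array).  The name, field order, fence count and
 * any additional upstream members are assumptions.
 */
struct anv_swapchain_sketch {
   struct anv_device *device;
   VkAllocationCallbacks alloc;
   VkFence fences[3];   /* count assumed; the excerpt only uses index 0
                         * and ARRAY_SIZE() */

   VkResult (*destroy)(struct anv_swapchain *swapchain,
                       const VkAllocationCallbacks *pAllocator);
   VkResult (*get_images)(struct anv_swapchain *swapchain,
                          uint32_t *pCount, VkImage *pSwapchainImages);
   VkResult (*acquire_next_image)(struct anv_swapchain *swapchain,
                                  uint64_t timeout, VkSemaphore semaphore,
                                  uint32_t *image_index);
   VkResult (*queue_present)(struct anv_swapchain *swapchain,
                             struct anv_queue *queue, uint32_t image_index);
};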
VkResult anv_CreateQueryPool(
    VkDevice                                    _device,
    const VkQueryPoolCreateInfo*                pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkQueryPool*                                pQueryPool)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_query_pool *pool;
   VkResult result;
   uint32_t slot_size;
   uint64_t size;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO);

   switch (pCreateInfo->queryType) {
   case VK_QUERY_TYPE_OCCLUSION:
   case VK_QUERY_TYPE_TIMESTAMP:
      break;
   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
      return VK_ERROR_INCOMPATIBLE_DRIVER;
   default:
      assert(!"Invalid query type");
   }

   slot_size = sizeof(struct anv_query_pool_slot);
   pool = vk_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8,
                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pool == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   pool->type = pCreateInfo->queryType;
   pool->slots = pCreateInfo->queryCount;

   size = pCreateInfo->queryCount * slot_size;
   result = anv_bo_init_new(&pool->bo, device, size);
   if (result != VK_SUCCESS)
      goto fail;

   pool->bo.map = anv_gem_mmap(device, pool->bo.gem_handle, 0, size, 0);

   *pQueryPool = anv_query_pool_to_handle(pool);

   return VK_SUCCESS;

 fail:
   vk_free2(&device->alloc, pAllocator, pool);

   return result;
}
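/*
 * Sketch of the slot layout implied by the accesses in
 * anv_CmdResetQueryPool and anv_GetQueryPoolResults below: occlusion
 * queries report end - begin, timestamps are read from 'begin', and
 * 'available' is the availability word.  The name and field order are
 * assumptions.
 */
struct anv_query_pool_slot_sketch {
   uint64_t begin;
   uint64_t end;
   uint64_t available;
};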
VkBool32 anv_GetPhysicalDeviceXcbPresentationSupportKHR(
    VkPhysicalDevice                            physicalDevice,
    uint32_t                                    queueFamilyIndex,
    xcb_connection_t*                           connection,
    xcb_visualid_t                              visual_id)
{
   ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);

   return wsi_get_physical_device_xcb_presentation_support(
      &device->wsi_device,
      &device->instance->alloc,
      queueFamilyIndex,
      device->local_fd, false,
      connection, visual_id);
}
VkBool32 anv_GetPhysicalDeviceXlibPresentationSupportKHR(
    VkPhysicalDevice                            physicalDevice,
    uint32_t                                    queueFamilyIndex,
    Display*                                    dpy,
    VisualID                                    visualID)
{
   ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);

   return wsi_get_physical_device_xcb_presentation_support(
      &device->wsi_device,
      &device->instance->alloc,
      queueFamilyIndex,
      device->local_fd, false,
      XGetXCBConnection(dpy), visualID);
}
VkResult anv_CreateWaylandSurfaceKHR(
    VkInstance                                  _instance,
    const VkWaylandSurfaceCreateInfoKHR*        pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkSurfaceKHR*                               pSurface)
{
   ANV_FROM_HANDLE(anv_instance, instance, _instance);
   const VkAllocationCallbacks *alloc;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR);

   if (pAllocator)
      alloc = pAllocator;
   else
      alloc = &instance->alloc;

   return wsi_create_wl_surface(alloc, pCreateInfo, pSurface);
}
void anv_CmdResetQueryPool(
    VkCommandBuffer                             commandBuffer,
    VkQueryPool                                 queryPool,
    uint32_t                                    firstQuery,
    uint32_t                                    queryCount)
{
   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);

   for (uint32_t i = 0; i < queryCount; i++) {
      switch (pool->type) {
      case VK_QUERY_TYPE_OCCLUSION:
      case VK_QUERY_TYPE_TIMESTAMP: {
         struct anv_query_pool_slot *slot = pool->bo.map;
         slot[firstQuery + i].available = 0;
         break;
      }
      default:
         assert(!"Invalid query type");
      }
   }
}
VkResult anv_CreateDmaBufImageINTEL(
    VkDevice                                    _device,
    const VkDmaBufImageCreateInfo*              pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkDeviceMemory*                             pMem,
    VkImage*                                    pImage)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_device_memory *mem;
   struct anv_image *image;
   VkResult result;
   VkImage image_h;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DMA_BUF_IMAGE_CREATE_INFO_INTEL);

   mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
                   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (mem == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   result = anv_image_create(_device,
      &(struct anv_image_create_info) {
         .isl_tiling_flags = ISL_TILING_X_BIT,
         .stride = pCreateInfo->strideInBytes,
         .vk_info = &(VkImageCreateInfo) {
            .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
            .imageType = VK_IMAGE_TYPE_2D,
            .format = pCreateInfo->format,
            .extent = pCreateInfo->extent,
            .mipLevels = 1,
            .arrayLayers = 1,
            .samples = 1,
            /* FIXME: Need a way to use X tiling to allow scanout */
            .tiling = VK_IMAGE_TILING_OPTIMAL,
            .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
            .flags = 0,
         }},
      pAllocator, &image_h);
   /* Reconstructed tail (the original excerpt breaks off after the
    * create-info above): import the dma-buf, bind it to the image, and
    * return both handles.  Error-path details are assumptions.
    */
   if (result != VK_SUCCESS)
      goto fail;

   image = anv_image_from_handle(image_h);

   mem->bo.gem_handle = anv_gem_fd_to_handle(device, pCreateInfo->fd);
   if (!mem->bo.gem_handle) {
      result = vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
      goto fail_image;
   }

   image->bo = &mem->bo;
   image->offset = 0;

   *pMem = anv_device_memory_to_handle(mem);
   *pImage = anv_image_to_handle(image);

   return VK_SUCCESS;

 fail_image:
   anv_DestroyImage(_device, image_h, pAllocator);
 fail:
   vk_free2(&device->alloc, pAllocator, mem);

   return result;
}
VkResult anv_GetQueryPoolResults(
    VkDevice                                    _device,
    VkQueryPool                                 queryPool,
    uint32_t                                    firstQuery,
    uint32_t                                    queryCount,
    size_t                                      dataSize,
    void*                                       pData,
    VkDeviceSize                                stride,
    VkQueryResultFlags                          flags)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
   int64_t timeout = INT64_MAX;
   uint64_t result;
   int ret;

   assert(pool->type == VK_QUERY_TYPE_OCCLUSION ||
          pool->type == VK_QUERY_TYPE_TIMESTAMP);

   if (pData == NULL)
      return VK_SUCCESS;

   if (flags & VK_QUERY_RESULT_WAIT_BIT) {
      ret = anv_gem_wait(device, pool->bo.gem_handle, &timeout);
      if (ret == -1) {
         /* We don't know the real error. */
         return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY,
                          "gem_wait failed %m");
      }
   }

   void *data_end = pData + dataSize;
   struct anv_query_pool_slot *slot = pool->bo.map;

   for (uint32_t i = 0; i < queryCount; i++) {
      switch (pool->type) {
      case VK_QUERY_TYPE_OCCLUSION: {
         result = slot[firstQuery + i].end - slot[firstQuery + i].begin;
         break;
      }
      case VK_QUERY_TYPE_PIPELINE_STATISTICS:
         unreachable("pipeline stats not supported");
      case VK_QUERY_TYPE_TIMESTAMP: {
         result = slot[firstQuery + i].begin;
         break;
      }
      default:
         unreachable("invalid pool type");
      }

      if (flags & VK_QUERY_RESULT_64_BIT) {
         uint64_t *dst = pData;
         dst[0] = result;
         if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
            dst[1] = slot[firstQuery + i].available;
      } else {
         uint32_t *dst = pData;
         if (result > UINT32_MAX)
            result = UINT32_MAX;
         dst[0] = result;
         if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
            dst[1] = slot[firstQuery + i].available;
      }

      pData += stride;
      if (pData >= data_end)
         break;
   }

   return VK_SUCCESS;
}
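/*
 * Client-side sketch of the result layout produced above (standard Vulkan
 * usage, not driver code): with VK_QUERY_RESULT_64_BIT |
 * VK_QUERY_RESULT_WITH_AVAILABILITY_BIT each query yields two uint64_t
 * values, so the stride must cover both.  The helper name is illustrative.
 */
static VkResult
example_read_occlusion_results(VkDevice dev, VkQueryPool pool,
                               uint32_t first, uint32_t count,
                               uint64_t results[][2])
{
   /* results[i][0] is the occlusion count, results[i][1] availability. */
   return vkGetQueryPoolResults(dev, pool, first, count,
                                count * 2 * sizeof(uint64_t), results,
                                2 * sizeof(uint64_t),
                                VK_QUERY_RESULT_64_BIT |
                                VK_QUERY_RESULT_WITH_AVAILABILITY_BIT);
}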
static VkResult compute_pipeline_create(
    VkDevice                                    _device,
    struct anv_pipeline_cache *                 cache,
    const VkComputePipelineCreateInfo*          pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkPipeline*                                 pPipeline)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   const struct anv_physical_device *physical_device =
      &device->instance->physicalDevice;
   const struct gen_device_info *devinfo = &physical_device->info;
   struct anv_pipeline *pipeline;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO);

   pipeline = vk_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8,
                        VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pipeline == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   pipeline->device = device;
   pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout);

   pipeline->blend_state.map = NULL;

   result = anv_reloc_list_init(&pipeline->batch_relocs,
                                pAllocator ? pAllocator : &device->alloc);
   if (result != VK_SUCCESS) {
      vk_free2(&device->alloc, pAllocator, pipeline);
      return result;
   }
   pipeline->batch.next = pipeline->batch.start = pipeline->batch_data;
   pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data);
   pipeline->batch.relocs = &pipeline->batch_relocs;

   /* When we free the pipeline, we detect stages based on the NULL status
    * of various prog_data pointers.  Make them NULL by default.
    */
   memset(pipeline->shaders, 0, sizeof(pipeline->shaders));

   pipeline->vs_simd8 = NO_KERNEL;
   pipeline->vs_vec4 = NO_KERNEL;
   pipeline->gs_kernel = NO_KERNEL;

   pipeline->active_stages = 0;

   pipeline->needs_data_cache = false;

   assert(pCreateInfo->stage.stage == VK_SHADER_STAGE_COMPUTE_BIT);
   ANV_FROM_HANDLE(anv_shader_module, module, pCreateInfo->stage.module);
   result = anv_pipeline_compile_cs(pipeline, cache, pCreateInfo, module,
                                    pCreateInfo->stage.pName,
                                    pCreateInfo->stage.pSpecializationInfo);
   if (result != VK_SUCCESS) {
      vk_free2(&device->alloc, pAllocator, pipeline);
      return result;
   }

   const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);

   anv_pipeline_setup_l3_config(pipeline, cs_prog_data->base.total_shared > 0);

   uint32_t group_size = cs_prog_data->local_size[0] *
      cs_prog_data->local_size[1] * cs_prog_data->local_size[2];
   uint32_t remainder = group_size & (cs_prog_data->simd_size - 1);

   /* The right mask enables only the channels that carry live invocations
    * in the last SIMD thread of a group.  Example: group_size 20 at
    * simd_size 16 leaves remainder 4, so the mask is 0xf; when the group
    * divides evenly, all simd_size channels are enabled.
    */
   if (remainder > 0)
      pipeline->cs_right_mask = ~0u >> (32 - remainder);
   else
      pipeline->cs_right_mask = ~0u >> (32 - cs_prog_data->simd_size);

   /* Reconstructed close (the original excerpt breaks off at the else
    * branch above).
    */
   *pPipeline = anv_pipeline_to_handle(pipeline);

   return VK_SUCCESS;
}
VkResult anv_GetPhysicalDeviceImageFormatProperties(
    VkPhysicalDevice                            physicalDevice,
    VkFormat                                    format,
    VkImageType                                 type,
    VkImageTiling                               tiling,
    VkImageUsageFlags                           usage,
    VkImageCreateFlags                          createFlags,
    VkImageFormatProperties*                    pImageFormatProperties)
{
   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
   VkFormatProperties format_props;
   VkFormatFeatureFlags format_feature_flags;
   VkExtent3D maxExtent;
   uint32_t maxMipLevels;
   uint32_t maxArraySize;
   VkSampleCountFlags sampleCounts = VK_SAMPLE_COUNT_1_BIT;

   anv_physical_device_get_format_properties(physical_device, format,
                                             &format_props);

   /* Extract the VkFormatFeatureFlags that are relevant for the queried
    * tiling.
    */
   if (tiling == VK_IMAGE_TILING_LINEAR) {
      format_feature_flags = format_props.linearTilingFeatures;
   } else if (tiling == VK_IMAGE_TILING_OPTIMAL) {
      format_feature_flags = format_props.optimalTilingFeatures;
   } else {
      unreachable("bad VkImageTiling");
   }

   switch (type) {
   default:
      unreachable("bad VkImageType");
   case VK_IMAGE_TYPE_1D:
      maxExtent.width = 16384;
      maxExtent.height = 1;
      maxExtent.depth = 1;
      maxMipLevels = 15; /* log2(maxWidth) + 1 */
      maxArraySize = 2048;
      sampleCounts = VK_SAMPLE_COUNT_1_BIT;
      break;
   case VK_IMAGE_TYPE_2D:
      /* FINISHME: Does this really differ for cube maps? The documentation
       * for RENDER_SURFACE_STATE suggests so.
       */
      maxExtent.width = 16384;
      maxExtent.height = 16384;
      maxExtent.depth = 1;
      maxMipLevels = 15; /* log2(maxWidth) + 1 */
      maxArraySize = 2048;
      break;
   case VK_IMAGE_TYPE_3D:
      maxExtent.width = 2048;
      maxExtent.height = 2048;
      maxExtent.depth = 2048;
      maxMipLevels = 12; /* log2(maxWidth) + 1 */
      maxArraySize = 1;
      break;
   }

   if (tiling == VK_IMAGE_TILING_OPTIMAL &&
       type == VK_IMAGE_TYPE_2D &&
       (format_feature_flags & (VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT |
                                VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
       !(createFlags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) &&
       !(usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
      sampleCounts = isl_device_get_sample_counts(&physical_device->isl_dev);
   }

   if (usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) {
      /* Meta implements transfers by sampling from the source image. */
      if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) {
         goto unsupported;
      }
   }

#if 0
   if (usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) {
      if (anv_format_for_vk_format(format)->has_stencil) {
         /* Not yet implemented because copying to a W-tiled surface is crazy
          * hard.
          */
         anv_finishme("support VK_IMAGE_USAGE_TRANSFER_DST_BIT for "
                      "stencil format");
         goto unsupported;
      }
   }
#endif

   if (usage & VK_IMAGE_USAGE_SAMPLED_BIT) {
      if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) {
         goto unsupported;
      }
   }

   if (usage & VK_IMAGE_USAGE_STORAGE_BIT) {
      if (!(format_feature_flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) {
         goto unsupported;
      }
   }

   if (usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
      if (!(format_feature_flags & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) {
         goto unsupported;
      }
   }

   if (usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
      if (!(format_feature_flags &
            VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) {
         goto unsupported;
      }
   }

   if (usage & VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT) {
      /* Nothing to check. */
   }

   if (usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) {
      /* Ignore this flag because it was removed from the
       * provisional_I_20150910 header.
       */
   }

   *pImageFormatProperties = (VkImageFormatProperties) {
      .maxExtent = maxExtent,
      .maxMipLevels = maxMipLevels,
      .maxArrayLayers = maxArraySize,
      .sampleCounts = sampleCounts,

      /* FINISHME: Accurately calculate
       * VkImageFormatProperties::maxResourceSize.
       */
      .maxResourceSize = UINT32_MAX,
   };

   return VK_SUCCESS;

unsupported:
   *pImageFormatProperties = (VkImageFormatProperties) {
      .maxExtent = { 0, 0, 0 },
      .maxMipLevels = 0,
      .maxArrayLayers = 0,
      .sampleCounts = 0,
      .maxResourceSize = 0,
   };

   return VK_ERROR_FORMAT_NOT_SUPPORTED;
}
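/*
 * Client-side sketch: probe a format/usage combination through the entry
 * point above before creating an image with it (standard Vulkan usage, not
 * driver code).  The helper name is illustrative.
 */
static VkBool32
example_supports_render_target(VkPhysicalDevice pdev, VkFormat format)
{
   VkImageFormatProperties props;
   VkResult result = vkGetPhysicalDeviceImageFormatProperties(
      pdev, format, VK_IMAGE_TYPE_2D, VK_IMAGE_TILING_OPTIMAL,
      VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, 0, &props);

   /* VK_ERROR_FORMAT_NOT_SUPPORTED means the combination is unusable. */
   return result == VK_SUCCESS;
}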
VkResult genX(compute_pipeline_create)(
    VkDevice                                    _device,
    struct anv_pipeline_cache *                 cache,
    const VkComputePipelineCreateInfo*          pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkPipeline*                                 pPipeline)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_pipeline *pipeline;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO);

   pipeline = anv_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8,
                         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pipeline == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   pipeline->device = device;
   pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout);

   pipeline->blend_state.map = NULL;

   result = anv_reloc_list_init(&pipeline->batch_relocs,
                                pAllocator ? pAllocator : &device->alloc);
   if (result != VK_SUCCESS) {
      anv_free2(&device->alloc, pAllocator, pipeline);
      return result;
   }
   pipeline->batch.next = pipeline->batch.start = pipeline->batch_data;
   pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data);
   pipeline->batch.relocs = &pipeline->batch_relocs;

   /* When we free the pipeline, we detect stages based on the NULL status
    * of various prog_data pointers.  Make them NULL by default.
    */
   memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
   memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start));
   memset(pipeline->bindings, 0, sizeof(pipeline->bindings));

   pipeline->vs_simd8 = NO_KERNEL;
   pipeline->vs_vec4 = NO_KERNEL;
   pipeline->gs_kernel = NO_KERNEL;

   pipeline->active_stages = 0;
   pipeline->total_scratch = 0;

   assert(pCreateInfo->stage.stage == VK_SHADER_STAGE_COMPUTE_BIT);
   ANV_FROM_HANDLE(anv_shader_module, module, pCreateInfo->stage.module);
   anv_pipeline_compile_cs(pipeline, cache, pCreateInfo, module,
                           pCreateInfo->stage.pName,
                           pCreateInfo->stage.pSpecializationInfo);

   pipeline->use_repclear = false;

   const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
   const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;

   /* A GRF register holds 32 bytes (8 dwords), so the push-constant payload
    * (uniform params plus per-thread local invocation IDs) is rounded up to
    * a whole number of registers.
    */
   unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
   unsigned push_constant_data_size =
      (prog_data->nr_params + local_id_dwords) * 4;
   unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32);
   unsigned push_constant_regs = reg_aligned_constant_size / 32;

   uint32_t group_size = cs_prog_data->local_size[0] *
      cs_prog_data->local_size[1] * cs_prog_data->local_size[2];
   pipeline->cs_thread_width_max =
      DIV_ROUND_UP(group_size, cs_prog_data->simd_size);
   uint32_t remainder = group_size & (cs_prog_data->simd_size - 1);

   if (remainder > 0)
      pipeline->cs_right_mask = ~0u >> (32 - remainder);
   else
      pipeline->cs_right_mask = ~0u >> (32 - cs_prog_data->simd_size);

   /* Reconstructed close (the original excerpt breaks off at the else
    * branch above); upstream goes on to emit the gen-specific hardware
    * state (e.g. MEDIA_VFE_STATE, which consumes push_constant_regs)
    * before returning.
    */
   *pPipeline = anv_pipeline_to_handle(pipeline);

   return VK_SUCCESS;
}