/**
 * Create new pipe_resource given the template information.
 */
static struct pipe_resource *
softpipe_resource_create(struct pipe_screen *screen,
                         const struct pipe_resource *templat)
{
   struct softpipe_resource *spr = CALLOC_STRUCT(softpipe_resource);
   if (!spr)
      return NULL;

   assert(templat->format != PIPE_FORMAT_NONE);

   spr->base = *templat;
   pipe_reference_init(&spr->base.reference, 1);
   spr->base.screen = screen;

   spr->pot = (util_is_power_of_two(templat->width0) &&
               util_is_power_of_two(templat->height0) &&
               util_is_power_of_two(templat->depth0));

   if (spr->base.bind & (PIPE_BIND_DISPLAY_TARGET |
                         PIPE_BIND_SCANOUT |
                         PIPE_BIND_SHARED)) {
      if (!softpipe_displaytarget_layout(screen, spr))
         goto fail;
   }
   else {
      if (!softpipe_resource_layout(screen, spr))
         goto fail;
   }

   return &spr->base;

 fail:
   FREE(spr);
   return NULL;
}
static unsigned
nvfx_miptree_layout(struct nvfx_miptree *mt)
{
   struct pipe_resource *pt = &mt->base.base;
   uint offset = 0;

   if (!nvfx_screen(pt->screen)->is_nv4x) {
      assert(pt->target == PIPE_TEXTURE_RECT ||
             (util_is_power_of_two(pt->width0) &&
              util_is_power_of_two(pt->height0)));
   }

   for (unsigned l = 0; l <= pt->last_level; l++) {
      unsigned size;
      mt->level_offset[l] = offset;

      if (mt->linear_pitch)
         size = mt->linear_pitch;
      else
         size = util_format_get_stride(pt->format, u_minify(pt->width0, l));
      size = util_format_get_2d_size(pt->format, size, u_minify(pt->height0, l));

      if (pt->target == PIPE_TEXTURE_3D)
         size *= u_minify(pt->depth0, l);

      offset += size;
   }

   offset = align(offset, 128);
   mt->face_size = offset;
   if (mt->base.base.target == PIPE_TEXTURE_CUBE)
      offset += 5 * mt->face_size;
   return offset;
}
static struct pipe_resource *
softpipe_resource_from_handle(struct pipe_screen *screen,
                              const struct pipe_resource *templat,
                              struct winsys_handle *whandle)
{
   struct sw_winsys *winsys = softpipe_screen(screen)->winsys;
   struct softpipe_resource *spr = CALLOC_STRUCT(softpipe_resource);
   if (!spr)
      return NULL;

   spr->base = *templat;
   pipe_reference_init(&spr->base.reference, 1);
   spr->base.screen = screen;

   spr->pot = (util_is_power_of_two(templat->width0) &&
               util_is_power_of_two(templat->height0) &&
               util_is_power_of_two(templat->depth0));

   spr->dt = winsys->displaytarget_from_handle(winsys,
                                               templat,
                                               whandle,
                                               &spr->stride[0]);
   if (!spr->dt)
      goto fail;

   return &spr->base;

 fail:
   FREE(spr);
   return NULL;
}
static struct pipe_sampler_view *
etna_pipe_create_sampler_view(struct pipe_context *pipe,
                              struct pipe_resource *texture,
                              const struct pipe_sampler_view *templat)
{
   struct etna_pipe_context *priv = etna_pipe_context(pipe);
   struct etna_sampler_view *sv = CALLOC_STRUCT(etna_sampler_view);
   sv->base = *templat;
   sv->base.context = pipe;
   sv->base.texture = 0;
   pipe_resource_reference(&sv->base.texture, texture);
   sv->base.texture = texture;
   assert(sv->base.texture);

   struct compiled_sampler_view *cs = CALLOC_STRUCT(compiled_sampler_view);
   struct etna_resource *res = etna_resource(sv->base.texture);
   assert(res != NULL);

   cs->TE_SAMPLER_CONFIG0 =
      VIVS_TE_SAMPLER_CONFIG0_TYPE(translate_texture_target(res->base.target, false)) |
      VIVS_TE_SAMPLER_CONFIG0_FORMAT(translate_texture_format(sv->base.format, false));
   /* merged with sampler state */
   cs->TE_SAMPLER_CONFIG0_MASK = 0xffffffff;

   cs->TE_SAMPLER_CONFIG1 =
      VIVS_TE_SAMPLER_CONFIG1_SWIZZLE_R(templat->swizzle_r) |
      VIVS_TE_SAMPLER_CONFIG1_SWIZZLE_G(templat->swizzle_g) |
      VIVS_TE_SAMPLER_CONFIG1_SWIZZLE_B(templat->swizzle_b) |
      VIVS_TE_SAMPLER_CONFIG1_SWIZZLE_A(templat->swizzle_a) |
      VIVS_TE_SAMPLER_CONFIG1_HALIGN(res->halign);
   cs->TE_SAMPLER_SIZE =
      VIVS_TE_SAMPLER_SIZE_WIDTH(res->base.width0) |
      VIVS_TE_SAMPLER_SIZE_HEIGHT(res->base.height0);
   cs->TE_SAMPLER_LOG_SIZE =
      VIVS_TE_SAMPLER_LOG_SIZE_WIDTH(etna_log2_fixp55(res->base.width0)) |
      VIVS_TE_SAMPLER_LOG_SIZE_HEIGHT(etna_log2_fixp55(res->base.height0));

   /* Set up levels-of-detail */
   for (int lod = 0; lod <= res->base.last_level; ++lod) {
      cs->TE_SAMPLER_LOD_ADDR[lod] =
         etna_bo_gpu_address(res->bo) + res->levels[lod].offset;
   }
   cs->min_lod = sv->base.u.tex.first_level << 5;
   cs->max_lod = MIN2(sv->base.u.tex.last_level, res->base.last_level) << 5;

   /* Workaround for npot textures -- it appears that only CLAMP_TO_EDGE is
    * supported when the appropriate capability is not set.
    */
   if (!priv->specs.npot_tex_any_wrap &&
       (!util_is_power_of_two(res->base.width0) ||
        !util_is_power_of_two(res->base.height0))) {
      cs->TE_SAMPLER_CONFIG0_MASK = ~(VIVS_TE_SAMPLER_CONFIG0_UWRAP__MASK |
                                      VIVS_TE_SAMPLER_CONFIG0_VWRAP__MASK);
      cs->TE_SAMPLER_CONFIG0 |=
         VIVS_TE_SAMPLER_CONFIG0_UWRAP(TEXTURE_WRAPMODE_CLAMP_TO_EDGE) |
         VIVS_TE_SAMPLER_CONFIG0_VWRAP(TEXTURE_WRAPMODE_CLAMP_TO_EDGE);
   }

   sv->internal = cs;
   pipe_reference_init(&sv->base.reference, 1);
   return &sv->base;
}
static void r300_setup_flags(struct r300_resource *tex)
{
   tex->tex.uses_stride_addressing =
      !util_is_power_of_two(tex->b.b.b.width0) ||
      (tex->tex.stride_in_bytes_override &&
       stride_to_width(tex->b.b.b.format,
                       tex->tex.stride_in_bytes_override) != tex->b.b.b.width0);

   tex->tex.is_npot =
      tex->tex.uses_stride_addressing ||
      !util_is_power_of_two(tex->b.b.b.height0) ||
      !util_is_power_of_two(tex->b.b.b.depth0);
}
static void r300_setup_flags(struct r300_texture_desc *desc)
{
   desc->uses_stride_addressing =
      !util_is_power_of_two(desc->b.b.width0) ||
      !util_is_power_of_two(desc->b.b.height0) ||
      (desc->stride_in_bytes_override &&
       stride_to_width(desc->b.b.format,
                       desc->stride_in_bytes_override) != desc->b.b.width0);

   desc->is_npot =
      desc->uses_stride_addressing ||
      !util_is_power_of_two(desc->b.b.height0);
}
static void
nvfx_miptree_choose_format(struct nvfx_miptree *mt)
{
   struct pipe_resource *pt = &mt->base.base;
   unsigned uniform_pitch = 0;
   static int no_swizzle = -1;
   if (no_swizzle < 0)
      no_swizzle = debug_get_bool_option("NV40_NO_SWIZZLE", FALSE); /* this will break things on nv30 */

   if (!util_is_power_of_two(pt->width0) ||
       !util_is_power_of_two(pt->height0) ||
       !util_is_power_of_two(pt->depth0) ||
       (!nvfx_screen(pt->screen)->is_nv4x && pt->target == PIPE_TEXTURE_RECT))
      uniform_pitch = 1;

   if ((pt->bind & (PIPE_BIND_SCANOUT | PIPE_BIND_DISPLAY_TARGET))
       || (pt->usage & PIPE_USAGE_DYNAMIC)
       || (pt->usage & PIPE_USAGE_STAGING)
       || util_format_is_compressed(pt->format)
       || no_swizzle)
      mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR;

   /* non compressed formats with uniform pitch must be linear, and vice versa */
   if (!util_format_is_s3tc(pt->format)
       && (uniform_pitch || mt->base.base.flags & NVFX_RESOURCE_FLAG_LINEAR)) {
      mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR;
      uniform_pitch = 1;
   }

   if (uniform_pitch) {
      mt->linear_pitch = util_format_get_stride(pt->format, pt->width0);

      // TODO: this is only a constraint for rendering and not sampling, apparently
      // we may also want this unconditionally
      if (pt->bind & (PIPE_BIND_SAMPLER_VIEW |
                      PIPE_BIND_DEPTH_STENCIL |
                      PIPE_BIND_RENDER_TARGET |
                      PIPE_BIND_DISPLAY_TARGET |
                      PIPE_BIND_SCANOUT))
         mt->linear_pitch = align(mt->linear_pitch, 64);
   }
   else
      mt->linear_pitch = 0;
}
/**
 * Concatenates several (must be a power of 2) vectors (of same type)
 * into a larger one.
 * Most useful for building up a 256bit sized vector out of two 128bit ones.
 */
LLVMValueRef
lp_build_concat(struct gallivm_state *gallivm,
                LLVMValueRef src[],
                struct lp_type src_type,
                unsigned num_vectors)
{
   unsigned new_length, i;
   LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH/2];
   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];

   assert(src_type.length * num_vectors <= Elements(shuffles));
   assert(util_is_power_of_two(num_vectors));

   new_length = src_type.length;

   for (i = 0; i < num_vectors; i++)
      tmp[i] = src[i];

   while (num_vectors > 1) {
      num_vectors >>= 1;
      new_length <<= 1;
      for (i = 0; i < new_length; i++) {
         shuffles[i] = lp_build_const_int32(gallivm, i);
      }
      for (i = 0; i < num_vectors; i++) {
         tmp[i] = LLVMBuildShuffleVector(gallivm->builder, tmp[i*2], tmp[i*2 + 1],
                                         LLVMConstVector(shuffles, new_length),
                                         "");
      }
   }

   return tmp[0];
}
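/* A CPU-side sketch of the same doubling strategy, using memcpy where the
 * LLVM version emits shufflevector: each pass halves the number of vectors
 * and doubles their length until one remains.  The element type, buffer
 * sizes and the function name are illustrative only, not part of gallivm. */
#define CONCAT_MAX_ELEMS 64

static void
concat_pairwise(float out[], const float *src[],
                unsigned length, unsigned num_vectors)
{
   float tmp[CONCAT_MAX_ELEMS][CONCAT_MAX_ELEMS];
   unsigned i;

   assert((num_vectors & (num_vectors - 1)) == 0);   /* power of two, as asserted above */
   assert(length * num_vectors <= CONCAT_MAX_ELEMS);

   for (i = 0; i < num_vectors; i++)
      memcpy(tmp[i], src[i], length * sizeof(float));

   while (num_vectors > 1) {
      num_vectors >>= 1;
      for (i = 0; i < num_vectors; i++) {
         /* merge vectors 2i and 2i+1 into one vector of twice the length */
         if (i != i * 2)
            memcpy(tmp[i], tmp[i * 2], length * sizeof(float));
         memcpy(tmp[i] + length, tmp[i * 2 + 1], length * sizeof(float));
      }
      length <<= 1;
   }

   memcpy(out, tmp[0], length * sizeof(float));
}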
static struct pipe_resource *
galahad_screen_resource_create(struct pipe_screen *_screen,
                               const struct pipe_resource *templat)
{
   struct galahad_screen *glhd_screen = galahad_screen(_screen);
   struct pipe_screen *screen = glhd_screen->screen;
   struct pipe_resource *result;

   if (templat->target >= PIPE_MAX_TEXTURE_TYPES)
      glhd_warn("Received bogus resource target %d", templat->target);

   if (templat->target != PIPE_TEXTURE_RECT && templat->target != PIPE_BUFFER &&
       !screen->get_param(screen, PIPE_CAP_NPOT_TEXTURES)) {
      if (!util_is_power_of_two(templat->width0) ||
          !util_is_power_of_two(templat->height0))
         glhd_warn("Requested NPOT (%ux%u) non-rectangle texture without NPOT support",
                   templat->width0, templat->height0);
   }

   if (templat->target == PIPE_TEXTURE_RECT && templat->last_level)
      glhd_warn("Rectangle textures cannot have mipmaps, but last_level = %u",
                templat->last_level);

   if (templat->target == PIPE_BUFFER && templat->last_level)
      glhd_warn("Buffers cannot have mipmaps, but last_level = %u",
                templat->last_level);

   if (templat->target != PIPE_TEXTURE_3D && templat->depth0 != 1)
      glhd_warn("Only 3D textures can have depth != 1, but received target %u and depth %u",
                templat->target, templat->depth0);

   if (templat->target == PIPE_TEXTURE_1D && templat->height0 != 1)
      glhd_warn("1D textures must have height 1 but got asked for height %u",
                templat->height0);

   if (templat->target == PIPE_BUFFER && templat->height0 != 1)
      glhd_warn("Buffers must have height 1 but got asked for height %u",
                templat->height0);

   if (templat->target == PIPE_TEXTURE_CUBE && templat->width0 != templat->height0)
      glhd_warn("Cube maps must be square, but got asked for %ux%u",
                templat->width0, templat->height0);

   result = screen->resource_create(screen, templat);

   if (result)
      return galahad_resource_create(glhd_screen, result);
   return NULL;
}
/**
 * The texture is for transfer only.  We can define our own layout to save
 * space.
 */
static void
layout_init_for_transfer(struct ilo_layout *layout,
                         const struct ilo_dev_info *dev,
                         const struct pipe_resource *templ)
{
   const unsigned num_layers = (templ->target == PIPE_TEXTURE_3D) ?
      templ->depth0 : templ->array_size;
   unsigned layer_width, layer_height;

   assert(templ->last_level == 0);
   assert(templ->nr_samples <= 1);

   layout->aux = ILO_LAYOUT_AUX_NONE;
   layout->width0 = templ->width0;
   layout->height0 = templ->height0;
   layout->format = templ->format;
   layout->block_width = util_format_get_blockwidth(templ->format);
   layout->block_height = util_format_get_blockheight(templ->format);
   layout->block_size = util_format_get_blocksize(templ->format);
   layout->walk = ILO_LAYOUT_WALK_LOD;

   layout->valid_tilings = LAYOUT_TILING_NONE;
   layout->tiling = INTEL_TILING_NONE;

   layout->align_i = layout->block_width;
   layout->align_j = layout->block_height;

   assert(util_is_power_of_two(layout->block_width) &&
          util_is_power_of_two(layout->block_height));

   /* use packed layout */
   layer_width = align(templ->width0, layout->align_i);
   layer_height = align(templ->height0, layout->align_j);

   layout->lods[0].slice_width = layer_width;
   layout->lods[0].slice_height = layer_height;

   layout->bo_stride = (layer_width / layout->block_width) * layout->block_size;
   layout->bo_stride = align(layout->bo_stride, 64);

   layout->bo_height = (layer_height / layout->block_height) * num_layers;
}
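/* Worked example of the stride/height math above, assuming a DXT1/BC1
 * transfer of 130x66 texels (4x4 blocks, 8 bytes per block):
 *
 *   layer_width  = align(130, 4) = 132  ->  132 / 4 = 33 blocks per row
 *   bo_stride    = 33 * 8 = 264 bytes   ->  align(264, 64) = 320 bytes
 *   layer_height = align(66, 4) = 68    ->  68 / 4 = 17 block rows per layer
 *
 * The texel sizes are illustrative; only the block parameters come from the
 * format description. */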
bool
ImmediateValue::isPow2() const
{
   switch (reg.type) {
   case TYPE_U8:
   case TYPE_U16:
   case TYPE_U32:
      return util_is_power_of_two(reg.data.u32);
   default:
      return false;
   }
}
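/* The usual bit trick behind such a helper, shown standalone: clearing the
 * lowest set bit with v & (v - 1) leaves zero exactly when at most one bit
 * was set.  Note this variant also reports 0 as a "power of two"; whether
 * the real util_is_power_of_two() behaves the same for zero is not shown
 * here, so treat that case as an assumption. */
static inline int
is_power_of_two_bit_trick(unsigned v)
{
   return (v & (v - 1)) == 0;
}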
static bool
image_get_gen6_layout(const struct ilo_dev *dev,
                      const struct ilo_image_info *info,
                      struct ilo_image_layout *layout)
{
   ILO_DEV_ASSERT(dev, 6, 8);

   if (!image_validate_gen6(dev, info))
      return false;

   if (image_bind_gpu(info) || info->level_count > 1) {
      if (!image_init_gen6_hardware_layout(dev, info, layout))
         return false;
   } else {
      if (!image_init_gen6_transfer_layout(dev, info, layout))
         return false;
   }

   /*
    * the fact that align i and j are multiples of block width and height
    * respectively is what makes the size of the bo a multiple of the block
    * size, slices start at block boundaries, and many of the computations
    * work.
    */
   assert(layout->align_i % info->block_width == 0);
   assert(layout->align_j % info->block_height == 0);

   /* make sure align() works */
   assert(util_is_power_of_two(layout->align_i) &&
          util_is_power_of_two(layout->align_j));
   assert(util_is_power_of_two(info->block_width) &&
          util_is_power_of_two(info->block_height));

   image_get_gen6_lods(dev, info, layout);

   assert(layout->walk_layer_height % info->block_height == 0);
   assert(layout->monolithic_width % info->block_width == 0);
   assert(layout->monolithic_height % info->block_height == 0);

   return true;
}
static void
anv_physical_device_get_format_properties(struct anv_physical_device *physical_device,
                                           VkFormat format,
                                           VkFormatProperties *out_properties)
{
   int gen = physical_device->info->gen * 10;
   if (physical_device->info->is_haswell)
      gen += 5;

   VkFormatFeatureFlags linear = 0, tiled = 0, buffer = 0;
   if (anv_format_is_depth_or_stencil(&anv_formats[format])) {
      tiled |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT;
      if (physical_device->info->gen >= 8)
         tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT;
      tiled |= VK_FORMAT_FEATURE_BLIT_SRC_BIT |
               VK_FORMAT_FEATURE_BLIT_DST_BIT;
   } else {
      enum isl_format linear_fmt, tiled_fmt;
      struct anv_format_swizzle linear_swizzle, tiled_swizzle;
      linear_fmt = anv_get_isl_format(format, VK_IMAGE_ASPECT_COLOR_BIT,
                                      VK_IMAGE_TILING_LINEAR, &linear_swizzle);
      tiled_fmt = anv_get_isl_format(format, VK_IMAGE_ASPECT_COLOR_BIT,
                                     VK_IMAGE_TILING_OPTIMAL, &tiled_swizzle);

      linear = get_image_format_properties(gen, linear_fmt, linear_fmt,
                                           linear_swizzle);
      tiled = get_image_format_properties(gen, linear_fmt, tiled_fmt,
                                          tiled_swizzle);
      buffer = get_buffer_format_properties(gen, linear_fmt);

      /* XXX: We handle 3-channel formats by switching them out for RGBX or
       * RGBA formats behind-the-scenes.  This works fine for textures
       * because the upload process will fill in the extra channel.
       * We could also support it for render targets, but it will take
       * substantially more work and we have enough RGBX formats to handle
       * what most clients will want.
       */
      if (linear_fmt != ISL_FORMAT_UNSUPPORTED &&
          !util_is_power_of_two(isl_format_layouts[linear_fmt].bs) &&
          isl_format_rgb_to_rgbx(linear_fmt) == ISL_FORMAT_UNSUPPORTED) {
         tiled &= ~VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT &
                  ~VK_FORMAT_FEATURE_BLIT_DST_BIT;
      }
   }

   out_properties->linearTilingFeatures = linear;
   out_properties->optimalTilingFeatures = tiled;
   out_properties->bufferFeatures = buffer;

   return;
}
/**
 * Exactly one bit must be set in \a aspect.
 */
enum isl_format
anv_get_isl_format(VkFormat format, VkImageAspectFlags aspect,
                   VkImageTiling tiling, struct anv_format_swizzle *swizzle)
{
   const struct anv_format *anv_fmt = &anv_formats[format];

   if (swizzle)
      *swizzle = anv_fmt->swizzle;

   switch (aspect) {
   case VK_IMAGE_ASPECT_COLOR_BIT:
      if (anv_fmt->isl_format == ISL_FORMAT_UNSUPPORTED) {
         return ISL_FORMAT_UNSUPPORTED;
      } else if (tiling == VK_IMAGE_TILING_OPTIMAL &&
                 !util_is_power_of_two(anv_fmt->isl_layout->bs)) {
         /* Tiled formats *must* be power-of-two because we need to upload
          * them with the render pipeline.  For 3-channel formats, we fix
          * this by switching them over to RGBX or RGBA formats under the
          * hood.
          */
         enum isl_format rgbx = isl_format_rgb_to_rgbx(anv_fmt->isl_format);
         if (rgbx != ISL_FORMAT_UNSUPPORTED)
            return rgbx;
         else
            return isl_format_rgb_to_rgba(anv_fmt->isl_format);
      } else {
         return anv_fmt->isl_format;
      }

   case VK_IMAGE_ASPECT_DEPTH_BIT:
   case (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT):
      assert(anv_fmt->has_depth);
      return anv_fmt->isl_format;

   case VK_IMAGE_ASPECT_STENCIL_BIT:
      assert(anv_fmt->has_stencil);
      return ISL_FORMAT_R8_UINT;

   default:
      unreachable("bad VkImageAspect");
      return ISL_FORMAT_UNSUPPORTED;
   }
}
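/* A concrete instance of the power-of-two restriction above: a 3-channel
 * format such as R32G32B32_FLOAT has a 12-byte texel, which is not a power
 * of two, so for optimal tiling it would presumably be swapped for a padded
 * 16-byte RGBX variant (or the RGBA equivalent if no RGBX form exists).
 * The specific format names are illustrative of the mapping, not a claim
 * about which entries isl actually provides. */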
struct util_ringbuffer *util_ringbuffer_create( unsigned dwords )
{
   struct util_ringbuffer *ring = CALLOC_STRUCT(util_ringbuffer);
   if (ring == NULL)
      return NULL;

   assert(util_is_power_of_two(dwords));

   ring->buf = MALLOC( dwords * sizeof(unsigned) );
   if (ring->buf == NULL)
      goto fail;

   ring->mask = dwords - 1;

   pipe_condvar_init(ring->change);
   pipe_mutex_init(ring->mutex);
   return ring;

fail:
   FREE(ring->buf);
   FREE(ring);
   return NULL;
}
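/* Why the power-of-two assert matters: with a power-of-two size,
 * "index & (dwords - 1)" equals "index % dwords", so head/tail counters can
 * grow monotonically and be masked on every access.  A minimal sketch with
 * illustrative names, not the actual util_ringbuffer API: */
static unsigned
ring_slot(unsigned index, unsigned mask)
{
   return index & mask;   /* mask == dwords - 1, e.g. 1024 - 1 = 0x3ff */
}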
static void
layout_init_alignments(struct ilo_layout *layout,
                       struct ilo_layout_params *params)
{
   const struct pipe_resource *templ = params->templ;

   /*
    * From the Sandy Bridge PRM, volume 1 part 1, page 113:
    *
    *     "surface format           align_i     align_j
    *      YUV 4:2:2 formats        4           *see below
    *      BC1-5                    4           4
    *      FXT1                     8           4
    *      all other formats        4           *see below"
    *
    *     "- align_j = 4 for any depth buffer
    *      - align_j = 2 for separate stencil buffer
    *      - align_j = 4 for any render target surface is multisampled (4x)
    *      - align_j = 4 for any render target surface with Surface Vertical
    *        Alignment = VALIGN_4
    *      - align_j = 2 for any render target surface with Surface Vertical
    *        Alignment = VALIGN_2
    *      - align_j = 2 for all other render target surface
    *      - align_j = 2 for any sampling engine surface with Surface Vertical
    *        Alignment = VALIGN_2
    *      - align_j = 4 for any sampling engine surface with Surface Vertical
    *        Alignment = VALIGN_4"
    *
    * From the Sandy Bridge PRM, volume 4 part 1, page 86:
    *
    *     "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
    *      the Surface Format is 96 bits per element (BPE)."
    *
    * They can be rephrased as
    *
    *                                  align_i        align_j
    *   compressed formats             block width    block height
    *   PIPE_FORMAT_S8_UINT            4              2
    *   other depth/stencil formats    4              4
    *   4x multisampled                4              4
    *   bpp 96                         4              2
    *   others                         4              2 or 4
    */

   /*
    * From the Ivy Bridge PRM, volume 1 part 1, page 110:
    *
    *     "surface defined by      surface format     align_i     align_j
    *      3DSTATE_DEPTH_BUFFER    D16_UNORM          8           4
    *                              not D16_UNORM      4           4
    *      3DSTATE_STENCIL_BUFFER  N/A                8           8
    *      SURFACE_STATE           BC*, ETC*, EAC*    4           4
    *                              FXT1               8           4
    *                              all others         (set by SURFACE_STATE)"
    *
    * From the Ivy Bridge PRM, volume 4 part 1, page 63:
    *
    *     "- This field (Surface Vertical Aligment) is intended to be set to
    *        VALIGN_4 if the surface was rendered as a depth buffer, for a
    *        multisampled (4x) render target, or for a multisampled (8x)
    *        render target, since these surfaces support only alignment of 4.
    *      - Use of VALIGN_4 for other surfaces is supported, but uses more
    *        memory.
    *      - This field must be set to VALIGN_4 for all tiled Y Render Target
    *        surfaces.
    *      - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
    *        YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
    *      - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
    *        must be set to VALIGN_4."
    *      - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
    *
    *     "- This field (Surface Horizontal Aligment) is intended to be set to
    *        HALIGN_8 only if the surface was rendered as a depth buffer with
    *        Z16 format or a stencil buffer, since these surfaces support only
    *        alignment of 8.
    *      - Use of HALIGN_8 for other surfaces is supported, but uses more
    *        memory.
    *      - This field must be set to HALIGN_4 if the Surface Format is BC*.
    *      - This field must be set to HALIGN_8 if the Surface Format is
    *        FXT1."
    *
    * They can be rephrased as
    *
    *                                  align_i        align_j
    *   compressed formats             block width    block height
    *   PIPE_FORMAT_Z16_UNORM          8              4
    *   PIPE_FORMAT_S8_UINT            8              8
    *   other depth/stencil formats    4              4
    *   2x or 4x multisampled          4 or 8         4
    *   tiled Y                        4 or 8         4 (if rt)
    *   PIPE_FORMAT_R32G32B32_FLOAT    4 or 8         2
    *   others                         4 or 8         2 or 4
    */

   if (params->compressed) {
      /* this happens to be the case */
      layout->align_i = layout->block_width;
      layout->align_j = layout->block_height;
   } else if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
      if (ilo_dev_gen(params->dev) >= ILO_GEN(7)) {
         switch (layout->format) {
         case PIPE_FORMAT_Z16_UNORM:
            layout->align_i = 8;
            layout->align_j = 4;
            break;
         case PIPE_FORMAT_S8_UINT:
            layout->align_i = 8;
            layout->align_j = 8;
            break;
         default:
            layout->align_i = 4;
            layout->align_j = 4;
            break;
         }
      } else {
         switch (layout->format) {
         case PIPE_FORMAT_S8_UINT:
            layout->align_i = 4;
            layout->align_j = 2;
            break;
         default:
            layout->align_i = 4;
            layout->align_j = 4;
            break;
         }
      }
   } else {
      const bool valign_4 = (templ->nr_samples > 1) ||
         (ilo_dev_gen(params->dev) >= ILO_GEN(7) &&
          layout->tiling == INTEL_TILING_Y &&
          (templ->bind & PIPE_BIND_RENDER_TARGET));

      if (valign_4)
         assert(layout->block_size != 12);

      layout->align_i = 4;
      layout->align_j = (valign_4) ? 4 : 2;
   }

   /*
    * the fact that align i and j are multiples of block width and height
    * respectively is what makes the size of the bo a multiple of the block
    * size, slices start at block boundaries, and many of the computations
    * work.
    */
   assert(layout->align_i % layout->block_width == 0);
   assert(layout->align_j % layout->block_height == 0);

   /* make sure align() works */
   assert(util_is_power_of_two(layout->align_i) &&
          util_is_power_of_two(layout->align_j));
   assert(util_is_power_of_two(layout->block_width) &&
          util_is_power_of_two(layout->block_height));
}
struct pipe_resource *
nv30_miptree_create(struct pipe_screen *pscreen,
                    const struct pipe_resource *tmpl)
{
   struct nouveau_device *dev = nouveau_screen(pscreen)->device;
   struct nv30_miptree *mt = CALLOC_STRUCT(nv30_miptree);
   struct pipe_resource *pt = &mt->base.base;
   unsigned blocksz, size;
   unsigned w, h, d, l;
   int ret;

   switch (tmpl->nr_samples) {
   case 4:
      mt->ms_mode = 0x00004000;
      mt->ms_x = 1;
      mt->ms_y = 1;
      break;
   case 2:
      mt->ms_mode = 0x00003000;
      mt->ms_x = 1;
      mt->ms_y = 0;
      break;
   default:
      mt->ms_mode = 0x00000000;
      mt->ms_x = 0;
      mt->ms_y = 0;
      break;
   }

   mt->base.vtbl = &nv30_miptree_vtbl;
   *pt = *tmpl;
   pipe_reference_init(&pt->reference, 1);
   pt->screen = pscreen;

   w = pt->width0 << mt->ms_x;
   h = pt->height0 << mt->ms_y;
   d = (pt->target == PIPE_TEXTURE_3D) ? pt->depth0 : 1;
   blocksz = util_format_get_blocksize(pt->format);

   if ((pt->target == PIPE_TEXTURE_RECT) ||
       !util_is_power_of_two(pt->width0) ||
       !util_is_power_of_two(pt->height0) ||
       !util_is_power_of_two(pt->depth0) ||
       util_format_is_compressed(pt->format) ||
       util_format_is_float(pt->format) || mt->ms_mode) {
      mt->uniform_pitch = util_format_get_nblocksx(pt->format, w) * blocksz;
      mt->uniform_pitch = align(mt->uniform_pitch, 64);
   }

   if (!mt->uniform_pitch)
      mt->swizzled = TRUE;

   size = 0;
   for (l = 0; l <= pt->last_level; l++) {
      struct nv30_miptree_level *lvl = &mt->level[l];
      unsigned nbx = util_format_get_nblocksx(pt->format, w);
      unsigned nby = util_format_get_nblocksy(pt->format, h);

      lvl->offset = size;
      lvl->pitch  = mt->uniform_pitch;
      if (!lvl->pitch)
         lvl->pitch = nbx * blocksz;

      lvl->zslice_size = lvl->pitch * nby;
      size += lvl->zslice_size * d;

      w = u_minify(w, 1);
      h = u_minify(h, 1);
      d = u_minify(d, 1);
   }

   mt->layer_size = size;
   if (pt->target == PIPE_TEXTURE_CUBE) {
      if (!mt->uniform_pitch)
         mt->layer_size = align(mt->layer_size, 128);
      size = mt->layer_size * 6;
   }

   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 256, size, NULL, &mt->base.bo);
   if (ret) {
      FREE(mt);
      return NULL;
   }

   mt->base.domain = NOUVEAU_BO_VRAM;
   return &mt->base.base;
}
static struct pipe_sampler_view *
etna_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
                         const struct pipe_sampler_view *so)
{
   struct etna_sampler_view *sv = CALLOC_STRUCT(etna_sampler_view);
   struct etna_resource *res = etna_resource(prsc);
   struct etna_context *ctx = etna_context(pctx);
   const uint32_t format = translate_texture_format(so->format);
   const bool ext = !!(format & EXT_FORMAT);
   const uint32_t swiz = get_texture_swiz(so->format, so->swizzle_r,
                                          so->swizzle_g, so->swizzle_b,
                                          so->swizzle_a);

   if (!sv)
      return NULL;

   if (!etna_resource_sampler_compatible(res)) {
      /* The original resource is not compatible with the sampler.
       * Allocate an appropriately tiled texture. */
      if (!res->texture) {
         struct pipe_resource templat = *prsc;

         templat.bind &= ~(PIPE_BIND_DEPTH_STENCIL | PIPE_BIND_RENDER_TARGET |
                           PIPE_BIND_BLENDABLE);
         res->texture =
            etna_resource_alloc(pctx->screen, ETNA_LAYOUT_TILED,
                                DRM_FORMAT_MOD_LINEAR, &templat);
      }

      if (!res->texture) {
         free(sv);
         return NULL;
      }
      res = etna_resource(res->texture);
   }

   sv->base = *so;
   pipe_reference_init(&sv->base.reference, 1);
   sv->base.texture = NULL;
   pipe_resource_reference(&sv->base.texture, prsc);
   sv->base.context = pctx;

   /* merged with sampler state */
   sv->TE_SAMPLER_CONFIG0 = COND(!ext, VIVS_TE_SAMPLER_CONFIG0_FORMAT(format));
   sv->TE_SAMPLER_CONFIG0_MASK = 0xffffffff;

   switch (sv->base.target) {
   case PIPE_TEXTURE_1D:
      /* For 1D textures, we will have a height of 1, so we can use 2D
       * but set T wrap to repeat */
      sv->TE_SAMPLER_CONFIG0_MASK = ~VIVS_TE_SAMPLER_CONFIG0_VWRAP__MASK;
      sv->TE_SAMPLER_CONFIG0 |= VIVS_TE_SAMPLER_CONFIG0_VWRAP(TEXTURE_WRAPMODE_REPEAT);
      /* fallthrough */
   case PIPE_TEXTURE_2D:
   case PIPE_TEXTURE_RECT:
      sv->TE_SAMPLER_CONFIG0 |= VIVS_TE_SAMPLER_CONFIG0_TYPE(TEXTURE_TYPE_2D);
      break;
   case PIPE_TEXTURE_CUBE:
      sv->TE_SAMPLER_CONFIG0 |= VIVS_TE_SAMPLER_CONFIG0_TYPE(TEXTURE_TYPE_CUBE_MAP);
      break;
   default:
      BUG("Unhandled texture target");
      free(sv);
      return NULL;
   }

   sv->TE_SAMPLER_CONFIG1 = COND(ext, VIVS_TE_SAMPLER_CONFIG1_FORMAT_EXT(format)) |
                            VIVS_TE_SAMPLER_CONFIG1_HALIGN(res->halign) | swiz;
   sv->TE_SAMPLER_SIZE = VIVS_TE_SAMPLER_SIZE_WIDTH(res->base.width0) |
                         VIVS_TE_SAMPLER_SIZE_HEIGHT(res->base.height0);
   sv->TE_SAMPLER_LOG_SIZE =
      VIVS_TE_SAMPLER_LOG_SIZE_WIDTH(etna_log2_fixp55(res->base.width0)) |
      VIVS_TE_SAMPLER_LOG_SIZE_HEIGHT(etna_log2_fixp55(res->base.height0));

   /* Set up levels-of-detail */
   for (int lod = 0; lod <= res->base.last_level; ++lod) {
      sv->TE_SAMPLER_LOD_ADDR[lod].bo = res->bo;
      sv->TE_SAMPLER_LOD_ADDR[lod].offset = res->levels[lod].offset;
      sv->TE_SAMPLER_LOD_ADDR[lod].flags = ETNA_RELOC_READ;
   }
   sv->min_lod = sv->base.u.tex.first_level << 5;
   sv->max_lod = MIN2(sv->base.u.tex.last_level, res->base.last_level) << 5;

   /* Workaround for npot textures -- it appears that only CLAMP_TO_EDGE is
    * supported when the appropriate capability is not set. */
   if (!ctx->specs.npot_tex_any_wrap &&
       (!util_is_power_of_two(res->base.width0) ||
        !util_is_power_of_two(res->base.height0))) {
      sv->TE_SAMPLER_CONFIG0_MASK = ~(VIVS_TE_SAMPLER_CONFIG0_UWRAP__MASK |
                                      VIVS_TE_SAMPLER_CONFIG0_VWRAP__MASK);
      sv->TE_SAMPLER_CONFIG0 |=
         VIVS_TE_SAMPLER_CONFIG0_UWRAP(TEXTURE_WRAPMODE_CLAMP_TO_EDGE) |
         VIVS_TE_SAMPLER_CONFIG0_VWRAP(TEXTURE_WRAPMODE_CLAMP_TO_EDGE);
   }

   return &sv->base;
}
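/* min_lod/max_lod above are written as 5.5 fixed point (5 integer bits,
 * 5 fractional bits), so "lod << 5" is simply lod * 32.  A hypothetical
 * helper for clamping a float LOD into that encoding could look like the
 * sketch below; the 10-bit range is an assumption of the 5.5 format, not
 * taken from the hardware headers. */
static unsigned
float_to_fixp55(float lod)
{
   int fixp = (int)(lod * 32.0f + 0.5f);   /* round to the nearest 1/32 step */

   if (fixp < 0)
      fixp = 0;
   if (fixp > 0x3ff)                       /* 31 + 31/32 is the largest value */
      fixp = 0x3ff;

   return (unsigned)fixp;
}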
/**
 * This uses a blit to copy the read buffer to a texture format which matches
 * the format and type combo and then a fast read-back is done using memcpy.
 * We can do arbitrary X/Y/Z/W/0/1 swizzling here as long as there is
 * a format which matches the swizzling.
 *
 * If such a format isn't available, we fall back to _mesa_readpixels.
 *
 * NOTE: Some drivers use a blit to convert between tiled and linear
 *       texture layouts during texture uploads/downloads, so the blit
 *       we do here should be free in such cases.
 */
static void
st_readpixels(struct gl_context *ctx, GLint x, GLint y,
              GLsizei width, GLsizei height,
              GLenum format, GLenum type,
              const struct gl_pixelstore_attrib *pack,
              GLvoid *pixels)
{
   struct st_context *st = st_context(ctx);
   struct gl_renderbuffer *rb =
         _mesa_get_read_renderbuffer_for_format(ctx, format);
   struct st_renderbuffer *strb = st_renderbuffer(rb);
   struct pipe_context *pipe = st->pipe;
   struct pipe_screen *screen = pipe->screen;
   struct pipe_resource *src;
   struct pipe_resource *dst = NULL;
   struct pipe_resource dst_templ;
   enum pipe_format dst_format, src_format;
   struct pipe_blit_info blit;
   unsigned bind = PIPE_BIND_TRANSFER_READ;
   struct pipe_transfer *tex_xfer;
   ubyte *map = NULL;

   /* Validate state (to be sure we have up-to-date framebuffer surfaces)
    * and flush the bitmap cache prior to reading. */
   st_validate_state(st);
   st_flush_bitmap_cache(st);

   if (!st->prefer_blit_based_texture_transfer) {
      goto fallback;
   }

   /* This must be done after state validation. */
   src = strb->texture;

   /* XXX Fallback for depth-stencil formats due to an incomplete
    * stencil blit implementation in some drivers. */
   if (format == GL_DEPTH_STENCIL) {
      goto fallback;
   }

   /* We are creating a texture of the size of the region being read back.
    * Need to check for NPOT texture support. */
   if (!screen->get_param(screen, PIPE_CAP_NPOT_TEXTURES) &&
       (!util_is_power_of_two(width) ||
        !util_is_power_of_two(height))) {
      goto fallback;
   }

   /* If the base internal format and the texture format don't match, we have
    * to use the slow path. */
   if (rb->_BaseFormat != _mesa_get_format_base_format(rb->Format)) {
      goto fallback;
   }

   /* See if the texture format already matches the format and type,
    * in which case the memcpy-based fast path will likely be used and
    * we don't have to blit. */
   if (_mesa_format_matches_format_and_type(rb->Format, format,
                                            type, pack->SwapBytes)) {
      goto fallback;
   }

   if (_mesa_readpixels_needs_slow_path(ctx, format, type, GL_TRUE)) {
      goto fallback;
   }

   /* Convert the source format to what is expected by ReadPixels
    * and see if it's supported. */
   src_format = util_format_linear(src->format);
   src_format = util_format_luminance_to_red(src_format);
   src_format = util_format_intensity_to_red(src_format);

   if (!src_format ||
       !screen->is_format_supported(screen, src_format, src->target,
                                    src->nr_samples,
                                    PIPE_BIND_SAMPLER_VIEW)) {
      goto fallback;
   }

   if (format == GL_DEPTH_COMPONENT || format == GL_DEPTH_STENCIL)
      bind |= PIPE_BIND_DEPTH_STENCIL;
   else
      bind |= PIPE_BIND_RENDER_TARGET;

   /* Choose the destination format by finding the best match
    * for the format+type combo. */
   dst_format = st_choose_matching_format(screen, bind, format, type,
                                          pack->SwapBytes);

   if (dst_format == PIPE_FORMAT_NONE) {
      goto fallback;
   }

   /* create the destination texture */
   memset(&dst_templ, 0, sizeof(dst_templ));
   dst_templ.target = PIPE_TEXTURE_2D;
   dst_templ.format = dst_format;
   dst_templ.bind = bind;
   dst_templ.usage = PIPE_USAGE_STAGING;

   st_gl_texture_dims_to_pipe_dims(GL_TEXTURE_2D, width, height, 1,
                                   &dst_templ.width0, &dst_templ.height0,
                                   &dst_templ.depth0, &dst_templ.array_size);

   dst = screen->resource_create(screen, &dst_templ);
   if (!dst) {
      goto fallback;
   }

   memset(&blit, 0, sizeof(blit));
   blit.src.resource = src;
   blit.src.level = strb->surface->u.tex.level;
   blit.src.format = src_format;
   blit.dst.resource = dst;
   blit.dst.level = 0;
   blit.dst.format = dst->format;
   blit.src.box.x = x;
   blit.dst.box.x = 0;
   blit.src.box.y = y;
   blit.dst.box.y = 0;
   blit.src.box.z = strb->surface->u.tex.first_layer;
   blit.dst.box.z = 0;
   blit.src.box.width = blit.dst.box.width = width;
   blit.src.box.height = blit.dst.box.height = height;
   blit.src.box.depth = blit.dst.box.depth = 1;
   blit.mask = st_get_blit_mask(rb->_BaseFormat, format);
   blit.filter = PIPE_TEX_FILTER_NEAREST;
   blit.scissor_enable = FALSE;

   if (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP) {
      blit.src.box.y = rb->Height - blit.src.box.y;
      blit.src.box.height = -blit.src.box.height;
   }

   /* blit */
   st->pipe->blit(st->pipe, &blit);

   /* map resources */
   pixels = _mesa_map_pbo_dest(ctx, pack, pixels);

   map = pipe_transfer_map_3d(pipe, dst, 0, PIPE_TRANSFER_READ,
                              0, 0, 0, width, height, 1, &tex_xfer);
   if (!map) {
      _mesa_unmap_pbo_dest(ctx, pack);
      pipe_resource_reference(&dst, NULL);
      goto fallback;
   }

   /* memcpy data into a user buffer */
   {
      const uint bytesPerRow = width * util_format_get_blocksize(dst_format);
      GLuint row;

      for (row = 0; row < (unsigned) height; row++) {
         GLvoid *dest = _mesa_image_address3d(pack, pixels,
                                              width, height, format,
                                              type, 0, row, 0);
         memcpy(dest, map, bytesPerRow);
         map += tex_xfer->stride;
      }
   }

   pipe_transfer_unmap(pipe, tex_xfer);
   _mesa_unmap_pbo_dest(ctx, pack);
   pipe_resource_reference(&dst, NULL);
   return;

fallback:
   _mesa_readpixels(ctx, x, y, width, height, format, type, pack, pixels);
}
/**
 * Initialize lp_sampler_static_state object with the gallium sampler
 * and texture state.
 * The former is considered to be static and the latter dynamic.
 */
void
lp_sampler_static_state(struct lp_sampler_static_state *state,
                        const struct pipe_sampler_view *view,
                        const struct pipe_sampler_state *sampler)
{
   const struct pipe_resource *texture = view->texture;

   memset(state, 0, sizeof *state);

   if (!texture)
      return;

   if (!sampler)
      return;

   /*
    * We don't copy sampler state over unless it is actually enabled, to avoid
    * spurious recompiles, as the sampler static state is part of the shader
    * key.
    *
    * Ideally the state tracker or cso_cache module would make all state
    * canonical, but until that happens it's better to be safe than sorry here.
    *
    * XXX: Actually there's much more that can be done here, especially
    * regarding 1D/2D/3D/CUBE textures, wrap modes, etc.
    */

   state->format            = view->format;
   state->swizzle_r         = view->swizzle_r;
   state->swizzle_g         = view->swizzle_g;
   state->swizzle_b         = view->swizzle_b;
   state->swizzle_a         = view->swizzle_a;

   state->target            = texture->target;
   state->pot_width         = util_is_power_of_two(texture->width0);
   state->pot_height        = util_is_power_of_two(texture->height0);
   state->pot_depth         = util_is_power_of_two(texture->depth0);

   state->wrap_s            = sampler->wrap_s;
   state->wrap_t            = sampler->wrap_t;
   state->wrap_r            = sampler->wrap_r;
   state->min_img_filter    = sampler->min_img_filter;
   state->mag_img_filter    = sampler->mag_img_filter;

   if (view->u.tex.last_level && sampler->max_lod > 0.0f) {
      state->min_mip_filter = sampler->min_mip_filter;
   } else {
      state->min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
   }

   if (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
      if (sampler->lod_bias != 0.0f) {
         state->lod_bias_non_zero = 1;
      }

      /* If min_lod == max_lod we can greatly simplify mipmap selection.
       * This is a case that occurs during automatic mipmap generation.
       */
      if (sampler->min_lod == sampler->max_lod) {
         state->min_max_lod_equal = 1;
      } else {
         if (sampler->min_lod > 0.0f) {
            state->apply_min_lod = 1;
         }
         if (sampler->max_lod < (float)view->u.tex.last_level) {
            state->apply_max_lod = 1;
         }
      }
   }

   state->compare_mode      = sampler->compare_mode;
   if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) {
      state->compare_func   = sampler->compare_func;
   }

   state->normalized_coords = sampler->normalized_coords;

   /*
    * FIXME: Handle the remainder of pipe_sampler_view.
    */
}
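/* Why the pot_* flags are worth keeping in the shader key: for a
 * power-of-two size, PIPE_TEX_WRAP_REPEAT can be lowered to a single AND,
 * while an NPOT size needs a real modulo (or a floor/mul sequence).  A
 * scalar sketch of the two paths, with illustrative names: */
static int
wrap_repeat(int coord, int size, int size_is_pot)
{
   if (size_is_pot)
      return coord & (size - 1);             /* e.g. size 256 -> mask 0xff */

   return ((coord % size) + size) % size;    /* also handles negative coords */
}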
/**
 * Check alignment.
 *
 * It is important that this check is not implemented as a macro or inlined
 * function, as the compiler assumptions in respect to alignment of global
 * and stack variables would often make the check a no op, defeating the
 * whole purpose of the exercise.
 */
extern "C" boolean
lp_check_alignment(const void *ptr, unsigned alignment)
{
   assert(util_is_power_of_two(alignment));
   return ((uintptr_t)ptr & (alignment - 1)) == 0;
}
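/* A worked instance of the same mask test, assuming a 16-byte alignment
 * requirement: 16 - 1 = 0xf, so only the low four address bits matter.
 *   0x1000 & 0xf == 0x0  -> aligned
 *   0x1004 & 0xf == 0x4  -> not aligned
 * This only works because the alignment is a power of two, which is exactly
 * what the assert above enforces. */
static boolean
is_aligned_16(const void *ptr)
{
   return ((uintptr_t)ptr & 0xf) == 0;
}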
static bool
ps_get_gen6_ff_kernels(const struct ilo_dev *dev,
                       const struct ilo_state_ps_info *info,
                       struct pixel_ff *ff)
{
   const struct ilo_state_shader_kernel_info *kernel_8 = &info->kernel_8;
   const struct ilo_state_shader_kernel_info *kernel_16 = &info->kernel_16;
   const struct ilo_state_shader_kernel_info *kernel_32 = &info->kernel_32;
   uint32_t scratch_size;

   ILO_DEV_ASSERT(dev, 6, 8);

   ff->dispatch_modes = ps_get_gen6_dispatch_modes(dev, info);

   /* initialize kernel offsets and GRF starts */
   if (util_is_power_of_two(ff->dispatch_modes)) {
      if (ff->dispatch_modes & GEN6_PS_DISPATCH_8) {
         ff->kernel_offsets[0] = kernel_8->offset;
         ff->grf_starts[0] = kernel_8->grf_start;
      } else if (ff->dispatch_modes & GEN6_PS_DISPATCH_16) {
         ff->kernel_offsets[0] = kernel_16->offset;
         ff->grf_starts[0] = kernel_16->grf_start;
      } else if (ff->dispatch_modes & GEN6_PS_DISPATCH_32) {
         ff->kernel_offsets[0] = kernel_32->offset;
         ff->grf_starts[0] = kernel_32->grf_start;
      }
   } else {
      ff->kernel_offsets[0] = kernel_8->offset;
      ff->kernel_offsets[1] = kernel_32->offset;
      ff->kernel_offsets[2] = kernel_16->offset;

      ff->grf_starts[0] = kernel_8->grf_start;
      ff->grf_starts[1] = kernel_32->grf_start;
      ff->grf_starts[2] = kernel_16->grf_start;
   }

   /* we do not want to save it */
   assert(ff->kernel_offsets[0] == 0);

   ff->pcb_enable = (((ff->dispatch_modes & GEN6_PS_DISPATCH_8) &&
                      kernel_8->pcb_attr_count) ||
                     ((ff->dispatch_modes & GEN6_PS_DISPATCH_16) &&
                      kernel_16->pcb_attr_count) ||
                     ((ff->dispatch_modes & GEN6_PS_DISPATCH_32) &&
                      kernel_32->pcb_attr_count));

   scratch_size = 0;
   if ((ff->dispatch_modes & GEN6_PS_DISPATCH_8) &&
       scratch_size < kernel_8->scratch_size)
      scratch_size = kernel_8->scratch_size;
   if ((ff->dispatch_modes & GEN6_PS_DISPATCH_16) &&
       scratch_size < kernel_16->scratch_size)
      scratch_size = kernel_16->scratch_size;
   if ((ff->dispatch_modes & GEN6_PS_DISPATCH_32) &&
       scratch_size < kernel_32->scratch_size)
      scratch_size = kernel_32->scratch_size;

   /* next power of two, starting from 1KB */
   ff->scratch_space = (scratch_size > 1024) ?
      (util_last_bit(scratch_size - 1) - 10) : 0;

   /* GPU hangs on Haswell if none of the dispatch mode bits is set */
   if (ilo_dev_gen(dev) == ILO_GEN(7.5) && !ff->dispatch_modes)
      ff->dispatch_modes |= GEN6_PS_DISPATCH_8;

   return true;
}
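/* Standalone check of the "next power of two, starting from 1KB" encoding
 * above, with a portable stand-in for util_last_bit() (index of the highest
 * set bit, plus one).  Example values: 1024 -> 0 (1KB), 1025 -> 1 (2KB),
 * 12288 -> 4 (16KB).  Function names here are illustrative. */
static unsigned
last_bit(uint32_t v)
{
   unsigned n = 0;

   while (v) {
      n++;
      v >>= 1;
   }
   return n;
}

static unsigned
scratch_space_field(uint32_t scratch_size)
{
   return (scratch_size > 1024) ? last_bit(scratch_size - 1) - 10 : 0;
}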
/**
 * Gather one element from scatter positions in memory.
 * Nearly the same as above, however the individual elements
 * may be vectors themselves, and fetches may be float type.
 * Can also do pad vector instead of ZExt.
 *
 * @sa lp_build_gather()
 */
static LLVMValueRef
lp_build_gather_elem_vec(struct gallivm_state *gallivm,
                         unsigned length,
                         unsigned src_width,
                         LLVMTypeRef src_type,
                         struct lp_type dst_type,
                         boolean aligned,
                         LLVMValueRef base_ptr,
                         LLVMValueRef offsets,
                         unsigned i,
                         boolean vector_justify)
{
   LLVMValueRef ptr, res;
   LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
   assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0));

   ptr = lp_build_gather_elem_ptr(gallivm, length, base_ptr, offsets, i);
   ptr = LLVMBuildBitCast(gallivm->builder, ptr, src_ptr_type, "");
   res = LLVMBuildLoad(gallivm->builder, ptr, "");

   /* XXX
    * On some archs we probably really want to avoid having to deal
    * with alignments lower than 4 bytes (if fetch size is a power of
    * two >= 32). On x86 it doesn't matter, however.
    * We should be able to guarantee full alignment for any kind of texture
    * fetch (except ARB_texture_buffer_range, oops), but not vertex fetch
    * (there's PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY and friends
    * but I don't think that's quite what we wanted).
    * For ARB_texture_buffer_range, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT
    * looks like a good fit, but it seems this cap bit (and OpenGL) aren't
    * enforcing what we want (which is what d3d10 does, the offset needs to
    * be aligned to element size, but GL has bytes regardless of element
    * size which would only leave us with minimum alignment restriction of 16
    * which doesn't make much sense if the type isn't 4x32bit). Due to
    * translation of offsets to first_elem in sampler_views it actually seems
    * gallium could not do anything else except 16 no matter what...
    */
   if (!aligned) {
      LLVMSetAlignment(res, 1);
   } else if (!util_is_power_of_two(src_width)) {
      /*
       * Full alignment is impossible, assume the caller really meant
       * the individual elements were aligned (e.g. 3x32bit format).
       * And yes the generated code may otherwise crash, llvm will
       * really assume 128bit alignment with a 96bit fetch (I suppose
       * that makes sense as it can just assume the upper 32bit to be
       * whatever).
       * Maybe the caller should be able to explicitly set this, but
       * this should cover all the 3-channel formats.
       */
      if (((src_width / 24) * 24 == src_width) &&
          util_is_power_of_two(src_width / 24)) {
         LLVMSetAlignment(res, src_width / 24);
      } else {
         LLVMSetAlignment(res, 1);
      }
   }

   assert(src_width <= dst_type.width * dst_type.length);
   if (src_width < dst_type.width * dst_type.length) {
      if (dst_type.length > 1) {
         res = lp_build_pad_vector(gallivm, res, dst_type.length);
         /*
          * vector_justify hopefully a non-issue since we only deal
          * with src_width >= 32 here?
          */
      } else {
         LLVMTypeRef dst_elem_type = lp_build_vec_type(gallivm, dst_type);

         /*
          * Only valid if src_ptr_type is int type...
          */
         res = LLVMBuildZExt(gallivm->builder, res, dst_elem_type, "");

#ifdef PIPE_ARCH_BIG_ENDIAN
         if (vector_justify) {
            res = LLVMBuildShl(gallivm->builder, res,
                               LLVMConstInt(dst_elem_type,
                                            dst_type.width - src_width, 0),
                               "");
         }
         if (src_width == 48) {
            /* Load 3x16 bit vector.
             * The sequence of loads on big-endian hardware proceeds as follows.
             * 16-bit fields are denoted by X, Y, Z, and 0.  In memory, the sequence
             * of three fields appears in the order X, Y, Z.
             *
             * Load 32-bit word:     0.0.X.Y
             * Load 16-bit halfword: 0.0.0.Z
             * Rotate left:          0.X.Y.0
             * Bitwise OR:           0.X.Y.Z
             *
             * The order in which we need the fields in the result is 0.Z.Y.X,
             * the same as on little-endian; permute 16-bit fields accordingly
             * within 64-bit register:
             */
            LLVMValueRef shuffles[4] = {
               lp_build_const_int32(gallivm, 2),
               lp_build_const_int32(gallivm, 1),
               lp_build_const_int32(gallivm, 0),
               lp_build_const_int32(gallivm, 3),
            };

            res = LLVMBuildBitCast(gallivm->builder, res,
                                   lp_build_vec_type(gallivm, lp_type_uint_vec(16, 4*16)),
                                   "");
            res = LLVMBuildShuffleVector(gallivm->builder, res, res,
                                         LLVMConstVector(shuffles, 4), "");
            res = LLVMBuildBitCast(gallivm->builder, res, dst_elem_type, "");
         }
#endif
      }
   }
   return res;
}
/**
 * Gather one element from scatter positions in memory.
 *
 * @sa lp_build_gather()
 */
LLVMValueRef
lp_build_gather_elem(struct gallivm_state *gallivm,
                     unsigned length,
                     unsigned src_width,
                     unsigned dst_width,
                     boolean aligned,
                     LLVMValueRef base_ptr,
                     LLVMValueRef offsets,
                     unsigned i,
                     boolean vector_justify)
{
   LLVMTypeRef src_type = LLVMIntTypeInContext(gallivm->context, src_width);
   LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
   LLVMTypeRef dst_elem_type = LLVMIntTypeInContext(gallivm->context, dst_width);
   LLVMValueRef ptr;
   LLVMValueRef res;

   assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0));

   ptr = lp_build_gather_elem_ptr(gallivm, length, base_ptr, offsets, i);
   ptr = LLVMBuildBitCast(gallivm->builder, ptr, src_ptr_type, "");
   res = LLVMBuildLoad(gallivm->builder, ptr, "");

   /* XXX
    * On some archs we probably really want to avoid having to deal
    * with alignments lower than 4 bytes (if fetch size is a power of
    * two >= 32). On x86 it doesn't matter, however.
    * We should be able to guarantee full alignment for any kind of texture
    * fetch (except ARB_texture_buffer_range, oops), but not vertex fetch
    * (there's PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY and friends
    * but I don't think that's quite what we wanted).
    * For ARB_texture_buffer_range, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT
    * looks like a good fit, but it seems this cap bit (and OpenGL) aren't
    * enforcing what we want (which is what d3d10 does, the offset needs to
    * be aligned to element size, but GL has bytes regardless of element
    * size which would only leave us with minimum alignment restriction of 16
    * which doesn't make much sense if the type isn't 4x32bit). Due to
    * translation of offsets to first_elem in sampler_views it actually seems
    * gallium could not do anything else except 16 no matter what...
    */
   if (!aligned) {
      LLVMSetAlignment(res, 1);
   } else if (!util_is_power_of_two(src_width)) {
      /*
       * Full alignment is impossible, assume the caller really meant
       * the individual elements were aligned (e.g. 3x32bit format).
       * And yes the generated code may otherwise crash, llvm will
       * really assume 128bit alignment with a 96bit fetch (I suppose
       * that makes sense as it can just assume the upper 32bit to be
       * whatever).
       * Maybe the caller should be able to explicitly set this, but
       * this should cover all the 3-channel formats.
       */
      if (((src_width / 24) * 24 == src_width) &&
          util_is_power_of_two(src_width / 24)) {
         LLVMSetAlignment(res, src_width / 24);
      } else {
         LLVMSetAlignment(res, 1);
      }
   }

   assert(src_width <= dst_width);
   if (src_width < dst_width) {
      res = LLVMBuildZExt(gallivm->builder, res, dst_elem_type, "");
      if (vector_justify) {
#ifdef PIPE_ARCH_BIG_ENDIAN
         res = LLVMBuildShl(gallivm->builder, res,
                            LLVMConstInt(dst_elem_type, dst_width - src_width, 0),
                            "");
#endif
      }
   }

   return res;
}
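/* What the src_width / 24 path above computes for the 3-channel cases the
 * comment mentions (fetch width in bits -> per-element alignment in bytes):
 *
 *   24-bit fetch (3 x 8-bit):   24 / 24 = 1
 *   48-bit fetch (3 x 16-bit):  48 / 24 = 2
 *   96-bit fetch (3 x 32-bit):  96 / 24 = 4
 *
 * i.e. the load is only marked as aligned to one element, never to the full
 * (non-power-of-two) fetch width. */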
/**
 * Fetch a pixel into a 4 float AoS.
 *
 * \param format_desc describes format of the image we're fetching from
 * \param ptr address of the pixel block (or the texel if uncompressed)
 * \param i, j the sub-block pixel coordinates.  For non-compressed formats
 *             these will always be (0, 0).
 * \return a 4 element vector with the pixel's RGBA values.
 */
LLVMValueRef
lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
                        const struct util_format_description *format_desc,
                        struct lp_type type,
                        LLVMValueRef base_ptr,
                        LLVMValueRef offset,
                        LLVMValueRef i,
                        LLVMValueRef j)
{
   LLVMBuilderRef builder = gallivm->builder;
   unsigned num_pixels = type.length / 4;
   struct lp_build_context bld;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);
   assert(type.length % 4 == 0);

   lp_build_context_init(&bld, gallivm, type);

   /*
    * Trivial case
    *
    * The format matches the type (apart from a swizzle) so no need for
    * scaling or converting.
    */

   if (format_matches_type(format_desc, type) &&
       format_desc->block.bits <= type.width * 4 &&
       util_is_power_of_two(format_desc->block.bits)) {
      LLVMValueRef packed;

      /*
       * The format matches the type (apart from a swizzle) so no need for
       * scaling or converting.
       */

      packed = lp_build_gather(gallivm, type.length/4,
                               format_desc->block.bits, type.width*4,
                               base_ptr, offset);

      assert(format_desc->block.bits <= type.width * type.length);

      packed = LLVMBuildBitCast(gallivm->builder, packed,
                                lp_build_vec_type(gallivm, type), "");

      return lp_build_format_swizzle_aos(format_desc, &bld, packed);
   }

   /*
    * Bit arithmetic
    */

   if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
       (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
        format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
       format_desc->block.width == 1 &&
       format_desc->block.height == 1 &&
       util_is_power_of_two(format_desc->block.bits) &&
       format_desc->block.bits <= 32 &&
       format_desc->is_bitmask &&
       !format_desc->is_mixed &&
       (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED ||
        format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED)) {

      LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
      LLVMValueRef res;
      unsigned k;

      /*
       * Unpack a pixel at a time into a <4 x float> RGBA vector
       */

      for (k = 0; k < num_pixels; ++k) {
         LLVMValueRef packed;

         packed = lp_build_gather_elem(gallivm, num_pixels,
                                       format_desc->block.bits, 32,
                                       base_ptr, offset, k);

         tmps[k] = lp_build_unpack_arith_rgba_aos(gallivm,
                                                  format_desc,
                                                  packed);
      }

      /*
       * Type conversion.
       *
       * TODO: We could avoid floating conversion for integer to
       * integer conversions.
       */

      if (gallivm_debug & GALLIVM_DEBUG_PERF && !type.floating) {
         debug_printf("%s: unpacking %s with floating point\n",
                      __FUNCTION__, format_desc->short_name);
      }

      lp_build_conv(gallivm,
                    lp_float32_vec4_type(),
                    type,
                    tmps, num_pixels, &res, 1);

      return lp_build_format_swizzle_aos(format_desc, &bld, res);
   }

   /*
    * YUV / subsampled formats
    */

   if (format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
      struct lp_type tmp_type;
      LLVMValueRef tmp;

      memset(&tmp_type, 0, sizeof tmp_type);
      tmp_type.width = 8;
      tmp_type.length = num_pixels * 4;
      tmp_type.norm = TRUE;

      tmp = lp_build_fetch_subsampled_rgba_aos(gallivm,
                                               format_desc,
                                               num_pixels,
                                               base_ptr,
                                               offset,
                                               i, j);

      lp_build_conv(gallivm, tmp_type, type, &tmp, 1, &tmp, 1);

      return tmp;
   }

   /*
    * Fallback to util_format_description::fetch_rgba_8unorm().
    */

   if (format_desc->fetch_rgba_8unorm &&
       !type.floating && type.width == 8 && !type.sign && type.norm) {
      /*
       * Fallback to calling util_format_description::fetch_rgba_8unorm.
       *
       * This is definitely not the most efficient way of fetching pixels, as
       * we miss the opportunity to do vectorization, but it is convenient
       * for formats or scenarios for which there was no opportunity or
       * incentive to optimize.
       */

      LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
      char name[256];
      LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
      LLVMTypeRef pi8t = LLVMPointerType(i8t, 0);
      LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
      LLVMValueRef function;
      LLVMValueRef tmp_ptr;
      LLVMValueRef tmp;
      LLVMValueRef res;
      LLVMValueRef callee;
      unsigned k;

      util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_8unorm",
                    format_desc->short_name);

      if (gallivm_debug & GALLIVM_DEBUG_PERF) {
         debug_printf("%s: falling back to %s\n", __FUNCTION__, name);
      }

      /*
       * Declare and bind format_desc->fetch_rgba_8unorm().
       */

      function = LLVMGetNamedFunction(module, name);
      if (!function) {
         /*
          * Function to call looks like:
          *   fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
          */
         LLVMTypeRef ret_type;
         LLVMTypeRef arg_types[4];
         LLVMTypeRef function_type;

         ret_type = LLVMVoidTypeInContext(gallivm->context);
         arg_types[0] = pi8t;
         arg_types[1] = pi8t;
         arg_types[2] = i32t;
         arg_types[3] = i32t;
         function_type = LLVMFunctionType(ret_type, arg_types,
                                          Elements(arg_types), 0);
         function = LLVMAddFunction(module, name, function_type);

         LLVMSetFunctionCallConv(function, LLVMCCallConv);
         LLVMSetLinkage(function, LLVMExternalLinkage);

         assert(LLVMIsDeclaration(function));
      }

      /* make const pointer for the C fetch_rgba_float function */
      callee = lp_build_const_int_pointer(gallivm,
         func_to_pointer((func_pointer) format_desc->fetch_rgba_8unorm));

      /* cast the callee pointer to the function's type */
      function = LLVMBuildBitCast(builder, callee,
                                  LLVMTypeOf(function), "cast callee");

      tmp_ptr = lp_build_alloca(gallivm, i32t, "");

      res = LLVMGetUndef(LLVMVectorType(i32t, num_pixels));

      /*
       * Invoke format_desc->fetch_rgba_8unorm() for each pixel and insert the result
       * in the SoA vectors.
       */

      for (k = 0; k < num_pixels; ++k) {
         LLVMValueRef index = lp_build_const_int32(gallivm, k);
         LLVMValueRef args[4];

         args[0] = LLVMBuildBitCast(builder, tmp_ptr, pi8t, "");
         args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels,
                                            base_ptr, offset, k);

         if (num_pixels == 1) {
            args[2] = i;
            args[3] = j;
         }
         else {
            args[2] = LLVMBuildExtractElement(builder, i, index, "");
            args[3] = LLVMBuildExtractElement(builder, j, index, "");
         }

         LLVMBuildCall(builder, function, args, Elements(args), "");

         tmp = LLVMBuildLoad(builder, tmp_ptr, "");

         if (num_pixels == 1) {
            res = tmp;
         }
         else {
            res = LLVMBuildInsertElement(builder, res, tmp, index, "");
         }
      }

      /* Bitcast from <n x i32> to <4n x i8> */
      res = LLVMBuildBitCast(builder, res, bld.vec_type, "");

      return res;
   }

   /*
    * Fallback to util_format_description::fetch_rgba_float().
    */

   if (format_desc->fetch_rgba_float) {
      /*
       * Fallback to calling util_format_description::fetch_rgba_float.
       *
       * This is definitely not the most efficient way of fetching pixels, as
       * we miss the opportunity to do vectorization, but it is convenient
       * for formats or scenarios for which there was no opportunity or
       * incentive to optimize.
       */

      LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
      char name[256];
      LLVMTypeRef f32t = LLVMFloatTypeInContext(gallivm->context);
      LLVMTypeRef f32x4t = LLVMVectorType(f32t, 4);
      LLVMTypeRef pf32t = LLVMPointerType(f32t, 0);
      LLVMTypeRef pi8t = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
      LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
      LLVMValueRef function;
      LLVMValueRef tmp_ptr;
      LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
      LLVMValueRef res;
      LLVMValueRef callee;
      unsigned k;

      util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float",
                    format_desc->short_name);

      if (gallivm_debug & GALLIVM_DEBUG_PERF) {
         debug_printf("%s: falling back to %s\n", __FUNCTION__, name);
      }

      /*
       * Declare and bind format_desc->fetch_rgba_float().
       */

      function = LLVMGetNamedFunction(module, name);
      if (!function) {
         /*
          * Function to call looks like:
          *   fetch(float *dst, const uint8_t *src, unsigned i, unsigned j)
          */
         LLVMTypeRef ret_type;
         LLVMTypeRef arg_types[4];
         LLVMTypeRef function_type;

         ret_type = LLVMVoidTypeInContext(gallivm->context);
         arg_types[0] = pf32t;
         arg_types[1] = pi8t;
         arg_types[2] = i32t;
         arg_types[3] = i32t;
         function_type = LLVMFunctionType(ret_type, arg_types,
                                          Elements(arg_types), 0);
         function = LLVMAddFunction(module, name, function_type);

         LLVMSetFunctionCallConv(function, LLVMCCallConv);
         LLVMSetLinkage(function, LLVMExternalLinkage);

         assert(LLVMIsDeclaration(function));
      }

      /* Note: we're using this casting here instead of LLVMAddGlobalMapping()
       * to work around a bug in LLVM 2.6.
       */

      /* make const pointer for the C fetch_rgba_float function */
      callee = lp_build_const_int_pointer(gallivm,
         func_to_pointer((func_pointer) format_desc->fetch_rgba_float));

      /* cast the callee pointer to the function's type */
      function = LLVMBuildBitCast(builder, callee,
                                  LLVMTypeOf(function), "cast callee");

      tmp_ptr = lp_build_alloca(gallivm, f32x4t, "");

      /*
       * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result
       * in the SoA vectors.
       */

      for (k = 0; k < num_pixels; ++k) {
         LLVMValueRef args[4];

         args[0] = LLVMBuildBitCast(builder, tmp_ptr, pf32t, "");
         args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels,
                                            base_ptr, offset, k);

         if (num_pixels == 1) {
            args[2] = i;
            args[3] = j;
         }
         else {
            LLVMValueRef index = lp_build_const_int32(gallivm, k);
            args[2] = LLVMBuildExtractElement(builder, i, index, "");
            args[3] = LLVMBuildExtractElement(builder, j, index, "");
         }

         LLVMBuildCall(builder, function, args, Elements(args), "");

         tmps[k] = LLVMBuildLoad(builder, tmp_ptr, "");
      }

      lp_build_conv(gallivm,
                    lp_float32_vec4_type(),
                    type,
                    tmps, num_pixels, &res, 1);

      return res;
   }

   assert(0);
   return lp_build_undef(gallivm, type);
}
static void r300_setup_flags(struct r300_texture *tex)
{
   tex->is_npot = !util_is_power_of_two(tex->tex.width0) ||
                  !util_is_power_of_two(tex->tex.height0);
}