void
vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
{
   const struct brw_tes_prog_data *tes_prog_data =
      (const struct brw_tes_prog_data *) prog_data;

   switch (instr->intrinsic) {
   case nir_intrinsic_load_tess_coord:
      /* gl_TessCoord is part of the payload in g1 channels 0-2 and 4-6. */
      emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
               src_reg(brw_vec8_grf(1, 0))));
      break;
   case nir_intrinsic_load_tess_level_outer:
      if (tes_prog_data->domain == BRW_TESS_DOMAIN_ISOLINE) {
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
                          BRW_SWIZZLE_ZWZW)));
      } else {
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
                          BRW_SWIZZLE_WZYX)));
      }
      break;
   case nir_intrinsic_load_tess_level_inner:
      if (tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD) {
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  swizzle(src_reg(ATTR, 0, glsl_type::vec4_type),
                          BRW_SWIZZLE_WZYX)));
      } else {
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  src_reg(ATTR, 1, glsl_type::float_type)));
      }
      break;
   case nir_intrinsic_load_primitive_id:
      emit(TES_OPCODE_GET_PRIMITIVE_ID,
           get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD));
      break;

   case nir_intrinsic_load_input:
   case nir_intrinsic_load_per_vertex_input: {
      src_reg indirect_offset = get_indirect_offset(instr);
      unsigned imm_offset = instr->const_index[0];
      src_reg header = input_read_header;
      bool is_64bit = nir_dest_bit_size(instr->dest) == 64;
      unsigned first_component = nir_intrinsic_component(instr);
      if (is_64bit)
         first_component /= 2;

      if (indirect_offset.file != BAD_FILE) {
         header = src_reg(this, glsl_type::uvec4_type);
         emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header),
              input_read_header, indirect_offset);
      } else {
         /* Arbitrarily only push up to 24 vec4 slots worth of data,
          * which is 12 registers (since each holds 2 vec4 slots).
          */
         const unsigned max_push_slots = 24;
         if (imm_offset < max_push_slots) {
            const glsl_type *src_glsl_type =
               is_64bit ? glsl_type::dvec4_type : glsl_type::ivec4_type;
            src_reg src = src_reg(ATTR, imm_offset, src_glsl_type);
            src.swizzle = BRW_SWZ_COMP_INPUT(first_component);

            const brw_reg_type dst_reg_type =
               is_64bit ? BRW_REGISTER_TYPE_DF : BRW_REGISTER_TYPE_D;
            emit(MOV(get_nir_dest(instr->dest, dst_reg_type), src));

            prog_data->urb_read_length =
               MAX2(prog_data->urb_read_length,
                    DIV_ROUND_UP(imm_offset + (is_64bit ? 2 : 1), 2));
            break;
         }
      }

      if (!is_64bit) {
         dst_reg temp(this, glsl_type::ivec4_type);
         vec4_instruction *read =
            emit(VEC4_OPCODE_URB_READ, temp, src_reg(header));
         read->offset = imm_offset;
         read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;

         src_reg src = src_reg(temp);
         src.swizzle = BRW_SWZ_COMP_INPUT(first_component);

         /* Copy to target.  We might end up with some funky writemasks landing
          * in here, but we really don't want them in the above pseudo-ops.
          */
         dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
         dst.writemask = brw_writemask_for_size(instr->num_components);
         emit(MOV(dst, src));
      } else {
         /* For 64-bit we need to load twice as many 32-bit components, and for
          * dvec3/4 we need to emit 2 URB Read messages
          */
         dst_reg temp(this, glsl_type::dvec4_type);
         dst_reg temp_d = retype(temp, BRW_REGISTER_TYPE_D);

         vec4_instruction *read =
            emit(VEC4_OPCODE_URB_READ, temp_d, src_reg(header));
         read->offset = imm_offset;
         read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;

         if (instr->num_components > 2) {
            read = emit(VEC4_OPCODE_URB_READ, byte_offset(temp_d, REG_SIZE),
                        src_reg(header));
            read->offset = imm_offset + 1;
            read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
         }

         src_reg temp_as_src = src_reg(temp);
         temp_as_src.swizzle = BRW_SWZ_COMP_INPUT(first_component);

         dst_reg shuffled(this, glsl_type::dvec4_type);
         shuffle_64bit_data(shuffled, temp_as_src, false);

         dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_DF);
         dst.writemask = brw_writemask_for_size(instr->num_components);
         emit(MOV(dst, src_reg(shuffled)));
      }
      break;
   }
   default:
      vec4_visitor::nir_emit_intrinsic(instr);
   }
}
static void merge_edgeflags( struct brw_clip_compile *c )
{
   struct brw_compile *p = &c->func;
   struct brw_reg tmp0 = get_element_ud(c->reg.tmp0, 0);

   brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK));
   brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, tmp0,
           brw_imm_ud(_3DPRIM_POLYGON));

   /* Get away with using reg.vertex because we know that this is not
    * a _3DPRIM_TRISTRIP_REVERSE:
    */
   brw_IF(p, BRW_EXECUTE_1);
   {
      brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ);
      brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2),
              brw_imm_ud(1<<8));
      brw_MOV(p, byte_offset(c->reg.vertex[0],
                             brw_varying_to_offset(&c->vue_map,
                                                   VARYING_SLOT_EDGE)),
              brw_imm_f(0));
      brw_set_predicate_control(p, BRW_PREDICATE_NONE);

      brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ);
      brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2),
              brw_imm_ud(1<<9));
      brw_MOV(p, byte_offset(c->reg.vertex[2],
                             brw_varying_to_offset(&c->vue_map,
                                                   VARYING_SLOT_EDGE)),
              brw_imm_f(0));
      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   }
   brw_ENDIF(p);
}
static void copy_bfc( struct brw_clip_compile *c )
{
   struct brw_compile *p = &c->func;
   GLuint conditional;

   /* Do we have any colors to copy?
    */
   if (!(brw_clip_have_varying(c, VARYING_SLOT_COL0) &&
         brw_clip_have_varying(c, VARYING_SLOT_BFC0)) &&
       !(brw_clip_have_varying(c, VARYING_SLOT_COL1) &&
         brw_clip_have_varying(c, VARYING_SLOT_BFC1)))
      return;

   /* In some weird degenerate cases we can end up testing the
    * direction twice, once for culling and once for bfc copying.  Oh
    * well, that's what you get for setting weird GL state.
    */
   if (c->key.copy_bfc_ccw)
      conditional = BRW_CONDITIONAL_GE;
   else
      conditional = BRW_CONDITIONAL_L;

   brw_CMP(p,
           vec1(brw_null_reg()),
           conditional,
           get_element(c->reg.dir, 2),
           brw_imm_f(0));

   brw_IF(p, BRW_EXECUTE_1);
   {
      GLuint i;

      for (i = 0; i < 3; i++) {
         if (brw_clip_have_varying(c, VARYING_SLOT_COL0) &&
             brw_clip_have_varying(c, VARYING_SLOT_BFC0))
            brw_MOV(p,
                    byte_offset(c->reg.vertex[i],
                                brw_varying_to_offset(&c->vue_map,
                                                      VARYING_SLOT_COL0)),
                    byte_offset(c->reg.vertex[i],
                                brw_varying_to_offset(&c->vue_map,
                                                      VARYING_SLOT_BFC0)));

         if (brw_clip_have_varying(c, VARYING_SLOT_COL1) &&
             brw_clip_have_varying(c, VARYING_SLOT_BFC1))
            brw_MOV(p,
                    byte_offset(c->reg.vertex[i],
                                brw_varying_to_offset(&c->vue_map,
                                                      VARYING_SLOT_COL1)),
                    byte_offset(c->reg.vertex[i],
                                brw_varying_to_offset(&c->vue_map,
                                                      VARYING_SLOT_BFC1)));
      }
   }
   brw_ENDIF(p);
}
/* Distribute flatshaded attributes from provoking vertex prior to
 * clipping.
 */
void brw_clip_copy_flatshaded_attributes( struct brw_clip_compile *c,
                                          GLuint to, GLuint from )
{
   struct brw_codegen *p = &c->func;

   for (int i = 0; i < c->vue_map.num_slots; i++) {
      if (c->key.interpolation_mode.mode[i] == INTERP_QUALIFIER_FLAT) {
         brw_MOV(p,
                 byte_offset(c->reg.vertex[to], brw_vue_slot_to_offset(i)),
                 byte_offset(c->reg.vertex[from], brw_vue_slot_to_offset(i)));
      }
   }
}
/* If flatshading, distribute color from provoking vertex prior to
 * clipping.
 */
void brw_clip_copy_colors( struct brw_clip_compile *c,
                           unsigned to, unsigned from )
{
#if 0
   struct brw_compile *p = &c->func;

   if (c->offset[VERT_RESULT_COL0])
      brw_MOV(p,
              byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_COL0]),
              byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_COL0]));

   if (c->offset[VERT_RESULT_COL1])
      brw_MOV(p,
              byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_COL1]),
              byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_COL1]));

   if (c->offset[VERT_RESULT_BFC0])
      brw_MOV(p,
              byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_BFC0]),
              byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_BFC0]));

   if (c->offset[VERT_RESULT_BFC1])
      brw_MOV(p,
              byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_BFC1]),
              byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_BFC1]));
#else
#warning "disabled"
#endif
}
/* If flatshading, distribute color from provoking vertex prior to
 * clipping.
 */
void brw_clip_copy_colors( struct brw_clip_compile *c,
                           GLuint to, GLuint from )
{
   struct brw_compile *p = &c->func;

   if (c->offset[VERT_RESULT_COL0])
      brw_MOV(p,
              byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_COL0]),
              byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_COL0]));

   if (c->offset[VERT_RESULT_COL1])
      brw_MOV(p,
              byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_COL1]),
              byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_COL1]));

   if (c->offset[VERT_RESULT_BFC0])
      brw_MOV(p,
              byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_BFC0]),
              byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_BFC0]));

   if (c->offset[VERT_RESULT_BFC1])
      brw_MOV(p,
              byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_BFC1]),
              byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_BFC1]));
}
void brw_copy_from_indirect(struct brw_compile *p,
                            struct brw_reg dst,
                            struct brw_indirect ptr,
                            GLuint count)
{
   GLuint i;

   dst = vec4(dst);

   for (i = 0; i < count; i++) {
      GLuint delta = i*32;

      brw_MOV(p, byte_offset(dst, delta),    deref_4f(ptr, delta));
      brw_MOV(p, byte_offset(dst, delta+16), deref_4f(ptr, delta+16));
   }
}
void brw_copy8(struct brw_compile *p,
               struct brw_reg dst,
               struct brw_reg src,
               GLuint count)
{
   GLuint i;

   dst = vec8(dst);
   src = vec8(src);

   for (i = 0; i < count; i++) {
      GLuint delta = i*32;

      brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta));
   }
}
std::pair<bool, bool>
MemoryType::checkAndMergeMemType(Location addr, const RsType* type)
{
  const size_t ofs = byte_offset(startAddress, addr, blockSize(), 0);

  return checkAndMergeMemType(ofs, type);
}
void brw_copy8(struct brw_codegen *p,
               struct brw_reg dst,
               struct brw_reg src,
               unsigned count)
{
   unsigned i;

   dst = vec8(dst);
   src = vec8(src);

   for (i = 0; i < count; i++) {
      unsigned delta = i*32;

      brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta));
   }
}
void brw_copy_from_indirect(struct brw_codegen *p,
                            struct brw_reg dst,
                            struct brw_indirect ptr,
                            unsigned count)
{
   unsigned i;

   dst = vec4(dst);

   for (i = 0; i < count; i++) {
      unsigned delta = i*32;

      brw_MOV(p, byte_offset(dst, delta),    deref_4f(ptr, delta));
      brw_MOV(p, byte_offset(dst, delta+16), deref_4f(ptr, delta+16));
   }
}
/* This is performed against the original triangles, so no indirection
 * required:
 * BZZZT!
 */
static void compute_tri_direction( struct brw_clip_compile *c )
{
   struct brw_compile *p = &c->func;
   struct brw_reg e = c->reg.tmp0;
   struct brw_reg f = c->reg.tmp1;
   GLuint hpos_offset = brw_vert_result_to_offset(&c->vue_map,
                                                  VARYING_SLOT_POS);
   struct brw_reg v0 = byte_offset(c->reg.vertex[0], hpos_offset);
   struct brw_reg v1 = byte_offset(c->reg.vertex[1], hpos_offset);
   struct brw_reg v2 = byte_offset(c->reg.vertex[2], hpos_offset);

   struct brw_reg v0n = get_tmp(c);
   struct brw_reg v1n = get_tmp(c);
   struct brw_reg v2n = get_tmp(c);

   /* Convert to NDC.
    * NOTE: We can't modify the original vertex coordinates,
    * as it may impact further operations.
    * So, we have to keep normalized coordinates in temp registers.
    *
    * TBD-KC
    * Try to optimize unnecessary MOV's.
    */
   brw_MOV(p, v0n, v0);
   brw_MOV(p, v1n, v1);
   brw_MOV(p, v2n, v2);
   brw_clip_project_position(c, v0n);
   brw_clip_project_position(c, v1n);
   brw_clip_project_position(c, v2n);

   /* Calculate the vectors of two edges of the triangle:
    */
   brw_ADD(p, e, v0n, negate(v2n));
   brw_ADD(p, f, v1n, negate(v2n));

   /* Take their crossproduct:
    */
   brw_set_access_mode(p, BRW_ALIGN_16);
   brw_MUL(p, vec4(brw_null_reg()), brw_swizzle(e, 1,2,0,3),
           brw_swizzle(f,2,0,1,3));
   brw_MAC(p, vec4(e), negate(brw_swizzle(e, 2,0,1,3)),
           brw_swizzle(f,1,2,0,3));
   brw_set_access_mode(p, BRW_ALIGN_1);

   brw_MUL(p, c->reg.dir, c->reg.dir, vec4(e));
}
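/* A small standalone sketch (plain C++ arrays, illustrative only, not part of the
 * driver) of the math that the swizzled MUL/MAC pair above implements:
 * cross(e, f) = e.yzx * f.zxy - e.zxy * f.yzx.  The sign of the resulting z
 * component is what the later direction test uses to decide the winding.
 */
#include <cassert>

static void cross3(const float e[3], const float f[3], float out[3])
{
    out[0] = e[1] * f[2] - e[2] * f[1];   // e.yzx * f.zxy - e.zxy * f.yzx
    out[1] = e[2] * f[0] - e[0] * f[2];
    out[2] = e[0] * f[1] - e[1] * f[0];
}

int main()
{
    const float e[3] = { 1.0f, 0.0f, 0.0f };
    const float f[3] = { 0.0f, 1.0f, 0.0f };
    float dir[3];

    cross3(e, f, dir);
    assert(dir[2] > 0.0f);   // counter-clockwise winding in this example
    return 0;
}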
void brw_copy4(struct brw_compile *p,
               struct brw_reg dst,
               struct brw_reg src,
               unsigned count)
{
   unsigned i;

   dst = vec4(dst);
   src = vec4(src);

   for (i = 0; i < count; i++) {
      unsigned delta = i*32;

      brw_MOV(p, byte_offset(dst, delta),    byte_offset(src, delta));
      brw_MOV(p, byte_offset(dst, delta+16), byte_offset(src, delta+16));
   }
}
static void copy_bfc( struct brw_clip_compile *c )
{
   struct brw_compile *p = &c->func;
   struct brw_instruction *ccw;
   GLuint conditional;

   /* Do we have any colors to copy?
    */
   if (!(c->offset[VERT_RESULT_COL0] && c->offset[VERT_RESULT_BFC0]) &&
       !(c->offset[VERT_RESULT_COL1] && c->offset[VERT_RESULT_BFC1]))
      return;

   /* In some weird degenerate cases we can end up testing the
    * direction twice, once for culling and once for bfc copying.  Oh
    * well, that's what you get for setting weird GL state.
    */
   if (c->key.copy_bfc_ccw)
      conditional = BRW_CONDITIONAL_GE;
   else
      conditional = BRW_CONDITIONAL_L;

   brw_CMP(p,
           vec1(brw_null_reg()),
           conditional,
           get_element(c->reg.dir, 2),
           brw_imm_f(0));

   ccw = brw_IF(p, BRW_EXECUTE_1);
   {
      GLuint i;

      for (i = 0; i < 3; i++) {
         if (c->offset[VERT_RESULT_COL0] && c->offset[VERT_RESULT_BFC0])
            brw_MOV(p,
                    byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_COL0]),
                    byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC0]));

         if (c->offset[VERT_RESULT_COL1] && c->offset[VERT_RESULT_BFC1])
            brw_MOV(p,
                    byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_COL1]),
                    byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC1]));
      }
   }
   brw_ENDIF(p, ccw);
}
/// @copydoc block_partitioning::bytes_used(uint32_t) const
uint32_t bytes_used(uint32_t block_id) const
{
    assert(block_id < m_total_blocks);

    uint32_t offset = byte_offset(block_id);
    assert(offset < m_object_size);

    uint32_t remaining = m_object_size - offset;
    uint32_t the_block_size = block_size(block_id);

    return std::min(remaining, the_block_size);
}
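/* A minimal standalone sketch (illustrative names, not taken from the partitioning
 * class above) of the same "partial last block" arithmetic: every block contributes
 * block_size bytes except possibly the last one, which only covers what remains of
 * the object.
 */
#include <algorithm>
#include <cassert>
#include <cstdint>

static uint32_t sketch_bytes_used(uint32_t object_size,
                                  uint32_t block_size,
                                  uint32_t block_id)
{
    const uint32_t offset = block_id * block_size;   // plays the role of byte_offset(block_id)
    assert(offset < object_size);

    const uint32_t remaining = object_size - offset;
    return std::min(remaining, block_size);
}

/* Example: a 1000-byte object split into 256-byte blocks yields
 * 256, 256, 256 and a final partial block of 232 bytes. */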
bool MemoryType::isInitialized(Location addr, size_t len) const
{
  const size_t offset = byte_offset(startAddress, addr, blockSize(), 0);
  const size_t limit = offset + len;

  for (size_t i = offset; i < limit; ++i)
  {
    if ( !byteInitialized(i) ) return false;
  }

  return true;
}
static int pack_double(grib_accessor* a, const double* val, size_t *len)
{
  grib_accessor_ibmfloat* self = (grib_accessor_ibmfloat*)a;
  int ret = 0;
  unsigned long i = 0;
  unsigned long rlen = *len;
  size_t buflen = 0;
  unsigned char *buf = NULL;
  long off = 0;

  if (*len < 1)
  {
    grib_context_log(a->parent->h->context, GRIB_LOG_ERROR,
                     " wrong size for %s it pack at least 1 values ",
                     a->name, rlen);
    *len = 0;
    return GRIB_ARRAY_TOO_SMALL;
  }

  if (rlen == 1) {
    /*
    double x = 0;
    grib_nearest_smaller_ibm_float(val[0],&x);
    double y = grib_long_to_ibm(grib_ibm_to_long(val[0]));
    printf("IBMFLOAT val=%.20f nearest_smaller_ibm_float=%.20f long_to_ibm=%.20f\n",val[0],x ,y);
    */
    off = byte_offset(a)*8;
    ret = grib_encode_unsigned_long(a->parent->h->buffer->data,
                                    grib_ibm_to_long(val[0]), &off, 32);
    if (*len > 1)
      grib_context_log(a->parent->h->context, GRIB_LOG_WARNING,
                       "grib_accessor_unsigned : Trying to pack %d values in a scalar %s, packing first value",
                       *len, a->name);
    if (ret == GRIB_SUCCESS) len[0] = 1;
    return ret;
  }

  buflen = rlen*4;
  buf = (unsigned char*)grib_context_malloc(a->parent->h->context, buflen);

  for (i = 0; i < rlen; i++) {
    grib_encode_unsigned_longb(buf, grib_ibm_to_long(val[i]), &off, 32);
  }

  ret = grib_set_long_internal(a->parent->h,
                               grib_arguments_get_name(a->parent->h, self->arg, 0),
                               rlen);
  if (ret == GRIB_SUCCESS)
    grib_buffer_replace(a, buf, buflen, 1, 1);
  else
    *len = 0;

  grib_context_free(a->parent->h->context, buf);
  a->length = byte_count(a);

  return ret;
}
/* This is performed against the original triangles, so no indirection
 * required:
 * BZZZT!
 */
static void compute_tri_direction( struct brw_clip_compile *c )
{
   struct brw_compile *p = &c->func;
   struct brw_reg e = c->reg.tmp0;
   struct brw_reg f = c->reg.tmp1;
   struct brw_reg v0 = byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_HPOS]);
   struct brw_reg v1 = byte_offset(c->reg.vertex[1], c->offset[VERT_RESULT_HPOS]);
   struct brw_reg v2 = byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_HPOS]);

   /* Calculate the vectors of two edges of the triangle:
    */
   brw_ADD(p, e, v0, negate(v2));
   brw_ADD(p, f, v1, negate(v2));

   /* Take their crossproduct:
    */
   brw_set_access_mode(p, BRW_ALIGN_16);
   brw_MUL(p, vec4(brw_null_reg()), brw_swizzle(e, 1,2,0,3),
           brw_swizzle(f,2,0,1,3));
   brw_MAC(p, vec4(e), negate(brw_swizzle(e, 2,0,1,3)),
           brw_swizzle(f,1,2,0,3));
   brw_set_access_mode(p, BRW_ALIGN_1);

   brw_MUL(p, c->reg.dir, c->reg.dir, vec4(e));
}
// declared static by preceding declaration
std::ptrdiff_t byte_offset(Address base, Address elem, size_t blocksize, size_t blockofs)
{
  ez::unused(blocksize), ez::unused(blockofs);

#if WITH_UPC
  if (blocksize > 0)
  {
    base.local -= blockofs;

    return byte_offset(base, elem, blocksize) - blockofs;
  }
#endif /* WITH_UPC */

  assert(blocksize == 0 && blockofs == 0);
  return elem.local - base.local;
}
/* If flatshading, distribute color from provoking vertex prior to
 * clipping.
 */
void brw_clip_copy_colors( struct brw_clip_compile *c,
                           GLuint to, GLuint from )
{
   struct brw_compile *p = &c->func;

   if (brw_clip_have_varying(c, VARYING_SLOT_COL0))
      brw_MOV(p,
              byte_offset(c->reg.vertex[to],
                          brw_varying_to_offset(&c->vue_map,
                                                VARYING_SLOT_COL0)),
              byte_offset(c->reg.vertex[from],
                          brw_varying_to_offset(&c->vue_map,
                                                VARYING_SLOT_COL0)));

   if (brw_clip_have_varying(c, VARYING_SLOT_COL1))
      brw_MOV(p,
              byte_offset(c->reg.vertex[to],
                          brw_varying_to_offset(&c->vue_map,
                                                VARYING_SLOT_COL1)),
              byte_offset(c->reg.vertex[from],
                          brw_varying_to_offset(&c->vue_map,
                                                VARYING_SLOT_COL1)));

   if (brw_clip_have_varying(c, VARYING_SLOT_BFC0))
      brw_MOV(p,
              byte_offset(c->reg.vertex[to],
                          brw_varying_to_offset(&c->vue_map,
                                                VARYING_SLOT_BFC0)),
              byte_offset(c->reg.vertex[from],
                          brw_varying_to_offset(&c->vue_map,
                                                VARYING_SLOT_BFC0)));

   if (brw_clip_have_varying(c, VARYING_SLOT_BFC1))
      brw_MOV(p,
              byte_offset(c->reg.vertex[to],
                          brw_varying_to_offset(&c->vue_map,
                                                VARYING_SLOT_BFC1)),
              byte_offset(c->reg.vertex[from],
                          brw_varying_to_offset(&c->vue_map,
                                                VARYING_SLOT_BFC1)));
}
std::pair<MemoryType*, bool>
MemoryManager::checkWrite(Location addr, size_t size, const RsType* t)
{
  if ( diagnostics::message(diagnostics::memory) )
  {
    std::stringstream msg;

    msg << " ++ checkWrite: " << addr << " size: " << size;
    RuntimeSystem::instance().printMessage(msg.str());
  }

  bool        statuschange = false;
  MemoryType* mt = ::checkLocation(*this, addr, size, RuntimeViolation::INVALID_WRITE);

  // the address has to be inside the block
  assert(mt && mt->containsMemArea(addr, size));

  const long   blocksize = mt->blockSize();
  const size_t ofs = byte_offset(mt->beginAddress(), addr, blocksize, 0);

  if (t)
  {
    if ( diagnostics::message(diagnostics::memory) )
    {
      RuntimeSystem&    rs = RuntimeSystem::instance();
      std::stringstream msg;

      msg << "++ found memory addr: " << *mt << " for " << addr
          << " with size " << ToString(size);
      rs.printMessage(msg.str());
    }

    statuschange = mt->registerMemType(addr, ofs, t);
  }

  const bool initmod = mt->initialize(ofs, size);

  if ( diagnostics::message(diagnostics::memory) )
  {
    RuntimeSystem::instance().printMessage(" ++ checkWrite done.");
  }

  return std::make_pair(mt, initmod || statuschange);
}
static std::ptrdiff_t byte_offset(Address base, Address elem, size_t blocksize)
{
  assert(blocksize != 0);

  // adjust base so that it is aligned on a blocksize address
  const size_t         baseaddr  = reinterpret_cast<size_t>(base.local);
  const size_t         basescale = baseaddr / blocksize;
  const size_t         baseshift = baseaddr % blocksize;

  // adjust elem by the same amount
  const size_t         elemaddr  = reinterpret_cast<size_t>(elem.local - baseshift);
  const size_t         elemscale = elemaddr / blocksize;
  const size_t         elemphase = elemaddr % blocksize;

  // use the simpler offset calculation
  const std::ptrdiff_t scaleofs = byte_offset(basescale, base.thread_id, elemscale, elem.thread_id);

  // unscale and add back the elem's phase
  return scaleofs * blocksize + elemphase;
}
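/* A tiny standalone illustration (hypothetical numbers, plain size_t math, ignoring
 * the multi-thread overload used above) of the scale/phase split: an address is
 * decomposed into a block index (addr / blocksize) and a phase within that block
 * (addr % blocksize); the block-index distance is computed first and then rescaled
 * back to bytes.
 */
#include <cassert>
#include <cstddef>

int main()
{
    const std::size_t blocksize = 16;
    const std::size_t base = 0;     // already block-aligned
    const std::size_t elem = 37;    // 2 full blocks + 5 bytes into the third

    const std::size_t elemscale = elem / blocksize;   // 2
    const std::size_t elemphase = elem % blocksize;   // 5

    // With a single thread the block-index distance is just elemscale,
    // so unscaling reproduces the plain byte distance.
    assert(elemscale * blocksize + elemphase == elem - base);
    return 0;
}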
void
vec4_tcs_visitor::emit_urb_write(const src_reg &value,
                                 unsigned writemask, unsigned base_offset,
                                 const src_reg &indirect_offset)
{
   if (writemask == 0)
      return;

   src_reg message(this, glsl_type::uvec4_type, 2);
   vec4_instruction *inst;

   inst = emit(TCS_OPCODE_SET_OUTPUT_URB_OFFSETS, dst_reg(message),
               brw_imm_ud(writemask), indirect_offset);
   inst->force_writemask_all = true;

   inst = emit(MOV(byte_offset(dst_reg(retype(message, value.type)), REG_SIZE),
                   value));
   inst->force_writemask_all = true;

   inst = emit(TCS_OPCODE_URB_WRITE, dst_null_f(), message);
   inst->offset = base_offset;
   inst->mlen = 2;
   inst->base_mrf = -1;
}
void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
                              GLuint nr_verts )
{
   GLuint i = 0,j;

   /* Register usage is static, precompute here:
    */
   c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;

   if (c->key.nr_userclip) {
      c->reg.fixed_planes = brw_vec4_grf(i, 0);
      i += (6 + c->key.nr_userclip + 1) / 2;

      c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2;
   }
   else
      c->prog_data.curb_read_length = 0;

   /* Payload vertices plus space for more generated vertices:
    */
   for (j = 0; j < nr_verts; j++) {
      c->reg.vertex[j] = brw_vec4_grf(i, 0);
      i += c->nr_regs;
   }

   if (c->key.nr_attrs & 1) {
      for (j = 0; j < 3; j++) {
         GLuint delta = c->key.nr_attrs*16 + 32;

         if (c->chipset.is_igdng)
            delta = c->key.nr_attrs * 16 + 32 * 3;

         brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0));
      }
   }

   c->reg.t          = brw_vec1_grf(i, 0);
   c->reg.loopcount  = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_D);
   c->reg.nr_verts   = retype(brw_vec1_grf(i, 2), BRW_REGISTER_TYPE_UD);
   c->reg.planemask  = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD);
   c->reg.plane_equation = brw_vec4_grf(i, 4);
   i++;

   c->reg.dpPrev     = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */
   c->reg.dp         = brw_vec1_grf(i, 4);
   i++;

   c->reg.inlist     = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
   i++;

   c->reg.outlist    = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
   i++;

   c->reg.freelist   = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
   i++;

   if (!c->key.nr_userclip) {
      c->reg.fixed_planes = brw_vec8_grf(i, 0);
      i++;
   }

   if (c->key.do_unfilled) {
      c->reg.dir     = brw_vec4_grf(i, 0);
      c->reg.offset  = brw_vec4_grf(i, 4);
      i++;
      c->reg.tmp0    = brw_vec4_grf(i, 0);
      c->reg.tmp1    = brw_vec4_grf(i, 4);
      i++;
   }

   if (c->need_ff_sync) {
      c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
      i++;
   }

   c->first_tmp = i;
   c->last_tmp = i;

   c->prog_data.urb_read_length = c->nr_regs; /* ? */
   c->prog_data.total_grf = i;
}
static long next_offset(grib_accessor* a)
{
  return byte_offset(a) + a->length;
}
bool MemoryType::containsMemArea(Location queryAddress, size_t len) const
{
  std::ptrdiff_t ofs = byte_offset(startAddress, queryAddress, blockSize(), 0);

  return (ofs >= 0) && ((ofs+len) <= getSize());
}
bool MemoryManager::checkIfSameChunk( Location addr1,
                                      Location addr2,
                                      size_t typeSize,
                                      RuntimeViolation::Type violation
                                    ) const
{
  RuntimeViolation::Type access_violation = violation;

  if (access_violation != RuntimeViolation::NONE)
    access_violation = RuntimeViolation::INVALID_READ;

  const MemoryType* mem1 = checkLocation(addr1, typeSize, access_violation);
  const bool        sameChunk = (mem1 && mem1->containsMemArea(addr2, typeSize));

  // check that addr1 and addr2 point to the same base location
  if (! sameChunk)
  {
    const MemoryType* mem2 = checkLocation(addr2, typeSize, access_violation);

    // the error is skipped, if a chunk is not available b/c pointer errors
    //   should be recorded only when the out-of-bounds memory is accessed, but
    //   not when the pointer is moved out of bounds.
    const bool        skiperr = (violation == RuntimeViolation::NONE && !(mem1 && mem2));

    if (!skiperr)
    {
      assert(mem1 && mem2 && mem1 != mem2);

      RuntimeSystem&    rs = RuntimeSystem::instance();
      std::stringstream ss;

      ss << "Pointer changed allocation block from "
         << addr1 << " to " << addr2
         << std::endl;

      rs.violationHandler( violation, ss.str() );
    }

    return false;
  }

  // so far we know that addr1, addr2 have been allocated within the same chunk
  // now, test for same array chunks
  const size_t         totalblocksize = mem1->blockSize();
  const Location       memloc = mem1->beginAddress();
  const std::ptrdiff_t ofs1 = byte_offset(memloc, addr1, totalblocksize, 0);
  const std::ptrdiff_t ofs2 = byte_offset(memloc, addr2, totalblocksize, 0);

  //~ std::cerr << *mem1 << std::endl;
  //~ std::cerr << "a = " << addr1 << std::endl;
  //~ std::cerr << "b = " << addr2 << std::endl;
  //~ std::cerr << "ts = " << typeSize << std::endl;
  //~ std::cerr << "tbs = " << totalblocksize << std::endl;

  // \pp as far as I understand, getTypeAt returns the innermost type
  //     that overlaps a certain memory region [addr1, typeSize).
  //     The current implementation stops at the highest level of array...
  const RsType*        type1 = mem1 -> getTypeAt( ofs1, typeSize );
  const RsType*        type2 = mem1 -> getTypeAt( ofs2, typeSize );

  std::auto_ptr<const RsType> guard1(NULL);
  std::auto_ptr<const RsType> guard2(NULL);

  if( !type1 ) {
      //~ std::cerr << "type1! " << std::endl;
      type1 = mem1->computeCompoundTypeAt( ofs1, typeSize );
      guard1.reset(type1);
  }

  if( !type2 ) {
      //~ std::cerr << "type2! " << std::endl;
      type2 = mem1->computeCompoundTypeAt( ofs2, typeSize );
      guard2.reset(type2);
  }

  assert( type1 && type2 );

  //~ std::cerr << "-- T1 = " << typeid(*type1).name() << std::endl;
  //~ std::cerr << "-- T2 = " << typeid(*type2).name() << std::endl;

  bool               accessOK = type1->isConsistentWith( *type2 );
  const RsArrayType* array = (accessOK ? dynamic_cast< const RsArrayType* >( type1 ) : 0);

  //~ std::cerr << "accOK: " << accessOK << std::endl;

  if (array)
  {
    // \pp in order to calculate the last memory location of elem, we need to
    //     know whether its type is an array type, and whether this is an
    //     immediate (there is no user defined type or pointer in between)
    //     subarray of the containing array.
    const size_t blockofs = 0; /* \todo calculate offset from addr1 */

    // \pp \todo
    //   not sure why bounds checking is based on a relative address (addr1)
    //   and not on absolute boundaries of the chunk where this address
    //   is located...
    //   e.g., addr2 < array(addr).lb || array(addr).ub < (addr2+typeSize)
    //   - a reason is that we want to bounds check within a multi-dimensional
    //     array; a sub-array might not start at the same address as the
    //     allocated chunk (which is always at [0][0]...)
    // \bug this works fine for arrays, but it does not seem to be OK for
    //      pointers; in which case addr1 might point in the middle of a chunk.
    const Location       arrlb = addr1;
    const std::ptrdiff_t arrlen = array->getByteSize();
    const Location       elemlo = addr2;
    Location             elemhi = addr2;
    const RsType*        memtype = mem1->getTypeAt( 0, mem1->getSize() );
    const size_t         basesize = basetypeSize(memtype);

    if (typeSize > basesize)
    {
      // elem is another array
      // assert( rs->testing() || dynamic_cast< const RsArrayType* >(type2) );
      const long elemblockofs = 0; // \todo calculate the ofs of elem's address (is this needed?)

      elemhi = ::add(elemhi, typeSize-1, totalblocksize, elemblockofs);
    }
    else
    {
      elemhi.local += (typeSize-1);
    }

    // \pp \note [arrlb, arrub] and [elemlo, elemhi] (therefore typeSize - 1)
    //           arrub < elemhi this holds for UPC
    //           in contrast, using a range [elemlb, elemub) causes problems
    //           for UPC where the elemub can be larger than arrub
    //           for last elements on threads other than 0.
    //           e.g., shared int x[THREADS];
    //                 arrub == &x@thread(0) + sizeof(int)
    //                 writing to x[1] has the following upper bound
    //                 elemub == &x@thread(1) + sizeof(int)
    //                 consequently arrub < elemub :(
#if EXTENDEDDBG
    std::cerr << "  arrlb = " << arrlb << "( lb + " << arrlen << " [" << totalblocksize << "] )" << std::endl;
    std::cerr << "  elmlb = " << elemlo << std::endl;
    std::cerr << "  elmub = " << elemhi << " ( lb + " << typeSize << " )" << std::endl;

    std::cerr << "  ofs(elemlb) >= 0 : " << byte_offset(arrlb, elemlo, totalblocksize, blockofs) << std::endl;
    std::cerr << "  ofs(elemub) < sz(arr) : " << (byte_offset(arrlb, elemhi, totalblocksize, blockofs)) << std::endl;
#endif /* EXTENDEDDBG */

    // the offset of the element in respect to the array has to be positive
    // and the offset of the element's last byte has to be less than the
    // array length.
    accessOK  = (byte_offset(arrlb, elemlo, totalblocksize, blockofs) >= 0);
    accessOK &= (byte_offset(arrlb, elemhi, totalblocksize, blockofs) < arrlen);

    // the array element might be before it [ -1 ]
    // ... or after the array [ N ]...
    // was: accessOK = ! ( (elemlo < arrlb) || (arrub < elemhi) )

    /* was:
    if !( addr1 + array -> getByteSize() >= addr2 + typeSize
          // the array element might be after the array [ N ]...
          && addr1 <= addr2
          // ... or before it [ -1 ]
          * (12) (8)
        )
    {
      // out of bounds error (e.g. int[2][3], ref [0][3])
      consistent = false;
    }
    */
  }

  if (!accessOK)
    failNotSameChunk( *type1, *type2, addr1, addr2, *mem1, violation );

  return accessOK;
}
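/* A tiny standalone illustration (plain integer arithmetic, hypothetical values,
 * ignoring UPC blocking) of the bounds test above, using the out-of-bounds example
 * from the original comment, int[2][3] referenced as [0][3]: the int[3] sub-array
 * starting at addr1 is 12 bytes long, the element's first byte sits at relative
 * offset 12 and its last byte at 15, so the "last byte < array length" condition
 * fails and the access would be flagged.
 */
#include <cassert>

int main()
{
    const long arrlen  = 3 * sizeof(int);   // byte size of the int[3] sub-array
    const long elem_lo = 3 * sizeof(int);   // relative offset of [0][3]'s first byte
    const long elem_hi = elem_lo + sizeof(int) - 1;

    const bool accessOK = (elem_lo >= 0) && (elem_hi < arrlen);
    assert(!accessOK);                      // [0][3] lies outside the int[3] sub-array
    return 0;
}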
static long next_offset(grib_accessor* a)
{
  return byte_offset(a) + byte_count(a);
}
void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
                              GLuint nr_verts )
{
   struct intel_context *intel = &c->func.brw->intel;
   GLuint i = 0,j;

   /* Register usage is static, precompute here:
    */
   c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;

   if (c->key.nr_userclip) {
      c->reg.fixed_planes = brw_vec4_grf(i, 0);
      i += (6 + c->key.nr_userclip + 1) / 2;

      c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2;
   }
   else
      c->prog_data.curb_read_length = 0;

   /* Payload vertices plus space for more generated vertices:
    */
   for (j = 0; j < nr_verts; j++) {
      c->reg.vertex[j] = brw_vec4_grf(i, 0);
      i += c->nr_regs;
   }

   if (c->vue_map.num_slots % 2) {
      /* The VUE has an odd number of slots so the last register is only half
       * used.  Fill the second half with zero.
       */
      for (j = 0; j < 3; j++) {
         GLuint delta = brw_vue_slot_to_offset(c->vue_map.num_slots);

         brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0));
      }
   }

   c->reg.t          = brw_vec1_grf(i, 0);
   c->reg.loopcount  = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_D);
   c->reg.nr_verts   = retype(brw_vec1_grf(i, 2), BRW_REGISTER_TYPE_UD);
   c->reg.planemask  = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD);
   c->reg.plane_equation = brw_vec4_grf(i, 4);
   i++;

   c->reg.dpPrev     = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */
   c->reg.dp         = brw_vec1_grf(i, 4);
   i++;

   c->reg.inlist     = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
   i++;

   c->reg.outlist    = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
   i++;

   c->reg.freelist   = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
   i++;

   if (!c->key.nr_userclip) {
      c->reg.fixed_planes = brw_vec8_grf(i, 0);
      i++;
   }

   if (c->key.do_unfilled) {
      c->reg.dir     = brw_vec4_grf(i, 0);
      c->reg.offset  = brw_vec4_grf(i, 4);
      i++;
      c->reg.tmp0    = brw_vec4_grf(i, 0);
      c->reg.tmp1    = brw_vec4_grf(i, 4);
      i++;
   }

   if (intel->needs_ff_sync) {
      c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
      i++;
   }

   c->first_tmp = i;
   c->last_tmp = i;

   c->prog_data.urb_read_length = c->nr_regs; /* ? */
   c->prog_data.total_grf = i;
}