Example #1
void
vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
{
   const struct brw_tes_prog_data *tes_prog_data =
      (const struct brw_tes_prog_data *) prog_data;

   switch (instr->intrinsic) {
   case nir_intrinsic_load_tess_coord:
      /* gl_TessCoord is part of the payload in g1 channels 0-2 and 4-6. */
      emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
               src_reg(brw_vec8_grf(1, 0))));
      break;
   case nir_intrinsic_load_tess_level_outer:
      if (tes_prog_data->domain == BRW_TESS_DOMAIN_ISOLINE) {
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
                          BRW_SWIZZLE_ZWZW)));
      } else {
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
                          BRW_SWIZZLE_WZYX)));
      }
      break;
   case nir_intrinsic_load_tess_level_inner:
      if (tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD) {
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  swizzle(src_reg(ATTR, 0, glsl_type::vec4_type),
                          BRW_SWIZZLE_WZYX)));
      } else {
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  src_reg(ATTR, 1, glsl_type::float_type)));
      }
      break;
   case nir_intrinsic_load_primitive_id:
      emit(TES_OPCODE_GET_PRIMITIVE_ID,
           get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD));
      break;

   case nir_intrinsic_load_input:
   case nir_intrinsic_load_per_vertex_input: {
      src_reg indirect_offset = get_indirect_offset(instr);
      unsigned imm_offset = instr->const_index[0];
      src_reg header = input_read_header;
      bool is_64bit = nir_dest_bit_size(instr->dest) == 64;
      unsigned first_component = nir_intrinsic_component(instr);
      if (is_64bit)
         first_component /= 2;

      if (indirect_offset.file != BAD_FILE) {
         header = src_reg(this, glsl_type::uvec4_type);
         emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header),
              input_read_header, indirect_offset);
      } else {
         /* Arbitrarily only push up to 24 vec4 slots worth of data,
          * which is 12 registers (since each holds 2 vec4 slots).
          */
         const unsigned max_push_slots = 24;
         if (imm_offset < max_push_slots) {
            const glsl_type *src_glsl_type =
               is_64bit ? glsl_type::dvec4_type : glsl_type::ivec4_type;
            src_reg src = src_reg(ATTR, imm_offset, src_glsl_type);
            src.swizzle = BRW_SWZ_COMP_INPUT(first_component);

            const brw_reg_type dst_reg_type =
               is_64bit ? BRW_REGISTER_TYPE_DF : BRW_REGISTER_TYPE_D;
            emit(MOV(get_nir_dest(instr->dest, dst_reg_type), src));

            prog_data->urb_read_length =
               MAX2(prog_data->urb_read_length,
                    DIV_ROUND_UP(imm_offset + (is_64bit ? 2 : 1), 2));
            break;
         }
      }

      if (!is_64bit) {
         dst_reg temp(this, glsl_type::ivec4_type);
         vec4_instruction *read =
            emit(VEC4_OPCODE_URB_READ, temp, src_reg(header));
         read->offset = imm_offset;
         read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;

         src_reg src = src_reg(temp);
         src.swizzle = BRW_SWZ_COMP_INPUT(first_component);

         /* Copy to target.  We might end up with some funky writemasks landing
          * in here, but we really don't want them in the above pseudo-ops.
          */
         dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
         dst.writemask = brw_writemask_for_size(instr->num_components);
         emit(MOV(dst, src));
      } else {
         /* For 64-bit we need to load twice as many 32-bit components, and for
          * dvec3/4 we need to emit 2 URB Read messages
          */
         dst_reg temp(this, glsl_type::dvec4_type);
         dst_reg temp_d = retype(temp, BRW_REGISTER_TYPE_D);

         vec4_instruction *read =
            emit(VEC4_OPCODE_URB_READ, temp_d, src_reg(header));
         read->offset = imm_offset;
         read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;

         if (instr->num_components > 2) {
            read = emit(VEC4_OPCODE_URB_READ, byte_offset(temp_d, REG_SIZE),
                        src_reg(header));
            read->offset = imm_offset + 1;
            read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
         }

         src_reg temp_as_src = src_reg(temp);
         temp_as_src.swizzle = BRW_SWZ_COMP_INPUT(first_component);

         dst_reg shuffled(this, glsl_type::dvec4_type);
         shuffle_64bit_data(shuffled, temp_as_src, false);

         dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_DF);
         dst.writemask = brw_writemask_for_size(instr->num_components);
         emit(MOV(dst, src_reg(shuffled)));
      }
      break;
   }
   default:
      vec4_visitor::nir_emit_intrinsic(instr);
   }
}
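Note: the push path above caps pushed input at 24 vec4 slots and then grows prog_data->urb_read_length in whole GRFs, where each GRF holds two vec4 slots and a 64-bit load consumes two slots. A minimal standalone sketch of that bookkeeping, with hypothetical names rather than the Mesa API:

#include <algorithm>
#include <cassert>

// Illustrative sketch only: mirrors the MAX2/DIV_ROUND_UP arithmetic in
// Example #1, not actual driver code.
static unsigned urb_read_length_after(unsigned cur_len, unsigned imm_offset,
                                      bool is_64bit)
{
    const unsigned slots = imm_offset + (is_64bit ? 2u : 1u); // slots touched
    return std::max(cur_len, (slots + 1) / 2);                // DIV_ROUND_UP(slots, 2)
}

int main()
{
    assert(urb_read_length_after(0, 5, false) == 3); // slots 0..5 -> 3 GRFs
    assert(urb_read_length_after(0, 5, true)  == 4); // slots 0..6 -> 4 GRFs
    return 0;
}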
Example #2
static void merge_edgeflags( struct brw_clip_compile *c )
{
   struct brw_compile *p = &c->func;
   struct brw_reg tmp0 = get_element_ud(c->reg.tmp0, 0);

   brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); 
   brw_CMP(p, 
	   vec1(brw_null_reg()), 
	   BRW_CONDITIONAL_EQ, 
	   tmp0,
	   brw_imm_ud(_3DPRIM_POLYGON));

   /* Get away with using reg.vertex because we know that this is not
    * a _3DPRIM_TRISTRIP_REVERSE:
    */
   brw_IF(p, BRW_EXECUTE_1);
   {   
      brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ);
      brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<8));
      brw_MOV(p, byte_offset(c->reg.vertex[0],
                             brw_varying_to_offset(&c->vue_map,
                                                   VARYING_SLOT_EDGE)),
              brw_imm_f(0));
      brw_set_predicate_control(p, BRW_PREDICATE_NONE);

      brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ);
      brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<9));
      brw_MOV(p, byte_offset(c->reg.vertex[2],
                             brw_varying_to_offset(&c->vue_map,
                                                   VARYING_SLOT_EDGE)),
              brw_imm_f(0));
      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   }
   brw_ENDIF(p);
}
Example #3
static void copy_bfc( struct brw_clip_compile *c )
{
   struct brw_compile *p = &c->func;
   GLuint conditional;

   /* Do we have any colors to copy?
    */
   if (!(brw_clip_have_varying(c, VARYING_SLOT_COL0) &&
         brw_clip_have_varying(c, VARYING_SLOT_BFC0)) &&
       !(brw_clip_have_varying(c, VARYING_SLOT_COL1) &&
         brw_clip_have_varying(c, VARYING_SLOT_BFC1)))
      return;

   /* In some weird degenerate cases we can end up testing the
    * direction twice, once for culling and once for bfc copying.  Oh
    * well, that's what you get for setting weird GL state.
    */
   if (c->key.copy_bfc_ccw)
      conditional = BRW_CONDITIONAL_GE;
   else
      conditional = BRW_CONDITIONAL_L;

   brw_CMP(p,
	   vec1(brw_null_reg()),
	   conditional,
	   get_element(c->reg.dir, 2),
	   brw_imm_f(0));

   brw_IF(p, BRW_EXECUTE_1);
   {
      GLuint i;

      for (i = 0; i < 3; i++) {
	 if (brw_clip_have_varying(c, VARYING_SLOT_COL0) &&
             brw_clip_have_varying(c, VARYING_SLOT_BFC0))
	    brw_MOV(p,
		    byte_offset(c->reg.vertex[i],
                                brw_varying_to_offset(&c->vue_map,
                                                      VARYING_SLOT_COL0)),
		    byte_offset(c->reg.vertex[i],
                                brw_varying_to_offset(&c->vue_map,
                                                      VARYING_SLOT_BFC0)));

	 if (brw_clip_have_varying(c, VARYING_SLOT_COL1) &&
             brw_clip_have_varying(c, VARYING_SLOT_BFC1))
	    brw_MOV(p,
		    byte_offset(c->reg.vertex[i],
                                brw_varying_to_offset(&c->vue_map,
                                                      VARYING_SLOT_COL1)),
		    byte_offset(c->reg.vertex[i],
                                brw_varying_to_offset(&c->vue_map,
                                                      VARYING_SLOT_BFC1)));
      }
   }
   brw_ENDIF(p);
}
Example #4
/* Distribute flatshaded attributes from provoking vertex prior to
 * clipping.
 */
void brw_clip_copy_flatshaded_attributes( struct brw_clip_compile *c,
			   GLuint to, GLuint from )
{
   struct brw_codegen *p = &c->func;

   for (int i = 0; i < c->vue_map.num_slots; i++) {
      if (c->key.interpolation_mode.mode[i] == INTERP_QUALIFIER_FLAT) {
         brw_MOV(p,
                 byte_offset(c->reg.vertex[to], brw_vue_slot_to_offset(i)),
                 byte_offset(c->reg.vertex[from], brw_vue_slot_to_offset(i)));
      }
   }
}
Example #5
/* If flatshading, distribute color from provoking vertex prior to
 * clipping.
 */
void brw_clip_copy_colors( struct brw_clip_compile *c,
			   unsigned to, unsigned from )
{
#if 0
   struct brw_compile *p = &c->func;

   if (c->offset[VERT_RESULT_COL0])
      brw_MOV(p,
	      byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_COL0]),
	      byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_COL0]));

   if (c->offset[VERT_RESULT_COL1])
      brw_MOV(p,
	      byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_COL1]),
	      byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_COL1]));

   if (c->offset[VERT_RESULT_BFC0])
      brw_MOV(p,
	      byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_BFC0]),
	      byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_BFC0]));

   if (c->offset[VERT_RESULT_BFC1])
      brw_MOV(p,
	      byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_BFC1]),
	      byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_BFC1]));
#else
         #warning "disabled"
#endif
}
Example #6
/* If flatshading, distribute color from provoking vertex prior to
 * clipping.
 */
void brw_clip_copy_colors( struct brw_clip_compile *c,
			   GLuint to, GLuint from )
{
   struct brw_compile *p = &c->func;

   if (c->offset[VERT_RESULT_COL0])
      brw_MOV(p, 
	      byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_COL0]),
	      byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_COL0]));

   if (c->offset[VERT_RESULT_COL1])
      brw_MOV(p, 
	      byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_COL1]),
	      byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_COL1]));

   if (c->offset[VERT_RESULT_BFC0])
      brw_MOV(p, 
	      byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_BFC0]),
	      byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_BFC0]));

   if (c->offset[VERT_RESULT_BFC1])
      brw_MOV(p, 
	      byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_BFC1]),
	      byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_BFC1]));
}
Example #7
void brw_copy_from_indirect(struct brw_compile *p,
			    struct brw_reg dst,
			    struct brw_indirect ptr,
			    GLuint count)
{
   GLuint i;

   dst = vec4(dst);

   for (i = 0; i < count; i++)
   {
      GLuint delta = i*32;
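      /* Each iteration copies one 32-byte register as two 16-byte
       * (vec4 float) moves through the indirect pointer.
       */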
      brw_MOV(p, byte_offset(dst, delta),    deref_4f(ptr, delta));
      brw_MOV(p, byte_offset(dst, delta+16), deref_4f(ptr, delta+16));
   }
}
Example #8
void brw_copy8(struct brw_compile *p,
	       struct brw_reg dst,
	       struct brw_reg src,
	       GLuint count)
{
   GLuint i;

   dst = vec8(dst);
   src = vec8(src);

   for (i = 0; i < count; i++)
   {
      GLuint delta = i*32;
      brw_MOV(p, byte_offset(dst, delta),    byte_offset(src, delta));
   }
}
Example #9
std::pair<bool, bool>
MemoryType::checkAndMergeMemType(Location addr, const RsType* type)
{
  const size_t ofs = byte_offset(startAddress, addr, blockSize(), 0);

  return checkAndMergeMemType(ofs, type);
}
Example #10
void brw_copy8(struct brw_codegen *p,
	       struct brw_reg dst,
	       struct brw_reg src,
	       unsigned count)
{
   unsigned i;

   dst = vec8(dst);
   src = vec8(src);

   for (i = 0; i < count; i++)
   {
      unsigned delta = i*32;
      brw_MOV(p, byte_offset(dst, delta),    byte_offset(src, delta));
   }
}
Example #11
void brw_copy_from_indirect(struct brw_codegen *p,
			    struct brw_reg dst,
			    struct brw_indirect ptr,
			    unsigned count)
{
   unsigned i;

   dst = vec4(dst);

   for (i = 0; i < count; i++)
   {
      unsigned delta = i*32;
      brw_MOV(p, byte_offset(dst, delta),    deref_4f(ptr, delta));
      brw_MOV(p, byte_offset(dst, delta+16), deref_4f(ptr, delta+16));
   }
}
Example #12
/* This is performed against the original triangles, so no indirection
 * required:
BZZZT!
 */
static void compute_tri_direction( struct brw_clip_compile *c )
{
   struct brw_compile *p = &c->func;
   struct brw_reg e = c->reg.tmp0;
   struct brw_reg f = c->reg.tmp1;
   GLuint hpos_offset = brw_vert_result_to_offset(&c->vue_map,
                                                  VARYING_SLOT_POS);
   struct brw_reg v0 = byte_offset(c->reg.vertex[0], hpos_offset);
   struct brw_reg v1 = byte_offset(c->reg.vertex[1], hpos_offset);
   struct brw_reg v2 = byte_offset(c->reg.vertex[2], hpos_offset);


   struct brw_reg v0n = get_tmp(c);
   struct brw_reg v1n = get_tmp(c);
   struct brw_reg v2n = get_tmp(c);

   /* Convert to NDC.
    * NOTE: We can't modify the original vertex coordinates,
    * as that may impact further operations,
    * so we have to keep the normalized coordinates in temp registers.
    *
    * TBD-KC
    * Try to optimize away unnecessary MOVs.
    */
   brw_MOV(p, v0n, v0);
   brw_MOV(p, v1n, v1);
   brw_MOV(p, v2n, v2);

   brw_clip_project_position(c, v0n);
   brw_clip_project_position(c, v1n);
   brw_clip_project_position(c, v2n);

   /* Calculate the vectors of two edges of the triangle:
    */
   brw_ADD(p, e, v0n, negate(v2n)); 
   brw_ADD(p, f, v1n, negate(v2n)); 

   /* Take their cross product:
    */
   brw_set_access_mode(p, BRW_ALIGN_16);
   brw_MUL(p, vec4(brw_null_reg()), brw_swizzle(e, 1,2,0,3),  brw_swizzle(f,2,0,1,3));
   brw_MAC(p, vec4(e),  negate(brw_swizzle(e, 2,0,1,3)), brw_swizzle(f,1,2,0,3));
   brw_set_access_mode(p, BRW_ALIGN_1);

   brw_MUL(p, c->reg.dir, c->reg.dir, vec4(e));
}
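The swizzled MUL/MAC pair above is the usual Align16 idiom for a cross product: the MUL puts e.yzx * f.zxy into the accumulator, and the MAC subtracts e.zxy * f.yzx, leaving cross(e, f) in the xyz lanes of e. A scalar sketch of the same computation (illustrative code, not driver API):

#include <cassert>

// cross3 computes out = e x f, lane for lane what the MUL/MAC pair
// leaves in e.xyz (the w lane is ignored here).
static void cross3(const float e[3], const float f[3], float out[3])
{
    out[0] = e[1] * f[2] - e[2] * f[1];
    out[1] = e[2] * f[0] - e[0] * f[2];
    out[2] = e[0] * f[1] - e[1] * f[0];
}

int main()
{
    const float x[3] = {1, 0, 0}, y[3] = {0, 1, 0};
    float d[3];
    cross3(x, y, d);
    assert(d[0] == 0 && d[1] == 0 && d[2] == 1); // x cross y == z
    return 0;
}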
Example #13
void brw_copy4(struct brw_compile *p,
	       struct brw_reg dst,
	       struct brw_reg src,
	       unsigned count)
{
   unsigned i;

   dst = vec4(dst);
   src = vec4(src);

   for (i = 0; i < count; i++)
   {
      unsigned delta = i*32;
      brw_MOV(p, byte_offset(dst, delta),    byte_offset(src, delta));
      brw_MOV(p, byte_offset(dst, delta+16), byte_offset(src, delta+16));
   }
}
Example #14
static void copy_bfc( struct brw_clip_compile *c )
{
   struct brw_compile *p = &c->func;
   struct brw_instruction *ccw;
   GLuint conditional;

   /* Do we have any colors to copy? 
    */
   if (!(c->offset[VERT_RESULT_COL0] && c->offset[VERT_RESULT_BFC0]) &&
       !(c->offset[VERT_RESULT_COL1] && c->offset[VERT_RESULT_BFC1]))
      return;

   /* In some weird degenerate cases we can end up testing the
    * direction twice, once for culling and once for bfc copying.  Oh
    * well, that's what you get for setting weird GL state.
    */
   if (c->key.copy_bfc_ccw)
      conditional = BRW_CONDITIONAL_GE;
   else
      conditional = BRW_CONDITIONAL_L;

   brw_CMP(p,
	   vec1(brw_null_reg()),
	   conditional,
	   get_element(c->reg.dir, 2),
	   brw_imm_f(0));
   
   ccw = brw_IF(p, BRW_EXECUTE_1);
   {
      GLuint i;

      for (i = 0; i < 3; i++) {
	 if (c->offset[VERT_RESULT_COL0] && c->offset[VERT_RESULT_BFC0])
	    brw_MOV(p, 
		    byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_COL0]),
		    byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC0]));

	 if (c->offset[VERT_RESULT_COL1] && c->offset[VERT_RESULT_BFC1])
	    brw_MOV(p, 
		    byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_COL1]),
		    byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC1]));
      }
   }
   brw_ENDIF(p, ccw);
}
Example #15
        /// @copydoc block_partitioning::bytes_used(uint32_t) const
        uint32_t bytes_used(uint32_t block_id) const
        {
            assert(block_id < m_total_blocks);

            uint32_t offset = byte_offset(block_id);

            assert(offset < m_object_size);
            uint32_t remaining =  m_object_size - offset;
            uint32_t the_block_size = block_size(block_id);

            return std::min(remaining, the_block_size);
        }
Example #16
bool MemoryType::isInitialized(Location addr, size_t len) const
{
    const size_t           offset = byte_offset(startAddress, addr, blockSize(), 0);
    const size_t           limit = offset + len;

    for (size_t i = offset; i < limit; ++i)
    {
        if ( !byteInitialized(i) ) return false;
    }

    return true;
}
Example #17
static int pack_double(grib_accessor* a, const double* val, size_t *len)
{
    grib_accessor_ibmfloat* self = (grib_accessor_ibmfloat*)a;
    int ret = 0;
    unsigned long i = 0;
    unsigned long rlen = *len;
    size_t buflen  = 0;
    unsigned char *buf = NULL;
    long off = 0;

    if(*len < 1)
    {
        grib_context_log(a->parent->h->context, GRIB_LOG_ERROR, "wrong size (%lu) for %s: it packs at least 1 value", rlen, a->name);
        *len = 0;
        return GRIB_ARRAY_TOO_SMALL;
    }

    if (rlen == 1){
        /*
    double x = 0;
    grib_nearest_smaller_ibm_float(val[0],&x);
    double y = grib_long_to_ibm(grib_ibm_to_long(val[0]));
    printf("IBMFLOAT val=%.20f nearest_smaller_ibm_float=%.20f long_to_ibm=%.20f\n",val[0],x ,y);
         */
        off = byte_offset(a)*8;
        ret =  grib_encode_unsigned_long(a->parent->h->buffer->data,grib_ibm_to_long(val[0]), &off,  32);
        if (*len > 1)  grib_context_log(a->parent->h->context, GRIB_LOG_WARNING, "grib_accessor_unsigned : Trying to pack %d values in a scalar %s, packing first value",  *len, a->name  );
        if (ret == GRIB_SUCCESS) len[0] = 1;
        return ret;
    }

    buflen = rlen*4;

    buf = (unsigned char*)grib_context_malloc(a->parent->h->context,buflen);

    for(i=0; i < rlen;i++){
        grib_encode_unsigned_longb(buf,grib_ibm_to_long(val[i]), &off,  32);
    }
    ret = grib_set_long_internal(a->parent->h,grib_arguments_get_name(a->parent->h,self->arg,0),rlen);

    if(ret == GRIB_SUCCESS)
        grib_buffer_replace(a, buf, buflen,1,1);
    else
        *len = 0;

    grib_context_free(a->parent->h->context,buf);

    a->length = byte_count(a);

    return ret;
}
Example #18
/* This is performed against the original triangles, so no indirection
 * required:
BZZZT!
 */
static void compute_tri_direction( struct brw_clip_compile *c )
{
   struct brw_compile *p = &c->func;
   struct brw_reg e = c->reg.tmp0;
   struct brw_reg f = c->reg.tmp1;
   struct brw_reg v0 = byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_HPOS]); 
   struct brw_reg v1 = byte_offset(c->reg.vertex[1], c->offset[VERT_RESULT_HPOS]); 
   struct brw_reg v2 = byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_HPOS]); 


   /* Calculate the vectors of two edges of the triangle:
    */
   brw_ADD(p, e, v0, negate(v2)); 
   brw_ADD(p, f, v1, negate(v2)); 

   /* Take their cross product:
    */
   brw_set_access_mode(p, BRW_ALIGN_16);
   brw_MUL(p, vec4(brw_null_reg()), brw_swizzle(e, 1,2,0,3),  brw_swizzle(f,2,0,1,3));
   brw_MAC(p, vec4(e),  negate(brw_swizzle(e, 2,0,1,3)), brw_swizzle(f,1,2,0,3));
   brw_set_access_mode(p, BRW_ALIGN_1);

   brw_MUL(p, c->reg.dir, c->reg.dir, vec4(e));
}
Example #19
// declared static by preceding declaration
std::ptrdiff_t byte_offset(Address base, Address elem, size_t blocksize, size_t blockofs)
{
  ez::unused(blocksize), ez::unused(blockofs);

#if WITH_UPC
  if (blocksize > 0)
  {
    base.local -= blockofs;
    return byte_offset(base, elem, blocksize) - blockofs;
  }
#endif /* WITH_UPC */

  assert(blocksize == 0 && blockofs == 0);
  return elem.local - base.local;
}
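Note: with blocksize == 0 (the non-UPC case) this reduces to a plain pointer difference; the WITH_UPC branch shifts the base back by blockofs and defers to the blocksize overload shown in Example #22.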
Example #20
/* If flatshading, distribute color from provoking vertex prior to
 * clipping.
 */
void brw_clip_copy_colors( struct brw_clip_compile *c,
			   GLuint to, GLuint from )
{
   struct brw_compile *p = &c->func;

   if (brw_clip_have_varying(c, VARYING_SLOT_COL0))
      brw_MOV(p, 
	      byte_offset(c->reg.vertex[to],
                          brw_varying_to_offset(&c->vue_map,
                                                VARYING_SLOT_COL0)),
	      byte_offset(c->reg.vertex[from],
                          brw_varying_to_offset(&c->vue_map,
                                                VARYING_SLOT_COL0)));

   if (brw_clip_have_varying(c, VARYING_SLOT_COL1))
      brw_MOV(p, 
	      byte_offset(c->reg.vertex[to],
                          brw_varying_to_offset(&c->vue_map,
                                                VARYING_SLOT_COL1)),
	      byte_offset(c->reg.vertex[from],
                          brw_varying_to_offset(&c->vue_map,
                                                VARYING_SLOT_COL1)));

   if (brw_clip_have_varying(c, VARYING_SLOT_BFC0))
      brw_MOV(p, 
	      byte_offset(c->reg.vertex[to],
                          brw_varying_to_offset(&c->vue_map,
                                                VARYING_SLOT_BFC0)),
	      byte_offset(c->reg.vertex[from],
                          brw_varying_to_offset(&c->vue_map,
                                                VARYING_SLOT_BFC0)));

   if (brw_clip_have_varying(c, VARYING_SLOT_BFC1))
      brw_MOV(p, 
	      byte_offset(c->reg.vertex[to],
                          brw_varying_to_offset(&c->vue_map,
                                                VARYING_SLOT_BFC1)),
	      byte_offset(c->reg.vertex[from],
                          brw_varying_to_offset(&c->vue_map,
                                                VARYING_SLOT_BFC1)));
}
Example #21
std::pair<MemoryType*, bool>
MemoryManager::checkWrite(Location addr, size_t size, const RsType* t)
{
    if ( diagnostics::message(diagnostics::memory) )
    {
      std::stringstream msg;

      msg << "   ++ checkWrite: " << addr << " size: " << size;
      RuntimeSystem::instance().printMessage(msg.str());
    }

    bool           statuschange = false;
    MemoryType*    mt = ::checkLocation(*this, addr, size, RuntimeViolation::INVALID_WRITE);

    // the address has to be inside the block
    assert(mt && mt->containsMemArea(addr, size));
    const long    blocksize = mt->blockSize();
    const size_t  ofs = byte_offset(mt->beginAddress(), addr, blocksize, 0);

    if (t)
    {
      if ( diagnostics::message(diagnostics::memory) )
      {
        RuntimeSystem&    rs = RuntimeSystem::instance();
        std::stringstream msg;

        msg << "++ found memory addr: " << *mt << " for " << addr << " with size " << ToString(size);
        rs.printMessage(msg.str());
      }

      statuschange = mt->registerMemType(addr, ofs, t);
    }

    const bool initmod = mt->initialize(ofs, size);

    if ( diagnostics::message(diagnostics::memory) )
    {
      RuntimeSystem::instance().printMessage("   ++ checkWrite done.");
    }

    return std::make_pair(mt, initmod || statuschange);
}
Example #22
static
std::ptrdiff_t byte_offset(Address base, Address elem, size_t blocksize)
{
  assert(blocksize != 0);

  // adjust base so that it is aligned on a blocksize address
  const size_t         baseaddr  = reinterpret_cast<size_t>(base.local);
  const size_t         basescale = baseaddr / blocksize;
  const size_t         baseshift = baseaddr % blocksize;

  // adjust elem by the same amount
  const size_t         elemaddr  = reinterpret_cast<size_t>(elem.local - baseshift);
  const size_t         elemscale = elemaddr / blocksize;
  const size_t         elemphase = elemaddr % blocksize;

  // use simpler calculation offset calculation
  const std::ptrdiff_t scaleofs = byte_offset(basescale, base.thread_id, elemscale, elem.thread_id);

  // unscale and add back the elem's phase
  return scaleofs * blocksize + elemphase;
}
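The scaling trick above can be hard to follow in the Address/thread_id form. A self-contained sketch of the same idea for plain byte addresses on a single thread (a simplification: index_offset stands in for the index-level byte_offset overload, which here degenerates to a difference):

#include <cassert>
#include <cstddef>

// Single-thread stand-in for the index-level offset computation.
static std::ptrdiff_t index_offset(std::size_t basescale, std::size_t elemscale)
{
    return static_cast<std::ptrdiff_t>(elemscale) -
           static_cast<std::ptrdiff_t>(basescale);
}

static std::ptrdiff_t block_byte_offset(std::size_t base, std::size_t elem,
                                        std::size_t blocksize)
{
    assert(blocksize != 0);
    const std::size_t baseshift = base % blocksize;  // phase of the base
    const std::size_t adjusted  = elem - baseshift;  // align elem the same way
    const std::ptrdiff_t scaleofs =
        index_offset(base / blocksize, adjusted / blocksize);
    return scaleofs * blocksize + adjusted % blocksize; // unscale, re-add phase
}

int main()
{
    // With a single thread the result must equal the plain difference.
    assert(block_byte_offset(100, 137, 8) == 37);
    assert(block_byte_offset(103, 137, 8) == 34);
    return 0;
}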
Example #23
void
vec4_tcs_visitor::emit_urb_write(const src_reg &value,
                                 unsigned writemask,
                                 unsigned base_offset,
                                 const src_reg &indirect_offset)
{
   if (writemask == 0)
      return;

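   /* Build a two-register payload: register 0 receives the per-slot URB
    * offsets, register 1 (written by the MOV at byte_offset(..., REG_SIZE)
    * below) receives the data; TCS_OPCODE_URB_WRITE then sends both
    * (mlen = 2).
    */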
   src_reg message(this, glsl_type::uvec4_type, 2);
   vec4_instruction *inst;

   inst = emit(TCS_OPCODE_SET_OUTPUT_URB_OFFSETS, dst_reg(message),
               brw_imm_ud(writemask), indirect_offset);
   inst->force_writemask_all = true;
   inst = emit(MOV(byte_offset(dst_reg(retype(message, value.type)), REG_SIZE),
                   value));
   inst->force_writemask_all = true;

   inst = emit(TCS_OPCODE_URB_WRITE, dst_null_f(), message);
   inst->offset = base_offset;
   inst->mlen = 2;
   inst->base_mrf = -1;
}
Example #24
void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, 
			      GLuint nr_verts )
{
   GLuint i = 0,j;

   /* Register usage is static, precompute here:
    */
   c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;

   if (c->key.nr_userclip) {
      c->reg.fixed_planes = brw_vec4_grf(i, 0);
      i += (6 + c->key.nr_userclip + 1) / 2;

      c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2;
   }
   else
      c->prog_data.curb_read_length = 0;


   /* Payload vertices plus space for more generated vertices:
    */
   for (j = 0; j < nr_verts; j++) {
      c->reg.vertex[j] = brw_vec4_grf(i, 0);
      i += c->nr_regs;
   }

   if (c->key.nr_attrs & 1) {
      for (j = 0; j < 3; j++) {
	 GLuint delta = c->key.nr_attrs*16 + 32;

         if (c->chipset.is_igdng)
             delta = c->key.nr_attrs * 16 + 32 * 3;

	 brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0));
      }
   }

   c->reg.t          = brw_vec1_grf(i, 0);
   c->reg.loopcount  = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_D);
   c->reg.nr_verts   = retype(brw_vec1_grf(i, 2), BRW_REGISTER_TYPE_UD);
   c->reg.planemask  = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD);
   c->reg.plane_equation = brw_vec4_grf(i, 4);
   i++;

   c->reg.dpPrev     = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */
   c->reg.dp         = brw_vec1_grf(i, 4);
   i++;

   c->reg.inlist     = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
   i++;

   c->reg.outlist    = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
   i++;

   c->reg.freelist   = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
   i++;

   if (!c->key.nr_userclip) {
      c->reg.fixed_planes = brw_vec8_grf(i, 0); 
      i++;
   }

   if (c->key.do_unfilled) {
      c->reg.dir     = brw_vec4_grf(i, 0);
      c->reg.offset  = brw_vec4_grf(i, 4);
      i++;
      c->reg.tmp0    = brw_vec4_grf(i, 0);
      c->reg.tmp1    = brw_vec4_grf(i, 4);
      i++;
   }

   if (c->need_ff_sync) {
      c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
      i++;
   }

   c->first_tmp = i;
   c->last_tmp = i;

   c->prog_data.urb_read_length = c->nr_regs; /* ? */
   c->prog_data.total_grf = i;
}
Example #25
static long next_offset(grib_accessor* a) {
    return byte_offset(a)+a->length;
}
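Compare Example #28 below, which derives the same next offset from byte_count(a) instead of the accessor's stored a->length.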
Example #26
bool MemoryType::containsMemArea(Location queryAddress, size_t len) const
{
    std::ptrdiff_t ofs = byte_offset(startAddress, queryAddress, blockSize(), 0);

    return (ofs >= 0) && ((ofs+len) <= getSize());
}
Example #27
bool
MemoryManager::checkIfSameChunk(Location addr1, Location addr2, size_t typeSize, RuntimeViolation::Type violation) const
{
  RuntimeViolation::Type access_violation = violation;

  if (access_violation != RuntimeViolation::NONE) access_violation = RuntimeViolation::INVALID_READ;

  const MemoryType*      mem1 = checkLocation(addr1, typeSize, access_violation);
  const bool             sameChunk = (mem1 && mem1->containsMemArea(addr2, typeSize));

  // check that addr1 and addr2 point to the same base location
  if (! sameChunk)
  {
    const MemoryType*    mem2 = checkLocation(addr2, typeSize, access_violation);
    // The error is skipped if a chunk is not available, because pointer
    // errors should be recorded only when the out-of-bounds memory is
    // accessed, not when the pointer is moved out of bounds.
    const bool           skiperr = (violation == RuntimeViolation::NONE && !(mem1 && mem2));

    if (!skiperr)
    {
      assert(mem1 && mem2 && mem1 != mem2);
      RuntimeSystem&       rs = RuntimeSystem::instance();
      std::stringstream    ss;

      ss << "Pointer changed allocation block from " << addr1 << " to " << addr2 << std::endl;

      rs.violationHandler( violation, ss.str() );
    }

    return false;
  }

  // so far we know that addr1, addr2 have been allocated within the same chunk
  // now, test for same array chunks
  const size_t                totalblocksize = mem1->blockSize();
  const Location              memloc = mem1->beginAddress();
  const std::ptrdiff_t        ofs1 = byte_offset(memloc, addr1, totalblocksize, 0);
  const std::ptrdiff_t        ofs2 = byte_offset(memloc, addr2, totalblocksize, 0);

  //~ std::cerr << *mem1 << std::endl;
  //~ std::cerr << "a = " << addr1 << std::endl;
  //~ std::cerr << "b = " << addr2 << std::endl;
  //~ std::cerr << "ts = " << typeSize << std::endl;
  //~ std::cerr << "tbs = " << totalblocksize << std::endl;

  // \pp as far as I understand, getTypeAt returns the innermost type
  //     that overlaps a certain memory region [addr1, typeSize).
  //     The current implementation stops at the highest level of array...
  const RsType*               type1 = mem1 -> getTypeAt( ofs1, typeSize );
  const RsType*               type2 = mem1 -> getTypeAt( ofs2, typeSize );
  std::auto_ptr<const RsType> guard1(NULL);
  std::auto_ptr<const RsType> guard2(NULL);

  if( !type1 ) {
      //~ std::cerr << "type1! " << std::endl;
      type1 = mem1->computeCompoundTypeAt( ofs1, typeSize );
      guard1.reset(type1);
  }

  if( !type2 ) {
      //~ std::cerr << "type2! " << std::endl;
      type2 = mem1->computeCompoundTypeAt( ofs2, typeSize );
      guard2.reset(type2);
  }

  assert( type1 && type2 );

  //~ std::cerr << "-- T1 = " << typeid(*type1).name() << std::endl;
  //~ std::cerr << "-- T2 = " << typeid(*type2).name() << std::endl;

  bool               accessOK = type1->isConsistentWith( *type2 );
  const RsArrayType* array = (accessOK ? dynamic_cast< const RsArrayType* >( type1 ) : 0);

  //~ std::cerr << "accOK: " << accessOK << std::endl;

  if (array)
  {
    // \pp in order to calculate the last memory location of elem, we need to
    //     know whether its type is an array type, and whether this is an
    //     immediate (there is no user defined type or pointer in between)
    //     subarray of the containing array.
    const size_t          blockofs = 0; /* \todo calculate offset from addr1 */

    // \pp \todo
    //     not sure why bounds checking is based on a relative address (addr1)
    //     and not on absolute boundaries of the chunk where this address
    //     is located...
    //     e.g., addr2 < array(addr).lb || array(addr).ub < (addr2+typeSize)
    //     - a reason is that we want to bounds check within a multi-dimensional
    //       array; a sub-array might not start at the same address as the
    //       allocated chunk (which is always at [0][0]...)
    // \bug  this works fine for arrays, but it does not seem to be OK for
    //       pointers; in which case addr1 might point in the middle of a chunk.
    const Location       arrlb = addr1;
    const std::ptrdiff_t arrlen = array->getByteSize();
    const Location       elemlo = addr2;
          Location       elemhi = addr2;
    const RsType*        memtype = mem1->getTypeAt( 0, mem1->getSize() );
    const size_t         basesize = basetypeSize(memtype);

    if (typeSize > basesize)
    {
      // elem is another array
      // assert( rs->testing() || dynamic_cast< const RsArrayType* >(type2) );

      const long   elemblockofs = 0; // \todo calculate the ofs of elem's address (is this needed?)

      elemhi = ::add(elemhi, typeSize-1, totalblocksize, elemblockofs);
    }
    else
    {
      elemhi.local += (typeSize-1);
    }

    // \pp \note [arrlb, arrub] and [elemlo, elemhi]  (therefore typeSize - 1)
    //           arrub < elemhi this holds for UPC
    //           in contrast, using a range [elemlb, elemub) causes problems
    //           for UPC where the elemub can be larger than arrub
    //           for last elements on threads other than 0.
    //           e.g., shared int x[THREADS];
    //                   arrub == &x@thread(0) + sizeof(int)
    //                 writing to x[1] has the following upper bound
    //                   elemub == &x@thread(1) + sizeof(int)
    //                 consequently arrub < elemub :(

#if EXTENDEDDBG
    std::cerr << "  arrlb = " << arrlb << "( lb + " << arrlen << " [" << totalblocksize << "] )" << std::endl;
    std::cerr << "  elmlb = " << elemlo << std::endl;
    std::cerr << "  elmub = " << elemhi << " ( lb + " << typeSize << " )" << std::endl;

    std::cerr << "  ofs(elemlb) >= 0 : " << byte_offset(arrlb, elemlo, totalblocksize, blockofs) << std::endl;
    std::cerr << "  ofs(elemub) < sz(arr) : " << (byte_offset(arrlb, elemhi, totalblocksize, blockofs)) << std::endl;
#endif /* EXTENDEDDBG */

    // the offset of the element in respect to the array has to be positive
    //   and the offset of the elements last byte has to be less than the
    //   array length.
    accessOK  = (byte_offset(arrlb, elemlo, totalblocksize, blockofs) >= 0);
    accessOK &= (byte_offset(arrlb, elemhi, totalblocksize, blockofs) < arrlen);

    // the array element might be before it [ -1 ]
    // ... or after the array [ N ]...
    // was: accessOK = ! ( (elemlo < arrlb) || (arrub < elemhi) )
    /* was:
    if  !(  addr1 + array -> getByteSize() >= addr2 + typeSize  // the array element might be after the array [ N ]...
         && addr1 <= addr2 // ... or before it [ -1 ]
         *   (12)     (8)
         )
    {
      // out of bounds error (e.g. int[2][3], ref [0][3])
      consistent = false;
    }
    */
  }

  if (!accessOK)
    failNotSameChunk( *type1, *type2, addr1, addr2, *mem1, violation );

  return accessOK;
}
Example #28
static long next_offset(grib_accessor* a) {
  return byte_offset(a)+byte_count(a);
}
Example #29
void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, 
			      GLuint nr_verts )
{
   struct intel_context *intel = &c->func.brw->intel;
   GLuint i = 0,j;

   /* Register usage is static, precompute here:
    */
   c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;

   if (c->key.nr_userclip) {
      c->reg.fixed_planes = brw_vec4_grf(i, 0);
      i += (6 + c->key.nr_userclip + 1) / 2;

      c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2;
   }
   else
      c->prog_data.curb_read_length = 0;


   /* Payload vertices plus space for more generated vertices:
    */
   for (j = 0; j < nr_verts; j++) {
      c->reg.vertex[j] = brw_vec4_grf(i, 0);
      i += c->nr_regs;
   }

   if (c->vue_map.num_slots % 2) {
      /* The VUE has an odd number of slots so the last register is only half
       * used.  Fill the second half with zero.
       */
      for (j = 0; j < 3; j++) {
	 GLuint delta = brw_vue_slot_to_offset(c->vue_map.num_slots);

	 brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0));
      }
   }

   c->reg.t          = brw_vec1_grf(i, 0);
   c->reg.loopcount  = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_D);
   c->reg.nr_verts   = retype(brw_vec1_grf(i, 2), BRW_REGISTER_TYPE_UD);
   c->reg.planemask  = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD);
   c->reg.plane_equation = brw_vec4_grf(i, 4);
   i++;

   c->reg.dpPrev     = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */
   c->reg.dp         = brw_vec1_grf(i, 4);
   i++;

   c->reg.inlist     = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
   i++;

   c->reg.outlist    = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
   i++;

   c->reg.freelist   = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
   i++;

   if (!c->key.nr_userclip) {
      c->reg.fixed_planes = brw_vec8_grf(i, 0); 
      i++;
   }

   if (c->key.do_unfilled) {
      c->reg.dir     = brw_vec4_grf(i, 0);
      c->reg.offset  = brw_vec4_grf(i, 4);
      i++;
      c->reg.tmp0    = brw_vec4_grf(i, 0);
      c->reg.tmp1    = brw_vec4_grf(i, 4);
      i++;
   }

   if (intel->needs_ff_sync) {
      c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
      i++;
   }

   c->first_tmp = i;
   c->last_tmp = i;

   c->prog_data.urb_read_length = c->nr_regs; /* ? */
   c->prog_data.total_grf = i;
}