Example No. 1
/**
 * Compute the VUE map for tessellation control shader outputs and
 * tessellation evaluation shader inputs.
 */
void
brw_compute_tess_vue_map(struct brw_vue_map *vue_map,
                         GLbitfield64 vertex_slots,
                         GLbitfield patch_slots)
{
   /* I don't think anything actually uses this... */
   vue_map->slots_valid = vertex_slots;

   vertex_slots &= ~(VARYING_BIT_TESS_LEVEL_OUTER |
                     VARYING_BIT_TESS_LEVEL_INNER);

   /* Make sure that the values we store in vue_map->varying_to_slot and
    * vue_map->slot_to_varying won't overflow the signed chars that are used
    * to store them.  Note that since vue_map->slot_to_varying sometimes holds
    * values equal to VARYING_SLOT_TESS_MAX, we need to ensure that
    * VARYING_SLOT_TESS_MAX is <= 127, not 128.
    */
   STATIC_ASSERT(VARYING_SLOT_TESS_MAX <= 127);

   for (int i = 0; i < VARYING_SLOT_TESS_MAX; ++i) {
      vue_map->varying_to_slot[i] = -1;
      vue_map->slot_to_varying[i] = BRW_VARYING_SLOT_PAD;
   }

   int slot = 0;

   /* The first 8 DWords are reserved for the "Patch Header".
    *
    * VARYING_SLOT_TESS_LEVEL_OUTER / INNER live here, but the exact layout
    * depends on the domain type.  They might not be in slots 0 and 1 as
    * described here, but pretending they're separate allows us to uniquely
    * identify them by distinct slot locations.
    */
   assign_vue_slot(vue_map, VARYING_SLOT_TESS_LEVEL_INNER, slot++);
   assign_vue_slot(vue_map, VARYING_SLOT_TESS_LEVEL_OUTER, slot++);

   /* first assign per-patch varyings */
   while (patch_slots != 0) {
      const int varying = ffsll(patch_slots) - 1;
      if (vue_map->varying_to_slot[varying + VARYING_SLOT_PATCH0] == -1) {
         assign_vue_slot(vue_map, varying + VARYING_SLOT_PATCH0, slot++);
      }
      patch_slots &= ~BITFIELD64_BIT(varying);
   }

   /* apparently, including the patch header... */
   vue_map->num_per_patch_slots = slot;

   /* then assign per-vertex varyings for each vertex in our patch */
   while (vertex_slots != 0) {
      const int varying = ffsll(vertex_slots) - 1;
      if (vue_map->varying_to_slot[varying] == -1) {
         assign_vue_slot(vue_map, varying, slot++);
      }
      vertex_slots &= ~BITFIELD64_BIT(varying);
   }

   vue_map->num_per_vertex_slots = slot - vue_map->num_per_patch_slots;
   vue_map->num_slots = slot;
}
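assign_vue_slot() is not included in this snippet. A minimal sketch of what it plausibly does, inferred from how varying_to_slot and slot_to_varying are initialized above (an assumption, not verified against the Mesa sources):

static inline void
assign_vue_slot(struct brw_vue_map *vue_map, int varying, int slot)
{
   /* Each varying should be assigned at most once (hypothetical check). */
   assert(vue_map->varying_to_slot[varying] == -1);

   /* Record the mapping in both directions, replacing the -1 and
    * BRW_VARYING_SLOT_PAD placeholders set by the init loop above.
    */
   vue_map->varying_to_slot[varying] = slot;
   vue_map->slot_to_varying[slot] = varying;
}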
Example No. 2
inline void bishop_move(struct position *pos, struct move_array *m,
            unsigned char bishops)
{
    uint64_t bishop_pos = pos->pieces[bishops];
    uint64_t moves;
    unsigned char index_from, index_to;
    for (int i = 0; i < 10; i++) {  // up to 10 bishops of the same color can be on the board (a bounded for loop lets the compiler unroll it)
        if ((index_from = ffsll(bishop_pos)) != 0) {
            index_from--;
            moves = BishopMoves[index_from][magictransform((pos->allpieces & BishopMasks[index_from]),
                                                           BishopMagic[index_from],
                                                           BishopMagicSize[index_from])]
                    & ~pos->sumpieces[bishops & COLOR];
            for (int i = 0; i < 14; i++) {
                if ((index_to = ffsll(moves)) != 0) {
                    index_to--;
                    add_move(m, index_from, index_to, bishops, find_piece(pos, index_to));
                    moves &= notlinboard[index_to];
                } else {
                    break;
                }
            }
            bishop_pos &= notlinboard[index_from];
        } else {
            break;
        }
    }
}
Example No. 3
// get/wait for next item for consumer.
// Does not set any bmap bits. Item must remain in buffer
// until we are done with it.
qitem* queue_pop(queue *q)
{
	qitem *buf        = (qitem*)(q->buf + q->mapbytes*2);
	atomic_llong *map = (atomic_llong*)(q->buf + q->mapbytes);
	int sbit;
	int next;

	// printf("POP %lld, %lld %d\n",(long long int)q->buf, (long long int)map, q->mapbytes);

	while (1)
	{
		long long int mval = atomic_load(&map[q->last_map_pos]);

		if ((sbit = ffsll(mval & (~q->visited))))
		{
			// printf("SET BIT ! %d %d %lld\n",sbit,q->last_map_pos, mval);
			--sbit;
			q->visited |= (((long long int)1) << sbit);
			atomic_fetch_sub(&q->size, 1);
			return (qitem*)&buf[q->last_map_pos*64 + sbit];
		}

		if (q->last_map_pos == q->map_elements-1)
		{
			next = 0;
		}
		else
		{
			next = q->last_map_pos+1;
		}

		q->last_map_pos = next;
		q->visited = (long long int)0;
		mval = atomic_load(&map[next]);
		if ((sbit = ffsll(mval)))
		{
			--sbit;
			q->visited |= (((long long int)1) << sbit);
			atomic_fetch_sub(&q->size, 1);
			return (qitem*)&buf[next*64 + sbit];
		}
		else
		{
			q->last_map_pos = 0;
		}

		usleep(DELAY_BY);
	}
}
Example No. 4
int _gnix_find_first_zero_bit(gnix_bitmap_t *bitmap)
{
	int i, pos;
	gnix_bitmap_value_t value;

	for (i = 0, pos = 0;
			i < GNIX_BITMAP_BLOCKS(bitmap->length);
			++i, pos += GNIX_BITMAP_BUCKET_LENGTH) {
		/* invert the bits to check for first zero bit */
		value = ~(__gnix_load_block(bitmap, i));

		if (value != 0) {
			/* no need to check for errors because we have
			   established there is an unset bit */
			pos += ffsll(value) - 1;

			if (pos < bitmap->length)
				return pos;
			else
				return -FI_EAGAIN;
		}
	}

	return -FI_EAGAIN;
}
Example No. 5
picture_t *picture_pool_Wait(picture_pool_t *pool)
{
    unsigned i;

    vlc_mutex_lock(&pool->lock);
    assert(pool->refs > 0);

    while (pool->available == 0)
        vlc_cond_wait(&pool->wait, &pool->lock);

    i = ffsll(pool->available);
    assert(i > 0);
    pool->available &= ~(1ULL << (i - 1));
    vlc_mutex_unlock(&pool->lock);

    picture_t *picture = pool->picture[i - 1];

    if (pool->pic_lock != NULL && pool->pic_lock(picture) != 0) {
        vlc_mutex_lock(&pool->lock);
        pool->available |= 1ULL << (i - 1);
        vlc_cond_signal(&pool->wait);
        vlc_mutex_unlock(&pool->lock);
        return NULL;
    }

    picture_t *clone = picture_pool_ClonePicture(pool, i - 1);
    if (clone != NULL) {
        assert(clone->p_next == NULL);
        atomic_fetch_add(&pool->refs, 1);
    }
    return clone;
}
Example No. 6
inline void king_move(struct position *pos, struct move_array *m, unsigned char king)
{
    unsigned char from = ffsll(pos->pieces[king]) - 1;
    uint64_t moves = kingmoves[from] & ~pos->sumpieces[king & COLOR];

    unsigned char index;
    for (int i = 0; i < 8; i++) {
        if ((index = ffsll(moves)) != 0) {
            index--;
            add_move(m, from, index, king, find_piece(pos, index));
            moves &= notlinboard[index];
        } else {
            break;
        }
    }
}
Example No. 7
picture_t *picture_pool_Get(picture_pool_t *pool)
{
    vlc_mutex_lock(&pool->lock);
    assert(pool->refs > 0);

    for (unsigned i = ffsll(pool->available); i; i = fnsll(pool->available, i))
    {
        pool->available &= ~(1ULL << (i - 1));
        vlc_mutex_unlock(&pool->lock);

        picture_t *picture = pool->picture[i - 1];

        if (pool->pic_lock != NULL && pool->pic_lock(picture) != 0) {
            vlc_mutex_lock(&pool->lock);
            pool->available |= 1ULL << (i - 1);
            continue;
        }

        picture_t *clone = picture_pool_ClonePicture(pool, i - 1);
        if (clone != NULL) {
            assert(clone->p_next == NULL);
            atomic_fetch_add(&pool->refs, 1);
        }
        return clone;
    }

    vlc_mutex_unlock(&pool->lock);
    return NULL;
}
Example No. 8
bool
_mesa_all_varyings_in_vbos(const struct gl_vertex_array_object *vao)
{
   /* Walk those enabled arrays that have the default vbo attached */
   GLbitfield64 mask = vao->_Enabled & ~vao->VertexAttribBufferMask;

   while (mask) {
      /* Do not use u_bit_scan64 as we can walk multiple
       * attrib arrays at once
       */
      const int i = ffsll(mask) - 1;
      const struct gl_vertex_attrib_array *attrib_array =
         &vao->VertexAttrib[i];
      const struct gl_vertex_buffer_binding *buffer_binding =
         &vao->VertexBinding[attrib_array->VertexBinding];

      /* Only enabled arrays shall appear in the _Enabled bitmask */
      assert(attrib_array->Enabled);
      /* We have already masked out vao->VertexAttribBufferMask */
      assert(!_mesa_is_bufferobj(buffer_binding->BufferObj));

      /* Bail out once we find the first non-VBO with a non-zero stride */
      if (buffer_binding->Stride != 0)
         return false;

      /* Note that we cannot use the xor variant since the _BoundArray mask
       * may contain array attributes that are bound but not enabled.
       */
      mask &= ~buffer_binding->_BoundArrays;
   }

   return true;
}
Example No. 9
inline uint64_t is_check(struct position *pos)
{
    unsigned char king_pos = ffsll(pos->pieces[wking_n | pos->tomove]) - 1;
    uint64_t bishop_checks = bishopmoves(pos, king_pos) & (pos->pieces[wbishops_n | pos->towait] | pos->pieces[wqueens_n | pos->towait]);
    uint64_t knight_checks = knightmoves[king_pos] & (pos->pieces[wknights_n | pos->towait]);
    uint64_t rook_checks = rookmoves(pos, king_pos) & (pos->pieces[wrooks_n | pos->towait] | pos->pieces[wqueens_n | pos->towait]);
    uint64_t pawn_checks = pawn_attacks[pos->towait][king_pos] & (pos->pieces[wpawns_n | pos->towait]);
    return (bishop_checks | knight_checks | rook_checks | pawn_checks);
}
Example No. 10
int FindSetBit(BitBoard b)
{
#if HAVE_FFSLL
    return 64 - ffsll(b);
#else
    // return ffsl(b) - 1;
    union {
        BitBoard b;
        unsigned short sh[4];
        unsigned char  ch[8];
    } d;

    d.b = b;

#ifdef WORDS_BIGENDIAN
    if(d.sh[1]) return FirstBit16[d.sh[1]] + 16;
    if(d.sh[2]) return FirstBit16[d.sh[2]] + 32;
    if(d.sh[0]) return FirstBit16[d.sh[0]];
    return FirstBit16[d.sh[3]] + 48;
#else

#if USE_8BIT
    if(d.sh[1]) {
        if(d.ch[3]) return FirstBit8[d.ch[3]] + 32;
        else        return FirstBit8[d.ch[2]] + 40;
    }
    if(d.sh[2]) {
        if(d.ch[4]) return FirstBit8[d.ch[4]] + 24;
        else        return FirstBit8[d.ch[5]] + 16;
    }
    if(d.sh[0]) {
        if(d.ch[1]) return FirstBit8[d.ch[1]] + 48;
        else        return FirstBit8[d.ch[0]] + 56;
    }
    if(d.ch[6]) return FirstBit8[d.ch[6]] + 8;
    else        return FirstBit8[d.ch[7]];
#endif /* USE_8BIT */

#if USE_16BIT
    if(d.sh[1]) {
        return FirstBit16[d.sh[1]] + 32;
    }
    if(d.sh[2]) {
        return FirstBit16[d.sh[2]] + 16;
    }
    if(d.sh[0]) {
        return FirstBit16[d.sh[0]] + 48;
    }
    return FirstBit16[d.sh[3]];
#endif /* USE_16BIT */

#endif
#endif
}
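The HAVE_FFSLL branch returns 64 - ffsll(b), which implies this engine numbers bits from the most significant end: the least significant set bit maps to index 63 and the most significant to index 0. A minimal sanity check of that reading (assuming glibc, where ffsll needs _GNU_SOURCE and <string.h>; BSDs declare it in <strings.h>):

#define _GNU_SOURCE
#include <assert.h>
#include <string.h>

int main(void)
{
    assert(64 - ffsll(1LL) == 63);        /* LSB set -> index 63 */
    assert(64 - ffsll(1LL << 62) == 1);   /* bit 62 -> index 1   */
    return 0;
}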
Example No. 11
static void test_ffs(void *p)
{
	/* ffs */
	int_check(ffs(0), 0);
	int_check(ffs(1), 1);
	int_check(ffs(3), 1);
	int_check(ffs((int)-1), 1);
	int_check(ffs(ror32(1,1)), 32);

	/* ffsl */
	int_check(ffsl(0), 0);
	int_check(ffsl(1), 1);
	int_check(ffsl(3), 1);
	int_check(ffsl((long)-1), 1);
	if (sizeof(long) == 4)
		int_check(ffsl(ror32(1,1)), 32);
	else
		int_check(ffsl(ror64(1,1)), 64);

	/* ffsll */
	int_check(ffsll(0), 0);
	int_check(ffsll(1), 1);
	int_check(ffsll(3), 1);
	int_check(ffsll((long long)-1), 1);
	ull_check((1ULL << 63), ror64(1,1));
	int_check(ffsll(1ULL << 63), 64);
	int_check(ffsll(ror64(1,1)), 64);
end:;
}
Example No. 12
static void scan_table(fd_set *t, table f)
{
    u64 *b = (void *)t;
    unsigned int i;
    for (i = 0; i < (FDSIZE/64); i++) {
        descriptor d;
        while ((d = ffsll(b[i]))) {
            d = (d - 1) + (64 * i);
            FD_CLR(d, t);
            thunk handler = (thunk)table_find(f, (void *)(unsigned long)d);
            table_set(f, (void *)(unsigned long)d, 0);
            apply(handler);
        }
    }
}
Example No. 13
inline void knight_move(struct position *pos, struct move_array *m, unsigned char knights)
{
    uint64_t knight_pos = pos->pieces[knights];
    uint64_t moves;
    unsigned char index_from, index_to;
    for (int i = 0; i < 10; i++) {  // up to 10 knights of the same color can be on the board (a bounded for loop lets the compiler unroll it)
        if ((index_from = ffsll(knight_pos)) != 0) {
            index_from--;
            moves = knightmoves[index_from] & ~pos->sumpieces[knights & COLOR];
            for (int i = 0; i < 8; i++) {
                if ((index_to = ffsll(moves)) != 0) {
                    index_to--;
                    add_move(m, index_from, index_to, knights, find_piece(pos, index_to));
                    moves &= notlinboard[index_to];
                } else {
                    break;
                }
            }
            knight_pos &= notlinboard[index_from];
        } else {
            break;
        }
    }
}
Example No. 14
/**
 * Helper for _mesa_update_array_object_max_element().
 * \return  min(arrayObj->VertexAttrib[*]._MaxElement).
 */
static GLuint
compute_max_element(struct gl_array_object *arrayObj, GLbitfield64 enabled)
{
   GLuint min = ~((GLuint)0);
   
   while (enabled) {
      struct gl_client_array *client_array;
      GLint attrib = ffsll(enabled) - 1;
      enabled ^= BITFIELD64_BIT(attrib);
      
      client_array = &arrayObj->VertexAttrib[attrib];
      assert(client_array->Enabled);
      _mesa_update_array_max_element(client_array);
      min = MIN2(min, client_array->_MaxElement);
   }
   
   return min;
}
Example No. 15
StreamIDPtr StreamManager::get()
{

    if ( m_numStreams < 2 )
    {
        return m_default;
    }

    // we've got too many streams so use the locking version
    if ( m_numStreams > sizeof(m_streams) * 8 )
    {
        Alembic::Util::scoped_lock l( m_lock );

        // we've used up more than we have, just return the default
        if ( m_curStream >= m_numStreams )
        {
            return m_default;
        }

        return StreamIDPtr( new StreamID( this,
            m_streamIDs[ m_curStream ++ ] ) );
    }

    // CAS (compare and swap) non locking version
    Alembic::Util::int64_t val = 0;
    Alembic::Util::int64_t oldVal = 0;
    Alembic::Util::int64_t newVal = 0;

    do
    {
        oldVal = m_streams;
        val = ffsll( oldVal );

        if ( val == 0 )
        {
            return m_default;
        }

        newVal = oldVal & ~( ( Alembic::Util::int64_t )1 << (val - 1) );
    }
    while ( !__sync_bool_compare_and_swap( &m_streams, oldVal, newVal ) );

    return StreamIDPtr( new StreamID( this, ( std::size_t ) val - 1 ) );
}
Example No. 16
static void vhost_dev_sync_region(struct vhost_dev *dev,
                                  MemoryRegionSection *section,
                                  uint64_t mfirst, uint64_t mlast,
                                  uint64_t rfirst, uint64_t rlast)
{
    uint64_t start = MAX(mfirst, rfirst);
    uint64_t end = MIN(mlast, rlast);
    vhost_log_chunk_t *from = dev->log + start / VHOST_LOG_CHUNK;
    vhost_log_chunk_t *to = dev->log + end / VHOST_LOG_CHUNK + 1;
    uint64_t addr = (start / VHOST_LOG_CHUNK) * VHOST_LOG_CHUNK;

    if (end < start) {
        return;
    }
    assert(end / VHOST_LOG_CHUNK < dev->log_size);
    assert(start / VHOST_LOG_CHUNK < dev->log_size);

    for (;from < to; ++from) {
        vhost_log_chunk_t log;
        int bit;
        /* We first check with non-atomic: much cheaper,
         * and we expect non-dirty to be the common case. */
        if (!*from) {
            addr += VHOST_LOG_CHUNK;
            continue;
        }
        /* Data must be read atomically. We don't really
         * need the barrier semantics of __sync
         * builtins, but it's easier to use them than
         * roll our own. */
        log = __sync_fetch_and_and(from, 0);
        while ((bit = sizeof(log) > sizeof(int) ?
                ffsll(log) : ffs(log))) {
            ram_addr_t ram_addr;
            bit -= 1;
            ram_addr = section->offset_within_region + bit * VHOST_LOG_PAGE;
            memory_region_set_dirty(section->mr, ram_addr, VHOST_LOG_PAGE);
            log &= ~(0x1ull << bit);
        }
        addr += VHOST_LOG_CHUNK;
    }
}
Example No. 17
static Int
msb(Int inp USES_REGS)	/* calculate the most significant bit for an integer */
{
  /* the obvious solution: do it by using binary search */
  Int out = 0;

  if (inp < 0) {
    return Yap_ArithError(DOMAIN_ERROR_NOT_LESS_THAN_ZERO, MkIntegerTerm(inp),
	      "msb/1 received %d", inp);
  }

#if HAVE__BUILTIN_FFSLL
      out = __builtin_ffsll(inp);
#elif HAVE_FFSLL
      out = ffsll(inp);
#else
  if (inp==0)
    return 0L;
#if SIZEOF_INT_P == 8
  if (inp & ((CELL)0xffffffffLL << 32)) {inp >>= 32; out += 32;}
Example No. 18
/* This should be called with global_sem protection */
int get_new_thread_id(pthread_t *thread){

    long long int map;
    struct timeval l_tv;

    map = sleep_map_array[0];

    /* First 32 thread slots are for slurmd, last 32 ones for slurmctld */
    if(slurmd_pid[0] == getpid())
        map |= 0xFFFFFFFF00000000ULL;
    else
        map |= 0xFFFFFFFFULL;

    map = ~map;

#if 0
    real_gettimeofday(&l_tv, NULL);
    sim_lib_printf(0, "[%ld-%ld] Using map: %016llx\n", l_tv.tv_sec, l_tv.tv_usec, map);
    sim_lib_printf(0, "get_new_thread_id: [%16llx][%016llx], threads counter= %d\n", sleep_map_array[0], thread_exit_array[0], current_threads[0]);
#endif

    /* Getting first slot available */
    map = ffsll(map);

    if(map == 0){
        /*printf("WARNING!: space no available for a new threads. Current threads: %u\n", current_threads[0]);*/
        return -1;
    }

    /* ffsll returns a 1-based ordinal, so convert to a 0-based bit index */
    map = map - 1;

    sleep_map_array[0] |= (1ULL << map);
    current_threads[0]++;
    if(current_threads[0] == 62){   /* 62 because we have slots for main slurmctl and slurmd threads */
        printf("SIM ERROR: %d threads is not possible\n", current_threads[0]);
        return -1;
    }

    return map;
}
Example No. 19
static int freemap_alloc(freemap_t *freemap)
{
    bucket_t  mask;
    bucket_t *bucket;
    int       nbucket;
    bucket_t *buckets;
    int       bucket_idx;
    int       bit_idx;
    int       index;
    size_t    size;

    for (bucket_idx = 0;   bucket_idx < freemap->nbucket;   bucket_idx++) {
        bucket = freemap->buckets + bucket_idx;

        if (*bucket && (bit_idx = ffsll(*bucket) - 1) >= 0) {
            index = bucket_idx * bits_per_bucket + bit_idx;
            mask  = ~(((bucket_t)1) << bit_idx);
            *bucket &= mask;
            return index;
        }
    }

    index   = bucket_idx * bits_per_bucket;
    nbucket = bucket_idx + 1;
    size    = sizeof(bucket_t) * nbucket;
    buckets = realloc(freemap->buckets, size);

    if (!buckets) {
        errno = ENOMEM;
        return HANDLE_INDEX_INVALID;
    }

    buckets[bucket_idx] = ~((bucket_t)1);

    freemap->nbucket = nbucket;
    freemap->buckets = buckets;

    return index;
}
Example No. 20
/**
 * Updates the derived gl_client_arrays when a gl_vertex_attrib_array
 * or a gl_vertex_buffer_binding has changed.
 */
void
_mesa_update_vao_client_arrays(struct gl_context *ctx,
                               struct gl_vertex_array_object *vao)
{
   GLbitfield64 arrays = vao->NewArrays;

   while (arrays) {
      struct gl_client_array *client_array;
      struct gl_vertex_attrib_array *attrib_array;
      struct gl_vertex_buffer_binding *buffer_binding;

      GLint attrib = ffsll(arrays) - 1;
      arrays ^= BITFIELD64_BIT(attrib);

      attrib_array = &vao->VertexAttrib[attrib];
      buffer_binding = &vao->VertexBinding[attrib_array->VertexBinding];
      client_array = &vao->_VertexAttrib[attrib];

      _mesa_update_client_array(ctx, client_array, attrib_array,
                                buffer_binding);
   }
}
Example No. 21
static void
ntb_transport_doorbell_callback(void *data, uint32_t vector)
{
	struct ntb_transport_ctx *nt = data;
	struct ntb_transport_qp *qp;
	struct _qpset db_bits;
	uint64_t vec_mask;
	unsigned qp_num;

	BIT_COPY(QP_SETSIZE, &nt->qp_bitmap, &db_bits);
	BIT_NAND(QP_SETSIZE, &db_bits, &nt->qp_bitmap_free);

	vec_mask = ntb_db_vector_mask(nt->ntb, vector);
	while (vec_mask != 0) {
		qp_num = ffsll(vec_mask) - 1;

		if (test_bit(qp_num, &db_bits)) {
			qp = &nt->qp_vec[qp_num];
			taskqueue_enqueue(taskqueue_swi, &qp->rxc_db_work);
		}

		vec_mask &= ~(1ull << qp_num);
	}
}
Example No. 22
void
brw_prepare_vertices(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_VS_PROG_DATA */
   const struct brw_vs_prog_data *vs_prog_data =
      brw_vs_prog_data(brw->vs.base.prog_data);
   GLbitfield64 vs_inputs = vs_prog_data->inputs_read;
   const unsigned char *ptr = NULL;
   GLuint interleaved = 0;
   unsigned int min_index = brw->vb.min_index + brw->basevertex;
   unsigned int max_index = brw->vb.max_index + brw->basevertex;
   unsigned i;
   int delta, j;

   struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
   GLuint nr_uploads = 0;

   /* _NEW_POLYGON
    *
    * On gen6+, edge flags don't end up in the VUE (either in or out of the
    * VS).  Instead, they're uploaded as the last vertex element, and the data
    * is passed sideband through the fixed function units.  So, we need to
    * prepare the vertex buffer for it, but it's not present in inputs_read.
    */
   if (brw->gen >= 6 && (ctx->Polygon.FrontMode != GL_FILL ||
                           ctx->Polygon.BackMode != GL_FILL)) {
      vs_inputs |= VERT_BIT_EDGEFLAG;
   }

   if (0)
      fprintf(stderr, "%s %d..%d\n", __func__, min_index, max_index);

   /* Accumulate the list of enabled arrays. */
   brw->vb.nr_enabled = 0;
   while (vs_inputs) {
      GLuint index = ffsll(vs_inputs) - 1;
      struct brw_vertex_element *input = &brw->vb.inputs[index];

      vs_inputs &= ~BITFIELD64_BIT(index);
      brw->vb.enabled[brw->vb.nr_enabled++] = input;
   }

   if (brw->vb.nr_enabled == 0)
      return;

   if (brw->vb.nr_buffers)
      return;

   /* The range of data in a given buffer represented as [min, max) */
   struct intel_buffer_object *enabled_buffer[VERT_ATTRIB_MAX];
   uint32_t buffer_range_start[VERT_ATTRIB_MAX];
   uint32_t buffer_range_end[VERT_ATTRIB_MAX];

   for (i = j = 0; i < brw->vb.nr_enabled; i++) {
      struct brw_vertex_element *input = brw->vb.enabled[i];
      const struct gl_client_array *glarray = input->glarray;

      if (_mesa_is_bufferobj(glarray->BufferObj)) {
	 struct intel_buffer_object *intel_buffer =
	    intel_buffer_object(glarray->BufferObj);

         const uint32_t offset = (uintptr_t)glarray->Ptr;

         /* Start with the worst case */
         uint32_t start = 0;
         uint32_t range = intel_buffer->Base.Size;
         if (glarray->InstanceDivisor) {
            if (brw->num_instances) {
               start = offset + glarray->StrideB * brw->baseinstance;
               range = (glarray->StrideB * ((brw->num_instances - 1) /
                                            glarray->InstanceDivisor) +
                        glarray->_ElementSize);
            }
         } else {
            if (brw->vb.index_bounds_valid) {
               start = offset + min_index * glarray->StrideB;
               range = (glarray->StrideB * (max_index - min_index) +
                        glarray->_ElementSize);
            }
         }

	 /* If we have a VB set to be uploaded for this buffer object
	  * already, reuse that VB state so that we emit fewer
	  * relocations.
	  */
	 unsigned k;
	 for (k = 0; k < i; k++) {
	    const struct gl_client_array *other = brw->vb.enabled[k]->glarray;
	    if (glarray->BufferObj == other->BufferObj &&
		glarray->StrideB == other->StrideB &&
		glarray->InstanceDivisor == other->InstanceDivisor &&
		(uintptr_t)(glarray->Ptr - other->Ptr) < glarray->StrideB)
	    {
	       input->buffer = brw->vb.enabled[k]->buffer;
	       input->offset = glarray->Ptr - other->Ptr;

               buffer_range_start[input->buffer] =
                  MIN2(buffer_range_start[input->buffer], start);
               buffer_range_end[input->buffer] =
                  MAX2(buffer_range_end[input->buffer], start + range);
	       break;
	    }
	 }
	 if (k == i) {
	    struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];

	    /* Named buffer object: Just reference its contents directly. */
	    buffer->offset = offset;
	    buffer->stride = glarray->StrideB;
	    buffer->step_rate = glarray->InstanceDivisor;
            buffer->size = glarray->BufferObj->Size - offset;

            enabled_buffer[j] = intel_buffer;
            buffer_range_start[j] = start;
            buffer_range_end[j] = start + range;

	    input->buffer = j++;
	    input->offset = 0;
	 }
      } else {
	 /* Queue the buffer object up to be uploaded in the next pass,
	  * when we've decided if we're doing interleaved or not.
	  */
	 if (nr_uploads == 0) {
	    interleaved = glarray->StrideB;
	    ptr = glarray->Ptr;
	 }
	 else if (interleaved != glarray->StrideB ||
                  glarray->Ptr < ptr ||
                  (uintptr_t)(glarray->Ptr - ptr) + glarray->_ElementSize > interleaved)
	 {
            /* If our stride is different from the first attribute's stride,
             * or if the first attribute's stride didn't cover our element,
             * disable the interleaved upload optimization.  The second case
             * can most commonly occur in cases where there is a single vertex
             * and, for example, the data is stored on the application's
             * stack.
             *
             * NOTE: This will also disable the optimization in cases where
             * the data is in a different order than the array indices.
             * Something like:
             *
             *     float data[...];
             *     glVertexAttribPointer(0, 4, GL_FLOAT, 32, &data[4]);
             *     glVertexAttribPointer(1, 4, GL_FLOAT, 32, &data[0]);
             */
	    interleaved = 0;
	 }

	 upload[nr_uploads++] = input;
      }
   }

   /* Now that we've set up all of the buffers, we walk through and reference
    * each of them.  We do this late so that we get the right size in each
    * buffer and don't reference too little data.
    */
   for (i = 0; i < j; i++) {
      struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
      if (buffer->bo)
         continue;

      const uint32_t start = buffer_range_start[i];
      const uint32_t range = buffer_range_end[i] - buffer_range_start[i];

      buffer->bo = intel_bufferobj_buffer(brw, enabled_buffer[i], start, range);
      drm_intel_bo_reference(buffer->bo);
   }

   /* If we need to upload all the arrays, then we can trim those arrays to
    * only the used elements [min_index, max_index] so long as we adjust all
    * the values used in the 3DPRIMITIVE i.e. by setting the vertex bias.
    */
   brw->vb.start_vertex_bias = 0;
   delta = min_index;
   if (nr_uploads == brw->vb.nr_enabled) {
      brw->vb.start_vertex_bias = -delta;
      delta = 0;
   }

   /* Handle any arrays to be uploaded. */
   if (nr_uploads > 1) {
      if (interleaved) {
	 struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
	 /* All uploads are interleaved, so upload the arrays together as
	  * interleaved.  First, upload the contents and set up upload[0].
	  */
	 copy_array_to_vbo_array(brw, upload[0], min_index, max_index,
				 buffer, interleaved);
	 buffer->offset -= delta * interleaved;
         buffer->size += delta * interleaved;

	 for (i = 0; i < nr_uploads; i++) {
	    /* Then, just point upload[i] at upload[0]'s buffer. */
	    upload[i]->offset =
	       ((const unsigned char *)upload[i]->glarray->Ptr - ptr);
	    upload[i]->buffer = j;
	 }
	 j++;

	 nr_uploads = 0;
      }
   }
   /* Upload non-interleaved arrays */
   for (i = 0; i < nr_uploads; i++) {
      struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
      if (upload[i]->glarray->InstanceDivisor == 0) {
         copy_array_to_vbo_array(brw, upload[i], min_index, max_index,
                                 buffer, upload[i]->glarray->_ElementSize);
      } else {
         /* This is an instanced attribute, since its InstanceDivisor
          * is not zero. Therefore, its data will be stepped after the
          * instanced draw has been run InstanceDivisor times.
          */
         uint32_t instanced_attr_max_index =
            (brw->num_instances - 1) / upload[i]->glarray->InstanceDivisor;
         copy_array_to_vbo_array(brw, upload[i], 0, instanced_attr_max_index,
                                 buffer, upload[i]->glarray->_ElementSize);
      }
      buffer->offset -= delta * buffer->stride;
      buffer->size += delta * buffer->stride;
      buffer->step_rate = upload[i]->glarray->InstanceDivisor;
      upload[i]->buffer = j++;
      upload[i]->offset = 0;
   }

   brw->vb.nr_buffers = j;
}
Example No. 23
static int
pci_vtblk_init(struct pci_devinst *pi, char *opts)
{
	char bident[sizeof("XX:X:X")];
	struct blockif_ctxt *bctxt;
	MD5_CTX mdctx;
	u_char digest[16];
	struct pci_vtblk_softc *sc;
	off_t size;
	int i, sectsz, sts, sto;

	if (opts == NULL) {
		printf("virtio-block: backing device required\n");
		return (1);
	}

	/*
	 * The supplied backing file has to exist
	 */
	snprintf(bident, sizeof(bident), "%d:%d", pi->pi_slot, pi->pi_func);
	bctxt = blockif_open(opts, bident);
	if (bctxt == NULL) {
		perror("Could not open backing file");
		return (1);
	}

	size = blockif_size(bctxt);
	sectsz = blockif_sectsz(bctxt);
	blockif_psectsz(bctxt, &sts, &sto);

	sc = calloc(1, sizeof(struct pci_vtblk_softc));
	sc->bc = bctxt;
	for (i = 0; i < VTBLK_RINGSZ; i++) {
		struct pci_vtblk_ioreq *io = &sc->vbsc_ios[i];
		io->io_req.br_callback = pci_vtblk_done;
		io->io_req.br_param = io;
		io->io_sc = sc;
		io->io_idx = (uint16_t)i;
	}

	pthread_mutex_init(&sc->vsc_mtx, NULL);

	/* init virtio softc and virtqueues */
	vi_softc_linkup(&sc->vbsc_vs, &vtblk_vi_consts, sc, pi, &sc->vbsc_vq);
	sc->vbsc_vs.vs_mtx = &sc->vsc_mtx;

	sc->vbsc_vq.vq_qsize = VTBLK_RINGSZ;
	/* sc->vbsc_vq.vq_notify = we have no per-queue notify */

	/*
	 * Create an identifier for the backing file. Use parts of the
	 * md5 sum of the filename
	 */
	MD5Init(&mdctx);
	MD5Update(&mdctx, opts, (unsigned)strlen(opts));
	MD5Final(digest, &mdctx);
	snprintf(sc->vbsc_ident, VTBLK_BLK_ID_BYTES, "BHYVE-%02X%02X-%02X%02X-%02X%02X",
	    digest[0], digest[1], digest[2], digest[3], digest[4], digest[5]);

	/* setup virtio block config space */
	sc->vbsc_cfg.vbc_capacity =
		(uint64_t)(size / DEV_BSIZE); /* 512-byte units */
	sc->vbsc_cfg.vbc_size_max = 0;	/* not negotiated */
	sc->vbsc_cfg.vbc_seg_max = BLOCKIF_IOV_MAX;
	sc->vbsc_cfg.vbc_geometry.cylinders = 0;	/* no geometry */
	sc->vbsc_cfg.vbc_geometry.heads = 0;
	sc->vbsc_cfg.vbc_geometry.sectors = 0;
	sc->vbsc_cfg.vbc_blk_size = (uint32_t)sectsz;
	sc->vbsc_cfg.vbc_topology.physical_block_exp =
	    (uint8_t)((sts > sectsz) ? (ffsll(sts / sectsz) - 1) : 0);
	sc->vbsc_cfg.vbc_topology.alignment_offset =
	    (uint8_t)((sto != 0) ? ((sts - sto) / sectsz) : 0);
	sc->vbsc_cfg.vbc_topology.min_io_size = 0;
	sc->vbsc_cfg.vbc_topology.opt_io_size = 0;
	sc->vbsc_cfg.vbc_writeback = 0;

	/*
	 * Should we move some of this into virtio.c?  Could
	 * have the device, class, and subdev_0 as fields in
	 * the virtio constants structure.
	 */
	pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_BLOCK);
	pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
	pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_BLOCK);
	pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);

	if (vi_intr_init(&sc->vbsc_vs, 1, fbsdrun_virtio_msix())) {
		blockif_close(sc->bc);
		free(sc);
		return (1);
	}
	vi_set_io_bar(&sc->vbsc_vs, 0);
	return (0);
}
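The physical_block_exp computation above uses ffsll as an integer log2: when sts/sectsz is a power of two, its only set bit is the first one found, so ffsll(sts / sectsz) - 1 is the exponent. A small illustration with assumed values (not taken from bhyve):

#define _GNU_SOURCE
#include <assert.h>
#include <string.h>   /* ffsll (glibc); BSDs declare it in <strings.h> */

int main(void)
{
    /* e.g. 4096-byte physical sectors exposed as 512-byte logical sectors */
    long long sts = 4096, sectsz = 512;
    assert(ffsll(sts / sectsz) - 1 == 3);   /* 8 = 2^3 logical per physical */
    return 0;
}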
Example No. 24
U64 QMagicHash::MagicBishopMoves(const U64& occ, const U64& loc)
{
    return MagicBishopMoves(occ, ffsll(loc));
}
Example No. 25
/** Find next (bit) set */
static int fnsll(unsigned long long x, unsigned i)
{
    if (i >= CHAR_BIT * sizeof (x))
        return 0;
    return ffsll(x & ~((1ULL << i) - 1));
}
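A minimal usage sketch of the ffsll/fnsll pair — the same enumeration idiom picture_pool_Get relies on in Example No. 7 (the mask value is illustrative):

#define _GNU_SOURCE
#include <limits.h>
#include <stdio.h>
#include <string.h>   /* ffsll (glibc); BSDs declare it in <strings.h> */

static int fnsll(unsigned long long x, unsigned i)   /* as defined above */
{
    if (i >= CHAR_BIT * sizeof (x))
        return 0;
    return ffsll(x & ~((1ULL << i) - 1));
}

int main(void)
{
    unsigned long long mask = 0x805ULL;        /* bits 0, 2, and 11 set */
    for (unsigned i = ffsll(mask); i; i = fnsll(mask, i))
        printf("bit %u is set\n", i - 1);      /* prints 0, 2, 11 */
    return 0;
}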
Example No. 26
void
brw_prepare_vertices(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* CACHE_NEW_VS_PROG */
   GLbitfield64 vs_inputs = brw->vs.prog_data->inputs_read;
   const unsigned char *ptr = NULL;
   GLuint interleaved = 0;
   unsigned int min_index = brw->vb.min_index + brw->basevertex;
   unsigned int max_index = brw->vb.max_index + brw->basevertex;
   int delta, i, j;

   struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
   GLuint nr_uploads = 0;

   /* _NEW_POLYGON
    *
    * On gen6+, edge flags don't end up in the VUE (either in or out of the
    * VS).  Instead, they're uploaded as the last vertex element, and the data
    * is passed sideband through the fixed function units.  So, we need to
    * prepare the vertex buffer for it, but it's not present in inputs_read.
    */
   if (brw->gen >= 6 && (ctx->Polygon.FrontMode != GL_FILL ||
                           ctx->Polygon.BackMode != GL_FILL)) {
      vs_inputs |= VERT_BIT_EDGEFLAG;
   }

   if (0)
      fprintf(stderr, "%s %d..%d\n", __FUNCTION__, min_index, max_index);

   /* Accumulate the list of enabled arrays. */
   brw->vb.nr_enabled = 0;
   while (vs_inputs) {
      GLuint i = ffsll(vs_inputs) - 1;
      struct brw_vertex_element *input = &brw->vb.inputs[i];

      vs_inputs &= ~BITFIELD64_BIT(i);
      brw->vb.enabled[brw->vb.nr_enabled++] = input;
   }

   if (brw->vb.nr_enabled == 0)
      return;

   if (brw->vb.nr_buffers)
      return;

   for (i = j = 0; i < brw->vb.nr_enabled; i++) {
      struct brw_vertex_element *input = brw->vb.enabled[i];
      const struct gl_client_array *glarray = input->glarray;

      if (_mesa_is_bufferobj(glarray->BufferObj)) {
	 struct intel_buffer_object *intel_buffer =
	    intel_buffer_object(glarray->BufferObj);
	 int k;

	 /* If we have a VB set to be uploaded for this buffer object
	  * already, reuse that VB state so that we emit fewer
	  * relocations.
	  */
	 for (k = 0; k < i; k++) {
	    const struct gl_client_array *other = brw->vb.enabled[k]->glarray;
	    if (glarray->BufferObj == other->BufferObj &&
		glarray->StrideB == other->StrideB &&
		glarray->InstanceDivisor == other->InstanceDivisor &&
		(uintptr_t)(glarray->Ptr - other->Ptr) < glarray->StrideB)
	    {
	       input->buffer = brw->vb.enabled[k]->buffer;
	       input->offset = glarray->Ptr - other->Ptr;
	       break;
	    }
	 }
	 if (k == i) {
	    struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];

	    /* Named buffer object: Just reference its contents directly. */
	    buffer->offset = (uintptr_t)glarray->Ptr;
	    buffer->stride = glarray->StrideB;
	    buffer->step_rate = glarray->InstanceDivisor;

            uint32_t offset, size;
            if (glarray->InstanceDivisor) {
               offset = buffer->offset;
               size = (buffer->stride * ((brw->num_instances /
                                          glarray->InstanceDivisor) - 1) +
                       glarray->_ElementSize);
            } else {
               if (min_index == -1) {
                  offset = 0;
                  size = intel_buffer->Base.Size;
               } else {
                  offset = buffer->offset + min_index * buffer->stride;
                  size = (buffer->stride * (max_index - min_index) +
                          glarray->_ElementSize);
               }
            }
            buffer->bo = intel_bufferobj_buffer(brw, intel_buffer,
                                                offset, size);
            drm_intel_bo_reference(buffer->bo);

	    input->buffer = j++;
	    input->offset = 0;
	 }

	 /* This is a common place to reach if the user mistakenly supplies
	  * a pointer in place of a VBO offset.  If we just let it go through,
	  * we may end up dereferencing a pointer beyond the bounds of the
	  * GTT.  We would hope that the VBO's max_index would save us, but
	  * Mesa appears to hand us min/max values not clipped to the
	  * array object's _MaxElement, and _MaxElement frequently appears
	  * to be wrong anyway.
	  *
	  * The VBO spec allows application termination in this case, and it's
	  * probably a service to the poor programmer to do so rather than
	  * trying to just not render.
	  */
	 assert(input->offset < brw->vb.buffers[input->buffer].bo->size);
      } else {
	 /* Queue the buffer object up to be uploaded in the next pass,
	  * when we've decided if we're doing interleaved or not.
	  */
	 if (nr_uploads == 0) {
	    interleaved = glarray->StrideB;
	    ptr = glarray->Ptr;
	 }
	 else if (interleaved != glarray->StrideB ||
                  glarray->Ptr < ptr ||
                  (uintptr_t)(glarray->Ptr - ptr) + glarray->_ElementSize > interleaved)
	 {
            /* If our stride is different from the first attribute's stride,
             * or if the first attribute's stride didn't cover our element,
             * disable the interleaved upload optimization.  The second case
             * can most commonly occur in cases where there is a single vertex
             * and, for example, the data is stored on the application's
             * stack.
             *
             * NOTE: This will also disable the optimization in cases where
             * the data is in a different order than the array indices.
             * Something like:
             *
             *     float data[...];
             *     glVertexAttribPointer(0, 4, GL_FLOAT, 32, &data[4]);
             *     glVertexAttribPointer(1, 4, GL_FLOAT, 32, &data[0]);
             */
	    interleaved = 0;
	 }

	 upload[nr_uploads++] = input;
      }
   }

   /* If we need to upload all the arrays, then we can trim those arrays to
    * only the used elements [min_index, max_index] so long as we adjust all
    * the values used in the 3DPRIMITIVE i.e. by setting the vertex bias.
    */
   brw->vb.start_vertex_bias = 0;
   delta = min_index;
   if (nr_uploads == brw->vb.nr_enabled) {
      brw->vb.start_vertex_bias = -delta;
      delta = 0;
   }

   /* Handle any arrays to be uploaded. */
   if (nr_uploads > 1) {
      if (interleaved) {
	 struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
	 /* All uploads are interleaved, so upload the arrays together as
	  * interleaved.  First, upload the contents and set up upload[0].
	  */
	 copy_array_to_vbo_array(brw, upload[0], min_index, max_index,
				 buffer, interleaved);
	 buffer->offset -= delta * interleaved;

	 for (i = 0; i < nr_uploads; i++) {
	    /* Then, just point upload[i] at upload[0]'s buffer. */
	    upload[i]->offset =
	       ((const unsigned char *)upload[i]->glarray->Ptr - ptr);
	    upload[i]->buffer = j;
	 }
	 j++;

	 nr_uploads = 0;
      }
   }
   /* Upload non-interleaved arrays */
   for (i = 0; i < nr_uploads; i++) {
      struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
      if (upload[i]->glarray->InstanceDivisor == 0) {
         copy_array_to_vbo_array(brw, upload[i], min_index, max_index,
                                 buffer, upload[i]->glarray->_ElementSize);
      } else {
         /* This is an instanced attribute, since its InstanceDivisor
          * is not zero. Therefore, its data will be stepped after the
          * instanced draw has been run InstanceDivisor times.
          */
         uint32_t instanced_attr_max_index =
            (brw->num_instances - 1) / upload[i]->glarray->InstanceDivisor;
         copy_array_to_vbo_array(brw, upload[i], 0, instanced_attr_max_index,
                                 buffer, upload[i]->glarray->_ElementSize);
      }
      buffer->offset -= delta * buffer->stride;
      buffer->step_rate = upload[i]->glarray->InstanceDivisor;
      upload[i]->buffer = j++;
      upload[i]->offset = 0;
   }

   brw->vb.nr_buffers = j;
}
Example No. 27
int
fw_writefw(struct devicelist *flashdev)
{
	libscsi_hdl_t	*handle;
	libscsi_target_t *target;
	libscsi_errno_t serr;
	size_t maxxfer, nwrite;
	uint8_t align;
	int ret = FWFLASH_FAILURE;

	if ((verifier == NULL) || (verifier->imgsize == 0) ||
	    (verifier->fwimage == NULL)) {
		/* should _NOT_ happen */
		logmsg(MSG_ERROR,
		    gettext("%s: Firmware image has not been verified\n"),
		    flashdev->drvname);
		return (FWFLASH_FAILURE);
	}

	if ((handle = libscsi_init(LIBSCSI_VERSION, &serr)) == NULL) {
		logmsg(MSG_ERROR, gettext("%s: failed to initialize libscsi\n"),
		    flashdev->drvname);
		return (FWFLASH_FAILURE);
	}

	if ((target = libscsi_open(handle, NULL, flashdev->access_devname)) ==
	    NULL) {
		logmsg(MSG_ERROR,
		    gettext("%s: unable to open device %s\n"),
		    flashdev->drvname, flashdev->access_devname);
		libscsi_fini(handle);
		return (FWFLASH_FAILURE);
	}

	if (libscsi_max_transfer(target, &maxxfer) != 0) {
		logmsg(MSG_ERROR, gettext("%s: failed to determine device "
		    "maximum transfer size: %s\n"), flashdev->drvname,
		    libscsi_errmsg(handle));
		goto err;
	}

	if (sdfw_read_descriptor(flashdev, handle, target, &align) !=
	    FWFLASH_SUCCESS) {
		goto err;
	}

	/*
	 * If the maximum transfer size is less than the maximum image size then
	 * we have to do some additional work. We need to read the descriptor
	 * via a READ BUFFER command and make sure that we support the required
	 * offset alignment. Note that an alignment of 0xff indicates that the
	 * device does not support partial writes and must receive the firmware
	 * in a single WRITE BUFFER.  Otherwise a value in align represents a
	 * required offset alignment of 2^off. From there, we make sure that
	 * this works for our partial write size and that our partial write size
	 * fits in the maximum transfer size.
	 */
	if (maxxfer < verifier->imgsize) {
		logmsg(MSG_INFO, "%s: Maximum transfer is %u, required "
		    "alignment is 2^%d\n", flashdev->drvname, maxxfer, align);
		if (FW_SD_PARTIAL_WRITE_SIZE > maxxfer) {
			logmsg(MSG_ERROR, gettext("%s: cannot write firmware "
			    "image: HBA enforces a maximum transfer size of "
			    "%u bytes, but the default partial transfer size "
			    "is %u bytes\n"), flashdev->drvname, maxxfer,
			    FW_SD_PARTIAL_WRITE_SIZE);
			goto err;
		}
		maxxfer = FW_SD_PARTIAL_WRITE_SIZE;

		if (ffsll(maxxfer) < align || align == 0xff) {
			logmsg(MSG_ERROR, gettext("%s: cannot write firmware "
			    "image: device requires partial writes aligned "
			    "to an unsupported value\n"), flashdev->drvname);
			goto err;
		}

		logmsg(MSG_INFO, "%s: final transfer block size is %u\n",
		    flashdev->drvname, maxxfer);
	}

	logmsg(MSG_INFO, "%s: Writing out %u bytes to %s\n", flashdev->drvname,
	    verifier->imgsize, flashdev->access_devname);
	nwrite = 0;
	for (;;) {
		uintptr_t buf;
		size_t towrite = MIN(maxxfer, verifier->imgsize - nwrite);

		if (towrite == 0)
			break;

		buf = (uintptr_t)verifier->fwimage;
		buf += nwrite;

		if (sdfw_write(flashdev, handle, target, towrite, nwrite,
		    (void *)buf) != FWFLASH_SUCCESS) {
			logmsg(MSG_ERROR, gettext("%s: failed to write to %s "
			    "successfully: %s\n"), flashdev->drvname,
			    flashdev->access_devname, libscsi_errmsg(handle));
			goto err;
		}

		nwrite += towrite;
	}

	logmsg(MSG_ERROR, gettext("Note: For flash based disks "
	    "(SSD, etc). You may need power off the system to wait a "
	    "few minutes for supercap to fully discharge, then power "
	    "on the system again to activate the new firmware\n"));
	ret = FWFLASH_SUCCESS;

err:
	if (target != NULL)
		libscsi_close(handle, target);
	if (handle != NULL)
		libscsi_fini(handle);

	return (ret);
}
Example No. 28
static void brw_prepare_vertices(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->intel.ctx;
   struct intel_context *intel = intel_context(ctx);
   /* CACHE_NEW_VS_PROG */
   GLbitfield64 vs_inputs = brw->vs.prog_data->inputs_read;
   const unsigned char *ptr = NULL;
   GLuint interleaved = 0, total_size = 0;
   unsigned int min_index = brw->vb.min_index;
   unsigned int max_index = brw->vb.max_index;
   int delta, i, j;
   GLboolean can_merge_uploads = GL_TRUE;

   struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
   GLuint nr_uploads = 0;

   /* First build an array of pointers to ve's in vb.inputs_read
    */
   if (0)
      printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);

   /* Accumulate the list of enabled arrays. */
   brw->vb.nr_enabled = 0;
   while (vs_inputs) {
      GLuint i = ffsll(vs_inputs) - 1;
      struct brw_vertex_element *input = &brw->vb.inputs[i];

      vs_inputs &= ~BITFIELD64_BIT(i);
      if (input->glarray->Size && get_size(input->glarray->Type))
         brw->vb.enabled[brw->vb.nr_enabled++] = input;
   }

   if (brw->vb.nr_enabled == 0)
      return;

   if (brw->vb.nr_buffers)
      goto prepare;

   for (i = j = 0; i < brw->vb.nr_enabled; i++) {
      struct brw_vertex_element *input = brw->vb.enabled[i];
      const struct gl_client_array *glarray = input->glarray;
      int type_size = get_size(glarray->Type);

      input->element_size = type_size * glarray->Size;

      if (_mesa_is_bufferobj(glarray->BufferObj)) {
	 struct intel_buffer_object *intel_buffer =
	    intel_buffer_object(glarray->BufferObj);
	 int k;

	 for (k = 0; k < i; k++) {
	    const struct gl_client_array *other = brw->vb.enabled[k]->glarray;
	    if (glarray->BufferObj == other->BufferObj &&
		glarray->StrideB == other->StrideB &&
		glarray->InstanceDivisor == other->InstanceDivisor &&
		(uintptr_t)(glarray->Ptr - other->Ptr) < glarray->StrideB)
	    {
	       input->buffer = brw->vb.enabled[k]->buffer;
	       input->offset = glarray->Ptr - other->Ptr;
	       break;
	    }
	 }
	 if (k == i) {
	    struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];

	    /* Named buffer object: Just reference its contents directly. */
            buffer->bo = intel_bufferobj_source(intel,
                                                intel_buffer, type_size,
						&buffer->offset);
	    drm_intel_bo_reference(buffer->bo);
	    buffer->offset += (uintptr_t)glarray->Ptr;
	    buffer->stride = glarray->StrideB;
	    buffer->step_rate = glarray->InstanceDivisor;

	    input->buffer = j++;
	    input->offset = 0;
	 }

	 /* This is a common place to reach if the user mistakenly supplies
	  * a pointer in place of a VBO offset.  If we just let it go through,
	  * we may end up dereferencing a pointer beyond the bounds of the
	  * GTT.  We would hope that the VBO's max_index would save us, but
	  * Mesa appears to hand us min/max values not clipped to the
	  * array object's _MaxElement, and _MaxElement frequently appears
	  * to be wrong anyway.
	  *
	  * The VBO spec allows application termination in this case, and it's
	  * probably a service to the poor programmer to do so rather than
	  * trying to just not render.
	  */
	 assert(input->offset < brw->vb.buffers[input->buffer].bo->size);
      } else {
	 /* Queue the buffer object up to be uploaded in the next pass,
	  * when we've decided if we're doing interleaved or not.
	  */
	 if (nr_uploads == 0) {
	    /* Position array not properly enabled:
	     */
	    if (input->attrib == VERT_ATTRIB_POS && glarray->StrideB == 0) {
               intel->Fallback = true; /* boolean, not bitfield */
               return;
            }

	    interleaved = glarray->StrideB;
	    ptr = glarray->Ptr;
	 }
	 else if (interleaved != glarray->StrideB ||
		  (uintptr_t)(glarray->Ptr - ptr) > interleaved)
	 {
	    interleaved = 0;
	 }
	 else if ((uintptr_t)(glarray->Ptr - ptr) & (type_size -1))
	 {
	    /* enforce natural alignment (for doubles) */
	    interleaved = 0;
	 }

	 upload[nr_uploads++] = input;

	 total_size = ALIGN(total_size, type_size);
	 total_size += input->element_size;

         if (glarray->InstanceDivisor != 0) {
            can_merge_uploads = GL_FALSE;
         }
      }
   }

   /* If we need to upload all the arrays, then we can trim those arrays to
    * only the used elements [min_index, max_index] so long as we adjust all
    * the values used in the 3DPRIMITIVE i.e. by setting the vertex bias.
    */
   brw->vb.start_vertex_bias = 0;
   delta = min_index;
   if (nr_uploads == brw->vb.nr_enabled) {
      brw->vb.start_vertex_bias = -delta;
      delta = 0;
   }
   if (delta && !brw->intel.intelScreen->relaxed_relocations)
      min_index = delta = 0;

   /* Handle any arrays to be uploaded. */
   if (nr_uploads > 1) {
      if (interleaved && interleaved <= 2*total_size) {
	 struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
	 /* All uploads are interleaved, so upload the arrays together as
	  * interleaved.  First, upload the contents and set up upload[0].
	  */
	 copy_array_to_vbo_array(brw, upload[0], min_index, max_index,
				 buffer, interleaved);
	 buffer->offset -= delta * interleaved;

	 for (i = 0; i < nr_uploads; i++) {
	    /* Then, just point upload[i] at upload[0]'s buffer. */
	    upload[i]->offset =
	       ((const unsigned char *)upload[i]->glarray->Ptr - ptr);
	    upload[i]->buffer = j;
	 }
	 j++;

	 nr_uploads = 0;
      }
      else if ((total_size < 2048) && can_merge_uploads) {
	 /* Upload non-interleaved arrays into a single interleaved array */
	 struct brw_vertex_buffer *buffer;
	 int count = MAX2(max_index - min_index + 1, 1);
	 int offset;
	 char *map;

	 map = intel_upload_map(&brw->intel, total_size * count, total_size);
	 for (i = offset = 0; i < nr_uploads; i++) {
	    const unsigned char *src = upload[i]->glarray->Ptr;
	    int size = upload[i]->element_size;
	    int stride = upload[i]->glarray->StrideB;
	    char *dst;
	    int n;

	    offset = ALIGN(offset, get_size(upload[i]->glarray->Type));
	    dst = map + offset;
	    src += min_index * stride;

	    for (n = 0; n < count; n++) {
	       memcpy(dst, src, size);
	       src += stride;
	       dst += total_size;
	    }

	    upload[i]->offset = offset;
	    upload[i]->buffer = j;

	    offset += size;
	 }
	 assert(offset == total_size);
	 buffer = &brw->vb.buffers[j++];
	 intel_upload_unmap(&brw->intel, map, offset * count, offset,
			    &buffer->bo, &buffer->offset);
	 buffer->stride = offset;
	 buffer->step_rate = 0;
	 buffer->offset -= delta * offset;

	 nr_uploads = 0;
      }
   }
   /* Upload non-interleaved arrays */
   for (i = 0; i < nr_uploads; i++) {
      struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
      if (upload[i]->glarray->InstanceDivisor == 0) {
         copy_array_to_vbo_array(brw, upload[i], min_index, max_index,
                                 buffer, upload[i]->element_size);
      } else {
         /* This is an instanced attribute, since its InstanceDivisor
          * is not zero. Therefore, its data will be stepped after the
          * instanced draw has been run InstanceDivisor times.
          */
         uint32_t instanced_attr_max_index =
            (brw->num_instances - 1) / upload[i]->glarray->InstanceDivisor;
         copy_array_to_vbo_array(brw, upload[i], 0, instanced_attr_max_index,
                                 buffer, upload[i]->element_size);
      }
      buffer->offset -= delta * buffer->stride;
      buffer->step_rate = upload[i]->glarray->InstanceDivisor;
      upload[i]->buffer = j++;
      upload[i]->offset = 0;
   }

   /* can we simply extend the current vb? */
   if (j == brw->vb.nr_current_buffers) {
      int delta = 0;
      for (i = 0; i < j; i++) {
	 int d;

	 if (brw->vb.current_buffers[i].handle != brw->vb.buffers[i].bo->handle ||
	     brw->vb.current_buffers[i].stride != brw->vb.buffers[i].stride ||
	     brw->vb.current_buffers[i].step_rate != brw->vb.buffers[i].step_rate)
	    break;

	 d = brw->vb.buffers[i].offset - brw->vb.current_buffers[i].offset;
	 if (d < 0)
	    break;
	 if (i == 0)
	    delta = d / brw->vb.current_buffers[i].stride;
	 if (delta * brw->vb.current_buffers[i].stride != d)
	    break;
      }

      if (i == j) {
	 brw->vb.start_vertex_bias += delta;
	 while (--j >= 0)
	    drm_intel_bo_unreference(brw->vb.buffers[j].bo);
	 j = 0;
      }
   }

   brw->vb.nr_buffers = j;

prepare:
   brw_prepare_query_begin(brw);
}
Example No. 29
static nir_shader *
create_passthrough_tcs(void *mem_ctx, const struct brw_compiler *compiler,
                       const nir_shader_compiler_options *options,
                       const struct brw_tcs_prog_key *key)
{
   nir_builder b;
   nir_builder_init_simple_shader(&b, mem_ctx, MESA_SHADER_TESS_CTRL,
                                  options);
   nir_shader *nir = b.shader;
   nir_variable *var;
   nir_intrinsic_instr *load;
   nir_intrinsic_instr *store;
   nir_ssa_def *zero = nir_imm_int(&b, 0);
   nir_ssa_def *invoc_id =
      nir_load_system_value(&b, nir_intrinsic_load_invocation_id, 0);

   nir->info->inputs_read = key->outputs_written;
   nir->info->outputs_written = key->outputs_written;
   nir->info->tcs.vertices_out = key->input_vertices;
   nir->info->name = ralloc_strdup(nir, "passthrough");
   nir->num_uniforms = 8 * sizeof(uint32_t);

   var = nir_variable_create(nir, nir_var_uniform, glsl_vec4_type(), "hdr_0");
   var->data.location = 0;
   var = nir_variable_create(nir, nir_var_uniform, glsl_vec4_type(), "hdr_1");
   var->data.location = 1;

   /* Write the patch URB header. */
   for (int i = 0; i <= 1; i++) {
      load = nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform);
      load->num_components = 4;
      load->src[0] = nir_src_for_ssa(zero);
      nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
      nir_intrinsic_set_base(load, i * 4 * sizeof(uint32_t));
      nir_builder_instr_insert(&b, &load->instr);

      store = nir_intrinsic_instr_create(nir, nir_intrinsic_store_output);
      store->num_components = 4;
      store->src[0] = nir_src_for_ssa(&load->dest.ssa);
      store->src[1] = nir_src_for_ssa(zero);
      nir_intrinsic_set_base(store, VARYING_SLOT_TESS_LEVEL_INNER - i);
      nir_intrinsic_set_write_mask(store, WRITEMASK_XYZW);
      nir_builder_instr_insert(&b, &store->instr);
   }

   /* Copy inputs to outputs. */
   uint64_t varyings = key->outputs_written;

   while (varyings != 0) {
      const int varying = ffsll(varyings) - 1;

      load = nir_intrinsic_instr_create(nir,
                                        nir_intrinsic_load_per_vertex_input);
      load->num_components = 4;
      load->src[0] = nir_src_for_ssa(invoc_id);
      load->src[1] = nir_src_for_ssa(zero);
      nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
      nir_intrinsic_set_base(load, varying);
      nir_builder_instr_insert(&b, &load->instr);

      store = nir_intrinsic_instr_create(nir,
                                         nir_intrinsic_store_per_vertex_output);
      store->num_components = 4;
      store->src[0] = nir_src_for_ssa(&load->dest.ssa);
      store->src[1] = nir_src_for_ssa(invoc_id);
      store->src[2] = nir_src_for_ssa(zero);
      nir_intrinsic_set_base(store, varying);
      nir_intrinsic_set_write_mask(store, WRITEMASK_XYZW);
      nir_builder_instr_insert(&b, &store->instr);

      varyings &= ~BITFIELD64_BIT(varying);
   }

   nir_validate_shader(nir);

   nir = brw_preprocess_nir(compiler, nir);

   return nir;
}
Example No. 30
inline void pawn_move(struct position *pos, struct move_array *m, unsigned char pawns)
{
    uint64_t pawn_pos = pos->pieces[pawns];
    uint64_t moves;
    unsigned char index_to;
    switch (pawns & COLOR) {
    case WHITE:
        // nonpromotion forward moves 
        moves = moveN(pawn_pos) & ~pos->allpieces & ~rank[7];
        for (int i = 0; i < 8; i++) {
            if ((index_to = ffsll(moves)) != 0) {
                index_to--;
                add_move(m, index_to + S, index_to, pawns, nopiece_n);
                moves &= notlinboard[index_to];
            } else {
                break;
            }
        }

        // forward 2 moves 
        pawn_pos = pos->pieces[pawns];
        moves = moveN(moveN(pawn_pos) & ~pos->allpieces) & ~pos->allpieces & rank[3];
        for (int i = 0; i < 8; i++) {
            if ((index_to = ffsll(moves)) != 0) {
                index_to--;
                add_move_forward2(m, index_to + S + S, index_to, pawns, nopiece_n);
                moves &= notlinboard[index_to];
            } else {
                break;
            }
        }

        // nonpromotion attack west moves
        pawn_pos = pos->pieces[pawns];
        moves = moveNW(pawn_pos) & (pos->bpieces | ep_squares[1][pos->ep]) & ~file[7] & ~rank[7];
        for (int i = 0; i < 8; i++) {
            if ((index_to = ffsll(moves)) != 0) {
                index_to--;
                add_move(m, index_to + SE, index_to, pawns, find_piece_ep(pos, index_to));
                moves &= notlinboard[index_to];
            } else {
                break;
            }
        }

        // nonpromotion attack east moves
        pawn_pos = pos->pieces[pawns];
        moves = moveNE(pawn_pos) & (pos->bpieces | ep_squares[1][pos->ep]) & ~file[0] & ~rank[7];
        for (int i = 0; i < 8; i++) {
            if ((index_to = ffsll(moves)) != 0) {
                index_to--;
                add_move(m, index_to + SW, index_to, pawns, find_piece_ep(pos, index_to));
                moves &= notlinboard[index_to];
            } else {
                break;
            }
        }

        pawn_pos = pos->pieces[pawns];
        if ((pawn_pos & rank[6]) == 0) {    // no promotion possibilities
            return;
        } else {
            // promotion forward moves
            moves = moveN(pawn_pos) & ~pos->allpieces & rank[7];
            for (int i = 0; i < 8; i++) {
                if ((index_to = ffsll(moves)) != 0) {
                    index_to--;
                    add_promotion_move(m, index_to + S, index_to, pawns, nopiece_n);
                    moves &= notlinboard[index_to];
                } else {
                    break;
                }
            }

            // promotion attack west moves
            pawn_pos = pos->pieces[pawns];
            moves = moveNW(pawn_pos) & pos->bpieces & ~file[7] & rank[7];
            for (int i = 0; i < 8; i++) {
                if ((index_to = ffsll(moves)) != 0) {
                    index_to--;
                    add_promotion_move(m, index_to + SE, index_to, pawns, find_piece(pos, index_to));
                    moves &= notlinboard[index_to];
                } else {
                    break;
                }
            }

            // promotion attack east moves
            pawn_pos = pos->pieces[pawns];
            moves = moveNE(pawn_pos) & pos->bpieces & ~file[0] & rank[7];
            for (int i = 0; i < 8; i++) {
                if ((index_to = ffsll(moves)) != 0) {
                    index_to--;
                    add_promotion_move(m, index_to + SW, index_to, pawns, find_piece(pos, index_to));
                    moves &= notlinboard[index_to];
                } else {
                    break;
                }
            }
        }
        break;
    default: // black
        // nonpromotion forward moves 
        moves = moveS(pawn_pos) & ~pos->allpieces & ~rank[0];
        for (int i = 0; i < 8; i++) {
            if ((index_to = ffsll(moves)) != 0) {
                index_to--;
                add_move(m, index_to + N, index_to, pawns, nopiece_n);
                moves &= notlinboard[index_to];
            } else {
                break;
            }
        }

        // forward 2 moves 
        pawn_pos = pos->pieces[pawns];
        moves = moveS(moveS(pawn_pos) & ~pos->allpieces) & ~pos->allpieces & rank[4];
        for (int i = 0; i < 8; i++) {
            if ((index_to = ffsll(moves)) != 0) {
                index_to--;
                add_move_forward2(m, index_to + N + N, index_to, pawns, nopiece_n);
                moves &= notlinboard[index_to];
            } else {
                break;
            }
        }

        // nonpromotion attack west moves
        pawn_pos = pos->pieces[pawns];
        moves = moveSW(pawn_pos) & (pos->wpieces | ep_squares[0][pos->ep]) & ~file[7] & ~rank[0];
        for (int i = 0; i < 8; i++) {
            if ((index_to = ffsll(moves)) != 0) {
                index_to--;
                add_move(m, index_to + NE, index_to, pawns, find_piece_ep(pos, index_to));
                moves &= notlinboard[index_to];
            } else {
                break;
            }
        }

        // nonpromotion attack east moves
        pawn_pos = pos->pieces[pawns];
        moves = moveSE(pawn_pos) & (pos->wpieces | ep_squares[0][pos->ep]) & ~file[0] & ~rank[0];
        for (int i = 0; i < 8; i++) {
            if ((index_to = ffsll(moves)) != 0) {
                index_to--;
                add_move(m, index_to + NW, index_to, pawns, find_piece_ep(pos, index_to));
                moves &= notlinboard[index_to];
            } else {
                break;
            }
        }

        pawn_pos = pos->pieces[pawns];
        if ((pawn_pos & rank[1]) == 0) {    // no promotion possibilities
            return;
        } else {
            // promotion forward moves
            moves = moveS(pawn_pos) & ~pos->allpieces & rank[0];
            for (int i = 0; i < 8; i++) {
                if ((index_to = ffsll(moves)) != 0) {
                    index_to--;
                    add_promotion_move(m, index_to + N, index_to, pawns, nopiece_n);
                    moves &= notlinboard[index_to];
                } else {
                    break;
                }
            }

            // promotion attack west moves
            pawn_pos = pos->pieces[pawns];
            moves = moveSW(pawn_pos) & pos->wpieces & ~file[7] & rank[0];
            for (int i = 0; i < 8; i++) {
                if ((index_to = ffsll(moves)) != 0) {
                    index_to--;
                    add_promotion_move(m, index_to + NE, index_to, pawns, find_piece(pos, index_to));
                    moves &= notlinboard[index_to];
                } else {
                    break;
                }
            }

            // promotion attack east moves
            pawn_pos = pos->pieces[pawns];
            moves = moveSE(pawn_pos) & pos->wpieces & ~file[0] & rank[0];
            for (int i = 0; i < 8; i++) {
                if ((index_to = ffsll(moves)) != 0) {
                    index_to--;
                    add_promotion_move(m, index_to + NW, index_to, pawns, find_piece(pos, index_to));
                    moves &= notlinboard[index_to];
                } else {
                    break;
                }
            }
        }
        break;
    }
}
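Taken together, nearly every example above follows the same scan-and-clear idiom: ffsll finds the lowest set bit as a 1-based ordinal (0 when no bit is set), the caller converts it to a 0-based index, processes it, clears it, and repeats. A minimal self-contained sketch of that shared pattern (the mask value is illustrative):

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>   /* ffsll (glibc); BSDs declare it in <strings.h> */

int main(void)
{
    unsigned long long pending = 0x28000000011ULL;
    while (pending != 0) {
        const int bit = ffsll(pending) - 1;   /* 1-based ordinal -> 0-based index */
        printf("processing bit %d\n", bit);
        pending &= ~(1ULL << bit);            /* clear it and rescan */
    }
    return 0;
}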