/**
 * Compute the VUE map for tessellation control shader outputs and
 * tessellation evaluation shader inputs.
 */
void
brw_compute_tess_vue_map(struct brw_vue_map *vue_map,
                         GLbitfield64 vertex_slots,
                         GLbitfield patch_slots)
{
   /* I don't think anything actually uses this... */
   vue_map->slots_valid = vertex_slots;

   vertex_slots &= ~(VARYING_BIT_TESS_LEVEL_OUTER |
                     VARYING_BIT_TESS_LEVEL_INNER);

   /* Make sure that the values we store in vue_map->varying_to_slot and
    * vue_map->slot_to_varying won't overflow the signed chars that are used
    * to store them.  Note that since vue_map->slot_to_varying sometimes holds
    * values equal to VARYING_SLOT_TESS_MAX, we need to ensure that
    * VARYING_SLOT_TESS_MAX is <= 127, not 128.
    */
   STATIC_ASSERT(VARYING_SLOT_TESS_MAX <= 127);

   for (int i = 0; i < VARYING_SLOT_TESS_MAX; ++i) {
      vue_map->varying_to_slot[i] = -1;
      vue_map->slot_to_varying[i] = BRW_VARYING_SLOT_PAD;
   }

   int slot = 0;

   /* The first 8 DWords are reserved for the "Patch Header".
    *
    * VARYING_SLOT_TESS_LEVEL_OUTER / INNER live here, but the exact layout
    * depends on the domain type.  They might not be in slots 0 and 1 as
    * described here, but pretending they're separate allows us to uniquely
    * identify them by distinct slot locations.
    */
   assign_vue_slot(vue_map, VARYING_SLOT_TESS_LEVEL_INNER, slot++);
   assign_vue_slot(vue_map, VARYING_SLOT_TESS_LEVEL_OUTER, slot++);

   /* first assign per-patch varyings */
   while (patch_slots != 0) {
      const int varying = ffsll(patch_slots) - 1;
      if (vue_map->varying_to_slot[varying + VARYING_SLOT_PATCH0] == -1) {
         assign_vue_slot(vue_map, varying + VARYING_SLOT_PATCH0, slot++);
      }
      patch_slots &= ~BITFIELD64_BIT(varying);
   }

   /* apparently, including the patch header... */
   vue_map->num_per_patch_slots = slot;

   /* then assign per-vertex varyings for each vertex in our patch */
   while (vertex_slots != 0) {
      const int varying = ffsll(vertex_slots) - 1;
      if (vue_map->varying_to_slot[varying] == -1) {
         assign_vue_slot(vue_map, varying, slot++);
      }
      vertex_slots &= ~BITFIELD64_BIT(varying);
   }

   vue_map->num_per_vertex_slots = slot - vue_map->num_per_patch_slots;
   vue_map->num_slots = slot;
}
inline void bishop_move(struct position *pos, struct move_array *m,
                        unsigned char bishops)
{
    uint64_t bishop_pos = pos->pieces[bishops];
    uint64_t moves;
    unsigned char index_from, index_to;

    // up to 10 bishops of same color on the board
    // (fixed-count for loop for compiler loop unrolling)
    for (int i = 0; i < 10; i++) {
        if ((index_from = ffsll(bishop_pos)) != 0) {
            index_from--;
            moves = BishopMoves[index_from]
                               [magictransform(pos->allpieces & BishopMasks[index_from],
                                               BishopMagic[index_from],
                                               BishopMagicSize[index_from])] &
                    ~pos->sumpieces[bishops & COLOR];
            for (int j = 0; j < 14; j++) {
                if ((index_to = ffsll(moves)) != 0) {
                    index_to--;
                    add_move(m, index_from, index_to, bishops,
                             find_piece(pos, index_to));
                    moves &= notlinboard[index_to];
                } else {
                    break;
                }
            }
            bishop_pos &= notlinboard[index_from];
        } else {
            break;
        }
    }
}
// Get/wait for next item for consumer.
// Does not set any bmap bits. Item must remain in buffer
// until we are done with it.
qitem* queue_pop(queue *q)
{
    qitem *buf = (qitem*)(q->buf + q->mapbytes*2);
    atomic_llong *map = (atomic_llong*)(q->buf + q->mapbytes);
    int sbit;
    int next;

    // printf("POP %lld, %lld %d\n",(long long int)q->buf, (long long int)map, q->mapbytes);
    while (1)
    {
        long long int mval = atomic_load(&map[q->last_map_pos]);
        if ((sbit = ffsll(mval & (~q->visited))))
        {
            // printf("SET BIT ! %d %d %lld\n",sbit,q->last_map_pos, mval);
            --sbit;
            q->visited |= (((long long int)1) << sbit);
            atomic_fetch_sub(&q->size, 1);
            return (qitem*)&buf[q->last_map_pos*64 + sbit];
        }

        if (q->last_map_pos == q->map_elements-1)
            next = 0;
        else
            next = q->last_map_pos+1;

        q->last_map_pos = next;
        q->visited = (long long int)0;

        mval = atomic_load(&map[next]);
        if ((sbit = ffsll(mval)))
        {
            --sbit;
            q->visited |= (((long long int)1) << sbit);
            atomic_fetch_sub(&q->size, 1);
            return (qitem*)&buf[next*64 + sbit];
        }
        else
        {
            q->last_map_pos = 0;
        }
        usleep(DELAY_BY);
    }
}
int _gnix_find_first_zero_bit(gnix_bitmap_t *bitmap)
{
    int i, pos;
    gnix_bitmap_value_t value;

    for (i = 0, pos = 0;
         i < GNIX_BITMAP_BLOCKS(bitmap->length);
         ++i, pos += GNIX_BITMAP_BUCKET_LENGTH) {
        /* invert the bits to check for first zero bit */
        value = ~(__gnix_load_block(bitmap, i));

        if (value != 0) {
            /* no need to check for errors because we have
             * established there is an unset bit */
            pos += ffsll(value) - 1;

            if (pos < bitmap->length)
                return pos;
            else
                return -FI_EAGAIN;
        }
    }

    return -FI_EAGAIN;
}
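/*
 * The invert-and-scan idiom above generalizes: complementing a word turns
 * its zero bits into ones, so ffsll() on the complement locates the first
 * clear bit of the original.  A minimal standalone sketch of that trick,
 * independent of the libfabric types; find_first_zero64 is a hypothetical
 * helper name, not part of any API shown here.
 */
#include <stdint.h>
#include <strings.h>    /* ffsll() is a glibc/BSD extension declared here */

/* Return the index of the first clear bit in a 64-bit word, or -1 when
 * every bit is set.  Mirrors the ~word + ffsll() pattern used in
 * _gnix_find_first_zero_bit above. */
static int find_first_zero64(uint64_t word)
{
    uint64_t inverted = ~word;              /* zeros become ones */
    if (inverted == 0)
        return -1;                          /* no clear bit */
    return ffsll((long long)inverted) - 1;  /* ffsll is 1-based */
}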
picture_t *picture_pool_Wait(picture_pool_t *pool)
{
    unsigned i;

    vlc_mutex_lock(&pool->lock);
    assert(pool->refs > 0);

    while (pool->available == 0)
        vlc_cond_wait(&pool->wait, &pool->lock);

    i = ffsll(pool->available);
    assert(i > 0);
    pool->available &= ~(1ULL << (i - 1));
    vlc_mutex_unlock(&pool->lock);

    picture_t *picture = pool->picture[i - 1];

    if (pool->pic_lock != NULL && pool->pic_lock(picture) != 0) {
        vlc_mutex_lock(&pool->lock);
        pool->available |= 1ULL << (i - 1);
        vlc_cond_signal(&pool->wait);
        vlc_mutex_unlock(&pool->lock);
        return NULL;
    }

    picture_t *clone = picture_pool_ClonePicture(pool, i - 1);
    if (clone != NULL) {
        assert(clone->p_next == NULL);
        atomic_fetch_add(&pool->refs, 1);
    }
    return clone;
}
inline void king_move(struct position *pos, struct move_array *m,
                      unsigned char king)
{
    unsigned char from = ffsll(pos->pieces[king]) - 1;
    uint64_t moves = kingmoves[from] & ~pos->sumpieces[king & COLOR];
    unsigned char index;

    for (int i = 0; i < 8; i++) {
        if ((index = ffsll(moves)) != 0) {
            index--;
            add_move(m, from, index, king, find_piece(pos, index));
            moves &= notlinboard[index];
        } else {
            break;
        }
    }
}
picture_t *picture_pool_Get(picture_pool_t *pool)
{
    vlc_mutex_lock(&pool->lock);
    assert(pool->refs > 0);

    for (unsigned i = ffsll(pool->available); i; i = fnsll(pool->available, i)) {
        pool->available &= ~(1ULL << (i - 1));
        vlc_mutex_unlock(&pool->lock);

        picture_t *picture = pool->picture[i - 1];

        if (pool->pic_lock != NULL && pool->pic_lock(picture) != 0) {
            vlc_mutex_lock(&pool->lock);
            pool->available |= 1ULL << (i - 1);
            continue;
        }

        picture_t *clone = picture_pool_ClonePicture(pool, i - 1);
        if (clone != NULL) {
            assert(clone->p_next == NULL);
            atomic_fetch_add(&pool->refs, 1);
        }
        return clone;
    }

    vlc_mutex_unlock(&pool->lock);
    return NULL;
}
bool
_mesa_all_varyings_in_vbos(const struct gl_vertex_array_object *vao)
{
   /* Walk those enabled arrays that have the default vbo attached */
   GLbitfield64 mask = vao->_Enabled & ~vao->VertexAttribBufferMask;

   while (mask) {
      /* Do not use u_bit_scan64 as we can walk multiple
       * attrib arrays at once
       */
      const int i = ffsll(mask) - 1;
      const struct gl_vertex_attrib_array *attrib_array =
         &vao->VertexAttrib[i];
      const struct gl_vertex_buffer_binding *buffer_binding =
         &vao->VertexBinding[attrib_array->VertexBinding];

      /* Only enabled arrays shall appear in the _Enabled bitmask */
      assert(attrib_array->Enabled);
      /* We have already masked out vao->VertexAttribBufferMask */
      assert(!_mesa_is_bufferobj(buffer_binding->BufferObj));

      /* Bail out once we find the first non vbo with a non zero stride */
      if (buffer_binding->Stride != 0)
         return false;

      /* Note that we cannot use the xor variant since the _BoundArray mask
       * may contain array attributes that are bound but not enabled.
       */
      mask &= ~buffer_binding->_BoundArrays;
   }

   return true;
}
inline uint64_t is_check(struct position *pos)
{
    unsigned char king_pos = ffsll(pos->pieces[wking_n | pos->tomove]) - 1;
    uint64_t bishop_checks = bishopmoves(pos, king_pos) &
        (pos->pieces[wbishops_n | pos->towait] |
         pos->pieces[wqueens_n | pos->towait]);
    uint64_t knight_checks = knightmoves[king_pos] &
        (pos->pieces[wknights_n | pos->towait]);
    uint64_t rook_checks = rookmoves(pos, king_pos) &
        (pos->pieces[wrooks_n | pos->towait] |
         pos->pieces[wqueens_n | pos->towait]);
    uint64_t pawn_checks = pawn_attacks[pos->towait][king_pos] &
        (pos->pieces[wpawns_n | pos->towait]);
    return (bishop_checks | knight_checks | rook_checks | pawn_checks);
}
int FindSetBit(BitBoard b)
{
#if HAVE_FFSLL
    return 64 - ffsll(b);
#else
    // return ffsl(b) - 1;
    union {
        BitBoard b;
        unsigned short sh[4];
        unsigned char ch[8];
    } d;

    d.b = b;
#ifdef WORDS_BIGENDIAN
    if (d.sh[1]) return FirstBit16[d.sh[1]] + 16;
    if (d.sh[2]) return FirstBit16[d.sh[2]] + 32;
    if (d.sh[0]) return FirstBit16[d.sh[0]];
    return FirstBit16[d.sh[3]] + 48;
#else
#if USE_8BIT
    if (d.sh[1]) {
        if (d.ch[3]) return FirstBit8[d.ch[3]] + 32;
        else         return FirstBit8[d.ch[2]] + 40;
    }
    if (d.sh[2]) {
        if (d.ch[4]) return FirstBit8[d.ch[4]] + 24;
        else         return FirstBit8[d.ch[5]] + 16;
    }
    if (d.sh[0]) {
        if (d.ch[1]) return FirstBit8[d.ch[1]] + 48;
        else         return FirstBit8[d.ch[0]] + 56;
    }
    if (d.ch[6]) return FirstBit8[d.ch[6]] + 8;
    else         return FirstBit8[d.ch[7]];
#endif /* USE_8BIT */
#if USE_16BIT
    if (d.sh[1]) return FirstBit16[d.sh[1]] + 32;
    if (d.sh[2]) return FirstBit16[d.sh[2]] + 16;
    if (d.sh[0]) return FirstBit16[d.sh[0]] + 48;
    return FirstBit16[d.sh[3]];
#endif /* USE_16BIT */
#endif
#endif
}
static void test_ffs(void *p)
{
    /* ffs */
    int_check(ffs(0), 0);
    int_check(ffs(1), 1);
    int_check(ffs(3), 1);
    int_check(ffs((int)-1), 1);
    int_check(ffs(ror32(1,1)), 32);

    /* ffsl */
    int_check(ffsl(0), 0);
    int_check(ffsl(1), 1);
    int_check(ffsl(3), 1);
    int_check(ffsl((long)-1), 1);
    if (sizeof(long) == 4)
        int_check(ffsl(ror32(1,1)), 32);
    else
        int_check(ffsl(ror64(1,1)), 64);

    /* ffsll */
    int_check(ffsll(0), 0);
    int_check(ffsll(1), 1);
    int_check(ffsll(3), 1);
    int_check(ffsll((long long)-1), 1);
    ull_check((1ULL << 63), ror64(1,1));
    int_check(ffsll(1ULL << 63), 64);
    int_check(ffsll(ror64(1,1)), 64);
end:;
}
static void scan_table(fd_set *t, table f)
{
    u64 *b = (void *)t;   /* view the fd_set as an array of 64-bit words */
    unsigned int i;

    for (i = 0; i < (FDSIZE/64); i++) {
        descriptor d;
        while ((d = ffsll(b[i]))) {
            d = (d - 1) + (64 * i);
            FD_CLR(d, t);   /* clears the bit in t, and hence in b[i] */
            thunk handler = (thunk)table_find(f, (void *)(unsigned long)d);
            table_set(f, (void *)(unsigned long)d, 0);
            apply(handler);
        }
    }
}
inline void knight_move(struct position *pos, struct move_array *m,
                        unsigned char knights)
{
    uint64_t knight_pos = pos->pieces[knights];
    uint64_t moves;
    unsigned char index_from, index_to;

    // can have up to 10 knights of same color on the board
    // (fixed-count for loop for compiler loop unrolling)
    for (int i = 0; i < 10; i++) {
        if ((index_from = ffsll(knight_pos)) != 0) {
            index_from--;
            moves = knightmoves[index_from] & ~pos->sumpieces[knights & COLOR];
            for (int j = 0; j < 8; j++) {
                if ((index_to = ffsll(moves)) != 0) {
                    index_to--;
                    add_move(m, index_from, index_to, knights,
                             find_piece(pos, index_to));
                    moves &= notlinboard[index_to];
                } else {
                    break;
                }
            }
            knight_pos &= notlinboard[index_from];
        } else {
            break;
        }
    }
}
/**
 * Helper for _mesa_update_array_object_max_element().
 * \return min(arrayObj->VertexAttrib[*]._MaxElement).
 */
static GLuint
compute_max_element(struct gl_array_object *arrayObj, GLbitfield64 enabled)
{
   GLuint min = ~((GLuint)0);

   while (enabled) {
      struct gl_client_array *client_array;
      GLint attrib = ffsll(enabled) - 1;
      enabled ^= BITFIELD64_BIT(attrib);

      client_array = &arrayObj->VertexAttrib[attrib];
      assert(client_array->Enabled);
      _mesa_update_array_max_element(client_array);
      min = MIN2(min, client_array->_MaxElement);
   }

   return min;
}
StreamIDPtr StreamManager::get()
{
    if ( m_numStreams < 2 )
    {
        return m_default;
    }

    // we've got too many streams so use the locking version
    if ( m_numStreams > sizeof(m_streams) * 8 )
    {
        Alembic::Util::scoped_lock l( m_lock );

        // we've used up more than we have, just return the default
        if ( m_curStream >= m_numStreams )
        {
            return m_default;
        }

        return StreamIDPtr( new StreamID( this,
            m_streamIDs[ m_curStream ++ ] ) );
    }

    // CAS (compare and swap) non locking version
    Alembic::Util::int64_t val = 0;
    Alembic::Util::int64_t oldVal = 0;
    Alembic::Util::int64_t newVal = 0;

    do
    {
        oldVal = m_streams;
        val = ffsll( oldVal );

        if ( val == 0 )
        {
            return m_default;
        }

        // clear the claimed bit; the shift must be done in 64 bits, since a
        // plain int literal would overflow for bit positions 32..63
        newVal = oldVal & ~( ( Alembic::Util::int64_t )1 << ( val - 1 ) );
    }
    while ( !__sync_bool_compare_and_swap( &m_streams, oldVal, newVal ) );

    return StreamIDPtr( new StreamID( this, ( std::size_t ) val - 1 ) );
}
static void vhost_dev_sync_region(struct vhost_dev *dev,
                                  MemoryRegionSection *section,
                                  uint64_t mfirst, uint64_t mlast,
                                  uint64_t rfirst, uint64_t rlast)
{
    uint64_t start = MAX(mfirst, rfirst);
    uint64_t end = MIN(mlast, rlast);
    vhost_log_chunk_t *from = dev->log + start / VHOST_LOG_CHUNK;
    vhost_log_chunk_t *to = dev->log + end / VHOST_LOG_CHUNK + 1;
    uint64_t addr = (start / VHOST_LOG_CHUNK) * VHOST_LOG_CHUNK;

    if (end < start) {
        return;
    }
    assert(end / VHOST_LOG_CHUNK < dev->log_size);
    assert(start / VHOST_LOG_CHUNK < dev->log_size);

    for (; from < to; ++from) {
        vhost_log_chunk_t log;
        int bit;
        /* We first check with non-atomic: much cheaper,
         * and we expect non-dirty to be the common case. */
        if (!*from) {
            addr += VHOST_LOG_CHUNK;
            continue;
        }
        /* Data must be read atomically. We don't really
         * need the barrier semantics of __sync
         * builtins, but it's easier to use them than
         * roll our own. */
        log = __sync_fetch_and_and(from, 0);
        while ((bit = sizeof(log) > sizeof(int) ?
                ffsll(log) : ffs(log))) {
            ram_addr_t ram_addr;
            bit -= 1;
            ram_addr = section->offset_within_region + bit * VHOST_LOG_PAGE;
            memory_region_set_dirty(section->mr, ram_addr, VHOST_LOG_PAGE);
            log &= ~(0x1ull << bit);
        }
        addr += VHOST_LOG_CHUNK;
    }
}
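/*
 * The drain-then-scan pattern above -- atomically swap the dirty word to
 * zero, then walk its set bits with ffsll() -- can be shown in isolation.
 * A minimal sketch using C11 atomics in place of the __sync builtins; the
 * bitmap value and the sync_dirty_pages name are made up for illustration.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <strings.h>    /* ffsll() */

static _Atomic unsigned long long dirty_word = 0x5ULL;  /* bits 0 and 2 dirty */

static void sync_dirty_pages(void)
{
    /* Atomically claim every currently-set bit; writers that set bits after
     * this point land in the fresh zero word and are handled next pass. */
    unsigned long long log = atomic_exchange(&dirty_word, 0);
    int bit;

    while ((bit = ffsll((long long)log))) {
        bit -= 1;
        printf("page %d is dirty\n", bit);  /* stand-in for set_dirty() */
        log &= ~(1ULL << bit);
    }
}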
static Int
msb(Int inp USES_REGS)	/* calculate the most significant bit for an integer */
{
  /* the obvious solution: do it by using binary search */
  Int out = 0;

  if (inp < 0) {
    return Yap_ArithError(DOMAIN_ERROR_NOT_LESS_THAN_ZERO, MkIntegerTerm(inp),
                          "msb/1 received %d", inp);
  }
#if HAVE__BUILTIN_FFSLL
  out = __builtin_ffsll(inp);
#elif HAVE_FFSLL
  out = ffsll(inp);
#else
  if (inp == 0)
    return 0L;
#if SIZEOF_INT_P == 8
  if (inp & ((CELL)0xffffffffLL << 32)) {inp >>= 32; out += 32;}
/* This should be called with global_sem protection */
int get_new_thread_id(pthread_t *thread)
{
    long long int map;
    struct timeval l_tv;

    map = sleep_map_array[0];

    /* First 32 thread slots are for slurmd, last 32 ones for slurmctld */
    if (slurmd_pid[0] == getpid())
        map |= 0xFFFFFFFF00000000ULL;
    else
        map |= 0xFFFFFFFFULL;

    map = ~map;

#if 0
    real_gettimeofday(&l_tv, NULL);
    sim_lib_printf(0, "[%ld-%ld] Using map: %016llx\n",
                   l_tv.tv_sec, l_tv.tv_usec, map);
    sim_lib_printf(0, "get_new_thread_id: [%16llx][%016llx], threads counter= %d\n",
                   sleep_map_array[0], thread_exit_array[0], current_threads[0]);
#endif

    /* Getting first slot available */
    map = ffsll(map);
    if (map == 0) {
        /* printf("WARNING!: no space available for a new thread. Current threads: %u\n",
                  current_threads[0]); */
        return -1;
    }

    /* ffsll returns a 1-based ordinal, so slot 0 comes back as 1 */
    map = map - 1;

    sleep_map_array[0] |= (1ULL << map);
    current_threads[0]++;
    /* 62 because we have slots for the main slurmctld and slurmd threads */
    if (current_threads[0] == 62) {
        printf("SIM ERROR: %d threads is not possible\n", current_threads[0]);
        return -1;
    }

    return map;
}
static int freemap_alloc(freemap_t *freemap)
{
    bucket_t mask;
    bucket_t *bucket;
    int nbucket;
    bucket_t *buckets;
    int bucket_idx;
    int bit_idx;
    int index;
    size_t size;

    /* scan existing buckets; a set bit marks a free slot */
    for (bucket_idx = 0; bucket_idx < freemap->nbucket; bucket_idx++) {
        bucket = freemap->buckets + bucket_idx;
        if (*bucket && (bit_idx = ffsll(*bucket) - 1) >= 0) {
            index = bucket_idx * bits_per_bucket + bit_idx;
            mask = ~(((bucket_t)1) << bit_idx);
            *bucket &= mask;   /* clear the bit to claim the slot */
            return index;
        }
    }

    /* all buckets full: grow the map by one bucket */
    index = bucket_idx * bits_per_bucket;
    nbucket = bucket_idx + 1;
    size = sizeof(bucket_t) * nbucket;
    buckets = realloc(freemap->buckets, size);
    if (!buckets) {
        errno = ENOMEM;
        return HANDLE_INDEX_INVALID;
    }

    /* claim bit 0 of the new bucket, leave the rest free */
    buckets[bucket_idx] = ~((bucket_t)1);
    freemap->nbucket = nbucket;
    freemap->buckets = buckets;
    return index;
}
/**
 * Updates the derived gl_client_arrays when a gl_vertex_attrib_array
 * or a gl_vertex_buffer_binding has changed.
 */
void
_mesa_update_vao_client_arrays(struct gl_context *ctx,
                               struct gl_vertex_array_object *vao)
{
   GLbitfield64 arrays = vao->NewArrays;

   while (arrays) {
      struct gl_client_array *client_array;
      struct gl_vertex_attrib_array *attrib_array;
      struct gl_vertex_buffer_binding *buffer_binding;

      GLint attrib = ffsll(arrays) - 1;
      arrays ^= BITFIELD64_BIT(attrib);

      attrib_array = &vao->VertexAttrib[attrib];
      buffer_binding = &vao->VertexBinding[attrib_array->VertexBinding];
      client_array = &vao->_VertexAttrib[attrib];

      _mesa_update_client_array(ctx, client_array, attrib_array,
                                buffer_binding);
   }
}
static void
ntb_transport_doorbell_callback(void *data, uint32_t vector)
{
    struct ntb_transport_ctx *nt = data;
    struct ntb_transport_qp *qp;
    struct _qpset db_bits;
    uint64_t vec_mask;
    unsigned qp_num;

    BIT_COPY(QP_SETSIZE, &nt->qp_bitmap, &db_bits);
    BIT_NAND(QP_SETSIZE, &db_bits, &nt->qp_bitmap_free);

    vec_mask = ntb_db_vector_mask(nt->ntb, vector);
    while (vec_mask != 0) {
        qp_num = ffsll(vec_mask) - 1;

        if (test_bit(qp_num, &db_bits)) {
            qp = &nt->qp_vec[qp_num];
            taskqueue_enqueue(taskqueue_swi, &qp->rxc_db_work);
        }

        vec_mask &= ~(1ull << qp_num);
    }
}
void
brw_prepare_vertices(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_VS_PROG_DATA */
   const struct brw_vs_prog_data *vs_prog_data =
      brw_vs_prog_data(brw->vs.base.prog_data);
   GLbitfield64 vs_inputs = vs_prog_data->inputs_read;
   const unsigned char *ptr = NULL;
   GLuint interleaved = 0;
   unsigned int min_index = brw->vb.min_index + brw->basevertex;
   unsigned int max_index = brw->vb.max_index + brw->basevertex;
   unsigned i;
   int delta, j;
   struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
   GLuint nr_uploads = 0;

   /* _NEW_POLYGON
    *
    * On gen6+, edge flags don't end up in the VUE (either in or out of the
    * VS).  Instead, they're uploaded as the last vertex element, and the data
    * is passed sideband through the fixed function units.  So, we need to
    * prepare the vertex buffer for it, but it's not present in inputs_read.
    */
   if (brw->gen >= 6 && (ctx->Polygon.FrontMode != GL_FILL ||
                         ctx->Polygon.BackMode != GL_FILL)) {
      vs_inputs |= VERT_BIT_EDGEFLAG;
   }

   if (0)
      fprintf(stderr, "%s %d..%d\n", __func__, min_index, max_index);

   /* Accumulate the list of enabled arrays. */
   brw->vb.nr_enabled = 0;
   while (vs_inputs) {
      GLuint index = ffsll(vs_inputs) - 1;
      struct brw_vertex_element *input = &brw->vb.inputs[index];

      vs_inputs &= ~BITFIELD64_BIT(index);
      brw->vb.enabled[brw->vb.nr_enabled++] = input;
   }

   if (brw->vb.nr_enabled == 0)
      return;

   if (brw->vb.nr_buffers)
      return;

   /* The range of data in a given buffer represented as [min, max) */
   struct intel_buffer_object *enabled_buffer[VERT_ATTRIB_MAX];
   uint32_t buffer_range_start[VERT_ATTRIB_MAX];
   uint32_t buffer_range_end[VERT_ATTRIB_MAX];

   for (i = j = 0; i < brw->vb.nr_enabled; i++) {
      struct brw_vertex_element *input = brw->vb.enabled[i];
      const struct gl_client_array *glarray = input->glarray;

      if (_mesa_is_bufferobj(glarray->BufferObj)) {
         struct intel_buffer_object *intel_buffer =
            intel_buffer_object(glarray->BufferObj);

         const uint32_t offset = (uintptr_t)glarray->Ptr;

         /* Start with the worst case */
         uint32_t start = 0;
         uint32_t range = intel_buffer->Base.Size;
         if (glarray->InstanceDivisor) {
            if (brw->num_instances) {
               start = offset + glarray->StrideB * brw->baseinstance;
               range = (glarray->StrideB * ((brw->num_instances - 1) /
                                            glarray->InstanceDivisor) +
                        glarray->_ElementSize);
            }
         } else {
            if (brw->vb.index_bounds_valid) {
               start = offset + min_index * glarray->StrideB;
               range = (glarray->StrideB * (max_index - min_index) +
                        glarray->_ElementSize);
            }
         }

         /* If we have a VB set to be uploaded for this buffer object
          * already, reuse that VB state so that we emit fewer
          * relocations.
          */
         unsigned k;
         for (k = 0; k < i; k++) {
            const struct gl_client_array *other = brw->vb.enabled[k]->glarray;
            if (glarray->BufferObj == other->BufferObj &&
                glarray->StrideB == other->StrideB &&
                glarray->InstanceDivisor == other->InstanceDivisor &&
                (uintptr_t)(glarray->Ptr - other->Ptr) < glarray->StrideB)
            {
               input->buffer = brw->vb.enabled[k]->buffer;
               input->offset = glarray->Ptr - other->Ptr;

               buffer_range_start[input->buffer] =
                  MIN2(buffer_range_start[input->buffer], start);
               buffer_range_end[input->buffer] =
                  MAX2(buffer_range_end[input->buffer], start + range);
               break;
            }
         }
         if (k == i) {
            struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];

            /* Named buffer object: Just reference its contents directly. */
            buffer->offset = offset;
            buffer->stride = glarray->StrideB;
            buffer->step_rate = glarray->InstanceDivisor;
            buffer->size = glarray->BufferObj->Size - offset;

            enabled_buffer[j] = intel_buffer;
            buffer_range_start[j] = start;
            buffer_range_end[j] = start + range;

            input->buffer = j++;
            input->offset = 0;
         }
      } else {
         /* Queue the buffer object up to be uploaded in the next pass,
          * when we've decided if we're doing interleaved or not.
          */
         if (nr_uploads == 0) {
            interleaved = glarray->StrideB;
            ptr = glarray->Ptr;
         }
         else if (interleaved != glarray->StrideB ||
                  glarray->Ptr < ptr ||
                  (uintptr_t)(glarray->Ptr - ptr) + glarray->_ElementSize >
                  interleaved)
         {
            /* If our stride is different from the first attribute's stride,
             * or if the first attribute's stride didn't cover our element,
             * disable the interleaved upload optimization.  The second case
             * can most commonly occur in cases where there is a single vertex
             * and, for example, the data is stored on the application's
             * stack.
             *
             * NOTE: This will also disable the optimization in cases where
             * the data is in a different order than the array indices.
             * Something like:
             *
             *     float data[...];
             *     glVertexAttribPointer(0, 4, GL_FLOAT, 32, &data[4]);
             *     glVertexAttribPointer(1, 4, GL_FLOAT, 32, &data[0]);
             */
            interleaved = 0;
         }

         upload[nr_uploads++] = input;
      }
   }

   /* Now that we've set up all of the buffers, we walk through and reference
    * each of them.  We do this late so that we get the right size in each
    * buffer and don't reference too little data.
    */
   for (i = 0; i < j; i++) {
      struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
      if (buffer->bo)
         continue;

      const uint32_t start = buffer_range_start[i];
      const uint32_t range = buffer_range_end[i] - buffer_range_start[i];

      buffer->bo = intel_bufferobj_buffer(brw, enabled_buffer[i], start,
                                          range);
      drm_intel_bo_reference(buffer->bo);
   }

   /* If we need to upload all the arrays, then we can trim those arrays to
    * only the used elements [min_index, max_index] so long as we adjust all
    * the values used in the 3DPRIMITIVE i.e. by setting the vertex bias.
    */
   brw->vb.start_vertex_bias = 0;
   delta = min_index;
   if (nr_uploads == brw->vb.nr_enabled) {
      brw->vb.start_vertex_bias = -delta;
      delta = 0;
   }

   /* Handle any arrays to be uploaded. */
   if (nr_uploads > 1) {
      if (interleaved) {
         struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
         /* All uploads are interleaved, so upload the arrays together as
          * interleaved.  First, upload the contents and set up upload[0].
          */
         copy_array_to_vbo_array(brw, upload[0], min_index, max_index,
                                 buffer, interleaved);
         buffer->offset -= delta * interleaved;
         buffer->size += delta * interleaved;

         for (i = 0; i < nr_uploads; i++) {
            /* Then, just point upload[i] at upload[0]'s buffer. */
            upload[i]->offset = ((const unsigned char *)upload[i]->glarray->Ptr -
                                 ptr);
            upload[i]->buffer = j;
         }
         j++;

         nr_uploads = 0;
      }
   }

   /* Upload non-interleaved arrays */
   for (i = 0; i < nr_uploads; i++) {
      struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
      if (upload[i]->glarray->InstanceDivisor == 0) {
         copy_array_to_vbo_array(brw, upload[i], min_index, max_index,
                                 buffer, upload[i]->glarray->_ElementSize);
      } else {
         /* This is an instanced attribute, since its InstanceDivisor
          * is not zero.  Therefore, its data will be stepped after the
          * instanced draw has been run InstanceDivisor times.
          */
         uint32_t instanced_attr_max_index =
            (brw->num_instances - 1) / upload[i]->glarray->InstanceDivisor;
         copy_array_to_vbo_array(brw, upload[i], 0, instanced_attr_max_index,
                                 buffer, upload[i]->glarray->_ElementSize);
      }
      buffer->offset -= delta * buffer->stride;
      buffer->size += delta * buffer->stride;
      buffer->step_rate = upload[i]->glarray->InstanceDivisor;
      upload[i]->buffer = j++;
      upload[i]->offset = 0;
   }

   brw->vb.nr_buffers = j;
}
static int
pci_vtblk_init(struct pci_devinst *pi, char *opts)
{
    char bident[sizeof("XX:X:X")];
    struct blockif_ctxt *bctxt;
    MD5_CTX mdctx;
    u_char digest[16];
    struct pci_vtblk_softc *sc;
    off_t size;
    int i, sectsz, sts, sto;

    if (opts == NULL) {
        printf("virtio-block: backing device required\n");
        return (1);
    }

    /*
     * The supplied backing file has to exist
     */
    snprintf(bident, sizeof(bident), "%d:%d", pi->pi_slot, pi->pi_func);
    bctxt = blockif_open(opts, bident);
    if (bctxt == NULL) {
        perror("Could not open backing file");
        return (1);
    }

    size = blockif_size(bctxt);
    sectsz = blockif_sectsz(bctxt);
    blockif_psectsz(bctxt, &sts, &sto);

    sc = calloc(1, sizeof(struct pci_vtblk_softc));
    sc->bc = bctxt;
    for (i = 0; i < VTBLK_RINGSZ; i++) {
        struct pci_vtblk_ioreq *io = &sc->vbsc_ios[i];
        io->io_req.br_callback = pci_vtblk_done;
        io->io_req.br_param = io;
        io->io_sc = sc;
        io->io_idx = (uint16_t)i;
    }

    pthread_mutex_init(&sc->vsc_mtx, NULL);

    /* init virtio softc and virtqueues */
    vi_softc_linkup(&sc->vbsc_vs, &vtblk_vi_consts, sc, pi, &sc->vbsc_vq);
    sc->vbsc_vs.vs_mtx = &sc->vsc_mtx;

    sc->vbsc_vq.vq_qsize = VTBLK_RINGSZ;
    /* sc->vbsc_vq.vq_notify = we have no per-queue notify */

    /*
     * Create an identifier for the backing file. Use parts of the
     * md5 sum of the filename
     */
    MD5Init(&mdctx);
    MD5Update(&mdctx, opts, (unsigned)strlen(opts));
    MD5Final(digest, &mdctx);
    snprintf(sc->vbsc_ident, VTBLK_BLK_ID_BYTES,
        "BHYVE-%02X%02X-%02X%02X-%02X%02X",
        digest[0], digest[1], digest[2], digest[3], digest[4], digest[5]);

    /* setup virtio block config space */
    sc->vbsc_cfg.vbc_capacity = (uint64_t)(size / DEV_BSIZE); /* 512-byte units */
    sc->vbsc_cfg.vbc_size_max = 0;	/* not negotiated */
    sc->vbsc_cfg.vbc_seg_max = BLOCKIF_IOV_MAX;
    sc->vbsc_cfg.vbc_geometry.cylinders = 0;	/* no geometry */
    sc->vbsc_cfg.vbc_geometry.heads = 0;
    sc->vbsc_cfg.vbc_geometry.sectors = 0;
    sc->vbsc_cfg.vbc_blk_size = (uint32_t)sectsz;
    sc->vbsc_cfg.vbc_topology.physical_block_exp =
        (uint8_t)((sts > sectsz) ? (ffsll(sts / sectsz) - 1) : 0);
    sc->vbsc_cfg.vbc_topology.alignment_offset =
        (uint8_t)((sto != 0) ? ((sts - sto) / sectsz) : 0);
    sc->vbsc_cfg.vbc_topology.min_io_size = 0;
    sc->vbsc_cfg.vbc_topology.opt_io_size = 0;
    sc->vbsc_cfg.vbc_writeback = 0;

    /*
     * Should we move some of this into virtio.c?  Could
     * have the device, class, and subdev_0 as fields in
     * the virtio constants structure.
     */
    pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_BLOCK);
    pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
    pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
    pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_BLOCK);
    pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);

    if (vi_intr_init(&sc->vbsc_vs, 1, fbsdrun_virtio_msix())) {
        blockif_close(sc->bc);
        free(sc);
        return (1);
    }
    vi_set_io_bar(&sc->vbsc_vs, 0);
    return (0);
}
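/*
 * The physical_block_exp assignment above relies on ffsll(x) - 1 equaling
 * log2(x) when x is an exact power of two (here, the ratio of physical to
 * logical sector size): for x = 2^n the only set bit is bit n, so ffsll()
 * returns n + 1.  A minimal sketch of the idiom; log2_pow2 is a
 * hypothetical helper name, not part of the bhyve code.
 */
#include <assert.h>
#include <strings.h>    /* ffsll() */

static int log2_pow2(long long x)
{
    assert(x > 0 && (x & (x - 1)) == 0);    /* must be a power of two */
    return ffsll(x) - 1;
}

/* e.g. 4096-byte physical sectors on a 512-byte logical device:
 * log2_pow2(4096 / 512) == 3, the exponent stored in physical_block_exp. */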
U64 QMagicHash::MagicBishopMoves(const U64& occ, const U64& loc)
{
    return MagicBishopMoves(occ, ffsll(loc));
}
/** Find next (bit) set */
static int fnsll(unsigned long long x, unsigned i)
{
    if (i >= CHAR_BIT * sizeof (x))
        return 0;
    return ffsll(x & ~((1ULL << i) - 1));
}
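/*
 * Together, ffsll() and the fnsll() helper above form a 1-based cursor over
 * the set bits of a word; this is exactly how picture_pool_Get() walks
 * pool->available.  A self-contained sketch of that iteration pattern; the
 * mask value is made up for illustration.
 */
#include <limits.h>     /* CHAR_BIT */
#include <stdio.h>
#include <strings.h>    /* ffsll() */

static int fnsll(unsigned long long x, unsigned i)
{
    if (i >= CHAR_BIT * sizeof (x))
        return 0;
    return ffsll(x & ~((1ULL << i) - 1));
}

int main(void)
{
    unsigned long long available = 0x29;    /* bits 0, 3 and 5 set */

    /* Visit each set bit without modifying the mask, mirroring the
     * for (i = ffsll(...); i; i = fnsll(..., i)) loop in picture_pool_Get. */
    for (unsigned i = ffsll(available); i; i = fnsll(available, i))
        printf("slot %u is free\n", i - 1); /* ffsll/fnsll are 1-based */

    return 0;
}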
void brw_prepare_vertices(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* CACHE_NEW_VS_PROG */
   GLbitfield64 vs_inputs = brw->vs.prog_data->inputs_read;
   const unsigned char *ptr = NULL;
   GLuint interleaved = 0;
   unsigned int min_index = brw->vb.min_index + brw->basevertex;
   unsigned int max_index = brw->vb.max_index + brw->basevertex;
   int delta, i, j;
   struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
   GLuint nr_uploads = 0;

   /* _NEW_POLYGON
    *
    * On gen6+, edge flags don't end up in the VUE (either in or out of the
    * VS).  Instead, they're uploaded as the last vertex element, and the data
    * is passed sideband through the fixed function units.  So, we need to
    * prepare the vertex buffer for it, but it's not present in inputs_read.
    */
   if (brw->gen >= 6 && (ctx->Polygon.FrontMode != GL_FILL ||
                         ctx->Polygon.BackMode != GL_FILL)) {
      vs_inputs |= VERT_BIT_EDGEFLAG;
   }

   if (0)
      fprintf(stderr, "%s %d..%d\n", __FUNCTION__, min_index, max_index);

   /* Accumulate the list of enabled arrays. */
   brw->vb.nr_enabled = 0;
   while (vs_inputs) {
      GLuint i = ffsll(vs_inputs) - 1;
      struct brw_vertex_element *input = &brw->vb.inputs[i];

      vs_inputs &= ~BITFIELD64_BIT(i);
      brw->vb.enabled[brw->vb.nr_enabled++] = input;
   }

   if (brw->vb.nr_enabled == 0)
      return;

   if (brw->vb.nr_buffers)
      return;

   for (i = j = 0; i < brw->vb.nr_enabled; i++) {
      struct brw_vertex_element *input = brw->vb.enabled[i];
      const struct gl_client_array *glarray = input->glarray;

      if (_mesa_is_bufferobj(glarray->BufferObj)) {
         struct intel_buffer_object *intel_buffer =
            intel_buffer_object(glarray->BufferObj);
         int k;

         /* If we have a VB set to be uploaded for this buffer object
          * already, reuse that VB state so that we emit fewer
          * relocations.
          */
         for (k = 0; k < i; k++) {
            const struct gl_client_array *other = brw->vb.enabled[k]->glarray;
            if (glarray->BufferObj == other->BufferObj &&
                glarray->StrideB == other->StrideB &&
                glarray->InstanceDivisor == other->InstanceDivisor &&
                (uintptr_t)(glarray->Ptr - other->Ptr) < glarray->StrideB)
            {
               input->buffer = brw->vb.enabled[k]->buffer;
               input->offset = glarray->Ptr - other->Ptr;
               break;
            }
         }
         if (k == i) {
            struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];

            /* Named buffer object: Just reference its contents directly. */
            buffer->offset = (uintptr_t)glarray->Ptr;
            buffer->stride = glarray->StrideB;
            buffer->step_rate = glarray->InstanceDivisor;

            uint32_t offset, size;
            if (glarray->InstanceDivisor) {
               offset = buffer->offset;
               size = (buffer->stride * ((brw->num_instances /
                                          glarray->InstanceDivisor) - 1) +
                       glarray->_ElementSize);
            } else {
               if (min_index == -1) {
                  offset = 0;
                  size = intel_buffer->Base.Size;
               } else {
                  offset = buffer->offset + min_index * buffer->stride;
                  size = (buffer->stride * (max_index - min_index) +
                          glarray->_ElementSize);
               }
            }
            buffer->bo = intel_bufferobj_buffer(brw, intel_buffer,
                                                offset, size);
            drm_intel_bo_reference(buffer->bo);

            input->buffer = j++;
            input->offset = 0;
         }

         /* This is a common place to reach if the user mistakenly supplies
          * a pointer in place of a VBO offset.  If we just let it go through,
          * we may end up dereferencing a pointer beyond the bounds of the
          * GTT.  We would hope that the VBO's max_index would save us, but
          * Mesa appears to hand us min/max values not clipped to the
          * array object's _MaxElement, and _MaxElement frequently appears
          * to be wrong anyway.
          *
          * The VBO spec allows application termination in this case, and it's
          * probably a service to the poor programmer to do so rather than
          * trying to just not render.
          */
         assert(input->offset < brw->vb.buffers[input->buffer].bo->size);
      } else {
         /* Queue the buffer object up to be uploaded in the next pass,
          * when we've decided if we're doing interleaved or not.
          */
         if (nr_uploads == 0) {
            interleaved = glarray->StrideB;
            ptr = glarray->Ptr;
         }
         else if (interleaved != glarray->StrideB ||
                  glarray->Ptr < ptr ||
                  (uintptr_t)(glarray->Ptr - ptr) + glarray->_ElementSize >
                  interleaved)
         {
            /* If our stride is different from the first attribute's stride,
             * or if the first attribute's stride didn't cover our element,
             * disable the interleaved upload optimization.  The second case
             * can most commonly occur in cases where there is a single vertex
             * and, for example, the data is stored on the application's
             * stack.
             *
             * NOTE: This will also disable the optimization in cases where
             * the data is in a different order than the array indices.
             * Something like:
             *
             *     float data[...];
             *     glVertexAttribPointer(0, 4, GL_FLOAT, 32, &data[4]);
             *     glVertexAttribPointer(1, 4, GL_FLOAT, 32, &data[0]);
             */
            interleaved = 0;
         }

         upload[nr_uploads++] = input;
      }
   }

   /* If we need to upload all the arrays, then we can trim those arrays to
    * only the used elements [min_index, max_index] so long as we adjust all
    * the values used in the 3DPRIMITIVE i.e. by setting the vertex bias.
    */
   brw->vb.start_vertex_bias = 0;
   delta = min_index;
   if (nr_uploads == brw->vb.nr_enabled) {
      brw->vb.start_vertex_bias = -delta;
      delta = 0;
   }

   /* Handle any arrays to be uploaded. */
   if (nr_uploads > 1) {
      if (interleaved) {
         struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
         /* All uploads are interleaved, so upload the arrays together as
          * interleaved.  First, upload the contents and set up upload[0].
          */
         copy_array_to_vbo_array(brw, upload[0], min_index, max_index,
                                 buffer, interleaved);
         buffer->offset -= delta * interleaved;

         for (i = 0; i < nr_uploads; i++) {
            /* Then, just point upload[i] at upload[0]'s buffer. */
            upload[i]->offset = ((const unsigned char *)upload[i]->glarray->Ptr -
                                 ptr);
            upload[i]->buffer = j;
         }
         j++;

         nr_uploads = 0;
      }
   }

   /* Upload non-interleaved arrays */
   for (i = 0; i < nr_uploads; i++) {
      struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
      if (upload[i]->glarray->InstanceDivisor == 0) {
         copy_array_to_vbo_array(brw, upload[i], min_index, max_index,
                                 buffer, upload[i]->glarray->_ElementSize);
      } else {
         /* This is an instanced attribute, since its InstanceDivisor
          * is not zero.  Therefore, its data will be stepped after the
          * instanced draw has been run InstanceDivisor times.
          */
         uint32_t instanced_attr_max_index =
            (brw->num_instances - 1) / upload[i]->glarray->InstanceDivisor;
         copy_array_to_vbo_array(brw, upload[i], 0, instanced_attr_max_index,
                                 buffer, upload[i]->glarray->_ElementSize);
      }
      buffer->offset -= delta * buffer->stride;
      buffer->step_rate = upload[i]->glarray->InstanceDivisor;
      upload[i]->buffer = j++;
      upload[i]->offset = 0;
   }

   brw->vb.nr_buffers = j;
}
int
fw_writefw(struct devicelist *flashdev)
{
    libscsi_hdl_t *handle;
    libscsi_target_t *target;
    libscsi_errno_t serr;
    size_t maxxfer, nwrite;
    uint8_t align;
    int ret = FWFLASH_FAILURE;

    if ((verifier == NULL) || (verifier->imgsize == 0) ||
        (verifier->fwimage == NULL)) {
        /* should _NOT_ happen */
        logmsg(MSG_ERROR,
            gettext("%s: Firmware image has not been verified\n"),
            flashdev->drvname);
        return (FWFLASH_FAILURE);
    }

    if ((handle = libscsi_init(LIBSCSI_VERSION, &serr)) == NULL) {
        logmsg(MSG_ERROR, gettext("%s: failed to initialize libscsi\n"),
            flashdev->drvname);
        return (FWFLASH_FAILURE);
    }

    if ((target = libscsi_open(handle, NULL, flashdev->access_devname)) ==
        NULL) {
        logmsg(MSG_ERROR,
            gettext("%s: unable to open device %s\n"),
            flashdev->drvname, flashdev->access_devname);
        libscsi_fini(handle);
        return (FWFLASH_FAILURE);
    }

    if (libscsi_max_transfer(target, &maxxfer) != 0) {
        logmsg(MSG_ERROR, gettext("%s: failed to determine device "
            "maximum transfer size: %s\n"), flashdev->drvname,
            libscsi_errmsg(handle));
        goto err;
    }

    if (sdfw_read_descriptor(flashdev, handle, target, &align) !=
        FWFLASH_SUCCESS) {
        goto err;
    }

    /*
     * If the maximum transfer size is less than the maximum image size then
     * we have to do some additional work.  We need to read the descriptor
     * via a READ BUFFER command and make sure that we support the required
     * offset alignment.  Note that an alignment of 0xff indicates that the
     * device does not support partial writes and must receive the firmware
     * in a single WRITE BUFFER.  Otherwise a value in align represents a
     * required offset alignment of 2^off.  From there, we make sure that
     * this works for our partial write size and that our partial write size
     * fits in the maximum transfer size.
     */
    if (maxxfer < verifier->imgsize) {
        logmsg(MSG_INFO, "%s: Maximum transfer is %u, required "
            "alignment is 2^%d\n", flashdev->drvname, maxxfer, align);
        if (FW_SD_PARTIAL_WRITE_SIZE > maxxfer) {
            logmsg(MSG_ERROR, gettext("%s: cannot write firmware "
                "image: HBA enforces a maximum transfer size of "
                "%u bytes, but the default partial transfer size "
                "is %u bytes\n"), flashdev->drvname, maxxfer,
                FW_SD_PARTIAL_WRITE_SIZE);
            goto err;
        }
        maxxfer = FW_SD_PARTIAL_WRITE_SIZE;

        if (ffsll(maxxfer) < align || align == 0xff) {
            logmsg(MSG_ERROR, gettext("%s: cannot write firmware "
                "image: device requires partial writes aligned "
                "to an unsupported value\n"), flashdev->drvname);
            goto err;
        }

        logmsg(MSG_INFO, "%s: final transfer block size is %u\n",
            flashdev->drvname, maxxfer);
    }

    logmsg(MSG_INFO, "%s: Writing out %u bytes to %s\n", flashdev->drvname,
        verifier->imgsize, flashdev->access_devname);
    nwrite = 0;
    for (;;) {
        uintptr_t buf;
        size_t towrite = MIN(maxxfer, verifier->imgsize - nwrite);

        if (towrite == 0)
            break;

        buf = (uintptr_t)verifier->fwimage;
        buf += nwrite;

        if (sdfw_write(flashdev, handle, target, towrite, nwrite,
            (void *)buf) != FWFLASH_SUCCESS) {
            logmsg(MSG_ERROR, gettext("%s: failed to write to %s "
                "successfully: %s\n"), flashdev->drvname,
                flashdev->access_devname, libscsi_errmsg(handle));
            goto err;
        }

        nwrite += towrite;
    }

    logmsg(MSG_ERROR, gettext("Note: For flash based disks "
        "(SSD, etc.) you may need to power off the system and wait a "
        "few minutes for the supercap to fully discharge, then power "
        "on the system again to activate the new firmware\n"));

    ret = FWFLASH_SUCCESS;

err:
    if (target != NULL)
        libscsi_close(handle, target);
    if (handle != NULL)
        libscsi_fini(handle);

    return (ret);
}
static void brw_prepare_vertices(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->intel.ctx;
   struct intel_context *intel = intel_context(ctx);
   /* CACHE_NEW_VS_PROG */
   GLbitfield64 vs_inputs = brw->vs.prog_data->inputs_read;
   const unsigned char *ptr = NULL;
   GLuint interleaved = 0, total_size = 0;
   unsigned int min_index = brw->vb.min_index;
   unsigned int max_index = brw->vb.max_index;
   int delta, i, j;
   GLboolean can_merge_uploads = GL_TRUE;
   struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
   GLuint nr_uploads = 0;

   /* First build an array of pointers to ve's in vb.inputs_read */
   if (0)
      printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);

   /* Accumulate the list of enabled arrays. */
   brw->vb.nr_enabled = 0;
   while (vs_inputs) {
      GLuint i = ffsll(vs_inputs) - 1;
      struct brw_vertex_element *input = &brw->vb.inputs[i];

      vs_inputs &= ~BITFIELD64_BIT(i);
      if (input->glarray->Size && get_size(input->glarray->Type))
         brw->vb.enabled[brw->vb.nr_enabled++] = input;
   }

   if (brw->vb.nr_enabled == 0)
      return;

   if (brw->vb.nr_buffers)
      goto prepare;

   for (i = j = 0; i < brw->vb.nr_enabled; i++) {
      struct brw_vertex_element *input = brw->vb.enabled[i];
      const struct gl_client_array *glarray = input->glarray;
      int type_size = get_size(glarray->Type);

      input->element_size = type_size * glarray->Size;

      if (_mesa_is_bufferobj(glarray->BufferObj)) {
         struct intel_buffer_object *intel_buffer =
            intel_buffer_object(glarray->BufferObj);
         int k;

         for (k = 0; k < i; k++) {
            const struct gl_client_array *other = brw->vb.enabled[k]->glarray;
            if (glarray->BufferObj == other->BufferObj &&
                glarray->StrideB == other->StrideB &&
                glarray->InstanceDivisor == other->InstanceDivisor &&
                (uintptr_t)(glarray->Ptr - other->Ptr) < glarray->StrideB)
            {
               input->buffer = brw->vb.enabled[k]->buffer;
               input->offset = glarray->Ptr - other->Ptr;
               break;
            }
         }
         if (k == i) {
            struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];

            /* Named buffer object: Just reference its contents directly. */
            buffer->bo = intel_bufferobj_source(intel,
                                                intel_buffer, type_size,
                                                &buffer->offset);
            drm_intel_bo_reference(buffer->bo);
            buffer->offset += (uintptr_t)glarray->Ptr;
            buffer->stride = glarray->StrideB;
            buffer->step_rate = glarray->InstanceDivisor;

            input->buffer = j++;
            input->offset = 0;
         }

         /* This is a common place to reach if the user mistakenly supplies
          * a pointer in place of a VBO offset.  If we just let it go through,
          * we may end up dereferencing a pointer beyond the bounds of the
          * GTT.  We would hope that the VBO's max_index would save us, but
          * Mesa appears to hand us min/max values not clipped to the
          * array object's _MaxElement, and _MaxElement frequently appears
          * to be wrong anyway.
          *
          * The VBO spec allows application termination in this case, and it's
          * probably a service to the poor programmer to do so rather than
          * trying to just not render.
          */
         assert(input->offset < brw->vb.buffers[input->buffer].bo->size);
      } else {
         /* Queue the buffer object up to be uploaded in the next pass,
          * when we've decided if we're doing interleaved or not.
          */
         if (nr_uploads == 0) {
            /* Position array not properly enabled: */
            if (input->attrib == VERT_ATTRIB_POS && glarray->StrideB == 0) {
               intel->Fallback = true; /* boolean, not bitfield */
               return;
            }

            interleaved = glarray->StrideB;
            ptr = glarray->Ptr;
         }
         else if (interleaved != glarray->StrideB ||
                  (uintptr_t)(glarray->Ptr - ptr) > interleaved)
         {
            interleaved = 0;
         }
         else if ((uintptr_t)(glarray->Ptr - ptr) & (type_size - 1))
         {
            /* enforce natural alignment (for doubles) */
            interleaved = 0;
         }

         upload[nr_uploads++] = input;

         total_size = ALIGN(total_size, type_size);
         total_size += input->element_size;

         if (glarray->InstanceDivisor != 0) {
            can_merge_uploads = GL_FALSE;
         }
      }
   }

   /* If we need to upload all the arrays, then we can trim those arrays to
    * only the used elements [min_index, max_index] so long as we adjust all
    * the values used in the 3DPRIMITIVE i.e. by setting the vertex bias.
    */
   brw->vb.start_vertex_bias = 0;
   delta = min_index;
   if (nr_uploads == brw->vb.nr_enabled) {
      brw->vb.start_vertex_bias = -delta;
      delta = 0;
   }
   if (delta && !brw->intel.intelScreen->relaxed_relocations)
      min_index = delta = 0;

   /* Handle any arrays to be uploaded. */
   if (nr_uploads > 1) {
      if (interleaved && interleaved <= 2*total_size) {
         struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
         /* All uploads are interleaved, so upload the arrays together as
          * interleaved.  First, upload the contents and set up upload[0].
          */
         copy_array_to_vbo_array(brw, upload[0], min_index, max_index,
                                 buffer, interleaved);
         buffer->offset -= delta * interleaved;

         for (i = 0; i < nr_uploads; i++) {
            /* Then, just point upload[i] at upload[0]'s buffer. */
            upload[i]->offset = ((const unsigned char *)upload[i]->glarray->Ptr -
                                 ptr);
            upload[i]->buffer = j;
         }
         j++;

         nr_uploads = 0;
      }
      else if ((total_size < 2048) && can_merge_uploads) {
         /* Upload non-interleaved arrays into a single interleaved array */
         struct brw_vertex_buffer *buffer;
         int count = MAX2(max_index - min_index + 1, 1);
         int offset;
         char *map;

         map = intel_upload_map(&brw->intel, total_size * count, total_size);
         for (i = offset = 0; i < nr_uploads; i++) {
            const unsigned char *src = upload[i]->glarray->Ptr;
            int size = upload[i]->element_size;
            int stride = upload[i]->glarray->StrideB;
            char *dst;
            int n;

            offset = ALIGN(offset, get_size(upload[i]->glarray->Type));
            dst = map + offset;
            src += min_index * stride;

            for (n = 0; n < count; n++) {
               memcpy(dst, src, size);
               src += stride;
               dst += total_size;
            }

            upload[i]->offset = offset;
            upload[i]->buffer = j;

            offset += size;
         }
         assert(offset == total_size);
         buffer = &brw->vb.buffers[j++];
         intel_upload_unmap(&brw->intel, map, offset * count, offset,
                            &buffer->bo, &buffer->offset);
         buffer->stride = offset;
         buffer->step_rate = 0;
         buffer->offset -= delta * offset;

         nr_uploads = 0;
      }
   }

   /* Upload non-interleaved arrays */
   for (i = 0; i < nr_uploads; i++) {
      struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
      if (upload[i]->glarray->InstanceDivisor == 0) {
         copy_array_to_vbo_array(brw, upload[i], min_index, max_index,
                                 buffer, upload[i]->element_size);
      } else {
         /* This is an instanced attribute, since its InstanceDivisor
          * is not zero.  Therefore, its data will be stepped after the
          * instanced draw has been run InstanceDivisor times.
          */
         uint32_t instanced_attr_max_index =
            (brw->num_instances - 1) / upload[i]->glarray->InstanceDivisor;
         copy_array_to_vbo_array(brw, upload[i], 0, instanced_attr_max_index,
                                 buffer, upload[i]->element_size);
      }
      buffer->offset -= delta * buffer->stride;
      buffer->step_rate = upload[i]->glarray->InstanceDivisor;
      upload[i]->buffer = j++;
      upload[i]->offset = 0;
   }

   /* can we simply extend the current vb? */
   if (j == brw->vb.nr_current_buffers) {
      int delta = 0;
      for (i = 0; i < j; i++) {
         int d;

         if (brw->vb.current_buffers[i].handle != brw->vb.buffers[i].bo->handle ||
             brw->vb.current_buffers[i].stride != brw->vb.buffers[i].stride ||
             brw->vb.current_buffers[i].step_rate != brw->vb.buffers[i].step_rate)
            break;

         d = brw->vb.buffers[i].offset - brw->vb.current_buffers[i].offset;
         if (d < 0)
            break;
         if (i == 0)
            delta = d / brw->vb.current_buffers[i].stride;
         if (delta * brw->vb.current_buffers[i].stride != d)
            break;
      }

      if (i == j) {
         brw->vb.start_vertex_bias += delta;
         while (--j >= 0)
            drm_intel_bo_unreference(brw->vb.buffers[j].bo);
         j = 0;
      }
   }

   brw->vb.nr_buffers = j;

prepare:
   brw_prepare_query_begin(brw);
}
static nir_shader *
create_passthrough_tcs(void *mem_ctx, const struct brw_compiler *compiler,
                       const nir_shader_compiler_options *options,
                       const struct brw_tcs_prog_key *key)
{
   nir_builder b;
   nir_builder_init_simple_shader(&b, mem_ctx, MESA_SHADER_TESS_CTRL,
                                  options);
   nir_shader *nir = b.shader;
   nir_variable *var;
   nir_intrinsic_instr *load;
   nir_intrinsic_instr *store;
   nir_ssa_def *zero = nir_imm_int(&b, 0);
   nir_ssa_def *invoc_id =
      nir_load_system_value(&b, nir_intrinsic_load_invocation_id, 0);

   nir->info->inputs_read = key->outputs_written;
   nir->info->outputs_written = key->outputs_written;
   nir->info->tcs.vertices_out = key->input_vertices;
   nir->info->name = ralloc_strdup(nir, "passthrough");
   nir->num_uniforms = 8 * sizeof(uint32_t);

   var = nir_variable_create(nir, nir_var_uniform, glsl_vec4_type(), "hdr_0");
   var->data.location = 0;
   var = nir_variable_create(nir, nir_var_uniform, glsl_vec4_type(), "hdr_1");
   var->data.location = 1;

   /* Write the patch URB header. */
   for (int i = 0; i <= 1; i++) {
      load = nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform);
      load->num_components = 4;
      load->src[0] = nir_src_for_ssa(zero);
      nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
      nir_intrinsic_set_base(load, i * 4 * sizeof(uint32_t));
      nir_builder_instr_insert(&b, &load->instr);

      store = nir_intrinsic_instr_create(nir, nir_intrinsic_store_output);
      store->num_components = 4;
      store->src[0] = nir_src_for_ssa(&load->dest.ssa);
      store->src[1] = nir_src_for_ssa(zero);
      nir_intrinsic_set_base(store, VARYING_SLOT_TESS_LEVEL_INNER - i);
      nir_intrinsic_set_write_mask(store, WRITEMASK_XYZW);
      nir_builder_instr_insert(&b, &store->instr);
   }

   /* Copy inputs to outputs. */
   uint64_t varyings = key->outputs_written;

   while (varyings != 0) {
      const int varying = ffsll(varyings) - 1;

      load = nir_intrinsic_instr_create(nir,
                                        nir_intrinsic_load_per_vertex_input);
      load->num_components = 4;
      load->src[0] = nir_src_for_ssa(invoc_id);
      load->src[1] = nir_src_for_ssa(zero);
      nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
      nir_intrinsic_set_base(load, varying);
      nir_builder_instr_insert(&b, &load->instr);

      store = nir_intrinsic_instr_create(nir,
                                         nir_intrinsic_store_per_vertex_output);
      store->num_components = 4;
      store->src[0] = nir_src_for_ssa(&load->dest.ssa);
      store->src[1] = nir_src_for_ssa(invoc_id);
      store->src[2] = nir_src_for_ssa(zero);
      nir_intrinsic_set_base(store, varying);
      nir_intrinsic_set_write_mask(store, WRITEMASK_XYZW);
      nir_builder_instr_insert(&b, &store->instr);

      varyings &= ~BITFIELD64_BIT(varying);
   }

   nir_validate_shader(nir);

   nir = brw_preprocess_nir(compiler, nir);

   return nir;
}
inline void pawn_move(struct position *pos, struct move_array *m,
                      unsigned char pawns)
{
    uint64_t pawn_pos = pos->pieces[pawns];
    uint64_t moves;
    unsigned char index_to;

    switch (pawns & COLOR) {
    case WHITE:
        // nonpromotion forward moves
        moves = moveN(pawn_pos) & ~pos->allpieces & ~rank[7];
        for (int i = 0; i < 8; i++) {
            if ((index_to = ffsll(moves)) != 0) {
                index_to--;
                add_move(m, index_to + S, index_to, pawns, nopiece_n);
                moves &= notlinboard[index_to];
            } else {
                break;
            }
        }

        // forward 2 moves
        pawn_pos = pos->pieces[pawns];
        moves = moveN(moveN(pawn_pos) & ~pos->allpieces) & ~pos->allpieces &
                rank[3];
        for (int i = 0; i < 8; i++) {
            if ((index_to = ffsll(moves)) != 0) {
                index_to--;
                add_move_forward2(m, index_to + S + S, index_to, pawns,
                                  nopiece_n);
                moves &= notlinboard[index_to];
            } else {
                break;
            }
        }

        // nonpromotion attack west moves
        pawn_pos = pos->pieces[pawns];
        moves = moveNW(pawn_pos) & (pos->bpieces | ep_squares[1][pos->ep]) &
                ~file[7] & ~rank[7];
        for (int i = 0; i < 8; i++) {
            if ((index_to = ffsll(moves)) != 0) {
                index_to--;
                add_move(m, index_to + SE, index_to, pawns,
                         find_piece_ep(pos, index_to));
                moves &= notlinboard[index_to];
            } else {
                break;
            }
        }

        // nonpromotion attack east moves
        pawn_pos = pos->pieces[pawns];
        moves = moveNE(pawn_pos) & (pos->bpieces | ep_squares[1][pos->ep]) &
                ~file[0] & ~rank[7];
        for (int i = 0; i < 8; i++) {
            if ((index_to = ffsll(moves)) != 0) {
                index_to--;
                add_move(m, index_to + SW, index_to, pawns,
                         find_piece_ep(pos, index_to));
                moves &= notlinboard[index_to];
            } else {
                break;
            }
        }

        pawn_pos = pos->pieces[pawns];
        if ((pawn_pos & rank[6]) == 0) {
            // no promotion possibilities
            return;
        } else {
            // promotion forward moves
            moves = moveN(pawn_pos) & ~pos->allpieces & rank[7];
            for (int i = 0; i < 8; i++) {
                if ((index_to = ffsll(moves)) != 0) {
                    index_to--;
                    add_promotion_move(m, index_to + S, index_to, pawns,
                                       nopiece_n);
                    moves &= notlinboard[index_to];
                } else {
                    break;
                }
            }

            // promotion attack west moves
            pawn_pos = pos->pieces[pawns];
            moves = moveNW(pawn_pos) & pos->bpieces & ~file[7] & rank[7];
            for (int i = 0; i < 8; i++) {
                if ((index_to = ffsll(moves)) != 0) {
                    index_to--;
                    add_promotion_move(m, index_to + SE, index_to, pawns,
                                       find_piece(pos, index_to));
                    moves &= notlinboard[index_to];
                } else {
                    break;
                }
            }

            // promotion attack east moves
            pawn_pos = pos->pieces[pawns];
            moves = moveNE(pawn_pos) & pos->bpieces & ~file[0] & rank[7];
            for (int i = 0; i < 8; i++) {
                if ((index_to = ffsll(moves)) != 0) {
                    index_to--;
                    add_promotion_move(m, index_to + SW, index_to, pawns,
                                       find_piece(pos, index_to));
                    moves &= notlinboard[index_to];
                } else {
                    break;
                }
            }
        }
        break;

    default: // black
        // nonpromotion forward moves
        moves = moveS(pawn_pos) & ~pos->allpieces & ~rank[0];
        for (int i = 0; i < 8; i++) {
            if ((index_to = ffsll(moves)) != 0) {
                index_to--;
                add_move(m, index_to + N, index_to, pawns, nopiece_n);
                moves &= notlinboard[index_to];
            } else {
                break;
            }
        }

        // forward 2 moves
        pawn_pos = pos->pieces[pawns];
        moves = moveS(moveS(pawn_pos) & ~pos->allpieces) & ~pos->allpieces &
                rank[4];
        for (int i = 0; i < 8; i++) {
            if ((index_to = ffsll(moves)) != 0) {
                index_to--;
                add_move_forward2(m, index_to + N + N, index_to, pawns,
                                  nopiece_n);
                moves &= notlinboard[index_to];
            } else {
                break;
            }
        }

        // nonpromotion attack west moves
        pawn_pos = pos->pieces[pawns];
        moves = moveSW(pawn_pos) & (pos->wpieces | ep_squares[0][pos->ep]) &
                ~file[7] & ~rank[0];
        for (int i = 0; i < 8; i++) {
            if ((index_to = ffsll(moves)) != 0) {
                index_to--;
                add_move(m, index_to + NE, index_to, pawns,
                         find_piece_ep(pos, index_to));
                moves &= notlinboard[index_to];
            } else {
                break;
            }
        }

        // nonpromotion attack east moves
        pawn_pos = pos->pieces[pawns];
        moves = moveSE(pawn_pos) & (pos->wpieces | ep_squares[0][pos->ep]) &
                ~file[0] & ~rank[0];
        for (int i = 0; i < 8; i++) {
            if ((index_to = ffsll(moves)) != 0) {
                index_to--;
                add_move(m, index_to + NW, index_to, pawns,
                         find_piece_ep(pos, index_to));
                moves &= notlinboard[index_to];
            } else {
                break;
            }
        }

        pawn_pos = pos->pieces[pawns];
        if ((pawn_pos & rank[1]) == 0) {
            // no promotion possibilities
            return;
        } else {
            // promotion forward moves
            moves = moveS(pawn_pos) & ~pos->allpieces & rank[0];
            for (int i = 0; i < 8; i++) {
                if ((index_to = ffsll(moves)) != 0) {
                    index_to--;
                    add_promotion_move(m, index_to + N, index_to, pawns,
                                       nopiece_n);
                    moves &= notlinboard[index_to];
                } else {
                    break;
                }
            }

            // promotion attack west moves
            pawn_pos = pos->pieces[pawns];
            moves = moveSW(pawn_pos) & pos->wpieces & ~file[7] & rank[0];
            for (int i = 0; i < 8; i++) {
                if ((index_to = ffsll(moves)) != 0) {
                    index_to--;
                    add_promotion_move(m, index_to + NE, index_to, pawns,
                                       find_piece(pos, index_to));
                    moves &= notlinboard[index_to];
                } else {
                    break;
                }
            }

            // promotion attack east moves
            pawn_pos = pos->pieces[pawns];
            moves = moveSE(pawn_pos) & pos->wpieces & ~file[0] & rank[0];
            for (int i = 0; i < 8; i++) {
                if ((index_to = ffsll(moves)) != 0) {
                    index_to--;
                    add_promotion_move(m, index_to + NW, index_to, pawns,
                                       find_piece(pos, index_to));
                    moves &= notlinboard[index_to];
                } else {
                    break;
                }
            }
        }
        break;
    }
}