static INLINE boolean
nouveau_buffer_allocate(struct nouveau_screen *screen,
                        struct nv04_resource *buf, unsigned domain)
{
   uint32_t size = buf->base.width0;

   if (buf->base.bind & (PIPE_BIND_CONSTANT_BUFFER |
                         PIPE_BIND_COMPUTE_RESOURCE |
                         PIPE_BIND_SHADER_RESOURCE))
      size = align(size, 0x100);

   if (domain == NOUVEAU_BO_VRAM) {
      buf->mm = nouveau_mm_allocate(screen->mm_VRAM, size,
                                    &buf->bo, &buf->offset);
      if (!buf->bo)
         return nouveau_buffer_allocate(screen, buf, NOUVEAU_BO_GART);
      NOUVEAU_DRV_STAT(screen, buf_obj_current_bytes_vid, buf->base.width0);
   } else
   if (domain == NOUVEAU_BO_GART) {
      buf->mm = nouveau_mm_allocate(screen->mm_GART, size,
                                    &buf->bo, &buf->offset);
      if (!buf->bo)
         return FALSE;
      NOUVEAU_DRV_STAT(screen, buf_obj_current_bytes_sys, buf->base.width0);
   } else {
      assert(domain == 0);
      if (!nouveau_buffer_malloc(buf))
         return FALSE;
   }
   buf->domain = domain;
   if (buf->bo)
      buf->address = buf->bo->offset + buf->offset;

   return TRUE;
}
static inline bool
nouveau_buffer_allocate(struct nouveau_screen *screen,
                        struct nv04_resource *buf, unsigned domain)
{
   uint32_t size = align(buf->base.width0, 0x100);

   if (domain == NOUVEAU_BO_VRAM) {
      buf->mm = nouveau_mm_allocate(screen->mm_VRAM, size,
                                    &buf->bo, &buf->offset);
      if (!buf->bo)
         return nouveau_buffer_allocate(screen, buf, NOUVEAU_BO_GART);
      NOUVEAU_DRV_STAT(screen, buf_obj_current_bytes_vid, buf->base.width0);
   } else
   if (domain == NOUVEAU_BO_GART) {
      buf->mm = nouveau_mm_allocate(screen->mm_GART, size,
                                    &buf->bo, &buf->offset);
      if (!buf->bo)
         return false;
      NOUVEAU_DRV_STAT(screen, buf_obj_current_bytes_sys, buf->base.width0);
   } else {
      assert(domain == 0);
      if (!nouveau_buffer_malloc(buf))
         return false;
   }
   buf->domain = domain;
   if (buf->bo)
      buf->address = buf->bo->offset + buf->offset;

   util_range_set_empty(&buf->valid_buffer_range);

   return true;
}
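/* The NOUVEAU_DRV_STAT() and NOUVEAU_DRV_STAT_IFD() calls that appear
 * throughout these functions compile away entirely unless driver statistics
 * are enabled at build time. A minimal sketch of how such macros can be
 * defined follows; the per-screen counter-struct member names (stats.named)
 * are an assumption for illustration, not verified against nouveau_screen.h.
 */
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
#  define NOUVEAU_DRV_STAT(s, n, v) do {   /* bump counter n on screen s */  \
      (s)->stats.named.n += (v);                                             \
   } while (0)
#  define NOUVEAU_DRV_STAT_IFD(x) x  /* emit x only when stats are enabled */
#else
#  define NOUVEAU_DRV_STAT(s, n, v) do { } while (0)
#  define NOUVEAU_DRV_STAT_IFD(x)
#endif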
/* Flushes the transfer's data to the buffer object. If a shadow copy exists,
 * the user wrote into it, so refresh the staging map from it first; then the
 * data reaches the bo either via a GPU copy from the staging bo, via an
 * inline constant buffer push (for small word-aligned updates), or via a
 * generic pushbuf write.
 */
static void
nouveau_transfer_write(struct nouveau_context *nv, struct nouveau_transfer *tx,
                       unsigned offset, unsigned size)
{
   struct nv04_resource *buf = nv04_resource(tx->base.resource);
   uint8_t *data = tx->map + offset;
   const unsigned base = tx->base.box.x + offset;
   const boolean can_cb = !((base | size) & 3); /* 4-byte aligned start/size */

   if (buf->data)
      memcpy(data, buf->data + base, size);
   else
      buf->status |= NOUVEAU_BUFFER_STATUS_DIRTY;

   if (buf->domain == NOUVEAU_BO_VRAM)
      NOUVEAU_DRV_STAT(nv->screen, buf_write_bytes_staging_vid, size);
   if (buf->domain == NOUVEAU_BO_GART)
      NOUVEAU_DRV_STAT(nv->screen, buf_write_bytes_staging_sys, size);

   if (tx->bo)
      nv->copy_data(nv, buf->bo, buf->offset + base, buf->domain,
                    tx->bo, tx->offset + offset, NOUVEAU_BO_GART, size);
   else
   if ((buf->base.bind & PIPE_BIND_CONSTANT_BUFFER) && nv->push_cb && can_cb)
      nv->push_cb(nv, buf->bo, buf->domain, buf->offset, buf->base.width0,
                  base, size / 4, (const uint32_t *)data);
   else
      nv->push_data(nv, buf->bo, buf->offset + base, buf->domain, size, data);

   nouveau_fence_ref(nv->screen->fence.current, &buf->fence);
   nouveau_fence_ref(nv->screen->fence.current, &buf->fence_wr);
}
/* Inline constant buffer upload through the pushbuf (the push_cb hook used
 * above). This happens rather often with the d3d9 state tracker.
 */
void
nvc0_cb_push(struct nouveau_context *nv,
             struct nouveau_bo *bo, unsigned domain,
             unsigned base, unsigned size,
             unsigned offset, unsigned words, const uint32_t *data)
{
   struct nouveau_pushbuf *push = nv->pushbuf;

   NOUVEAU_DRV_STAT(nv->screen, constbuf_upload_count, 1);
   NOUVEAU_DRV_STAT(nv->screen, constbuf_upload_bytes, words * 4);

   assert(!(offset & 3));
   size = align(size, 0x100);

   BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
   PUSH_DATA (push, size);
   PUSH_DATAh(push, bo->offset + base);
   PUSH_DATA (push, bo->offset + base);

   while (words) {
      unsigned nr = PUSH_AVAIL(push);
      nr = MIN2(nr, words);
      nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN - 1);

      PUSH_SPACE(push, nr + 2);
      PUSH_REFN (push, bo, NOUVEAU_BO_WR | domain);
      BEGIN_1IC0(push, NVC0_3D(CB_POS), nr + 1);
      PUSH_DATA (push, offset);
      PUSH_DATAp(push, data, nr);

      words -= nr;
      data += nr;
      offset += nr * 4;
   }
}
static void
nouveau_buffer_transfer_unmap(struct pipe_context *pipe,
                              struct pipe_transfer *transfer)
{
   struct nouveau_context *nv = nouveau_context(pipe);
   struct nouveau_transfer *tx = nouveau_transfer(transfer);
   struct nv04_resource *buf = nv04_resource(transfer->resource);

   if (tx->base.usage & PIPE_TRANSFER_WRITE) {
      if (!(tx->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT) && tx->map)
         nouveau_transfer_write(nv, tx, 0, tx->base.box.width);

      if (likely(buf->domain)) {
         const uint8_t bind = buf->base.bind;
         /* make sure we invalidate dedicated caches */
         if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER))
            nv->vbo_dirty = TRUE;
         if (bind & (PIPE_BIND_CONSTANT_BUFFER))
            nv->cb_dirty = TRUE;
      }
   }

   if (!tx->bo && (tx->base.usage & PIPE_TRANSFER_WRITE))
      NOUVEAU_DRV_STAT(nv->screen, buf_write_bytes_direct, tx->base.box.width);

   nouveau_buffer_transfer_del(nv, tx);
   FREE(tx);
}
void
nvc0_miptree_transfer_unmap(struct pipe_context *pctx,
                            struct pipe_transfer *transfer)
{
   struct nvc0_context *nvc0 = nvc0_context(pctx);
   struct nvc0_transfer *tx = (struct nvc0_transfer *)transfer;
   struct nv50_miptree *mt = nv50_miptree(tx->base.resource);
   unsigned i;

   if (tx->base.usage & PIPE_TRANSFER_MAP_DIRECTLY) {
      pipe_resource_reference(&transfer->resource, NULL);
      FREE(tx);
      return;
   }

   if (tx->base.usage & PIPE_TRANSFER_WRITE) {
      for (i = 0; i < tx->nlayers; ++i) {
         nvc0->m2mf_copy_rect(nvc0, &tx->rect[0], &tx->rect[1],
                              tx->nblocksx, tx->nblocksy);
         if (mt->layout_3d)
            tx->rect[0].z++;
         else
            tx->rect[0].base += mt->layer_stride;
         tx->rect[1].base += tx->nblocksy * tx->base.stride;
      }
      NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_transfers_wr, 1);

      /* Allow the copies above to finish executing before freeing the source */
      nouveau_fence_work(nvc0->screen->base.fence.current,
                         nouveau_fence_unref_bo, tx->rect[1].bo);
   } else {
      nouveau_bo_ref(NULL, &tx->rect[1].bo);
   }
   if (tx->base.usage & PIPE_TRANSFER_READ)
      NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_transfers_rd, 1);

   pipe_resource_reference(&transfer->resource, NULL);
   FREE(tx);
}
void
nvc0_default_kick_notify(struct nouveau_pushbuf *push)
{
   struct nvc0_screen *screen = push->user_priv;

   if (screen) {
      nouveau_fence_next(&screen->base);
      nouveau_fence_update(&screen->base, true);
      if (screen->cur_ctx)
         screen->cur_ctx->state.flushed = true;
      NOUVEAU_DRV_STAT(&screen->base, pushbuf_count, 1);
   }
}
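/* nvc0_default_kick_notify() is invoked by libdrm_nouveau each time the
 * pushbuf is flushed to the kernel, which is why it is a natural place to
 * count pushbuf submissions. A minimal sketch of how it might be hooked up
 * when the screen's pushbuf is created, assuming the kick_notify and
 * user_priv members of struct nouveau_pushbuf (the exact registration site
 * is an assumption here):
 */
push->user_priv = screen;
push->kick_notify = nvc0_default_kick_notify;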
void
nvc0_bufctx_fence(struct nvc0_context *nvc0, struct nouveau_bufctx *bufctx,
                  bool on_flush)
{
   struct nouveau_list *list = on_flush ? &bufctx->current : &bufctx->pending;
   struct nouveau_list *it;
   NOUVEAU_DRV_STAT_IFD(unsigned count = 0);

   for (it = list->next; it != list; it = it->next) {
      struct nouveau_bufref *ref = (struct nouveau_bufref *)it;
      struct nv04_resource *res = ref->priv;
      if (res)
         nvc0_resource_validate(res, (unsigned)ref->priv_data);
      NOUVEAU_DRV_STAT_IFD(count++);
   }
   NOUVEAU_DRV_STAT(&nvc0->screen->base, resource_validate_count, count);
}
static void
nvc0_so_target_save_offset(struct pipe_context *pipe,
                           struct pipe_stream_output_target *ptarg,
                           unsigned index, bool *serialize)
{
   struct nvc0_so_target *targ = nvc0_so_target(ptarg);

   if (*serialize) {
      *serialize = false;
      PUSH_SPACE(nvc0_context(pipe)->base.pushbuf, 1);
      IMMED_NVC0(nvc0_context(pipe)->base.pushbuf, NVC0_3D(SERIALIZE), 0);

      NOUVEAU_DRV_STAT(nouveau_screen(pipe->screen), gpu_serialize_count, 1);
   }

   nvc0_query(targ->pq)->index = index;
   pipe->end_query(pipe, targ->pq);
}
static void
nouveau_buffer_destroy(struct pipe_screen *pscreen,
                       struct pipe_resource *presource)
{
   struct nv04_resource *res = nv04_resource(presource);

   nouveau_buffer_release_gpu_storage(res);

   if (res->data && !(res->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY))
      align_free(res->data);

   nouveau_fence_ref(NULL, &res->fence);
   nouveau_fence_ref(NULL, &res->fence_wr);

   FREE(res);

   NOUVEAU_DRV_STAT(nouveau_screen(pscreen), buf_obj_current_count, -1);
}
/* Copies data from the resource's buffer object into the transfer's
 * temporary GART staging bo, waits for the copy, and refreshes the shadow
 * copy (buf->data) if there is one.
 *
 * Maybe just migrate to GART right away if we actually need to do this. */
static boolean
nouveau_transfer_read(struct nouveau_context *nv, struct nouveau_transfer *tx)
{
   struct nv04_resource *buf = nv04_resource(tx->base.resource);
   const unsigned base = tx->base.box.x;
   const unsigned size = tx->base.box.width;

   NOUVEAU_DRV_STAT(nv->screen, buf_read_bytes_staging_vid, size);

   nv->copy_data(nv, tx->bo, tx->offset, NOUVEAU_BO_GART,
                 buf->bo, buf->offset + base, buf->domain, size);

   if (nouveau_bo_wait(tx->bo, NOUVEAU_BO_RD, nv->client))
      return FALSE;

   if (buf->data)
      memcpy(buf->data + base, tx->map, size);

   return TRUE;
}
static void
nvc0_resource_copy_region(struct pipe_context *pipe,
                          struct pipe_resource *dst, unsigned dst_level,
                          unsigned dstx, unsigned dsty, unsigned dstz,
                          struct pipe_resource *src, unsigned src_level,
                          const struct pipe_box *src_box)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   int ret;
   boolean m2mf;
   unsigned dst_layer = dstz, src_layer = src_box->z;

   if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
      nouveau_copy_buffer(&nvc0->base,
                          nv04_resource(dst), dstx,
                          nv04_resource(src), src_box->x, src_box->width);
      NOUVEAU_DRV_STAT(&nvc0->screen->base, buf_copy_bytes, src_box->width);
      return;
   }
   NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_copy_count, 1);

   /* 0 and 1 are equal, only supporting 0/1, 2, 4 and 8 */
   assert((src->nr_samples | 1) == (dst->nr_samples | 1));

   m2mf = (src->format == dst->format) ||
      (util_format_get_blocksizebits(src->format) ==
       util_format_get_blocksizebits(dst->format));

   nv04_resource(dst)->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;

   if (m2mf) {
      struct nv50_m2mf_rect drect, srect;
      unsigned i;
      unsigned nx = util_format_get_nblocksx(src->format, src_box->width);
      unsigned ny = util_format_get_nblocksy(src->format, src_box->height);

      nv50_m2mf_rect_setup(&drect, dst, dst_level, dstx, dsty, dstz);
      nv50_m2mf_rect_setup(&srect, src, src_level,
                           src_box->x, src_box->y, src_box->z);

      for (i = 0; i < src_box->depth; ++i) {
         nvc0->m2mf_copy_rect(nvc0, &drect, &srect, nx, ny);

         if (nv50_miptree(dst)->layout_3d)
            drect.z++;
         else
            drect.base += nv50_miptree(dst)->layer_stride;

         if (nv50_miptree(src)->layout_3d)
            srect.z++;
         else
            srect.base += nv50_miptree(src)->layer_stride;
      }
      return;
   }

   assert(nv50_2d_dst_format_faithful(dst->format));
   assert(nv50_2d_src_format_faithful(src->format));

   BCTX_REFN(nvc0->bufctx, 2D, nv04_resource(src), RD);
   BCTX_REFN(nvc0->bufctx, 2D, nv04_resource(dst), WR);
   nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx);
   nouveau_pushbuf_validate(nvc0->base.pushbuf);

   for (; dst_layer < dstz + src_box->depth; ++dst_layer, ++src_layer) {
      ret = nvc0_2d_texture_do_copy(nvc0->base.pushbuf,
                                    nv50_miptree(dst), dst_level,
                                    dstx, dsty, dst_layer,
                                    nv50_miptree(src), src_level,
                                    src_box->x, src_box->y, src_layer,
                                    src_box->width, src_box->height);
      if (ret)
         break;
   }
   nouveau_bufctx_reset(nvc0->bufctx, 0);
}
static boolean
nvc0_validate_tic(struct nvc0_context *nvc0, int s)
{
   uint32_t commands[32];
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nouveau_bo *txc = nvc0->screen->txc;
   unsigned i;
   unsigned n = 0;
   boolean need_flush = FALSE;

   for (i = 0; i < nvc0->num_textures[s]; ++i) {
      struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
      struct nv04_resource *res;
      const boolean dirty = !!(nvc0->textures_dirty[s] & (1 << i));

      if (!tic) {
         if (dirty)
            commands[n++] = (i << 1) | 0;
         continue;
      }
      res = nv04_resource(tic->pipe.texture);

      if (tic->id < 0) {
         tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);

         PUSH_SPACE(push, 17);
         BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2);
         PUSH_DATAh(push, txc->offset + (tic->id * 32));
         PUSH_DATA (push, txc->offset + (tic->id * 32));
         BEGIN_NVC0(push, NVC0_M2MF(LINE_LENGTH_IN), 2);
         PUSH_DATA (push, 32);
         PUSH_DATA (push, 1);
         BEGIN_NVC0(push, NVC0_M2MF(EXEC), 1);
         PUSH_DATA (push, 0x100111);
         BEGIN_NIC0(push, NVC0_M2MF(DATA), 8);
         PUSH_DATAp(push, &tic->tic[0], 8);

         need_flush = TRUE;
      } else
      if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
         BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
         PUSH_DATA (push, (tic->id << 4) | 1);
         NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_cache_flush_count, 1);
      }
      nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);

      res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
      res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;

      if (!dirty)
         continue;
      commands[n++] = (tic->id << 9) | (i << 1) | 1;

      BCTX_REFN(nvc0->bufctx_3d, TEX(s, i), res, RD);
   }
   for (; i < nvc0->state.num_textures[s]; ++i)
      commands[n++] = (i << 1) | 0;

   nvc0->state.num_textures[s] = nvc0->num_textures[s];

   if (n) {
      BEGIN_NIC0(push, NVC0_3D(BIND_TIC(s)), n);
      PUSH_DATAp(push, commands, n);
   }
   nvc0->textures_dirty[s] = 0;

   return need_flush;
}
void
nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
{
   struct push_context ctx;
   unsigned i, index_size;
   unsigned inst_count = info->instance_count;
   unsigned vert_count = info->count;
   unsigned prim;

   nvc0_push_context_init(nvc0, &ctx);

   nvc0_vertex_configure_translate(nvc0, info->index_bias);

   if (unlikely(ctx.edgeflag.enabled))
      nvc0_push_map_edgeflag(&ctx, nvc0, info->index_bias);

   ctx.prim_restart = info->primitive_restart;
   ctx.restart_index = info->restart_index;

   if (info->indexed) {
      nvc0_push_map_idxbuf(&ctx, nvc0);
      index_size = nvc0->idxbuf.index_size;

      if (info->primitive_restart) {
         BEGIN_NVC0(ctx.push, NVC0_3D(PRIM_RESTART_ENABLE), 2);
         PUSH_DATA (ctx.push, 1);
         PUSH_DATA (ctx.push, info->restart_index);
      } else
      if (nvc0->state.prim_restart) {
         IMMED_NVC0(ctx.push, NVC0_3D(PRIM_RESTART_ENABLE), 0);
      }
      nvc0->state.prim_restart = info->primitive_restart;
   } else {
      if (unlikely(info->count_from_stream_output)) {
         struct pipe_context *pipe = &nvc0->base.pipe;
         struct nvc0_so_target *targ;
         targ = nvc0_so_target(info->count_from_stream_output);
         pipe->get_query_result(pipe, targ->pq, TRUE, (void *)&vert_count);
         vert_count /= targ->stride;
      }
      ctx.idxbuf = NULL; /* shut up warnings */
      index_size = 0;
   }

   ctx.instance_id = info->start_instance;

   prim = nvc0_prim_gl(info->mode);
   do {
      PUSH_SPACE(ctx.push, 9);

      ctx.dest = nvc0_push_setup_vertex_array(nvc0, vert_count);
      if (unlikely(!ctx.dest))
         break;

      if (unlikely(ctx.need_vertex_id))
         nvc0_push_upload_vertex_ids(&ctx, nvc0, info);

      IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0);
      BEGIN_NVC0(ctx.push, NVC0_3D(VERTEX_BEGIN_GL), 1);
      PUSH_DATA (ctx.push, prim);
      switch (index_size) {
      case 1:
         disp_vertices_i08(&ctx, info->start, vert_count);
         break;
      case 2:
         disp_vertices_i16(&ctx, info->start, vert_count);
         break;
      case 4:
         disp_vertices_i32(&ctx, info->start, vert_count);
         break;
      default:
         assert(index_size == 0);
         disp_vertices_seq(&ctx, info->start, vert_count);
         break;
      }
      PUSH_SPACE(ctx.push, 1);
      IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_END_GL), 0);

      if (--inst_count) {
         prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
         ++ctx.instance_id;
      }
      nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX_TMP);
      nouveau_scratch_done(&nvc0->base);
   } while (inst_count);

   /* reset state and unmap buffers (no-op) */

   if (unlikely(!ctx.edgeflag.value)) {
      PUSH_SPACE(ctx.push, 1);
      IMMED_NVC0(ctx.push, NVC0_3D(EDGEFLAG), 1);
   }

   if (unlikely(ctx.need_vertex_id)) {
      PUSH_SPACE(ctx.push, 4);
      IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_ID_REPLACE), 0);
      BEGIN_NVC0(ctx.push, NVC0_3D(VERTEX_ATTRIB_FORMAT(1)), 1);
      PUSH_DATA (ctx.push,
                 NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST |
                 NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT |
                 NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32);
      IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_ARRAY_FETCH(1)), 0);
   }

   if (info->indexed)
      nouveau_resource_unmap(nv04_resource(nvc0->idxbuf.buffer));
   for (i = 0; i < nvc0->num_vtxbufs; ++i)
      nouveau_resource_unmap(nv04_resource(nvc0->vtxbuf[i].buffer));

   NOUVEAU_DRV_STAT(&nvc0->screen->base, draw_calls_fallback_count, 1);
}
struct pipe_resource *
nouveau_buffer_create(struct pipe_screen *pscreen,
                      const struct pipe_resource *templ)
{
   struct nouveau_screen *screen = nouveau_screen(pscreen);
   struct nv04_resource *buffer;
   bool ret;

   buffer = CALLOC_STRUCT(nv04_resource);
   if (!buffer)
      return NULL;

   buffer->base = *templ;
   buffer->vtbl = &nouveau_buffer_vtbl;
   pipe_reference_init(&buffer->base.reference, 1);
   buffer->base.screen = pscreen;

   if (buffer->base.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
                             PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
      buffer->domain = NOUVEAU_BO_GART;
   } else
   if (buffer->base.bind == 0 || (buffer->base.bind &
       (screen->vidmem_bindings & screen->sysmem_bindings))) {
      switch (buffer->base.usage) {
      case PIPE_USAGE_DEFAULT:
      case PIPE_USAGE_IMMUTABLE:
         buffer->domain = NV_VRAM_DOMAIN(screen);
         break;
      case PIPE_USAGE_DYNAMIC:
         /* For most apps, we'd have to do staging transfers to avoid sync
          * with this usage, and GART -> GART copies would be suboptimal.
          */
         buffer->domain = NV_VRAM_DOMAIN(screen);
         break;
      case PIPE_USAGE_STAGING:
      case PIPE_USAGE_STREAM:
         buffer->domain = NOUVEAU_BO_GART;
         break;
      default:
         assert(0);
         break;
      }
   } else {
      if (buffer->base.bind & screen->vidmem_bindings)
         buffer->domain = NV_VRAM_DOMAIN(screen);
      else
      if (buffer->base.bind & screen->sysmem_bindings)
         buffer->domain = NOUVEAU_BO_GART;
   }
   /* There can be very special situations where we want non-gpu-mapped
    * buffers, but never through this interface.
    */
   assert(buffer->domain);
   ret = nouveau_buffer_allocate(screen, buffer, buffer->domain);
   if (ret == false)
      goto fail;

   if (buffer->domain == NOUVEAU_BO_VRAM && screen->hint_buf_keep_sysmem_copy)
      nouveau_buffer_cache(NULL, buffer);

   NOUVEAU_DRV_STAT(screen, buf_obj_current_count, 1);

   util_range_init(&buffer->valid_buffer_range);

   return &buffer->base;

fail:
   FREE(buffer);
   return NULL;
}
static boolean
nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
                  boolean wait, union pipe_query_result *result)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nvc0_query *q = nvc0_query(pq);
   uint64_t *res64 = (uint64_t *)result;
   uint32_t *res32 = (uint32_t *)result;
   boolean *res8 = (boolean *)result;
   uint64_t *data64 = (uint64_t *)q->data;
   unsigned i;

#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
   if (q->type >= NVC0_QUERY_DRV_STAT(0) &&
       q->type <= NVC0_QUERY_DRV_STAT_LAST) {
      res64[0] = q->u.value;
      return TRUE;
   } else
#endif
   if (q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) {
      return nve4_mp_pm_query_result(nvc0, q, result, wait);
   }

   if (q->state != NVC0_QUERY_STATE_READY)
      nvc0_query_update(nvc0->screen->base.client, q);

   if (q->state != NVC0_QUERY_STATE_READY) {
      if (!wait) {
         if (q->state != NVC0_QUERY_STATE_FLUSHED) {
            q->state = NVC0_QUERY_STATE_FLUSHED;
            /* flush for silly apps that spin on GL_QUERY_RESULT_AVAILABLE */
            PUSH_KICK(nvc0->base.pushbuf);
         }
         return FALSE;
      }
      if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->screen->base.client))
         return FALSE;
      NOUVEAU_DRV_STAT(&nvc0->screen->base, query_sync_count, 1);
   }
   q->state = NVC0_QUERY_STATE_READY;

   switch (q->type) {
   case PIPE_QUERY_GPU_FINISHED:
      res8[0] = TRUE;
      break;
   case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
      res64[0] = q->data[1] - q->data[5];
      break;
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      res8[0] = q->data[1] != q->data[5];
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
   case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
      res64[0] = data64[0] - data64[2];
      break;
   case PIPE_QUERY_SO_STATISTICS:
      res64[0] = data64[0] - data64[4];
      res64[1] = data64[2] - data64[6];
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      res8[0] = data64[0] != data64[2];
      break;
   case PIPE_QUERY_TIMESTAMP:
      res64[0] = data64[1];
      break;
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      res64[0] = 1000000000;
      res8[8] = FALSE;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      res64[0] = data64[1] - data64[3];
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      for (i = 0; i < 10; ++i)
         res64[i] = data64[i * 2] - data64[24 + i * 2];
      break;
   case NVC0_QUERY_TFB_BUFFER_OFFSET:
      res32[0] = q->data[1];
      break;
   default:
      assert(0); /* can't happen, we don't create queries with invalid type */
      return FALSE;
   }

   return TRUE;
}
struct pipe_resource *
nouveau_buffer_create(struct pipe_screen *pscreen,
                      const struct pipe_resource *templ)
{
   struct nouveau_screen *screen = nouveau_screen(pscreen);
   struct nv04_resource *buffer;
   boolean ret;

   buffer = CALLOC_STRUCT(nv04_resource);
   if (!buffer)
      return NULL;

   buffer->base = *templ;
   buffer->vtbl = &nouveau_buffer_vtbl;
   pipe_reference_init(&buffer->base.reference, 1);
   buffer->base.screen = pscreen;

   if (buffer->base.bind &
       (screen->vidmem_bindings & screen->sysmem_bindings)) {
      switch (buffer->base.usage) {
      case PIPE_USAGE_DEFAULT:
      case PIPE_USAGE_IMMUTABLE:
      case PIPE_USAGE_STATIC:
         buffer->domain = NOUVEAU_BO_VRAM;
         break;
      case PIPE_USAGE_DYNAMIC:
         /* For most apps, we'd have to do staging transfers to avoid sync
          * with this usage, and GART -> GART copies would be suboptimal.
          */
         buffer->domain = NOUVEAU_BO_VRAM;
         break;
      case PIPE_USAGE_STAGING:
      case PIPE_USAGE_STREAM:
         buffer->domain = NOUVEAU_BO_GART;
         break;
      default:
         assert(0);
         break;
      }
   } else {
      if (buffer->base.bind & screen->vidmem_bindings)
         buffer->domain = NOUVEAU_BO_VRAM;
      else
      if (buffer->base.bind & screen->sysmem_bindings)
         buffer->domain = NOUVEAU_BO_GART;
   }
   ret = nouveau_buffer_allocate(screen, buffer, buffer->domain);
   if (ret == FALSE)
      goto fail;

   if (buffer->domain == NOUVEAU_BO_VRAM && screen->hint_buf_keep_sysmem_copy)
      nouveau_buffer_cache(NULL, buffer);

   NOUVEAU_DRV_STAT(screen, buf_obj_current_count, 1);

   return &buffer->base;

fail:
   FREE(buffer);
   return NULL;
}
/* Returns a pointer to a memory area representing a window into the
 * resource's data.
 *
 * This may or may not be the _actual_ memory area of the resource. However,
 * when calling nouveau_buffer_transfer_unmap, if it wasn't the actual memory
 * area, the contents of the returned map are copied over to the resource.
 *
 * The usage indicates what the caller plans to do with the map:
 *
 *   WRITE means that the user plans to write to it
 *
 *   READ means that the user plans to read from it
 *
 *   DISCARD_WHOLE_RESOURCE means that the whole resource is going to be
 *   potentially overwritten, and even the bits that aren't overwritten
 *   need not be preserved.
 *
 *   DISCARD_RANGE means that all the data in the specified range is going to
 *   be overwritten.
 *
 * The strategy for determining what kind of memory area to return is complex,
 * see comments inside of the function.
 */
static void *
nouveau_buffer_transfer_map(struct pipe_context *pipe,
                            struct pipe_resource *resource,
                            unsigned level, unsigned usage,
                            const struct pipe_box *box,
                            struct pipe_transfer **ptransfer)
{
   struct nouveau_context *nv = nouveau_context(pipe);
   struct nv04_resource *buf = nv04_resource(resource);
   struct nouveau_transfer *tx = MALLOC_STRUCT(nouveau_transfer);
   uint8_t *map;
   int ret;

   if (!tx)
      return NULL;
   nouveau_buffer_transfer_init(tx, resource, box, usage);
   *ptransfer = &tx->base;

   if (usage & PIPE_TRANSFER_READ)
      NOUVEAU_DRV_STAT(nv->screen, buf_transfers_rd, 1);
   if (usage & PIPE_TRANSFER_WRITE)
      NOUVEAU_DRV_STAT(nv->screen, buf_transfers_wr, 1);

   /* If we are trying to write to an uninitialized range, the user shouldn't
    * care what was there before. So we can treat the write as if the target
    * range were being discarded. Furthermore, since we know that even if this
    * buffer is busy due to GPU activity, because the contents were
    * uninitialized, the GPU can't care what was there, and so we can treat
    * the write as being unsynchronized.
    */
   if ((usage & PIPE_TRANSFER_WRITE) &&
       !util_ranges_intersect(&buf->valid_buffer_range,
                              box->x, box->x + box->width))
      usage |= PIPE_TRANSFER_DISCARD_RANGE | PIPE_TRANSFER_UNSYNCHRONIZED;

   if (usage & PIPE_TRANSFER_PERSISTENT)
      usage |= PIPE_TRANSFER_UNSYNCHRONIZED;

   if (buf->domain == NOUVEAU_BO_VRAM) {
      if (usage & NOUVEAU_TRANSFER_DISCARD) {
         /* Set up a staging area for the user to write to. It will be copied
          * back into VRAM on unmap. */
         if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)
            buf->status &= NOUVEAU_BUFFER_STATUS_REALLOC_MASK;
         nouveau_transfer_staging(nv, tx, TRUE);
      } else {
         if (buf->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
            /* The GPU is currently writing to this buffer. Copy its current
             * contents to a staging area in the GART. This is necessary since
             * not the whole area being mapped is being discarded.
             */
            if (buf->data) {
               align_free(buf->data);
               buf->data = NULL;
            }
            nouveau_transfer_staging(nv, tx, FALSE);
            nouveau_transfer_read(nv, tx);
         } else {
            /* The buffer is currently idle. Create a staging area for writes,
             * and make sure that the cached data is up-to-date. */
            if (usage & PIPE_TRANSFER_WRITE)
               nouveau_transfer_staging(nv, tx, TRUE);
            if (!buf->data)
               nouveau_buffer_cache(nv, buf);
         }
      }
      return buf->data ? (buf->data + box->x) : tx->map;
   } else
   if (unlikely(buf->domain == 0)) {
      return buf->data + box->x;
   }

   /* At this point, buf->domain == GART */

   if (nouveau_buffer_should_discard(buf, usage)) {
      int ref = buf->base.reference.count - 1;
      nouveau_buffer_reallocate(nv->screen, buf, buf->domain);
      if (ref > 0) /* any references inside context possible ? */
         nv->invalidate_resource_storage(nv, &buf->base, ref);
   }

   /* Note that nouveau_bo_map ends up doing a nouveau_bo_wait with the
    * relevant flags. If buf->mm is set, that means this resource is part of a
    * larger slab bo that holds multiple resources. So in that case, don't
    * wait on the whole slab and instead use the logic below to return a
    * reasonable buffer for that case.
    */
   ret = nouveau_bo_map(buf->bo,
                        buf->mm ? 0 : nouveau_screen_transfer_flags(usage),
                        nv->client);
   if (ret) {
      FREE(tx);
      return NULL;
   }
   map = (uint8_t *)buf->bo->map + buf->offset + box->x;

   /* using kernel fences only if !buf->mm */
   if ((usage & PIPE_TRANSFER_UNSYNCHRONIZED) || !buf->mm)
      return map;

   /* If the GPU is currently reading/writing this buffer, we shouldn't
    * interfere with its progress. So instead we either wait for the GPU to
    * complete its operation, or set up a staging area to perform our work in.
    */
   if (nouveau_buffer_busy(buf, usage & PIPE_TRANSFER_READ_WRITE)) {
      if (unlikely(usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)) {
         /* Discarding was not possible, must sync because
          * subsequent transfers might use UNSYNCHRONIZED. */
         nouveau_buffer_sync(buf, usage & PIPE_TRANSFER_READ_WRITE);
      } else
      if (usage & PIPE_TRANSFER_DISCARD_RANGE) {
         /* The whole range is being discarded, so it doesn't matter what was
          * there before. No need to copy anything over. */
         nouveau_transfer_staging(nv, tx, TRUE);
         map = tx->map;
      } else
      if (nouveau_buffer_busy(buf, PIPE_TRANSFER_READ)) {
         if (usage & PIPE_TRANSFER_DONTBLOCK)
            map = NULL;
         else
            nouveau_buffer_sync(buf, usage & PIPE_TRANSFER_READ_WRITE);
      } else {
         /* It is expected that the returned buffer be a representation of the
          * data in question, so we must copy it over from the buffer. */
         nouveau_transfer_staging(nv, tx, TRUE);
         if (tx->map)
            memcpy(tx->map, map, box->width);
         map = tx->map;
      }
   }
   if (!map)
      FREE(tx);
   return map;
}
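/* For reference, a hypothetical caller of the transfer interface described
 * above: upload `size` bytes at offset 0 of `res`, discarding the previous
 * contents of that range. The function name is invented for illustration;
 * pipe, res, data and size are assumed to be provided by the caller.
 */
static void
upload_example(struct pipe_context *pipe, struct pipe_resource *res,
               const void *data, unsigned size)
{
   struct pipe_box box;
   struct pipe_transfer *xfer;
   void *map;

   u_box_1d(0, size, &box);
   map = pipe->transfer_map(pipe, res, 0,
                            PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_RANGE,
                            &box, &xfer);
   if (map) {
      memcpy(map, data, size); /* may hit buf->data, tx->map, or the bo map */
      pipe->transfer_unmap(pipe, xfer);
   }
}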
void
nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
{
   struct push_context ctx;
   unsigned i, index_size;
   unsigned inst_count = info->instance_count;
   unsigned vert_count = info->count;
   unsigned prim;

   nvc0_push_context_init(nvc0, &ctx);

   nvc0_vertex_configure_translate(nvc0, info->index_bias);

   if (nvc0->state.index_bias) {
      /* this is already taken care of by translate */
      IMMED_NVC0(ctx.push, NVC0_3D(VB_ELEMENT_BASE), 0);
      nvc0->state.index_bias = 0;
   }

   if (unlikely(ctx.edgeflag.enabled))
      nvc0_push_map_edgeflag(&ctx, nvc0, info->index_bias);

   ctx.prim_restart = info->primitive_restart;
   ctx.restart_index = info->restart_index;

   if (info->primitive_restart) {
      /* NOTE: I hope we won't ever need that last index (~0).
       * If we do, we have to disable primitive restart here always and
       * use END,BEGIN to restart. (XXX: would that affect PrimitiveID ?)
       * We could also deactivate PRIM_RESTART_WITH_DRAW_ARRAYS temporarily,
       * and add manual restart to disp_vertices_seq.
       */
      BEGIN_NVC0(ctx.push, NVC0_3D(PRIM_RESTART_ENABLE), 2);
      PUSH_DATA (ctx.push, 1);
      PUSH_DATA (ctx.push, info->indexed ? 0xffffffff : info->restart_index);
   } else
   if (nvc0->state.prim_restart) {
      IMMED_NVC0(ctx.push, NVC0_3D(PRIM_RESTART_ENABLE), 0);
   }
   nvc0->state.prim_restart = info->primitive_restart;

   if (info->indexed) {
      nvc0_push_map_idxbuf(&ctx, nvc0);
      index_size = nvc0->idxbuf.index_size;
   } else {
      if (unlikely(info->count_from_stream_output)) {
         struct pipe_context *pipe = &nvc0->base.pipe;
         struct nvc0_so_target *targ;
         targ = nvc0_so_target(info->count_from_stream_output);
         pipe->get_query_result(pipe, targ->pq, true, (void *)&vert_count);
         vert_count /= targ->stride;
      }
      ctx.idxbuf = NULL; /* shut up warnings */
      index_size = 0;
   }

   ctx.instance_id = info->start_instance;

   prim = nvc0_prim_gl(info->mode);
   do {
      PUSH_SPACE(ctx.push, 9);

      ctx.dest = nvc0_push_setup_vertex_array(nvc0, vert_count);
      if (unlikely(!ctx.dest))
         break;

      if (unlikely(ctx.need_vertex_id))
         nvc0_push_upload_vertex_ids(&ctx, nvc0, info);

      if (nvc0->screen->eng3d->oclass < GM107_3D_CLASS)
         IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0);
      BEGIN_NVC0(ctx.push, NVC0_3D(VERTEX_BEGIN_GL), 1);
      PUSH_DATA (ctx.push, prim);
      switch (index_size) {
      case 1:
         disp_vertices_i08(&ctx, info->start, vert_count);
         break;
      case 2:
         disp_vertices_i16(&ctx, info->start, vert_count);
         break;
      case 4:
         disp_vertices_i32(&ctx, info->start, vert_count);
         break;
      default:
         assert(index_size == 0);
         disp_vertices_seq(&ctx, info->start, vert_count);
         break;
      }
      PUSH_SPACE(ctx.push, 1);
      IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_END_GL), 0);

      if (--inst_count) {
         prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
         ++ctx.instance_id;
      }
      nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX_TMP);
      nouveau_scratch_done(&nvc0->base);
   } while (inst_count);

   /* reset state and unmap buffers (no-op) */

   if (unlikely(!ctx.edgeflag.value)) {
      PUSH_SPACE(ctx.push, 1);
      IMMED_NVC0(ctx.push, NVC0_3D(EDGEFLAG), 1);
   }

   if (unlikely(ctx.need_vertex_id)) {
      PUSH_SPACE(ctx.push, 4);
      IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_ID_REPLACE), 0);
      BEGIN_NVC0(ctx.push, NVC0_3D(VERTEX_ATTRIB_FORMAT(1)), 1);
      PUSH_DATA (ctx.push,
                 NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST |
                 NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT |
                 NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32);
      IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_ARRAY_FETCH(1)), 0);
   }

   if (info->indexed)
      nouveau_resource_unmap(nv04_resource(nvc0->idxbuf.buffer));
   for (i = 0; i < nvc0->num_vtxbufs; ++i)
      nouveau_resource_unmap(nv04_resource(nvc0->vtxbuf[i].buffer));

   NOUVEAU_DRV_STAT(&nvc0->screen->base, draw_calls_fallback_count, 1);
}
static void *
nouveau_buffer_transfer_map(struct pipe_context *pipe,
                            struct pipe_resource *resource,
                            unsigned level, unsigned usage,
                            const struct pipe_box *box,
                            struct pipe_transfer **ptransfer)
{
   struct nouveau_context *nv = nouveau_context(pipe);
   struct nv04_resource *buf = nv04_resource(resource);
   struct nouveau_transfer *tx = MALLOC_STRUCT(nouveau_transfer);
   uint8_t *map;
   int ret;

   if (!tx)
      return NULL;
   nouveau_buffer_transfer_init(tx, resource, box, usage);
   *ptransfer = &tx->base;

   if (usage & PIPE_TRANSFER_READ)
      NOUVEAU_DRV_STAT(nv->screen, buf_transfers_rd, 1);
   if (usage & PIPE_TRANSFER_WRITE)
      NOUVEAU_DRV_STAT(nv->screen, buf_transfers_wr, 1);

   if (buf->domain == NOUVEAU_BO_VRAM) {
      if (usage & NOUVEAU_TRANSFER_DISCARD) {
         if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)
            buf->status &= NOUVEAU_BUFFER_STATUS_REALLOC_MASK;
         nouveau_transfer_staging(nv, tx, TRUE);
      } else {
         if (buf->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
            if (buf->data) {
               align_free(buf->data);
               buf->data = NULL;
            }
            nouveau_transfer_staging(nv, tx, FALSE);
            nouveau_transfer_read(nv, tx);
         } else {
            if (usage & PIPE_TRANSFER_WRITE)
               nouveau_transfer_staging(nv, tx, TRUE);
            if (!buf->data)
               nouveau_buffer_cache(nv, buf);
         }
      }
      return buf->data ? (buf->data + box->x) : tx->map;
   } else
   if (unlikely(buf->domain == 0)) {
      return buf->data + box->x;
   }

   if (nouveau_buffer_should_discard(buf, usage)) {
      int ref = buf->base.reference.count - 1;
      nouveau_buffer_reallocate(nv->screen, buf, buf->domain);
      if (ref > 0) /* any references inside context possible ? */
         nv->invalidate_resource_storage(nv, &buf->base, ref);
   }

   ret = nouveau_bo_map(buf->bo,
                        buf->mm ? 0 : nouveau_screen_transfer_flags(usage),
                        nv->client);
   if (ret) {
      FREE(tx);
      return NULL;
   }
   map = (uint8_t *)buf->bo->map + buf->offset + box->x;

   /* using kernel fences only if !buf->mm */
   if ((usage & PIPE_TRANSFER_UNSYNCHRONIZED) || !buf->mm)
      return map;

   if (nouveau_buffer_busy(buf, usage & PIPE_TRANSFER_READ_WRITE)) {
      if (unlikely(usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)) {
         /* Discarding was not possible, must sync because
          * subsequent transfers might use UNSYNCHRONIZED. */
         nouveau_buffer_sync(buf, usage & PIPE_TRANSFER_READ_WRITE);
      } else
      if (usage & PIPE_TRANSFER_DISCARD_RANGE) {
         nouveau_transfer_staging(nv, tx, TRUE);
         map = tx->map;
      } else
      if (nouveau_buffer_busy(buf, PIPE_TRANSFER_READ)) {
         if (usage & PIPE_TRANSFER_DONTBLOCK)
            map = NULL;
         else
            nouveau_buffer_sync(buf, usage & PIPE_TRANSFER_READ_WRITE);
      } else {
         nouveau_transfer_staging(nv, tx, TRUE);
         if (tx->map)
            memcpy(tx->map, map, box->width);
         map = tx->map;
      }
   }
   if (!map)
      FREE(tx);
   return map;
}