static inline bool
nv50_hw_sm_query_read_data(uint32_t count[32][4],
                           struct nv50_context *nv50, bool wait,
                           struct nv50_hw_query *hq,
                           const struct nv50_hw_sm_query_cfg *cfg,
                           unsigned mp_count)
{
   struct nv50_hw_sm_query *hsq = nv50_hw_sm_query(hq);
   unsigned p, c;

   for (p = 0; p < mp_count; ++p) {
      const unsigned b = (0x14 / 4) * p;

      for (c = 0; c < cfg->num_counters; ++c) {
         if (hq->data[b + 4] != hq->sequence) {
            if (!wait)
               return false;
            if (nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nv50->base.client))
               return false;
         }
         count[p][c] = hq->data[b + hsq->ctr[c]];
      }
   }
   return true;
}
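/* A minimal caller sketch (not from the driver): aggregate the per-MP
 * counters that nv50_hw_sm_query_read_data() fills in by summing them
 * across MPs, the way the SM-query result paths do. The function name and
 * parameters here are assumptions for illustration. */
static uint64_t
sum_sm_counters_sketch(uint32_t count[32][4],
                       unsigned num_counters, unsigned mp_count)
{
   uint64_t value = 0;
   unsigned p, c;

   for (c = 0; c < num_counters; ++c)
      for (p = 0; p < mp_count; ++p)
         value += count[p][c];
   return value;
}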
static boolean
nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
                  boolean wait, union pipe_query_result *result)
{
   struct nv50_context *nv50 = nv50_context(pipe);
   struct nv50_query *q = nv50_query(pq);
   uint64_t *res64 = (uint64_t *)result;
   boolean *res8 = (boolean *)result;
   uint64_t *data64 = (uint64_t *)q->data;

   if (!q->ready) /* update ? */
      q->ready = nv50_query_ready(q);
   if (!q->ready) {
      if (!wait) {
         /* for broken apps that spin on GL_QUERY_RESULT_AVAILABLE */
         if (!q->flushed) {
            q->flushed = TRUE;
            PUSH_KICK(nv50->base.pushbuf);
         }
         return FALSE;
      }
      if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nv50->screen->base.client))
         return FALSE;
   }
   q->ready = TRUE;

   switch (q->type) {
   case PIPE_QUERY_GPU_FINISHED:
      res8[0] = TRUE;
      break;
   case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
      res64[0] = q->data[1];
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
   case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
      res64[0] = data64[0];
      break;
   case PIPE_QUERY_SO_STATISTICS:
      res64[0] = data64[0];
      res64[1] = data64[1];
      break;
   case PIPE_QUERY_TIMESTAMP_DISJOINT: /* u32 sequence, u32 0, u64 time */
      res64[0] = 1000000000;
      res8[8] = (data64[0] == data64[2]) ? FALSE : TRUE;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      res64[0] = data64[1] - data64[3];
      break;
   default:
      return FALSE;
   }

   return TRUE;
}
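/* The "broken app" pattern the comment above guards against, sketched as
 * hypothetical GL client code: polling availability in a tight loop
 * without ever flushing. If the driver never submitted the query
 * commands, this would spin forever; the PUSH_KICK on the first unready
 * poll breaks that livelock. */
#include <GL/gl.h>

static void
spin_on_query_available_sketch(GLuint query)
{
   GLuint available = 0;

   while (!available)
      glGetQueryObjectuiv(query, GL_QUERY_RESULT_AVAILABLE, &available);
}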
static INLINE boolean
nvc0_mt_sync(struct nvc0_context *nvc0, struct nv50_miptree *mt,
             unsigned usage)
{
   if (!mt->base.mm) {
      uint32_t access = (usage & PIPE_TRANSFER_WRITE) ?
         NOUVEAU_BO_WR : NOUVEAU_BO_RD;
      return !nouveau_bo_wait(mt->base.bo, access, nvc0->base.client);
   }
   if (usage & PIPE_TRANSFER_WRITE)
      return !mt->base.fence || nouveau_fence_wait(mt->base.fence);
   return !mt->base.fence_wr || nouveau_fence_wait(mt->base.fence_wr);
}
static void
nouveau_dri2_finish_swap(DrawablePtr draw, unsigned int frame,
			 unsigned int tv_sec, unsigned int tv_usec,
			 struct nouveau_dri2_vblank_state *s)
{
	ScrnInfoPtr scrn = xf86ScreenToScrn(draw->pScreen);
	NVPtr pNv = NVPTR(scrn);
	PixmapPtr dst_pix;
	PixmapPtr src_pix = nouveau_dri2_buffer(s->src)->ppix;
	struct nouveau_bo *dst_bo;
	struct nouveau_bo *src_bo = nouveau_pixmap_bo(src_pix);
	struct nouveau_pushbuf *push = pNv->pushbuf;
	RegionRec reg;
	int type, ret;
	Bool front_updated, will_exchange;
	xf86CrtcPtr ref_crtc;

	REGION_INIT(0, &reg, (&(BoxRec){ 0, 0, draw->width, draw->height }), 0);
	REGION_TRANSLATE(0, &reg, draw->x, draw->y);

	/* Main crtc for this drawable shall finally deliver pageflip event. */
	ref_crtc = nouveau_pick_best_crtc(scrn, FALSE, draw->x, draw->y,
					  draw->width, draw->height);

	/* Update frontbuffer pixmap and name: Could have changed due to
	 * window (un)redirection as part of compositing.
	 */
	front_updated = update_front(draw, s->dst);

	/* Assign frontbuffer pixmap, after update in update_front() */
	dst_pix = nouveau_dri2_buffer(s->dst)->ppix;
	dst_bo = nouveau_pixmap_bo(dst_pix);

	/* Throttle on the previous frame before swapping */
	nouveau_bo_wait(dst_bo, NOUVEAU_BO_RD, push->client);

	/* Swap by buffer exchange possible? */
	will_exchange = front_updated && can_exchange(draw, dst_pix, src_pix);

	/* Only emit a wait for vblank pushbuf here if this is a copy-swap, or
	 * if it is a kms pageflip-swap on an old kernel. Pure exchange swaps
	 * don't need sync to vblank. kms pageflip-swaps on Linux 3.13+ are
	 * synced to vblank in the kms driver, so we must not sync here, or
	 * framerate will be cut in half!
	 */
	if (can_sync_to_vblank(draw) &&
	    (!will_exchange ||
	     (!pNv->has_async_pageflip &&
	      nouveau_exa_pixmap_is_onscreen(dst_pix)))) {
		/* Reference the back buffer to sync it to vblank */
		nouveau_pushbuf_refn(push, &(struct nouveau_pushbuf_refn) {
					     src_bo,
					     NOUVEAU_BO_VRAM | NOUVEAU_BO_RD
				     }, 1);
void
nv50_hw_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method,
                             struct nv50_query *q, unsigned result_offset)
{
   struct nv50_hw_query *hq = nv50_hw_query(q);

   nv50_hw_query_update(q);
   if (hq->state != NV50_HW_QUERY_STATE_READY)
      nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, push->client);
   hq->state = NV50_HW_QUERY_STATE_READY;

   BEGIN_NV04(push, SUBC_3D(method), 1);
   PUSH_DATA (push, hq->data[result_offset / 4]);
}
/* Maybe just migrate to GART right away if we actually need to do this. */
static boolean
nouveau_transfer_read(struct nouveau_context *nv, struct nouveau_transfer *tx)
{
   struct nv04_resource *buf = nv04_resource(tx->base.resource);
   const unsigned base = tx->base.box.x;
   const unsigned size = tx->base.box.width;

   nv->copy_data(nv, tx->bo, tx->offset, NOUVEAU_BO_GART,
                 buf->bo, buf->offset + base, buf->domain, size);

   if (nouveau_bo_wait(tx->bo, NOUVEAU_BO_RD, nv->client))
      return FALSE;

   if (buf->data)
      memcpy(buf->data + base, tx->map, size);

   return TRUE;
}
static void
nouveau_finish(struct gl_context *ctx)
{
	struct nouveau_context *nctx = to_nouveau_context(ctx);
	struct nouveau_pushbuf *push = context_push(ctx);
	struct nouveau_pushbuf_refn refn =
		{ nctx->fence, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR };

	nouveau_flush(ctx);

	if (!nouveau_pushbuf_space(push, 16, 0, 0) &&
	    !nouveau_pushbuf_refn(push, &refn, 1)) {
		PUSH_DATA(push, 0);
		PUSH_KICK(push);
	}

	nouveau_bo_wait(nctx->fence, NOUVEAU_BO_RDWR, context_client(ctx));
}
/* Copies data from the resource into the transfer's temporary GART
 * buffer. Also updates buf->data if present.
 *
 * Maybe just migrate to GART right away if we actually need to do this.
 */
static bool
nouveau_transfer_read(struct nouveau_context *nv, struct nouveau_transfer *tx)
{
   struct nv04_resource *buf = nv04_resource(tx->base.resource);
   const unsigned base = tx->base.box.x;
   const unsigned size = tx->base.box.width;

   NOUVEAU_DRV_STAT(nv->screen, buf_read_bytes_staging_vid, size);

   nv->copy_data(nv, tx->bo, tx->offset, NOUVEAU_BO_GART,
                 buf->bo, buf->offset + base, buf->domain, size);

   if (nouveau_bo_wait(tx->bo, NOUVEAU_BO_RD, nv->client))
      return false;

   if (buf->data)
      memcpy(buf->data + base, tx->map, size);

   return true;
}
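/* A hypothetical caller sketch (the function name and the exact flow are
 * assumptions, not from the code above): a read transfer downloads into
 * the staging bo first, then hands out the CPU-visible mapping. */
static void *
transfer_map_read_sketch(struct nouveau_context *nv,
                         struct nouveau_transfer *tx, unsigned usage)
{
   if (usage & PIPE_TRANSFER_READ) {
      if (!nouveau_transfer_read(nv, tx))
         return NULL; /* the bo wait failed, bail out */
   }
   return tx->map; /* CPU copy of the requested range */
}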
static boolean
nv50_hw_get_query_result(struct nv50_context *nv50, struct nv50_query *q,
                         boolean wait, union pipe_query_result *result)
{
   struct nv50_hw_query *hq = nv50_hw_query(q);
   uint64_t *res64 = (uint64_t *)result;
   uint32_t *res32 = (uint32_t *)result;
   uint8_t *res8 = (uint8_t *)result;
   uint64_t *data64 = (uint64_t *)hq->data;
   int i;

   if (hq->state != NV50_HW_QUERY_STATE_READY)
      nv50_hw_query_update(q);

   if (hq->state != NV50_HW_QUERY_STATE_READY) {
      if (!wait) {
         /* for broken apps that spin on GL_QUERY_RESULT_AVAILABLE */
         if (hq->state != NV50_HW_QUERY_STATE_FLUSHED) {
            hq->state = NV50_HW_QUERY_STATE_FLUSHED;
            PUSH_KICK(nv50->base.pushbuf);
         }
         return false;
      }
      if (nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nv50->screen->base.client))
         return false;
   }
   hq->state = NV50_HW_QUERY_STATE_READY;

   switch (q->type) {
   case PIPE_QUERY_GPU_FINISHED:
      res8[0] = true;
      break;
   case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
      res64[0] = hq->data[1] - hq->data[5];
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
   case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
      res64[0] = data64[0] - data64[2];
      break;
   case PIPE_QUERY_SO_STATISTICS:
      res64[0] = data64[0] - data64[4];
      res64[1] = data64[2] - data64[6];
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      for (i = 0; i < 8; ++i)
         res64[i] = data64[i * 2] - data64[16 + i * 2];
      break;
   case PIPE_QUERY_TIMESTAMP:
      res64[0] = data64[1];
      break;
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      res64[0] = 1000000000;
      res8[8] = false;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      res64[0] = data64[1] - data64[3];
      break;
   case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
      res32[0] = hq->data[1];
      break;
   default:
      assert(0);
      return false;
   }

   return true;
}
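/* A hypothetical C view of the occlusion-counter slice of the query
 * buffer, inferred from the arithmetic above (hq->data[1] - hq->data[5]
 * for the count, data64[1] - data64[3] for the elapsed time) and the
 * layout comment "u32 sequence, u32 count, u64 time". The field names
 * and the end-before-begin ordering are assumptions for illustration. */
struct occlusion_query_layout_sketch {
   uint32_t sequence_end;   /* hq->data[0] */
   uint32_t count_end;      /* hq->data[1] */
   uint64_t time_end;       /* data64[1]   */
   uint32_t sequence_begin; /* hq->data[4] */
   uint32_t count_begin;    /* hq->data[5] */
   uint64_t time_begin;     /* data64[3]   */
};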
static boolean
nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
                  boolean wait, union pipe_query_result *result)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nvc0_query *q = nvc0_query(pq);
   uint64_t *res64 = (uint64_t*)result;
   uint32_t *res32 = (uint32_t*)result;
   boolean *res8 = (boolean*)result;
   uint64_t *data64 = (uint64_t *)q->data;
   unsigned i;

#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
   if (q->type >= NVC0_QUERY_DRV_STAT(0) &&
       q->type <= NVC0_QUERY_DRV_STAT_LAST) {
      res64[0] = q->u.value;
      return TRUE;
   } else
#endif
   if (q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) {
      return nve4_mp_pm_query_result(nvc0, q, result, wait);
   }

   if (q->state != NVC0_QUERY_STATE_READY)
      nvc0_query_update(nvc0->screen->base.client, q);

   if (q->state != NVC0_QUERY_STATE_READY) {
      if (!wait) {
         if (q->state != NVC0_QUERY_STATE_FLUSHED) {
            q->state = NVC0_QUERY_STATE_FLUSHED;
            /* flush for silly apps that spin on GL_QUERY_RESULT_AVAILABLE */
            PUSH_KICK(nvc0->base.pushbuf);
         }
         return FALSE;
      }
      if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->screen->base.client))
         return FALSE;
      NOUVEAU_DRV_STAT(&nvc0->screen->base, query_sync_count, 1);
   }
   q->state = NVC0_QUERY_STATE_READY;

   switch (q->type) {
   case PIPE_QUERY_GPU_FINISHED:
      res8[0] = TRUE;
      break;
   case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
      res64[0] = q->data[1] - q->data[5];
      break;
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      res8[0] = q->data[1] != q->data[5];
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
   case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
      res64[0] = data64[0] - data64[2];
      break;
   case PIPE_QUERY_SO_STATISTICS:
      res64[0] = data64[0] - data64[4];
      res64[1] = data64[2] - data64[6];
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      res8[0] = data64[0] != data64[2];
      break;
   case PIPE_QUERY_TIMESTAMP:
      res64[0] = data64[1];
      break;
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      res64[0] = 1000000000;
      res8[8] = FALSE;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      res64[0] = data64[1] - data64[3];
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      for (i = 0; i < 10; ++i)
         res64[i] = data64[i * 2] - data64[24 + i * 2];
      break;
   case NVC0_QUERY_TFB_BUFFER_OFFSET:
      res32[0] = q->data[1];
      break;
   default:
      assert(0); /* can't happen, we don't create queries with invalid type */
      return FALSE;
   }

   return TRUE;
}
/* Metric calculations:
 * sum(x) ... sum of x over all MPs
 * avg(x) ... average of x over all MPs
 *
 * IPC              : sum(inst_executed) / clock
 * INST_REPLAY_OHEAD: (sum(inst_issued) - sum(inst_executed)) / sum(inst_issued)
 * MP_OCCUPANCY     : avg((active_warps / 64) / active_cycles)
 * MP_EFFICIENCY    : avg(active_cycles / clock)
 *
 * NOTE: Interpretation of IPC requires knowledge of MP count.
 */
static boolean
nve4_mp_pm_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
                        void *result, boolean wait)
{
   uint32_t count[32][4];
   uint64_t value = 0;
   unsigned mp_count = MIN2(nvc0->screen->mp_count_compute, 32);
   unsigned p, c, d;
   const struct nve4_mp_pm_query_cfg *cfg;

   cfg = &nve4_mp_pm_queries[q->type - PIPE_QUERY_DRIVER_SPECIFIC];

   for (p = 0; p < mp_count; ++p) {
      const unsigned b = (0x60 / 4) * p;

      for (c = 0; c < cfg->num_counters; ++c) {
         count[p][c] = 0;
         for (d = 0; d < ((q->ctr[c] & ~3) ? 1 : 4); ++d) {
            if (q->data[b + 20 + d] != q->sequence) {
               if (!wait)
                  return FALSE;
               if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->base.client))
                  return FALSE;
            }
            if (q->ctr[c] & ~0x3)
               count[p][c] = q->data[b + 16 + (q->ctr[c] & 3)];
            else
               count[p][c] += q->data[b + d * 4 + q->ctr[c]];
         }
      }
   }

   if (cfg->op == NVE4_COUNTER_OPn_SUM) {
      for (c = 0; c < cfg->num_counters; ++c)
         for (p = 0; p < mp_count; ++p)
            value += count[p][c];
      value = (value * cfg->norm[0]) / cfg->norm[1];
   } else
   if (cfg->op == NVE4_COUNTER_OPn_OR) {
      uint32_t v = 0;
      for (c = 0; c < cfg->num_counters; ++c)
         for (p = 0; p < mp_count; ++p)
            v |= count[p][c];
      value = (v * cfg->norm[0]) / cfg->norm[1];
   } else
   if (cfg->op == NVE4_COUNTER_OPn_AND) {
      uint32_t v = ~0;
      for (c = 0; c < cfg->num_counters; ++c)
         for (p = 0; p < mp_count; ++p)
            v &= count[p][c];
      value = (v * cfg->norm[0]) / cfg->norm[1];
   } else
   if (cfg->op == NVE4_COUNTER_OP2_REL_SUM_MM) {
      uint64_t v[2] = { 0, 0 };
      for (p = 0; p < mp_count; ++p) {
         v[0] += count[p][0];
         v[1] += count[p][1];
      }
      if (v[0])
         value = ((v[0] - v[1]) * cfg->norm[0]) / (v[0] * cfg->norm[1]);
   } else
   if (cfg->op == NVE4_COUNTER_OP2_DIV_SUM_M0) {
      for (p = 0; p < mp_count; ++p)
         value += count[p][0];
      if (count[0][1])
         value = (value * cfg->norm[0]) / (count[0][1] * cfg->norm[1]);
      else
         value = 0;
   } else
   if (cfg->op == NVE4_COUNTER_OP2_AVG_DIV_MM) {
      unsigned mp_used = 0;
      for (p = 0; p < mp_count; ++p, mp_used += !!count[p][0])
         if (count[p][1])
            value += (count[p][0] * cfg->norm[0]) / count[p][1];
      if (mp_used)
         value /= mp_used * cfg->norm[1];
   } else
   if (cfg->op == NVE4_COUNTER_OP2_AVG_DIV_M0) {
      unsigned mp_used = 0;
      for (p = 0; p < mp_count; ++p, mp_used += !!count[p][0])
         value += count[p][0];
      if (count[0][1] && mp_used) {
         value *= cfg->norm[0];
         value /= count[0][1] * mp_used * cfg->norm[1];
      } else {
         value = 0;
      }
   }

   *(uint64_t *)result = value;
   return TRUE;
}
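/* A minimal worked sketch (not from the driver) of the first two metric
 * formulas in the comment above, assuming the per-MP sums have already
 * been gathered. With sum(inst_issued) = 1000 and sum(inst_executed) =
 * 900, INST_REPLAY_OHEAD is (1000 - 900) / 1000 = 0.1, i.e. 10% of
 * issue slots were replays. */
static double
inst_replay_overhead_sketch(uint64_t sum_issued, uint64_t sum_executed)
{
   /* (sum(inst_issued) - sum(inst_executed)) / sum(inst_issued) */
   return sum_issued ?
      (double)(sum_issued - sum_executed) / (double)sum_issued : 0.0;
}

static double
ipc_sketch(uint64_t sum_executed, uint64_t clock)
{
   /* sum(inst_executed) / clock; per the NOTE above, interpreting the
    * result also requires knowing the MP count. */
   return clock ? (double)sum_executed / (double)clock : 0.0;
}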
void
nouveau_dri2_copy_region2(ScreenPtr pScreen, DrawablePtr pDraw,
			  RegionPtr pRegion, DRI2BufferPtr pDstBuffer,
			  DRI2BufferPtr pSrcBuffer)
{
	struct nouveau_dri2_buffer *src = nouveau_dri2_buffer(pSrcBuffer);
	struct nouveau_dri2_buffer *dst = nouveau_dri2_buffer(pDstBuffer);
	NVPtr pNv = NVPTR(xf86ScreenToScrn(pScreen));
	RegionPtr pCopyClip;
	GCPtr pGC;
	DrawablePtr src_draw, dst_draw;
	Bool translate = FALSE;
	int off_x = 0, off_y = 0;

	src_draw = &src->ppix->drawable;
	dst_draw = &dst->ppix->drawable;
#if 0
	ErrorF("attachments src %d, dst %d, drawable %p %p pDraw %p\n",
	       src->base.attachment, dst->base.attachment,
	       src_draw, dst_draw, pDraw);
#endif
	if (src->base.attachment == DRI2BufferFrontLeft)
		src_draw = pDraw;
	if (dst->base.attachment == DRI2BufferFrontLeft) {
#ifdef NOUVEAU_PIXMAP_SHARING
		if (pDraw->pScreen != pScreen) {
			dst_draw = DRI2UpdatePrime(pDraw, pDstBuffer);
			if (!dst_draw)
				return;
		} else
#endif
			dst_draw = pDraw;
		if (dst_draw != pDraw)
			translate = TRUE;
	}

	if (translate && pDraw->type == DRAWABLE_WINDOW) {
#ifdef COMPOSITE
		PixmapPtr pPix = get_drawable_pixmap(pDraw);
		off_x = -pPix->screen_x;
		off_y = -pPix->screen_y;
#endif
		off_x += pDraw->x;
		off_y += pDraw->y;
	}

	pGC = GetScratchGC(pDraw->depth, pScreen);
	pCopyClip = REGION_CREATE(pScreen, NULL, 0);
	REGION_COPY(pScreen, pCopyClip, pRegion);
	if (translate) {
		REGION_TRANSLATE(pScreen, pCopyClip, off_x, off_y);
	}
	pGC->funcs->ChangeClip(pGC, CT_REGION, pCopyClip, 0);
	ValidateGC(dst_draw, pGC);

	/* If this is a full buffer swap or frontbuffer flush, throttle on
	 * the previous one.
	 */
	if (dst->base.attachment == DRI2BufferFrontLeft &&
	    REGION_NUM_RECTS(pRegion) == 1) {
		BoxPtr extents = REGION_EXTENTS(pScreen, pRegion);

		if (extents->x1 == 0 && extents->y1 == 0 &&
		    extents->x2 == pDraw->width &&
		    extents->y2 == pDraw->height) {
			PixmapPtr fpix = get_drawable_pixmap(dst_draw);
			struct nouveau_bo *bo = nouveau_pixmap_bo(fpix);

			if (bo)
				nouveau_bo_wait(bo, NOUVEAU_BO_RD, pNv->client);
		}
	}

	pGC->ops->CopyArea(src_draw, dst_draw, pGC, 0, 0,
			   pDraw->width, pDraw->height, off_x, off_y);
	FreeScratchGC(pGC);
}
static boolean
nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
                  boolean wait, union pipe_query_result *result)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nvc0_query *q = nvc0_query(pq);
   uint64_t *res64 = (uint64_t*)result;
   uint32_t *res32 = (uint32_t*)result;
   boolean *res8 = (boolean*)result;
   uint64_t *data64 = (uint64_t *)q->data;
   unsigned i;

   if (q->state != NVC0_QUERY_STATE_READY)
      nvc0_query_update(nvc0->screen->base.client, q);

   if (q->state != NVC0_QUERY_STATE_READY) {
      if (!wait) {
         if (q->state != NVC0_QUERY_STATE_FLUSHED) {
            q->state = NVC0_QUERY_STATE_FLUSHED;
            /* flush for silly apps that spin on GL_QUERY_RESULT_AVAILABLE */
            PUSH_KICK(nvc0->base.pushbuf);
         }
         return FALSE;
      }
      if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->screen->base.client))
         return FALSE;
   }
   q->state = NVC0_QUERY_STATE_READY;

   switch (q->type) {
   case PIPE_QUERY_GPU_FINISHED:
      res8[0] = TRUE;
      break;
   case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
      res64[0] = q->data[1] - q->data[5];
      break;
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      res8[0] = q->data[1] != q->data[5];
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
   case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
      res64[0] = data64[0] - data64[2];
      break;
   case PIPE_QUERY_SO_STATISTICS:
      res64[0] = data64[0] - data64[4];
      res64[1] = data64[2] - data64[6];
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      res8[0] = data64[0] != data64[2];
      break;
   case PIPE_QUERY_TIMESTAMP:
      res64[0] = data64[1];
      break;
   case PIPE_QUERY_TIMESTAMP_DISJOINT: /* u32 sequence, u32 0, u64 time */
      res64[0] = 1000000000;
      res8[8] = (data64[1] == data64[3]) ? FALSE : TRUE;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      res64[0] = data64[1] - data64[3];
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      for (i = 0; i < 10; ++i)
         res64[i] = data64[i * 2] - data64[24 + i * 2];
      break;
   case NVC0_QUERY_TFB_BUFFER_OFFSET:
      res32[0] = q->data[1];
      break;
   default:
      return FALSE;
   }

   return TRUE;
}