void nvc0_launch_grid(struct pipe_context *pipe, const uint *block_layout, const uint *grid_layout, uint32_t label, const void *input) { struct nvc0_context *nvc0 = nvc0_context(pipe); struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_program *cp = nvc0->compprog; unsigned s, i; int ret; ret = !nvc0_compute_state_validate(nvc0); if (ret) goto out; nvc0_compute_upload_input(nvc0, input); BEGIN_NVC0(push, NVC0_COMPUTE(CP_START_ID), 1); PUSH_DATA (push, nvc0_program_symbol_offset(cp, label)); BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_POS_ALLOC), 3); PUSH_DATA (push, align(cp->cp.lmem_size, 0x10)); PUSH_DATA (push, 0); PUSH_DATA (push, 0x800); /* WARP_CSTACK_SIZE */ BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 3); PUSH_DATA (push, align(cp->cp.smem_size, 0x100)); PUSH_DATA (push, block_layout[0] * block_layout[1] * block_layout[2]); PUSH_DATA (push, cp->num_barriers); BEGIN_NVC0(push, NVC0_COMPUTE(CP_GPR_ALLOC), 1); PUSH_DATA (push, cp->num_gprs); /* grid/block setup */ BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2); PUSH_DATA (push, (grid_layout[1] << 16) | grid_layout[0]); PUSH_DATA (push, grid_layout[2]); BEGIN_NVC0(push, NVC0_COMPUTE(BLOCKDIM_YX), 2); PUSH_DATA (push, (block_layout[1] << 16) | block_layout[0]); PUSH_DATA (push, block_layout[2]); /* launch preliminary setup */ BEGIN_NVC0(push, NVC0_COMPUTE(GRIDID), 1); PUSH_DATA (push, 0x1); BEGIN_NVC0(push, SUBC_COMPUTE(0x036c), 1); PUSH_DATA (push, 0); BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1); PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8); /* kernel launching */ BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_BEGIN), 1); PUSH_DATA (push, 0); BEGIN_NVC0(push, SUBC_COMPUTE(0x0a08), 1); PUSH_DATA (push, 0); BEGIN_NVC0(push, NVC0_COMPUTE(LAUNCH), 1); PUSH_DATA (push, 0x1000); BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_END), 1); PUSH_DATA (push, 0); BEGIN_NVC0(push, SUBC_COMPUTE(0x0360), 1); PUSH_DATA (push, 0x1); /* rebind all the 3D constant buffers * (looks like binding a CB on COMPUTE clobbers 3D state) */ nvc0->dirty |= NVC0_NEW_CONSTBUF; for (s = 0; s < 6; s++) { for (i = 0; i < NVC0_MAX_PIPE_CONSTBUFS; i++) if (nvc0->constbuf[s][i].u.buf) nvc0->constbuf_dirty[s] |= 1 << i; } memset(nvc0->state.uniform_buffer_bound, 0, sizeof(nvc0->state.uniform_buffer_bound)); out: if (ret) NOUVEAU_ERR("Failed to launch grid !\n"); }
void nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) { struct nvc0_context *nvc0 = nvc0_context(pipe); struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_program *cp = nvc0->compprog; int ret; ret = !nvc0_state_validate_cp(nvc0, ~0); if (ret) { NOUVEAU_ERR("Failed to launch grid !\n"); return; } nvc0_compute_upload_input(nvc0, info); BEGIN_NVC0(push, NVC0_CP(CP_START_ID), 1); PUSH_DATA (push, nvc0_program_symbol_offset(cp, info->pc)); BEGIN_NVC0(push, NVC0_CP(LOCAL_POS_ALLOC), 3); PUSH_DATA (push, (cp->hdr[1] & 0xfffff0) + align(cp->cp.lmem_size, 0x10)); PUSH_DATA (push, 0); PUSH_DATA (push, 0x800); /* WARP_CSTACK_SIZE */ BEGIN_NVC0(push, NVC0_CP(SHARED_SIZE), 3); PUSH_DATA (push, align(cp->cp.smem_size, 0x100)); PUSH_DATA (push, info->block[0] * info->block[1] * info->block[2]); PUSH_DATA (push, cp->num_barriers); BEGIN_NVC0(push, NVC0_CP(CP_GPR_ALLOC), 1); PUSH_DATA (push, cp->num_gprs); /* launch preliminary setup */ BEGIN_NVC0(push, NVC0_CP(GRIDID), 1); PUSH_DATA (push, 0x1); BEGIN_NVC0(push, SUBC_CP(0x036c), 1); PUSH_DATA (push, 0); BEGIN_NVC0(push, NVC0_CP(FLUSH), 1); PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8); /* block setup */ BEGIN_NVC0(push, NVC0_CP(BLOCKDIM_YX), 2); PUSH_DATA (push, (info->block[1] << 16) | info->block[0]); PUSH_DATA (push, info->block[2]); if (unlikely(info->indirect)) { struct nv04_resource *res = nv04_resource(info->indirect); uint32_t offset = res->offset + info->indirect_offset; unsigned macro = NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT; nouveau_pushbuf_space(push, 16, 0, 1); PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain); PUSH_DATA(push, NVC0_FIFO_PKHDR_1I(1, macro, 3)); nouveau_pushbuf_data(push, res->bo, offset, NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4); } else { /* grid setup */ BEGIN_NVC0(push, NVC0_CP(GRIDDIM_YX), 2); PUSH_DATA (push, (info->grid[1] << 16) | info->grid[0]); PUSH_DATA (push, info->grid[2]); /* kernel launching */ BEGIN_NVC0(push, NVC0_CP(COMPUTE_BEGIN), 1); PUSH_DATA (push, 0); BEGIN_NVC0(push, SUBC_CP(0x0a08), 1); PUSH_DATA (push, 0); BEGIN_NVC0(push, NVC0_CP(LAUNCH), 1); PUSH_DATA (push, 0x1000); BEGIN_NVC0(push, NVC0_CP(COMPUTE_END), 1); PUSH_DATA (push, 0); BEGIN_NVC0(push, SUBC_CP(0x0360), 1); PUSH_DATA (push, 0x1); } /* TODO: Not sure if this is really necessary. */ nvc0_compute_invalidate_surfaces(nvc0, 5); nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF); nvc0->dirty_cp |= NVC0_NEW_CP_SURFACES; nvc0->images_dirty[5] |= nvc0->images_valid[5]; }