Example #1
0
static int radeon_drm_cs_flush(struct radeon_cmdbuf *rcs,
                               unsigned flags,
                               struct pipe_fence_handle **pfence)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_cs_context *tmp;

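    /* Pad the IB with NOPs so its size meets the alignment requirement of the
     * target ring; the NOP packet format depends on the ring type. */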
    switch (cs->ring_type) {
    case RING_DMA:
        /* pad DMA ring to 8 DWs */
        if (cs->ws->info.chip_class <= GFX6) {
            while (rcs->current.cdw & 7)
                radeon_emit(&cs->base, 0xf0000000); /* NOP packet */
        } else {
            while (rcs->current.cdw & 7)
                radeon_emit(&cs->base, 0x00000000); /* NOP packet */
        }
        break;
    case RING_GFX:
        /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements;
         * r6xx requires at least 4-DW alignment to avoid a hw bug.
         */
        if (cs->ws->info.gfx_ib_pad_with_type2) {
            while (rcs->current.cdw & 7)
                radeon_emit(&cs->base, 0x80000000); /* type2 nop packet */
        } else {
            while (rcs->current.cdw & 7)
                radeon_emit(&cs->base, 0xffff1000); /* type3 nop packet */
        }
        break;
    case RING_UVD:
        while (rcs->current.cdw & 15)
            radeon_emit(&cs->base, 0x80000000); /* type2 nop packet */
        break;
    default:
        break;
    }

    if (rcs->current.cdw > rcs->current.max_dw) {
        fprintf(stderr, "radeon: command stream overflowed\n");
    }

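    /* Create (or reuse) a fence for this flush and attach it to every slab
     * buffer so those buffers are not reclaimed before the kernel has
     * finished with this submission. */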
    if (pfence || cs->csc->num_slab_buffers) {
        struct pipe_fence_handle *fence;

        if (cs->next_fence) {
            fence = cs->next_fence;
            cs->next_fence = NULL;
        } else {
            fence = radeon_cs_create_fence(rcs);
        }

        if (fence) {
            if (pfence)
                radeon_fence_reference(pfence, fence);

            mtx_lock(&cs->ws->bo_fence_lock);
            for (unsigned i = 0; i < cs->csc->num_slab_buffers; ++i) {
                struct radeon_bo *bo = cs->csc->slab_buffers[i].bo;
                p_atomic_inc(&bo->num_active_ioctls);
                radeon_bo_slab_fence(bo, (struct radeon_bo *)fence);
            }
            mtx_unlock(&cs->ws->bo_fence_lock);

            radeon_fence_reference(&fence, NULL);
        }
    } else {
        radeon_fence_reference(&cs->next_fence, NULL);
    }

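    /* Wait for any pending ioctl of this CS to complete before the command
     * stream contexts are swapped below. */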
    radeon_drm_cs_sync_flush(rcs);

    /* Swap command streams. */
    tmp = cs->csc;
    cs->csc = cs->cst;
    cs->cst = tmp;

    /* If the CS is not empty and has not overflowed, emit it in a separate thread. */
    if (cs->base.current.cdw && cs->base.current.cdw <= cs->base.current.max_dw &&
        !debug_get_option_noop()) {
        unsigned i, num_relocs;

        num_relocs = cs->cst->num_relocs;

        cs->cst->chunks[0].length_dw = cs->base.current.cdw;

        for (i = 0; i < num_relocs; i++) {
            /* Update the number of active asynchronous CS ioctls for the buffer. */
            p_atomic_inc(&cs->cst->relocs_bo[i].bo->num_active_ioctls);
        }

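        /* Set up the ring-specific flags chunk for the CS ioctl. */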
        switch (cs->ring_type) {
        case RING_DMA:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_DMA;
            cs->cst->cs.num_chunks = 3;
            if (cs->ws->info.r600_has_virtual_memory) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
            }
            break;

        case RING_UVD:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_UVD;
            cs->cst->cs.num_chunks = 3;
            break;

        case RING_VCE:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_VCE;
            cs->cst->cs.num_chunks = 3;
            break;

        default:
        case RING_GFX:
        case RING_COMPUTE:
            cs->cst->flags[0] = RADEON_CS_KEEP_TILING_FLAGS;
            cs->cst->flags[1] = RADEON_CS_RING_GFX;
            cs->cst->cs.num_chunks = 3;

            if (cs->ws->info.r600_has_virtual_memory) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & PIPE_FLUSH_END_OF_FRAME) {
                cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
                cs->cst->cs.num_chunks = 3;
            }
            if (cs->ring_type == RING_COMPUTE) {
                cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
                cs->cst->cs.num_chunks = 3;
            }
            break;
        }

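        /* If the winsys submission queue is available, emit the ioctl on its
         * thread (and wait here unless an asynchronous flush was requested);
         * otherwise emit it directly. */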
        if (util_queue_is_initialized(&cs->ws->cs_queue)) {
            util_queue_add_job(&cs->ws->cs_queue, cs, &cs->flush_completed,
                               radeon_drm_cs_emit_ioctl_oneshot, NULL);
            if (!(flags & PIPE_FLUSH_ASYNC))
                radeon_drm_cs_sync_flush(rcs);
        } else {
            radeon_drm_cs_emit_ioctl_oneshot(cs, 0);
        }
    } else {
        radeon_cs_context_cleanup(cs->cst);
    }

    /* Prepare a new CS. */
    cs->base.current.buf = cs->csc->buf;
    cs->base.current.cdw = 0;
    cs->base.used_vram = 0;
    cs->base.used_gart = 0;

    if (cs->ring_type == RING_GFX)
        cs->ws->num_gfx_IBs++;
    else if (cs->ring_type == RING_DMA)
        cs->ws->num_sdma_IBs++;
    return 0;
}
Example #2
0
static void *si_create_compute_state(
	struct pipe_context *ctx,
	const struct pipe_compute_state *cso)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct si_screen *sscreen = (struct si_screen *)ctx->screen;
	struct si_compute *program = CALLOC_STRUCT(si_compute);

	pipe_reference_init(&program->reference, 1);
	program->screen = (struct si_screen *)ctx->screen;
	program->ir_type = cso->ir_type;
	program->local_size = cso->req_local_mem;
	program->private_size = cso->req_private_mem;
	program->input_size = cso->req_input_mem;
	program->use_code_object_v2 = cso->ir_type == PIPE_SHADER_IR_NATIVE;

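	/* TGSI and NIR shaders are compiled on the shader compiler queue;
	 * debug contexts and shader dumping wait for the result below. */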
	if (cso->ir_type != PIPE_SHADER_IR_NATIVE) {
		if (cso->ir_type == PIPE_SHADER_IR_TGSI) {
			program->ir.tgsi = tgsi_dup_tokens(cso->prog);
			if (!program->ir.tgsi) {
				FREE(program);
				return NULL;
			}
		} else {
			assert(cso->ir_type == PIPE_SHADER_IR_NIR);
			program->ir.nir = (struct nir_shader *) cso->prog;
		}

		program->compiler_ctx_state.debug = sctx->debug;
		program->compiler_ctx_state.is_debug_context = sctx->is_debug;
		p_atomic_inc(&sscreen->num_shaders_created);
		util_queue_fence_init(&program->ready);

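		/* When the compile result is needed immediately, collect compiler
		 * diagnostics through an async debug callback and drain them into
		 * the context's debug callback after the job completes. */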
		struct util_async_debug_callback async_debug;
		bool wait =
			(sctx->debug.debug_message && !sctx->debug.async) ||
			sctx->is_debug ||
			si_can_dump_shader(sscreen, PIPE_SHADER_COMPUTE);

		if (wait) {
			u_async_debug_init(&async_debug);
			program->compiler_ctx_state.debug = async_debug.base;
		}

		util_queue_add_job(&sscreen->shader_compiler_queue,
				   program, &program->ready,
				   si_create_compute_state_async, NULL);

		if (wait) {
			util_queue_fence_wait(&program->ready);
			u_async_debug_drain(&async_debug, &sctx->debug);
			u_async_debug_cleanup(&async_debug);
		}
	} else {
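		/* PIPE_SHADER_IR_NATIVE: the input is a precompiled ELF binary.
		 * Parse it, read the shader config and upload the machine code. */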
		const struct pipe_llvm_program_header *header;
		const char *code;
		header = cso->prog;
		code = cso->prog + sizeof(struct pipe_llvm_program_header);

		ac_elf_read(code, header->num_bytes, &program->shader.binary);
		if (program->use_code_object_v2) {
			const amd_kernel_code_t *code_object =
				si_compute_get_code_object(program, 0);
			code_object_to_config(code_object, &program->shader.config);
		} else {
			si_shader_binary_read_config(&program->shader.binary,
				     &program->shader.config, 0);
		}
		si_shader_dump(sctx->screen, &program->shader, &sctx->debug,
			       PIPE_SHADER_COMPUTE, stderr, true);
		if (si_shader_binary_upload(sctx->screen, &program->shader) < 0) {
			fprintf(stderr, "LLVM failed to upload shader\n");
			FREE(program);
			return NULL;
		}
	}

	return program;
}
Example #3
0
static void *si_create_compute_state(
	struct pipe_context *ctx,
	const struct pipe_compute_state *cso)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct si_screen *sscreen = (struct si_screen *)ctx->screen;
	struct si_compute *program = CALLOC_STRUCT(si_compute);

	program->screen = (struct si_screen *)ctx->screen;
	program->ir_type = cso->ir_type;
	program->local_size = cso->req_local_mem;
	program->private_size = cso->req_private_mem;
	program->input_size = cso->req_input_mem;
	program->use_code_object_v2 = HAVE_LLVM >= 0x0400 &&
					cso->ir_type == PIPE_SHADER_IR_NATIVE;

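	/* TGSI shaders are compiled by si_create_compute_state_async, either
	 * synchronously (debug context or shader dumping) or on the shader
	 * compiler queue. */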
	if (cso->ir_type == PIPE_SHADER_IR_TGSI) {
		program->tokens = tgsi_dup_tokens(cso->prog);
		if (!program->tokens) {
			FREE(program);
			return NULL;
		}

		program->compiler_ctx_state.tm = sctx->tm;
		program->compiler_ctx_state.debug = sctx->b.debug;
		program->compiler_ctx_state.is_debug_context = sctx->is_debug;
		p_atomic_inc(&sscreen->b.num_shaders_created);
		util_queue_fence_init(&program->ready);

		if ((sctx->b.debug.debug_message && !sctx->b.debug.async) ||
		    sctx->is_debug ||
		    r600_can_dump_shader(&sscreen->b, PIPE_SHADER_COMPUTE))
			si_create_compute_state_async(program, -1);
		else
			util_queue_add_job(&sscreen->shader_compiler_queue,
					   program, &program->ready,
					   si_create_compute_state_async, NULL);
	} else {
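		/* Native (precompiled) shaders: parse the ELF binary, read the
		 * shader config and upload the machine code. */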
		const struct pipe_llvm_program_header *header;
		const char *code;
		header = cso->prog;
		code = cso->prog + sizeof(struct pipe_llvm_program_header);

		ac_elf_read(code, header->num_bytes, &program->shader.binary);
		if (program->use_code_object_v2) {
			const amd_kernel_code_t *code_object =
				si_compute_get_code_object(program, 0);
			code_object_to_config(code_object, &program->shader.config);
		} else {
			si_shader_binary_read_config(&program->shader.binary,
				     &program->shader.config, 0);
		}
		si_shader_dump(sctx->screen, &program->shader, &sctx->b.debug,
			       PIPE_SHADER_COMPUTE, stderr, true);
		if (si_shader_binary_upload(sctx->screen, &program->shader) < 0) {
			fprintf(stderr, "LLVM failed to upload shader\n");
			FREE(program);
			return NULL;
		}
	}

	return program;
}