Example #1
static void si_handle_env_var_force_family(struct si_screen *sscreen)
{
	const char *family = debug_get_option("SI_FORCE_FAMILY", NULL);
	unsigned i;

	if (!family)
		return;

	for (i = CHIP_TAHITI; i < CHIP_LAST; i++) {
		if (!strcmp(family, r600_get_llvm_processor_name(i))) {
			/* Override family and chip_class. */
			sscreen->b.family = sscreen->b.info.family = i;

			if (i >= CHIP_TONGA)
				sscreen->b.chip_class = sscreen->b.info.chip_class = VI;
			else if (i >= CHIP_BONAIRE)
				sscreen->b.chip_class = sscreen->b.info.chip_class = CIK;
			else
				sscreen->b.chip_class = sscreen->b.info.chip_class = SI;

			/* Don't submit any IBs. */
			setenv("RADEON_NOOP", "1", 1);
			return;
		}
	}

	fprintf(stderr, "radeonsi: Unknown family: %s\n", family);
	exit(1);
}
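Every example in this listing resolves the chip family to an LLVM processor string through r600_get_llvm_processor_name(). Mesa's real implementation is not reproduced here; the following is only a minimal sketch, assuming the function maps radeon_family enum values to the processor names LLVM's AMDGPU backend accepts. The example_* function name is hypothetical.

/* Sketch only, NOT the Mesa implementation: a handful of
 * family-to-processor-name mappings of the kind the examples rely on. */
static const char *example_llvm_processor_name(enum radeon_family family)
{
	switch (family) {
	case CHIP_TAHITI:
		return "tahiti";
	case CHIP_BONAIRE:
		return "bonaire";
	case CHIP_TONGA:
		return "tonga";
	default:
		return "";	/* unknown family */
	}
}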
Example #2
File: si_pipe.c  Project: hakzsam/mesa
static LLVMTargetMachineRef
si_create_llvm_target_machine(struct si_screen *sscreen)
{
	const char *triple = "amdgcn--";

	return LLVMCreateTargetMachine(radeon_llvm_get_r600_target(triple), triple,
				       r600_get_llvm_processor_name(sscreen->b.family),
#if HAVE_LLVM >= 0x0308
				       sscreen->b.debug_flags & DBG_SI_SCHED ?
					       SI_LLVM_DEFAULT_FEATURES ",+si-scheduler" :
#endif
					       SI_LLVM_DEFAULT_FEATURES,
				       LLVMCodeGenLevelDefault,
				       LLVMRelocDefault,
				       LLVMCodeModelDefault);
}
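The target machine returned above is a plain LLVM-C handle. As a hedged usage sketch (a code fragment; where the handle is stored in Mesa's screen/context structures is an assumption), the caller is expected to release it with LLVMDisposeTargetMachine() once it is no longer needed:

/* Sketch only: create the per-screen target machine and dispose of it
 * on teardown with the matching LLVM-C call. */
LLVMTargetMachineRef tm = si_create_llvm_target_machine(sscreen);
if (tm) {
	/* ... compile shaders with tm ... */
	LLVMDisposeTargetMachine(tm);
}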
Example #3
unsigned r600_llvm_compile(
	LLVMModuleRef mod,
	enum radeon_family family,
	struct r600_bytecode *bc,
	boolean *use_kill,
	unsigned dump)
{
	unsigned r;
	struct radeon_shader_binary binary;
	const char * gpu_family = r600_get_llvm_processor_name(family);

	memset(&binary, 0, sizeof(struct radeon_shader_binary));
	r = radeon_llvm_compile(mod, &binary, gpu_family, dump, dump, NULL);

	r = r600_create_shader(bc, &binary, use_kill);

	FREE(binary.code);
	FREE(binary.config);
	FREE(binary.rodata);
	FREE(binary.global_symbol_offsets);

	return r;
}
Example #4
File: r600_llvm.c  Project: MartaLo/mesa
unsigned r600_llvm_compile(
	LLVMModuleRef mod,
	enum radeon_family family,
	struct r600_bytecode *bc,
	boolean *use_kill,
	unsigned dump,
	struct pipe_debug_callback *debug)
{
	unsigned r;
	struct radeon_shader_binary binary;
	const char * gpu_family = r600_get_llvm_processor_name(family);

	radeon_shader_binary_init(&binary);
	if (dump)
		LLVMDumpModule(mod);
	r = radeon_llvm_compile(mod, &binary, gpu_family, NULL, debug);

	r = r600_create_shader(bc, &binary, use_kill);

	radeon_shader_binary_clean(&binary);

	return r;
}
Example #5
static int r600_get_compute_param(struct pipe_screen *screen,
        enum pipe_compute_cap param,
        void *ret)
{
	struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;

	//TODO: select these params by asic
	switch (param) {
	case PIPE_COMPUTE_CAP_IR_TARGET: {
		const char *gpu;
		const char *triple;
		if (rscreen->family <= CHIP_ARUBA || HAVE_LLVM < 0x0306) {
			triple = "r600--";
		} else {
			triple = "amdgcn--";
		}
		switch(rscreen->family) {
		/* Clang < 3.6 is missing Hainan in its list of
		 * GPUs, so we need to use the name of a similar GPU.
		 */
#if HAVE_LLVM < 0x0306
		case CHIP_HAINAN:
			gpu = "oland";
			break;
#endif
		default:
			gpu = r600_get_llvm_processor_name(rscreen->family);
			break;
		}
		if (ret) {
			sprintf(ret, "%s-%s", gpu, triple);
		}
		/* +2 for dash and terminating NUL byte */
		return (strlen(triple) + strlen(gpu) + 2) * sizeof(char);
	}
	case PIPE_COMPUTE_CAP_GRID_DIMENSION:
		if (ret) {
			uint64_t *grid_dimension = ret;
			grid_dimension[0] = 3;
		}
		return 1 * sizeof(uint64_t);

	case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
		if (ret) {
			uint64_t *grid_size = ret;
			grid_size[0] = 65535;
			grid_size[1] = 65535;
			grid_size[2] = 1;
		}
		return 3 * sizeof(uint64_t) ;

	case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
		if (ret) {
			uint64_t *block_size = ret;
			block_size[0] = 256;
			block_size[1] = 256;
			block_size[2] = 256;
		}
		return 3 * sizeof(uint64_t);

	case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
		if (ret) {
			uint64_t *max_threads_per_block = ret;
			*max_threads_per_block = 256;
		}
		return sizeof(uint64_t);

	case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
		if (ret) {
			uint64_t *max_global_size = ret;
			uint64_t max_mem_alloc_size;

			r600_get_compute_param(screen,
				PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
				&max_mem_alloc_size);

			/* In OpenCL, the MAX_MEM_ALLOC_SIZE must be at least
			 * 1/4 of the MAX_GLOBAL_SIZE.  Since the
			 * MAX_MEM_ALLOC_SIZE is fixed for older kernels,
			 * make sure we never report more than
			 * 4 * MAX_MEM_ALLOC_SIZE.
			 */
			*max_global_size = MIN2(4 * max_mem_alloc_size,
				rscreen->info.gart_size +
				rscreen->info.vram_size);
		}
		return sizeof(uint64_t);

	case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
		if (ret) {
			uint64_t *max_local_size = ret;
			/* Value reported by the closed source driver. */
			*max_local_size = 32768;
		}
		return sizeof(uint64_t);

	case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
		if (ret) {
			uint64_t *max_input_size = ret;
			/* Value reported by the closed source driver. */
			*max_input_size = 1024;
		}
		return sizeof(uint64_t);

	case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
		if (ret) {
			uint64_t *max_mem_alloc_size = ret;

			/* XXX: The limit in older kernels is 256 MB.  We
			 * should add a query here for newer kernels.
			 */
			*max_mem_alloc_size = 256 * 1024 * 1024;
		}
		return sizeof(uint64_t);

	case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
		if (ret) {
			uint32_t *max_clock_frequency = ret;
			*max_clock_frequency = rscreen->info.max_sclk;
		}
		return sizeof(uint32_t);

	case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
		if (ret) {
			uint32_t *max_compute_units = ret;
			*max_compute_units = rscreen->info.max_compute_units;
		}
		return sizeof(uint32_t);

	case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
		if (ret) {
			uint32_t *images_supported = ret;
			*images_supported = 0;
		}
		return sizeof(uint32_t);
	case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
		break; /* unused */
	case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
		if (ret) {
			uint32_t *subgroup_size = ret;
			*subgroup_size = r600_wavefront_size(rscreen->family);
		}
		return sizeof(uint32_t);
	}

        fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param);
        return 0;
}
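The get_compute_param implementations in these examples follow the gallium convention: the return value is the size of the result in bytes, and the result itself is written only when ret is non-NULL. Below is a hedged caller sketch, assuming the older pipe_screen hook without the ir_type argument and gallium's CALLOC/FREE helpers; the example_* name is hypothetical.

static char *example_query_ir_target(struct pipe_screen *screen)
{
	/* First call with ret == NULL only reports how many bytes are needed. */
	int size = screen->get_compute_param(screen, PIPE_COMPUTE_CAP_IR_TARGET, NULL);
	char *target = size > 0 ? CALLOC(1, size) : NULL;

	/* Second call fills in the "<gpu>-<triple>" string, e.g. "tahiti-amdgcn--". */
	if (target)
		screen->get_compute_param(screen, PIPE_COMPUTE_CAP_IR_TARGET, target);
	return target;	/* caller releases with FREE() */
}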
Example #6
static struct pipe_context *si_create_context(struct pipe_screen *screen,
                                              void *priv, unsigned flags)
{
	struct si_context *sctx = CALLOC_STRUCT(si_context);
	struct si_screen* sscreen = (struct si_screen *)screen;
	struct radeon_winsys *ws = sscreen->b.ws;
	LLVMTargetRef r600_target;
	const char *triple = "amdgcn--";
	int shader, i;

	if (!sctx)
		return NULL;

	if (sscreen->b.debug_flags & DBG_CHECK_VM)
		flags |= PIPE_CONTEXT_DEBUG;

	sctx->b.b.screen = screen; /* this must be set first */
	sctx->b.b.priv = priv;
	sctx->b.b.destroy = si_destroy_context;
	sctx->b.set_atom_dirty = (void *)si_set_atom_dirty;
	sctx->screen = sscreen; /* Easy accessing of screen/winsys. */
	sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0;

	if (!r600_common_context_init(&sctx->b, &sscreen->b))
		goto fail;

	if (sscreen->b.info.drm_major == 3)
		sctx->b.b.get_device_reset_status = si_amdgpu_get_reset_status;

	si_init_blit_functions(sctx);
	si_init_compute_functions(sctx);
	si_init_cp_dma_functions(sctx);
	si_init_debug_functions(sctx);

	if (sscreen->b.info.has_uvd) {
		sctx->b.b.create_video_codec = si_uvd_create_decoder;
		sctx->b.b.create_video_buffer = si_video_buffer_create;
	} else {
		sctx->b.b.create_video_codec = vl_create_decoder;
		sctx->b.b.create_video_buffer = vl_video_buffer_create;
	}

	sctx->b.gfx.cs = ws->cs_create(sctx->b.ctx, RING_GFX,
				       si_context_gfx_flush, sctx);

	if (!(sscreen->b.debug_flags & DBG_NO_CE) && ws->cs_add_const_ib) {
		sctx->ce_ib = ws->cs_add_const_ib(sctx->b.gfx.cs);
		if (!sctx->ce_ib)
			goto fail;

		if (ws->cs_add_const_preamble_ib) {
			sctx->ce_preamble_ib =
			           ws->cs_add_const_preamble_ib(sctx->b.gfx.cs);

			if (!sctx->ce_preamble_ib)
				goto fail;
		}

		sctx->ce_suballocator =
				u_suballocator_create(&sctx->b.b, 1024 * 1024,
						      64, PIPE_BIND_CUSTOM,
						      PIPE_USAGE_DEFAULT, FALSE);
		if (!sctx->ce_suballocator)
			goto fail;
	}

	sctx->b.gfx.flush = si_context_gfx_flush;

	/* Border colors. */
	sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS *
					  sizeof(*sctx->border_color_table));
	if (!sctx->border_color_table)
		goto fail;

	sctx->border_color_buffer = (struct r600_resource*)
		pipe_buffer_create(screen, PIPE_BIND_CUSTOM, PIPE_USAGE_DEFAULT,
				   SI_MAX_BORDER_COLORS *
				   sizeof(*sctx->border_color_table));
	if (!sctx->border_color_buffer)
		goto fail;

	sctx->border_color_map =
		ws->buffer_map(sctx->border_color_buffer->buf,
			       NULL, PIPE_TRANSFER_WRITE);
	if (!sctx->border_color_map)
		goto fail;

	si_init_all_descriptors(sctx);
	si_init_state_functions(sctx);
	si_init_shader_functions(sctx);

	if (sctx->b.chip_class >= CIK)
		cik_init_sdma_functions(sctx);
	else
		si_init_dma_functions(sctx);

	if (sscreen->b.debug_flags & DBG_FORCE_DMA)
		sctx->b.b.resource_copy_region = sctx->b.dma_copy;

	sctx->blitter = util_blitter_create(&sctx->b.b);
	if (sctx->blitter == NULL)
		goto fail;
	sctx->blitter->draw_rectangle = r600_draw_rectangle;

	sctx->sample_mask.sample_mask = 0xffff;

	/* these must be last */
	si_begin_new_cs(sctx);
	r600_query_init_backend_mask(&sctx->b); /* this emits commands and must be last */

	/* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy
	 * with a NULL buffer). We need to use a dummy buffer instead. */
	if (sctx->b.chip_class == CIK) {
		sctx->null_const_buf.buffer = pipe_buffer_create(screen, PIPE_BIND_CONSTANT_BUFFER,
								 PIPE_USAGE_DEFAULT, 16);
		if (!sctx->null_const_buf.buffer)
			goto fail;
		sctx->null_const_buf.buffer_size = sctx->null_const_buf.buffer->width0;

		for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
			for (i = 0; i < SI_NUM_CONST_BUFFERS; i++) {
				sctx->b.b.set_constant_buffer(&sctx->b.b, shader, i,
							      &sctx->null_const_buf);
			}
		}

		/* Clear the NULL constant buffer, because loads should return zeros. */
		sctx->b.clear_buffer(&sctx->b.b, sctx->null_const_buf.buffer, 0,
				     sctx->null_const_buf.buffer->width0, 0,
				     R600_COHERENCY_SHADER);
	}

	/* XXX: This is the maximum value allowed.  I'm not sure how to compute
	 * this for non-cs shaders.  Using the wrong value here can result in
	 * GPU lockups, but the maximum value seems to always work.
	 */
	sctx->scratch_waves = 32 * sscreen->b.info.num_good_compute_units;

	/* Initialize LLVM TargetMachine */
	r600_target = radeon_llvm_get_r600_target(triple);
	sctx->tm = LLVMCreateTargetMachine(r600_target, triple,
					   r600_get_llvm_processor_name(sscreen->b.family),
#if HAVE_LLVM >= 0x0308
					   sscreen->b.debug_flags & DBG_SI_SCHED ?
					   	"+DumpCode,+vgpr-spilling,+si-scheduler" :
#endif
					   	"+DumpCode,+vgpr-spilling",
					   LLVMCodeGenLevelDefault,
					   LLVMRelocDefault,
					   LLVMCodeModelDefault);

	return &sctx->b.b;
fail:
	fprintf(stderr, "radeonsi: Failed to create a context.\n");
	si_destroy_context(&sctx->b.b);
	return NULL;
}
Example #7
static int r600_get_compute_param(struct pipe_screen *screen,
        enum pipe_shader_ir ir_type,
        enum pipe_compute_cap param,
        void *ret)
{
	struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;

	//TODO: select these params by asic
	switch (param) {
	case PIPE_COMPUTE_CAP_IR_TARGET: {
		const char *gpu;
		const char *triple = "r600--";
		gpu = r600_get_llvm_processor_name(rscreen->family);
		if (ret) {
			sprintf(ret, "%s-%s", gpu, triple);
		}
		/* +2 for dash and terminating NUL byte */
		return (strlen(triple) + strlen(gpu) + 2) * sizeof(char);
	}
	case PIPE_COMPUTE_CAP_GRID_DIMENSION:
		if (ret) {
			uint64_t *grid_dimension = ret;
			grid_dimension[0] = 3;
		}
		return 1 * sizeof(uint64_t);

	case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
		if (ret) {
			uint64_t *grid_size = ret;
			grid_size[0] = 65535;
			grid_size[1] = 65535;
			grid_size[2] = 65535;
		}
		return 3 * sizeof(uint64_t) ;

	case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
		if (ret) {
			uint64_t *block_size = ret;
			unsigned threads_per_block = get_max_threads_per_block(rscreen, ir_type);
			block_size[0] = threads_per_block;
			block_size[1] = threads_per_block;
			block_size[2] = threads_per_block;
		}
		return 3 * sizeof(uint64_t);

	case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
		if (ret) {
			uint64_t *max_threads_per_block = ret;
			*max_threads_per_block = get_max_threads_per_block(rscreen, ir_type);
		}
		return sizeof(uint64_t);
	case PIPE_COMPUTE_CAP_ADDRESS_BITS:
		if (ret) {
			uint32_t *address_bits = ret;
			address_bits[0] = 32;
		}
		return 1 * sizeof(uint32_t);

	case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
		if (ret) {
			uint64_t *max_global_size = ret;
			uint64_t max_mem_alloc_size;

			r600_get_compute_param(screen, ir_type,
				PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
				&max_mem_alloc_size);

			/* In OpenCL, the MAX_MEM_ALLOC_SIZE must be at least
			 * 1/4 of the MAX_GLOBAL_SIZE.  Since the
			 * MAX_MEM_ALLOC_SIZE is fixed for older kernels,
			 * make sure we never report more than
			 * 4 * MAX_MEM_ALLOC_SIZE.
			 */
			*max_global_size = MIN2(4 * max_mem_alloc_size,
						MAX2(rscreen->info.gart_size,
						     rscreen->info.vram_size));
		}
		return sizeof(uint64_t);

	case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
		if (ret) {
			uint64_t *max_local_size = ret;
			/* Value reported by the closed source driver. */
			*max_local_size = 32768;
		}
		return sizeof(uint64_t);

	case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
		if (ret) {
			uint64_t *max_input_size = ret;
			/* Value reported by the closed source driver. */
			*max_input_size = 1024;
		}
		return sizeof(uint64_t);

	case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
		if (ret) {
			uint64_t *max_mem_alloc_size = ret;

			*max_mem_alloc_size = rscreen->info.max_alloc_size;
		}
		return sizeof(uint64_t);

	case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
		if (ret) {
			uint32_t *max_clock_frequency = ret;
			*max_clock_frequency = rscreen->info.max_shader_clock;
		}
		return sizeof(uint32_t);

	case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
		if (ret) {
			uint32_t *max_compute_units = ret;
			*max_compute_units = rscreen->info.num_good_compute_units;
		}
		return sizeof(uint32_t);

	case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
		if (ret) {
			uint32_t *images_supported = ret;
			*images_supported = 0;
		}
		return sizeof(uint32_t);
	case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
		break; /* unused */
	case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
		if (ret) {
			uint32_t *subgroup_size = ret;
			*subgroup_size = r600_wavefront_size(rscreen->family);
		}
		return sizeof(uint32_t);
	case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
		if (ret) {
			uint64_t *max_variable_threads_per_block = ret;
			*max_variable_threads_per_block = 0;
		}
		return sizeof(uint64_t);
	}

        fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param);
        return 0;
}
Example #8
static int r600_get_compute_param(struct pipe_screen *screen,
                                  enum pipe_compute_cap param,
                                  void *ret)
{
    struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;

    //TODO: select these params by asic
    switch (param) {
    case PIPE_COMPUTE_CAP_IR_TARGET: {
        const char *gpu = r600_get_llvm_processor_name(rscreen->family);
        if (ret) {
            sprintf(ret, "%s-r600--", gpu);
        }
        return (8 + strlen(gpu)) * sizeof(char);
    }
    case PIPE_COMPUTE_CAP_GRID_DIMENSION:
        if (ret) {
            uint64_t *grid_dimension = ret;
            grid_dimension[0] = 3;
        }
        return 1 * sizeof(uint64_t);

    case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
        if (ret) {
            uint64_t *grid_size = ret;
            grid_size[0] = 65535;
            grid_size[1] = 65535;
            grid_size[2] = 1;
        }
        return 3 * sizeof(uint64_t) ;

    case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
        if (ret) {
            uint64_t *block_size = ret;
            block_size[0] = 256;
            block_size[1] = 256;
            block_size[2] = 256;
        }
        return 3 * sizeof(uint64_t);

    case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
        if (ret) {
            uint64_t *max_threads_per_block = ret;
            *max_threads_per_block = 256;
        }
        return sizeof(uint64_t);

    case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
        if (ret) {
            uint64_t *max_global_size = ret;
            /* XXX: This is what the proprietary driver reports, we
             * may want to use a different value. */
            /* XXX: Not sure what to put here for SI. */
            if (rscreen->chip_class >= SI)
                *max_global_size = 2000000000;
            else
                *max_global_size = 201326592;
        }
        return sizeof(uint64_t);

    case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
        if (ret) {
            uint64_t *max_local_size = ret;
            /* Value reported by the closed source driver. */
            *max_local_size = 32768;
        }
        return sizeof(uint64_t);

    case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
        if (ret) {
            uint64_t *max_input_size = ret;
            /* Value reported by the closed source driver. */
            *max_input_size = 1024;
        }
        return sizeof(uint64_t);

    case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
        if (ret) {
            uint64_t max_global_size;
            uint64_t *max_mem_alloc_size = ret;
            r600_get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE, &max_global_size);
            /* OpenCL requires this value to be at least
             * max(MAX_GLOBAL_SIZE / 4, 128 * 1024 * 1024).
             * I'm really not sure what value to report here, but
             * MAX_GLOBAL_SIZE / 4 seems reasonable.
             */
            *max_mem_alloc_size = max_global_size / 4;
        }
        return sizeof(uint64_t);

    default:
        fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param);
        return 0;
    }
}
Example #9
static int r600_get_compute_param(struct pipe_screen *screen,
        enum pipe_compute_cap param,
        void *ret)
{
	struct r600_screen *rscreen = (struct r600_screen *)screen;
	//TODO: select these params by asic
	switch (param) {
	case PIPE_COMPUTE_CAP_IR_TARGET: {
		const char *gpu = r600_get_llvm_processor_name(rscreen->b.family);
	        if (ret) {
			sprintf(ret, "%s-r600--", gpu);
		}
		return (8 + strlen(gpu)) * sizeof(char);
	}
	case PIPE_COMPUTE_CAP_GRID_DIMENSION:
		if (ret) {
			uint64_t * grid_dimension = ret;
			grid_dimension[0] = 3;
		}
		return 1 * sizeof(uint64_t);
	case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
		if (ret) {
			uint64_t * grid_size = ret;
			grid_size[0] = 65535;
			grid_size[1] = 65535;
			grid_size[2] = 1;
		}
		return 3 * sizeof(uint64_t) ;

	case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
		if (ret) {
			uint64_t * block_size = ret;
			block_size[0] = 256;
			block_size[1] = 256;
			block_size[2] = 256;
		}
		return 3 * sizeof(uint64_t);
	case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
		if (ret) {
			uint64_t * max_threads_per_block = ret;
			*max_threads_per_block = 256;
		}
		return sizeof(uint64_t);

	case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
		if (ret) {
			uint64_t *max_global_size = ret;
			/* XXX: Not sure what to put here. */
			*max_global_size = 2000000000;
		}
		return sizeof(uint64_t);
	case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
		if (ret) {
			uint64_t *max_local_size = ret;
			/* Value reported by the closed source driver. */
			*max_local_size = 32768;
		}
		return sizeof(uint64_t);
	case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
		if (ret) {
			uint64_t *max_input_size = ret;
			/* Value reported by the closed source driver. */
			*max_input_size = 1024;
		}
		return sizeof(uint64_t);
	case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
		if (ret) {
			uint64_t max_global_size;
			uint64_t *max_mem_alloc_size = ret;
			r600_get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE, &max_global_size);
			*max_mem_alloc_size = max_global_size / 4;
		}
		return sizeof(uint64_t);
	default:
		fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param);
		return 0;
	}
}
Example #10
static struct pipe_context *si_create_context(struct pipe_screen *screen, void *priv)
{
	struct si_context *sctx = CALLOC_STRUCT(si_context);
	struct si_screen* sscreen = (struct si_screen *)screen;
	struct radeon_winsys *ws = sscreen->b.ws;
	LLVMTargetRef r600_target;
#if HAVE_LLVM >= 0x0306
	const char *triple = "amdgcn--";
#endif
	int shader, i;

	if (sctx == NULL)
		return NULL;

	sctx->b.b.screen = screen; /* this must be set first */
	sctx->b.b.priv = priv;
	sctx->b.b.destroy = si_destroy_context;
	sctx->b.set_atom_dirty = (void *)si_set_atom_dirty;
	sctx->screen = sscreen; /* Easy accessing of screen/winsys. */

	if (!r600_common_context_init(&sctx->b, &sscreen->b))
		goto fail;

	if (sscreen->b.info.drm_major == 3)
		sctx->b.b.get_device_reset_status = si_amdgpu_get_reset_status;

	si_init_blit_functions(sctx);
	si_init_compute_functions(sctx);
	si_init_cp_dma_functions(sctx);

	if (sscreen->b.info.has_uvd) {
		sctx->b.b.create_video_codec = si_uvd_create_decoder;
		sctx->b.b.create_video_buffer = si_video_buffer_create;
	} else {
		sctx->b.b.create_video_codec = vl_create_decoder;
		sctx->b.b.create_video_buffer = vl_video_buffer_create;
	}

	sctx->b.rings.gfx.cs = ws->cs_create(sctx->b.ctx, RING_GFX, si_context_gfx_flush,
					     sctx, sscreen->b.trace_bo ?
						sscreen->b.trace_bo->cs_buf : NULL);
	sctx->b.rings.gfx.flush = si_context_gfx_flush;

	si_init_all_descriptors(sctx);

	/* Initialize cache_flush. */
	sctx->cache_flush = si_atom_cache_flush;
	sctx->atoms.s.cache_flush = &sctx->cache_flush;

	sctx->msaa_sample_locs = si_atom_msaa_sample_locs;
	sctx->atoms.s.msaa_sample_locs = &sctx->msaa_sample_locs;

	sctx->msaa_config = si_atom_msaa_config;
	sctx->atoms.s.msaa_config = &sctx->msaa_config;

	sctx->atoms.s.streamout_begin = &sctx->b.streamout.begin_atom;
	sctx->atoms.s.streamout_enable = &sctx->b.streamout.enable_atom;

	si_init_state_functions(sctx);
	si_init_shader_functions(sctx);

	if (sscreen->b.debug_flags & DBG_FORCE_DMA)
		sctx->b.b.resource_copy_region = sctx->b.dma_copy;

	sctx->blitter = util_blitter_create(&sctx->b.b);
	if (sctx->blitter == NULL)
		goto fail;
	sctx->blitter->draw_rectangle = r600_draw_rectangle;

	/* these must be last */
	si_begin_new_cs(sctx);
	r600_query_init_backend_mask(&sctx->b); /* this emits commands and must be last */

	/* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy
	 * with a NULL buffer). We need to use a dummy buffer instead. */
	if (sctx->b.chip_class == CIK) {
		sctx->null_const_buf.buffer = pipe_buffer_create(screen, PIPE_BIND_CONSTANT_BUFFER,
								 PIPE_USAGE_DEFAULT, 16);
		if (!sctx->null_const_buf.buffer)
			goto fail;
		sctx->null_const_buf.buffer_size = sctx->null_const_buf.buffer->width0;

		for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
			for (i = 0; i < SI_NUM_CONST_BUFFERS; i++) {
				sctx->b.b.set_constant_buffer(&sctx->b.b, shader, i,
							      &sctx->null_const_buf);
			}
		}

		/* Clear the NULL constant buffer, because loads should return zeros. */
		sctx->b.clear_buffer(&sctx->b.b, sctx->null_const_buf.buffer, 0,
				     sctx->null_const_buf.buffer->width0, 0, false);
	}

	/* XXX: This is the maximum value allowed.  I'm not sure how to compute
	 * this for non-cs shaders.  Using the wrong value here can result in
	 * GPU lockups, but the maximum value seems to always work.
	 */
	sctx->scratch_waves = 32 * sscreen->b.info.max_compute_units;

#if HAVE_LLVM >= 0x0306
	/* Initialize LLVM TargetMachine */
	r600_target = radeon_llvm_get_r600_target(triple);
	sctx->tm = LLVMCreateTargetMachine(r600_target, triple,
					   r600_get_llvm_processor_name(sscreen->b.family),
					   "+DumpCode,+vgpr-spilling",
					   LLVMCodeGenLevelDefault,
					   LLVMRelocDefault,
					   LLVMCodeModelDefault);
#endif

	return &sctx->b.b;
fail:
	si_destroy_context(&sctx->b.b);
	return NULL;
}