Example #1
0
/*
 * Build an LLVM target machine from the triple, CPU and feature strings held
 * in `opt`. Release builds use aggressive code generation, everything else
 * uses none; PIC or library builds request position-independent relocation.
 * On failure an error is recorded in opt->check.errors and NULL is returned.
 */
static LLVMTargetMachineRef make_machine(pass_opt_t* opt)
{
  LLVMTargetRef llvm_target;
  char* message;

  // A non-zero result is the failure case; `message` is then reported and
  // released.
  if(LLVMGetTargetFromTriple(opt->triple, &llvm_target, &message) != 0)
  {
    errorf(opt->check.errors, NULL, "couldn't create target: %s", message);
    LLVMDisposeMessage(message);
    return NULL;
  }

  LLVMCodeGenOptLevel codegen_level = LLVMCodeGenLevelNone;

  if(opt->release)
    codegen_level = LLVMCodeGenLevelAggressive;

  LLVMRelocMode reloc_mode = LLVMRelocDefault;

  if(opt->pic || opt->library)
    reloc_mode = LLVMRelocPIC;

  LLVMTargetMachineRef tm = LLVMCreateTargetMachine(llvm_target, opt->triple,
    opt->cpu, opt->features, codegen_level, reloc_mode, LLVMCodeModelDefault);

  if(tm != NULL)
    return tm;

  errorf(opt->check.errors, NULL, "couldn't create target machine");
  return NULL;
}
Example #2
0
/**
 * Create an LLVM target machine for the given radeon family.
 *
 * The triple is "amdgcn-mesa-mesa3d" when AC_TM_SUPPORTS_SPILL is set and
 * "amdgcn--" otherwise. The remaining \p tm_options bits each append one
 * entry to the base feature string.
 */
LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, enum ac_target_machine_options tm_options)
{
	assert(family >= CHIP_TAHITI);

	const char *triple = "amdgcn--";
	if (tm_options & AC_TM_SUPPORTS_SPILL)
		triple = "amdgcn-mesa-mesa3d";

	LLVMTargetRef target = ac_get_llvm_target(triple);

	/* Each optional feature contributes either its suffix or nothing. */
	const char *sisched = (tm_options & AC_TM_SISCHED) ? ",+si-scheduler" : "";
	const char *xnack_on = (tm_options & AC_TM_FORCE_ENABLE_XNACK) ? ",+xnack" : "";
	const char *xnack_off = (tm_options & AC_TM_FORCE_DISABLE_XNACK) ? ",-xnack" : "";
	const char *no_promote = (tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH) ? ",-promote-alloca" : "";

	char feature_str[256];
	snprintf(feature_str, sizeof(feature_str),
		 "+DumpCode,+vgpr-spilling,-fp32-denormals,+fp64-denormals%s%s%s%s",
		 sisched, xnack_on, xnack_off, no_promote);

	const char *cpu = ac_get_llvm_processor_name(family);

	return LLVMCreateTargetMachine(target, triple, cpu, feature_str,
				       LLVMCodeGenLevelDefault,
				       LLVMRelocDefault,
				       LLVMCodeModelDefault);
}
Example #3
0
/* triple:string -> ?cpu:string -> ?features:string
   ?level:CodeGenOptLevel.t -> ?reloc_mode:RelocMode.t
   ?code_model:CodeModel.t -> Target.t -> TargetMachine.t

   An optional argument equal to Val_int(0) is treated as absent and its
   default is used; otherwise the value is read from its first field. */
CAMLprim value llvm_create_targetmachine_native(value Triple, value CPU,
                  value Features, value OptLevel, value RelocMode,
                  value CodeModel, LLVMTargetRef Target) {
  const value Absent = Val_int(0);

  const char *CPUName =
      (CPU == Absent) ? "" : String_val(Field(CPU, 0));
  const char *FeatureList =
      (Features == Absent) ? "" : String_val(Field(Features, 0));

  LLVMCodeGenOptLevel Level = LLVMCodeGenLevelDefault;
  if (OptLevel != Absent)
    Level = Int_val(Field(OptLevel, 0));

  LLVMRelocMode Reloc = LLVMRelocDefault;
  if (RelocMode != Absent)
    Reloc = Int_val(Field(RelocMode, 0));

  LLVMCodeModel Model = LLVMCodeModelDefault;
  if (CodeModel != Absent)
    Model = Int_val(Field(CodeModel, 0));

  LLVMTargetMachineRef TM = LLVMCreateTargetMachine(
      Target, String_val(Triple), CPUName, FeatureList, Level, Reloc, Model);

  /* Wrap the raw handle in an OCaml value for the caller. */
  return llvm_alloc_targetmachine(TM);
}
Example #4
0
static LLVMTargetMachineRef
si_create_llvm_target_machine(struct si_screen *sscreen)
{
	const char *triple = "amdgcn--";

	return LLVMCreateTargetMachine(radeon_llvm_get_r600_target(triple), triple,
				       r600_get_llvm_processor_name(sscreen->b.family),
#if HAVE_LLVM >= 0x0308
				       sscreen->b.debug_flags & DBG_SI_SCHED ?
					       SI_LLVM_DEFAULT_FEATURES ",+si-scheduler" :
#endif
					       SI_LLVM_DEFAULT_FEATURES,
				       LLVMCodeGenLevelDefault,
				       LLVMRelocDefault,
				       LLVMCodeModelDefault);
}
Example #5
0
/**
 * Create an "amdgcn--" LLVM target machine for the given radeon family,
 * using the fixed feature string "+DumpCode,+vgpr-spilling".
 */
LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family)
{
	assert(family >= CHIP_TAHITI);

	const char *const triple = "amdgcn--";
	const char *const features = "+DumpCode,+vgpr-spilling";

	LLVMTargetRef target = ac_get_llvm_target(triple);
	const char *cpu = ac_get_llvm_processor_name(family);

	return LLVMCreateTargetMachine(target, triple, cpu, features,
				       LLVMCodeGenLevelDefault,
				       LLVMRelocDefault,
				       LLVMCodeModelDefault);
}
Example #6
0
/**
 * Compile an LLVM module to machine code.
 *
 * @param M           module to compile
 * @param binary      filled in from the emitted object by radeon_elf_read()
 * @param gpu_family  CPU name for the target machine; only read when \p tm
 *                    is NULL
 * @param dump        non-zero dumps the module, is forwarded to
 *                    radeon_elf_read(), and adds "+DumpCode" to a target
 *                    machine created here
 * @param tm          target machine to compile with, or NULL to create (and
 *                    afterwards dispose) a temporary "r600--" one. A caller
 *                    supplied machine is never disposed by this function.
 *
 * @returns 0 for success, 1 for failure
 */
unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binary,
			  const char *gpu_family, unsigned dump, LLVMTargetMachineRef tm)
{

	char cpu[CPU_STRING_LEN];
	char fs[FS_STRING_LEN];
	char triple[TRIPLE_STRING_LEN];
	char *err;
	bool dispose_tm = false;
	LLVMContextRef llvm_ctx;
	unsigned rval = 0;
	LLVMMemoryBufferRef out_buffer;
	unsigned buffer_size;
	const char *buffer_data;
	LLVMBool mem_err;

	if (!tm) {
		/* snprintf always NUL-terminates, unlike strncpy when the
		 * source fills the whole buffer. */
		snprintf(triple, sizeof(triple), "%s", "r600--");
		LLVMTargetRef target = radeon_llvm_get_r600_target(triple);
		if (!target) {
			return 1;
		}
		snprintf(cpu, sizeof(cpu), "%s", gpu_family);
		snprintf(fs, sizeof(fs), "%s", dump ? "+DumpCode" : "");
		tm = LLVMCreateTargetMachine(target, triple, cpu, fs,
				  LLVMCodeGenLevelDefault, LLVMRelocDefault,
						  LLVMCodeModelDefault);
		if (!tm) {
			fprintf(stderr, "%s: failed to create target machine\n",
				__FUNCTION__);
			return 1;
		}
		/* Only a machine created here is ours to dispose. */
		dispose_tm = true;
	}
	if (dump) {
		LLVMDumpModule(M);
	}
	/* Setup Diagnostic Handler*/
	llvm_ctx = LLVMGetModuleContext(M);

#if HAVE_LLVM >= 0x0305
	/* NOTE(review): the handler keeps the address of the local rval;
	 * confirm it is not invoked on this context after we return. */
	LLVMContextSetDiagnosticHandler(llvm_ctx, radeonDiagnosticHandler, &rval);
#endif
	rval = 0;

	/* Compile IR*/
	mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, &err,
								 &out_buffer);

	/* Process Errors/Warnings */
	if (mem_err) {
		fprintf(stderr, "%s: %s", __FUNCTION__, err);
		FREE(err);
		rval = 1;
		goto out;
	}

	if (0 != rval) {
		fprintf(stderr, "%s: Processing Diag Flag\n", __FUNCTION__);
	}

	/* Extract Shader Code*/
	buffer_size = LLVMGetBufferSize(out_buffer);
	buffer_data = LLVMGetBufferStart(out_buffer);

	radeon_elf_read(buffer_data, buffer_size, binary, dump);

	/* Clean up */
	LLVMDisposeMemoryBuffer(out_buffer);

out:
	/* Shared exit: previously the error path disposed tm even when it
	 * belonged to the caller. */
	if (dispose_tm) {
		LLVMDisposeTargetMachine(tm);
	}
	return rval;
}
Example #7
0
/**
 * Create and initialize a radeonsi context for \p screen.
 *
 * Allocates an si_context with CALLOC_STRUCT, fills in the context callbacks,
 * creates the GFX command stream, the optional constant-engine IBs, the
 * border color storage, the blitter and, last, the LLVM target machine.
 *
 * @param screen  screen the context belongs to (cast to struct si_screen)
 * @param priv    stored unchanged in the context's priv field
 * @param flags   PIPE_CONTEXT_* flags; PIPE_CONTEXT_DEBUG is forced on when
 *                the screen has DBG_CHECK_VM set
 *
 * @returns the embedded pipe_context, or NULL if the allocation or any
 *          checked initialization step fails
 */
static struct pipe_context *si_create_context(struct pipe_screen *screen,
                                              void *priv, unsigned flags)
{
	struct si_context *sctx = CALLOC_STRUCT(si_context);
	struct si_screen* sscreen = (struct si_screen *)screen;
	struct radeon_winsys *ws = sscreen->b.ws;
	LLVMTargetRef r600_target;
	const char *triple = "amdgcn--";
	int shader, i;

	if (!sctx)
		return NULL;

	if (sscreen->b.debug_flags & DBG_CHECK_VM)
		flags |= PIPE_CONTEXT_DEBUG;

	sctx->b.b.screen = screen; /* this must be set first */
	sctx->b.b.priv = priv;
	sctx->b.b.destroy = si_destroy_context;
	sctx->b.set_atom_dirty = (void *)si_set_atom_dirty;
	sctx->screen = sscreen; /* Easy accessing of screen/winsys. */
	sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0;

	if (!r600_common_context_init(&sctx->b, &sscreen->b))
		goto fail;

	if (sscreen->b.info.drm_major == 3)
		sctx->b.b.get_device_reset_status = si_amdgpu_get_reset_status;

	si_init_blit_functions(sctx);
	si_init_compute_functions(sctx);
	si_init_cp_dma_functions(sctx);
	si_init_debug_functions(sctx);

	/* Use the UVD decoder hooks when the device reports UVD, otherwise
	 * the generic vl_* ones. */
	if (sscreen->b.info.has_uvd) {
		sctx->b.b.create_video_codec = si_uvd_create_decoder;
		sctx->b.b.create_video_buffer = si_video_buffer_create;
	} else {
		sctx->b.b.create_video_codec = vl_create_decoder;
		sctx->b.b.create_video_buffer = vl_video_buffer_create;
	}

	/* NOTE(review): the cs_create() result is not checked here. */
	sctx->b.gfx.cs = ws->cs_create(sctx->b.ctx, RING_GFX,
				       si_context_gfx_flush, sctx);

	/* Constant-engine IBs are set up only when not disabled by DBG_NO_CE
	 * and the winsys provides cs_add_const_ib. */
	if (!(sscreen->b.debug_flags & DBG_NO_CE) && ws->cs_add_const_ib) {
		sctx->ce_ib = ws->cs_add_const_ib(sctx->b.gfx.cs);
		if (!sctx->ce_ib)
			goto fail;

		if (ws->cs_add_const_preamble_ib) {
			sctx->ce_preamble_ib =
			           ws->cs_add_const_preamble_ib(sctx->b.gfx.cs);

			if (!sctx->ce_preamble_ib)
				goto fail;
		}

		sctx->ce_suballocator =
				u_suballocator_create(&sctx->b.b, 1024 * 1024,
						      64, PIPE_BIND_CUSTOM,
						      PIPE_USAGE_DEFAULT, FALSE);
		if (!sctx->ce_suballocator)
			goto fail;
	}

	sctx->b.gfx.flush = si_context_gfx_flush;

	/* Border colors. */
	/* A CPU-side table plus a buffer of the same size that is mapped
	 * for writing. */
	sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS *
					  sizeof(*sctx->border_color_table));
	if (!sctx->border_color_table)
		goto fail;

	sctx->border_color_buffer = (struct r600_resource*)
		pipe_buffer_create(screen, PIPE_BIND_CUSTOM, PIPE_USAGE_DEFAULT,
				   SI_MAX_BORDER_COLORS *
				   sizeof(*sctx->border_color_table));
	if (!sctx->border_color_buffer)
		goto fail;

	sctx->border_color_map =
		ws->buffer_map(sctx->border_color_buffer->buf,
			       NULL, PIPE_TRANSFER_WRITE);
	if (!sctx->border_color_map)
		goto fail;

	si_init_all_descriptors(sctx);
	si_init_state_functions(sctx);
	si_init_shader_functions(sctx);

	/* CIK and later chip classes get the cik_* SDMA functions. */
	if (sctx->b.chip_class >= CIK)
		cik_init_sdma_functions(sctx);
	else
		si_init_dma_functions(sctx);

	if (sscreen->b.debug_flags & DBG_FORCE_DMA)
		sctx->b.b.resource_copy_region = sctx->b.dma_copy;

	sctx->blitter = util_blitter_create(&sctx->b.b);
	if (sctx->blitter == NULL)
		goto fail;
	sctx->blitter->draw_rectangle = r600_draw_rectangle;

	sctx->sample_mask.sample_mask = 0xffff;

	/* these must be last */
	si_begin_new_cs(sctx);
	r600_query_init_backend_mask(&sctx->b); /* this emits commands and must be last */

	/* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy
	 * with a NULL buffer). We need to use a dummy buffer instead. */
	if (sctx->b.chip_class == CIK) {
		sctx->null_const_buf.buffer = pipe_buffer_create(screen, PIPE_BIND_CONSTANT_BUFFER,
								 PIPE_USAGE_DEFAULT, 16);
		if (!sctx->null_const_buf.buffer)
			goto fail;
		sctx->null_const_buf.buffer_size = sctx->null_const_buf.buffer->width0;

		/* Bind the dummy buffer to every constant buffer slot of
		 * every shader stage. */
		for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
			for (i = 0; i < SI_NUM_CONST_BUFFERS; i++) {
				sctx->b.b.set_constant_buffer(&sctx->b.b, shader, i,
							      &sctx->null_const_buf);
			}
		}

		/* Clear the NULL constant buffer, because loads should return zeros. */
		sctx->b.clear_buffer(&sctx->b.b, sctx->null_const_buf.buffer, 0,
				     sctx->null_const_buf.buffer->width0, 0,
				     R600_COHERENCY_SHADER);
	}

	/* XXX: This is the maximum value allowed.  I'm not sure how to compute
	 * this for non-cs shaders.  Using the wrong value here can result in
	 * GPU lockups, but the maximum value seems to always work.
	 */
	sctx->scratch_waves = 32 * sscreen->b.info.num_good_compute_units;

	/* Initialize LLVM TargetMachine */
	/* With LLVM >= 3.8, DBG_SI_SCHED selects the feature string that
	 * also contains "+si-scheduler".
	 * NOTE(review): neither r600_target nor sctx->tm is checked for NULL. */
	r600_target = radeon_llvm_get_r600_target(triple);
	sctx->tm = LLVMCreateTargetMachine(r600_target, triple,
					   r600_get_llvm_processor_name(sscreen->b.family),
#if HAVE_LLVM >= 0x0308
					   sscreen->b.debug_flags & DBG_SI_SCHED ?
					   	"+DumpCode,+vgpr-spilling,+si-scheduler" :
#endif
					   	"+DumpCode,+vgpr-spilling",
					   LLVMCodeGenLevelDefault,
					   LLVMRelocDefault,
					   LLVMCodeModelDefault);

	return &sctx->b.b;
fail:
	/* Common failure exit: report, then hand the partially initialized
	 * context to si_destroy_context(). */
	fprintf(stderr, "radeonsi: Failed to create a context.\n");
	si_destroy_context(&sctx->b.b);
	return NULL;
}
Example #8
0
/**
 * Compile an LLVM module to machine code.
 *
 * The module is emitted as an object file for the "r600--" triple, and the
 * ".text" and ".AMDGPU.config" sections of the resulting ELF image are copied
 * into freshly allocated buffers in \p binary. When \p dump is non-zero the
 * module is dumped, "+DumpCode" is requested, and any ".AMDGPU.disasm"
 * section is printed to stderr.
 *
 * All temporary resources (target machine, LLVM memory buffer, ELF handle and
 * the private copy of the ELF image) are released on every exit path. On
 * failure, buffers already stored in \p binary are left for the caller.
 *
 * @param M           module to compile
 * @param binary      receives code, config and the disassembled flag
 * @param gpu_family  CPU name passed to the target machine
 * @param dump        non-zero enables the dump behavior described above
 *
 * @returns 0 for success, 1 for failure
 */
unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_llvm_binary *binary,
					  const char * gpu_family, unsigned dump) {

	LLVMTargetRef target;
	LLVMTargetMachineRef tm;
	char cpu[CPU_STRING_LEN];
	char fs[FS_STRING_LEN];
	char triple[TRIPLE_STRING_LEN];
	char *err;
	LLVMMemoryBufferRef out_buffer;
	unsigned buffer_size;
	const char *buffer_data;
	char *elf_buffer;
	Elf *elf;
	Elf_Scn *section = NULL;
	size_t section_str_index;
	unsigned rval = 1;

	init_r600_target();

	target = get_r600_target();
	if (!target) {
		return 1;
	}

	/* snprintf always NUL-terminates, unlike strncpy when the source
	 * fills the whole buffer. */
	snprintf(cpu, sizeof(cpu), "%s", gpu_family);
	snprintf(fs, sizeof(fs), "%s", dump ? "+DumpCode" : "");
	if (dump) {
		LLVMDumpModule(M);
	}
	snprintf(triple, sizeof(triple), "%s", "r600--");
	tm = LLVMCreateTargetMachine(target, triple, cpu, fs,
				  LLVMCodeGenLevelDefault, LLVMRelocDefault,
						  LLVMCodeModelDefault);
	if (!tm) {
		fprintf(stderr, "Failed to create LLVM target machine\n");
		return 1;
	}

	if (LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, &err,
						&out_buffer)) {
		fprintf(stderr, "%s", err);
		FREE(err);
		goto out_tm;
	}

	buffer_size = LLVMGetBufferSize(out_buffer);
	buffer_data = LLVMGetBufferStart(out_buffer);

	/* One of the libelf implementations
	 * (http://www.mr511.de/software/english.htm) requires calling
	 * elf_version() before elf_memory().
	 */
	elf_version(EV_CURRENT);
	elf_buffer = MALLOC(buffer_size);
	if (!elf_buffer) {
		fprintf(stderr, "Failed to allocate ELF buffer\n");
		goto out_mem_buffer;
	}
	memcpy(elf_buffer, buffer_data, buffer_size);

	elf = elf_memory(elf_buffer, buffer_size);
	if (!elf) {
		fprintf(stderr, "Failed to open ELF image\n");
		goto out_elf_buffer;
	}

	if (elf_getshdrstrndx(elf, &section_str_index) < 0) {
		fprintf(stderr, "Failed to read ELF section name table index\n");
		goto out_elf;
	}
	binary->disassembled = 0;

	while ((section = elf_nextscn(elf, section))) {
		const char *name;
		Elf_Data *section_data;
		GElf_Shdr section_header;

		if (gelf_getshdr(section, &section_header) != &section_header) {
			fprintf(stderr, "Failed to read ELF section header\n");
			goto out_elf;
		}
		name = elf_strptr(elf, section_str_index, section_header.sh_name);
		if (!name) {
			/* A section without a readable name cannot be one of
			 * the sections we are looking for. */
			continue;
		}

		if (!strcmp(name, ".text")) {
			section_data = elf_getdata(section, NULL);
			if (!section_data) {
				fprintf(stderr, "Failed to read ELF section data\n");
				goto out_elf;
			}
			binary->code_size = section_data->d_size;
			binary->code = MALLOC(binary->code_size * sizeof(unsigned char));
			if (!binary->code) {
				fprintf(stderr, "Failed to allocate shader code\n");
				goto out_elf;
			}
			memcpy(binary->code, section_data->d_buf, binary->code_size);
		} else if (!strcmp(name, ".AMDGPU.config")) {
			section_data = elf_getdata(section, NULL);
			if (!section_data) {
				fprintf(stderr, "Failed to read ELF section data\n");
				goto out_elf;
			}
			binary->config_size = section_data->d_size;
			binary->config = MALLOC(binary->config_size * sizeof(unsigned char));
			if (!binary->config) {
				fprintf(stderr, "Failed to allocate shader config\n");
				goto out_elf;
			}
			memcpy(binary->config, section_data->d_buf, binary->config_size);
		} else if (dump && !strcmp(name, ".AMDGPU.disasm")) {
			section_data = elf_getdata(section, NULL);
			if (!section_data) {
				fprintf(stderr, "Failed to read ELF section data\n");
				goto out_elf;
			}
			binary->disassembled = 1;
			fprintf(stderr, "\nShader Disassembly:\n\n");
			fprintf(stderr, "%.*s\n", (int)section_data->d_size,
						  (char *)section_data->d_buf);
		}
	}

	rval = 0;

	/* Release in reverse order of acquisition; the ELF handle must go
	 * before the memory image it refers to. */
out_elf:
	elf_end(elf);
out_elf_buffer:
	FREE(elf_buffer);
out_mem_buffer:
	LLVMDisposeMemoryBuffer(out_buffer);
out_tm:
	LLVMDisposeTargetMachine(tm);
	return rval;
}
Example #9
0
/*
 * machine_init
 *
 * Sets up the machine context for the target named by 'mspec' (or
 * LLVM_DEFAULT_TARGET_TRIPLE when 'mspec' is null).  The context, the
 * machine definition and a copy of the triple string live in one
 * allocation, in that order.  Returns the machine definition, or
 * null if allocation, target lookup, target-machine creation or
 * LLVM context creation fails.
 */
machinedef_t *
machine_init (const char *mspec)
{
    machine_ctx_t ctx;
    machinedef_t *def;
    char *errmsg;
    LLVMTargetRef tgt;
    char *spec;
    unsigned long total;

    spec = (mspec == 0) ? LLVM_DEFAULT_TARGET_TRIPLE : (char *) mspec;

    total = (sizeof(struct machine_ctx_s) +
             sizeof(struct machinedef_s) +
             strlen(spec) + 1);
    ctx = malloc(total);
    if (ctx == 0) {
        return 0;
    }
    memset(ctx, 0, total);

    // The triple copy sits right after the two structures; the
    // memset above already supplied its terminating NUL.
    ctx->triple = ((char *) ctx) + (sizeof(struct machine_ctx_s) +
                                    sizeof(struct machinedef_s));
    memcpy(ctx->triple, spec, strlen(spec));

    LLVM_NATIVE_TARGETINFO();
    LLVM_NATIVE_TARGET();
    LLVM_NATIVE_TARGETMC();
    LLVM_NATIVE_ASMPRINTER();
    LLVM_NATIVE_ASMPARSER();

    errmsg = 0;
    tgt = HelperLookupTarget(spec, &errmsg);
    if (tgt == 0) {
        if (errmsg != 0) {
            free(errmsg);
        }
        free(ctx);
        return 0;
    }

    ctx->target_machine = LLVMCreateTargetMachine(tgt, (char *)spec, "", "",
                                                  LLVMCodeGenLevelDefault,
                                                  LLVMRelocPIC,
                                                  LLVMCodeModelDefault);
    if (ctx->target_machine == 0) {
        free(ctx);
        return 0;
    }
    HelperSetAsmVerbosity(ctx->target_machine, 1);

    ctx->llvmctx = LLVMContextCreate();
    if (ctx->llvmctx == 0) {
        LLVMDisposeTargetMachine(ctx->target_machine);
        free(ctx);
        return 0;
    }

    ctx->is_macho = (strstr(spec, "darwin") != 0); // XXX

    // The machine definition immediately follows the context structure.
    def = (machinedef_t *)(ctx + 1);

    def->machctx = ctx;
    def->bpaddr = sizeof(int *) * 8;
    def->bpval  = sizeof(long) * 8;
    def->bpunit = 8;
    def->charsize_count = 1;
    def->charsizes[0] = 8;
    def->flags = MACH_M_SIGNEXT | MACH_M_LTC_INIT;
    def->max_align = 4;
    def->reg_count = 16;

    return def;

} /* machine_init */
/**
 * Compile an LLVM module to machine code.
 *
 * @param M           module to compile
 * @param binary      filled in from the emitted object by radeon_elf_read()
 * @param gpu_family  CPU name for the target machine; only read when \p tm
 *                    is NULL
 * @param tm          target machine to compile with, or NULL to create (and
 *                    afterwards dispose) a temporary "r600--" one with
 *                    "+DumpCode". A caller supplied machine is not disposed.
 * @param debug       debug callback that receives emit and failure messages
 *                    and is handed to the diagnostic handler
 *
 * @returns 0 for success, 1 for failure
 */
unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binary,
			     const char *gpu_family,
			     LLVMTargetMachineRef tm,
			     struct pipe_debug_callback *debug)
{
	struct radeon_llvm_diagnostics diag;
	char cpu[CPU_STRING_LEN];
	char fs[FS_STRING_LEN];
	char *err;
	bool dispose_tm = false;
	LLVMContextRef llvm_ctx;
	LLVMMemoryBufferRef out_buffer;
	unsigned buffer_size;
	const char *buffer_data;
	char triple[TRIPLE_STRING_LEN];
	LLVMBool mem_err;

	diag.debug = debug;
	diag.retval = 0;

	if (!tm) {
		/* snprintf always NUL-terminates, unlike strncpy when the
		 * source fills the whole buffer. */
		snprintf(triple, sizeof(triple), "%s", "r600--");
		LLVMTargetRef target = radeon_llvm_get_r600_target(triple);
		if (!target) {
			return 1;
		}
		snprintf(cpu, sizeof(cpu), "%s", gpu_family);
		snprintf(fs, sizeof(fs), "%s", "+DumpCode");
		tm = LLVMCreateTargetMachine(target, triple, cpu, fs,
				  LLVMCodeGenLevelDefault, LLVMRelocDefault,
						  LLVMCodeModelDefault);
		if (!tm) {
			/* Nothing to dispose yet; report through the common
			 * failure exit instead of emitting with a NULL
			 * machine. */
			diag.retval = 1;
			goto out;
		}
		dispose_tm = true;
	}

	/* Setup Diagnostic Handler*/
	llvm_ctx = LLVMGetModuleContext(M);

	/* NOTE(review): the handler keeps the address of the local diag;
	 * confirm it is not invoked on this context after we return. */
	LLVMContextSetDiagnosticHandler(llvm_ctx, radeonDiagnosticHandler, &diag);

	/* Compile IR*/
	mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, &err,
								 &out_buffer);

	/* Process Errors/Warnings */
	if (mem_err) {
		fprintf(stderr, "%s: %s", __FUNCTION__, err);
		pipe_debug_message(debug, SHADER_INFO,
				   "LLVM emit error: %s", err);
		FREE(err);
		diag.retval = 1;
		goto out;
	}

	/* Extract Shader Code*/
	buffer_size = LLVMGetBufferSize(out_buffer);
	buffer_data = LLVMGetBufferStart(out_buffer);

	radeon_elf_read(buffer_data, buffer_size, binary);

	/* Clean up */
	LLVMDisposeMemoryBuffer(out_buffer);

out:
	/* Only a machine created in this function is disposed here. */
	if (dispose_tm) {
		LLVMDisposeTargetMachine(tm);
	}
	if (diag.retval != 0)
		pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed");
	return diag.retval;
}
Example #11
0
/**
 * Create and initialize a radeonsi context for \p screen.
 *
 * Allocates an si_context with CALLOC_STRUCT, fills in the context callbacks,
 * creates the GFX command stream, registers the state atoms, creates the
 * blitter and, when built against LLVM >= 3.6, the LLVM target machine.
 *
 * @param screen  screen the context belongs to (cast to struct si_screen)
 * @param priv    stored unchanged in the context's priv field
 *
 * @returns the embedded pipe_context, or NULL if the allocation or any
 *          checked initialization step fails
 */
static struct pipe_context *si_create_context(struct pipe_screen *screen, void *priv)
{
	struct si_context *sctx = CALLOC_STRUCT(si_context);
	struct si_screen* sscreen = (struct si_screen *)screen;
	struct radeon_winsys *ws = sscreen->b.ws;
	/* NOTE(review): r600_target is only used inside the HAVE_LLVM >= 0x0306
	 * section below, while its declaration is unconditional. */
	LLVMTargetRef r600_target;
#if HAVE_LLVM >= 0x0306
	const char *triple = "amdgcn--";
#endif
	int shader, i;

	if (sctx == NULL)
		return NULL;

	sctx->b.b.screen = screen; /* this must be set first */
	sctx->b.b.priv = priv;
	sctx->b.b.destroy = si_destroy_context;
	sctx->b.set_atom_dirty = (void *)si_set_atom_dirty;
	sctx->screen = sscreen; /* Easy accessing of screen/winsys. */

	if (!r600_common_context_init(&sctx->b, &sscreen->b))
		goto fail;

	if (sscreen->b.info.drm_major == 3)
		sctx->b.b.get_device_reset_status = si_amdgpu_get_reset_status;

	si_init_blit_functions(sctx);
	si_init_compute_functions(sctx);
	si_init_cp_dma_functions(sctx);

	/* Use the UVD decoder hooks when the device reports UVD, otherwise
	 * the generic vl_* ones. */
	if (sscreen->b.info.has_uvd) {
		sctx->b.b.create_video_codec = si_uvd_create_decoder;
		sctx->b.b.create_video_buffer = si_video_buffer_create;
	} else {
		sctx->b.b.create_video_codec = vl_create_decoder;
		sctx->b.b.create_video_buffer = vl_video_buffer_create;
	}

	/* The last argument is the trace buffer's cs_buf when the screen has
	 * a trace_bo, NULL otherwise.
	 * NOTE(review): the cs_create() result is not checked here. */
	sctx->b.rings.gfx.cs = ws->cs_create(sctx->b.ctx, RING_GFX, si_context_gfx_flush,
					     sctx, sscreen->b.trace_bo ?
						sscreen->b.trace_bo->cs_buf : NULL);
	sctx->b.rings.gfx.flush = si_context_gfx_flush;

	si_init_all_descriptors(sctx);

	/* Initialize cache_flush. */
	sctx->cache_flush = si_atom_cache_flush;
	sctx->atoms.s.cache_flush = &sctx->cache_flush;

	sctx->msaa_sample_locs = si_atom_msaa_sample_locs;
	sctx->atoms.s.msaa_sample_locs = &sctx->msaa_sample_locs;

	sctx->msaa_config = si_atom_msaa_config;
	sctx->atoms.s.msaa_config = &sctx->msaa_config;

	sctx->atoms.s.streamout_begin = &sctx->b.streamout.begin_atom;
	sctx->atoms.s.streamout_enable = &sctx->b.streamout.enable_atom;

	si_init_state_functions(sctx);
	si_init_shader_functions(sctx);

	if (sscreen->b.debug_flags & DBG_FORCE_DMA)
		sctx->b.b.resource_copy_region = sctx->b.dma_copy;

	sctx->blitter = util_blitter_create(&sctx->b.b);
	if (sctx->blitter == NULL)
		goto fail;
	sctx->blitter->draw_rectangle = r600_draw_rectangle;

	/* these must be last */
	si_begin_new_cs(sctx);
	r600_query_init_backend_mask(&sctx->b); /* this emits commands and must be last */

	/* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy
	 * with a NULL buffer). We need to use a dummy buffer instead. */
	if (sctx->b.chip_class == CIK) {
		sctx->null_const_buf.buffer = pipe_buffer_create(screen, PIPE_BIND_CONSTANT_BUFFER,
								 PIPE_USAGE_DEFAULT, 16);
		if (!sctx->null_const_buf.buffer)
			goto fail;
		sctx->null_const_buf.buffer_size = sctx->null_const_buf.buffer->width0;

		/* Bind the dummy buffer to every constant buffer slot of
		 * every shader stage. */
		for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
			for (i = 0; i < SI_NUM_CONST_BUFFERS; i++) {
				sctx->b.b.set_constant_buffer(&sctx->b.b, shader, i,
							      &sctx->null_const_buf);
			}
		}

		/* Clear the NULL constant buffer, because loads should return zeros. */
		sctx->b.clear_buffer(&sctx->b.b, sctx->null_const_buf.buffer, 0,
				     sctx->null_const_buf.buffer->width0, 0, false);
	}

	/* XXX: This is the maximum value allowed.  I'm not sure how to compute
	 * this for non-cs shaders.  Using the wrong value here can result in
	 * GPU lockups, but the maximum value seems to always work.
	 */
	sctx->scratch_waves = 32 * sscreen->b.info.max_compute_units;

#if HAVE_LLVM >= 0x0306
	/* Initialize LLVM TargetMachine */
	/* NOTE(review): neither r600_target nor sctx->tm is checked for NULL. */
	r600_target = radeon_llvm_get_r600_target(triple);
	sctx->tm = LLVMCreateTargetMachine(r600_target, triple,
					   r600_get_llvm_processor_name(sscreen->b.family),
					   "+DumpCode,+vgpr-spilling",
					   LLVMCodeGenLevelDefault,
					   LLVMRelocDefault,
					   LLVMCodeModelDefault);
#endif

	return &sctx->b.b;
fail:
	/* Common failure exit: hand the partially initialized context to
	 * si_destroy_context(). */
	si_destroy_context(&sctx->b.b);
	return NULL;
}