// Generate the complete LLVM IR function for this shader layer instance:
//     void <layername>_<id> (ShaderGlobals *sg, GroupData *groupdata)
// If 'groupentry' is true, this layer is the entry point of the shader
// group and additionally clears the "layer executed" bits and
// zero-initializes all layers' closure parameters before running.
// Returns the newly built llvm::Function.
llvm::Function* BackendLLVM::build_llvm_instance (bool groupentry) {
    // Make a layer function: void layer_func(ShaderGlobals*, GroupData*)
    // Note that the GroupData* is passed as a void*.
    std::string unique_layer_name = Strutil::format ("%s_%d", inst()->layername(), inst()->id());
    ll.current_function (
        ll.make_function (unique_layer_name,
                          !groupentry,       // fastcall for non-entry layer functions
                          ll.type_void(),    // return type
                          llvm_type_sg_ptr(), llvm_type_groupdata_ptr()));
    // Get shader globals and groupdata pointers (the two function args)
    m_llvm_shaderglobals_ptr = ll.current_function_arg(0); //arg_it++;
    m_llvm_groupdata_ptr = ll.current_function_arg(1); //arg_it++;
    llvm::BasicBlock *entry_bb = ll.new_basic_block (unique_layer_name);
    // No "exit instance" block yet; one is created on demand during codegen.
    m_exit_instance_block = NULL;
    // Set up a new IR builder positioned at the entry block
    ll.new_builder (entry_bb);
#if 0 /* helpful for debugging */
    if (llvm_debug() && groupentry)
        llvm_gen_debug_printf (Strutil::format("\n\n\n\nGROUP! %s",group().name()));
    if (llvm_debug())
        llvm_gen_debug_printf (Strutil::format("enter layer %s %s", inst()->layername(), inst()->shadername()));
#endif
    // Optional per-layer execution counting for statistics
    if (shadingsys().countlayerexecs())
        ll.call_function ("osl_incr_layers_executed", sg_void_ptr());
    if (groupentry) {
        if (m_num_used_layers > 1) {
            // If this is the group entry point, clear all the "layer
            // executed" bits.  If it's not the group entry (but rather is
            // an upstream node), then set its bit!
            int sz = (m_num_used_layers + 3) & (~3);  // round up to 32 bits
            ll.op_memset (ll.void_ptr(layer_run_ptr(0)), 0, sz, 4 /*align*/);
        }
        // Group entries also need to allot space for ALL layers' params
        // that are closures (to avoid weird order of layer eval problems).
        // Each closure param (or each element, for arrays) is stored as a
        // NULL void pointer.
        for (int i = 0; i < group().nlayers(); ++i) {
            ShaderInstance *gi = group()[i];
            if (gi->unused())
                continue;
            FOREACH_PARAM (Symbol &sym, gi) {
                if (sym.typespec().is_closure_based()) {
                    int arraylen = std::max (1, sym.typespec().arraylength());
                    llvm::Value *val = ll.constant_ptr(NULL, ll.type_void_ptr());
                    for (int a = 0; a < arraylen; ++a) {
                        llvm::Value *arrind = sym.typespec().is_array() ? ll.constant(a) : NULL;
                        llvm_store_value (val, sym, 0, arrind, 0);
                    }
                }
            }
            // Unconditionally execute earlier layers that are not lazy
            if (! gi->run_lazily() && i < group().nlayers()-1)
                llvm_call_layer (i, true /* unconditionally run */);
        }
    }
    // Setup the symbols (first pass: everything except parameters)
    m_named_values.clear ();
    BOOST_FOREACH (Symbol &s, inst()->symbols()) {
        // Skip constants -- we always inline scalar constants, and for
        // array constants we will just use the pointers to the copy of
        // the constant that belongs to the instance.
        if (s.symtype() == SymTypeConst)
            continue;
        // Skip structure placeholders
        if (s.typespec().is_structure())
            continue;
        // Allocate space for locals, temps, aggregate constants
        if (s.symtype() == SymTypeLocal || s.symtype() == SymTypeTemp ||
                s.symtype() == SymTypeConst)
            getOrAllocateLLVMSymbol (s);
        // Set initial value for constants, closures, and strings that are
        // not parameters.  Locals/temps are also initialized here when
        // debug_uninit is enabled, so uninitialized reads are detectable.
        if (s.symtype() != SymTypeParam && s.symtype() != SymTypeOutputParam &&
            s.symtype() != SymTypeGlobal &&
            (s.is_constant() || s.typespec().is_closure_based() ||
             s.typespec().is_string_based() ||
             ((s.symtype() == SymTypeLocal || s.symtype() == SymTypeTemp)
              && shadingsys().debug_uninit())))
            llvm_assign_initial_value (s);
        // If debugnan is turned on, globals check that their values are ok
        if (s.symtype() == SymTypeGlobal && shadingsys().debug_nan()) {
            TypeDesc t = s.typespec().simpletype();
            if (t.basetype == TypeDesc::FLOAT) { // just check float-based types
                int ncomps = t.numelements() * t.aggregate;
                llvm::Value *args[] = { ll.constant(ncomps), llvm_void_ptr(s),
                     ll.constant((int)s.has_derivs()), sg_void_ptr(),
                     ll.constant(ustring(inst()->shadername())),
                     ll.constant(0), ll.constant(s.name()),
                     ll.constant(0), ll.constant(ncomps),
                     ll.constant("<none>") };
                ll.call_function ("osl_naninf_check", args, 10);
            }
        }
    }
    // make a second pass for the parameters (which may make use of
    // locals and constants from the first pass)
    FOREACH_PARAM (Symbol &s, inst()) {
        // Skip structure placeholders
        if (s.typespec().is_structure())
            continue;
        // Skip if it's never read and isn't connected (and isn't a
        // designated renderer output)
        if (! s.everread() && ! s.connected_down() && ! s.connected() &&
              ! shadingsys().is_renderer_output(s.name()))
            continue;
        // Set initial value for params (may contain init ops)
        llvm_assign_initial_value (s);
    }
    // All the symbols are stack allocated now.
    // Mark all the basic blocks, including allocating llvm::BasicBlock
    // records for each.
    find_basic_blocks ();
    find_conditionals ();
    m_layers_already_run.clear ();
    // Generate IR for the instance's main code range
    build_llvm_code (inst()->maincodebegin(), inst()->maincodeend());
    // If any "exit" was generated, fall through to the exit block so the
    // epilogue below is emitted there.
    if (llvm_has_exit_instance_block())
        ll.op_branch (m_exit_instance_block); // also sets insert point
    // Transfer all of this layer's outputs into the downstream shader's
    // inputs.  Only whole-symbol connections are supported here (asserted).
    for (int layer = this->layer()+1; layer < group().nlayers(); ++layer) {
        ShaderInstance *child = group()[layer];
        for (int c = 0; c < child->nconnections(); ++c) {
            const Connection &con (child->connection (c));
            if (con.srclayer == this->layer()) {
                ASSERT (con.src.arrayindex == -1 && con.src.channel == -1 &&
                        con.dst.arrayindex == -1 && con.dst.channel == -1 &&
                        "no support for individual element/channel connection");
                Symbol *srcsym (inst()->symbol (con.src.param));
                Symbol *dstsym (child->symbol (con.dst.param));
                llvm_run_connected_layers (*srcsym, con.src.param);
                // FIXME -- I'm not sure I understand this.  Isn't this
                // unnecessary if we wrote to the parameter ourself?
                llvm_assign_impl (*dstsym, *srcsym);
            }
        }
    }
    // llvm_gen_debug_printf ("done copying connections");
    // All done
#if 0 /* helpful for debugging */
    if (llvm_debug())
        llvm_gen_debug_printf (Strutil::format("exit layer %s %s", inst()->layername(), inst()->shadername()));
#endif
    ll.op_return();
    if (llvm_debug())
        std::cout << "layer_func (" << unique_layer_name << ") "<< this->layer()
                  << "/" << group().nlayers() << " after llvm = "
                  << ll.bitcode_string(ll.current_function()) << "\n";
    ll.end_builder();  // clear the builder
    return ll.current_function();
}
// Generate the complete LLVM IR function for this shader layer instance:
//     void <layername>_<id> (ShaderGlobals *sg, GroupData *groupdata)
// If 'groupentry' is true, this layer is the entry point of the shader
// group and additionally clears the "layer executed" bits and
// zero-initializes all layers' closure parameters before running.
// Returns the newly built llvm::Function (also stored in m_layer_func).
// NOTE(review): this appears to be an earlier variant of
// BackendLLVM::build_llvm_instance that talks to LLVM directly rather
// than through the 'll' wrapper.
llvm::Function* RuntimeOptimizer::build_llvm_instance (bool groupentry) {
    // Make a layer function: void layer_func(ShaderGlobals*, GroupData*)
    // Note that the GroupData* is passed as a void*.
    std::string unique_layer_name = Strutil::format ("%s_%d", inst()->layername().c_str(), inst()->id());
    m_layer_func = llvm::cast<llvm::Function>(m_llvm_module->getOrInsertFunction(unique_layer_name,
                    llvm_type_void(), llvm_type_sg_ptr(),
                    llvm_type_groupdata_ptr(), NULL));
    // Use fastcall for non-entry layer functions to encourage register calling
    if (!groupentry) m_layer_func->setCallingConv(llvm::CallingConv::Fast);
    llvm::Function::arg_iterator arg_it = m_layer_func->arg_begin();
    // Get shader globals pointer (first arg), then groupdata pointer (second)
    m_llvm_shaderglobals_ptr = arg_it++;
    m_llvm_groupdata_ptr = arg_it++;
    llvm::BasicBlock *entry_bb = llvm_new_basic_block (unique_layer_name);
    // Set up a new IR builder, discarding any builder from a previous layer
    delete m_builder;
    m_builder = new llvm::IRBuilder<> (entry_bb);
    // llvm_gen_debug_printf (std::string("enter layer ")+inst()->shadername());
    if (groupentry) {
        if (m_num_used_layers > 1) {
            // If this is the group entry point, clear all the "layer
            // executed" bits.  If it's not the group entry (but rather is
            // an upstream node), then set its bit!
            int sz = (m_num_used_layers + 3) & (~3);  // round up to 32 bits
            llvm_memset (llvm_void_ptr(layer_run_ptr(0)), 0, sz, 4 /*align*/);
        }
        // Group entries also need to allot space for ALL layers' params
        // that are closures (to avoid weird order of layer eval problems).
        // Each closure param (or each element, for arrays) is stored as a
        // NULL void pointer.
        for (int i = 0; i < group().nlayers(); ++i) {
            ShaderInstance *gi = group()[i];
            if (gi->unused())
                continue;
            FOREACH_PARAM (Symbol &sym, gi) {
                if (sym.typespec().is_closure_based()) {
                    int arraylen = std::max (1, sym.typespec().arraylength());
                    llvm::Value *val = llvm_constant_ptr(NULL, llvm_type_void_ptr());
                    for (int a = 0; a < arraylen; ++a) {
                        llvm::Value *arrind = sym.typespec().is_array() ?
                            llvm_constant(a) : NULL;
                        llvm_store_value (val, sym, 0, arrind, 0);
                    }
                }
            }
            // Unconditionally execute earlier layers that are not lazy
            if (! gi->run_lazily() && i < group().nlayers()-1)
                llvm_call_layer (i, true /* unconditionally run */);
        }
    }
    // Setup the symbols (first pass: everything except parameters)
    m_named_values.clear ();
    BOOST_FOREACH (Symbol &s, inst()->symbols()) {
        // Skip non-array constants -- we always inline them
        if (s.symtype() == SymTypeConst && !s.typespec().is_array())
            continue;
        // Skip structure placeholders
        if (s.typespec().is_structure())
            continue;
        // Allocate space for locals, temps, aggregate constants
        if (s.symtype() == SymTypeLocal || s.symtype() == SymTypeTemp ||
                s.symtype() == SymTypeConst)
            getOrAllocateLLVMSymbol (s);
        // Set initial value for constants, closures, and strings that are
        // not parameters.
        if (s.symtype() != SymTypeParam && s.symtype() != SymTypeOutputParam &&
            (s.is_constant() || s.typespec().is_closure_based() ||
             s.typespec().is_string_based()))
            llvm_assign_initial_value (s);
        // If debugnan is turned on, globals check that their values are ok
        if (s.symtype() == SymTypeGlobal && m_shadingsys.debug_nan()) {
            TypeDesc t = s.typespec().simpletype();
            if (t.basetype == TypeDesc::FLOAT) { // just check float-based types
                int ncomps = t.numelements() * t.aggregate;
                llvm::Value *args[] = { llvm_constant(ncomps), llvm_void_ptr(s),
                     llvm_constant((int)s.has_derivs()), sg_void_ptr(),
                     llvm_constant(ustring(inst()->shadername())),
                     llvm_constant(0), llvm_constant(s.name()) };
                llvm_call_function ("osl_naninf_check", args, 7);
            }
        }
    }
    // make a second pass for the parameters (which may make use of
    // locals and constants from the first pass)
    FOREACH_PARAM (Symbol &s, inst()) {
        // Skip structure placeholders
        if (s.typespec().is_structure())
            continue;
        // Skip if it's never read and isn't connected
        if (! s.everread() && ! s.connected_down() && ! s.connected())
            continue;
        // Set initial value for params (may contain init ops)
        llvm_assign_initial_value (s);
    }
    // All the symbols are stack allocated now.
    // Mark all the basic blocks, including allocating llvm::BasicBlock
    // records for each.
    find_basic_blocks (true);
    find_conditionals ();
    m_layers_already_run.clear ();
    // Generate IR for the instance's main code range
    build_llvm_code (inst()->maincodebegin(), inst()->maincodeend());
    // Transfer all of this layer's outputs into the downstream shader's
    // inputs.  Only whole-symbol connections are supported here (asserted).
    for (int layer = m_layer+1; layer < group().nlayers(); ++layer) {
        ShaderInstance *child = m_group[layer];
        for (int c = 0; c < child->nconnections(); ++c) {
            const Connection &con (child->connection (c));
            if (con.srclayer == m_layer) {
                ASSERT (con.src.arrayindex == -1 && con.src.channel == -1 &&
                        con.dst.arrayindex == -1 && con.dst.channel == -1 &&
                        "no support for individual element/channel connection");
                Symbol *srcsym (inst()->symbol (con.src.param));
                Symbol *dstsym (child->symbol (con.dst.param));
                llvm_run_connected_layers (*srcsym, con.src.param);
                // FIXME -- I'm not sure I understand this.  Isn't this
                // unnecessary if we wrote to the parameter ourself?
                llvm_assign_impl (*dstsym, *srcsym);
            }
        }
    }
    // llvm_gen_debug_printf ("done copying connections");
    // All done
    // llvm_gen_debug_printf (std::string("exit layer ")+inst()->shadername());
    builder().CreateRetVoid();
    if (shadingsys().llvm_debug())
        llvm::outs() << "layer_func (" << unique_layer_name << ") after llvm = " << *m_layer_func << "\n";
    delete m_builder;
    m_builder = NULL;
    return m_layer_func;
}