llvm::Function*
BackendLLVM::build_llvm_instance (bool groupentry)
{
    // Make a layer function: void layer_func(ShaderGlobals*, GroupData*)
    // Note that the GroupData* is passed as a void*.
    std::string unique_layer_name = Strutil::format ("%s_%d", inst()->layername(), inst()->id());

    ll.current_function (
           ll.make_function (unique_layer_name,
                             !groupentry, // fastcall for non-entry layer functions
                             ll.type_void(), // return type
                             llvm_type_sg_ptr(), llvm_type_groupdata_ptr()));
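    // From the caller's side, the emitted function is equivalent to this C
    // signature (a sketch; the typedef name is hypothetical):
    //     typedef void (*LayerFunc) (ShaderGlobals *sg, void *groupdata);
    // Non-entry layers get the fastcall convention, presumably safe because
    // they are only ever called from other JITed layer functions.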

    // Get shader globals and groupdata pointers
    m_llvm_shaderglobals_ptr = ll.current_function_arg(0);
    m_llvm_groupdata_ptr = ll.current_function_arg(1);

    llvm::BasicBlock *entry_bb = ll.new_basic_block (unique_layer_name);
    m_exit_instance_block = NULL;

    // Set up a new IR builder
    ll.new_builder (entry_bb);
#if 0 /* helpful for debugging */
    if (llvm_debug() && groupentry)
        llvm_gen_debug_printf (Strutil::format("\n\n\n\nGROUP! %s",group().name()));
    if (llvm_debug())
        llvm_gen_debug_printf (Strutil::format("enter layer %s %s",
                                  inst()->layername(), inst()->shadername()));
#endif
    if (shadingsys().countlayerexecs())
        ll.call_function ("osl_incr_layers_executed", sg_void_ptr());

    if (groupentry) {
        if (m_num_used_layers > 1) {
            // If this is the group entry point, clear all the "layer
            // executed" bits.  If it's not the group entry (but rather is
            // an upstream node), then set its bit!
            int sz = (m_num_used_layers + 3) & (~3);  // round byte count up to a 4-byte (32-bit) multiple
            ll.op_memset (ll.void_ptr(layer_run_ptr(0)), 0, sz, 4 /*align*/);
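            // Worked example: with 5 used layers, sz = (5+3) & ~3 == 8, so
            // eight bytes of one-byte run flags are cleared with 4-byte
            // alignment (assuming the flags sit at the start of GroupData,
            // which is where layer_run_ptr(0) points).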
        }
        // Group entries also need to allot space for ALL layers' params
        // that are closures (to avoid weird order of layer eval problems).
        for (int i = 0;  i < group().nlayers();  ++i) {
            ShaderInstance *gi = group()[i];
            if (gi->unused())
                continue;
            FOREACH_PARAM (Symbol &sym, gi) {
                if (sym.typespec().is_closure_based()) {
                    int arraylen = std::max (1, sym.typespec().arraylength());
                    llvm::Value *val = ll.constant_ptr(NULL, ll.type_void_ptr());
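                    // Closure params are stored as pointers, one slot per
                    // array element, so a null store marks "no closure yet".
                    // E.g. a 'closure color c[3]' param gets three null
                    // stores from the loop below.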
                    for (int a = 0; a < arraylen;  ++a) {
                        llvm::Value *arrind = sym.typespec().is_array() ? ll.constant(a) : NULL;
                        llvm_store_value (val, sym, 0, arrind, 0);
                    }
                }
            }
            // Unconditionally execute earlier layers that are not lazy
            if (! gi->run_lazily() && i < group().nlayers()-1)
                llvm_call_layer (i, true /* unconditionally run */);
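            // Non-lazy layers are typically ones whose execution cannot be
            // deferred (an assumption about what run_lazily() reports), so
            // the group entry calls them up front rather than on demand.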
        }
    }

    // Setup the symbols
    m_named_values.clear ();
    BOOST_FOREACH (Symbol &s, inst()->symbols()) {
        // Skip constants -- we always inline scalar constants, and for
        // array constants we will just use the pointers to the copy of
        // the constant that belongs to the instance.
        if (s.symtype() == SymTypeConst)
            continue;
        // Skip structure placeholders
        if (s.typespec().is_structure())
            continue;
        // Allocate space for locals and temps.  (Constants never reach this
        // point -- all of them were skipped above.)
        if (s.symtype() == SymTypeLocal || s.symtype() == SymTypeTemp)
            getOrAllocateLLVMSymbol (s);
        // Set initial value for constants, closures, and strings that are
        // not parameters.
        if (s.symtype() != SymTypeParam && s.symtype() != SymTypeOutputParam &&
            s.symtype() != SymTypeGlobal &&
            (s.is_constant() || s.typespec().is_closure_based() ||
             s.typespec().is_string_based() || 
             ((s.symtype() == SymTypeLocal || s.symtype() == SymTypeTemp)
              && shadingsys().debug_uninit())))
            llvm_assign_initial_value (s);
        // If debug_nan is enabled, check that global values are finite
        if (s.symtype() == SymTypeGlobal && shadingsys().debug_nan()) {
            TypeDesc t = s.typespec().simpletype();
            if (t.basetype == TypeDesc::FLOAT) { // just check float-based types
                int ncomps = t.numelements() * t.aggregate;
                llvm::Value *args[] = { ll.constant(ncomps), llvm_void_ptr(s),
                     ll.constant((int)s.has_derivs()), sg_void_ptr(), 
                     ll.constant(ustring(inst()->shadername())),
                     ll.constant(0), ll.constant(s.name()),
                     ll.constant(0), ll.constant(ncomps),
                     ll.constant("<none>")
                };
                ll.call_function ("osl_naninf_check", args, 10);
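                // Argument layout of osl_naninf_check, as read from this
                // call (hedged): component count, value pointer, has_derivs,
                // sg, source shader name, source line (0 here), symbol name,
                // first component to check (0), number of components, and
                // the op name ("<none>" for this entry check).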
            }
        }
    }
    // make a second pass for the parameters (which may make use of
    // locals and constants from the first pass)
    FOREACH_PARAM (Symbol &s, inst()) {
        // Skip structure placeholders
        if (s.typespec().is_structure())
            continue;
        // Skip if it's never read and isn't connected
        if (! s.everread() && ! s.connected_down() && ! s.connected()
              && ! shadingsys().is_renderer_output(s.name()))
            continue;
        // Set initial value for params (may contain init ops)
        llvm_assign_initial_value (s);
    }
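    // Why two passes: a parameter's init ops are ordinary code that may read
    // locals, temps, or constants, so those need storage first.  For example
    // (illustrative OSL, not from this source), in
    //     shader s (float amp = 0.5 * noise(P)) { ... }
    // the init ops for 'amp' use temporaries that pass one allocated.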

    // All the symbols are stack allocated now.

    // Mark all the basic blocks, including allocating llvm::BasicBlock
    // records for each.
    find_basic_blocks ();
    find_conditionals ();
    m_layers_already_run.clear ();

    build_llvm_code (inst()->maincodebegin(), inst()->maincodeend());

    if (llvm_has_exit_instance_block())
        ll.op_branch (m_exit_instance_block); // also sets insert point
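    // Paths that executed the OSL exit() statement branched to
    // m_exit_instance_block; the branch above routes normal fall-through into
    // the same block, so the epilogue below is emitted exactly once.  Without
    // an exit block, codegen simply continues in the current block.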

    // Transfer all of this layer's outputs into the downstream shader's
    // inputs.
    for (int layer = this->layer()+1;  layer < group().nlayers();  ++layer) {
        ShaderInstance *child = group()[layer];
        for (int c = 0;  c < child->nconnections();  ++c) {
            const Connection &con (child->connection (c));
            if (con.srclayer == this->layer()) {
                ASSERT (con.src.arrayindex == -1 && con.src.channel == -1 &&
                        con.dst.arrayindex == -1 && con.dst.channel == -1 &&
                        "no support for individual element/channel connection");
                Symbol *srcsym (inst()->symbol (con.src.param));
                Symbol *dstsym (child->symbol (con.dst.param));
                llvm_run_connected_layers (*srcsym, con.src.param);
                // FIXME -- I'm not sure I understand this.  Isn't this
                // unnecessary if we wrote to the parameter ourself?
                llvm_assign_impl (*dstsym, *srcsym);
            }
        }
    }
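    // Illustration (hypothetical names): if this layer's output 'Cout' feeds
    // a downstream layer's input 'Cin', the loop above first makes sure
    // anything feeding the source param has run, then emits the equivalent
    // of 'Cin = Cout' into the downstream layer's GroupData storage.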
    // llvm_gen_debug_printf ("done copying connections");

    // All done
#if 0 /* helpful for debugging */
    if (llvm_debug())
        llvm_gen_debug_printf (Strutil::format("exit layer %s %s",
                                   inst()->layername(), inst()->shadername()));
#endif
    ll.op_return();

    if (llvm_debug())
        std::cout << "layer_func (" << unique_layer_name << ") "<< this->layer() 
                  << "/" << group().nlayers() << " after llvm  = " 
                  << ll.bitcode_string(ll.current_function()) << "\n";

    ll.end_builder();  // clear the builder

    return ll.current_function();
}
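
#if 0 /* usage sketch, not from the original source */
// How a caller might invoke the JITed group entry produced above, once the
// module has been compiled.  The typedef and helper names are hypothetical;
// only the (ShaderGlobals*, void*) shape is taken from the code.
typedef void (*ShaderGroupEntry) (ShaderGlobals *sg, void *groupdata);

void shade_one_point (ShaderGroupEntry entry, ShaderGlobals &sg,
                      void *groupdata_arena)
{
    // One call per shading point; the GroupData block is passed as a void*.
    entry (&sg, groupdata_arena);
}
#endif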

// Example 2: the RuntimeOptimizer variant of the same function, which drives
// the raw LLVM API directly instead of going through the ll.* wrappers.

llvm::Function*
RuntimeOptimizer::build_llvm_instance (bool groupentry)
{
    // Make a layer function: void layer_func(ShaderGlobals*, GroupData*)
    // Note that the GroupData* is passed as a void*.
    std::string unique_layer_name = Strutil::format ("%s_%d", inst()->layername().c_str(), inst()->id());

    m_layer_func = llvm::cast<llvm::Function>(m_llvm_module->getOrInsertFunction(unique_layer_name,
                    llvm_type_void(), llvm_type_sg_ptr(),
                    llvm_type_groupdata_ptr(), NULL));
    // Use fastcall for non-entry layer functions to encourage register calling
    if (!groupentry) m_layer_func->setCallingConv(llvm::CallingConv::Fast);
    llvm::Function::arg_iterator arg_it = m_layer_func->arg_begin();
    // Get shader globals and groupdata pointers
    m_llvm_shaderglobals_ptr = arg_it++;
    m_llvm_groupdata_ptr = arg_it++;

    llvm::BasicBlock *entry_bb = llvm_new_basic_block (unique_layer_name);

    // Set up a new IR builder
    delete m_builder;
    m_builder = new llvm::IRBuilder<> (entry_bb);
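    // This variant manages the IRBuilder lifetime by hand (delete + new);
    // the BackendLLVM version above wraps the same step in ll.new_builder()
    // and ll.end_builder().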
    // llvm_gen_debug_printf (std::string("enter layer ")+inst()->shadername());

    if (groupentry) {
        if (m_num_used_layers > 1) {
            // If this is the group entry point, clear all the "layer
            // executed" bits.  If it's not the group entry (but rather is
            // an upstream node), then set its bit!
            int sz = (m_num_used_layers + 3) & (~3);  // round byte count up to a 4-byte (32-bit) multiple
            llvm_memset (llvm_void_ptr(layer_run_ptr(0)), 0, sz, 4 /*align*/);
        }
        // Group entries also need to allot space for ALL layers' params
        // that are closures (to avoid weird order of layer eval problems).
        for (int i = 0;  i < group().nlayers();  ++i) {
            ShaderInstance *gi = group()[i];
            if (gi->unused())
                continue;
            FOREACH_PARAM (Symbol &sym, gi) {
                if (sym.typespec().is_closure_based()) {
                    int arraylen = std::max (1, sym.typespec().arraylength());
                    llvm::Value *val = llvm_constant_ptr(NULL, llvm_type_void_ptr());
                    for (int a = 0; a < arraylen;  ++a) {
                        llvm::Value *arrind = sym.typespec().is_array() ? llvm_constant(a) : NULL;
                        llvm_store_value (val, sym, 0, arrind, 0);
                    }
                }
            }
            // Unconditionally execute earlier layers that are not lazy
            if (! gi->run_lazily() && i < group().nlayers()-1)
                llvm_call_layer (i, true /* unconditionally run */);
        }
    }

    // Setup the symbols
    m_named_values.clear ();
    BOOST_FOREACH (Symbol &s, inst()->symbols()) {
        // Skip non-array constants -- we always inline them
        if (s.symtype() == SymTypeConst && !s.typespec().is_array())
            continue;
        // Skip structure placeholders
        if (s.typespec().is_structure())
            continue;
        // Allocate space for locals, temps, aggregate constants
        if (s.symtype() == SymTypeLocal || s.symtype() == SymTypeTemp ||
                s.symtype() == SymTypeConst)
            getOrAllocateLLVMSymbol (s);
        // Set initial value for constants, closures, and strings that are
        // not parameters.
        if (s.symtype() != SymTypeParam && s.symtype() != SymTypeOutputParam &&
            (s.is_constant() || s.typespec().is_closure_based() ||
             s.typespec().is_string_based()))
            llvm_assign_initial_value (s);
        // If debug_nan is enabled, check that global values are finite
        if (s.symtype() == SymTypeGlobal && m_shadingsys.debug_nan()) {
            TypeDesc t = s.typespec().simpletype();
            if (t.basetype == TypeDesc::FLOAT) { // just check float-based types
                int ncomps = t.numelements() * t.aggregate;
                llvm::Value *args[] = { llvm_constant(ncomps), llvm_void_ptr(s),
                     llvm_constant((int)s.has_derivs()), sg_void_ptr(), 
                     llvm_constant(ustring(inst()->shadername())),
                     llvm_constant(0), llvm_constant(s.name()) };
                llvm_call_function ("osl_naninf_check", args, 7);
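                // Note: this variant passes only the first seven arguments;
                // the BackendLLVM version above extends the call with the
                // first-component index, component count, and op name.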
            }
        }
    }
    // make a second pass for the parameters (which may make use of
    // locals and constants from the first pass)
    FOREACH_PARAM (Symbol &s, inst()) {
        // Skip structure placeholders
        if (s.typespec().is_structure())
            continue;
        // Skip if it's never read and isn't connected
        if (! s.everread() && ! s.connected_down() && ! s.connected())
            continue;
        // Set initial value for params (may contain init ops)
        llvm_assign_initial_value (s);
    }

    // All the symbols are stack allocated now.

    // Mark all the basic blocks, including allocating llvm::BasicBlock
    // records for each.
    find_basic_blocks (true);
    find_conditionals ();
    m_layers_already_run.clear ();

    build_llvm_code (inst()->maincodebegin(), inst()->maincodeend());

    // Transfer all of this layer's outputs into the downstream shader's
    // inputs.
    for (int layer = m_layer+1;  layer < group().nlayers();  ++layer) {
        ShaderInstance *child = m_group[layer];
        for (int c = 0;  c < child->nconnections();  ++c) {
            const Connection &con (child->connection (c));
            if (con.srclayer == m_layer) {
                ASSERT (con.src.arrayindex == -1 && con.src.channel == -1 &&
                        con.dst.arrayindex == -1 && con.dst.channel == -1 &&
                        "no support for individual element/channel connection");
                Symbol *srcsym (inst()->symbol (con.src.param));
                Symbol *dstsym (child->symbol (con.dst.param));
                llvm_run_connected_layers (*srcsym, con.src.param);
                // FIXME -- I'm not sure I understand this.  Isn't this
                // unnecessary if we wrote to the parameter ourself?
                llvm_assign_impl (*dstsym, *srcsym);
            }
        }
    }
    // llvm_gen_debug_printf ("done copying connections");

    // All done
    // llvm_gen_debug_printf (std::string("exit layer ")+inst()->shadername());
    builder().CreateRetVoid();

    if (shadingsys().llvm_debug())
        llvm::outs() << "layer_func (" << unique_layer_name << ") after llvm  = " << *m_layer_func << "\n";

    delete m_builder;
    m_builder = NULL;

    return m_layer_func;
}
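
#if 0 /* layout sketch, not from the original source */
// Rough shape of the GroupData blob both variants write into.  The field
// names and the one-byte flag type are assumptions; the real layout is an
// LLVM struct type assembled dynamically by the backend.
struct GroupDataSketch {
    // "Layer executed" flags, one per used layer, padded to a 4-byte
    // multiple -- this is what the group entry memsets to zero.
    char layer_run[8];              // e.g. 5 used layers rounded up to 8
    // ... followed by each used layer's parameter storage, including the
    // closure pointer slots that the group entry pre-nulls ...
};
#endif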