// Generates machine code for mModule and records the addresses of exported
// variables and functions in mpResult.
//
// Steps, in order:
//   1. Make sure a code memory manager and a code emitter exist (an existing
//      emitter is reset and reused).
//   2. Run the target's machine-code emission passes over every function in
//      mModule that is not a mere declaration.
//   3. Fill mpResult->mExportVars and mpResult->mExportFuncs from the given
//      metadata, one slot per metadata operand so that slot indices line up
//      with the metadata order.
//
// Parameters:
//   TD                 - target data, added to the code-gen pass manager.
//   TM                 - target machine that supplies the emission passes.
//   ExportVarMetadata  - may be NULL; each operand names an exported variable
//                        through an MDString in its operand 0.
//   ExportFuncMetadata - may be NULL; each operand names an exported function
//                        through an MDString in its operand 0.
//
// Returns 0 on success. Returns 1, after calling setError(), when the memory
// manager or code emitter cannot be created, or when the target machine
// reports that machine code emission is not supported.
int Compiler::runCodeGen(llvm::TargetData *TD, llvm::TargetMachine *TM,
                         llvm::NamedMDNode const *ExportVarMetadata,
                         llvm::NamedMDNode const *ExportFuncMetadata) {
  // Create memory manager for creation of code emitter later.
  if (!mCodeMemMgr.get() && !createCodeMemoryManager()) {
    setError("Failed to startup memory management for further compilation");
    return 1;
  }

  mpResult->mContext = (char *) (mCodeMemMgr.get()->getCodeMemBase());

  // Create the code emitter, or reset the existing one so it can be reused.
  if (!mCodeEmitter.get()) {
    if (!createCodeEmitter()) {
      setError("Failed to create machine code emitter for compilation");
      return 1;
    }
  } else {
    mCodeEmitter->reset();
  }

  mCodeEmitter->setTargetMachine(*TM);
  mCodeEmitter->registerSymbolCallback(mpSymbolLookupFn,
                                       mpSymbolLookupContext);

  // Create code-gen pass manager to run the code emitter
  llvm::OwningPtr<llvm::FunctionPassManager> CodeGenPasses(
    new llvm::FunctionPassManager(mModule));

  // Add TargetData to code generation pass manager
  CodeGenPasses->add(TD);

  // Add code emit passes; a true result is treated as "not supported".
  if (TM->addPassesToEmitMachineCode(*CodeGenPasses,
                                     *mCodeEmitter,
                                     CodeGenOptLevel)) {
    setError("The machine code emission is not supported on '" + Triple + "'");
    return 1;
  }

  // Run the code emitter on every non-declaration function in the module
  CodeGenPasses->doInitialization();
  for (llvm::Module::iterator
       I = mModule->begin(), E = mModule->end(); I != E; ++I) {
    if (!I->isDeclaration()) {
      CodeGenPasses->run(*I);
    }
  }

  CodeGenPasses->doFinalization();

  // Copy the global address mapping from the code emitter into the export
  // variable list. Exactly one slot is appended per metadata operand.
  if (ExportVarMetadata) {
    ScriptCompiled::ExportVarList &varList = mpResult->mExportVars;

    for (unsigned i = 0, e = ExportVarMetadata->getNumOperands(); i != e; ++i) {
      // Set once the address of this variable has been appended to varList.
      bool Found = false;

      llvm::MDNode *ExportVar = ExportVarMetadata->getOperand(i);
      // Entries with fewer than two operands are treated as malformed.
      if (ExportVar != NULL && ExportVar->getNumOperands() > 1) {
        llvm::Value *ExportVarNameMDS = ExportVar->getOperand(0);
        if (ExportVarNameMDS->getValueID() == llvm::Value::MDStringVal) {
          llvm::StringRef ExportVarName =
            static_cast<llvm::MDString*>(ExportVarNameMDS)->getString();

          // Search the emitter's global address table for a global variable
          // carrying the exported name.
          for (CodeEmitter::global_addresses_const_iterator
               I = mCodeEmitter->global_address_begin(),
               E = mCodeEmitter->global_address_end();
               I != E; ++I) {
            if (I->first->getValueID() != llvm::Value::GlobalVariableVal)
              continue;
            if (ExportVarName == I->first->getName()) {
              varList.push_back(I->second);
#if DEBUG_BCC_REFLECT
              LOGD("runCodeGen(): Exported VAR: %s @ %p\n", ExportVarName.str().c_str(), I->second);
#endif
              Found = true;
              break;
            }
          }

#if DEBUG_BCC_REFLECT
          if (!Found) {
            LOGD("runCodeGen(): Exported VAR: %s @ %p\n",
                 ExportVarName.str().c_str(), (void *)0);
          }
#endif
        }
      }

      // The record is malformed or names a variable that was not emitted:
      // keep an empty slot so later variables keep their index.
      if (!Found) {
        varList.push_back(NULL);
      }
    }

    bccAssert((varList.size() == ExportVarMetadata->getNumOperands()) &&
              "Number of slots doesn't match the number of export variables!");
  }

  // Resolve the exported functions by name. As with the variables, exactly
  // one slot is appended per metadata operand.
  if (ExportFuncMetadata) {
    ScriptCompiled::ExportFuncList &funcList = mpResult->mExportFuncs;

    for (unsigned i = 0, e = ExportFuncMetadata->getNumOperands(); i != e; ++i) {
      // Set once a slot for this function has been appended to funcList.
      bool Pushed = false;

      llvm::MDNode *ExportFunc = ExportFuncMetadata->getOperand(i);
      if (ExportFunc != NULL && ExportFunc->getNumOperands() > 0) {
        llvm::Value *ExportFuncNameMDS = ExportFunc->getOperand(0);
        if (ExportFuncNameMDS->getValueID() == llvm::Value::MDStringVal) {
          llvm::StringRef ExportFuncName =
            static_cast<llvm::MDString*>(ExportFuncNameMDS)->getString();
          funcList.push_back(mpResult->lookup(ExportFuncName.str().c_str()));
          Pushed = true;
#if DEBUG_BCC_REFLECT
          LOGD("runCodeGen(): Exported Func: %s @ %p\n", ExportFuncName.str().c_str(),
               funcList.back());
#endif
        }
      }

      // A malformed record must still occupy its slot; dropping it would
      // shift every following function to a lower index.
      if (!Pushed) {
        funcList.push_back(NULL);
      }
    }

    bccAssert((funcList.size() == ExportFuncMetadata->getNumOperands()) &&
              "Number of slots doesn't match the number of export functions!");
  }

  // Tell code emitter now can release the memory using during the JIT since
  // we have done the code emission
  mCodeEmitter->releaseUnnecessary();

  return 0;
}
// Builds one fused kernel in mergedModule that calls the given kernels in
// sequence, feeding the result of each call to the next one as its input.
//
// Parameters:
//   Context      - supplies the LLVMContext used to create IR and metadata.
//   sources      - the sources whose kernels are fused, in call order.
//   slots        - slots[i] selects the kernel of sources[i]; must have the
//                  same size as sources.
//   fusedName    - name of the function to create in mergedModule.
//   mergedModule - module that receives the fused function and its metadata.
//
// On success the fused function is defined, an entry with its name is added
// to "#rs_export_foreach_name", an entry with its signature (as a decimal
// string) is added to "#rs_export_foreach", and true is returned.
//
// Returns false when the fused type cannot be computed, when fusedName cannot
// be used for a new function definition, when a kernel cannot be found or is
// not a kernel, when argument types or counts do not line up, or when there
// is no value to return from a non-void fused kernel.
//
// NOTE(review): when false is returned after the entry block has been
// created, the partially built function is left in mergedModule.
bool fuseKernels(bcc::BCCContext& Context,
                 const std::vector<Source *>& sources,
                 const std::vector<int>& slots,
                 const std::string& fusedName,
                 Module* mergedModule) {
  bccAssert(sources.size() == slots.size() && "sources and slots differ in size");

  uint32_t fusedFunctionSignature;

  llvm::FunctionType* fusedType =
          getFusedFuncType(Context, sources, slots, mergedModule, &fusedFunctionSignature);

  if (fusedType == nullptr) {
    return false;
  }

  // getOrInsertFunction() does not always hand back a fresh Function: if the
  // name is already taken it may return the existing symbol, possibly wrapped
  // in a cast. Refuse anything that is not a body-less Function instead of
  // blindly casting and appending blocks to it.
  Function* fusedKernel =
          llvm::dyn_cast<Function>(mergedModule->getOrInsertFunction(fusedName, fusedType));
  if (fusedKernel == nullptr || !fusedKernel->empty()) {
    ALOGE("Kernel fusion (function %s): name is already in use in the merged module",
          fusedName.c_str());
    return false;
  }

  llvm::LLVMContext& ctxt = Context.getLLVMContext();

  llvm::BasicBlock* block = llvm::BasicBlock::Create(ctxt, "entry", fusedKernel);
  llvm::IRBuilder<> builder(block);

  // Name the arguments of the fused kernel. Which ones exist is dictated by
  // the fused signature; the ones that do not exist stay nullptr.
  Function::arg_iterator argIter = fusedKernel->arg_begin();

  llvm::Value* dataElement = nullptr;
  if (bcinfo::MetadataExtractor::hasForEachSignatureIn(fusedFunctionSignature)) {
    dataElement = &*(argIter++);
    dataElement->setName("DataIn");
  }

  llvm::Value* X = nullptr;
  if (bcinfo::MetadataExtractor::hasForEachSignatureX(fusedFunctionSignature)) {
    X = &*(argIter++);
    X->setName("x");
  }

  llvm::Value* Y = nullptr;
  if (bcinfo::MetadataExtractor::hasForEachSignatureY(fusedFunctionSignature)) {
    Y = &*(argIter++);
    Y->setName("y");
  }

  llvm::Value* Z = nullptr;
  if (bcinfo::MetadataExtractor::hasForEachSignatureZ(fusedFunctionSignature)) {
    Z = &*(argIter++);
    Z->setName("z");
  }

  auto slotIter = slots.begin();
  for (const Source* source : sources) {
    int slot = *slotIter;

    uint32_t inputFunctionSignature;
    const Function* inputFunction =
            getFunction(mergedModule, source, slot, &inputFunctionSignature);
    if (inputFunction == nullptr) {
      // Either failed to find the kernel function, or the function has multiple inputs.
      return false;
    }

    // Don't try to fuse a non-kernel
    if (!bcinfo::MetadataExtractor::hasForEachSignatureKernel(inputFunctionSignature)) {
      ALOGE("Kernel fusion (module %s function %s): not a kernel",
            source->getName().c_str(), inputFunction->getName().str().c_str());
      return false;
    }

    std::vector<llvm::Value*> args;

    if (bcinfo::MetadataExtractor::hasForEachSignatureIn(inputFunctionSignature)) {
      if (dataElement == nullptr) {
        ALOGE("Kernel fusion (module %s function %s): expected input, but got null",
              source->getName().c_str(), inputFunction->getName().str().c_str());
        return false;
      }

      const llvm::FunctionType* funcTy = inputFunction->getFunctionType();
      llvm::Type* firstArgType = funcTy->getParamType(0);

      // The value flowing in (fused argument or previous kernel's result)
      // must have exactly the type this kernel takes as its first parameter.
      if (dataElement->getType() != firstArgType) {
        std::string msg;
        llvm::raw_string_ostream rso(msg);
        rso << "Mismatching argument type, expected ";
        firstArgType->print(rso);
        rso << ", received ";
        dataElement->getType()->print(rso);
        ALOGE("Kernel fusion (module %s function %s): %s", source->getName().c_str(),
              inputFunction->getName().str().c_str(), rso.str().c_str());
        return false;
      }

      args.push_back(dataElement);
    } else {
      // Only the first kernel in a batch is allowed to have no input
      if (slotIter != slots.begin()) {
        ALOGE("Kernel fusion (module %s function %s): function not first in batch takes no input",
              source->getName().c_str(), inputFunction->getName().str().c_str());
        return false;
      }
    }

    // Appends one coordinate argument of the fused kernel to the call. Fails
    // when the fused kernel has no such argument, because passing a null
    // Value to CreateCall() is not valid.
    auto forwardCoordinate = [&](llvm::Value* coordinate, const char* coordinateName) -> bool {
      if (coordinate == nullptr) {
        ALOGE("Kernel fusion (module %s function %s): kernel uses %s, but the fused kernel "
              "has no such argument",
              source->getName().c_str(), inputFunction->getName().str().c_str(),
              coordinateName);
        return false;
      }
      args.push_back(coordinate);
      return true;
    };

    if (bcinfo::MetadataExtractor::hasForEachSignatureX(inputFunctionSignature) &&
        !forwardCoordinate(X, "x")) {
      return false;
    }

    if (bcinfo::MetadataExtractor::hasForEachSignatureY(inputFunctionSignature) &&
        !forwardCoordinate(Y, "y")) {
      return false;
    }

    if (bcinfo::MetadataExtractor::hasForEachSignatureZ(inputFunctionSignature) &&
        !forwardCoordinate(Z, "z")) {
      return false;
    }

    // The result of this call becomes the input of the next kernel (or the
    // return value of the fused kernel if this is the last one).
    dataElement = builder.CreateCall(
            static_cast<llvm::Value*>(const_cast<Function*>(inputFunction)), args);

    ++slotIter;
  }

  if (fusedKernel->getReturnType()->isVoidTy()) {
    builder.CreateRetVoid();
  } else {
    // With no input argument and no kernel called there is nothing to return.
    if (dataElement == nullptr) {
      ALOGE("Kernel fusion (function %s): no value available to return",
            fusedName.c_str());
      return false;
    }
    builder.CreateRet(dataElement);
  }

  // Export the fused kernel: record its name ...
  llvm::NamedMDNode* ExportForEachNameMD =
    mergedModule->getOrInsertNamedMetadata("#rs_export_foreach_name");

  llvm::MDString* nameMDStr = llvm::MDString::get(ctxt, fusedName);
  llvm::MDNode* nameMDNode = llvm::MDNode::get(ctxt, nameMDStr);
  ExportForEachNameMD->addOperand(nameMDNode);

  // ... and its signature, stored as a decimal string.
  llvm::NamedMDNode* ExportForEachMD =
    mergedModule->getOrInsertNamedMetadata("#rs_export_foreach");
  llvm::MDString* sigMDStr = llvm::MDString::get(ctxt,
                                                 llvm::utostr(fusedFunctionSignature));
  llvm::MDNode* sigMDNode = llvm::MDNode::get(ctxt, sigMDStr);
  ExportForEachMD->addOperand(sigMDNode);

  return true;
}