Esempio n. 1
0
/**
 * One-time global initialisation for gallivm.
 *
 * Does nothing when gallivm_initialized is already set.  Otherwise it
 * stores the result of debug_get_option_gallivm_debug() in gallivm_debug
 * (DEBUG builds only), calls lp_set_target_options(), links in the JIT
 * selected by USE_MCJIT, calls util_cpu_detect(), chooses
 * lp_native_vector_width and finally sets gallivm_initialized.
 */
void
lp_build_init(void)
{
   if (gallivm_initialized) {
      return;
   }

#ifdef DEBUG
   gallivm_debug = debug_get_option_gallivm_debug();
#endif

   lp_set_target_options();

#if !USE_MCJIT
   LLVMLinkInJIT();
#else
   LLVMLinkInMCJIT();
#endif

   util_cpu_detect();

   /* 256-bit vectors are selected only when AVX support is compiled in
    * (HAVE_AVX), the CPU reports AVX, and the CPU is an Intel one.
    *
    * The vendor check is there because AMD Bulldozer's AVX throughput is the
    * same as SSE2, and 8-wide vectors need more floating point ops than
    * 4-wide ones (due to padding), so 4-wide is actually more efficient on
    * that processor.
    *
    * See also:
    * - http://www.anandtech.com/show/4955/the-bulldozer-review-amd-fx8150-tested/2
    *
    * Everything else stays at 128, even without any SIMD extensions: the
    * width really needs to be a multiple of 128 so that 4 floats fit.
    */
   lp_native_vector_width =
      (HAVE_AVX && util_cpu_caps.has_avx && util_cpu_caps.has_intel) ? 256
                                                                     : 128;

   /* The width chosen above is passed as the default value for the
    * LP_NATIVE_VECTOR_WIDTH option; whatever comes back is used.
    */
   lp_native_vector_width = debug_get_num_option("LP_NATIVE_VECTOR_WIDTH",
                                                 lp_native_vector_width);

   gallivm_initialized = TRUE;

#if 0
   /* Enable to simulate a less capable machine. */
   util_cpu_caps.has_sse3 = 0;
   util_cpu_caps.has_ssse3 = 0;
   util_cpu_caps.has_sse4_1 = 0;
#endif
}
/**
 * One-time global initialisation for gallivm.
 *
 * Does nothing when gallivm_initialized is already set.  Otherwise it
 * stores the result of debug_get_option_gallivm_debug() in gallivm_debug
 * (DEBUG builds only), calls lp_set_target_options(), links in the JIT
 * selected by USE_MCJIT, calls util_cpu_detect(), chooses
 * lp_native_vector_width and finally sets gallivm_initialized.
 */
void
lp_build_init(void)
{
   if (gallivm_initialized) {
      return;
   }

#ifdef DEBUG
   gallivm_debug = debug_get_option_gallivm_debug();
#endif

   lp_set_target_options();

#if !USE_MCJIT
   LLVMLinkInJIT();
#else
   LLVMLinkInMCJIT();
#endif

   util_cpu_detect();

   /* 256-bit vectors are selected when AVX support is compiled in (HAVE_AVX)
    * and the CPU reports AVX.  Everything else stays at 128, even without
    * any SIMD extensions: the width really needs to be a multiple of 128 so
    * that 4 floats fit.
    */
   lp_native_vector_width = (HAVE_AVX && util_cpu_caps.has_avx) ? 256 : 128;

   /* The width chosen above is passed as the default value for the
    * LP_NATIVE_VECTOR_WIDTH option; whatever comes back is used.
    */
   lp_native_vector_width = debug_get_num_option("LP_NATIVE_VECTOR_WIDTH",
                                                 lp_native_vector_width);

   gallivm_initialized = TRUE;

#if 0
   /* Enable to simulate a less capable machine. */
   util_cpu_caps.has_sse3 = 0;
   util_cpu_caps.has_ssse3 = 0;
   util_cpu_caps.has_sse4_1 = 0;
#endif
}
Esempio n. 3
0
/**
 * Set up the global gallivm LLVM state.
 *
 * Each global object (lp_build_module, lp_build_provider, lp_build_engine,
 * lp_build_target, lp_build_pass) is created only while it is still NULL,
 * so objects created by an earlier call are kept.
 *
 * CPU detection is performed before the function pass manager is populated,
 * because the choice of passes reads util_cpu_caps.has_sse4_1.
 *
 * If the JIT compiler cannot be created, the LLVM error message is printed
 * and assert(0) fires.
 */
void
lp_build_init(void)
{
#ifdef DEBUG
   gallivm_debug = debug_get_option_gallivm_debug();
#endif

   lp_set_target_options();

   LLVMInitializeNativeTarget();

   LLVMLinkInJIT();

   if (!lp_build_module)
      lp_build_module = LLVMModuleCreateWithName("gallivm");

   if (!lp_build_provider)
      lp_build_provider = LLVMCreateModuleProviderForExistingModule(lp_build_module);

   if (!lp_build_engine) {
      enum LLVM_CodeGenOpt_Level optlevel;
      char *error = NULL;

      /* GALLIVM_DEBUG_NO_OPT also disables code generator optimisation. */
      if (gallivm_debug & GALLIVM_DEBUG_NO_OPT) {
         optlevel = None;
      }
      else {
         optlevel = Default;
      }

      if (LLVMCreateJITCompiler(&lp_build_engine, lp_build_provider,
                                (unsigned)optlevel, &error)) {
         _debug_printf("%s\n", error);
         LLVMDisposeMessage(error);
         assert(0);
      }

#if defined(DEBUG) || defined(PROFILE)
      lp_register_oprofile_jit_event_listener(lp_build_engine);
#endif
   }

   if (!lp_build_target)
      lp_build_target = LLVMGetExecutionEngineTargetData(lp_build_engine);

   /* Detect the CPU capabilities *before* they are consulted below.  This
    * call used to sit at the very end of the function, which meant the
    * has_sse4_1 test ran before this function had performed any detection.
    */
   util_cpu_detect();

#if 0
   /* For simulating less capable machines.  Must stay after
    * util_cpu_detect() and before the first read of util_cpu_caps.
    */
   util_cpu_caps.has_sse3 = 0;
   util_cpu_caps.has_ssse3 = 0;
   util_cpu_caps.has_sse4_1 = 0;
#endif

   if (!lp_build_pass) {
      lp_build_pass = LLVMCreateFunctionPassManager(lp_build_provider);
      LLVMAddTargetData(lp_build_target, lp_build_pass);

      if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) {
         /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
          * but there are more on SVN. */
         /* TODO: Add more passes */
         LLVMAddCFGSimplificationPass(lp_build_pass);
         LLVMAddPromoteMemoryToRegisterPass(lp_build_pass);
         LLVMAddConstantPropagationPass(lp_build_pass);
         if (util_cpu_caps.has_sse4_1) {
            /* FIXME: There is a bug in this pass, whereby the combination of
             * fptosi and sitofp (necessary for trunc/floor/ceil/round
             * implementation) somehow becomes invalid code.  The pass is
             * therefore only added when SSE4.1 is available.
             */
            LLVMAddInstructionCombiningPass(lp_build_pass);
         }
         LLVMAddGVNPass(lp_build_pass);
      } else {
         /* We need at least this pass to prevent the backends to fail in
          * unexpected ways.
          */
         LLVMAddPromoteMemoryToRegisterPass(lp_build_pass);
      }
   }
}