/* Copyright 2005-2009 Intel Corporation. All Rights Reserved. This file is part of Threading Building Blocks. Threading Building Blocks is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License version 2 as published by the Free Software Foundation. Threading Building Blocks is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with Threading Building Blocks; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA As a special exception, you may use this file as part of a free software library without restriction. Specifically, if other files instantiate templates or use macros or inline functions from this file, or you compile this file and link it with other files to produce an executable, this file does not by itself cause the resulting executable to be covered by the GNU General Public License. This exception does not however invalidate any other reasons why the executable file might be covered by the GNU General Public License. 
*/

// Core dynamic-link machinery: resolves a table of entry-point descriptors
// either from symbols already present in the process (weak symbols), from an
// explicitly given module handle, or by loading a named library.
#include "dynamic_link.h"

#ifndef LIBRARY_ASSERT
#include "tbb/tbb_stddef.h"
#define LIBRARY_ASSERT(x,y) __TBB_ASSERT(x,y)
#endif /* LIBRARY_ASSERT */

#if _WIN32||_WIN64
#include <malloc.h>     /* alloca */
#else
#include <dlfcn.h>
#if __FreeBSD__
#include <stdlib.h>     /* alloca */
#else
#include <alloca.h>
#endif
#endif

OPEN_INTERNAL_NAMESPACE

#if __TBB_WEAK_SYMBOLS

// Weak-symbol flavor: the descriptors' ptr fields were filled in at static
// link time (or left NULL if the symbol is absent), so no module handle is
// needed. Returns true iff the first `required` entries are all present;
// on success commits every non-NULL entry point into its handler slot.
bool dynamic_link( void*, const dynamic_link_descriptor descriptors[], size_t n, size_t required )
{
    // required == ~0 means "all n entries are required".
    if ( required == ~(size_t)0 )
        required = n;
    LIBRARY_ASSERT( required<=n, "Number of required entry points exceeds their total number" );
    size_t k = 0;
    // Check if the first required entries are present in what was loaded into our process
    while ( k < required && descriptors[k].ptr )
        ++k;
    if ( k < required )
        return false;
    // Commit all the entry points.
    for ( k = 0; k < n; ++k )
        *descriptors[k].handler = (pointer_to_handler) descriptors[k].ptr;
    return true;
}

#else /* !__TBB_WEAK_SYMBOLS */

// Lookup flavor: resolve every descriptor by name from `module` using the
// platform symbol API. All lookups go into a temporary array first so the
// handler slots are only written once the required set is known to resolve.
bool dynamic_link( void* module, const dynamic_link_descriptor descriptors[], size_t n, size_t required )
{
    // Temporary staging area for resolved addresses (stack-allocated; n is
    // expected to be small).
    pointer_to_handler *h = (pointer_to_handler*)alloca(n * sizeof(pointer_to_handler));
    if ( required == ~(size_t)0 )
        required = n;
    LIBRARY_ASSERT( required<=n, "Number of required entry points exceeds their total number" );
    size_t k = 0;
    for ( ; k < n; ++k ) {
#if _WIN32||_WIN64
        h[k] = pointer_to_handler(GetProcAddress( (HMODULE)module, descriptors[k].name ));
#else
        // Lvalue casting is used; this way icc -strict-ansi does not warn about nonstandard pointer conversion
        (void *&)h[k] = dlsym( module, descriptors[k].name );
#endif /* _WIN32||_WIN64 */
        // Entries past `required` may legitimately be missing; a missing
        // required entry aborts the whole link attempt.
        if ( !h[k] && k < required )
            return false;
    }
    LIBRARY_ASSERT( k == n, "if required entries are initialized, all entries are expected to be walked");
    // Commit the entry points.
    // Cannot use memset here, because the writes must be atomic.
    for( k = 0; k < n; ++k )
        *descriptors[k].handler = h[k];
    return true;
}

#endif /* !__TBB_WEAK_SYMBOLS */

// Link against the library named by `library`. First tries symbols already
// loaded into this executable (unless the caller passed a handle slot,
// which signals a request to really load the library); on failure loads the
// library and resolves from it. Returns true and (optionally) the module
// handle on success.
bool dynamic_link( const char* library, const dynamic_link_descriptor descriptors[], size_t n, size_t required, dynamic_link_handle* handle )
{
#if _WIN32||_WIN64
    // Interpret non-NULL handle parameter as request to really link against another library.
    if ( !handle && dynamic_link( GetModuleHandle(NULL), descriptors, n, required ) )
        // Target library was statically linked into this executable
        return true;
    // Prevent Windows from displaying silly message boxes if it fails to load library
    // (e.g. because of MS runtime problems - one of those crazy manifest related ones)
    UINT prev_mode = SetErrorMode (SEM_FAILCRITICALERRORS);
    dynamic_link_handle module = LoadLibrary (library);
    SetErrorMode (prev_mode);
#else
    dynamic_link_handle module = dlopen( library, RTLD_LAZY );
#endif /* _WIN32||_WIN64 */
    if( module ) {
        // Succeed only if the library contains all the expected entry points;
        // otherwise unload it again.
        if( !dynamic_link( module, descriptors, n, required ) ) {
            dynamic_unlink(module);
            module = NULL;
        }
    }
    if( handle )
        *handle = module;
    return module!=NULL;
}
/** Is called by DoOneTimeInitializations and ITT_DoOneTimeInitialization in a protected (one-time) manner. Not to be invoked directly. **/ bool InitializeITT() { bool result = false; // Check if we are running under a performance or correctness tool bool t_checker = GetBoolEnvironmentVariable("KMP_FOR_TCHECK"); bool t_profiler = GetBoolEnvironmentVariable("KMP_FOR_TPROFILE"); __TBB_ASSERT(!(t_checker&&t_profiler), NULL); if ( t_checker || t_profiler ) { // Yes, we are in the tool mode. Try to load libittnotify library. result = dynamic_link( LIBITTNOTIFY_NAME, ITT_HandlerTable, ITT_HandlerTable_size, 4 ); } if (result){ if ( t_checker ) { current_tool = ITC; } else if ( t_profiler ) { current_tool = ITP; } } else { // Clear away the proxy (dummy) handlers for (int i = 0; i < ITT_HandlerTable_size; i++) *ITT_HandlerTable[i].handler = NULL; current_tool = NONE; } PrintExtraVersionInfo( "ITT", result?"yes":"no" ); return result; }
inline void init_concmon_module() { __TBB_ASSERT( (uintptr_t)__TBB_init_binsem==(uintptr_t)&init_binsem_using_event, NULL ); if( dynamic_link( "Kernel32.dll", SRWLLinkTable, sizeof(SRWLLinkTable)/sizeof(dynamic_link_descriptor) ) ) { __TBB_ASSERT( (uintptr_t)__TBB_init_binsem!=(uintptr_t)&init_binsem_using_event, NULL ); __TBB_ASSERT( (uintptr_t)__TBB_acquire_binsem!=(uintptr_t)&acquire_binsem_using_event, NULL ); __TBB_ASSERT( (uintptr_t)__TBB_release_binsem!=(uintptr_t)&release_binsem_using_event, NULL ); } }
// Detects available concurrency: counts processors permitted by the process
// affinity mask and, when the process is unrestricted and the processor
// groups API is available, fills in per-group sizes/masks in theProcessorGroups.
static void initialize_hardware_concurrency_info () {
#if __TBB_WIN8UI_SUPPORT
    // For these applications processor groups info is unavailable
    // Setting up a number of processors for one processor group
    theProcessorGroups[0].numProcs = theProcessorGroups[0].numProcsRunningTotal = std::thread::hardware_concurrency();
#else /* __TBB_WIN8UI_SUPPORT */
    // Try to resolve the optional processor-groups API from Kernel32; if it
    // is absent, TBB_GetActiveProcessorCount stays NULL and the
    // single-group path below is taken.
    dynamic_link( "Kernel32.dll", ProcessorGroupsApiLinkTable, sizeof(ProcessorGroupsApiLinkTable)/sizeof(dynamic_link_descriptor) );
    SYSTEM_INFO si;
    GetNativeSystemInfo(&si);
    DWORD_PTR pam, sam, m = 1;
    GetProcessAffinityMask( GetCurrentProcess(), &pam, &sam );
    int nproc = 0;
    // Count the bits set in the process affinity mask.
    for ( size_t i = 0; i < sizeof(DWORD_PTR) * CHAR_BIT; ++i, m <<= 1 ) {
        if ( pam & m ) ++nproc;
    }
    __TBB_ASSERT( nproc <= (int)si.dwNumberOfProcessors, NULL );
    // By default setting up a number of processors for one processor group
    theProcessorGroups[0].numProcs = theProcessorGroups[0].numProcsRunningTotal = nproc;
    // Setting up processor groups in case the process does not restrict affinity mask and more than one processor group is present
    if ( nproc == (int)si.dwNumberOfProcessors && TBB_GetActiveProcessorCount ) {
        // The process does not have restricting affinity mask and multiple processor groups are possible
        ProcessorGroupInfo::NumGroups = (int)TBB_GetActiveProcessorGroupCount();
        __TBB_ASSERT( ProcessorGroupInfo::NumGroups <= MaxProcessorGroups, NULL );
        // Fail safety bootstrap. Release versions will limit available concurrency
        // level, while debug ones would assert.
        if ( ProcessorGroupInfo::NumGroups > MaxProcessorGroups )
            ProcessorGroupInfo::NumGroups = MaxProcessorGroups;
        if ( ProcessorGroupInfo::NumGroups > 1 ) {
            TBB_GROUP_AFFINITY ga;
            // Remember the group the calling thread currently belongs to.
            if ( TBB_GetThreadGroupAffinity( GetCurrentThread(), &ga ) )
                ProcessorGroupInfo::HoleIndex = ga.Group;
            int nprocs = 0;
            // Record each group's size, its all-ones processor mask, and the
            // running total of processors across groups.
            for ( WORD i = 0; i < ProcessorGroupInfo::NumGroups; ++i ) {
                ProcessorGroupInfo &pgi = theProcessorGroups[i];
                pgi.numProcs = (int)TBB_GetActiveProcessorCount(i);
                __TBB_ASSERT( pgi.numProcs <= (int)sizeof(DWORD_PTR) * CHAR_BIT, NULL );
                // A full-width group needs ~0 because 1<<width is undefined.
                pgi.mask = pgi.numProcs == sizeof(DWORD_PTR) * CHAR_BIT ? ~(DWORD_PTR)0 : (DWORD_PTR(1) << pgi.numProcs) - 1;
                pgi.numProcsRunningTotal = nprocs += pgi.numProcs;
            }
            __TBB_ASSERT( nprocs == (int)TBB_GetActiveProcessorCount( TBB_ALL_PROCESSOR_GROUPS ), NULL );
        }
    }
#endif /* __TBB_WIN8UI_SUPPORT */
    PrintExtraVersionInfo("Processor groups", "%d", ProcessorGroupInfo::NumGroups);
    if (ProcessorGroupInfo::NumGroups>1)
        for (int i=0; i<ProcessorGroupInfo::NumGroups; ++i)
            PrintExtraVersionInfo( "----- Group", "%d: size %d", i, theProcessorGroups[i].numProcs);
}
// Installs condition-variable entry points: binds directly to the native API
// for Windows store apps, otherwise resolves it dynamically from Kernel32,
// keeping the event-based emulation when the native API is absent.
void init_condvar_module() {
    // Must run while the event-based fallback is still installed.
    __TBB_ASSERT( (uintptr_t)__TBB_init_condvar==(uintptr_t)&init_condvar_using_event, NULL );
#if __TBB_WIN8UI_SUPPORT
    // We expect condition variables to be always available for Windows* store applications,
    // so there is no need to check presense and use alternative implementation.
    __TBB_init_condvar = (void (WINAPI *)(PCONDITION_VARIABLE))&InitializeConditionVariable;
    __TBB_condvar_wait = (BOOL(WINAPI *)(PCONDITION_VARIABLE, LPCRITICAL_SECTION, DWORD))&SleepConditionVariableCS;
    __TBB_condvar_notify_one = (void (WINAPI *)(PCONDITION_VARIABLE))&WakeConditionVariable;
    __TBB_condvar_notify_all = (void (WINAPI *)(PCONDITION_VARIABLE))&WakeAllConditionVariable;
    // Native condition variables need no teardown, hence the no-op destroy.
    __TBB_destroy_condvar = (void (WINAPI *)(PCONDITION_VARIABLE))&destroy_condvar_noop;
#else
    // NOTE(review): the 4 here is presumably the CondVarLinkTable entry
    // count — confirm it matches the table's definition.
    if (dynamic_link("Kernel32.dll", CondVarLinkTable, 4))
        __TBB_destroy_condvar = (void (WINAPI *)(PCONDITION_VARIABLE))&destroy_condvar_noop;
#endif
}
/** Caller is responsible for ensuring this routine is called exactly once. */ void MallocInitializeITT() { #if __TBB_NEW_ITT_NOTIFY tbb::internal::__TBB_load_ittnotify(); #else bool success = false; // Check if we are running under control of VTune. if( GetBoolEnvironmentVariable("KMP_FOR_TCHECK") || GetBoolEnvironmentVariable("KMP_FOR_TPROFILE") ) { // Yes, we are under control of VTune. Check for libittnotify library. success = dynamic_link( LIBITTNOTIFY_NAME, ITT_HandlerTable, 5 ); } if (!success){ for (int i = 0; i < 5; i++) *ITT_HandlerTable[i].handler = NULL; } #endif /* !__TBB_NEW_ITT_NOTIFY */ }
// Detects available concurrency via the process affinity mask and, when the
// process is unrestricted and the processor-groups API resolved, records
// per-group sizes/masks (using the Maximum* flavor of the group API).
static void initialize_hardware_concurrency_info () {
    // Resolve the optional processor-groups API; on failure the
    // TBB_GetMaximumProcessorCount pointer stays NULL and the single-group
    // path below is taken.
    dynamic_link( GetModuleHandleA( "Kernel32.dll" ), ProcessorGroupsApiLinkTable, sizeof(ProcessorGroupsApiLinkTable)/sizeof(dynamic_link_descriptor) );
    SYSTEM_INFO si;
    GetSystemInfo(&si);
    DWORD_PTR pam, sam, m = 1;
    GetProcessAffinityMask( GetCurrentProcess(), &pam, &sam );
    int nproc = 0;
    // Count the bits set in the process affinity mask.
    for ( size_t i = 0; i < sizeof(DWORD_PTR) * CHAR_BIT; ++i, m <<= 1 ) {
        if ( pam & m ) ++nproc;
    }
    __TBB_ASSERT( nproc <= (int)si.dwNumberOfProcessors, NULL );
    if ( nproc == (int)si.dwNumberOfProcessors && TBB_GetMaximumProcessorCount ) {
        // The process does not have restricting affinity mask and multiple processor groups are possible
        ProcessorGroupInfo::NumGroups = (int)TBB_GetMaximumProcessorGroupCount();
        __TBB_ASSERT( ProcessorGroupInfo::NumGroups <= MaxProcessorGroups, NULL );
        // Fail safety bootstrap. Release versions will limit available concurrency
        // level, while debug ones would assert.
        if ( ProcessorGroupInfo::NumGroups > MaxProcessorGroups )
            ProcessorGroupInfo::NumGroups = MaxProcessorGroups;
        if ( ProcessorGroupInfo::NumGroups > 1 ) {
            TBB_GROUP_AFFINITY ga;
            // Remember the group the calling thread currently belongs to.
            if ( TBB_GetThreadGroupAffinity( GetCurrentThread(), &ga ) )
                ProcessorGroupInfo::HoleIndex = ga.Group;
            int nprocs = 0;
            // Record each group's size, full processor mask, and running total.
            for ( WORD i = 0; i < ProcessorGroupInfo::NumGroups; ++i ) {
                ProcessorGroupInfo &pgi = theProcessorGroups[i];
                pgi.numProcs = (int)TBB_GetMaximumProcessorCount(i);
                __TBB_ASSERT( pgi.numProcs <= (int)sizeof(DWORD_PTR) * CHAR_BIT, NULL );
                // A full-width group needs ~0 because 1<<width is undefined.
                pgi.mask = pgi.numProcs == sizeof(DWORD_PTR) * CHAR_BIT ?
                           ~(DWORD_PTR)0 : (DWORD_PTR(1) << pgi.numProcs) - 1;
                pgi.numProcsRunningTotal = nprocs += pgi.numProcs;
            }
            __TBB_ASSERT( nprocs == (int)TBB_GetMaximumProcessorCount( TBB_ALL_PROCESSOR_GROUPS ), NULL );
            // NOTE(review): this early return skips the PrintExtraVersionInfo
            // reporting below in the multi-group case — confirm intentional.
            return;
        }
    }
    // Either the process has restricting affinity mask or only a single processor groups is present
    theProcessorGroups[0].numProcs = theProcessorGroups[0].numProcsRunningTotal = nproc;
    PrintExtraVersionInfo("Processor groups", "%d", ProcessorGroupInfo::NumGroups);
    if (ProcessorGroupInfo::NumGroups>1)
        for (int i=0; i<ProcessorGroupInfo::NumGroups; ++i)
            PrintExtraVersionInfo( "----- Group", "%d: size %d", i, theProcessorGroups[i].numProcs);
}
// Link against the library named by `library`. Strategy: first try to
// resolve the descriptors from modules already loaded into the process
// (pinning the providing library so it cannot be unloaded underneath us);
// only if that fails, load the library by absolute path. Returns true and
// (optionally) the library handle on success.
bool dynamic_link( const char* library, const dynamic_link_descriptor descriptors[], size_t n, size_t required, dynamic_link_handle *handle )
{
    // Get library handle in case it is already loaded into the current process
#if ! __TBB_DYNAMIC_LOAD_ENABLED
    dynamic_link_handle library_handle = NULL;
#ifndef __TBB_ASSERT_EX
#define __TBB_ASSERT_EX LIBRARY_ASSERT
#endif
    __TBB_ASSERT_EX( library, "library name must be provided");
#elif _WIN32||_WIN64
    dynamic_link_handle library_handle = GetModuleHandle( library );
#else
    // NULL asks the dynamic loader for the global symbol scope of the
    // already-running process.
    dynamic_link_handle library_handle = dlopen( NULL, RTLD_LAZY );
#endif /* _WIN32||_WIN64 */
    // Get descriptors from the library
    if ( library_handle && dynamic_link( library_handle, descriptors, n, required ) ) {
#if !__TBB_DYNAMIC_LOAD_ENABLED
        return true;
#else
        // The library has been loaded by another module and contains requested symbols.
        // But after we obtained the library's handle it can be unloaded by another thread
        // invalidating our handle copy. Therefore we need to pin the library in memory.
#if _WIN32||_WIN64
        char library_full_name[ MAX_PATH+1 ];
        // Get library's name from earlier found handle
        if ( GetModuleFileName( library_handle, library_full_name, MAX_PATH+1 ) ) {
            // Pin the library
            library_handle = LoadLibrary( library_full_name );
            if ( library_handle == NULL ) {
                int err = GetLastError();
                DYNAMIC_LINK_WARNING( dl_lib_not_found, library_full_name, err );
            } // if
        } // if
#else /* !WIN */
        Dl_info info;
        // Get library's name from earlier found symbol
        if ( dladdr( (void*)*descriptors[0].handler, &info ) ) {
            // Pin the library
            library_handle = dlopen( info.dli_fname, RTLD_LAZY );
            if ( library_handle == NULL ) {
                char const * err = dlerror();
                DYNAMIC_LINK_WARNING( dl_lib_not_found, info.dli_fname, err );
            } // if
        } // if
#endif /* !WIN */
        // NOTE: this `else` binds to the GetModuleFileName/dladdr `if` above,
        // across the preprocessor branches.
        else {
            // The library have been unloaded by another thread
            library_handle = 0;
        }
        if ( library_handle ) {
            // If original library was unloaded before we pinned it
            // and then another module loaded in its place, the earlier
            // found symbols would become invalid. So revalidate them.
            if ( !dynamic_link( library_handle, descriptors, n, required ) ) {
                // Wrong library.
                dynamic_unlink(library_handle);
                library_handle = 0;
            }
        }
        if ( !library_handle ) {
            // Failed to pin the library, so clear the descriptors too.
            for( size_t i=0; i<n; ++i ) *descriptors[i].handler = 0;
        }
#endif /* __TBB_DYNAMIC_LOAD_ENABLED */
    } else {
        library_handle = 0;
    }
#if __TBB_DYNAMIC_LOAD_ENABLED
    // The symbols were not found in the process; try to actually load the
    // library from disk.
    if ( !library_handle ) {
#if _WIN32||_WIN64
#if _XBOX
        library_handle = LoadLibrary (library);
#else
        library_handle = NULL;
        // Construct absolute path to the library to avoid security issue.
        size_t const len = MAX_PATH + 1;
        char path[ len ];
        size_t rc = abs_path( library, path, len );
        if ( 0 < rc && rc < len ) {
            // Prevent Windows from displaying silly message boxes if it fails to load library
            // (e.g. because of MS runtime problems - one of those crazy manifest related ones)
            UINT prev_mode = SetErrorMode (SEM_FAILCRITICALERRORS);
            library_handle = LoadLibrary (path);
            SetErrorMode (prev_mode);
            if ( library_handle == NULL ) {
                int err = GetLastError();
                DYNAMIC_LINK_WARNING( dl_lib_not_found, path, err );
            } // if
        } // if
#endif /* !_XBOX */
#else /* !WIN */
        library_handle = NULL;
        // Construct absolute path to the library.
        size_t const len = PATH_MAX + 1;
        char path[ len ];
        size_t rc = abs_path( library, path, len );
        if ( 0 < rc && rc < len ) {
            library_handle = dlopen( path, RTLD_LAZY );
            if ( library_handle == NULL ) {
                char const * err = dlerror();
                DYNAMIC_LINK_WARNING( dl_lib_not_found, library, err );
            } // if
        } // if
#endif /* !WIN */
        if( library_handle ) {
            if( !dynamic_link( library_handle, descriptors, n, required ) ) {
                // The loaded library does not contain all the expected entry points
                dynamic_unlink( library_handle );
                library_handle = NULL;
            }
        }
    }
#endif /* __TBB_DYNAMIC_LOAD_ENABLED */
    if ( library_handle ) {
        // Hand the handle to the caller, or keep it for bulk release at
        // library shutdown when building TBB itself.
        if ( handle )
            *handle = library_handle;
#if __TBB_BUILD
        else
            handles.add_handle( library_handle );
#endif /* __TBB_BUILD */
        return true;
    }
    return false;
}
bool initialize_cilk_interop() { // Pinning can fail. This is a normal situation, and means that the current // thread does not use cilkrts and consequently does not need interop. return dynamic_link( CILKLIB_NAME, CilkLinkTable, 1, /*handle=*/0, DYNAMIC_LINK_GLOBAL ); }
// Exception-free wrapper around dynamic_link: converts a thrown
// flx_link_failure_t into a heap-allocated copy that the caller owns.
// Returns NULL on success.
flx_link_failure_t *flx_dynlink_t::nothrow_dynamic_link(const ::std::string& filename_a) {
    try {
        dynamic_link(filename_a);
    }
    catch (flx_link_failure_t const &failure) {
        return new flx_link_failure_t(failure);
    }
    return NULL;
}
// Redirect the condition-variable entry points to the native Kernel32 API
// when available; otherwise the event-based emulation remains in effect.
void init_condvar_module() {
    // Must run while the event-based fallback is still installed.
    __TBB_ASSERT( (uintptr_t)__TBB_init_condvar==(uintptr_t)&init_condvar_using_event, NULL );
    const bool native_api_found = dynamic_link( "Kernel32.dll", CondVarLinkTable, 4 );
    if( native_api_found )
        // Native condition variables need no teardown, hence the no-op destroy.
        __TBB_destroy_condvar = (void (WINAPI *)(PCONDITION_VARIABLE))&destroy_condvar_noop;
}
bool initialize_cilk_interop() { // Pinning can fail. This is a normal situation, and means that the current // thread does not use Cilk and consequently does not need interop. return dynamic_link( CILKLIB_NAME, CilkLinkTable, 1 ); }
// Detects available concurrency on Linux/FreeBSD: grows the affinity-mask
// buffer until the kernel accepts it, then counts the processors the process
// is allowed to run on, cooperating with libiomp5 if it altered the mask.
static void initialize_hardware_concurrency_info () {
    int err;
    int availableProcs = 0;
    int numMasks = 1;
#if __linux__
#if __TBB_MAIN_THREAD_AFFINITY_BROKEN
    int maxProcs = INT_MAX; // To check the entire mask.
    int pid = 0; // Get the mask of the calling thread.
#else
    int maxProcs = sysconf(_SC_NPROCESSORS_ONLN);
    int pid = getpid();
#endif
#else /* FreeBSD >= 7.1 */
    int maxProcs = sysconf(_SC_NPROCESSORS_ONLN);
#endif
    basic_mask_t* processMask;
    const size_t BasicMaskSize = sizeof(basic_mask_t);
    // Retry with a doubled mask buffer until the kernel accepts the size
    // (or a platform-specific upper bound on mask bits is reached).
    for (;;) {
        const int curMaskSize = BasicMaskSize * numMasks;
        processMask = new basic_mask_t[numMasks];
        memset( processMask, 0, curMaskSize );
#if __linux__
        err = sched_getaffinity( pid, curMaskSize, processMask );
        // EINVAL means the buffer was too small for the kernel's mask.
        if ( !err || errno != EINVAL || curMaskSize * CHAR_BIT >= 256 * 1024 )
            break;
#else /* FreeBSD >= 7.1 */
        // CPU_LEVEL_WHICH - anonymous (current) mask, CPU_LEVEL_CPUSET - assigned mask
#if __TBB_MAIN_THREAD_AFFINITY_BROKEN
        err = cpuset_getaffinity( CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, curMaskSize, processMask );
#else
        err = cpuset_getaffinity( CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, curMaskSize, processMask );
#endif
        if ( !err || errno != ERANGE || curMaskSize * CHAR_BIT >= 16 * 1024 )
            break;
#endif /* FreeBSD >= 7.1 */
        delete[] processMask;
        numMasks <<= 1;
    }
    if ( !err ) {
        // We have found the mask size and captured the process affinity mask into processMask.
        num_masks = numMasks; // do here because it's needed for affinity_helper to work
#if __linux__
        // For better coexistence with libiomp which might have changed the mask already,
        // check for its presense and ask it to restore the mask.
        dynamic_link_handle libhandle;
        if ( dynamic_link( "libiomp5.so", iompLinkTable, 1, &libhandle, DYNAMIC_LINK_GLOBAL ) ) {
            // We have found the symbol provided by libiomp5 for restoring original thread affinity.
            affinity_helper affhelp;
            affhelp.protect_affinity_mask( /*restore_process_mask=*/false );
            if ( libiomp_try_restoring_original_mask()==0 ) {
                // Now we have the right mask to capture, restored by libiomp.
                const int curMaskSize = BasicMaskSize * numMasks;
                memset( processMask, 0, curMaskSize );
                get_thread_affinity_mask( curMaskSize, processMask );
            } else
                affhelp.dismiss(); // thread mask has not changed
            dynamic_unlink( libhandle );
            // Destructor of affinity_helper restores the thread mask (unless dismissed).
        }
#endif
        // Count set bits across all mask words, capped at maxProcs.
        for ( int m = 0; availableProcs < maxProcs && m < numMasks; ++m ) {
            for ( size_t i = 0; (availableProcs < maxProcs) && (i < BasicMaskSize * CHAR_BIT); ++i ) {
                if ( CPU_ISSET( i, processMask + m ) )
                    ++availableProcs;
            }
        }
        // Transfer ownership of the mask buffer to the global.
        process_mask = processMask;
    } else {
        // Failed to get the process affinity mask; assume the whole machine can be used.
        availableProcs = (maxProcs == INT_MAX) ? sysconf(_SC_NPROCESSORS_ONLN) : maxProcs;
        delete[] processMask;
    }
    theNumProcs = availableProcs > 0 ? availableProcs : 1; // Fail safety strap
    __TBB_ASSERT( theNumProcs <= sysconf(_SC_NPROCESSORS_ONLN), NULL );
}