//---------------------------------------------------------------------------------- // //---------------------------------------------------------------------------------- void Profiler_Imp::Start(int id) { Profile::Ptr profile = nullptr; for (auto& x : m_profiles) { if (x->GetID() == id) { profile = x; } } if (profile == nullptr) { profile = make_shared<Profile>(id); m_profiles.push_back(profile); } profile->GetCurrent()->SetStartTime(asd::GetTime()); #if _WIN32 profile->GetCurrent()->SetProcessorNumber(GetCurrentProcessorNumber()); #elif defined(__APPLE__) // sched_getcpuがないようなので代用。よりよいものがあれば差し替えてください。 profile->GetCurrent()->SetProcessorNumber( std::hash<std::thread::id>()(std::this_thread::get_id())); #else profile->GetCurrent()->SetProcessorNumber(sched_getcpu()); #endif }
// Returns the number of the processor the calling thread is running on.
// When the build does not target _WIN32_WINNT >= 0x0600 the Windows API call
// is compiled out and 0 is returned instead.
uint32_t getCurrentProcessorNumber()
{
    uint32_t processor = 0;
#if _WIN32_WINNT >= 0x0600
    processor = GetCurrentProcessorNumber();
#endif
    return processor;
}
// Start the stopwatch. void StopWatch::Start() { // MSDN recommends setting the thread affinity to avoid bugs in the BIOS and HAL. // Create an affinity mask for the current processor. affinityMask = (DWORD_PTR)1 << GetCurrentProcessorNumber(); HANDLE currThread = GetCurrentThread(); DWORD_PTR prevAffinityMask = SetThreadAffinityMask(currThread, affinityMask); assert(prevAffinityMask != 0); // Query the performance counter. LARGE_INTEGER perfQuery; BOOL result = QueryPerformanceCounter(&perfQuery); assert(result); start = perfQuery.QuadPart; // Restore the thread's affinity mask. prevAffinityMask = SetThreadAffinityMask(currThread, prevAffinityMask); assert(prevAffinityMask != 0); }
int main(void) { int i, k, err, blocks, len, len2; double a0, av, sig, td; unsigned char buf1[BUFLEN]; unsigned char buf2[BUFLEN]; unsigned char iv1[AES_BLOCK_SIZE]; unsigned char iv2[AES_BLOCK_SIZE]; unsigned char key[32]; f_ectx ecx1[1]; f_dctx dcx1[1]; aligned_auto(unsigned char, buf3, BUFLEN, 16); aligned_auto(unsigned char, iv3, AES_BLOCK_SIZE, 16); aligned_auto(f_ectx, ecx2, 1, 16); aligned_auto(f_dctx, dcx2, 1, 16); #if defined( DLL_IMPORT ) && defined( DYNAMIC_LINK ) HINSTANCE h_dll; #endif #if defined( DUAL_CORE ) && defined( _WIN32 ) // we need to constrain the process to one core in order to // obtain meaningful timing data HANDLE ph; DWORD_PTR afp; DWORD_PTR afs; ph = GetCurrentProcess(); if(GetProcessAffinityMask(ph, &afp, &afs)) { afp &= (GetCurrentProcessorNumber() + 1); if(!SetProcessAffinityMask(ph, afp)) { printf("Couldn't set Process Affinity Mask\n\n"); return -1; } } else { printf("Couldn't get Process Affinity Mask\n\n"); return -1; } #endif #if defined( DLL_IMPORT ) && defined( DYNAMIC_LINK ) if(!(h_dll = init_dll(&fn))) return -1; #elif !defined(STATIC_TABLES) aes_init(); #endif if(f_talign(0,16) != EXIT_SUCCESS) return -1; printf("\nRun tests for the AES algorithm"); #if defined( DLL_IMPORT ) printf(" (DLL Version)"); #endif #if defined( __cplusplus ) printf(" (CPP Version)"); #endif for(k = 128; k <= 256; k += 64) { printf("\n\n%03i Bit Keys", k); #ifdef TEST_ECB err = 0; for(i = 0; i < 100; ++i) { block_rndfill(key, 2 * AES_BLOCK_SIZE); f_enc_key(ecx1, key, k); f_enc_key(ecx2, key, k); f_dec_key(dcx1, key, k); f_dec_key(dcx2, key, k); block_rndfill(buf1, BUFLEN); memcpy(buf2, buf1, BUFLEN); memcpy(buf3, buf1, BUFLEN); td = rand32() / (65536.0 * 65536.0); len = (unsigned int)(0.5 * BUFLEN * (1.0 + td)); len = AES_BLOCK_SIZE * (len / AES_BLOCK_SIZE); ECBenc(buf2, len, ecx1); f_ecb_enc(ecx2, buf3, buf3, len); if(memcmp(buf2, buf3, len)) err |= 1; if((err & 1) && !(err & 256)) printf("\nECB encryption FAILURE"); ECBdec(buf2, len, dcx1); 
f_ecb_dec(dcx2, buf3, buf3, len); if(memcmp(buf1, buf2, len)) err |= 2; if(memcmp(buf1, buf3, len)) err |= 4; if((err & 4) && !(err & 512)) printf("\nECB decryption FAILURE"); if(err & 1) err |= 256; if(err & 4) err |= 512; } if(!err) printf("\nECB encrypt and decrypt of data correct"); #endif #ifdef TEST_CBC err = 0; for(i = 0; i < 100; ++i) { block_rndfill(key, 2 * AES_BLOCK_SIZE); f_enc_key(ecx1, key, k); f_enc_key(ecx2, key, k); f_dec_key(dcx1, key, k); f_dec_key(dcx2, key, k); block_rndfill(iv1, AES_BLOCK_SIZE); memcpy(iv2, iv1, AES_BLOCK_SIZE); memcpy(iv3, iv1, AES_BLOCK_SIZE); block_rndfill(buf1, BUFLEN); memcpy(buf2, buf1, BUFLEN); memcpy(buf3, buf1, BUFLEN); td = rand32() / (65536.0 * 65536.0); len = (unsigned int)(0.5 * BUFLEN * (1.0 + td)); len = AES_BLOCK_SIZE * (len / AES_BLOCK_SIZE); CBCenc(buf2, len, iv2, ecx1); f_cbc_enc(ecx2, buf3, buf3, len, iv3); if(memcmp(buf2, buf3, len)) err |= 1; if(memcmp(iv2, iv3, AES_BLOCK_SIZE)) err |= 2; if((err & 1) && !(err & 256)) printf("\nCBC encryption FAILURE"); memcpy(iv2, iv1, AES_BLOCK_SIZE); memcpy(iv3, iv1, AES_BLOCK_SIZE); CBCdec(buf2, len, iv2, dcx1); f_cbc_dec(dcx2, buf3, buf3, len, iv3); if(memcmp(buf1, buf2, len)) err |= 4; if(memcmp(buf1, buf3, len)) err |= 8; if(memcmp(buf2, buf3, len)) err |= 16; if(memcmp(iv2, iv3, AES_BLOCK_SIZE)) err |= 32; if((err & 16) && !(err & 512)) printf("\nCBC decryption FAILURE"); if(err & 1) err |= 256; if(err & 16) err |= 512; } if(!(err & ~(2 | 4 | 16 | 32))) printf("\nCBC encrypt and decrypt of data correct"); if(err & (2 | 32)) { printf(" (mismatch of final IV on "); if(err & 2) printf("encrypt"); if((err & (2 | 32)) == 34) printf(" and "); if(err & 32) printf("decrypt"); printf(")"); } #endif #ifdef TEST_CFB err = 0; for(i = 0; i < 100; ++i) { block_rndfill(key, 2 * AES_BLOCK_SIZE); f_enc_key(ecx1, key, k); f_enc_key(ecx2, key, k); f_dec_key(dcx1, key, k); f_dec_key(dcx2, key, k); block_rndfill(iv1, AES_BLOCK_SIZE); memcpy(iv2, iv1, AES_BLOCK_SIZE); memcpy(iv3, iv1, 
AES_BLOCK_SIZE); block_rndfill(buf1, BUFLEN); memcpy(buf2, buf1, BUFLEN); memcpy(buf3, buf1, BUFLEN); f_info(ecx1) = 0; f_mode_reset(ecx2); td = rand32() / (65536.0 * 65536.0); len = (unsigned int)(0.5 * BUFLEN * (1.0 + td)); td = rand32() / (65536.0 * 65536.0); len2 = (unsigned int)(td * len); #ifdef WHOLE_BLOCKS len = AES_BLOCK_SIZE * (len / AES_BLOCK_SIZE); len2 = AES_BLOCK_SIZE * (len2 / AES_BLOCK_SIZE); #endif f_cfb_enc(ecx2, buf3, buf3, len2, iv3); f_cfb_enc(ecx2, buf3 + len2, buf3 + len2, len - len2, iv3); CFBenc(buf2, len, iv2, ecx1); if(memcmp(buf2, buf3, len)) err |= 1; if(memcmp(iv2, iv3, AES_BLOCK_SIZE)) err |= 2; if((err & 1) && !(err & 256)) printf("\nCFB encryption FAILURE"); memcpy(iv2, iv1, AES_BLOCK_SIZE); memcpy(iv3, iv1, AES_BLOCK_SIZE); f_info(ecx1) = 0; f_mode_reset(ecx2); CFBdec(buf2, len, iv2, ecx1); td = rand32() / (65536.0 * 65536.0); len2 = (unsigned int)(td * len); #ifdef WHOLE_BLOCKS len2 = AES_BLOCK_SIZE * (len2 / AES_BLOCK_SIZE); #endif f_cfb_dec(ecx2, buf3, buf3, len2, iv3); f_cfb_dec(ecx2, buf3 + len2, buf3 + len2, len - len2, iv3); if(memcmp(buf1, buf2, len)) err |= 4; if(memcmp(buf1, buf3, len)) err |= 8; if(memcmp(buf2, buf3, len)) err |= 16; if(memcmp(iv2, iv3, AES_BLOCK_SIZE)) err |= 32; if((err & 16) && !(err & 512)) printf("\nCFB decryption FAILURE"); if(err & 1) err |= 256; if(err & 16) err |= 512; } if(!(err & ~(2 | 4 | 16 | 32))) printf("\nCFB encrypt and decrypt of data correct"); if(err & (2 | 32)) { printf(" (mismatch of final IV on "); if(err & 2) printf("encrypt"); if((err & (2 | 32)) == 34) printf(" and "); if(err & 32) printf("decrypt"); printf(")"); } #endif #ifdef TEST_OFB err = 0; for(i = 0; i < 100; ++i) { block_rndfill(key, 2 * AES_BLOCK_SIZE); f_enc_key(ecx1, key, k); f_enc_key(ecx2, key, k); f_dec_key(dcx1, key, k); f_dec_key(dcx2, key, k); block_rndfill(iv1, AES_BLOCK_SIZE); memcpy(iv2, iv1, AES_BLOCK_SIZE); memcpy(iv3, iv1, AES_BLOCK_SIZE); block_rndfill(buf1, BUFLEN); memcpy(buf2, buf1, BUFLEN); 
memcpy(buf3, buf1, BUFLEN); f_info(ecx1) = 0; f_mode_reset(ecx2); td = rand32() / (65536.0 * 65536.0); len = (unsigned int)(0.5 * BUFLEN * (1.0 + td)); td = rand32() / (65536.0 * 65536.0); len2 = (unsigned int)(td * len); #ifdef WHOLE_BLOCKS len = AES_BLOCK_SIZE * (len / AES_BLOCK_SIZE); len2 = AES_BLOCK_SIZE * (len2 / AES_BLOCK_SIZE); #endif f_ofb_cry(ecx2, buf3, buf3, len2, iv3); f_ofb_cry(ecx2, buf3 + len2, buf3 + len2, len - len2, iv3); OFBenc(buf2, len, iv2, ecx1); if(memcmp(buf2, buf3, len)) err |= 1; if(memcmp(iv2, iv3, AES_BLOCK_SIZE)) err |= 2; if((err & 1) && !(err & 256)) printf("\nOFB encryption FAILURE"); memcpy(iv2, iv1, AES_BLOCK_SIZE); memcpy(iv3, iv1, AES_BLOCK_SIZE); f_info(ecx1) = 0; f_mode_reset(ecx2); OFBdec(buf2, len, iv2, ecx1); td = rand32() / (65536.0 * 65536.0); len2 = (unsigned int)(td * len); #ifdef WHOLE_BLOCKS len2 = AES_BLOCK_SIZE * (len2 / AES_BLOCK_SIZE); #endif f_ofb_cry(ecx2, buf3, buf3, len2, iv3); f_ofb_cry(ecx2, buf3 + len2, buf3 + len2, len - len2, iv3); if(memcmp(buf1, buf2, len)) err |= 4; if(memcmp(buf1, buf3, len)) err |= 8; if(memcmp(buf2, buf3, len)) err |= 16; if(memcmp(iv2, iv3, AES_BLOCK_SIZE)) err |= 32; if((err & 16) && !(err & 512)) printf("\nOFB decryption FAILURE"); if(err & 1) err |= 256; if(err & 16) err |= 512; } if(!(err & ~(2 | 4 | 16 | 32))) printf("\nOFB encrypt and decrypt of data correct"); if(err & (2 | 32)) { printf(" (mismatch of final IV on "); if(err & 2) printf("encrypt"); if((err & (2 | 32)) == 34) printf(" and "); if(err & 32) printf("decrypt"); printf(")"); } #endif #ifdef TEST_CTR err = 0; for(i = 0; i < 100; ++i) { block_rndfill(key, 2 * AES_BLOCK_SIZE); f_enc_key(ecx1, key, k); f_enc_key(ecx2, key, k); f_dec_key(dcx1, key, k); f_dec_key(dcx2, key, k); block_rndfill(iv1, AES_BLOCK_SIZE); memcpy(iv2, iv1, AES_BLOCK_SIZE); memcpy(iv3, iv1, AES_BLOCK_SIZE); block_rndfill(buf1, BUFLEN); memcpy(buf2, buf1, BUFLEN); memcpy(buf3, buf1, BUFLEN); f_info(ecx1) = 0; f_mode_reset(ecx2); td = rand32() / 
(65536.0 * 65536.0); len = (unsigned int)(0.5 * BUFLEN * (1.0 + td)); td = rand32() / (65536.0 * 65536.0); len2 = (unsigned int)(td * len); #ifdef WHOLE_BLOCKS len = AES_BLOCK_SIZE * (len / AES_BLOCK_SIZE); len2 = AES_BLOCK_SIZE * (len2 / AES_BLOCK_SIZE); #endif f_ctr_cry(ecx2, buf3, buf3, len2, iv3, ctr_inc); f_ctr_cry(ecx2, buf3 + len2, buf3 + len2, len - len2, iv3, ctr_inc); CTRcry(buf2, len, iv2, ctr_inc, ecx1); if(memcmp(buf2, buf3, len)) err |= 1; if(memcmp(iv2, iv3, AES_BLOCK_SIZE)) err |= 2; if((err & 1) && !(err & 256)) printf("\nCTR encryption FAILURE"); memcpy(iv2, iv1, AES_BLOCK_SIZE); memcpy(iv3, iv1, AES_BLOCK_SIZE); f_info(ecx1) = 0; f_mode_reset(ecx2); td = rand32() / (65536.0 * 65536.0); len2 = (unsigned int)(td * len); CTRcry(buf2, len, iv2, ctr_inc, ecx1); #ifdef WHOLE_BLOCKS len2 = AES_BLOCK_SIZE * (len2 / AES_BLOCK_SIZE); #endif f_ctr_cry(ecx2, buf3, buf3, len2, iv3, ctr_inc); f_ctr_cry(ecx2, buf3 + len2, buf3 + len2, len - len2, iv3, ctr_inc); if(memcmp(buf1, buf2, len)) err |= 4; if(memcmp(buf1, buf3, len)) err |= 8; if(memcmp(buf2, buf3, len)) err |= 16; if(memcmp(iv2, iv3, AES_BLOCK_SIZE)) err |= 32; if((err & 16) && !(err & 512)) printf("\nCTR decryption FAILURE"); if(err & 1) err |= 256; if(err & 16) err |= 512; } if(!(err & ~(2 | 4 | 16 | 32))) printf("\nCTR encrypt and decrypt of data correct"); if(err & (2 | 32)) { printf(" (mismatch of final IV on "); if(err & 2) printf("encrypt"); if((err & (2 | 32)) == 34) printf(" and "); if(err & 32) printf("decrypt"); printf(")"); } #endif } #if defined( USE_VIA_ACE_IF_PRESENT ) if(VIA_ACE_AVAILABLE) printf("\n\nAES Timing (Cycles/Byte) with the VIA ACE Engine"); else #endif printf("\n\nAES Timing (Cycles/Byte)"); printf("\nMode Blocks: 1 10 100 1000"); #ifdef TEST_ECB printf("\necb encrypt "); for(blocks = 1; blocks < 10000; blocks *= 10) { time_base(&a0, &sig); time_ecb_enc(16, blocks, &av, &sig); sig *= 100.0 / av; av = (int)(100.0 * (av - a0) / (16.0 * blocks)) / 100.0; sig = (int)(10 * sig) 
/ 10.0; printf("%9.2f", av); } printf("\necb decrypt "); for(blocks = 1; blocks < 10000; blocks *= 10) { time_base(&a0, &sig); time_ecb_dec(16, blocks, &av, &sig); sig *= 100.0 / av; av = (int)(100.0 * (av - a0) / (16.0 * blocks)) / 100.0; sig = (int)(10 * sig) / 10.0; printf("%9.2f", av); } #endif #ifdef TEST_CBC printf("\ncbc encrypt "); for(blocks = 1; blocks < 10000; blocks *= 10) { time_base(&a0, &sig); time_cbc_enc(16, blocks, &av, &sig); sig *= 100.0 / av; av = (int)(100.0 * (av - a0) / (16.0 * blocks)) / 100.0; sig = (int)(10 * sig) / 10.0; printf("%9.2f", av); } printf("\ncbc decrypt "); for(blocks = 1; blocks < 10000; blocks *= 10) { time_base(&a0, &sig); time_cbc_dec(16, blocks, &av, &sig); sig *= 100.0 / av; av = (int)(100.0 * (av - a0) / (16.0 * blocks)) / 100.0; sig = (int)(10 * sig) / 10.0; printf("%9.2f", av); } #endif #ifdef TEST_CFB printf("\ncfb encrypt "); for(blocks = 1; blocks < 10000; blocks *= 10) { time_base(&a0, &sig); time_cfb_enc(16, blocks, &av, &sig); sig *= 100.0 / av; av = (int)(100.0 * (av - a0) / (16.0 * blocks)) / 100.0; sig = (int)(10 * sig) / 10.0; printf("%9.2f", av); } printf("\ncfb decrypt "); for(blocks = 1; blocks < 10000; blocks *= 10) { time_base(&a0, &sig); time_cfb_dec(16, blocks, &av, &sig); sig *= 100.0 / av; av = (int)(100.0 * (av - a0) / (16.0 * blocks)) / 100.0; sig = (int)(10 * sig) / 10.0; printf("%9.2f", av); } #endif #ifdef TEST_OFB printf("\nofb encrypt "); for(blocks = 1; blocks < 10000; blocks *= 10) { time_base(&a0, &sig); time_ofb_enc(16, blocks, &av, &sig); sig *= 100.0 / av; av = (int)(100.0 * (av - a0) / (16.0 * blocks)) / 100.0; sig = (int)(10 * sig) / 10.0; printf("%9.2f", av); } #endif #ifdef TEST_CTR printf("\nctr encrypt "); for(blocks = 1; blocks < 10000; blocks *= 10) { time_base(&a0, &sig); time_ctr_crypt(16, blocks, ctr_inc, &av, &sig); sig *= 100.0 / av; av = (int)(100.0 * (av - a0) / (16.0 * blocks)) / 100.0; sig = (int)(10 * sig) / 10.0; printf("%9.2f", av); } #endif #if defined( DLL_IMPORT 
) && defined( DYNAMIC_LINK ) if(h_dll) FreeLibrary(h_dll); #endif printf("\n\n"); return 0; }
// Returns the number of the processor the calling thread is currently
// running on, as reported by GetCurrentProcessorNumber(), converted to int.
int CurrentProcessorNo()
{
    const int processorIndex = static_cast<int>(GetCurrentProcessorNumber());
    return processorIndex;
}
void CRWLock2::EnterRead() { t_procId = GetCurrentProcessorNumber(); AcquireSRWLockShared(&m_lock[t_procId]); }
// PAL export: returns the number of the processor the calling thread is
// currently running on, as reported by GetCurrentProcessorNumber().
REDHAWK_PALEXPORT unsigned int REDHAWK_PALAPI PalGetCurrentProcessorNumber()
{
    const unsigned int processorNumber = GetCurrentProcessorNumber();
    return processorNumber;
}
//_cdecl表示c语言的默认函数调用方法 void _cdecl wmain(int argc, wchar_t **argv) { DWORD dwWaitStatus = 0; unsigned int i; SHARED_DATA_AREA * pData; LARGE_INTEGER start, end, freq; //LARGE_INTEG用来表示一项64位有符号整数值 LARGE_INTEGER roundTripNanoSecs[MAXREADWRITENANOSECSIZE]; //获取该程序运行的处理器索引 RtPrintf("SimpleIPCProducer: Started on Processor %d\n", GetCurrentProcessorNumber()); //创建共享内存区域,由于是同步读取内存区域,因此不需要加锁 //RtCreateSharedMemory的用法,查看RTX Helper hSharedMem = RtCreateSharedMemory(PAGE_READWRITE, 0, sizeof(SHARED_DATA_AREA), _T("ProducerConsumerSample"), (void **)&pData); //共享内存区分配失败 if(!pData) { RtPrintf("SimpleIPCProducer: RtCreateSharedMemory failed\n"); ExitProcess(0); } // pData的大小 pData->size = sizeof(SHARED_DATA_AREA); // wcscpy为宽字符函数,复制 wcscpy( pData->id, _T("ProducerConsumerSample")); // create events to share between producer and consumer //创建事件 hEventProducerDone = RtCreateEvent(NULL, FALSE, FALSE, _T("ProducerDone")); hEvent[EXECUTE_EVENT] = RtCreateEvent(NULL, FALSE, FALSE, _T("ConsumedDone")); hEvent[TERMINATE_EVENT] = RtCreateEvent(NULL, FALSE, FALSE, _T("ProducerConsumerExit")); // start loop for(i=0; i< MAXREADWRITENANOSECSIZE; i++) { //QueryPerformanceCounter来精准计算执行时间 QueryPerformanceCounter(&start); // 向共享内存区的producerData中填充数据 memset(pData->producerData, i, sizeof(pData->producerData)); // 发送ProducerDone的信号 RtSetEvent(hEventProducerDone); //// 等待消费者的信号,dwWaitStatus保存返回状态 dwWaitStatus = RtWaitForMultipleObjects(2, hEvent, FALSE, INFINITE); if (dwWaitStatus == WAIT_OBJECT_0 + TERMINATE_EVENT) { RtPrintf("SimpleIPCProducer: WaitForMultipleObjects failed\n"); RtSetEvent(hEvent[TERMINATE_EVENT]); ExitProcess(0); } //执行结束 if (dwWaitStatus == WAIT_OBJECT_0 + EXECUTE_EVENT) { // check results,检查消费者是否读取数据 //if(pData->consumerData[0] != i) if(memcmp(pData->producerData, pData->consumerData, sizeof(pData->producerData)) != 0) { RtPrintf("SimpleIPCProducer: Consumer failed to update data\n"); RtSetEvent(hEvent[TERMINATE_EVENT]); ExitProcess(0); } QueryPerformanceCounter(&end); 
//QueryPerformanceFrequency返回硬件支持的高精度计数器的频率 QueryPerformanceFrequency(&freq); //如果编译器支持64位,直接使用QuadPart来保存运行时间,单位是纳秒 roundTripNanoSecs[i].QuadPart = (LONGLONG)((end.QuadPart - start.QuadPart) / ((double)(freq.QuadPart)/ (1000000000.0L))); } } RtPrintf("SimpleIPCProducer: exit\n"); RtSetEvent(hEvent[TERMINATE_EVENT]);//发送结束事件 // give consumer a chance to exit,睡眠100毫秒 Sleep(100); for(i = 1; i < MAXREADWRITENANOSECSIZE; i ++) { RtPrintf("roundTrip time of sample %d = %d(nanoseconds)\n", i, roundTripNanoSecs[i].QuadPart); if(i > MAXREADWRITENANOSECSIZE - 1) break; } ExitProcess(0); }
/* Returns the value of GetCurrentProcessorNumber() for the calling thread.
   NOTE(review): despite the "AffinityMask" in the name, the value returned is
   a processor number, not a bit mask - confirm callers expect that. */
int YabThreadGetCurrentThreadAffinityMask()
{
   const int processor = (int)GetCurrentProcessorNumber();
   return processor;
}
// Overrides osThread int CpuAffinityThread::entryPoint() { #if AMDT_BUILD_TARGET == AMDT_WINDOWS_OS //set the thread affinity to the 1 core specified #pragma message ("TODO: Handle more than 64 cores") GT_ASSERT(m_core < 64); DWORD_PTR affinityMask = (DWORD_PTR)1 << m_core; SetThreadAffinityMask(osGetCurrentThreadHandle(), affinityMask); //Let the thread affinity take affect while (GetCurrentProcessorNumber() != m_core) { osSleep(1); } #else int numCPUs; osGetAmountOfLocalMachineCPUs(numCPUs); if (0 >= numCPUs) { // at least 1, as CPU_ALLOC_SIZE(0) returns 0 numCPUs = m_core + 1; } size_t size = CPU_ALLOC_SIZE(numCPUs); cpu_set_t* mask = CPU_ALLOC(numCPUs); GT_ASSERT(nullptr != mask); // Step 1, bind thread to the logical processor CPU_ZERO_S(size, mask); CPU_SET(m_core, mask); if (-1 == sched_setaffinity((pid_t)syscall(__NR_gettid), size, mask)) { CPU_FREE(mask); return -1; } // Step 2, get the thread's current mask and make sure that it // is running on the target processor cpu_set_t* currentMask = CPU_ALLOC(numCPUs); GT_ASSERT(nullptr != currentMask); int retries = 8; // just don't loop forever! do { pthread_yield(); // trigger re-scheduling CPU_ZERO_S(size, currentMask); sched_getaffinity((pid_t)syscall(__NR_gettid), size, currentMask); } while (!CPU_EQUAL_S(size, mask, currentMask) && (0 != retries--)); // OK, cleanup CPU_FREE(currentMask); CPU_FREE(mask); if (0 == retries) // not a fatal error - an offline processor can cause this { return -1; } #endif osCpuid cpuInfo; m_pSessionTopology->processor = cpuInfo.getcore(); m_pSessionTopology->numaNode = cpuInfo.getNodeId(); return 0; }
mlt_slices mlt_slices_init( int threads, int policy, int priority ) { pthread_attr_t tattr; struct sched_param param; mlt_slices ctx = (mlt_slices)calloc( 1, sizeof( struct mlt_slices_s ) ); char *env = getenv( ENV_SLICES ); #ifdef _WIN32 int cpus = GetCurrentProcessorNumber( ); #else int cpus = sysconf( _SC_NPROCESSORS_ONLN ); #endif int i, env_val = env ? atoi(env) : 0; /* check given threads count */ if ( !env || !env_val ) { if ( threads < 0 ) threads = -threads * cpus; else if ( !threads ) threads = cpus; } else if ( env_val < 0 ) { if ( threads < 0 ) threads = env_val * threads * cpus; else if ( !threads ) threads = -env_val * cpus; else threads = -env_val * threads; } else // env_val > 0 { if ( threads < 0 ) threads = env_val * threads; else if ( !threads ) threads = env_val; else threads = threads; } if ( threads > MAX_SLICES ) threads = MAX_SLICES; ctx->count = threads; /* init attributes */ pthread_mutex_init ( &ctx->cond_mutex, NULL ); pthread_cond_init ( &ctx->cond_var_job, NULL ); pthread_cond_init ( &ctx->cond_var_ready, NULL ); pthread_attr_init( &tattr ); pthread_attr_setschedpolicy( &tattr, policy ); param.sched_priority = priority; pthread_attr_setschedparam( &tattr, ¶m ); /* run worker threads */ for ( i = 0; i < ctx->count; i++ ) { pthread_create( &ctx->threads[i], &tattr, mlt_slices_worker, ctx ); pthread_setschedparam( ctx->threads[i], policy, ¶m); } pthread_attr_destroy( &tattr ); /* ready wait workers */ pthread_mutex_lock( &ctx->cond_mutex ); while ( ctx->readys != ctx->count ) pthread_cond_wait( &ctx->cond_var_ready, &ctx->cond_mutex ); pthread_mutex_unlock( &ctx->cond_mutex ); /* return context */ return ctx; }