// Sense-reversing barrier for the four threads of one core.
// Each thread owns one flag in threadState[m][0..3]; the flags of the current
// phase 'm' are also read as a single 32-bit word, so 0x01010101 means "all
// four threads have arrived" (this implies the flags are byte-sized).
// Thread 0 acts as master: it waits for the other three, flips 'mode' for the
// next round, and releases everybody by zeroing the whole word. The other
// threads spin until their own byte is cleared by that release store.
void LockStepTaskScheduler4ThreadsLocalCore::syncThreads(const size_t localThreadID) {
  const unsigned int m = mode;  // latch the phase; thread 0 flips 'mode' below
  if (localThreadID == 0)
  {
    // master: announce own arrival (fences are compiler reordering barriers)
    __memory_barrier();
    threadState[m][localThreadID] = 1;
    __memory_barrier();
    // wait until all four per-thread bytes of this phase are 1
    while( (*(volatile unsigned int*)&threadState[m][0]) != 0x01010101 )
      __pause_cpu(WAIT_CYCLES);
    // switch to the other phase before releasing, so a fast thread entering
    // the next barrier round uses the fresh flag bank
    mode = 1 - mode;
    __memory_barrier();
    // release: clear all four flags of the old phase with one 32-bit store
    *(volatile unsigned int*)&threadState[m][0] = 0;
  }
  else
  {
    // worker: announce arrival, then spin until the master clears our byte
    __memory_barrier();
    threadState[m][localThreadID] = 1;
    __memory_barrier();
    while (threadState[m][localThreadID] == 1)
      __pause_cpu(WAIT_CYCLES);
  }
}
// Sense-reversing linear barrier over the object's 'threadCount' member.
// Two banks of per-thread arrival counters (count0/count1) and release flags
// (flag0/flag1) alternate via 'mode', so back-to-back barrier rounds cannot
// confuse a fast thread with stale state from the previous round.
void LinearBarrierActive::wait (const size_t threadIndex) {
  if (mode == 0)
  {
    if (threadIndex == 0)
    {
      // master: reset the *other* bank for the next round...
      for (size_t i=0; i<threadCount; i++)
        count1[i] = 0;
      // ...then wait for every other thread to check in to this bank
      for (size_t i=1; i<threadCount; i++)
      {
        while (likely(count0[i] == 0))
          __pause_cpu();
      }
      // flip the sense and re-arm the next round's flag before releasing
      mode  = 1;
      flag1 = 0;
      __memory_barrier();  // keep the bookkeeping ordered before the release store
      flag0 = 1;           // release all waiters of this round
    }
    else
    {
      // worker: announce arrival, then spin until the master releases us
      count0[threadIndex] = 1;
      {
        while (likely(flag0 == 0))
          __pause_cpu();
      }
    }
  }
  else  // mode == 1: identical protocol with the two banks swapped
  {
    if (threadIndex == 0)
    {
      for (size_t i=0; i<threadCount; i++)
        count0[i] = 0;
      for (size_t i=1; i<threadCount; i++)
      {
        while (likely(count1[i] == 0))
          __pause_cpu();
      }
      mode  = 0;
      flag0 = 0;
      __memory_barrier();
      flag1 = 1;
    }
    else
    {
      count1[threadIndex] = 1;
      {
        while (likely(flag1 == 0))
          __pause_cpu();
      }
    }
  }
}
int main(int argc,char** argv,char** envp) { // don't remove the prints, they make the disassembly easier to understand... printf("===================================\n"); // this is a compiler reordering barrier in the intel compiler... // this does not emit any assembly... __memory_barrier(); printf("===================================\n"); // this is a machine reordering barrier, which is also a compiler barrier // (because the documentation says it is, not because every machine barrier // is also a compiler reordering barrier) __sync_synchronize(); printf("===================================\n"); // here is a compare and swap that succeeds... int a=4; int old_val=__sync_val_compare_and_swap(&a,4,5); printf("old_val is %d and a is %d\n",old_val,a); printf("===================================\n"); // here is a compare and swap that fails... a=17; old_val=__sync_val_compare_and_swap(&a,4,5); printf("old_val is %d and a is %d\n",old_val,a); printf("===================================\n"); return EXIT_SUCCESS; }
// Lazily create and commit the geometry of a LazyGeometry instance.
// State machine (driven by atomic compare-and-swap on instance->state):
//   LAZY_INVALID -> LAZY_CREATE -> LAZY_COMMIT -> LAZY_VALID
// Exactly one thread wins the creation; the others spin until it is done.
void lazyCreate(LazyGeometry* instance) {
  /* one thread will switch the object from the LAZY_INVALID state to the LAZY_CREATE state */
  // NOTE(review): the result is compared against 0, which presumably equals
  // LAZY_INVALID -- confirm against the state enum definition.
  if (atomic_cmpxchg((int32_t*)&instance->state,LAZY_INVALID,LAZY_CREATE) == 0)
  {
    /* create the geometry */
    printf("creating sphere %i\n",instance->userID);
    instance->object = rtcDeviceNewScene(g_device,RTC_SCENE_STATIC,RTC_INTERSECT1);
    createTriangulatedSphere(instance->object,instance->center,instance->radius);

    /* now switch to the LAZY_COMMIT state */
    // compiler fence: the object must be fully built before the state store
    __memory_barrier();
    instance->state = LAZY_COMMIT;
  }
  else
  {
    /* wait until the geometry got created */
    // The cmpxchg with 10/11 looks like a trick to get an atomic *read* of
    // the state: presumably no real state equals 10, so the swap never
    // succeeds and only the current value is returned -- TODO confirm that
    // 10/11 do not collide with any LAZY_* value.
    while (atomic_cmpxchg((int32_t*)&instance->state,10,11) < LAZY_COMMIT)
    {
      // instead of actively spinning here, best use a condition to let the thread sleep, or let it help in the creation stage
    }
  }

  /* multiple threads might enter the rtcCommit function to jointly
   * build the internal data structures */
  rtcCommit(instance->object);

  /* switch to LAZY_VALID state */
  // racing threads may all attempt this; only the first CAS takes effect
  atomic_cmpxchg((int32_t*)&instance->state,LAZY_COMMIT,LAZY_VALID);
}
void QuadTreeBarrier::CoreSyncData::switchModeAndSendRunSignal(const unsigned int m) { //__memory_barrier(); mode = 1 - mode; __memory_barrier(); *(volatile unsigned int*)&threadState[m][0] = 0; //__memory_barrier(); }
// Reset both phase banks, the phase selector and the broadcast slot to their
// idle values; the trailing fence keeps the stores from being scheduled past
// this point by the compiler.
void QuadTreeBarrier::CoreSyncData::init()
{
  for (unsigned int bank = 0; bank < 2; bank++)
    *(volatile unsigned int*)&threadState[bank][0] = 0;
  mode    = 0;
  data[0] = 0;
  __memory_barrier();
}
void MutexActive::lock () { while (1) { while (flag == 1) __pause(1023); // read without atomic op first if (cmpxchg(flag, 1, 0) == 0) break; } __memory_barrier(); // compiler must not schedule loads and stores around this point }
int create_task(job_t job, void *arg, unsigned long period, unsigned long delay, unsigned long prio_dead, int type, const char *name) { int i; struct task *t; for (i = 1; i < MAX_NUM_TASKS; ++i) /* skip task 0 (idle task) */ if (!taskset[i].valid) break; if (i == MAX_NUM_TASKS) return -1; t = taskset + i; t->job = job; t->arg = (arg == NULL ? t : arg); t->name = name; t->period = period; t->releasetime = ticks + delay; if (type == EDF) { /* this is an EDF task * priority is set to the absolute deadline of the first job * a small absolute deadline yields a large priority */ if (prio_dead == 0) return -1; t->priority = prio_dead + t->releasetime; t->deadline = prio_dead; } else { /* this is a fixed-priority task * to be run in background if no other EDF job is pending */ t->priority = prio_dead; t->deadline = 0; } t->released = 0; init_task_context(t, i); __memory_barrier(); irq_disable(); ++num_tasks; t->valid = 1; irq_enable(); puts("Task "); puts(name); puts(" created, TID="); putu(i); putnl(); return i; }
// Publish this thread's arrival for phase 'm'. The thread id is wrapped to
// the four per-core slots; the fences are compiler reordering barriers around
// the flag store.
void QuadTreeBarrier::CoreSyncData::setThreadStateToDone(const unsigned int m, const unsigned int threadID)
{
  const unsigned int slot = threadID % 4;
  __memory_barrier();
  threadState[m][slot] = 1;
  __memory_barrier();
}
// Sense-reversing linear barrier that additionally runs a reduction callback:
// as thread 0 observes each other thread's arrival, it invokes reductionFct
// with (currentThreadID=0, childThreadID=i, ptr), so the reduction happens
// once per thread, in increasing thread order, before anyone is released.
// NOTE(review): 'wait_cycles' starts at MIN_MIC_BARRIER_WAIT_CYCLES and is
// handed to pause(); whether pause() grows it as a backoff depends on its
// signature -- confirm.
void LinearBarrierActive::syncWithReduction(const size_t threadIndex,
                                            const size_t threadCount,
                                            void (* reductionFct)(const size_t currentThreadID, const size_t childThreadID, void *ptr),
                                            void *ptr)
{
  if (mode == 0)
  {
    if (threadIndex == 0)
    {
      // master: reset the *other* bank for the next round
      for (size_t i=0; i<threadCount; i++)
        count1[i] = 0;
      // wait for each thread in turn and fold in its contribution
      for (size_t i=1; i<threadCount; i++)
      {
        unsigned int wait_cycles = MIN_MIC_BARRIER_WAIT_CYCLES;
        while (likely(count0[i] == 0))
        {
          pause(wait_cycles);
        }
        (*reductionFct)(threadIndex,i,ptr);
      }
      // flip the sense and re-arm the next round before releasing
      mode  = 1;
      flag1 = 0;
      __memory_barrier();  // order bookkeeping before the release store
      flag0 = 1;           // release all waiters
    }
    else
    {
      // worker: announce arrival, then spin until released
      count0[threadIndex] = 1;
      {
        unsigned int wait_cycles = MIN_MIC_BARRIER_WAIT_CYCLES;
        while (likely(flag0 == 0))
        {
          pause(wait_cycles);
        }
      }
    }
  }
  else  // mode == 1: identical protocol with the two banks swapped
  {
    if (threadIndex == 0)
    {
      for (size_t i=0; i<threadCount; i++)
        count0[i] = 0;
      for (size_t i=1; i<threadCount; i++)
      {
        unsigned int wait_cycles = MIN_MIC_BARRIER_WAIT_CYCLES;
        while (likely(count1[i] == 0))
        {
          pause(wait_cycles);
        }
        (*reductionFct)(threadIndex,i,ptr);
      }
      mode  = 0;
      flag0 = 0;
      __memory_barrier();
      flag1 = 1;
    }
    else
    {
      count1[threadIndex] = 1;
      {
        unsigned int wait_cycles = MIN_MIC_BARRIER_WAIT_CYCLES;
        while (likely(flag1 == 0))
        {
          pause(wait_cycles);
        }
      }
    }
  }
}
// Sense-reversing linear barrier over an explicit threadCount parameter
// (unlike wait(), which uses the member). Same two-bank protocol: per-thread
// arrival counters count0/count1 and release flags flag0/flag1 alternate via
// 'mode' so consecutive rounds cannot interfere.
void LinearBarrierActive::waitForThreads(const size_t threadIndex, const size_t threadCount)
{
  if (mode == 0)
  {
    if (threadIndex == 0)
    {
      // master: reset the *other* bank for the next round...
      for (size_t i=0; i<threadCount; i++)
        count1[i] = 0;
      // ...then wait for every other thread to check in to this bank
      for (size_t i=1; i<threadCount; i++)
      {
        unsigned int wait_cycles = MIN_MIC_BARRIER_WAIT_CYCLES;
        while (likely(count0[i] == 0))
        {
          pause(wait_cycles);
        }
      }
      // flip the sense and re-arm the next round before releasing
      mode  = 1;
      flag1 = 0;
      __memory_barrier();  // order bookkeeping before the release store
      flag0 = 1;           // release all waiters
    }
    else
    {
      // worker: announce arrival, then spin until the master releases us
      count0[threadIndex] = 1;
      {
        unsigned int wait_cycles = MIN_MIC_BARRIER_WAIT_CYCLES;
        while (likely(flag0 == 0))
        {
          pause(wait_cycles);
        }
      }
    }
  }
  else  // mode == 1: identical protocol with the two banks swapped
  {
    if (threadIndex == 0)
    {
      for (size_t i=0; i<threadCount; i++)
        count0[i] = 0;
      for (size_t i=1; i<threadCount; i++)
      {
        unsigned int wait_cycles = MIN_MIC_BARRIER_WAIT_CYCLES;
        while (likely(count1[i] == 0))
        {
          pause(wait_cycles);
        }
      }
      mode  = 0;
      flag0 = 0;
      __memory_barrier();
      flag1 = 1;
    }
    else
    {
      count1[threadIndex] = 1;
      {
        unsigned int wait_cycles = MIN_MIC_BARRIER_WAIT_CYCLES;
        while (likely(flag1 == 0))
        {
          pause(wait_cycles);
        }
      }
    }
  }
}
// Release the spin mutex: clear the flag so a thread spinning in lock() can
// acquire it. The fence keeps the critical section's loads and stores from
// being compiler-scheduled past the flag store.
void MutexActive::unlock ()
{
  __memory_barrier(); // compiler must not schedule loads and stores around this point
  flag = 0;
}