/*
 * Acquire an MCS queue lock.
 *
 * L: pointer to the lock word (tail of the waiter queue).
 * I: this thread's queue node; must remain valid until mcs_release.
 *
 * Protocol (visible in the code below): atomically swap our node into the
 * lock's tail. If there was no predecessor, the lock was free and we own it.
 * Otherwise, link ourselves behind the predecessor and spin on our OWN
 * node's `waiting` flag until the predecessor's release clears it
 * (see mcs_release, which writes succ->waiting = 0).
 */
void
mcs_acquire(mcs_lock *L, mcs_qnode_ptr I)
{
  I->next = NULL;
#ifndef __tile__
  /* Atomically publish our node as the new tail; pred is the old tail. */
  mcs_qnode_ptr pred = (mcs_qnode*) SWAP_PTR((volatile void*) L, (void*) I);
#else
  /* Tilera: explicit barrier before the swap (project convention —
     MEM_BARRIER is presumably a full memory fence; defined elsewhere). */
  MEM_BARRIER;
  mcs_qnode_ptr pred = (mcs_qnode*) SWAP_PTR( L, I);
#endif
  if (pred == NULL) 		/* lock was free */
    return;
  I->waiting = 1; // word on which to spin
  /* The flag must be set before the predecessor can see our node,
     hence the barrier before linking. */
  MEM_BARRIER;
  pred->next = I; // make pred point to me
#if defined(OPTERON_OPTIMIZE)
  /* PREFETCHW: prefetch our node with write intent (project macro) so the
     releaser's store to I->waiting transfers the line efficiently. */
  PREFETCHW(I);
#endif	/* OPTERON_OPTIMIZE */
  /* Local spin: each waiter spins on its own cache line. */
  while (I->waiting != 0)
    {
#ifndef __MIC__
      PAUSE;
#endif
#if defined(OPTERON_OPTIMIZE)
      pause_rep(23);
      PREFETCHW(I);
#endif	/* OPTERON_OPTIMIZE */
    }
}
/*
 * Acquire a CLH lock (EPFL variant).
 *
 * Each thread enqueues its current node at the tail and spins on the
 * PREDECESSOR's node (unlike MCS, which spins on its own node).
 * On success the thread recycles the predecessor's node as its own
 * node for the next acquisition — the standard CLH node-exchange trick.
 *
 * Returns 0 in all cases (no error path here).
 */
static int __clhepfl_mutex_lock(clhepfl_mutex_t *impl, clhepfl_context_t *me) {
    clhepfl_node_t *p = me->current;
    p->spin = LOCKED;
    /* Make p->spin = LOCKED visible before the node is published via xchg. */
    MEMORY_BARRIER();

    // The thread enqueues
    clhepfl_node_t *pred = xchg_64((void *)&impl->tail, (void *)p);

    /* No predecessor: the lock was free; we own it immediately. */
    if (pred == NULL)
        return 0;

    // If the previous thread was locked, we wait on its context
    /* Spin on the predecessor's node until its release sets spin != LOCKED. */
    PREFETCHW(pred);
    while (pred->spin == LOCKED) {
        CPU_PAUSE();
        pause_rep(REP_VAL);
        PREFETCHW(pred);
    }

    /* Record the owner's node so unlock knows which spin word to clear. */
    impl->head = p;
    COMPILER_BARRIER();

    // We take the context of the previous thread
    me->current = pred;
    return 0;
}
/*
 * Acquire a test-and-test-and-set (TTAS) lock with randomized
 * exponential backoff.
 *
 * First spins read-only on the lock byte (cheap; no cache-line
 * invalidations), then attempts the actual test-and-set. On a lost
 * race, waits a random delay drawn from [0, me->limit) and doubles
 * the per-thread limit, capped at MAX_DELAY.
 *
 * Returns 0 once the lock is held. When COND_VAR is enabled, also
 * takes the shadow POSIX mutex required for condition-variable support.
 */
int ttasepfl_mutex_lock(ttasepfl_mutex_t *impl, ttasepfl_context_t *me) {
    volatile uint8_t *l = &(impl->spin_lock);
    uint32_t delay;

    while (1) {
        /* Test phase: spin on plain reads until the lock looks free. */
        PREFETCHW(l);
        while ((*l) != UNLOCKED) {
            PREFETCHW(l);
        }

        /* Set phase: TAS returns the previous value; UNLOCKED means we
           won the race and now hold the lock. */
        if (l_tas_uint8(&(impl->spin_lock)) == UNLOCKED) {
#if COND_VAR
            int ret = REAL(pthread_mutex_lock)(&impl->posix_lock);
            assert(ret == 0);
#endif
            return 0;
        } else {
            // backoff
            /* Random delay from the current window, then grow the window
               (exponential backoff, capped at MAX_DELAY).
               NOTE(review): ttas_seeds is shared global seed state —
               presumably per-thread or race-tolerant by design; confirm. */
            delay = my_random(&(ttas_seeds[0]), &(ttas_seeds[1]), &(ttas_seeds[2])) % (me->limit);
            me->limit = MAX_DELAY > 2 * (me->limit) ? 2 * (me->limit) : MAX_DELAY;
            cdelay(delay);
        }
    }
}
/*
 * Acquire a hierarchical CLH (HCLH) lock.
 *
 * lq: this cluster's local queue tail; gq: the global queue tail;
 * my_qnode: this thread's node.
 *
 * Two phases, both visible below:
 *  1) CAS our node onto the LOCAL queue. If we have a local predecessor,
 *     wait on it; wait_for_grant_or_cluster_master (opaque here) decides
 *     whether the predecessor directly granted us the lock.
 *  2) Otherwise we are this cluster's master: CAS-splice the entire local
 *     queue onto the GLOBAL queue, mark the spliced tail, and spin on the
 *     global predecessor's successor_must_wait flag.
 *
 * Returns the predecessor node (needed by hclh_release for node recycling).
 */
volatile qnode * hclh_acquire(local_queue *lq, global_queue *gq, qnode *my_qnode)
{
  volatile qnode* my_pred;

  /* Phase 1: enqueue on the local (per-cluster) queue. */
  do
    {
#if defined(OPTERON_OPTIMIZE)
      PREFETCHW(lq);
#endif	/* OPTERON_OPTIMIZE */
      my_pred = *lq;
    } while (CAS_PTR(lq, my_pred, my_qnode)!=my_pred);

  if (my_pred != NULL)
    {
      /* We have a local predecessor: wait until it either grants us the
	 lock or tells us we became the cluster master. */
      uint16_t i_own_lock = wait_for_grant_or_cluster_master(my_pred, my_qnode->fields.cluster_id);
      if (i_own_lock)
	{
	  return my_pred;
	}
    }

  PAUSE;  PAUSE;

  /* Phase 2: we are the cluster master — splice the local queue into the
     global queue in one CAS (global tail <- local tail). */
  volatile qnode * local_tail;
  do
    {
#if defined(OPTERON_OPTIMIZE)
      PREFETCHW(gq);
      PREFETCHW(lq);
#endif	/* OPTERON_OPTIMIZE */
      my_pred = *gq;
      local_tail = *lq;
      PAUSE;
    } while(CAS_PTR(gq, my_pred, local_tail)!=my_pred);

  /* Tell the spliced tail it is the last node of this splice. */
  local_tail->fields.tail_when_spliced = 1;

#if defined(OPTERON_OPTIMIZE)
  PREFETCHW(my_pred);
#endif	/* OPTERON_OPTIMIZE */
  /* Spin on the global predecessor until it releases. */
  while (my_pred->fields.successor_must_wait)
    {
      PAUSE;
#if defined(OPTERON_OPTIMIZE)
      pause_rep(23);
      PREFETCHW(my_pred);
#endif	/* OPTERON_OPTIMIZE */
    }
  return my_pred;
}
/*
 * Release an HCLH lock and recycle the predecessor's node.
 *
 * Clearing my_qnode->fields.successor_must_wait hands the lock to our
 * successor (which spins on that flag in hclh_acquire). The predecessor's
 * node is then reinitialized via a CAS loop on its packed bit-field word
 * (fresh cluster id, successor_must_wait = 1, tail_when_spliced = 0) and
 * returned to the caller as this thread's qnode for the next acquisition —
 * the standard CLH node-exchange.
 */
qnode* hclh_release(qnode *my_qnode, qnode * my_pred)
{
  /* Hand the lock to the successor spinning on our node. */
  my_qnode->fields.successor_must_wait = 0;

  qnode* pr = my_pred;

  /* Build the desired fresh node state in a local, then CAS it into the
     predecessor's packed data word. */
  qnode new_node;
  new_node.data=0;
  new_node.fields.cluster_id=hclh_node_mine;
  new_node.fields.successor_must_wait = 1;
  new_node.fields.tail_when_spliced=0;

#if defined(OPTERON_OPTIMIZE)
  PREFETCHW(pr);
#endif	/* OPTERON_OPTIMIZE */
  /* CAS loop: retry with a re-read snapshot until the swap lands. */
  uint32_t old_data = pr->data;
  while (CAS_U32(&pr->data,old_data,new_node.data)!=old_data)
    {
      old_data=pr->data;
      PAUSE;
#if defined(OPTERON_OPTIMIZE)
      PREFETCHW(pr);
#endif	/* OPTERON_OPTIMIZE */
    }

  /* Recycle the predecessor's node as our own for the next acquire. */
  my_qnode=pr;
  return my_qnode;
}
/*
 * Allocate `size` bytes from allocator `a`.
 *
 * Under TIGHT_ALLOC every request goes straight to malloc. Otherwise:
 *  1) prefer memory reclaimed by the garbage collector (collected_set_list);
 *  2) fall back to bump-pointer allocation from the current chunk,
 *     grabbing a fresh cache-line-aligned chunk when the current one
 *     cannot satisfy the request.
 *
 * Returns a pointer to the allocated region (never checked for NULL by
 * callers here; memalign failure trips the assert).
 */
void* ssmem_alloc(ssmem_allocator_t* a, size_t size)
{
  void* m = NULL;

#ifdef TIGHT_ALLOC
  m = (void*) malloc(size);
#else
  /* 1st try to use from the collected memory */
  ssmem_free_set_t* cs = a->collected_set_list;
  if (cs != NULL)
    {
      /* Pop the most recently collected object and prefetch it for write. */
      m = (void*) cs->set[--cs->curr];
      PREFETCHW(m);

      if (cs->curr <= 0)
	{
	  /* Set exhausted: unlink it and return it to the available pool. */
	  a->collected_set_list = cs->set_next;
	  a->collected_set_num--;
	  ssmem_free_set_make_avail(a, cs);
	}
    }
  else
    {
      if ((a->mem_curr + size) >= a->mem_size)
	{
	  /* BUG FIX: previously a request with size > a->mem_size was still
	     served from a chunk of only a->mem_size bytes, so the caller
	     received memory running past the end of the allocation (silent
	     heap corruption). Grow the chunk size until the request fits,
	     as the sibling ssmem_alloc variant in this code base does. */
	  while (size > a->mem_size)
	    {
	      a->mem_size <<= 1;
	    }
	  /* printf("[ALLOC] out of mem, need to allocate\n"); */
	  a->mem = (void*) memalign(CACHE_LINE_SIZE, a->mem_size);
	  assert(a->mem != NULL);
	  a->mem_curr = 0;
	  a->tot_size += a->mem_size;
	  /* Track the chunk so the allocator can free it on teardown. */
	  a->mem_chunks = ssmem_list_node_new(a->mem, a->mem_chunks);
	}

      /* Bump-pointer allocation from the current chunk.
	 NOTE(review): arithmetic on void* is a GCC extension. */
      m = a->mem + a->mem_curr;
      a->mem_curr += size;
    }
#endif

#if SSMEM_TS_INCR_ON == SSMEM_TS_INCR_ON_ALLOC || SSMEM_TS_INCR_ON == SSMEM_TS_INCR_ON_BOTH
  ssmem_ts_next();
#endif

  return m;
}
/*
 * Release an MCS queue lock.
 *
 * If we appear to have no successor, try to CAS the lock tail from our
 * node back to NULL (lock free again). If the CAS fails, another thread
 * has already swapped itself into the tail but has not yet linked
 * pred->next — wait for that link to appear, then hand over the lock by
 * clearing the successor's waiting flag (the word it spins on in
 * mcs_acquire).
 */
void mcs_release(mcs_lock *L, mcs_qnode_ptr I)
{
#ifdef __tile__
  /* Tilera: flush our critical-section stores before the handoff. */
  MEM_BARRIER;
#endif
  mcs_qnode_ptr succ;
#if defined(OPTERON_OPTIMIZE)
  PREFETCHW(I);
#endif	/* OPTERON_OPTIMIZE */
  if (!(succ = I->next)) /* I seem to have no succ. */
    {
      /* try to fix global pointer */
      if (CAS_PTR(L, I, NULL) == I)
	return;
      /* CAS failed: a successor exists but hasn't linked itself yet —
	 wait for its pred->next store (see mcs_acquire). */
      do
	{
	  succ = I->next;
	  PAUSE;
	} while (!succ); // wait for successor
    }
  /* Hand over the lock: the successor spins on its own waiting flag. */
  succ->waiting = 0;
}
/*
 * Acquire a ticket lock with proportional backoff.
 *
 * Atomically take the next ticket (fetch-and-add on u.s.request), then
 * spin until u.s.grant reaches it. The wait between polls is scaled by
 * the distance to our ticket (farther away -> longer waits), resetting
 * the base wait whenever the distance changes; threads very far back
 * (distance > 20) yield the CPU instead of burning it.
 *
 * Returns 0 once the lock is held. With COND_VAR, also takes the shadow
 * POSIX mutex for condition-variable support.
 */
int ticketepfl_mutex_lock(ticketepfl_mutex_t *impl, ticketepfl_context_t *UNUSED(me)) {
    // Acquire the local lock
    uint32_t my_ticket = __sync_add_and_fetch(&impl->u.s.request, 1);

    uint32_t wait = TICKET_BASE_WAIT;
    uint32_t distance_prev = 1;

    while (1) {
        PREFETCHW(&impl->u.u);
        uint32_t cur = impl->u.s.grant;
        if (cur == my_ticket) {
            break;
        }
        /* How many ticket-holders are still ahead of us
           (sub_abs presumably handles counter wraparound). */
        uint32_t distance = sub_abs(cur, my_ticket);
        if (distance > 1) {
            /* Distance changed: a grant happened; restart the backoff. */
            if (distance != distance_prev) {
                distance_prev = distance;
                wait = TICKET_BASE_WAIT;
            }
            /* Wait proportionally to our position in the queue. */
            nop_rep(distance * wait);
            /* wait = (wait + TICKET_BASE_WAIT) & TICKET_MAX_WAIT; */
        } else {
            /* We are next: short poll interval. */
            nop_rep(TICKET_WAIT_NEXT);
        }

        /* Far back in the queue: give up the CPU instead of spinning. */
        if (distance > 20) {
            sched_yield();
            /* pthread_yield(); */
        }
    }

#if COND_VAR
    int ret = REAL(pthread_mutex_lock)(&impl->posix_lock);
    assert(ret == 0);
#endif
    return 0;
}
/*
 * Allocate `size` bytes from allocator `a` (full-featured variant).
 *
 * Allocation strategy:
 *  1) prefer memory reclaimed by the collector (collected_set_list);
 *  2) otherwise bump-pointer allocate from the current chunk, grabbing a
 *     new cache-line-aligned chunk when the current one is exhausted.
 * Chunk sizing: optionally doubles on each refill (SSMEM_MEM_SIZE_DOUBLE,
 * capped at SSMEM_MEM_SIZE_MAX) and grows until a single oversized
 * request fits — aborting via assert if that would exceed the cap.
 */
void* ssmem_alloc(ssmem_allocator_t* a, size_t size)
{
  void* m = NULL;

  /* 1st try to use from the collected memory */
  ssmem_free_set_t* cs = a->collected_set_list;
  if (cs != NULL)
    {
      /* Pop the most recently collected object; prefetch for write. */
      m = (void*) cs->set[--cs->curr];
      PREFETCHW(m);

      if (cs->curr <= 0)
	{
	  /* Set exhausted: unlink it and make it available for reuse. */
	  a->collected_set_list = cs->set_next;
	  a->collected_set_num--;
	  ssmem_free_set_make_avail(a, cs);
	}
    }
  else
    {
      if ((a->mem_curr + size) >= a->mem_size)
	{
#if SSMEM_MEM_SIZE_DOUBLE == 1
	  /* Geometric chunk growth, capped at SSMEM_MEM_SIZE_MAX. */
	  a->mem_size <<= 1;
	  if (a->mem_size > SSMEM_MEM_SIZE_MAX)
	    {
	      a->mem_size = SSMEM_MEM_SIZE_MAX;
	    }
#endif
	  /* printf("[ALLOC] out of mem, need to allocate (chunk = %llu MB)\n", */
	  /*        a->mem_size / (1LL<<20)); */

	  /* A single request larger than the chunk: keep doubling the
	     chunk size until it fits, refusing to exceed the max. */
	  if (size > a->mem_size)
	    {
	      /* printf("[ALLOC] asking for large mem. chunk\n"); */
	      while (a->mem_size < size)
		{
		  if (a->mem_size > SSMEM_MEM_SIZE_MAX)
		    {
		      fprintf(stderr, "[ALLOC] asking for memory chunk larger than max (%llu MB) \n",
			      SSMEM_MEM_SIZE_MAX / (1024 * 1024LL));
		      assert(a->mem_size <= SSMEM_MEM_SIZE_MAX);
		    }
		  a->mem_size <<= 1;
		}
	      /* printf("[ALLOC] new mem size chunk is %llu MB\n", a->mem_size / (1024 * 1024LL)); */
	    }

#if SSMEM_TRANSPARENT_HUGE_PAGES
	  /* NOTE(review): `ret` is only consumed by the assert — unused
	     (and the failure unhandled) if built with NDEBUG. */
	  int ret = posix_memalign(&a->mem, CACHE_LINE_SIZE, a->mem_size);
	  assert(ret == 0);
#else
	  a->mem = (void*) memalign(CACHE_LINE_SIZE, a->mem_size);
#endif
	  assert(a->mem != NULL);

#if SSMEM_ZERO_MEMORY == 1
	  memset(a->mem, 0, a->mem_size);
#endif
	  a->mem_curr = 0;
	  a->tot_size += a->mem_size;
	  /* Track the chunk so the allocator can free it on teardown. */
	  a->mem_chunks = ssmem_list_node_new(a->mem, a->mem_chunks);
	}

      /* Bump-pointer allocation from the current chunk. */
      m = a->mem + a->mem_curr;
      a->mem_curr += size;
    }

#if SSMEM_TS_INCR_ON == SSMEM_TS_INCR_ON_ALLOC || SSMEM_TS_INCR_ON == SSMEM_TS_INCR_ON_BOTH
  ssmem_ts_next();
#endif

  return m;
}
/*
 * Release a ticket lock: grant the next ticket.
 *
 * The compiler barrier keeps the critical section's stores from being
 * reordered after the grant increment by the compiler; waiters in
 * ticketepfl_mutex_lock observe the new grant value and proceed.
 * Statement order here is load-bearing — do not rearrange.
 */
void __ticketepfl_mutex_unlock(ticketepfl_mutex_t *impl) {
    /* Prefetch the ticket word with write intent before the update. */
    PREFETCHW(&impl->u.u);
    COMPILER_BARRIER();
    impl->u.s.grant++;
}