/* Utility to destroy a thread-safe global variable */
static void
destroy_var(pthread_var_np_t vp)
{
    struct slots *slots;
    struct value *val;

    if (vp == NULL)
        return;

    pthread_mutex_lock(&vp->write_lock);
    while (vp->values != NULL) {
        val = atomic_read_ptr((volatile void **)&vp->values);
        vp->values = val->next;
        if (vp->dtor != NULL)
            vp->dtor(val->value);
        free(val);
    }
    while (vp->slots != NULL) {
        slots = atomic_read_ptr((volatile void **)&vp->slots);
        vp->slots = slots->next;
        free(slots->slot_array);
        free(slots);
    }
    vp->dtor = NULL;
    pthread_mutex_unlock(&vp->write_lock);
    pthread_mutex_destroy(&vp->write_lock);
    pthread_mutex_destroy(&vp->waiter_lock);
    pthread_cond_destroy(&vp->waiter_cv);
    /* XXX We leak vp->tkey! */
}
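/*
 * The code here relies on a small set of atomic wrappers
 * (atomic_read_ptr(), atomic_write_ptr(), atomic_cas_32(),
 * atomic_cas_ptr(), atomic_inc_32_nv(), ...) whose definitions are not
 * shown in this excerpt.  The following compiled-out sketch shows the
 * assumed semantics, using GCC/Clang __atomic builtins; the real
 * implementations may differ (e.g., per-platform assembly).
 */
#if 0
#include <stdint.h>

static inline void *
atomic_read_ptr(volatile void **p)           /* acquire load */
{
    return __atomic_load_n((void **)p, __ATOMIC_ACQUIRE);
}

static inline void
atomic_write_ptr(volatile void **p, void *v) /* release store */
{
    __atomic_store_n((void **)p, v, __ATOMIC_RELEASE);
}

/* CAS; returns the value observed before the exchange attempt */
static inline uint32_t
atomic_cas_32(volatile uint32_t *p, uint32_t oldval, uint32_t newval)
{
    uint32_t expected = oldval;

    (void) __atomic_compare_exchange_n(p, &expected, newval, 0,
                                       __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE);
    return expected; /* == oldval iff the CAS succeeded */
}

/* Increment; returns the new value ("_nv") */
static inline uint32_t
atomic_inc_32_nv(volatile uint32_t *p)
{
    return __atomic_add_fetch(p, 1, __ATOMIC_ACQ_REL);
}
#endif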
/*
 * Lock-less utility that scans through the logical slot array looking
 * for a free slot to reuse.
 */
static struct slot *
get_free_slot(pthread_var_np_t vp)
{
    struct slots *slots;
    struct slot *slot;
    size_t i;

    for (slots = atomic_read_ptr((volatile void **)&vp->slots);
         slots != NULL;
         slots = atomic_read_ptr((volatile void **)&slots->next)) {
        for (i = 0; i < slots->slot_count; i++) {
            slot = &slots->slot_array[i];
            if (atomic_cas_32(&slot->in_use, 0, 1) == 0)
                return slot;
        }
    }
    return NULL;
}
/* Lock-less utility to get the nth slot */
static struct slot *
get_slot(pthread_var_np_t vp, uint32_t slot_idx)
{
    struct slots *slots;
    uint32_t nslots = 0;

    for (slots = atomic_read_ptr((volatile void **)&vp->slots);
         slots != NULL;
         slots = atomic_read_ptr((volatile void **)&slots->next)) {
        nslots += slots->slot_count;
        if (nslots > slot_idx)
            break;
    }
    if (nslots <= slot_idx)
        return NULL;

    assert(slot_idx - slots->slot_base < slots->slot_count);
    return &slots->slot_array[slot_idx - slots->slot_base];
}
/**
 * Send a signal to the other end if a thread is waiting there.
 * @param fsq is a pointer to the fs queue object.
 */
static void
fsq_sigsend(struct fs_queue *fsq, enum wait4end ep)
{
    struct signals *waitsigs;
    struct ksignal_param param = {
        .si_code = SIGKERN_FSQ,
    };

    waitsigs = atomic_read_ptr((void **)fsq_get_sigs(fsq, ep));
    if (waitsigs)
        ksignal_sendsig(waitsigs, _SIGKERN, &param);
}
/**
 * Get the most up-to-date value of the given thread-safe global variable.
 *
 * @param [in] vp Pointer to a thread-safe global variable
 * @param [out] res Pointer to location where the variable's value will be output
 * @param [out] version Pointer (may be NULL) to 64-bit integer where the current version will be output
 *
 * @return Zero on success, a system error code otherwise
 */
int
pthread_var_get_np(pthread_var_np_t vp, void **res, uint64_t *version)
{
    int err = 0;
    uint32_t slot_idx;
    uint32_t slots_in_use;
    uint64_t vers;
    struct slot *slot;
    struct value *newest;

    if (version == NULL)
        version = &vers;
    *version = 0;
    *res = NULL;

    if ((slot = pthread_getspecific(vp->tkey)) == NULL) {
        /* First time for this thread -> O(N) slow path (subscribe thread) */
        slot_idx = atomic_inc_32_nv(&vp->next_slot_idx) - 1;
        if ((slot = get_free_slot(vp)) == NULL) {
            /* Slower path still: grow the slots array list */
            err = grow_slots(vp, slot_idx, 2);  /* O(log N) */
            assert(err == 0);
            slot = get_slot(vp, slot_idx);      /* O(N) */
            assert(slot != NULL);
            atomic_write_32(&slot->in_use, 1);
        }
        assert(slot->vp == vp);
        slots_in_use = atomic_inc_32_nv(&vp->slots_in_use);
        assert(slots_in_use > 0);
        if ((err = pthread_setspecific(vp->tkey, slot)) != 0)
            return err;
    }

    /*
     * Else/then fast path: one acquire read, one release write, no
     * free()s.  O(1).
     *
     * We have to loop because we could read one value in the
     * conditional, and that value could get freed if a writer runs
     * between the read in the conditional and the assignment to
     * slot->value, with no other readers also succeeding in capturing
     * that value before that writer completes.
     *
     * This loop will run just once if there are no writers, and will
     * run as many times as writers can run between the conditional and
     * the body.  This loop can only be an infinite loop if there's an
     * infinite number of writers that run with higher priority than
     * this thread.  This is why writers yield() before dropping their
     * write lock.
     *
     * Note that in the body of this loop we can write a soon-to-become-
     * invalid value to our slot, because many writers can write between
     * the loop condition and the body.  The writer has to jump through
     * some hoops to deal with this.
     */
    while (atomic_read_ptr((volatile void **)&slot->value) !=
           (newest = atomic_read_ptr((volatile void **)&vp->values)))
        atomic_write_ptr((volatile void **)&slot->value, newest);

    if (newest != NULL) {
        *res = newest->value;
        *version = newest->version;
    }
    return 0;
}
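/*
 * Illustrative use of the read side; a minimal, compiled-out sketch.
 * It assumes the variable was created elsewhere (the constructor and
 * dtor registration are not shown in this excerpt).  After the first
 * call subscribes the calling thread to a slot, every subsequent call
 * is lock-less and O(1).
 */
#if 0
static void *
reader_thread(void *arg)
{
    pthread_var_np_t vp = arg;
    void *data;
    uint64_t version;

    for (;;) {
        if (pthread_var_get_np(vp, &data, &version) != 0)
            break;
        /* data stays valid for this thread at least until the next get */
    }
    return NULL;
}
#endif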
/* Lock-less utility to grow the logical slot array */
static int
grow_slots(pthread_var_np_t vp, uint32_t slot_idx, int tries)
{
    uint32_t nslots = 0;
    uint32_t additions;
    uint32_t i;
    volatile struct slots **slotsp;
    struct slots *new_slots;

    for (slotsp = &vp->slots;
         atomic_read_ptr((volatile void **)slotsp) != NULL;
         slotsp = &((struct slots *)atomic_read_ptr((volatile void **)slotsp))->next)
        nslots += (*slotsp)->slot_count;

    if (nslots > slot_idx)
        return 0;
    if (tries < 1)
        return EAGAIN; /* shouldn't happen; XXX assert? */

    if ((new_slots = calloc(1, sizeof(*new_slots))) == NULL)
        return errno;

    additions = (nslots == 0) ? 4 : nslots + nslots / 2;
    while (slot_idx >= nslots + additions) {
        /*
         * In this case we're racing with other readers to grow the slot
         * list, but if we lose the race then our slot_idx may not be
         * covered in the list as grown by the winner.  We may have to
         * try again.
         *
         * There's always a winner, so eventually we won't need to try
         * again.
         */
        additions += additions + additions / 2;
        tries++;
    }
    assert(slot_idx - nslots < additions);

    new_slots->slot_array = calloc(additions, sizeof(*new_slots->slot_array));
    if (new_slots->slot_array == NULL) {
        free(new_slots);
        return errno;
    }
    new_slots->slot_count = additions;
    new_slots->slot_base = nslots;

    for (i = 0; i < additions; i++) {
        new_slots->slot_array[i].in_use = 0;
        new_slots->slot_array[i].value = 0;
        new_slots->slot_array[i].vp = vp;
    }

    /* Reserve the slot we wanted (new slots not added yet) */
    atomic_write_32(&new_slots->slot_array[slot_idx - nslots].in_use, 1);

    /* Add the new slots to the logical array of slots */
    if (atomic_cas_ptr((volatile void **)slotsp, NULL, new_slots) != NULL) {
        /*
         * We lost the race to grow the array.  The index we wanted is
         * not guaranteed to be covered by the array as grown by the
         * winner.  We fall through to recurse to repeat.
         *
         * See commentary above where tries is incremented.
         */
        free(new_slots->slot_array);
        free(new_slots);
    }

    /*
     * If we won the race to grow the array then the index we wanted is
     * guaranteed to be present and recursing here is cheap.  If we lost
     * the race we need to retry.  We could goto the top of the function
     * instead, in case there's no tail call optimization.
     */
    return grow_slots(vp, slot_idx, tries - 1);
}
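/*
 * Worked example of the sizing above: the first grow allocates 4
 * slots; thereafter each grow adds half again the current total, so
 * capacity progresses 4 -> 10 -> 25 -> 62 -> ..., roughly 2.5x per
 * grow.  That keeps the chain of slots chunks short, which is what
 * makes get_slot()'s list walk cheap for any realistic thread count.
 */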
/**
 * Set new data on a thread-safe global variable.
 *
 * @param [in] vp Pointer to thread-safe global variable
 * @param [in] cfdata New value for the thread-safe global variable
 * @param [out] new_version New version number
 *
 * @return 0 on success, or a system error such as ENOMEM.
 */
int
pthread_var_set_np(pthread_var_np_t vp, void *cfdata, uint64_t *new_version)
{
    int err;
    size_t i;
    struct var *v;
    struct vwrapper *old_wrapper = NULL;
    struct vwrapper *wrapper;
    struct vwrapper *tmp;
    uint64_t vers;
    uint64_t tmp_version;
    uint64_t nref;

    if (cfdata == NULL)
        return EINVAL;

    if (new_version == NULL)
        new_version = &vers;
    *new_version = 0;

    /* Build a wrapper for the new value */
    if ((wrapper = calloc(1, sizeof(*wrapper))) == NULL)
        return errno;

    /*
     * The var itself holds a reference to the current value, so its
     * nref starts at 1, but that reference is taken further below.
     */
    wrapper->dtor = vp->dtor;
    wrapper->nref = 0;
    wrapper->ptr = cfdata;

    if ((err = pthread_mutex_lock(&vp->write_lock)) != 0) {
        free(wrapper);
        return err;
    }

    /* vp->next_version is stable because we hold the write_lock */
    *new_version = wrapper->version = atomic_read_64(&vp->next_version);

    /* Grab the next slot */
    v = vp->vars[(*new_version + 1) & 0x1].other;
    old_wrapper = atomic_read_ptr((volatile void **)&v->wrapper);

    if (*new_version == 0) {
        /* This is the first write; set the wrapper on both slots */
        for (i = 0; i < sizeof(vp->vars)/sizeof(vp->vars[0]); i++) {
            v = &vp->vars[i];
            nref = atomic_inc_32_nv(&wrapper->nref);
            v->version = 0;
            tmp = atomic_cas_ptr((volatile void **)&v->wrapper,
                                 old_wrapper, wrapper);
            assert(tmp == old_wrapper && tmp == NULL);
        }
        assert(nref > 1);

        tmp_version = atomic_inc_64_nv(&vp->next_version);
        assert(tmp_version == 1);

        /* Signal waiters */
        (void) pthread_mutex_lock(&vp->waiter_lock);
        (void) pthread_cond_signal(&vp->waiter_cv); /* no thundering herd */
        (void) pthread_mutex_unlock(&vp->waiter_lock);
        return pthread_mutex_unlock(&vp->write_lock);
    }

    nref = atomic_inc_32_nv(&wrapper->nref);
    assert(nref == 1);
    assert(old_wrapper != NULL && old_wrapper->nref > 0);

    /* Wait until that slot is quiescent before mutating it */
    if ((err = pthread_mutex_lock(&vp->cv_lock)) != 0) {
        (void) pthread_mutex_unlock(&vp->write_lock);
        free(wrapper);
        return err;
    }
    while (atomic_read_32(&v->nreaders) > 0) {
        /*
         * We have a separate lock for writing vs. waiting so that no
         * other writer can steal a march on us.  All writers will
         * enter, and all writers will finish.  We got here by winning
         * the race for the write lock, so we'll hold onto it, and thus
         * avoid having to restart here.
         */
        if ((err = pthread_cond_wait(&vp->cv, &vp->cv_lock)) != 0) {
            (void) pthread_mutex_unlock(&vp->cv_lock);
            (void) pthread_mutex_unlock(&vp->write_lock);
            free(wrapper);
            return err;
        }
    }
    if ((err = pthread_mutex_unlock(&vp->cv_lock)) != 0) {
        (void) pthread_mutex_unlock(&vp->write_lock);
        free(wrapper);
        return err;
    }

    /* Update the now-quiescent slot; these are the release operations */
    tmp = atomic_cas_ptr((volatile void **)&v->wrapper, old_wrapper, wrapper);
    assert(tmp == old_wrapper);
    v->version = *new_version;
    tmp_version = atomic_inc_64_nv(&vp->next_version);
    assert(tmp_version == *new_version + 1);
    assert(v->version > v->other->version);

    /* Release the old value */
    assert(old_wrapper != NULL && old_wrapper->nref > 0);
    wrapper_free(old_wrapper);

    /* Done */
    return pthread_mutex_unlock(&vp->write_lock);
}
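/*
 * wrapper_free() is used above but not defined in this excerpt.  A
 * minimal, compiled-out sketch of the assumed behavior: drop one
 * reference, and destroy the wrapped value when the last reference
 * goes away.
 */
#if 0
static void
wrapper_free(struct vwrapper *wrapper)
{
    if (wrapper == NULL)
        return;
    if (atomic_dec_32_nv(&wrapper->nref) > 0)
        return;
    if (wrapper->dtor != NULL)
        wrapper->dtor(wrapper->ptr);
    free(wrapper);
}
#endif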
/**
 * Get the most up-to-date value of the given thread-safe global variable.
 *
 * @param [in] vp Pointer to a thread-safe global variable
 * @param [out] res Pointer to location where the variable's value will be output
 * @param [out] version Pointer (may be NULL) to 64-bit integer where the current version will be output
 *
 * @return Zero on success, a system error code otherwise
 */
int
pthread_var_get_np(pthread_var_np_t vp, void **res, uint64_t *version)
{
    int err = 0;
    int err2 = 0;
    uint32_t nref;
    struct var *v;
    uint64_t vers, vers2;
    struct vwrapper *wrapper;
    int got_both_slots = 0; /* Whether we incremented both slots' nreaders */
    int do_signal_writer = 0;

    if (version == NULL)
        version = &vers;
    *version = 0;
    *res = NULL;

    if ((wrapper = pthread_getspecific(vp->tkey)) != NULL &&
        wrapper->version == atomic_read_64(&vp->next_version) - 1) {
        /* Fast path */
        *version = wrapper->version;
        *res = wrapper->ptr;
        return 0;
    }

    /* Get the current next version */
    *version = atomic_read_64(&vp->next_version);
    if (*version == 0) {
        /* Not set yet */
        assert(*version == 0 || *res != NULL);
        return 0;
    }
    (*version)--; /* make it the current version */

    /* Get what we hope is still the current slot */
    v = &vp->vars[(*version) & 0x1];

    /*
     * We picked a slot, but we could just have lost against one or more
     * writers.  So far nothing we've done would block any number of
     * them.
     *
     * We increment nreaders for the slot we picked to keep out
     * subsequent writers; we can then lose one more race at most.
     *
     * But we still need to learn whether we lost the race.
     */
    (void) atomic_inc_32_nv(&v->nreaders);

    /* See if we won any race */
    if ((vers2 = atomic_read_64(&vp->next_version)) == *version) {
        /*
         * We won, or didn't race at all.  We can now safely
         * increment nref for the wrapped value in the current slot.
         *
         * We can still have lost one race, but this slot is now ours.
         *
         * The key here is that we updated nreaders for one slot,
         * which might not keep the one writer we might have been
         * racing with from making the then-current slot the now-
         * previous slot, but because writers are serialized it will
         * keep the next writer from touching the slot we thought
         * was the current slot.  Thus here we either have the
         * current slot or the previous slot, and either way it's OK
         * for us to grab a reference to the wrapped value in the
         * slot we took.
         */
        goto got_a_slot;
    }

    /*
     * We may have incremented nreaders for the wrong slot.  Any number
     * of writers could have written between our getting
     * vp->next_version the first time, and our incrementing nreaders
     * for the corresponding slot.  We can't incref the nref of the
     * value wrapper found at the slot we picked.  We first have to find
     * the correct current slot, or ensure that no writer will release
     * the other slot.
     *
     * We increment the reader count on the other slot, but we do it
     * *before* decrementing the reader count on this one.  This should
     * guarantee that we find the other one present by keeping
     * subsequent writers (subsequent to the second writer we might be
     * racing with) out of both slots for the time between the update of
     * one slot's nreaders and the other's.
     *
     * We then have to repeat the race-loss detection.  We need only do
     * this at most once.
     */
    atomic_inc_32_nv(&v->other->nreaders); /* We hold both slots */
    got_both_slots = 1;

    /*
     * vp->next_version can now increment by at most one, and we're
     * guaranteed to have one usable slot (whichever one we _now_ see as
     * the current slot, and which can still become the previous slot).
     */
    vers2 = atomic_read_64(&vp->next_version);
    assert(vers2 > *version);
    *version = vers2 - 1;

    /* Select a slot that looks current in this thread */
    v = &vp->vars[(*version) & 0x1];

got_a_slot:
    if (v->wrapper == NULL) {
        /* Whoa, nothing there; shouldn't happen; assert? */
        assert(*version == 0);
        assert(*version == 0 || *res != NULL);
        if (got_both_slots && atomic_dec_32_nv(&v->other->nreaders) == 0) {
            /* Last reader of a slot -> signal writer. */
            do_signal_writer = 1;
        }
        /*
         * Optimization TODO:
         *
         * If vp->next_version hasn't changed since earlier then we
         * should be able to avoid having to signal a writer when we
         * decrement what we know is the current slot's nreaders to
         * zero.  This should read:
         *
         *  if ((atomic_dec_32_nv(&v->nreaders) == 0 &&
         *       atomic_read_64(&vp->next_version) == vers2) ||
         *      do_signal_writer)
         *      err2 = signal_writer(vp);
         */
        if (atomic_dec_32_nv(&v->nreaders) == 0 || do_signal_writer)
            err2 = signal_writer(vp);
        return (err2 == 0) ? err : err2;
    }

    assert(vers2 == atomic_read_64(&vp->next_version) ||
           (vers2 + 1) == atomic_read_64(&vp->next_version));

    /* Take the wrapped value for the slot we chose */
    nref = atomic_inc_32_nv(&v->wrapper->nref);
    assert(nref > 1);
    *version = v->wrapper->version;
    *res = atomic_read_ptr((volatile void **)&v->wrapper->ptr);
    assert(*res != NULL);

    /*
     * We'll release the previous wrapper and save the new one in
     * vp->tkey below, after releasing the slot it came from.
     */
    wrapper = v->wrapper;

    /*
     * Release the slot(s) and signal any possible waiting writer if
     * either slot's nreaders drops to zero (that's what the writer will
     * be waiting for).
     *
     * The one blocking operation done by readers happens in
     * signal_writer(), but that one blocking operation is for a lock
     * that the writer will have released, or will soon release, so it's
     * a practically uncontended blocking operation.
     */
    if (got_both_slots && atomic_dec_32_nv(&v->other->nreaders) == 0)
        do_signal_writer = 1;
    if (atomic_dec_32_nv(&v->nreaders) == 0 || do_signal_writer)
        err2 = signal_writer(vp);

    /*
     * Release the value previously read in this thread, if any.
     *
     * Note that we call free() here, which means that we might take a
     * lock in free().  The application's value destructor can also do
     * the same.
     *
     * TODO We could use a lock-less queue/stack to queue up wrappers
     * for destruction by writers, so readers could be even more
     * light-weight.
     */
    if (*res != pthread_getspecific(vp->tkey))
        pthread_var_release_np(vp);

    /* Recall this value we just read */
    err = pthread_setspecific(vp->tkey, wrapper);
    return (err2 == 0) ? err : err2;
}
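/*
 * signal_writer() is called by readers when a slot's nreaders drops to
 * zero, and is not defined in this excerpt.  A compiled-out sketch of
 * the assumed implementation: take the cv_lock the writer waits under
 * and signal vp->cv.  The writer holds cv_lock only while checking
 * nreaders, so this is a practically uncontended blocking call.
 */
#if 0
static int
signal_writer(pthread_var_np_t vp)
{
    int err;

    if ((err = pthread_mutex_lock(&vp->cv_lock)) != 0)
        return err;
    (void) pthread_cond_signal(&vp->cv);
    return pthread_mutex_unlock(&vp->cv_lock);
}
#endif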
/* Mark half of mark-and-sweep GC */
static volatile struct value *
mark_values(pthread_var_np_t vp)
{
    volatile struct value **old_values_array;
    volatile struct value * volatile *p;
    volatile struct value *old_values = NULL;
    volatile struct value *v, *v2;
    volatile struct slots *slots;
    struct slot *slot;
    size_t i;

    old_values_array = calloc(vp->nvalues, sizeof(old_values_array[0]));
    if (old_values_array == NULL)
        return NULL; /* can't GC this round; try again on the next write */

    /*
     * XXX There should be no need to atomically read vp->values here,
     * as we are the writer and hold a lock.
     */
    for (i = 0, v = atomic_read_ptr((volatile void **)&vp->values);
         i < vp->nvalues && v != NULL;
         v = v->next, i++)
        old_values_array[i] = v;
    assert(i == vp->nvalues && v == NULL);

    qsort(old_values_array, vp->nvalues, sizeof(old_values_array[0]),
          value_cmp);

    /* Assert that qsort() sorted */
    for (i = 1; i < vp->nvalues; i++)
        assert(old_values_array[i-1] < old_values_array[i]);

    /*
     * Mark.  This is O(N log(N)) where N is the number of subscribed
     * threads, but with the optimizations below, and with a bit of
     * luck, this is more like O(N) than like O(N log(N)).
     */
    vp->values->referenced = 1; /* the current value is always in use */
    for (i = 0, slots = atomic_read_ptr((volatile void **)&vp->slots);
         slots != NULL;
         i++) {
        assert(slots != NULL);
        assert(i >= slots->slot_base);
        assert(i <= slots->slot_base + slots->slot_count);
        if (i == slots->slot_base + slots->slot_count) {
            slots = slots->next;
            if (slots == NULL)
                break;
        }
        assert(slots->slot_count > 0);
        assert(i >= slots->slot_base);
        assert(i < slots->slot_base + slots->slot_count);
        slot = &slots->slot_array[i - slots->slot_base];
        v = atomic_read_ptr((volatile void **)&slot->value);

        /*
         * Optimization: ignore slots with a NULL value.  The owner of
         * that slot may be about to write a value that we're about to
         * free, but they will notice that multiple writers went by and
         * re-read vp->values.
         *
         * Also ignore slots with the current value.
         */
        if (v == NULL || v == vp->values)
            continue;

        /*
         * We can't just dereference v->referenced because there's a
         * window on the get-side where we can set a slot's value to an
         * immediately-after free()'ed value, and we could be seeing
         * such a value, which means we can't dereference it.
         *
         * Instead we search for v in old_values_array[].  If it's
         * found then it's safe to write to v->referenced, because it
         * is stable through the execution of this function and won't
         * be free()'ed until after.
         */
        if ((value_binary_search(old_values_array, vp->nvalues, v)) != NULL) {
            v->referenced = 1; /* so v is valid, safe to deref */
            continue;
        }
        for (v2 = vp->values; v2 != NULL; v2 = v2->next)
            assert(v2 != v);
    }
    free(old_values_array);

    /* Sweep; O(N) where N is the number of referenced values */
    for (p = &vp->values; *p != NULL;) {
        v = *p;
        if (!v->referenced) {
            assert(v != vp->values);

            /* Remove from list and set up to continue at v->next */
            *p = v->next;

            /* Prepend v to the old_values list */
            v->next = old_values;
            old_values = v;
            vp->nvalues--;

            /* Sweep the remainder of the list */
            continue;
        }

        /* Step into this value's next sub-list */
        v->referenced = 0;
        p = &v->next;
        assert(p != &vp->values);
    }

    errno = 0;
    return old_values;
}
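/*
 * value_cmp() and value_binary_search() are used by mark_values() but
 * not defined in this excerpt.  Compiled-out sketches of the assumed
 * helpers: values are ordered by address, so membership in
 * old_values_array[] can be tested in O(log N) without dereferencing
 * the candidate pointer.
 */
#if 0
static int
value_cmp(const void *a, const void *b)
{
    uintptr_t pa = (uintptr_t)*(void * const *)a;
    uintptr_t pb = (uintptr_t)*(void * const *)b;

    return (pa < pb) ? -1 : (pa > pb) ? 1 : 0;
}

static volatile struct value *
value_binary_search(volatile struct value **array, size_t n,
                    volatile struct value *key)
{
    size_t lo = 0, hi = n;

    while (lo < hi) {
        size_t mid = lo + (hi - lo) / 2;

        if (array[mid] == key)
            return array[mid];
        if ((uintptr_t)array[mid] < (uintptr_t)key)
            lo = mid + 1;
        else
            hi = mid;
    }
    return NULL;
}
#endif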
/**
 * Set new data on a thread-safe global variable.
 *
 * @param [in] vp Pointer to thread-safe global variable
 * @param [in] data New value for the thread-safe global variable
 * @param [out] new_version New version number
 *
 * @return 0 on success, or a system error such as ENOMEM.
 */
int
pthread_var_set_np(pthread_var_np_t vp, void *data, uint64_t *new_version)
{
    struct value *new_value;
    volatile struct value *old_values = NULL;
    volatile struct value *value;
    uint64_t vers;
    int err;

    if (new_version == NULL)
        new_version = &vers;
    *new_version = 0;

    if ((new_value = calloc(1, sizeof(*new_value))) == NULL)
        return errno;

    if ((err = pthread_mutex_lock(&vp->write_lock)) != 0) {
        free(new_value);
        return err;
    }

    /*
     * No allocations/free()s of values are done with the write lock
     * held -> higher write throughput.
     */
    new_value->value = data;
    new_value->next = atomic_read_ptr((volatile void **)&vp->values);
    if (new_value->next == NULL)
        new_value->version = 1;
    else
        new_value->version = new_value->next->version + 1;
    *new_version = new_value->version;

    /* Publish the new value */
    atomic_write_ptr((volatile void **)&vp->values, new_value);
    vp->nvalues++;

    if (*new_version < 2) {
        /* Signal waiters */
        (void) pthread_mutex_lock(&vp->waiter_lock);
        (void) pthread_cond_signal(&vp->waiter_cv); /* no thundering herd */
        (void) pthread_mutex_unlock(&vp->waiter_lock);
    }

    /* Now comes the slow part: garbage collect vp->values */
    old_values = mark_values(vp);

    /*
     * Because readers must loop, and could be kept from reading by a
     * long sequence of back-to-back higher-priority writers (presumably
     * all threads of a process will run with the same priority, but we
     * don't know that here), we yield the CPU before releasing the
     * write lock.  Hopefully we yield to a reader.
     */
    yield();

    err = pthread_mutex_unlock(&vp->write_lock);

    /* Free the old values now, holding no locks */
    for (value = old_values; value != NULL; value = old_values) {
        if (vp->dtor)
            vp->dtor(value->value);
        old_values = value->next;
        free((void *)value);
    }

    return err;
}
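/*
 * Illustrative use of the write side; a minimal, compiled-out sketch
 * pairing with the reader example above.  Superseded values are
 * destroyed by the registered dtor once the mark-and-sweep pass shows
 * that no slot still references them.
 */
#if 0
static int
publish_new_value(pthread_var_np_t vp, void *new_data)
{
    uint64_t version;
    int err;

    if ((err = pthread_var_set_np(vp, new_data, &version)) != 0)
        return err; /* new_data was not consumed; the caller still owns it */
    /* version is now the variable's current version number */
    return 0;
}
#endif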
void *
run(void *arg)
{
    unsigned long i;
    char iamreader;
    int key;
    long t = (long)arg;
    long nbn; /* same cache line for the best rand! */
    double start, end;
    node_t **hd;
    lock_t *lck;

    set_affinity(t);

    /************ init *****************/
    thd_ins = 0;
    thd_del = 0;
    thd_sea = 0;
    nbmallocs = 0;
    nbretry = 0;
    nbrelink = 0;
    nbreprev = 0;
    nbfl = 0;
    thread = t;
    thd_nbupdaters = nbupdaters;
    setsignals();
    iamreader = t >= nbupdaters ? 1 : 0;
    if (!iamreader)
        prealloc();
    mysrand(t ^ time(NULL));
    /************** init done **************/

    /************** barrier ****************/
    atomic_xadd4(&ready, 1);
    while (!go)
        memory_barrier();

    /******************* START ****************/
    start = d_gettimeofday();
#ifdef RCU
    rcu_register_thread();
#endif
    i = 0;
    do {
        key = myrand() % nbkeys;
        //key = rand_r(&thd_seed) % nbthreads;
        //key = (t + key) % nbkeys;
        //key = random() % nbkeys;
        //if (i % 100000) printf("%d %d\n", thread, key);

        get_buckets(key, &hd, &lck);
        if (iamreader) {
            thd_sea += search(key, hd, lck);
            if (i >= NB_TEST)
                break;
        } else {
            if (i % 2)
                thd_ins += insert(key, hd, lck);
            else
                thd_del += delete(key, hd, lck);
            if (done) {
                //printf("writer stopped\n");
                break;
            }
        }
        //if (!(i % 10000))
        //    printf("%d loop %d\n", t, i);

#ifdef RCU_QSBR
#if ((defined RCU_QSBR_ACCELERATE) || (defined RCU_Q10))
#ifdef RCU_Q10
        if (!(i % 10))
#else
        if (!(i % 100))
#endif
#endif
            rcu_quiescent_state();
#endif
    } while (++i);

#ifdef RCU
    //if (!iamreader) rcu_barrier();
    //printf("iamreader %d, ops %d\n", iamreader, i);
    rcu_unregister_thread();
#endif
    end = d_gettimeofday();
    /******************* END ****************/

    /* number of ops done */
    thd_ops[t] = i;
    //printf("nbmallocs %g\n", nbmallocs);
    thd_mallocs[t] = nbmallocs;
    thd_retry[t] = nbretry;
    thd_relink[t] = nbrelink;
    thd_reprev[t] = nbreprev;
    //if (t == 0) printf("%lu | nbblocked %g\n", t, nbblockeds);
#ifdef RCU
    thd_blockeds[t] = atomic_read_ptr(&rcublockeds);
#else
    thd_blockeds[t] = nbblockeds;
#endif

    /* average time per op */
    avg_time[t] = ((end - start) / i);
    /* total time */
    thd_time[t] = (end - start);
    suc_ins[t] = thd_ins;
    suc_sea[t] = thd_sea;
    suc_del[t] = thd_del;

    return NULL;
}
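/*
 * Compiled-out sketch of the launch protocol run() expects: the main
 * thread spawns the workers, waits for each of them to bump `ready`,
 * then flips `go` so all threads leave the barrier together.
 * MAX_THREADS and atomic_read4() are assumed names, not part of the
 * benchmark as shown.
 */
#if 0
static void
launch_threads(long nbthreads)
{
    pthread_t tids[MAX_THREADS];
    long t;

    for (t = 0; t < nbthreads; t++)
        pthread_create(&tids[t], NULL, run, (void *)t);
    while (atomic_read4(&ready) != nbthreads) /* wait for all workers */
        memory_barrier();
    go = 1;                                   /* release the barrier */
    memory_barrier();
    for (t = 0; t < nbthreads; t++)
        pthread_join(tids[t], NULL);
}
#endif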