#include <inttypes.h>
#include <stdio.h>

#include <ck_rwlock.h>

/* STEPS and rdtsc() are provided by the surrounding benchmark harness. */
int
main(void)
{
    uint64_t s_b, e_b, i;
    ck_rwlock_t rwlock = CK_RWLOCK_INITIALIZER;

    /* Warm up, then time STEPS uncontended write acquisitions. */
    for (i = 0; i < STEPS; i++) {
        ck_rwlock_write_lock(&rwlock);
        ck_rwlock_write_unlock(&rwlock);
    }

    s_b = rdtsc();
    for (i = 0; i < STEPS; i++) {
        ck_rwlock_write_lock(&rwlock);
        ck_rwlock_write_unlock(&rwlock);
    }
    e_b = rdtsc();
    printf("WRITE: rwlock %15" PRIu64 "\n", (e_b - s_b) / STEPS);

    /* Same measurement for the read-side fast path. */
    for (i = 0; i < STEPS; i++) {
        ck_rwlock_read_lock(&rwlock);
        ck_rwlock_read_unlock(&rwlock);
    }

    s_b = rdtsc();
    for (i = 0; i < STEPS; i++) {
        ck_rwlock_read_lock(&rwlock);
        ck_rwlock_read_unlock(&rwlock);
    }
    e_b = rdtsc();
    printf("READ: rwlock %15" PRIu64 "\n", (e_b - s_b) / STEPS);

    return (0);
}
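/*
 * The benchmark above only exercises the blocking entry points. ck_rwlock
 * also provides trylock variants and a write-to-read downgrade; a minimal
 * sketch, reusing the rwlock from above. Both trylock calls return true on
 * success, so callers can back off instead of blocking.
 */
if (ck_rwlock_write_trylock(&rwlock) == true) {
    /* ... mutate shared state ... */
    ck_rwlock_write_downgrade(&rwlock); /* hold as a reader from here on */
    /* ... read the state just written, concurrently with other readers ... */
    ck_rwlock_read_unlock(&rwlock);
}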
datum_t *hash_delete(datum_t *key, hash_t *hash)
{
    size_t i;
    node_t *bucket, *last = NULL;

    i = hashval(key, hash);

    ck_rwlock_write_lock(&hash->lock[i]);

    bucket = &hash->node[i];
    if (bucket->key == NULL) {
        /* Empty bucket: nothing to delete. */
        ck_rwlock_write_unlock(&hash->lock[i]);
        return NULL;
    }

    for (; bucket != NULL; last = bucket, bucket = bucket->next) {
        node_t tmp;

        /* Skip chain entries whose key doesn't match. */
        if (bucket->key == NULL || !hash_keycmp(hash, bucket->key, key))
            continue;

        if (bucket == &hash->node[i]) {
            /* The head node is embedded in the table, so instead of
             * unlinking it we pull the next entry's contents into it. */
            tmp.key = bucket->key;
            tmp.val = bucket->val;
            if (bucket->next) {
                node_t *next = bucket->next;

                bucket->key = next->key;
                bucket->val = next->val;
                bucket->next = next->next;
                free(next); /* its contents now live in the head node */
            } else {
                memset(bucket, 0, sizeof(*bucket));
            }
            datum_free(tmp.key);
            ck_rwlock_write_unlock(&hash->lock[i]);
        } else {
            /* Chained node: unlink and free it. */
            last->next = bucket->next;
            datum_free(bucket->key);
            tmp.val = bucket->val;
            free(bucket);
            ck_rwlock_write_unlock(&hash->lock[i]);
        }
        return tmp.val;
    }

    ck_rwlock_write_unlock(&hash->lock[i]);
    return NULL;
}
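/*
 * For contrast, the read side of this table takes the per-bucket read lock,
 * so lookups of the same bucket can proceed concurrently with each other
 * while still excluding writers. A minimal sketch; hash_lookup is
 * hypothetical and not part of the excerpt above.
 */
datum_t *hash_lookup(datum_t *key, hash_t *hash)
{
    size_t i = hashval(key, hash);
    node_t *bucket;

    ck_rwlock_read_lock(&hash->lock[i]);
    for (bucket = &hash->node[i]; bucket != NULL; bucket = bucket->next) {
        if (bucket->key && hash_keycmp(hash, bucket->key, key)) {
            datum_t *val = bucket->val;

            ck_rwlock_read_unlock(&hash->lock[i]);
            return val;
        }
    }
    ck_rwlock_read_unlock(&hash->lock[i]);
    return NULL;
}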
ph_hook_point_t *ph_hook_point_get(ph_string_t *name, bool create)
{
    ph_hook_point_t *hp = NULL;

    ck_rwlock_read_lock(&rwlock);
    {
        ph_ht_lookup(&hook_hash, &name, &hp, false);
    }
    ck_rwlock_read_unlock(&rwlock);

    if (hp || !create) {
        return hp;
    }

    ck_rwlock_write_lock(&rwlock);
    {
        // Look again: someone may have populated while we were unlocked
        ph_ht_lookup(&hook_hash, &name, &hp, false);
        if (!hp) {
            hp = ph_mem_alloc(mt.hookpoint);
            if (hp) {
                if (ph_ht_set(&hook_hash, &name, &hp) != PH_OK) {
                    ph_mem_free(mt.hookpoint, hp);
                    hp = NULL;
                }
            }
        }
    }
    ck_rwlock_write_unlock(&rwlock);

    return hp;
}
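/*
 * Note the second lookup under the write lock: ck_rwlock has no
 * read-to-write upgrade, so the lock is released entirely between the read
 * and write sections, and another thread may have created the hook point in
 * that window. Re-checking under the write lock makes the get-or-create
 * race-free at the cost of one extra lookup.
 */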
int _segment_list_close_segment(struct segment_list *segment_list, uint32_t segment_number)
{
    // Take out a write lock so we are mutually exclusive with get_segment
    ck_rwlock_write_lock(segment_list->lock);

    segment_t *segment = __segment_number_to_segment(segment_list, segment_number);

    // Check the refcount and fail to close the segment if the refcount is not zero
    if (ck_pr_load_32(&segment->refcount) != 0) {
        // TODO: More specific error
        ck_rwlock_write_unlock(segment_list->lock);
        return -1;
    }

    // Do not close a segment twice
    if (segment->state == CLOSED) {
        // TODO: More specific error
        ck_rwlock_write_unlock(segment_list->lock);
        return -1;
    }

    // This function may be called when a segment is in the FREE state or the READING state.
    // This can happen in the lock-free synchronization built on top of this structure, since a
    // slow thread with an old segment number might get here after other threads have advanced
    // past this segment. Since this is valid, just return an error so that the slow thread can
    // recover.
    if (segment->state != WRITING) {
        // TODO: More specific error
        ck_rwlock_write_unlock(segment_list->lock);
        return -1;
    }

    // Destroy the segment, but close the store rather than destroying it because we don't want
    // to delete the on-disk store files
    ensure(_free_segment_inlock(segment_list, segment_number, false/*destroy_store*/) == 0,
           "Failed to internally destroy segment in close segment function");

    segment->state = CLOSED;

    ck_rwlock_write_unlock(segment_list->lock);
    return 0;
}
static ph_result_t do_register(ph_hook_point_t *hp, ph_hook_func func,
    void *closure, int8_t pri, ph_hook_unreg_func unreg)
{
    ph_hook_point_head_t *old_head, *new_head;
    ph_result_t res = PH_ERR;
    uint16_t num_items = 0;

    ck_rwlock_write_lock(&rwlock);
    {
        old_head = hp->head;
        if (old_head) {
            num_items = old_head->nitems;
        }

        // num_items is 1-less than the number we want, but the head struct
        // has 1 element embedded, so we're fine to multiply it out here
        new_head = ph_mem_alloc_size(mt.head,
                sizeof(*new_head) + (sizeof(ph_hook_item_t) * num_items));
        if (!new_head) {
            goto done;
        }

        new_head->nitems = num_items + 1;

        // Copy old data in
        if (old_head) {
            memcpy(new_head->items, old_head->items,
                old_head->nitems * sizeof(ph_hook_item_t));
        }

        new_head->items[num_items].closure = closure;
        new_head->items[num_items].func = func;
        new_head->items[num_items].pri = pri;
        new_head->items[num_items].unreg = unreg;

        qsort(new_head->items, new_head->nitems, sizeof(ph_hook_item_t),
            compare_item);

        hp->head = new_head;
        if (old_head) {
            ph_thread_epoch_defer(&old_head->entry, free_head);
        }

        res = PH_OK;
    }
done:
    ck_rwlock_write_unlock(&rwlock);
    return res;
}
// TODO: Decide how to handle the flags. Should they be passed to the underlying store?
int _segment_list_allocate_segment(segment_list_t *segment_list, uint32_t segment_number)
{
    ck_rwlock_write_lock(segment_list->lock);

    // Make sure the list is not full
    // TODO: Return an actual error here
    ensure(!__is_segment_list_full_inlock(segment_list),
           "Attempted to allocate segment in full list");

    segment_t *segment = __segment_number_to_segment(segment_list, segment_number);

    // Make sure we are not trying to allocate a segment past our current head. That case is a
    // programming error. The case where the segment number is much less than the head, however,
    // can happen during normal multithreaded operation if a slow thread calls this function
    // with an old segment number. Assert in the former case, but return an error in the latter.
    ensure(segment_list->head >= segment_number,
           "Attempted to allocate a segment past the next sequential segment");

    // Make sure we are allocating the next sequential segment
    if (segment_list->head != segment_number) {
        ck_rwlock_write_unlock(segment_list->lock);
        return -1;
    }

    ensure(segment->state == FREE, "Attempted to allocate segment not in the FREE state");

    ensure(_allocate_segment_inlock(segment_list, segment_number, false/*reopen_store*/) == 0,
           "Failed to allocate segment");

    // Move up the head, effectively allocating the segment
    segment_list->head++;

    // Newly allocated segments are in the "WRITING" state
    segment->state = WRITING;

    ck_rwlock_write_unlock(segment_list->lock);
    return 0;
}
segment_t* _segment_list_get_segment_for_reading(struct segment_list *segment_list, uint32_t segment_number)
{
    // We have to take a write lock because we might be allocating a segment (reopening it from
    // an existing file).
    ck_rwlock_write_lock(segment_list->lock);

    segment_t *segment = __segment_number_to_segment(segment_list, segment_number);

    // This segment is outside the list
    // TODO: More specific error handling
    if (!__is_segment_number_in_segment_list_inlock(segment_list, segment_number)) {
        segment = NULL;
        goto end;
    }

    // If this segment is free, we may have just been too slow, so return NULL rather than
    // asserting to give the caller an opportunity to recover
    // TODO: More specific error handling
    if (segment->state == FREE) {
        segment = NULL;
        goto end;
    }

    // We should only be attempting to read from a segment in the READING or CLOSED states.
    // If a user is attempting to get a segment for reading that is in the WRITING state, that
    // is a programming error, since it cannot happen as a race condition
    ensure(segment->state == READING || segment->state == CLOSED,
           "Attempted to get segment for reading not in the READING or CLOSED states");

    // If this segment is closed, reopen it
    if (segment->state == CLOSED) {
        // Allocate the segment and reopen the existing store file
        ensure(_allocate_segment_inlock(segment_list, segment_number, true/*reopen_store*/) == 0,
               "Failed to allocate segment, from existing file");

        // Reopened segments are in the READING state
        segment->state = READING;
    }

    // Increment the refcount of the newly initialized segment since we are returning it
    ck_pr_inc_32(&segment->refcount);

end:
    ck_rwlock_write_unlock(segment_list->lock);
    return segment;
}
/*
 * This function attempts to free segments, and returns the number of the segment up to which we
 * have freed. These semantics are a little strange, because segment numbers are uint32_t and our
 * first segment is zero. We only want this function to return zero when we have not freed any
 * segments, but if we returned the last segment we freed we would have to return zero after we've
 * freed segment 0. As it is now, we will return 1 in that case, because having freed segment 0
 * means we've freed up to segment 1.
 */
uint32_t _segment_list_free_segments(struct segment_list *segment_list, uint32_t segment_number, bool destroy_store)
{
    ck_rwlock_write_lock(segment_list->lock);

    // TODO: Think more carefully about what this function can return
    uint32_t freed_up_to = segment_list->tail;

    // Try to free as many segments as we can up to the provided segment number
    while (segment_list->tail <= segment_number && segment_list->head != segment_list->tail) {
        segment_t *segment = __segment_number_to_segment(segment_list, segment_list->tail);

        // We should not be freeing a segment in the WRITING or CLOSED state
        ensure(segment->state == READING, "Attempted to free segment not in the READING state");

        // Do not free this segment if the refcount is not zero
        if (ck_pr_load_32(&segment->refcount) != 0) {
            // Do not try to free any more segments
            break;
        }

        ensure(_free_segment_inlock(segment_list, segment->segment_number, destroy_store) == 0,
               "Failed to internally destroy segment in free segments function");

        segment->state = FREE;

        // Move the tail up
        segment_list->tail++;

        // Record the segment we have freed up to
        freed_up_to = segment_list->tail;
    }

    ck_rwlock_write_unlock(segment_list->lock);
    return freed_up_to;
}
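/*
 * Taken together, the segment_list functions above imply a per-segment state
 * machine: FREE -> WRITING -> CLOSED -> READING -> FREE. A hypothetical
 * happy-path walkthrough, assuming a populated segment_list_t named list;
 * the direct refcount decrement stands in for whatever release call the
 * full API provides.
 */
uint32_t n = 0;

ensure(_segment_list_allocate_segment(list, n) == 0, "allocate");   /* FREE -> WRITING */
/* ... append to the segment's store ... */
ensure(_segment_list_close_segment(list, n) == 0, "close");         /* WRITING -> CLOSED */

segment_t *seg = _segment_list_get_segment_for_reading(list, n);    /* CLOSED -> READING */
ensure(seg != NULL, "get segment for reading");
/* ... read from the segment's store ... */
ck_pr_dec_32(&seg->refcount);   /* drop our reference so the segment can be freed */

/* READING -> FREE; returns n + 1 once segment n has been freed */
ensure(_segment_list_free_segments(list, n, true/*destroy_store*/) == n + 1, "free");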
datum_t *hash_insert(datum_t *key, datum_t *val, hash_t *hash)
{
    size_t i;
    node_t *bucket;

    i = hashval(key, hash);

    ck_rwlock_write_lock(&hash->lock[i]);

    bucket = &hash->node[i];
    if (bucket->key == NULL) {
        /* This bucket hasn't been used yet. The head node is embedded in
         * the table, so on failure we reset its fields rather than
         * free()ing it. */
        bucket->key = datum_dup(key);
        if (bucket->key == NULL) {
            ck_rwlock_write_unlock(&hash->lock[i]);
            return NULL;
        }
        bucket->val = datum_dup(val);
        if (bucket->val == NULL) {
            datum_free(bucket->key);
            bucket->key = NULL;
            ck_rwlock_write_unlock(&hash->lock[i]);
            return NULL;
        }
        ck_rwlock_write_unlock(&hash->lock[i]);
        return bucket->val;
    }

    /* This node in the hash is already in use: collision, or new data for
     * an existing key. */
    for (; bucket != NULL; bucket = bucket->next) {
        if (bucket->key && hash_keycmp(hash, bucket->key, key)) {
            /* New data for an existing key: make sure we have enough room. */
            if (bucket->val->size < val->size) {
                void *data = realloc(bucket->val->data, val->size);
                if (data == NULL) {
                    ck_rwlock_write_unlock(&hash->lock[i]);
                    return NULL;
                }
                bucket->val->data = data;
                bucket->val->size = val->size;
            }
            memset(bucket->val->data, 0, val->size);
            memcpy(bucket->val->data, val->data, val->size);
            ck_rwlock_write_unlock(&hash->lock[i]);
            return bucket->val;
        }
    }

    /* It's a hash collision: link a new node into the collided bucket. */
    bucket = calloc(1, sizeof(*bucket));
    if (bucket == NULL) {
        ck_rwlock_write_unlock(&hash->lock[i]);
        return NULL;
    }
    bucket->key = datum_dup(key);
    if (bucket->key == NULL) {
        free(bucket);
        ck_rwlock_write_unlock(&hash->lock[i]);
        return NULL;
    }
    bucket->val = datum_dup(val);
    if (bucket->val == NULL) {
        datum_free(bucket->key);
        free(bucket);
        ck_rwlock_write_unlock(&hash->lock[i]);
        return NULL;
    }
    bucket->next = hash->node[i].next;
    hash->node[i].next = bucket;

    ck_rwlock_write_unlock(&hash->lock[i]);
    return bucket->val;
}
int _compare_and_swap(persistent_atomic_value_t *pav, uint32_t old_value, uint32_t new_value)
{
    // First lock this counter
    ck_rwlock_write_lock(pav->_lock);

    // Then, check to see if someone changed this value before we got here
    if (ck_pr_load_32(&pav->_current_value) != old_value) {
        ck_rwlock_write_unlock(pav->_lock);
        return -1;
    }

    // We got here first. Set the new value.
    ck_pr_store_32(&pav->_current_value, new_value);

    // Now, persist the value:
    // 1. Write it to a temporary file
    // 2. Delete the original file
    // 3. Link the temporary file to the original file
    // 4. Unlink the temporary file
    int fail = 0;

    // 1.
    int open_flags = O_RDWR | O_CREAT | O_EXCL | O_SYNC;
    int fd = open(pav->_temporary_filename, open_flags, (mode_t)0600);
    if (fd < 0) {
        fail = -2;
        goto end;
    }
    ssize_t nwritten = write(fd, &pav->_current_value, sizeof(pav->_current_value));
    if (fsync(fd) != 0) {
        fail = -2;
        close(fd);
        goto end;
    }
    close(fd);
    if (nwritten != (ssize_t)sizeof(pav->_current_value)) {
        fail = -2;
        goto end;
    }

    // 2.
    if (unlink(pav->_filename) != 0) {
        fail = -3;
        goto end;
    }

    // 3.
    if (link(pav->_temporary_filename, pav->_filename) != 0)
        fail = -4;

end:
    // 4.
    if (unlink(pav->_temporary_filename) != 0)
        fail = -5;

    // Roll back the in-memory value if persistence failed
    if (fail != 0) {
        ck_pr_store_32(&pav->_current_value, old_value);
    }

    ck_rwlock_write_unlock(pav->_lock);

    // For now
    ensure(fail == 0, "Failed during persistent update");
    return fail;
}
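/*
 * A crash can land between any two of those steps, so recovery has to
 * consider both files. A hypothetical startup sketch, not part of the
 * excerpt above: _load_persisted_value and the fallback policy are
 * assumptions. It prefers the main file and falls back to the temporary
 * file, which holds the new value if the crash hit between steps 2 and 3
 * (main file unlinked, link not yet created).
 */
static void _load_persisted_value(persistent_atomic_value_t *pav)
{
    uint32_t value = 0;

    int fd = open(pav->_filename, O_RDONLY);
    if (fd < 0) {
        fd = open(pav->_temporary_filename, O_RDONLY);
    }
    ensure(fd >= 0, "No persisted value found");
    ensure(read(fd, &value, sizeof(value)) == (ssize_t)sizeof(value),
           "Short read of persisted value");
    close(fd);

    ck_pr_store_32(&pav->_current_value, value);
}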
static ph_result_t do_unregister(ph_hook_point_t *hp, ph_hook_func func, void *closure)
{
    ph_hook_point_head_t *old_head, *new_head;
    ph_result_t res = PH_ERR;
    uint16_t off = 0;
    bool found = false;
    struct ph_hook_item_free *unreg = NULL;

    ck_rwlock_write_lock(&rwlock);
    {
        old_head = hp->head;
        if (!old_head) {
            goto done;
        }

        for (off = 0; off < old_head->nitems; off++) {
            if (old_head->items[off].func == func &&
                old_head->items[off].closure == closure) {
                found = true;
                break;
            }
        }
        if (!found) {
            goto done;
        }

        new_head = ph_mem_alloc_size(mt.head,
                sizeof(*new_head) + (sizeof(ph_hook_item_t) * (old_head->nitems - 1)));
        if (!new_head) {
            goto done;
        }

        if (old_head->items[off].unreg) {
            unreg = ph_mem_alloc(mt.unreg);
            if (!unreg) {
                ph_mem_free(mt.head, new_head);
                goto done;
            }
            unreg->closure = old_head->items[off].closure;
            unreg->func = old_head->items[off].func;
            unreg->unreg = old_head->items[off].unreg;
        }

        new_head->nitems = old_head->nitems - 1;

        // Copy before the item
        if (off) {
            memcpy(new_head->items, old_head->items, off * sizeof(ph_hook_item_t));
        }

        // Copy after the item
        if (off + 1 <= old_head->nitems) {
            memcpy(new_head->items + off, old_head->items + off + 1,
                (old_head->nitems - (off + 1)) * sizeof(ph_hook_item_t));
        }

        // Don't need to re-sort, since we simply removed that item
        hp->head = new_head;
        ph_thread_epoch_defer(&old_head->entry, free_head);

        // Arrange to unregister
        if (unreg) {
            ph_thread_epoch_defer(&unreg->entry, call_unreg);
        }

        res = PH_OK;
    }
done:
    ck_rwlock_write_unlock(&rwlock);
    return res;
}
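/*
 * Both do_register and do_unregister replace hp->head wholesale and retire
 * the old head through ph_thread_epoch_defer() instead of freeing it, which
 * suggests hook dispatch reads the head without taking the rwlock at all.
 * A minimal dispatch sketch under that assumption; it assumes phenom exposes
 * ph_thread_epoch_begin()/ph_thread_epoch_end() to bracket the read-side
 * epoch section. The deferred free cannot run until every thread has left
 * its epoch section, so the snapshot stays valid even if a concurrent
 * do_register/do_unregister swaps in a new head.
 */
ph_thread_epoch_begin();
{
    ph_hook_point_head_t *head = ck_pr_load_ptr(&hp->head);
    if (head) {
        uint16_t i;
        for (i = 0; i < head->nitems; i++) {
            /* invoke head->items[i].func with head->items[i].closure and
             * the hook point's arguments here */
        }
    }
}
ph_thread_epoch_end();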