Example #1
void __mmap_schedule_store_sync(struct mmap_store *mstore, uint32_t write_cursor) {
    // Lock-free but not wait-free
    uint32_t *sync_cursor = &mstore->sync_cursor;
    uint32_t sync_pos = ck_pr_load_32(sync_cursor);

    // TODO: Add in the sync flags for allowing things like dirty reads
    // TODO: Protect the nearest page once synced
    //mprotect(mapping, off, PROT_READ);
    // Schedule an async flush once we are more than 4KB ahead of the last sync
    if (write_cursor - sync_pos > (4 * 1024)) {
        uint32_t sync_distance = write_cursor - sync_pos;
        // Note: msync expects a page-aligned start address
        msync((char *) mstore->mapping + sync_pos, sync_distance, MS_ASYNC);
    }

    if (write_cursor - sync_pos > (64 * 1024 * 1024)) {
        fsync(mstore->fd);
        // Try to write the new cursor, give up if you miss the race
        ck_pr_cas_32(sync_cursor, sync_pos, write_cursor);
    }
}
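
These snippets never show the definition of struct mmap_store itself. The sketch below reconstructs it from the fields the code touches; the field names match the code, but the layout and any omitted fields are assumptions.

/* Assumed shape of the store these examples operate on (a reconstruction,
 * not the project's actual definition) */
struct mmap_store {
    store_t store;                /* embedded base store (assumed first member) */
    void *mapping;                /* mmap'd region backing the store */
    int fd;                       /* file descriptor behind the mapping */
    uint32_t capacity;            /* total size of the mapping in bytes */
    uint32_t write_cursor;        /* next write offset, advanced via CAS */
    uint32_t sync_cursor;         /* offset synced up to (Example #1) */
    uint32_t read_cursor;         /* offset of the last record handed to a reader */
    uint32_t last_sync;           /* page-aligned msync high-water mark (Example #3) */
    uint32_t syncing_and_writers; /* packed "syncing" bit + writer count */
    uint32_t synced;              /* set to 1 once a full sync completes */
};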
Example #2
/*
 * Write data into the store implementation
 *
 * params
 *  *data - data to write
 *  size - amount to write
 *
 * return
 *  offset written to on success
 *  0 - capacity exceeded
 */
uint32_t _mmap_write(store_t *store, void *data, uint32_t size) {
    struct mmap_store *mstore = (struct mmap_store*) store;
    void *mapping = mstore->mapping;
    ensure(mapping != NULL, "Bad mapping");

    // [uint32_t,BYTES]
    uint32_t *write_cursor = &mstore->write_cursor;
    uint32_t required_size = (sizeof(uint32_t) + size);

    uint32_t cursor_pos = 0;
    uint32_t new_pos = 0;

    while (true) {
        cursor_pos = ck_pr_load_32(write_cursor);
        ensure(cursor_pos != 0, "Incorrect cursor pos");
        uint32_t remaining = mstore->capacity - cursor_pos;

        if (remaining <= required_size) {
            return 0;
        }

        new_pos = cursor_pos + required_size;
        if (ck_pr_cas_32(write_cursor, cursor_pos, new_pos)) {
            break;
        }
    }
    ensure(new_pos != 0, "Invalid write position");
    ensure(cursor_pos != 0, "Invalid cursor position");

    char *dest = (char *) mapping + cursor_pos;
    ((uint32_t *) dest)[0] = size;
    dest += sizeof(uint32_t);
    memcpy(dest, data, size);

    __mmap_schedule_store_sync(mstore, cursor_pos);
    return cursor_pos;
}
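
The write path lays each record out as a uint32_t length prefix followed by the payload (the [uint32_t,BYTES] comment above). A reader at a known offset can decode a record as sketched below; read_record is a hypothetical helper, not part of the original code.

/* Hypothetical decoder for the [uint32_t,BYTES] record layout */
uint32_t read_record(struct mmap_store *mstore, uint32_t offset, void **payload) {
    char *base = (char *) mstore->mapping + offset;
    uint32_t size;
    memcpy(&size, base, sizeof(uint32_t)); /* memcpy avoids an unaligned load */
    *payload = base + sizeof(uint32_t);
    return size;
}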
Example #3
/*
 * Write data into the store implementation
 *
 * params
 *  *data - data to write
 *  size - amount to write
 *
 * return
 *  offset written to on success
 *  0 - capacity exceeded or a sync is in progress
 */
uint32_t _mmap_write(store_t *store, void *data, uint32_t size) {
    struct mmap_store *mstore = (struct mmap_store*) store;
    void *mapping = mstore->mapping;
    ensure(mapping != NULL, "Bad mapping");

    // We must ensure that no writes are happening during a sync.  To do this, we pack both the
    // "syncing" bit and the number of writers in the same 32-bit value.
    // 1. Load the "syncing_and_writers" value
    // 2. Check if "syncing" and abort if so
    // 3. Increment the number of writers
    // 4. Try to Compare and Swap this value
    // 5. Repeat if CAS fails
    bool writers_incremented = false;
    while (!writers_incremented) {

        // 1.
        uint32_t syncing_and_writers = ck_pr_load_32(&mstore->syncing_and_writers);
        uint32_t syncing = EXTRACT_SYNCING(syncing_and_writers);
        uint32_t writers = EXTRACT_WRITERS(syncing_and_writers);

        // Make sure we aren't already at 2^31 - 1 writers.  If we try to increment when we
        // already have that many we will overflow the 31 bits we are using to store the count.
        ensure(writers < 0x7FFFFFFFU, "Too many writers");

        // 2.
        if (syncing == 1) {
            return 0;
        }

        // 3.
        // 4.
        if (ck_pr_cas_32(&mstore->syncing_and_writers, syncing_and_writers, syncing_and_writers + 1)) {
            writers_incremented = true;
        }
    }

    ensure(ck_pr_load_32(&mstore->synced) == 0, "A writer should not get here when the store is synced");

    // [uint32_t,BYTES]
    uint32_t *write_cursor = &mstore->write_cursor;
    uint32_t required_size = (sizeof(uint32_t) + size);

    uint32_t cursor_pos = 0;
    uint32_t new_pos = 0;
    uint32_t ret = 0;

    // Assert if we are trying to write a block larger than the capacity of this store, and the
    // store is empty.  This is to die fast on the case where we have a block that we can never
    // write to any store of this size.
    // TODO: Actually handle this case gracefully
    ensure(((mstore->capacity - store->start_cursor(store)) >= required_size) ||
           (ck_pr_load_32(write_cursor) != store->start_cursor(store)),
           "Attempting to write a block of data larger than the total capacity of our store");

    while (true) {
        cursor_pos = ck_pr_load_32(write_cursor);
        ensure(cursor_pos != 0, "Incorrect cursor pos");
        uint32_t remaining = mstore->capacity - cursor_pos;

        if (remaining <= required_size) {
            // TODO: Structure this code better.  Right now, this works because "ret" is still zero,
            // and we return zero in the case where our data couldn't be written because the store
            // was full.
            goto decrement_writers;
        }

        new_pos = cursor_pos + required_size;
        if (ck_pr_cas_32(write_cursor, cursor_pos, new_pos)) {
            break;
        }
    }
    ensure(new_pos != 0, "Invalid write position");
    ensure(cursor_pos != 0, "Invalid cursor position");

    char *dest = (char *) mapping + cursor_pos;
    ((uint32_t *) dest)[0] = size;
    dest += sizeof(uint32_t);
    memcpy(dest, data, size);

    // If our new cursor is 1024 pages past where we last synced, try to sync
    // TODO: Make this tunable
    long page_size = sysconf(_SC_PAGESIZE);
    uint32_t last_sync = ck_pr_load_32(&mstore->last_sync);
    if (new_pos > last_sync + page_size * 1024) {
        ensure(last_sync % page_size == 0,
               "Last sync offset is not a multiple of page size, which is needed for msync");
        uint32_t page_aligned_new_pos = (new_pos - (new_pos % page_size));
        if (ck_pr_cas_32(&mstore->last_sync, last_sync, page_aligned_new_pos)) {
            // TODO: Sync the previous page too, since it may have gotten dirtied
            ensure(msync(mapping + last_sync, page_size * 1024, MS_ASYNC) == 0, "Unable to sync");
        }
    }

    ensure(ck_pr_load_32(&mstore->synced) == 0, "A writer should not be here when the store is synced");

    // Return the position in the store that we wrote to
    // TODO: Clean up the error handling and return values for this function
    ret = cursor_pos;

    bool writers_decremented;
decrement_writers:
    // Assign here rather than at the declaration: the goto above would skip an
    // initializer, leaving writers_decremented uninitialized on that path.
    // TODO: Structure this function better.
    writers_decremented = false;

    // Decrement the number of writers to indicate that we are finished writing
    // 1. Load the "syncing_and_writers" value
    // 2. Decrement the number of writers
    // 3. Try to Compare and Swap this value
    // 4. Repeat if CAS fails
    while (!writers_decremented) {

        // 1.
        uint32_t syncing_and_writers = ck_pr_load_32(&mstore->syncing_and_writers);
        uint32_t writers = EXTRACT_WRITERS(syncing_and_writers);

        // Invariants
        ensure(writers > 0, "Would decrement the number of writers below zero");
        ensure(ck_pr_load_32(&mstore->synced) == 0,
               "The sync should not have gone through since we are not done writing");

        // 2.
        // 3.
        if (ck_pr_cas_32(&mstore->syncing_and_writers, syncing_and_writers, syncing_and_writers - 1)) {
            writers_decremented = true;
        }
    }

    return ret;
}
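
The EXTRACT_SYNCING, EXTRACT_WRITERS, and SET_SYNCING macros are never shown. A packing consistent with the "31 bits we are using to store the count" comment puts the syncing flag in the top bit and the writer count in the low 31 bits; the definitions below are a plausible reconstruction, not the project's actual macros.

/* Assumed bit layout of syncing_and_writers:
 *   bit 31     - "syncing" flag
 *   bits 0..30 - count of in-flight writers
 */
#define SYNCING_BIT        0x80000000U
#define EXTRACT_SYNCING(v) (((v) & SYNCING_BIT) >> 31)
#define EXTRACT_WRITERS(v) ((v) & 0x7FFFFFFFU)
#define SET_SYNCING(v)     ((v) | SYNCING_BIT)

Under this layout the bare + 1 and - 1 in _mmap_write adjust only the writer count, which is why the overflow check on the 31-bit counter matters.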
Example #4
/**
 * Force this store to sync if needed
 *
 * return
 *  0 - success
 *  1 - failure (not currently returned; failures abort via ensure)
 */
uint32_t _mmap_sync(store_t *store) {
    struct mmap_store *mstore = (struct mmap_store*) store;

    // The point we have written up to
    uint32_t write_cursor = ck_pr_load_32(&mstore->write_cursor);

    ensure(write_cursor > sizeof(uint32_t) * 2, "Attempted to sync an empty store");

    // We must ensure that no writes are happening during a sync.  To do this, we pack both the
    // "syncing" bit and the number of writers in the same 32-bit value.
    // 1. Load the "syncing_and_writers" value
    // 2. Set that we are syncing
    // 3. Try to Compare and Swap this value
    // 4. Repeat until "writers" == 0
    while (1) {

        // 1.
        uint32_t syncing_and_writers = ck_pr_load_32(&mstore->syncing_and_writers);
        uint32_t syncing = EXTRACT_SYNCING(syncing_and_writers);
        uint32_t writers = EXTRACT_WRITERS(syncing_and_writers);

        // Sanity check: the writer count must fit in the 31 bits used to store it
        ensure(writers < 0x7FFFFFFFU, "Too many writers");

        // 2.
        // 3.
        if (syncing == 0) {
            if (!ck_pr_cas_32(&mstore->syncing_and_writers, syncing_and_writers, SET_SYNCING(syncing_and_writers))) {
                continue;
            }
        }

        // 4.
        if (writers == 0) {
            break;
        }
    }

    // The point we have written up to
    write_cursor = ck_pr_load_32(&mstore->write_cursor);

    // Actually sync.  At this point we are guaranteed there are no writers, so sync the entire
    // store.
    //TODO: Protect the nearest page once synced
    //mprotect(mapping, off, PROT_READ);
    ensure(msync(mstore->mapping, write_cursor, MS_SYNC) == 0, "Unable to msync");
    ensure(fsync(mstore->fd) == 0, "Unable to fsync");

    // Record that we synced successfully.  This will allow readers to progress.
    ck_pr_store_32(&mstore->synced, 1);

    uint32_t syncing_and_writers = ck_pr_load_32(&mstore->syncing_and_writers);
    uint32_t syncing = EXTRACT_SYNCING(syncing_and_writers);
    uint32_t writers = EXTRACT_WRITERS(syncing_and_writers);

    ensure(writers == 0, "We should not have synced the store when there are still writers");
    ensure(syncing == 1, "We should not have synced the store when we did not mark it as syncing");

    return 0;
}
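
Putting the two halves together, a caller writes records and then forces a sync before readers are allowed in. The sketch assumes store_t exposes write and sync function pointers alongside the start_cursor member used above; those member names are assumptions.

/* Hypothetical caller: write one record, then force a sync */
void example_write_and_sync(store_t *store) {
    const char msg[] = "hello";
    uint32_t off = store->write(store, (void *) msg, sizeof(msg));
    if (off == 0) {
        return; /* store full or syncing; callers would rotate to a new store */
    }
    ensure(store->sync(store) == 0, "Sync failed");
}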
Example #5
store_cursor_t* _mmap_pop_cursor(store_t *store) {

    // This is really an mmap store
    struct mmap_store *mstore = (struct mmap_store*) store;

    // Assert invariants
    uint32_t syncing_and_writers = ck_pr_load_32(&mstore->syncing_and_writers);
    uint32_t syncing = EXTRACT_SYNCING(syncing_and_writers);
    uint32_t writers = EXTRACT_WRITERS(syncing_and_writers);
    ensure(writers == 0, "We should not be reading the store when there are still writers");
    ensure(syncing == 1, "We should not be reading the store before it has started syncing");
    ensure(ck_pr_load_32(&mstore->synced) == 1, "We should not be reading the store before it has been synced");

    // Open a blank cursor
    struct mmap_store_cursor* cursor = (struct mmap_store_cursor*) _mmap_open_cursor(store);

    // Save the current offset so we can try to CAS later
    uint32_t current_offset = ck_pr_load_32(&mstore->read_cursor);

    // If the first cursor has not been returned yet (the read cursor is still at its
    // initial (uint32_t) -1 value), don't advance; seek to the beginning instead.
    if (current_offset == (uint32_t) -1) {

        uint32_t next_offset = store->start_cursor(store);

        // Seek to the read offset
        enum store_read_status ret = _mmap_cursor_seek((store_cursor_t*) cursor, next_offset);
        ensure(ret != END, "Failed to seek due to empty store");
        ensure(ret != UNSYNCED_STORE, "Failed to seek due to unsynced store");
        ensure(ret == SUCCESS, "Failed to seek");

        // Set the read cursor.  Note we are setting it to the offset of the thing we are reading,
        // because of the logic below
        if (ck_pr_cas_32(&mstore->read_cursor, current_offset, next_offset)) {
            return (store_cursor_t*) cursor;
        }

        // If we failed to CAS, reload the current offset and drop down to the normal logic below
        current_offset = ck_pr_load_32(&mstore->read_cursor);
    }

    // Seek to the current read offset
    enum store_read_status ret = _mmap_cursor_seek((store_cursor_t*) cursor, current_offset);
    ensure(ret != UNSYNCED_STORE, "Failed to seek due to unsynced store");
    ensure(ret == SUCCESS, "Failed to seek");

    // Save our offset so we can try to CAS
    uint32_t next_offset = cursor->next_offset;

    // This is our only way to advance, so we have to do this
    ret = _mmap_cursor_advance((store_cursor_t*) cursor);
    ensure(ret == SUCCESS || ret == END, "Failed to advance");

    // If we advanced successfully, try to CAS the read cursor
    while (ret != END) {

        // If we succeed, return the cursor we made
        if (ck_pr_cas_32(&mstore->read_cursor, current_offset, next_offset)) {
            return (store_cursor_t*) cursor;
        }

        // Otherwise, try again

        // Save the current offset so we can try to CAS later
        current_offset = ck_pr_load_32(&mstore->read_cursor);

        // Seek to the current read offset
        ret = _mmap_cursor_seek((store_cursor_t*) cursor, current_offset);
        ensure(ret == SUCCESS, "Failed to seek");

        // Save our offset so we can try to CAS
        next_offset = cursor->next_offset;

        // This is our only way to advance, so we have to do this
        ret = _mmap_cursor_advance((store_cursor_t*) cursor);
        ensure(ret == SUCCESS || ret == END, "Failed to advance");
    }

    ((store_cursor_t*) cursor)->destroy((store_cursor_t*) cursor);
    return NULL;
}
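
Once a store is synced, readers drain it by popping cursors until NULL signals the end of the store. A sketch, assuming store_t exposes _mmap_pop_cursor as a pop_cursor member (the member name is an assumption):

/* Hypothetical drain loop over a synced store */
void example_drain(store_t *store) {
    store_cursor_t *cursor;
    while ((cursor = store->pop_cursor(store)) != NULL) {
        /* ... consume the record the cursor points at ... */
        cursor->destroy(cursor);
    }
}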