Beispiel #1
0
/**
 * Open an existing mmap-backed store file and map it read/write.
 *
 * The file is expected to begin with two 32-bit words: the magic number
 * 0xDEADBEEF followed by the total file size.  Both are validated with
 * ensure() after the file has been mapped.
 *
 * base_dir - directory that contains the store file
 * name     - file name, resolved relative to base_dir
 * flags    - stored on the returned store unchanged
 *
 * return
 *  a store handle on success
 *  NULL if the directory cannot be opened, the file size cannot be
 *  represented in 32 bits, memory cannot be allocated, or the file cannot be
 *  mapped (no descriptor or allocation is leaked on these paths)
 *
 * Other failures (open/fstat of the file, bad header, filename allocation,
 * msync) are reported through ensure().
 */
store_t* open_mmap_store(const char* base_dir, const char* name, int flags) {
    int dir_fd = open(base_dir, O_DIRECTORY, (mode_t)0600);
    if (dir_fd == -1) return NULL;

    int real_fd = openat(dir_fd, name, O_RDWR, (mode_t)0600);
    // 0 is a valid descriptor (e.g. when stdin has been closed); only -1 signals failure.
    ensure(real_fd != -1, "Failed to open mmap store file");
    close(dir_fd);

    struct stat sb;
    int ret = fstat(real_fd, &sb);
    ensure(ret != -1, "Failed to fstat file");

    // The header records the size as a uint32_t, so a file that does not fit in 32 bits can
    // never be a valid store.  Reject it instead of silently truncating the size.
    if (sb.st_size < 0 || (uint64_t) sb.st_size > UINT32_MAX) {
        close(real_fd);
        return NULL;
    }
    uint32_t size = (uint32_t) sb.st_size;

    // This is nearly identical to the create_mmap_store.  Maybe should make an "init mmap store" or
    // something?
    struct mmap_store *store = calloc(1, sizeof *store);
    if (store == NULL) {
        close(real_fd);
        return NULL;
    }

    // mmap reports failure with MAP_FAILED, never with NULL.
    void *mapping = mmap(NULL, (size_t) size, PROT_READ | PROT_WRITE,
            MAP_SHARED | MAP_POPULATE | MAP_NONBLOCK, real_fd, 0);
    if (mapping == MAP_FAILED) {
        free(store);
        close(real_fd);
        return NULL;
    }

    madvise(mapping, size, MADV_SEQUENTIAL);

    // Size of the two-word header; the write cursor starts right after it.
    uint32_t off = sizeof(uint32_t) * 2;
    ensure(((uint32_t *)mapping)[0] == 0xDEADBEEF, "Magic number does not match.  Bad file format");
    ensure(((uint32_t *)mapping)[1] == size, "Size recorded does not match file size.  Bad file format");

    ensure(asprintf(&(store->filename), "%s/%s", base_dir, name) > 0,
           "Failed to allocate store filename");

    store->fd = real_fd;
    store->capacity = size;
    store->flags = flags;
    store->mapping = mapping;

    // These don't really matter because writers aren't allowed...
    ck_pr_store_32(&store->write_cursor, off);
    ck_pr_store_32(&store->last_sync, 0);
    ck_pr_store_32(&store->read_cursor, -1);

    // We infer that this store has been synced...
    ck_pr_store_32(&store->syncing_and_writers, 0x80000000U);
    ck_pr_store_32(&store->synced, 1);
    ck_pr_fence_atomic();
    ensure(msync(mapping, off, MS_SYNC) == 0, "Unable to sync");
    ensure(store->write_cursor != 0, "Cursor incorrect");

    // Install the mmap implementation of the generic store interface.
    ((store_t *)store)->write        = &_mmap_write;
    ((store_t *)store)->open_cursor  = &_mmap_open_cursor;
    ((store_t *)store)->pop_cursor   = &_mmap_pop_cursor;
    ((store_t *)store)->capacity     = &_mmap_capacity;
    ((store_t *)store)->cursor       = &_mmap_cursor;
    ((store_t *)store)->start_cursor = &_mmap_start_cursor;
    ((store_t *)store)->sync         = &_mmap_sync;
    ((store_t *)store)->close        = &_mmap_close;
    ((store_t *)store)->destroy      = &_mmap_destroy;

    return (store_t *)store;
}
Beispiel #2
0
/**
 * Create (or reuse) a store file of `size` bytes under base_dir and map it
 * read/write.
 *
 * The first two 32-bit words of the mapping are initialised with the magic
 * number 0xDEADBEEF and the store size; the write and sync cursors start
 * immediately after that header.
 *
 * size     - total size of the store file in bytes, header included
 * base_dir - directory in which the file is created
 * name     - file name, resolved relative to base_dir
 * flags    - stored on the returned store unchanged
 *
 * return
 *  a store handle on success
 *  NULL if size cannot hold the header, the directory or file cannot be
 *  opened, space cannot be allocated, memory cannot be allocated, or the file
 *  cannot be mapped (no descriptor or allocation is leaked on these paths)
 *
 * A failing msync of the header is reported through ensure().
 */
store_t* create_mmap_store(uint32_t size, const char* base_dir, const char* name, int flags) {
    //TODO : Enforce a max size
    //TODO : Check flags
    //TODO : check thread sanity
    //TODO : check size is near a page

    // Size of the two-word header.  A store smaller than this would have its header written
    // past the end of the file, so refuse it before touching the file system.
    const uint32_t off = sizeof(uint32_t) * 2;
    if (size < off) return NULL;

    int dir_fd = open(base_dir, O_DIRECTORY, (mode_t)0600);
    if (dir_fd == -1) return NULL;

    int real_fd = openat(dir_fd, name, O_RDWR | O_CREAT, (mode_t)0600);
    close(dir_fd);

    // TODO - Check for the race condition if two people attempt to create
    // the same segment
    if (real_fd == -1) return NULL;

    if (posix_fallocate(real_fd, 0, size) != 0) {
        close(real_fd);
        return NULL;
    }

    struct mmap_store *store = calloc(1, sizeof *store);
    if (store == NULL) {
        close(real_fd);
        return NULL;
    }

    // mmap reports failure with MAP_FAILED, never with NULL.
    void *mapping = mmap(NULL, (size_t) size, PROT_READ | PROT_WRITE,
            MAP_SHARED | MAP_POPULATE | MAP_NONBLOCK, real_fd, 0);
    if (mapping == MAP_FAILED) {
        free(store);
        close(real_fd);
        return NULL;
    }

    madvise(mapping, size, MADV_SEQUENTIAL);

    // Write the file header: magic number, then the store size.
    ((uint32_t *)mapping)[0] = 0xDEADBEEF;
    ((uint32_t *)mapping)[1] = size;

    store->fd = real_fd;
    store->capacity = size;
    store->flags = flags;
    store->mapping = mapping;

    ck_pr_store_32(&store->write_cursor, off);
    ck_pr_store_32(&store->sync_cursor, off);
    ck_pr_fence_atomic();
    // Flush the header so the file is recognisable on disk from the start.
    ensure(msync(mapping, off, MS_SYNC) == 0, "Unable to sync");
    ensure(store->write_cursor != 0, "Cursor incorrect");
    ensure(store->sync_cursor != 0, "Cursor incorrect");

    // Install the mmap implementation of the generic store interface.
    ((store_t *)store)->write       = &_mmap_write;
    ((store_t *)store)->open_cursor = &_mmap_open_cursor;
    ((store_t *)store)->capacity    = &_mmap_capacity;
    ((store_t *)store)->cursor      = &_mmap_cursor;
    ((store_t *)store)->sync        = &_mmap_sync;
    ((store_t *)store)->close       = &_mmap_close;
    ((store_t *)store)->destroy     = &_mmap_destroy;

    return (store_t *)store;
}
Beispiel #3
0
/**
 * Create a non-blocking socket and start connecting it to one of the node's
 * addresses.
 *
 * The address at node->address_index is tried first.  If that connection
 * cannot be started, every other address in node->addresses is tried in
 * order; the index of the first one that works is stored back into
 * node->address_index.
 *
 * node - node to connect to
 * fd   - receives the socket descriptor, or -1 when no connection could be
 *        started
 *
 * return
 *  the result of as_node_authenticate_connection() once a connection has
 *  been started
 *  CITRUSLEAF_FAIL_CLIENT if the socket could not be created
 *  CITRUSLEAF_FAIL_UNAVAILABLE if no address accepted the connection attempt
 */
static int
as_node_create_connection(as_node* node, int* fd)
{
	int sock = cf_socket_create_nb();
	*fd = sock;

	if (sock == -1) {
		// Socket creation is a local failure, not a problem with the node.
		cf_debug("Socket create failed for %s", node->name);
		return CITRUSLEAF_FAIL_CLIENT;
	}

	as_vector* addrs = &node->addresses;
	as_address* preferred = as_vector_get(addrs, node->address_index);

	// Preferred address first.
	if (cf_socket_start_connect_nb(sock, &preferred->addr) == 0) {
		return as_node_authenticate_connection(node, fd);
	}

	// Fall back to the remaining addresses.
	uint32_t idx;
	for (idx = 0; idx < addrs->size; idx++) {
		as_address* candidate = as_vector_get(addrs, idx);

		// Entries live in the alias array, so comparing pointers identifies the
		// address that has already been tried.
		if (candidate == preferred) {
			continue;
		}

		if (cf_socket_start_connect_nb(sock, &candidate->addr) != 0) {
			continue;
		}

		// Remember the working alias.  This is only a hint for later calls;
		// other threads may not observe the new index immediately.
		cf_debug("Change node address %s %s:%d", node->name, candidate->name, (int)cf_swap_from_be16(candidate->addr.sin_port));
		ck_pr_store_32(&node->address_index, idx);
		return as_node_authenticate_connection(node, fd);
	}

	// Nothing worked: release the socket and report the node as unavailable.
	cf_info("Failed to connect: %s %s:%d", node->name, preferred->name, (int)cf_swap_from_be16(preferred->addr.sin_port));
	cf_close(sock);
	*fd = -1;
	return CITRUSLEAF_FAIL_UNAVAILABLE;
}
Beispiel #4
0
/**
 * Create a store file of `size` bytes under base_dir and map it read/write.
 *
 * When flags contains DELETE_IF_EXISTS an existing file is truncated and
 * reused; otherwise the file must not exist yet (O_EXCL).  The first two
 * 32-bit words of the mapping are initialised with the magic number
 * 0xDEADBEEF and the store size; the write cursor starts immediately after
 * that header.
 *
 * size     - total size of the store file in bytes, header included
 * base_dir - directory in which the file is created
 * name     - file name, resolved relative to base_dir
 * flags    - store flags; DELETE_IF_EXISTS is interpreted here, the full
 *            value is stored on the returned store
 *
 * return
 *  a store handle on success
 *  NULL if size cannot hold the header, the directory or file cannot be
 *  opened, space cannot be allocated, memory cannot be allocated, or the file
 *  cannot be mapped (no descriptor or allocation is leaked on these paths)
 *
 * An already-existing file (without DELETE_IF_EXISTS), a failing filename
 * allocation and a failing msync are reported through ensure().
 */
store_t* create_mmap_store(uint32_t size, const char* base_dir, const char* name, int flags) {
    //TODO : Enforce a max size
    //TODO : Check flags
    //TODO : check thread sanity
    //TODO : check size is near a page

    // Size of the two-word header.  A store smaller than this would have its header written
    // past the end of the file, so refuse it before touching the file system.
    const uint32_t off = sizeof(uint32_t) * 2;
    if (size < off) return NULL;

    int dir_fd = open(base_dir, O_DIRECTORY, (mode_t)0600);
    if (dir_fd == -1) return NULL;

    int openat_flags = O_RDWR | O_CREAT | O_SYNC;

    if (flags & DELETE_IF_EXISTS) {
        openat_flags = openat_flags | O_TRUNC;
    }
    else {
        openat_flags = openat_flags | O_EXCL;
    }

    int real_fd = openat(dir_fd, name, openat_flags, (mode_t)0600);
    // close() may overwrite errno, so capture the openat() error before calling it.
    int openat_errno = errno;
    close(dir_fd);

    // TODO - Check for the race condition if two people attempt to create
    // the same segment
    if (real_fd == -1) {
        // TODO: This is a terrible hack.  We need to fix the error handling, but for now, actually
        // warn us if we are failing because of loading a garbage file.
        ensure(openat_errno != EEXIST, "Failed to create mmap store because file already exists");
        return NULL;
    }

    if (posix_fallocate(real_fd, 0, size) != 0) {
        close(real_fd);
        return NULL;
    }

    struct mmap_store *store = calloc(1, sizeof *store);
    if (store == NULL) {
        close(real_fd);
        return NULL;
    }

    // mmap reports failure with MAP_FAILED, never with NULL.
    void *mapping = mmap(NULL, (size_t) size, PROT_READ | PROT_WRITE,
            MAP_SHARED | MAP_POPULATE | MAP_NONBLOCK, real_fd, 0);
    if (mapping == MAP_FAILED) {
        free(store);
        close(real_fd);
        return NULL;
    }

    madvise(mapping, size, MADV_SEQUENTIAL);

    // Write the file header: magic number, then the store size.
    ((uint32_t *)mapping)[0] = 0xDEADBEEF;
    ((uint32_t *)mapping)[1] = size;

    ensure(asprintf(&(store->filename), "%s/%s", base_dir, name) > 0,
           "Failed to allocate store filename");

    store->fd = real_fd;
    store->capacity = size;
    store->flags = flags;
    store->mapping = mapping;

    ck_pr_store_32(&store->write_cursor, off);
    ck_pr_store_32(&store->last_sync, 0);
    ck_pr_store_32(&store->read_cursor, -1);
    ck_pr_store_32(&store->syncing_and_writers, 0);
    ck_pr_store_32(&store->synced, 0);
    ck_pr_fence_atomic();
    // Flush the header so the file is recognisable on disk from the start.
    ensure(msync(mapping, off, MS_SYNC) == 0, "Unable to sync");
    ensure(store->write_cursor != 0, "Cursor incorrect");

    // Install the mmap implementation of the generic store interface.
    ((store_t *)store)->write        = &_mmap_write;
    ((store_t *)store)->open_cursor  = &_mmap_open_cursor;
    ((store_t *)store)->pop_cursor   = &_mmap_pop_cursor;
    ((store_t *)store)->capacity     = &_mmap_capacity;
    ((store_t *)store)->cursor       = &_mmap_cursor;
    ((store_t *)store)->start_cursor = &_mmap_start_cursor;
    ((store_t *)store)->sync         = &_mmap_sync;
    ((store_t *)store)->close        = &_mmap_close;
    ((store_t *)store)->destroy      = &_mmap_destroy;

    return (store_t *)store;
}
Beispiel #5
0
/**
 * Force this store to sync: mark it as syncing, wait until no writers are
 * registered, then flush the written portion of the mapping and the file
 * descriptor to disk and record the store as synced.
 *
 * store - the store to sync; it is cast to struct mmap_store here
 *
 * return
 *  0 - success
 *
 * NOTE(review): an earlier version of this comment documented "1 - failure",
 * but every visible return path yields 0.  All error conditions go through
 * ensure(), whose failure behaviour is defined elsewhere.
 */
uint32_t _mmap_sync(store_t *store) {
    struct mmap_store *mstore = (struct mmap_store*) store;

    // The point we have written up to
    uint32_t write_cursor = ck_pr_load_32(&mstore->write_cursor);

    // A cursor that has not moved past the two-word header means nothing has been written yet.
    ensure(write_cursor > sizeof(uint32_t) * 2, "Attempted to sync an empty store");

    // We must ensure that no writes are happening during a sync.  To do this, we pack both the
    // "syncing" bit and the number of writers in the same 32 bit value.
    // 1. Load the "syncing_and_writers" value
    // 2. Set that we are syncing
    // 3. Try to Compare and Swap this value
    // 4. Repeat until "writers" == 0
    while (1) {

        // 1.  Take a snapshot; every decision in this iteration is based on it.
        uint32_t syncing_and_writers = ck_pr_load_32(&mstore->syncing_and_writers);
        uint32_t syncing = EXTRACT_SYNCING(syncing_and_writers);
        uint32_t writers = EXTRACT_WRITERS(syncing_and_writers);

        // Sanity check on the writer count.
        // NOTE(review): the original comment spoke of "2^32 - 1 writers" and of 31 bits being
        // used for the count, but the bound checked here is 0xEFFFFFFF, and this function never
        // increments the writer count itself - confirm the intended limit.
        ensure(writers < 0xEFFFFFFFU, "Too many writers");

        // 2.
        // 3.  Only attempt the swap while the syncing bit is still clear.  A failed swap means
        //     the packed value changed after the snapshot was taken, so start over.
        if (syncing == 0) {
            if (!ck_pr_cas_32(&mstore->syncing_and_writers, syncing_and_writers, SET_SYNCING(syncing_and_writers))) {
                continue;
            }
        }

        // 4.  The syncing bit is set at this point (already set, or just set by the swap above);
        //     leave the loop once the snapshot shows no writers, otherwise poll again.
        if (writers == 0) {
            break;
        }
    }

    // Re-read the point we have written up to; it may have advanced since the first load.
    write_cursor = ck_pr_load_32(&mstore->write_cursor);

    // Actually sync.  The loop above exited with the syncing bit set and no writers observed,
    // so flush everything from the start of the mapping up to the write cursor.
    //TODO: Protect the nearest page once sunk
    //mprotect(mapping, off, PROT_READ);
    ensure(msync(mstore->mapping, write_cursor, MS_SYNC) == 0, "Unable to msync");
    ensure(fsync(mstore->fd) == 0, "Unable to fsync");

    // Record that we synced successfully.  This will allow readers to progress.
    ck_pr_store_32(&mstore->synced, 1);

    // Verify the invariants after the flush: still marked as syncing and still no writers.
    // Note that the syncing bit is not cleared anywhere in this function.
    uint32_t syncing_and_writers = ck_pr_load_32(&mstore->syncing_and_writers);
    uint32_t syncing = EXTRACT_SYNCING(syncing_and_writers);
    uint32_t writers = EXTRACT_WRITERS(syncing_and_writers);

    ensure(writers == 0, "We should not have synced the store when there are still writers");
    ensure(syncing == 1, "We should not have synced the store when we did not mark it as syncing");

    return 0;
}
/**
 * Create a socket for the command and start a non-blocking connection to one
 * of the node's addresses.
 *
 * The address at node->address_index is tried first, then every other address
 * in node->addresses in order.  A connect() that returns 0 or fails with
 * EINPROGRESS counts as started; the socket is then handed to
 * as_ev_watcher_init().  When a fallback address is the one that starts, its
 * index is stored back into node->address_index.  If no address can be
 * started, the failure is reported through as_event_connect_error().
 *
 * cmd - the async command that needs a connection
 */
static void
as_ev_connect(as_event_command* cmd)
{
	int sock = as_event_create_socket(cmd);

	if (sock < 0) {
		return;
	}

	as_node* node = cmd->node;
	as_vector* addrs = &node->addresses;
	as_address* preferred = as_vector_get(addrs, node->address_index);

	// Preferred address first.  errno is only inspected when connect() did not
	// succeed immediately.
	if (connect(sock, (struct sockaddr*)&preferred->addr, sizeof(struct sockaddr)) == 0 ||
		errno == EINPROGRESS) {
		as_ev_watcher_init(cmd, sock);
		return;
	}

	// Fall back to the remaining addresses.
	for (uint32_t idx = 0; idx < addrs->size; idx++) {
		as_address* candidate = as_vector_get(addrs, idx);

		// Entries live in the alias array, so comparing pointers identifies the
		// address that has already been tried.
		if (candidate == preferred) {
			continue;
		}

		// Skip this address unless the connection completed or is in progress.
		if (connect(sock, (struct sockaddr*)&candidate->addr, sizeof(struct sockaddr)) != 0 &&
			errno != EINPROGRESS) {
			continue;
		}

		// Remember the working alias.  This is only a hint for later calls;
		// other threads may not observe the new index immediately.
		as_log_debug("Change node address %s %s:%d", node->name, candidate->name, (int)cf_swap_from_be16(candidate->addr.sin_port));
		ck_pr_store_32(&node->address_index, idx);
		as_ev_watcher_init(cmd, sock);
		return;
	}

	// No address could even start a connection.
	as_error err;
	as_error_update(&err, AEROSPIKE_ERR_ASYNC_CONNECTION, "Failed to connect: %s %s:%d",
					node->name, preferred->name, (int)cf_swap_from_be16(preferred->addr.sin_port));
	as_event_connect_error(cmd, &err, sock);
}
/**
 * Replace the value of a persistent counter with new_value, but only if it
 * currently equals old_value, and persist the new value to disk.
 *
 * The whole operation runs under the counter's write lock.  The value is
 * written to pav->_temporary_filename, the file at pav->_filename is removed,
 * and the temporary file is hard-linked into its place.  If any persistence
 * step fails, the in-memory value is restored to old_value.
 *
 * pav       - the persistent value to update
 * old_value - value the caller expects the counter to hold
 * new_value - value to store and persist
 *
 * return
 *   0 - success
 *  -1 - the counter did not hold old_value; nothing was changed
 *  -2 - the temporary file could not be created, fully written or fsynced
 *  -3 - the original file could not be unlinked
 *  -4 - the temporary file could not be linked to the original name
 *  -5 - the update itself succeeded but the temporary file could not be removed
 *
 * Every non-zero code except -1 also goes through ensure() before returning.
 *
 * NOTE(review): between the unlink in step 2 and the link in step 3 no file
 * exists under pav->_filename, so an interruption at that point leaves the
 * value unpersisted; rename(2) would replace the file atomically.
 */
int _compare_and_swap(persistent_atomic_value_t *pav, uint32_t old_value, uint32_t new_value) {
    // First lock this counter
    ck_rwlock_write_lock(pav->_lock);

    // Then, check to see if someone changed this value before we got here
    if (ck_pr_load_32(&pav->_current_value) != old_value) {
        ck_rwlock_write_unlock(pav->_lock);
        return -1;
    }

    // We got here first.  Set the new value.
    ck_pr_store_32(&pav->_current_value, new_value);

    // Now, persist the value
    // 1. Write it to a temporary file
    // 2. Delete the original file
    // 3. Link the temporary file to the original file
    // 4. Unlink the temporary file
    int fail = 0;

    // 1.
    int open_flags = O_RDWR | O_CREAT | O_EXCL | O_SYNC;
    int fd = open(pav->_temporary_filename, open_flags, (mode_t)0600);
    if (fd < 0) {
        fail = -2;
        goto end;
    }

    // write() may store fewer bytes than requested without reporting an error; anything other
    // than the complete value would persist a corrupt counter, so treat it as a failure.
    ssize_t nwritten = write(fd, &pav->_current_value, sizeof(pav->_current_value));
    int write_ok = (nwritten == (ssize_t) sizeof(pav->_current_value));
    int sync_ok = (fsync(fd) == 0);
    close(fd);

    if (!write_ok || !sync_ok) {
        fail = -2;
        goto end;
    }

    // 2.
    if (unlink(pav->_filename) != 0) {
        fail = -3;
        goto end;
    }

    // 3.
    if (link(pav->_temporary_filename, pav->_filename) != 0) fail = -4;

end:
    // 4.  Always try to remove the temporary file (this also clears a stale one left behind by
    // an earlier failure), but never let a cleanup failure hide the error that brought us here.
    if (unlink(pav->_temporary_filename) != 0 && fail == 0) fail = -5;

    // Roll the in-memory value back so it does not run ahead of what was persisted.
    if (fail != 0) {
        ck_pr_store_32(&pav->_current_value, old_value);
    }

    ck_rwlock_write_unlock(pav->_lock);
    // For now
    ensure(fail == 0, "Failed during persistent update");
    return fail;
}