Example #1
0
/** fobuf_close
 * @param b: the fobuf structure to finish up with.
 *
 * Flush the buffers, ensure data is on disk and close the file descriptor.
 * The structure and its buffer are freed on every path, including the
 * error paths, so b must not be used again after this call.
 *
 * Return value: zero on error, non-zero on success.
 */
int fobuf_close(fobuf_t b)
{
	int ret=1;

	DEBUG("%p", b);

	/* fd of -1: skip flush, sync and close and only release memory */
	if ( b->fd == -1 )
		goto noclose;

	if ( !_fobuf_flush(b) )
		ret = 0;

	/* don't error if the output file is a special file
	 * which does not support fsync (eg: a pipe)
	 */
	if ( fsync(b->fd) && errno != EROFS && errno != EINVAL ) {
		ERR("fsync: %s", os_err());
		ret = 0;
	}

	/* report the close failure under its own name, not as "fsync" */
	if ( !fd_close(b->fd) ) {
		ERR("fd_close: %s", os_err());
		ret = 0;
	}

noclose:
	/* free(NULL) is a no-op so no guard is needed on b->buf */
	free(b->buf);
	free(b);

	return ret;
}
/* Submit one asynchronous sendfile request for connection h.
 *
 * The file descriptor, offset and length come from http_conn_data() and
 * the destination is http_conn_socket(h). Completion is signalled through
 * the eventfd attached to the iocb. On success in_flight is incremented.
 *
 * Return value: 1 if the request was submitted, 0 on failure (no iocb
 * available, or io_submit() did not accept the request).
 */
static int aio_submit(struct iothread *t, http_conn_t h)
{
	struct iocb *iocb;
	size_t data_len;
	off_t data_off;
	int ret, fd;

	data_len = http_conn_data(h, &fd, &data_off);
	assert(data_len);

	iocb = hgang_alloc(aio_iocbs);
	if ( NULL == iocb )
		return 0;

	io_prep_sendfile(iocb, fd, data_len, data_off, http_conn_socket(h));
	iocb->data = h;
	io_set_eventfd(iocb, efd->fd);

	ret = io_submit(aio_ctx, 1, &iocb);
	if ( ret <= 0 ) {
		/* the request was not queued, so no completion will ever
		 * hand this iocb back: return it to the pool here
		 */
		hgang_return(aio_iocbs, iocb);
		/* io_submit reports failure as a negative errno value */
		errno = -ret;
		fprintf(stderr, "io_submit: %s\n", os_err());
		return 0;
	}

	dprintf("io_submit: sendfile: %zu bytes\n", data_len);
	in_flight++;
	return 1;
}
/* Process the result of one finished sendfile request.
 *
 * The iocb is returned to the pool and in_flight is decremented first.
 * Then, depending on ret:
 *  - ret > 0:        passed to http_conn_data_read(); if that reports data
 *                    still outstanding a new request is submitted,
 *                    otherwise the transfer is marked complete
 *  - ret == -EAGAIN: the connection is parked until writable
 *  - anything else:  the connection is aborted (with a message if ret < 0)
 */
static void handle_completion(struct iothread *t, struct iocb *iocb,
				http_conn_t h, int ret)
{
	hgang_return(aio_iocbs, iocb);
	in_flight--;

	if ( ret == -EAGAIN ) {
		dprintf("aio_sendfile: failed EAGAIN\n");
		http_conn_wait_on(t, h, NBIO_WRITE);
		return;
	}

	if ( ret <= 0 ) {
		/* zero aborts silently, negative values are reported */
		if ( ret < 0 ) {
			errno = -ret;
			printf("aio_sendfile: %s\n", os_err());
		}
		http_conn_abort(t, h);
		return;
	}

	if ( http_conn_data_read(h, ret) ) {
		printf("re-submit from completion\n");
		if ( !aio_submit(t, h) )
			http_conn_abort(t, h);
		return;
	}

	dprintf("aio_sendfile: done\n");
	/* automatically removes from waitq */
	http_conn_data_complete(t, h);
}
Example #4
0
/* Collect ready events from the thread's epoll descriptor.
 *
 * mto is handed to epoll_wait() as its timeout. The wait is restarted
 * when interrupted by a signal. For every event returned the nbio's flags
 * are rebuilt from scratch from the epoll event mask and the nbio is
 * moved to the tail of t->active.
 */
static void epoll_pump(struct iothread *t, int mto)
{
	struct epoll_event ev[8];
	int nfd, i;

	do {
		nfd = epoll_wait(t->priv.epoll, ev,
					sizeof(ev)/sizeof(*ev), mto);
	} while ( nfd < 0 && errno == EINTR );

	if ( nfd < 0 ) {
		fprintf(stderr, "epoll_wait: %s\n", os_err());
		return;
	}

	for(i = 0; i < nfd; i++) {
		struct nbio *io = ev[i].data.ptr;

		io->flags = 0;
		/* a hangup is reported as readable */
		if ( ev[i].events & (EPOLLIN|EPOLLHUP) )
			io->flags |= NBIO_READ;
		if ( ev[i].events & EPOLLOUT )
			io->flags |= NBIO_WRITE;
		if ( ev[i].events & EPOLLERR )
			io->flags |= NBIO_ERROR;

		list_move_tail(&io->list, &t->active);
	}
}
Example #5
0
static int write_prep(struct _cola *c, unsigned int lvlno, struct buf *buf)
{
	cola_key_t nr_ent, ofs;
	size_t sz;

	nr_ent = (1 << lvlno);

	ofs = nr_ent - 1;
	ofs *= sizeof(*buf->ptr);
	ofs += sizeof(struct cola_hdr);
	sz = nr_ent * sizeof(*buf->ptr);

	if ( lvlno > c->c_maplvls ) {
		buf->ptr = malloc(nr_ent);
		if ( NULL == buf->ptr ) {
			fprintf(stderr, "%s: malloc: %s\n", cmd, os_err());
			return 0;
		}
		buf->heap = 1;
	}else{
		buf->ptr = (struct cola_elem *)(c->c_map + ofs);
		buf->heap = 0;
	}

	buf->nelem = nr_ent;
	return 1;
}
Example #6
0
/* Set the disposition of SIGPIPE to ignored.
 *
 * Return value: 1 on success, 0 if signal() failed (a message is
 * printed to stderr).
 */
int os_sigpipe_ignore(void)
{
	if ( signal(SIGPIPE, SIG_IGN) != SIG_ERR )
		return 1;

	fprintf(stderr, "signal: %s\n", os_err());
	return 0;
}
Example #7
0
/* Work out how many chunks every field needs and build the chunk table.
 *
 * First pass: for each field, record the number of chunks its type asks
 * for plus the number of system chunks, and total them up. The table
 * wr->wr_chunk is then allocated zeroed. Second pass: each field gets its
 * slice of the table; system chunks are set up by setup_sys_chunks() and
 * the remaining entries are filled in with block id, format and length
 * obtained from the field type's callbacks.
 *
 * Return value: 1 on success, 0 if the table could not be allocated.
 */
static int size_up_chunks(struct _gidx_wr *wr)
{
	unsigned int fld, pos, total = 0;

	for(fld = 0; fld < wr->wr_num_fields; fld++) {
		struct wr_field *f = &wr->wr_field[fld];

		f->f_num_chunks = (*f->f_type->t_wr_num_chunks)(f->f_priv);
		f->f_sys_chunks = sys_num_chunks(f);
		DEBUG("%s: %s: %u system chunks + %u chunks requested",
			f->f_type->t_name, f->f_name,
			f->f_sys_chunks, f->f_num_chunks);
		f->f_num_chunks += f->f_sys_chunks;
		total += f->f_num_chunks;
	}

	wr->wr_chunk = calloc(total, sizeof(*wr->wr_chunk));
	if ( NULL == wr->wr_chunk ) {
		ERR("calloc: %s", os_err());
		return 0;
	}

	wr->wr_tot_chunks = total;

	pos = 0;
	for(fld = 0; fld < wr->wr_num_fields; fld++) {
		struct wr_field *f = &wr->wr_field[fld];
		unsigned int blk, requested;

		/* the field's chunks start with its system chunks */
		f->f_chunk = &wr->wr_chunk[pos];
		setup_sys_chunks(wr, f, f->f_chunk);
		pos += sys_num_chunks(f);

		requested = (*f->f_type->t_wr_num_chunks)(f->f_priv);
		for(blk = 0; blk < requested; blk++) {
			struct wr_chunk *ch = &wr->wr_chunk[pos++];

			ch->c_field = f;
			ch->c_blkid = blk;
			/* the format callback is optional */
			if ( f->f_type->t_wr_chunk_format )
				ch->c_format = (*f->f_type->t_wr_chunk_format)
							(f->f_priv, blk);
			else
				ch->c_format = GIDX_CHUNK_USER;
			ch->c_len = (*f->f_type->t_wr_chunk_size)
						(f->f_priv, ch->c_blkid);
		}
	}

	/* both passes must agree on the total */
	assert(pos == wr->wr_tot_chunks);
	return 1;
}
Example #8
0
/* Create an eventfd-backed nbio.
 *
 * initval is the initial counter value given to eventfd(); the descriptor
 * is created non-blocking and close-on-exec. cb and priv are stored in the
 * new object.
 *
 * Return value: pointer to the embedded nbio, or NULL on failure.
 */
struct nbio *nbio_eventfd_new(eventfd_t initval, eventfd_cb_t cb, void *priv)
{
	struct nb_efd *e = calloc(1, sizeof(*e));

	if ( NULL == e ) {
		fprintf(stderr, "nbio_eventfd_new: %s\n", os_err());
		return NULL;
	}

	e->e_nbio.fd = eventfd(initval, EFD_NONBLOCK|EFD_CLOEXEC);
	if ( e->e_nbio.fd < 0 ) {
		fprintf(stderr, "eventfd: %s\n", os_err());
		free(e);
		return NULL;
	}

	e->e_priv = priv;
	e->e_cb = cb;
	e->e_nbio.ops = &ops;
	return &e->e_nbio;
}
Example #9
0
/* Read handler for an eventfd nbio.
 *
 * Reads the counter and passes it to the stored callback. If the read
 * fails with EAGAIN the nbio is marked inactive for reading; any other
 * failure is printed to stderr.
 */
static void efd_read(struct iothread *t, struct nbio *n)
{
	struct nb_efd *e = (struct nb_efd *)n;
	eventfd_t count;

	if ( 0 == eventfd_read(e->e_nbio.fd, &count) ) {
		e->e_cb(t, e->e_priv, count);
		return;
	}

	if ( errno != EAGAIN ) {
		fprintf(stderr, "eventfd_read: %s\n", os_err());
		return;
	}

	nbio_inactive(t, &e->e_nbio, NBIO_READ);
}
Example #10
0
/* Store oid_id in a slot taken from the node's oid allocator and bump
 * the node's oid count.
 *
 * Return value: 1 on success, 0 if no slot could be allocated.
 */
static int node_oid_id_insert(struct u64_val *node, gidx_oid_t oid_id)
{
	gidx_oid_t *slot = hgang_alloc(node->n_oids);

	if ( slot != NULL ) {
		*slot = oid_id;
		node->n_num_oid++;

		DEBUG("append oid_id=%u to node val=%"PRIu64, oid_id, node->n_val);
		return 1;
	}

	ERR("hgang_alloc: %s", os_err());
	return 0;
}
Example #11
0
/* Set up the asynchronous sendfile backend for thread t: the AIO context,
 * the iocb pool and the eventfd used for completion notification.
 *
 * Return value: 1 on success, 0 on failure.
 *
 * NOTE(review): when a later step fails, whatever was set up by the
 * earlier steps is not released here — confirm callers treat failure
 * as fatal.
 */
static int io_async_sendfile_init(struct iothread *t)
{
	int ret;

	memset(&aio_ctx, 0, sizeof(aio_ctx));
	ret = io_queue_init(AIO_QUEUE_SIZE, &aio_ctx);
	if ( ret ) {
		/* the error comes back as a negative errno value, errno
		 * itself is not set: load it so os_err() reports the
		 * right thing
		 */
		errno = -ret;
		fprintf(stderr, "io_queue_init: %s\n", os_err());
		return 0;
	}

	aio_iocbs = hgang_new(sizeof(struct iocb), 0);
	if ( NULL == aio_iocbs )
		return 0;

	efd = nbio_eventfd_new(0, aio_event, NULL);
	if ( NULL == efd )
		return 0;
	nbio_eventfd_add(t, efd);
	return 1;
}
Example #12
0
/** _fobuf_flush
 * @param b: the fobuf structure to flush
 *
 * Hand the contents of the userspace buffer to fd_write(). fsync() is
 * not called, so a successful return says nothing about the data having
 * reached the disk.
 *
 * The number of bytes written is b->buf_sz - b->buf_len; after a
 * successful write the write pointer is rewound to the start of the
 * buffer and b->buf_len is reset to b->buf_sz.
 *
 * Failure modes:
 *  0. return 1: success, all buffered data was written to the kernel
 *  1. undefined: any of the failure modes of fd_write()
 *  2. sig11|sig6|file-corruption: b->buf_len > b->buf_sz
 */
static int _fobuf_flush(struct _fobuf *b)
{
	size_t pending = b->buf_sz - b->buf_len;

	/* nothing buffered: leave the structure untouched */
	if ( pending > 0 ) {
		if ( !fd_write(b->fd, b->buf, pending) ) {
			ERR("fd_write: %s", os_err());
			return 0;
		}

		b->buf_len = b->buf_sz;
		b->ptr = b->buf;
	}

	return 1;
}
Example #13
0
/* eventfd callback: reap finished AIO requests.
 *
 * Collects up to in_flight completion events without blocking (zero
 * timeout) and passes each one to handle_completion(). priv is unused.
 */
static void aio_event(struct iothread *t, void *priv, eventfd_t val)
{
	struct timespec tmo;
	int ret, i;

	dprintf("aio_event ready, %"PRIu64"/%u in flight\n", val, in_flight);

	/* nothing outstanding: there is nothing to reap, and a
	 * variable length array must not be given a size of zero
	 */
	if ( !in_flight )
		return;

	/* sized once here; handle_completion() may change in_flight
	 * while the events are being processed
	 */
	struct io_event ev[in_flight];

	memset(&tmo, 0, sizeof(tmo));

	ret = io_getevents(aio_ctx, 1, in_flight, ev, &tmo);
	if ( ret < 0 ) {
		/* failure is a negative errno value, errno is not set */
		errno = -ret;
		fprintf(stderr, "io_getevents: %s\n", os_err());
		return;
	}

	for(i = 0; i < ret; i++)
		handle_completion(t, ev[i].obj, ev[i].data, ev[i].res);
}
Example #14
0
/* Resize the file mapping so that it covers (2^(lvlno + 2) - 1) elements
 * plus the header. The mapping may move; on success c->c_map, c->c_mapsz
 * and c->c_maplvls are updated.
 *
 * Return value: 1 on success, 0 if mremap() failed (the existing mapping
 * fields in c are left as they were).
 */
static int remap(struct _cola *c, unsigned int lvlno)
{
	size_t sz;
	uint8_t *map;

	dprintf(" - remap %u\n", lvlno);

	sz = (1U << (lvlno + 2)) - 1;
	sz *= sizeof(struct cola_elem);
	sz += sizeof(struct cola_hdr);

	map = mremap(c->c_map, c->c_mapsz, sz, MREMAP_MAYMOVE);
	if ( map == MAP_FAILED ) {
		fprintf(stderr, "%s: mremap: %s\n", cmd, os_err());
		return 0;
	}

	/* advise over the new length, not the length of the old mapping */
	madvise(map, sz, MADV_RANDOM);
	c->c_maplvls = lvlno;
	c->c_mapsz = sz;
	c->c_map = map;
	return 1;
}
Example #15
0
/* Create the initial shared mapping of the file behind c->c_fd.
 *
 * The mapping covers the header plus (2^(INITIAL_LEVELS + 1) - 1)
 * elements and is writable only when c->c_rw is set. On success
 * c->c_map, c->c_mapsz and c->c_maplvls are filled in.
 *
 * Return value: 1 on success, 0 if mmap() failed.
 */
static int map(struct _cola *c)
{
	const int prot = (c->c_rw) ? (PROT_READ|PROT_WRITE) : (PROT_READ);
	uint8_t *base;
	size_t len;

	len = (1U << (INITIAL_LEVELS + 1)) - 1;
	len *= sizeof(struct cola_elem);
	len += sizeof(struct cola_hdr);

	base = mmap(NULL, len, prot, MAP_SHARED, c->c_fd, 0);
	if ( base == MAP_FAILED ) {
		fprintf(stderr, "%s: mmap: %s\n", cmd, os_err());
		return 0;
	}

	madvise(base, len, MADV_RANDOM);

	c->c_map = base;
	c->c_mapsz = len;
	c->c_maplvls = INITIAL_LEVELS;
	return 1;
}
Example #16
0
/* Insert key into the structure.
 *
 * The key starts out as a one-element buffer. Walking up from level 0,
 * every level whose bit is set in c->c_nelem is treated as full: it is
 * read, merged with the buffer being carried, and the merged result is
 * carried to the next level. The first level whose bit is clear receives
 * the carried buffer and the walk stops. When the new element count
 * reaches 2^c_nxtlvl, file space for that level is allocated first and,
 * if the level qualifies, the mapping is grown.
 *
 * Return value: 1 on success, 0 on failure.
 */
int cola_insert(cola_t c, cola_key_t key)
{
	cola_key_t newcnt = c->c_nelem + 1;
	struct buf level;
	unsigned int i;

	dprintf("Insert key %"PRIu64"\n", key);

	if ( !buf_alloc(c, 1, &level) )
		return 0;
	level.ptr[0].key = key;

	/* make sure the level we're about to write to is allocated and,
	 * if required, mapped
	*/
	if ( newcnt == (1ULL << c->c_nxtlvl) ) {
		cola_key_t nr_ent, ofs;
		size_t sz;
		int rc;

		nr_ent = (1ULL << c->c_nxtlvl);
		ofs = nr_ent - 1;
		ofs *= sizeof(struct cola_elem);
		ofs += sizeof(struct cola_hdr);

		sz = nr_ent * sizeof(struct cola_elem);
		dprintf("fallocate level %u\n", c->c_nxtlvl);
		/* third argument is a length, not an end offset; the
		 * error number is the return value, errno is not set.
		 * Failure is reported but not treated as fatal.
		 */
		rc = posix_fallocate(c->c_fd, ofs, sz);
		if ( rc )
			fprintf(stderr, "%s: fallocate: %s\n",
				cmd, strerror(rc));
		if ( c->c_nxtlvl <= MAP_LEVELS &&
				(1U << c->c_nxtlvl) > c->c_nelem ) {
			if ( !remap(c, c->c_nxtlvl) ) {
				/* don't leak the key buffer */
				buf_finish(&level);
				return 0;
			}
		}
		c->c_nxtlvl++;
	}

	for(i = 0; newcnt >= (1U << i); i++) {
		if ( c->c_nelem & (1U << i) ) {
			struct buf level2, merged;
			int ret;

			dprintf(" - level %u full\n", i);
			if ( !read_level(c, i, &level2) ) {
				buf_finish(&level);
				return 0;
			}

			/* merge into a scratch buffer when the next level
			 * is occupied too or lies outside the mapped levels
			 */
			if ( (c->c_nelem & (1U << (i + 1))) ||
					i + 1 >= c->c_maplvls ) {
				ret = buf_alloc(c, (1U << (i + 1)), &merged);
			}else{
				/* landing in next level so write to map */
				ret = write_prep(c, i + 1, &merged);
			}
			if ( !ret ) {
				buf_finish(&level2);
				buf_finish(&level);
				return 0;
			}

			level_merge(&level2, &level, &merged);
			if ( !write_level(c, i, &level2) ) {
				buf_finish(&level2);
				buf_finish(&level);
				buf_finish(&merged);
				return 0;
			}

			buf_finish(&level2);
			buf_finish(&level);

			/* the merged buffer is what gets carried upwards */
			memcpy(&level, &merged, sizeof(level));
		}else{
			dprintf(" - level %u empty\n", i);
			if ( !fractional_cascade(c, i, level.ptr) ||
					!write_level(c, i, &level) ) {
				buf_finish(&level);
				return 0;
			}
			buf_finish(&level);
			break;
		}
	}

	c->c_nelem++;
	dprintf("\n");
#if DEBUG
	cola_dump(c);
	dprintf("\n");
#endif
	return 1;
}
Example #17
0
/* Open or create the file fn and return a handle for it.
 *
 * create:    create the file and write a fresh header; with overwrite an
 *            existing file is truncated, without it creation is exclusive.
 * !create:   open an existing file (read-write if rw, else read-only) and
 *            validate the header's magic and version.
 *
 * In both cases the initial levels are then mapped via map().
 *
 * Return value: the new handle, or NULL on failure (descriptor closed and
 * memory released).
 */
static struct _cola *do_open(const char *fn, int rw, int create, int overwrite)
{
	struct _cola *c = NULL;
	struct cola_hdr hdr;
	size_t sz;
	int eof, oflags;

	c = calloc(1, sizeof(*c));
	if ( NULL == c )
		goto out;

	if ( create ) {
		oflags = O_RDWR | O_CREAT | ((overwrite) ? O_TRUNC : O_EXCL);
	}else{
		oflags = (rw) ? O_RDWR : O_RDONLY;
	}

	c->c_fd = open(fn, oflags, 0644);
	if ( c->c_fd < 0 ) {
		fprintf(stderr, "%s: open: %s: %s\n", cmd, fn, os_err());
		goto out_free;
	}

	if ( create ) {
		off_t initial;
		int rc;

		hdr.h_nelem = 0;
		hdr.h_magic = COLA_MAGIC;
		hdr.h_vers = COLA_CURRENT_VER;
		if ( !fd_write(c->c_fd, &hdr, sizeof(hdr)) ) {
			fprintf(stderr, "%s: write: %s: %s\n",
				cmd, fn, os_err());
			goto out_close;
		}

		/* reserve space for the header and the initial levels */
		initial = (1U << (INITIAL_LEVELS + 1)) - 1;
		initial *= sizeof(struct cola_elem);
		initial += sizeof(hdr);
		/* the error number is the return value, errno is not set.
		 * Failure is reported but not treated as fatal.
		 */
		rc = posix_fallocate(c->c_fd, 0, initial);
		if ( rc ) {
			fprintf(stderr, "%s: %s: fallocate: %s\n",
				cmd, fn, strerror(rc));
		}
	}else{
		sz = sizeof(hdr);
		if ( !fd_read(c->c_fd, &hdr, &sz, &eof) || sz != sizeof(hdr) ) {
			fprintf(stderr, "%s: read: %s: %s\n",
				cmd, fn, os_err2("File truncated"));
			goto out_close;
		}

		if ( hdr.h_magic != COLA_MAGIC ) {
			fprintf(stderr, "%s: %s: Bad magic\n", cmd, fn);
			goto out_close;
		}

		if ( hdr.h_vers != COLA_CURRENT_VER ) {
			fprintf(stderr, "%s: %s: Unsupported vers\n", cmd, fn);
			goto out_close;
		}

		c->c_nelem = hdr.h_nelem;
	}

	c->c_rw = rw;
	if ( !map(c) )
		goto out_close;

	/* NOTE(review): the test compares against INITIAL_LEVELS but the
	 * assignment uses INITIAL_LEVELS + 1 — confirm this is intended
	 */
	c->c_nxtlvl = cfls(c->c_nelem);
	if ( c->c_nxtlvl < INITIAL_LEVELS )
		c->c_nxtlvl = INITIAL_LEVELS + 1;
	dprintf("next level init to %u\n", c->c_nxtlvl);

	/* success */
	goto out;

out_close:
	close(c->c_fd);
out_free:
	free(c);
	c = NULL;
out:
	return c;
}
Example #18
0
/* Create a writer for an index described by schema.
 *
 * schema is an array of num field descriptions. Every entry is validated
 * (name present and not too long, value getter set, type id in range)
 * before any field type is initialised. The name pointers are stored as
 * given, not copied.
 *
 * Return value: the new writer, or NULL on failure. On failure every
 * field private that was already initialised is finalised and all memory
 * allocated here is released.
 */
_public gidx_wr_t gidx_wr_new(const struct gidx_schema *schema,
				unsigned int num)
{
	struct _gidx_wr *wr;
	unsigned int i;

	if ( num > GIDX_MAX_INDEX ) {
		ERR("num_fields (%u) greater than maximum permitted (%u)",
			num, GIDX_MAX_INDEX);
		return NULL;
	}

	wr = calloc(1, sizeof(*wr));
	if ( NULL == wr ) {
		ERR("calloc: %s", os_err());
		return NULL;
	}

	wr->wr_field = calloc(num, sizeof(*wr->wr_field));
	if ( NULL == wr->wr_field ) {
		ERR("calloc: %s", os_err());
		goto out;
	}

	for(i = 0; i < num; i++) {
		/* Field name */
		if ( NULL == schema[i].s_name ||
			strlen(schema[i].s_name) > GIDX_MAX_INDEX_NLEN ) {
			/* never hand a NULL pointer to %s */
			ERR("%s: bad field name",
				schema[i].s_name ? schema[i].s_name : "(null)");
			goto out_free_field;
		}
		wr->wr_field[i].f_name = schema[i].s_name;

		/* get_val */
		if ( NULL == schema[i].s_get_val.su_get_str ) {
			ERR("%s: get_val unset", schema[i].s_name);
			goto out_free_field;
		}
		wr->wr_field[i].f_get_val = schema[i].s_get_val;

		/* field type: check this entry's type id, it is used as
		 * an index into _gidx_types just below
		 */
		if ( schema[i].s_type >= GIDX_NR_TYPES ) {
			ERR("%s: bad type (%u)", schema[i].s_name,
						schema[i].s_type);
			goto out_free_field;
		}
		wr->wr_field[i].f_type = _gidx_types[schema[i].s_type];
		wr->wr_field[i].f_type_id = schema[i].s_type;
		wr->wr_field[i].f_mode = schema[i].s_mode;
		wr->wr_num_fields++;
	}

	/* Initialise field */
	for(i = 0; i < num; i++) {
		struct _gidx_type_wr *type;
		gidx_mode_t mode;

		type = wr->wr_field[i].f_type;
		mode = wr->wr_field[i].f_mode;

		DEBUG("field[%u]: %s %s %s", i,
			gidx_mode_str(mode),
			type->t_name,
			wr->wr_field[i].f_name);

		wr->wr_field[i].f_priv =(*type->t_wr_init)(mode,
						schema[i].s_options);
		if ( NULL == wr->wr_field[i].f_priv ) {
			/* only fields 0..i-1 need finalising */
			num = i;
			goto out_free_priv;
		}
	}

	/* record header followed by one r_field slot per field */
	wr->wr_rec = malloc(wr->wr_num_fields * sizeof(*wr->wr_rec->r_field) +
				sizeof(*wr->wr_rec));
	if ( NULL == wr->wr_rec ) {
		ERR("malloc: %s", os_err());
		goto out_free_rec;
	}

	wr->wr_rec->r_record = NULL;

	return wr;

out_free_rec:
	free(wr->wr_rec);
out_free_priv:
	for(i = 0; i < num; i++) {
		struct _gidx_type_wr *type;
		type = wr->wr_field[i].f_type;
		type->t_wr_fini(wr->wr_field[i].f_priv);
		DEBUG("free field %u", i);
	}
out_free_field:
	free(wr->wr_field);
out:
	free(wr);
	return NULL;
}
Example #19
0
/* Decode the stream read from in and write the result to outfd.
 *
 * The stream starts with a header carrying magic, version and total
 * length, followed by blocks. Each block begins with a bwt_t word whose
 * BMO_BLOCK_COMPRESSED bit selects between an encoded block (omega, then
 * MTF, then BWT decoding) and a block stored as-is. Blocks are processed
 * until the input reports end of file.
 *
 * Return value: 0 when a read from in or the write to outfd fails,
 * 1 otherwise. Note that a short header, bad magic, wrong version or a
 * short block prefix also return 1.
 */
static int decompress(fibuf_t in, int outfd)
{
	uint8_t buf[BMO_BLOCK_SIZE];
	struct bmo_hdr hdr;
	int packed;
	size_t sz;
	bwt_t bwt_idx;

	sz = sizeof(hdr);
	if ( !fibuf_read(in, &hdr, &sz) ) {
		dprintf("%s: read: %s\n", cmd, os_err());
		return 0;
	}

	if ( fibuf_eof(in) || sz < sizeof(hdr) ) {
		dprintf("%s: desync on hdr read\n", cmd);
		return 1;
	}

	if ( hdr.h_magic != BMO_MAGIC ) {
		dprintf("%s: bad magic\n", cmd);
		return 1;
	}
	if ( hdr.h_vers != BMO_CURRENT_VERS ) {
		dprintf("%s: wrong version\n", cmd);
		return 1;
	}

	do {
		sz = sizeof(bwt_idx);
		if ( !fibuf_read(in, &bwt_idx, &sz) ) {
			dprintf("%s: read: %s\n", cmd, os_err());
			return 0;
		}

		if ( fibuf_eof(in) || sz < sizeof(bwt_idx) ) {
			dprintf("%s: desync on bwt read\n", cmd);
			return 1;
		}

		/* split the flag bit from the index proper */
		packed = !!(bwt_idx & BMO_BLOCK_COMPRESSED);
		bwt_idx &= ~BMO_BLOCK_COMPRESSED;

		/* a block is at most BMO_BLOCK_SIZE, less for the tail */
		sz = (hdr.h_len < BMO_BLOCK_SIZE) ? hdr.h_len : BMO_BLOCK_SIZE;
		hdr.h_len -= sz;

		if ( !packed ) {
			if ( !fibuf_read(in, buf, &sz) )
				return 0;
			dprintf("uncompressed block\n");
		}else{
			omega_decode(in, buf, sz);
			dprintf("omega decode:\n");
			dhex_dump(buf, sz, 0);

			mtf_decode(buf, sz);
			dprintf("MTF decode:\n");
			dhex_dump(buf, sz, 0);

			bwt_decode(buf, sz, bwt_idx);
			dprintf("BWT decode:\n");
			dhex_dump(buf, sz, 0);
		}

		if ( !fd_write(outfd, buf, sz) )
			return 0;
	} while ( !fibuf_eof(in) );

	return 1;
}