/*
 * 'vdi snapshot' sub-command (visible part only).
 *
 * Takes the VDI name from argv[optind] and rejects a non-zero
 * vdi_cmd_data.snapshot_id with EXIT_USAGE, since only a tag name is accepted.
 * It then resolves the name to a vid with find_vdi_name(), reads
 * SD_INODE_HEADER_SIZE bytes of the VDI inode into an on-stack buffer and,
 * when a snapshot tag was supplied, writes SD_MAX_VDI_TAG_LEN bytes of it over
 * the 'tag' field of the inode object using the inode's nr_copies.
 *
 * NOTE(review): the definition is not closed on this line; the result of the
 * tag write is not checked in the part that is visible - confirm the
 * remainder handles it.
 */
static int vdi_snapshot(int argc, char **argv) { char *vdiname = argv[optind++]; uint32_t vid; int ret; char buf[SD_INODE_HEADER_SIZE]; struct sheepdog_inode *inode = (struct sheepdog_inode *)buf; if (vdi_cmd_data.snapshot_id != 0) { fprintf(stderr, "please specify a non-integer value for " "a snapshot tag name\n"); return EXIT_USAGE; } ret = find_vdi_name(vdiname, 0, "", &vid, 0); if (ret < 0) { fprintf(stderr, "failed to open vdi %s\n", vdiname); return EXIT_FAILURE; } ret = sd_read_object(vid_to_vdi_oid(vid), inode, SD_INODE_HEADER_SIZE, 0); if (ret != SD_RES_SUCCESS) { fprintf(stderr, "failed to read an inode header\n"); return EXIT_FAILURE; } if (vdi_cmd_data.snapshot_tag[0]) { ret = sd_write_object(vid_to_vdi_oid(vid), 0, vdi_cmd_data.snapshot_tag, SD_MAX_VDI_TAG_LEN, offsetof(struct sheepdog_inode, tag), 0, inode->nr_copies, 0); }
/*
 * Flush the meta-data of 'inode' to its VDI object through 'writer'.
 *
 * store_policy == 0: a single write of SD_INODE_HEADER_SIZE bytes, honouring
 * the caller's 'direct' flag.
 *
 * Any other store_policy: the header plus sd_inode_get_meta_size(inode, 0)
 * bytes are written first, followed by a sizeof(uint32_t) write at the offset
 * of 'btree_counter'.  Both of these writes are issued with direct == false.
 *
 * NOTE(review): the second write hands 'inode' itself (not
 * &inode->btree_counter) to the writer together with the counter's offset -
 * confirm this matches the writer callback's contract.
 *
 * Returns the status of the last write that was attempted.
 */
int sd_inode_write(write_node_fn writer, struct sd_inode *inode, int flags,
		   bool create, bool direct)
{
	uint32_t meta_len;
	int rc;

	if (inode->store_policy == 0)
		return writer(vid_to_vdi_oid(inode->vdi_id), inode,
			      SD_INODE_HEADER_SIZE, 0, flags,
			      inode->nr_copies, inode->copy_policy,
			      create, direct);

	meta_len = SD_INODE_HEADER_SIZE + sd_inode_get_meta_size(inode, 0);
	rc = writer(vid_to_vdi_oid(inode->vdi_id), inode, meta_len, 0, flags,
		    inode->nr_copies, inode->copy_policy, create, false);
	if (rc != SD_RES_SUCCESS)
		return rc;

	return writer(vid_to_vdi_oid(inode->vdi_id), inode, sizeof(uint32_t),
		      offsetof(struct sd_inode, btree_counter), flags,
		      inode->nr_copies, inode->copy_policy, create, false);
}
static int read_account_meta(const char *account, uint64_t *bucket_count, uint64_t *object_count, uint64_t *used) { struct sd_inode *inode = NULL; struct bucket_iterater_arg arg = {}; uint32_t account_vid; uint64_t oid; int ret; ret = sd_lookup_vdi(account, &account_vid); if (ret != SD_RES_SUCCESS) goto out; oid = vid_to_vdi_oid(account_vid); inode = xmalloc(sizeof(*inode)); ret = sd_read_object(oid, (char *)inode, sizeof(struct sd_inode), 0); if (ret != SD_RES_SUCCESS) { sd_err("Failed to read inode header %"PRIx64, oid); goto out; } sd_inode_index_walk(inode, bucket_iterater, &arg); *object_count = arg.object_count; *bucket_count = arg.bucket_count; *used = arg.bytes_used; out: free(inode); return ret; }
static int onode_do_create(struct kv_onode *onode, struct sd_inode *inode, uint32_t idx, bool create) { uint32_t vid = inode->vdi_id; uint64_t oid = vid_to_data_oid(vid, idx), len; int ret; onode->oid = oid; if (onode->inlined) len = onode->size; else len = sizeof(struct onode_extent) * onode->nr_extent; ret = sd_write_object(oid, (char *)onode, BLOCK_SIZE + len, 0, create); if (ret != SD_RES_SUCCESS) { sd_err("failed to create object, %" PRIx64, oid); goto out; } if (!create) goto out; INODE_SET_VID(inode, idx, vid); ret = sd_inode_write_vid(sheep_bnode_writer, inode, idx, vid, vid, 0, false, false); if (ret != SD_RES_SUCCESS) { sd_err("failed to update inode, %" PRIx64, vid_to_vdi_oid(vid)); goto out; } out: return ret; }
/*
 * Invoke 'cb' for the buckets of 'account'.
 *
 * The account VDI is looked up by name, then - with the cluster driver lock
 * for that vid held - its inode is read and its b-tree is traversed with
 * bucket_iterater(), which receives 'opaque' and 'cb' through the iterator
 * argument.  The lock is released on every path that acquired it.
 *
 * NOTE(review): the whole struct sd_inode lives on the stack here while other
 * readers of the inode allocate it with xmalloc() - confirm the stack can
 * hold it.
 *
 * Returns SD_RES_SUCCESS or the status of the failing lookup / read.
 */
int kv_iterate_bucket(const char *account, bucket_iter_cb cb, void *opaque)
{
	struct sd_inode account_inode;
	struct bucket_iterater_arg iter_arg = {opaque, cb, 0, 0, 0};
	uint32_t account_vid;
	uint64_t inode_oid;
	int rc;

	rc = sd_lookup_vdi(account, &account_vid);
	if (rc != SD_RES_SUCCESS) {
		sd_err("Failed to find account %s", account);
		return rc;
	}

	inode_oid = vid_to_vdi_oid(account_vid);

	sys->cdrv->lock(account_vid);
	rc = sd_read_object(inode_oid, (char *)&account_inode,
			    sizeof(struct sd_inode), 0);
	if (rc == SD_RES_SUCCESS)
		traverse_btree(sheep_bnode_reader, &account_inode,
			       bucket_iterater, &iter_arg);
	else
		sd_err("Failed to read account inode header %s", account);
	sys->cdrv->unlock(account_vid);

	return rc;
}
static int bnode_create(struct kv_bnode *bnode, uint32_t account_vid) { struct sd_inode *inode = xmalloc(sizeof(struct sd_inode)); uint32_t tmp_vid, idx; uint64_t hval, i; int ret; ret = sd_read_object(vid_to_vdi_oid(account_vid), (char *)inode, sizeof(*inode), 0); if (ret != SD_RES_SUCCESS) { sd_err("failed to read %" PRIx32 " %s", account_vid, sd_strerror(ret)); goto out; } hval = sd_hash(bnode->name, strlen(bnode->name)); for (i = 0; i < MAX_DATA_OBJS; i++) { idx = (hval + i) % MAX_DATA_OBJS; tmp_vid = INODE_GET_VID(inode, idx); if (tmp_vid) continue; else break; } if (i == MAX_DATA_OBJS) { ret = SD_RES_NO_SPACE; goto out; } ret = bnode_do_create(bnode, inode, idx); out: free(inode); return ret; }
/*
 * Resolve a VDI (optionally one of its snapshots) and read its inode.
 *
 * @vdiname: name of the VDI
 * @snapid:  snapshot id passed to the lookup; non-zero selects the
 *           "snapshot by id" wording of the read-failure message
 * @tag:     snapshot tag passed to the lookup; may be NULL or empty
 * @pvid:    if not NULL, receives the resolved vid on success
 * @inode:   destination buffer
 * @size:    number of bytes to read from the start of the inode object
 *
 * Prints a diagnostic on stderr and returns EXIT_FAILURE when the lookup or
 * the read fails; returns EXIT_SUCCESS otherwise.
 */
static int read_vdi_obj(char *vdiname, int snapid, const char *tag,
			uint32_t *pvid, struct sheepdog_inode *inode,
			size_t size)
{
	uint32_t found_vid;
	int rc;

	rc = find_vdi_name(vdiname, snapid, tag, &found_vid, 0);
	if (rc < 0) {
		fprintf(stderr, "Failed to open VDI %s\n", vdiname);
		return EXIT_FAILURE;
	}

	rc = sd_read_object(vid_to_vdi_oid(found_vid), inode, size, 0, true);
	if (rc == SD_RES_SUCCESS) {
		if (pvid)
			*pvid = found_vid;
		return EXIT_SUCCESS;
	}

	/* Word the error after the way the caller addressed the VDI. */
	if (snapid)
		fprintf(stderr, "Failed to read a snapshot %s:%d\n",
			vdiname, snapid);
	else if (tag && tag[0])
		fprintf(stderr, "Failed to read a snapshot %s:%s\n",
			vdiname, tag);
	else
		fprintf(stderr, "Failed to read a vdi %s\n", vdiname);

	return EXIT_FAILURE;
}
/*
 * Write 'bnode' into slot 'idx' of the account described by 'inode'.
 *
 * The object id is derived from the account's vdi_id and 'idx' and stored in
 * bnode->oid before the whole bnode is written.  When 'create' is true the
 * slot is then marked as owned by the account vid and that inode entry is
 * written back; when it is false only the object is rewritten.
 *
 * Returns SD_RES_SUCCESS or the status of the write that failed.
 */
static int bnode_do_create(struct kv_bnode *bnode, struct sd_inode *inode,
			   uint32_t idx, bool create)
{
	uint32_t account_vid = inode->vdi_id;
	uint64_t data_oid = vid_to_data_oid(account_vid, idx);
	int rc;

	bnode->oid = data_oid;

	rc = sd_write_object(data_oid, (char *)bnode, sizeof(*bnode), 0,
			     create);
	if (rc != SD_RES_SUCCESS) {
		sd_err("failed to create object, %" PRIx64, data_oid);
		return rc;
	}

	/* Nothing to record in the inode when reusing an existing slot. */
	if (!create)
		return rc;

	sd_inode_set_vid(inode, idx, account_vid);
	rc = sd_inode_write_vid(inode, idx, account_vid, account_vid, 0,
				false, false);
	if (rc != SD_RES_SUCCESS)
		sd_err("failed to update inode, %" PRIx64,
		       vid_to_vdi_oid(account_vid));

	return rc;
}
/*
 * Enumerate the VDIs in use and hand each one to 'func' (visible part only).
 *
 * An SD_OP_READ_VDIS request fills the static 'vdi_inuse' bitmap.  For every
 * VDI FOR_EACH_VDI yields from that bitmap, the first SD_INODE_HEADER_SIZE
 * bytes of its inode are read into the static buffer 'i'; entries whose name
 * is empty are skipped as deleted.  When 'size' exceeds the header size, the
 * data_vdi_id entries needed for vdi_size are read as well, capped at
 * 'size' - SD_INODE_HEADER_SIZE bytes.  'func' receives the vdi id, name,
 * tag, snapshot id (0 unless vdi_is_snapshot() says otherwise), a literal 0,
 * a pointer to 'i' and 'data'.  The same static buffer is cleared and reused
 * for every VDI.
 *
 * A VDI whose inode cannot be read is reported with sd_err() and skipped.
 *
 * NOTE(review): when rsp->result reports a failure the code jumps to 'out'
 * without assigning 'ret'; the 'out' label and the end of the function are
 * not visible on this line - confirm what is returned in that case.
 */
int parse_vdi(vdi_parser_func_t func, size_t size, void *data) { int ret; unsigned long nr; static struct sd_inode i; struct sd_req req; struct sd_rsp *rsp = (struct sd_rsp *)&req; static DECLARE_BITMAP(vdi_inuse, SD_NR_VDIS); unsigned int rlen = sizeof(vdi_inuse); sd_init_req(&req, SD_OP_READ_VDIS); req.data_length = sizeof(vdi_inuse); ret = dog_exec_req(sdhost, sdport, &req, &vdi_inuse); if (ret < 0) goto out; if (rsp->result != SD_RES_SUCCESS) { sd_err("%s", sd_strerror(rsp->result)); goto out; } FOR_EACH_VDI(nr, vdi_inuse) { uint64_t oid; uint32_t snapid; oid = vid_to_vdi_oid(nr); memset(&i, 0, sizeof(i)); ret = sd_read_object(oid, &i, SD_INODE_HEADER_SIZE, 0, true); if (ret != SD_RES_SUCCESS) { sd_err("Failed to read inode header"); continue; } if (i.name[0] == '\0') /* this VDI has been deleted */ continue; if (size > SD_INODE_HEADER_SIZE) { rlen = DIV_ROUND_UP(i.vdi_size, SD_DATA_OBJ_SIZE) * sizeof(i.data_vdi_id[0]); if (rlen > size - SD_INODE_HEADER_SIZE) rlen = size - SD_INODE_HEADER_SIZE; ret = sd_read_object(oid, ((char *)&i) + SD_INODE_HEADER_SIZE, rlen, SD_INODE_HEADER_SIZE, true); if (ret != SD_RES_SUCCESS) { sd_err("Failed to read inode"); continue; } } snapid = vdi_is_snapshot(&i) ? i.snap_id : 0; func(i.vdi_id, i.name, i.tag, snapid, 0, &i, data); }
/*
 * Enumerate the VDIs in use and hand each one to 'func' (visible part only).
 *
 * An SD_OP_READ_VDIS request sent to sd_nid fills the static 'vdi_inuse'
 * bitmap.  For every VDI FOR_EACH_VDI yields from that bitmap, the inode
 * header plus one struct sd_index_header (needed for b-tree inodes) is read
 * into the heap buffer 'i'; entries whose name is empty are skipped as
 * deleted.  When 'size' exceeds SD_INODE_HEADER_SIZE, a further
 * sd_inode_get_meta_size(i, size) bytes following the header are read.
 * 'func' receives the vdi id, name, tag, snapshot id (0 unless
 * vdi_is_snapshot() says otherwise), a literal 0, the inode pointer and
 * 'data'.  The buffer is reused for every VDI.
 *
 * A VDI whose inode cannot be read is reported with sd_err() and skipped.
 *
 * NOTE(review): the 'out' label and the release of 'i' are not visible on
 * this line; when rsp->result reports a failure 'ret' is not reassigned
 * before the jump to 'out' - confirm the remainder frees 'i' and returns an
 * error in that case.
 */
int parse_vdi(vdi_parser_func_t func, size_t size, void *data) { int ret; unsigned long nr; struct sd_inode *i = xmalloc(sizeof(*i)); struct sd_req req; struct sd_rsp *rsp = (struct sd_rsp *)&req; static DECLARE_BITMAP(vdi_inuse, SD_NR_VDIS); uint32_t rlen; sd_init_req(&req, SD_OP_READ_VDIS); req.data_length = sizeof(vdi_inuse); ret = dog_exec_req(&sd_nid, &req, vdi_inuse); if (ret < 0) goto out; if (rsp->result != SD_RES_SUCCESS) { sd_err("%s", sd_strerror(rsp->result)); goto out; } FOR_EACH_VDI(nr, vdi_inuse) { uint64_t oid; uint32_t snapid; oid = vid_to_vdi_oid(nr); /* for B-tree inode, we also need sd_index_header */ ret = dog_read_object(oid, i, SD_INODE_HEADER_SIZE + sizeof(struct sd_index_header), 0, true); if (ret != SD_RES_SUCCESS) { sd_err("Failed to read inode header"); continue; } if (i->name[0] == '\0') /* this VDI has been deleted */ continue; if (size > SD_INODE_HEADER_SIZE) { rlen = sd_inode_get_meta_size(i, size); ret = dog_read_object(oid, ((char *)i) + SD_INODE_HEADER_SIZE, rlen, SD_INODE_HEADER_SIZE, true); if (ret != SD_RES_SUCCESS) { sd_err("Failed to read inode"); continue; } } snapid = vdi_is_snapshot(i) ? i->snap_id : 0; func(i->vdi_id, i->name, i->tag, snapid, 0, i, data); }
static int onode_create(struct kv_onode *onode, uint32_t bucket_vid) { struct sd_inode *inode = xmalloc(sizeof(struct sd_inode)); uint32_t tmp_vid, idx; uint64_t hval, i; int ret; bool create = true; sys->cdrv->lock(bucket_vid); ret = sd_read_object(vid_to_vdi_oid(bucket_vid), (char *)inode, sizeof(*inode), 0); if (ret != SD_RES_SUCCESS) { sd_err("failed to read %" PRIx32 " %s", bucket_vid, sd_strerror(ret)); goto out; } hval = sd_hash(onode->name, strlen(onode->name)); for (i = 0; i < MAX_DATA_OBJS; i++) { idx = (hval + i) % MAX_DATA_OBJS; tmp_vid = INODE_GET_VID(inode, idx); if (tmp_vid) { uint64_t oid = vid_to_data_oid(bucket_vid, idx); char name[SD_MAX_OBJECT_NAME] = { }; ret = sd_read_object(oid, name, sizeof(name), 0); if (ret != SD_RES_SUCCESS) goto out; if (name[0] == 0) { create = false; goto create; } } else break; } if (i == MAX_DATA_OBJS) { ret = SD_RES_NO_SPACE; goto out; } create: ret = onode_do_create(onode, inode, idx, create); out: free(inode); sys->cdrv->unlock(bucket_vid); return ret; }
/* * Check if object by name exists in a bucket and init 'onode' if it exists. * * Return SD_RES_SUCCESS if found, SD_RES_NO_OBJ if not found. * * We check adjacent objects one by one once we get a start index by hashing * name. Unallocated slot marks the end of the check window. * * For e.g, if we are going to check if fish in the following bucket, assume * fish hashes to 'sheep', so we compare the name one by one from 'sheep' to * 'fish'. '\0' indicates that object was deleted before checking. * * [ sheep, dog, wolve, '\0', fish, {unallocated}, tiger, ] */ static int onode_lookup(struct kv_onode *onode, uint32_t ovid, const char *name) { struct sd_inode *inode = xmalloc(sizeof(struct sd_inode)); uint32_t tmp_vid, idx; uint64_t hval, i; int ret; sys->cdrv->lock(ovid); ret = sd_read_object(vid_to_vdi_oid(ovid), (char *)inode, sizeof(*inode), 0); if (ret != SD_RES_SUCCESS) { sd_err("failed to read %" PRIx32 " %s", ovid, sd_strerror(ret)); goto out; } hval = sd_hash(name, strlen(name)); for (i = 0; i < MAX_DATA_OBJS; i++) { idx = (hval + i) % MAX_DATA_OBJS; tmp_vid = INODE_GET_VID(inode, idx); if (tmp_vid) { uint64_t oid = vid_to_data_oid(ovid, idx); ret = sd_read_object(oid, (char *)onode, sizeof(*onode), 0); if (ret != SD_RES_SUCCESS) goto out; if (strcmp(onode->name, name) == 0) break; } else { ret = SD_RES_NO_OBJ; break; } } if (i == MAX_DATA_OBJS) { ret = SD_RES_NO_OBJ; goto out; } out: free(inode); sys->cdrv->unlock(ovid); return ret; }
/*
 * Invoke 'cb' for the objects stored in the bucket 'bucket_vid'.
 *
 * The bucket inode is read in full and its b-tree is traversed with
 * object_iterater(), which receives 'opaque' and 'cb' through the iterator
 * argument.
 *
 * Returns SD_RES_SUCCESS, or the read error when the inode cannot be read.
 */
static int bucket_iterate_object(uint32_t bucket_vid, object_iter_cb cb,
				 void *opaque)
{
	struct object_iterater_arg iter_arg = {opaque, cb, 0};
	struct sd_inode *bucket_inode = xmalloc(sizeof(*bucket_inode));
	int rc;

	rc = sd_read_object(vid_to_vdi_oid(bucket_vid), (char *)bucket_inode,
			    sizeof(struct sd_inode), 0);
	if (rc == SD_RES_SUCCESS)
		traverse_btree(sheep_bnode_reader, bucket_inode,
			       object_iterater, &iter_arg);
	else
		sd_err("failed to read inode %s", sd_strerror(rc));

	free(bucket_inode);
	return rc;
}
/*
 * Persist one index entry of an inode.
 *
 * store_policy == 0: only the 32-bit 'value' is written, at the position of
 * entry 'idx' behind the inode header of the VDI object of 'vid'.
 *
 * Any other store_policy (b-tree inode): a single entry cannot be written on
 * its own, so all of the inode meta-data is flushed via sd_inode_write().
 *
 * Returns the status reported by the writer.
 */
int sd_inode_write_vid(write_node_fn writer, struct sd_inode *inode,
		       uint32_t idx, uint32_t vid, uint32_t value,
		       int flags, bool create, bool direct)
{
	if (inode->store_policy != 0)
		return sd_inode_write(writer, inode, flags, create, direct);

	return writer(vid_to_vdi_oid(vid), &value, sizeof(value),
		      SD_INODE_HEADER_SIZE + sizeof(value) * idx, flags,
		      inode->nr_copies, inode->copy_policy, create, direct);
}
/*
 * 'vdi snapshot' sub-command (visible part only).
 *
 * Takes the VDI name from argv[optind] and rejects a non-zero
 * vdi_cmd_data.snapshot_id with EXIT_USAGE, since only a tag name is accepted.
 * read_vdi_obj() then resolves the current VDI and loads
 * SD_INODE_HEADER_SIZE bytes of its inode into an on-stack buffer; its
 * failure code is returned unchanged.  When a snapshot tag was supplied,
 * SD_MAX_VDI_TAG_LEN bytes of it are written over the 'tag' field of the
 * inode object using the inode's nr_copies.
 *
 * NOTE(review): the definition is not closed on this line; the result of the
 * tag write is not checked in the part that is visible - confirm the
 * remainder handles it.
 */
static int vdi_snapshot(int argc, char **argv) { char *vdiname = argv[optind++]; uint32_t vid; int ret; char buf[SD_INODE_HEADER_SIZE]; struct sheepdog_inode *inode = (struct sheepdog_inode *)buf; if (vdi_cmd_data.snapshot_id != 0) { fprintf(stderr, "Please specify a non-integer value for " "a snapshot tag name\n"); return EXIT_USAGE; } ret = read_vdi_obj(vdiname, 0, "", &vid, inode, SD_INODE_HEADER_SIZE); if (ret != EXIT_SUCCESS) return ret; if (vdi_cmd_data.snapshot_tag[0]) { ret = sd_write_object(vid_to_vdi_oid(vid), 0, vdi_cmd_data.snapshot_tag, SD_MAX_VDI_TAG_LEN, offsetof(struct sheepdog_inode, tag), 0, inode->nr_copies, 0, true); }
/*
 * Enumerate the VDIs in use and hand each one to 'func'.
 *
 * @func: callback invoked once per live VDI with the vdi id, name, tag,
 *        snapshot id (0 when is_current() holds), a literal 0, a pointer to
 *        the inode buffer and 'data'
 * @size: how much of the inode the callback needs; anything beyond
 *        SD_INODE_HEADER_SIZE makes the data_vdi_id map get loaded too,
 *        capped at 'size' - SD_INODE_HEADER_SIZE bytes
 * @data: opaque pointer forwarded to 'func'
 *
 * The in-use bitmap is fetched with SD_OP_READ_VDIS.  The inode buffer is a
 * single static object that is cleared and refilled for every VDI.  A VDI
 * whose inode cannot be read is reported on stderr and skipped; one whose
 * name is empty is treated as deleted.
 *
 * Returns -1 if the connection fails, the negative collie_exec_req() result
 * if the request fails, otherwise whatever status the last executed call
 * left behind.
 */
int parse_vdi(vdi_parser_func_t func, size_t size, void *data)
{
	static struct sheepdog_inode inode;
	static DECLARE_BITMAP(vdi_inuse, SD_NR_VDIS);
	struct sd_req req;
	unsigned long nr;
	int ret, fd;

	fd = connect_to(sdhost, sdport);
	if (fd < 0) {
		fprintf(stderr, "Failed to connect to %s:%d\n", sdhost, sdport);
		return -1;
	}

	sd_init_req(&req, SD_OP_READ_VDIS);
	req.data_length = sizeof(vdi_inuse);

	ret = collie_exec_req(fd, &req, &vdi_inuse);
	if (ret < 0) {
		fprintf(stderr, "Failed to read VDIs from %s:%d\n",
			sdhost, sdport);
		close(fd);
		return ret;
	}
	close(fd);

	for (nr = 0; nr < SD_NR_VDIS; nr++) {
		unsigned int map_len;
		uint32_t snapid;
		uint64_t oid;

		if (!test_bit(nr, vdi_inuse))
			continue;

		oid = vid_to_vdi_oid(nr);
		memset(&inode, 0, sizeof(inode));

		ret = sd_read_object(oid, &inode, SD_INODE_HEADER_SIZE, 0,
				     true);
		if (ret != SD_RES_SUCCESS) {
			fprintf(stderr, "Failed to read inode header\n");
			continue;
		}

		/* an empty name marks a deleted VDI */
		if (inode.name[0] == '\0')
			continue;

		if (size > SD_INODE_HEADER_SIZE) {
			map_len = DIV_ROUND_UP(inode.vdi_size,
					       SD_DATA_OBJ_SIZE) *
				  sizeof(inode.data_vdi_id[0]);
			if (map_len > size - SD_INODE_HEADER_SIZE)
				map_len = size - SD_INODE_HEADER_SIZE;

			ret = sd_read_object(oid,
					     ((char *)&inode) +
					     SD_INODE_HEADER_SIZE,
					     map_len, SD_INODE_HEADER_SIZE,
					     true);
			if (ret != SD_RES_SUCCESS) {
				fprintf(stderr, "Failed to read inode\n");
				continue;
			}
		}

		snapid = is_current(&inode) ? 0 : inode.snap_id;
		func(inode.vdi_id, inode.name, inode.tag, snapid, 0, &inode,
		     data);
	}

	return ret;
}
static int vdi_create(int argc, char **argv) { char *vdiname = argv[optind++]; uint64_t size; uint32_t vid; uint64_t oid; int idx, max_idx, ret; struct sheepdog_inode *inode = NULL; char *buf = NULL; if (!argv[optind]) { fprintf(stderr, "please specify the size of vdi\n"); return EXIT_USAGE; } ret = parse_option_size(argv[optind], &size); if (ret < 0) return EXIT_USAGE; if (size > SD_MAX_VDI_SIZE) { fprintf(stderr, "too big image size, %s\n", argv[optind]); return EXIT_USAGE; } ret = do_vdi_create(vdiname, size, 0, &vid, 0); if (ret != EXIT_SUCCESS || !vdi_cmd_data.prealloc) goto out; inode = malloc(sizeof(*inode)); buf = zalloc(SD_DATA_OBJ_SIZE); if (!inode || !buf) { fprintf(stderr, "oom\n"); ret = EXIT_SYSFAIL; goto out; } ret = sd_read_object(vid_to_vdi_oid(vid), inode, sizeof(*inode), 0); if (ret != SD_RES_SUCCESS) { fprintf(stderr, "failed to read a newly created vdi object\n"); ret = EXIT_FAILURE; goto out; } max_idx = DIV_ROUND_UP(size, SD_DATA_OBJ_SIZE); for (idx = 0; idx < max_idx; idx++) { oid = vid_to_data_oid(vid, idx); ret = sd_write_object(oid, 0, buf, SD_DATA_OBJ_SIZE, 0, 0, inode->nr_copies, 1); if (ret != SD_RES_SUCCESS) { ret = EXIT_FAILURE; goto out; } inode->data_vdi_id[idx] = vid; ret = sd_write_object(vid_to_vdi_oid(vid), 0, &vid, sizeof(vid), SD_INODE_HEADER_SIZE + sizeof(vid) * idx, 0, inode->nr_copies, 0); if (ret) { ret = EXIT_FAILURE; goto out; } } ret = EXIT_SUCCESS; out: free(inode); free(buf); return ret; }
/*
 * Enumerate the VDIs in use and hand each one to 'func'.
 *
 * @func: callback invoked once per live VDI with the vdi id, name, tag,
 *        snap_id, a literal 0, a pointer to the inode buffer and 'data'
 * @size: how much of the inode the callback needs; anything beyond
 *        SD_INODE_HEADER_SIZE makes the data_vdi_id map get loaded too,
 *        capped at 'size' - SD_INODE_HEADER_SIZE bytes
 * @data: opaque pointer forwarded to 'func'
 *
 * The in-use bitmap is fetched with an SD_OP_READ_VDIS request stamped with
 * node_list_version.  The inode buffer is a single static object that is
 * cleared and refilled for every VDI.  A VDI whose inode cannot be read is
 * reported on stderr and skipped; one whose name is empty is treated as
 * deleted.
 *
 * Returns the negative value from connect_to() or exec_req() on failure and
 * 0 otherwise, even when individual VDIs were skipped.
 */
int parse_vdi(vdi_parser_func_t func, size_t size, void *data)
{
	static struct sheepdog_inode inode;
	static DECLARE_BITMAP(vdi_inuse, SD_NR_VDIS);
	struct sd_req req;
	unsigned int rlen, wlen = 0;
	unsigned long nr;
	int ret, fd;

	fd = connect_to(sdhost, sdport);
	if (fd < 0)
		return fd;

	memset(&req, 0, sizeof(req));
	req.opcode = SD_OP_READ_VDIS;
	req.data_length = sizeof(vdi_inuse);
	req.epoch = node_list_version;
	rlen = sizeof(vdi_inuse);

	ret = exec_req(fd, &req, vdi_inuse, &wlen, &rlen);
	close(fd);
	if (ret < 0)
		return ret;

	for (nr = 0; nr < SD_NR_VDIS; nr++) {
		uint64_t oid;

		if (!test_bit(nr, vdi_inuse))
			continue;

		oid = vid_to_vdi_oid(nr);
		memset(&inode, 0, sizeof(inode));

		ret = sd_read_object(oid, &inode, SD_INODE_HEADER_SIZE, 0);
		if (ret != SD_RES_SUCCESS) {
			fprintf(stderr, "Failed to read inode header\n");
			continue;
		}

		/* an empty name marks a deleted VDI */
		if (inode.name[0] == '\0')
			continue;

		if (size > SD_INODE_HEADER_SIZE) {
			rlen = DIV_ROUND_UP(inode.vdi_size, SD_DATA_OBJ_SIZE) *
			       sizeof(inode.data_vdi_id[0]);
			if (rlen > size - SD_INODE_HEADER_SIZE)
				rlen = size - SD_INODE_HEADER_SIZE;

			ret = sd_read_object(oid,
					     ((char *)&inode) +
					     SD_INODE_HEADER_SIZE,
					     rlen, SD_INODE_HEADER_SIZE);
			if (ret != SD_RES_SUCCESS) {
				fprintf(stderr, "Failed to read inode\n");
				continue;
			}
		}

		func(inode.vdi_id, inode.name, inode.tag, inode.snap_id, 0,
		     &inode, data);
	}

	return 0;
}
/* * Initialize the data vdi * * @vid: the vdi where the allocator resides */ int oalloc_init(uint32_t vid) { struct strbuf buf = STRBUF_INIT; struct sd_inode *inode = xmalloc(sizeof(struct sd_inode)); struct header hd = { .nr_free = 1, }; struct free_desc fd = { .start = 1, /* Use first object as the meta object */ .count = MAX_DATA_OBJS - 1, }; int ret; strbuf_add(&buf, &hd, sizeof(hd)); strbuf_add(&buf, &fd, sizeof(fd)); ret = sd_read_object(vid_to_vdi_oid(vid), (char *)inode, sizeof(*inode), 0); if (ret != SD_RES_SUCCESS) { sd_err("failed to read inode, %" PRIx32", %s", vid, sd_strerror(ret)); goto out; } ret = sd_write_object(vid_to_data_oid(vid, 0), buf.buf, buf.len, 0, true); if (ret != SD_RES_SUCCESS) { sd_err("failed to create meta object for %" PRIx32", %s", vid, sd_strerror(ret)); goto out; } sd_inode_set_vid(inode, 0, vid); ret = sd_inode_write_vid(inode, 0, vid, vid, 0, false, false); if (ret != SD_RES_SUCCESS) { sd_err("failed to update inode, %" PRIx32", %s", vid, sd_strerror(ret)); goto out; } out: strbuf_release(&buf); free(inode); return ret; } /* * Allocate the objects and update the free list. * * Callers are expected to call oalloc_new_finish() to update the inode bitmap * after filling up the data. 
* * @vid: the vdi where the allocator resides * @start: start index of the objects to allocate * @count: number of the objects to allocate */ int oalloc_new_prepare(uint32_t vid, uint64_t *start, uint64_t count) { char *meta = xvalloc(SD_DATA_OBJ_SIZE); struct header *hd; struct free_desc *fd; uint64_t oid = vid_to_data_oid(vid, 0), i; int ret; ret = sd_read_object(oid, meta, SD_DATA_OBJ_SIZE, 0); if (ret != SD_RES_SUCCESS) { sd_err("failed to read meta %" PRIx64 ", %s", oid, sd_strerror(ret)); goto out; } hd = (struct header *)meta; fd = (struct free_desc *)(meta + oalloc_meta_length(hd)) - 1; sd_debug("used %"PRIu64", nr_free %"PRIu64, hd->used, hd->nr_free); for (i = 0; i < hd->nr_free; i++, fd--) { sd_debug("start %"PRIu64", count %"PRIu64, fd->start, fd->count); if (fd->count > count) break; } if (i == hd->nr_free) { ret = SD_RES_NO_SPACE; goto out; } *start = fd->start; fd->start += count; fd->count -= count; hd->used += count; /* Update the meta object */ ret = sd_write_object(oid, meta, oalloc_meta_length(hd), 0, false); if (ret != SD_RES_SUCCESS) sd_err("failed to update meta %"PRIx64 ", %s", oid, sd_strerror(ret)); out: free(meta); return ret; } /* * Update the inode map of the vid * * @vid: the vdi where the allocator resides * @start: start index of the objects to update * @count: number of the objects to update */ int oalloc_new_finish(uint32_t vid, uint64_t start, uint64_t count) { struct sd_inode *inode = xmalloc(sizeof(struct sd_inode)); int ret; ret = sd_read_object(vid_to_vdi_oid(vid), (char *)inode, sizeof(*inode), 0); if (ret != SD_RES_SUCCESS) { sd_err("failed to read inode, %" PRIx64 ", %s", vid_to_vdi_oid(vid), sd_strerror(ret)); goto out; } sd_debug("start %"PRIu64" end %"PRIu64, start, start + count - 1); sd_inode_set_vid_range(inode, start, (start + count - 1), vid); ret = sd_inode_write(inode, 0, false, false); if (ret != SD_RES_SUCCESS) { sd_err("failed to update inode, %" PRIx64", %s", vid_to_vdi_oid(vid), sd_strerror(ret)); goto out; } 
out: free(inode); return ret; } static int free_desc_cmp(struct free_desc *a, struct free_desc *b) { return -intcmp(a->start, b->start); } static inline int update_and_merge_free_desc(char *meta, uint64_t start, uint64_t count, uint32_t vid) { struct header *hd = (struct header *)meta; struct free_desc *tail, *fd = HEADER_TO_FREE_DESC(hd); uint64_t i, j; /* Try our best to merge it in place, or append it to tail */ for (i = 0; i < hd->nr_free; i++) { if (start + count == fd->start) { fd->start = start; fd->count += count; break; } else if(fd->start + fd->count == start) { fd->count +=count; break; } fd++; } if (i == hd->nr_free) { if (hd->nr_free >= MAX_FREE_DESC) return SD_RES_NO_SPACE; tail = (struct free_desc *)(meta + oalloc_meta_length(hd)); tail->start = start; tail->count = count; hd->nr_free++; } hd->used -= count; xqsort(HEADER_TO_FREE_DESC(hd), hd->nr_free, free_desc_cmp); /* Merge as hard as we can */ j = hd->nr_free - 1; tail = (struct free_desc *)(meta + oalloc_meta_length(hd)) - 1; for (i = 0; i < j; i++, tail--) { struct free_desc *front = tail - 1; sd_debug("start %"PRIu64", count %"PRIu64, tail->start, tail->count); if (tail->start + tail->count > front->start) sd_emerg("bad free descriptor found at %"PRIx32, vid); if (tail->start + tail->count == front->start) { front->start = tail->start; front->count += tail->count; memmove(tail, tail + 1, sizeof(*tail) * i); hd->nr_free--; } } return SD_RES_SUCCESS; } /* * Discard the allocated objects and update the free list of the allocator * * Caller should check the return value since it might fail. 
* * @vid: the vdi where the allocator resides * @start: start index of the objects to free * @count: number of the objects to free */ int oalloc_free(uint32_t vid, uint64_t start, uint64_t count) { char *meta = xvalloc(SD_DATA_OBJ_SIZE); struct header *hd; uint64_t oid = vid_to_data_oid(vid, 0), i; struct sd_inode *inode = xmalloc(sizeof(struct sd_inode)); int ret; ret = sd_read_object(vid_to_vdi_oid(vid), (char *)inode, sizeof(*inode), 0); if (ret != SD_RES_SUCCESS) { sd_err("failed to read inode, %" PRIx64 ", %s", vid_to_vdi_oid(vid), sd_strerror(ret)); goto out; } sd_debug("discard start %"PRIu64" end %"PRIu64, start, start + count - 1); sd_inode_set_vid_range(inode, start, (start + count - 1), 0); ret = sd_inode_write(inode, 0, false, false); if (ret != SD_RES_SUCCESS) { sd_err("failed to update inode, %" PRIx64", %s", vid_to_vdi_oid(vid), sd_strerror(ret)); goto out; } ret = sd_read_object(oid, meta, SD_DATA_OBJ_SIZE, 0); if (ret != SD_RES_SUCCESS) { sd_err("failed to read meta %" PRIx64 ", %s", oid, sd_strerror(ret)); goto out; } ret = update_and_merge_free_desc(meta, start, count, vid); if (ret != SD_RES_SUCCESS) goto out; /* XXX use aio to speed up remove of objects */ for (i = 0; i < count; i++) { struct sd_req hdr; int res; sd_init_req(&hdr, SD_OP_REMOVE_OBJ); hdr.obj.oid = vid_to_data_oid(vid, start + i); res = exec_local_req(&hdr, NULL); /* * return the error code if it does not * success or can't find obj. */ if (res != SD_RES_SUCCESS && res != SD_RES_NO_OBJ) ret = res; } hd = (struct header *)meta; ret = sd_write_object(oid, meta, oalloc_meta_length(hd), 0, false); if (ret != SD_RES_SUCCESS) { sd_err("failed to update meta %"PRIx64 ", %s", oid, sd_strerror(ret)); goto out; } sd_debug("used %"PRIu64", nr_free %"PRIu64, hd->used, hd->nr_free); out: free(meta); free(inode); return ret; }