static int list_store(void) { int fd, ret; struct sd_req hdr; struct sd_rsp *rsp = (struct sd_rsp *)&hdr; char buf[512] = { 0 }; fd = connect_to(sdhost, sdport); if (fd < 0) return EXIT_SYSFAIL; sd_init_req(&hdr, SD_OP_GET_STORE_LIST); hdr.data_length = 512; ret = exec_req(fd, &hdr, buf); close(fd); if (ret) { fprintf(stderr, "Failed to connect\n"); return EXIT_SYSFAIL; } if (rsp->result != SD_RES_SUCCESS) { fprintf(stderr, "Restore failed: %s\n", sd_strerror(rsp->result)); return EXIT_FAILURE; } printf("Available stores:\n"); printf("---------------------------------------\n"); printf("%s\n", buf); return EXIT_SYSFAIL; }
int dog_exec_req(const uint8_t *addr, int port, struct sd_req *hdr, void *buf) { struct node_id nid = {}; struct sockfd *sfd; int ret; memcpy(nid.addr, addr, sizeof(nid.addr)); nid.port = port; sfd = sockfd_cache_get(&nid); if (!sfd) return -1; /* * Retry forever for dog because * 1. We can't get the newest epoch * 2. Some operations might take unexpected long time */ ret = exec_req(sfd->fd, hdr, buf, NULL, 0, UINT32_MAX); sockfd_cache_put(&nid, sfd); return ret ? -1 : 0; }
int collie_exec_req(const char *host, int port, struct sd_req *hdr, void *buf) { struct node_id nid; struct sockfd *sfd; int ret; memset(&nid, 0, sizeof(nid)); str_to_addr(host, nid.addr); nid.port = port; sfd = sockfd_cache_get(&nid); if (!sfd) return -1; /* * Retry forever for collie because * 1. We can't get the newest epoch * 2. Some operations might take unexpected long time */ ret = exec_req(sfd->fd, hdr, buf, NULL, 0, UINT32_MAX); sockfd_cache_put(&nid, sfd); return ret ? -1 : 0; }
int sheep_exec_req(const struct node_id *nid, struct sd_req *hdr, void *buf) { struct sd_rsp *rsp = (struct sd_rsp *)hdr; struct sockfd *sfd; int ret; assert(is_worker_thread()); sfd = sockfd_cache_get(nid); if (!sfd) return SD_RES_NETWORK_ERROR; ret = exec_req(sfd->fd, hdr, buf, sheep_need_retry, hdr->epoch, MAX_RETRY_COUNT); if (ret) { sd_dprintf("remote node might have gone away"); sockfd_cache_del(nid, sfd); return SD_RES_NETWORK_ERROR; } ret = rsp->result; if (ret != SD_RES_SUCCESS) sd_eprintf("failed %s", sd_strerror(ret)); sockfd_cache_put(nid, sfd); return ret; }
static int forward_read_obj_req(struct request *req) { int i, fd, ret = SD_RES_SUCCESS; unsigned wlen, rlen; struct sd_obj_req hdr = *(struct sd_obj_req *)&req->rq; struct sd_obj_rsp *rsp = (struct sd_obj_rsp *)&hdr; struct sd_vnode *v; uint64_t oid = hdr.oid; int nr_copies; hdr.flags |= SD_FLAG_CMD_IO_LOCAL; if (hdr.copies) nr_copies = hdr.copies; else nr_copies = get_nr_copies(req->vnodes); /* TODO: we can do better; we need to check this first */ for (i = 0; i < nr_copies; i++) { v = oid_to_vnode(req->vnodes, oid, i); if (vnode_is_local(v)) { ret = do_local_io(req, hdr.epoch); if (ret != SD_RES_SUCCESS) goto read_remote; return ret; } } read_remote: for (i = 0; i < nr_copies; i++) { v = oid_to_vnode(req->vnodes, oid, i); if (vnode_is_local(v)) continue; fd = get_sheep_fd(v->addr, v->port, v->node_idx, hdr.epoch); if (fd < 0) { ret = SD_RES_NETWORK_ERROR; continue; } wlen = 0; rlen = hdr.data_length; ret = exec_req(fd, (struct sd_req *)&hdr, req->data, &wlen, &rlen); if (ret) { /* network errors */ del_sheep_fd(fd); ret = SD_RES_NETWORK_ERROR; continue; } else { memcpy(&req->rp, rsp, sizeof(*rsp)); ret = rsp->result; break; } } return ret; }
static int trace_read_buffer(void) { int fd, ret, tfd; struct sd_req hdr; struct sd_rsp *rsp = (struct sd_rsp *)&hdr; unsigned rlen, wlen; #define TRACE_BUF_LEN (1024 * 1024 * 20) char *buf = xzalloc(TRACE_BUF_LEN); tfd = open(tracefile, O_CREAT | O_RDWR | O_APPEND | O_TRUNC, 0644); if (tfd < 0) { fprintf(stderr, "can't create tracefile\n"); return EXIT_SYSFAIL; } fd = connect_to(sdhost, sdport); if (fd < 0) return EXIT_SYSFAIL; read_buffer: sd_init_req(&hdr, SD_OP_TRACE_READ_BUF); hdr.data_length = rlen = TRACE_BUF_LEN; hdr.epoch = sd_epoch; wlen = 0; ret = exec_req(fd, &hdr, buf, &wlen, &rlen); if (ret) { fprintf(stderr, "Failed to connect\n"); close(fd); return EXIT_SYSFAIL; } if (rsp->result == SD_RES_AGAIN) goto read_buffer; if (rsp->result != SD_RES_SUCCESS) { fprintf(stderr, "Trace failed: %s\n", sd_strerror(rsp->result)); close(fd); return EXIT_FAILURE; } xwrite(tfd, buf, rlen); if (rlen == TRACE_BUF_LEN) { memset(buf, 0, TRACE_BUF_LEN); goto read_buffer; } close(fd); free(buf); return EXIT_SUCCESS; }
static int do_vdi_create(char *vdiname, int64_t vdi_size, uint32_t base_vid, uint32_t *vdi_id, int snapshot) { struct sd_vdi_req hdr; struct sd_vdi_rsp *rsp = (struct sd_vdi_rsp *)&hdr; int fd, ret; unsigned int wlen, rlen = 0; char buf[SD_MAX_VDI_LEN]; fd = connect_to(sdhost, sdport); if (fd < 0) { fprintf(stderr, "failed to connect\n"); return EXIT_SYSFAIL; } memset(buf, 0, sizeof(buf)); strncpy(buf, vdiname, SD_MAX_VDI_LEN); memset(&hdr, 0, sizeof(hdr)); hdr.opcode = SD_OP_NEW_VDI; hdr.base_vdi_id = base_vid; wlen = SD_MAX_VDI_LEN; hdr.flags = SD_FLAG_CMD_WRITE; hdr.snapid = snapshot; hdr.data_length = wlen; hdr.vdi_size = vdi_size; ret = exec_req(fd, (struct sd_req *)&hdr, buf, &wlen, &rlen); close(fd); if (ret) { fprintf(stderr, "failed to send a request\n"); return EXIT_SYSFAIL; } if (rsp->result != SD_RES_SUCCESS) { fprintf(stderr, "%s, %s\n", sd_strerror(rsp->result), vdiname); return EXIT_FAILURE; } if (vdi_id) *vdi_id = rsp->vdi_id; return EXIT_SUCCESS; }
static int do_vdi_create(char *vdiname, int64_t vdi_size, uint32_t base_vid, uint32_t *vdi_id, int snapshot, int nr_copies) { struct sd_req hdr; struct sd_rsp *rsp = (struct sd_rsp *)&hdr; int fd, ret; unsigned int wlen, rlen = 0; char buf[SD_MAX_VDI_LEN]; fd = connect_to(sdhost, sdport); if (fd < 0) { fprintf(stderr, "Failed to connect\n"); return EXIT_SYSFAIL; } memset(buf, 0, sizeof(buf)); strncpy(buf, vdiname, SD_MAX_VDI_LEN); wlen = SD_MAX_VDI_LEN; sd_init_req(&hdr, SD_OP_NEW_VDI); hdr.flags = SD_FLAG_CMD_WRITE; hdr.data_length = wlen; hdr.vdi.base_vdi_id = base_vid; hdr.vdi.snapid = snapshot; hdr.vdi.vdi_size = roundup(vdi_size, 512); hdr.vdi.copies = nr_copies; ret = exec_req(fd, &hdr, buf, &wlen, &rlen); close(fd); if (ret) { fprintf(stderr, "Failed to send a request\n"); return EXIT_SYSFAIL; } if (rsp->result != SD_RES_SUCCESS) { fprintf(stderr, "Failed to create VDI %s: %s\n", vdiname, sd_strerror(rsp->result)); return EXIT_FAILURE; } if (vdi_id) *vdi_id = rsp->vdi.vdi_id; return EXIT_SUCCESS; }
static int cluster_info(int argc, char **argv) { int i, fd, ret; struct sd_req hdr; struct sd_rsp *rsp = (struct sd_rsp *)&hdr; struct epoch_log *logs; int nr_logs, log_length; time_t ti, ct; struct tm tm; char time_str[128]; log_length = sd_epoch * sizeof(struct epoch_log); again: logs = malloc(log_length); if (!logs) { if (log_length < 10) { fprintf(stderr, "No memory to allocate.\n"); return EXIT_SYSFAIL; } log_length /= 2; goto again; } fd = connect_to(sdhost, sdport); if (fd < 0) goto error; sd_init_req(&hdr, SD_OP_STAT_CLUSTER); hdr.data_length = log_length; ret = exec_req(fd, &hdr, logs); close(fd); if (ret != 0) goto error; if (!raw_output) printf("Cluster status: "); if (rsp->result == SD_RES_SUCCESS) printf("running\n"); else printf("%s\n", sd_strerror(rsp->result)); if (!raw_output && rsp->data_length > 0) { ct = logs[0].ctime >> 32; printf("\nCluster created at %s\n", ctime(&ct)); printf("Epoch Time Version\n"); }
static int cluster_format(int argc, char **argv) { int fd, ret; struct sd_so_req hdr; struct sd_so_rsp *rsp = (struct sd_so_rsp *)&hdr; unsigned rlen, wlen; struct timeval tv; char store_name[STORE_LEN]; fd = connect_to(sdhost, sdport); if (fd < 0) return EXIT_SYSFAIL; gettimeofday(&tv, NULL); memset(&hdr, 0, sizeof(hdr)); hdr.opcode = SD_OP_MAKE_FS; hdr.copies = cluster_cmd_data.copies; if (cluster_cmd_data.nohalt) set_nohalt(&hdr.flags); hdr.epoch = node_list_version; hdr.ctime = (uint64_t) tv.tv_sec << 32 | tv.tv_usec * 1000; if (strlen(cluster_cmd_data.name)) strncpy(store_name, cluster_cmd_data.name, STORE_LEN); else strcpy(store_name, DEFAULT_STORE); hdr.data_length = wlen = strlen(store_name) + 1; hdr.flags |= SD_FLAG_CMD_WRITE; printf("using backend %s store\n", store_name); ret = exec_req(fd, (struct sd_req *)&hdr, store_name, &wlen, &rlen); close(fd); if (ret) { fprintf(stderr, "Failed to connect\n"); return EXIT_SYSFAIL; } if (rsp->result != SD_RES_SUCCESS) { fprintf(stderr, "Format failed: %s\n", sd_strerror(rsp->result)); return list_store(); } return EXIT_SUCCESS; }
static int find_vdi_name(char *vdiname, uint32_t snapid, const char *tag, uint32_t *vid, int for_snapshot) { int ret, fd; struct sd_vdi_req hdr; struct sd_vdi_rsp *rsp = (struct sd_vdi_rsp *)&hdr; unsigned int wlen, rlen = 0; char buf[SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN]; fd = connect_to(sdhost, sdport); if (fd < 0) return -1; memset(buf, 0, sizeof(buf)); strncpy(buf, vdiname, SD_MAX_VDI_LEN); strncpy(buf + SD_MAX_VDI_LEN, tag, SD_MAX_VDI_TAG_LEN); memset(&hdr, 0, sizeof(hdr)); if (for_snapshot) hdr.opcode = SD_OP_GET_VDI_INFO; else hdr.opcode = SD_OP_LOCK_VDI; wlen = SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN; hdr.proto_ver = SD_PROTO_VER; hdr.data_length = wlen; hdr.snapid = snapid; hdr.flags = SD_FLAG_CMD_WRITE; ret = exec_req(fd, (struct sd_req *)&hdr, buf, &wlen, &rlen); if (ret) { ret = -1; goto out; } if (rsp->result != SD_RES_SUCCESS) { fprintf(stderr, "cannot get vdi info, %s, %s %d %s\n", sd_strerror(rsp->result), vdiname, snapid, tag); ret = -1; goto out; } *vid = rsp->vdi_id; ret = 0; out: close(fd); return ret; }
/*
 * Read object `oid` (up to `size` bytes) from every node in sd_nodes with
 * SD_OP_READ_PEER and pass each reply to `func`.
 *
 * `func` receives the "addr:port" label of the node, the oid, the response
 * header, the data buffer and the caller's `data` pointer.  The walk stops
 * early when a connection cannot be opened or when `func` returns non-zero.
 */
static void parse_objs(uint64_t oid, obj_parser_func_t func, void *data, unsigned size)
{
	char label[128];
	char *payload;
	int idx;

	payload = zalloc(size);
	if (payload == NULL) {
		fprintf(stderr, "Failed to allocate memory\n");
		return;
	}

	for (idx = 0; idx < sd_nodes_nr; idx++) {
		struct sd_req hdr;
		struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
		unsigned wlen = 0, rlen = size;
		int fd, err;

		addr_to_str(label, sizeof(label), sd_nodes[idx].nid.addr, 0);

		fd = connect_to(label, sd_nodes[idx].nid.port);
		if (fd < 0)
			break;

		sd_init_req(&hdr, SD_OP_READ_PEER);
		hdr.data_length = rlen;
		hdr.flags = 0;
		hdr.epoch = sd_epoch;
		hdr.obj.oid = oid;

		err = exec_req(fd, &hdr, payload, &wlen, &rlen);
		close(fd);

		/* Extend the address with its port for reporting. */
		sprintf(label + strlen(label), ":%d", sd_nodes[idx].nid.port);

		if (err) {
			fprintf(stderr, "Failed to connect to %s\n", label);
			continue;
		}

		if (func(label, oid, rsp, payload, data))
			break;
	}

	free(payload);
}
static int get_vdi_bitmap_from(struct sd_node *node) { struct sd_req hdr; struct sd_rsp *rsp = (struct sd_rsp *)&hdr; static DECLARE_BITMAP(tmp_vdi_inuse, SD_NR_VDIS); int fd, i, ret = SD_RES_SUCCESS; unsigned int rlen, wlen; char host[128]; if (is_myself(node->addr, node->port)) goto out; addr_to_str(host, sizeof(host), node->addr, 0); fd = connect_to(host, node->port); if (fd < 0) { vprintf(SDOG_ERR, "unable to get the VDI bitmap from %s: %m\n", host); ret = -SD_RES_EIO; goto out; } vprintf(SDOG_ERR, "%s:%d\n", host, node->port); memset(&hdr, 0, sizeof(hdr)); hdr.opcode = SD_OP_READ_VDIS; hdr.epoch = sys->epoch; hdr.data_length = sizeof(tmp_vdi_inuse); rlen = hdr.data_length; wlen = 0; ret = exec_req(fd, &hdr, (char *)tmp_vdi_inuse, &wlen, &rlen); close(fd); if (ret || rsp->result != SD_RES_SUCCESS) { vprintf(SDOG_ERR, "unable to get the VDI bitmap (%d, %d)\n", ret, rsp->result); goto out; } for (i = 0; i < ARRAY_SIZE(sys->vdi_inuse); i++) sys->vdi_inuse[i] |= tmp_vdi_inuse[i]; out: return ret; }
int sd_read_object(uint64_t oid, void *data, unsigned int datalen, uint64_t offset) { struct sd_obj_req hdr; struct sd_obj_rsp *rsp = (struct sd_obj_rsp *)&hdr; char name[128]; int n, fd, ret; unsigned wlen = 0, rlen = datalen; n = obj_to_sheep(vnode_list_entries, nr_vnodes, oid, 0); addr_to_str(name, sizeof(name), vnode_list_entries[n].addr, 0); fd = connect_to(name, vnode_list_entries[n].port); if (fd < 0) { fprintf(stderr, "failed to connect %s:%d\n", name, vnode_list_entries[n].port); return SD_RES_EIO; } memset(&hdr, 0, sizeof(hdr)); hdr.epoch = node_list_version; hdr.opcode = SD_OP_READ_OBJ; hdr.oid = oid; /* use direct to avoid checking consistency */ hdr.flags = SD_FLAG_CMD_DIRECT; hdr.data_length = rlen; hdr.offset = offset; ret = exec_req(fd, (struct sd_req *)&hdr, data, &wlen, &rlen); close(fd); if (ret) { fprintf(stderr, "failed to read object, %lx\n", oid); return SD_RES_EIO; } if (rsp->result != SD_RES_SUCCESS) { fprintf(stderr, "failed to read object, %lx %s\n", oid, sd_strerror(rsp->result)); return rsp->result; } return SD_RES_SUCCESS; }
/*
 * Read object `oid` (one struct sheepdog_inode worth of data) from every
 * entry in node_list_entries with SD_OP_READ_OBJ and pass each reply to
 * `func`.
 *
 * `func` receives the "addr:port" label of the node, the oid, the response
 * header, the data buffer and the caller's `data` pointer.  The walk stops
 * early when a connection cannot be opened.
 */
static void parse_objs(uint64_t oid, obj_parser_func_t func, void *data)
{
	char label[128];
	char *payload;
	int idx;

	payload = zalloc(sizeof(struct sheepdog_inode));
	if (payload == NULL) {
		fprintf(stderr, "out of memory\n");
		return;
	}

	for (idx = 0; idx < nr_nodes; idx++) {
		struct sd_obj_req hdr;
		struct sd_obj_rsp *rsp = (struct sd_obj_rsp *)&hdr;
		unsigned wlen = 0, rlen = sizeof(struct sheepdog_inode);
		int fd, err;

		addr_to_str(label, sizeof(label), node_list_entries[idx].addr, 0);

		fd = connect_to(label, node_list_entries[idx].port);
		if (fd < 0)
			break;

		memset(&hdr, 0, sizeof(hdr));
		hdr.opcode = SD_OP_READ_OBJ;
		hdr.oid = oid;
		hdr.epoch = node_list_version;
		hdr.flags = SD_FLAG_CMD_DIRECT;
		hdr.data_length = rlen;

		err = exec_req(fd, (struct sd_req *)&hdr, payload, &wlen, &rlen);
		close(fd);

		/* Extend the address with its port for reporting. */
		sprintf(label + strlen(label), ":%d", node_list_entries[idx].port);

		if (err) {
			printf("%s: can't connect\n", label);
			continue;
		}

		func(label, oid, rsp, payload, data);
	}

	free(payload);
}
static int node_cache(int argc, char **argv) { char *p; int fd, ret; uint32_t cache_size; unsigned int wlen, rlen = 0; struct sd_req hdr; struct sd_rsp *rsp = (struct sd_rsp *)&hdr; cache_size = strtol(argv[optind], &p, 10); if (argv[optind] == p || cache_size < 0) { fprintf(stderr, "Invalid cache size %s\n", argv[optind]); return EXIT_FAILURE; } fd = connect_to(sdhost, sdport); if (fd < 0) return EXIT_FAILURE; wlen = sizeof(cache_size); sd_init_req(&hdr, SD_OP_SET_CACHE_SIZE); hdr.flags = SD_FLAG_CMD_WRITE; hdr.data_length = wlen; ret = exec_req(fd, &hdr, (void *)&cache_size, &wlen, &rlen); close(fd); if (ret) { fprintf(stderr, "Failed to connect\n"); return EXIT_FAILURE; } if (rsp->result != SD_RES_SUCCESS) { fprintf(stderr, "specify max cache size failed: %s\n", sd_strerror(rsp->result)); return EXIT_FAILURE; } printf("Max cache size set to %dM\n", cache_size); return EXIT_SUCCESS; }
/* Fetch the object list from all the nodes in the cluster */ static int fetch_object_list(struct sd_node *e, uint32_t epoch, uint8_t *buf, size_t buf_size) { int fd, ret; unsigned wlen, rlen; char name[128]; struct sd_list_req hdr; struct sd_list_rsp *rsp; addr_to_str(name, sizeof(name), e->nid.addr, 0); dprintf("%s %"PRIu32"\n", name, e->nid.port); fd = connect_to(name, e->nid.port); if (fd < 0) { eprintf("%s %"PRIu32"\n", name, e->nid.port); return -1; } wlen = 0; rlen = buf_size; sd_init_req((struct sd_req *)&hdr, SD_OP_GET_OBJ_LIST); hdr.tgt_epoch = epoch - 1; hdr.flags = 0; hdr.data_length = rlen; ret = exec_req(fd, (struct sd_req *)&hdr, buf, &wlen, &rlen); close(fd); rsp = (struct sd_list_rsp *)&hdr; if (ret || rsp->result != SD_RES_SUCCESS) { eprintf("failed, %"PRIu32", %"PRIu32"\n", ret, rsp->result); return -1; } dprintf("%zu\n", rsp->data_length / sizeof(uint64_t)); return rsp->data_length / sizeof(uint64_t); }
static int node_recovery(int argc, char **argv) { int i, ret; if (!raw_output) { printf("Nodes In Recovery:\n"); printf(" Id Host:Port V-Nodes Zone\n"); } for (i = 0; i < nr_nodes; i++) { char host[128]; int fd; unsigned wlen, rlen; struct sd_node_req req; struct sd_node_rsp *rsp = (struct sd_node_rsp *)&req; addr_to_str(host, sizeof(host), node_list_entries[i].addr, 0); fd = connect_to(host, node_list_entries[i].port); if (fd < 0) return EXIT_FAILURE; memset(&req, 0, sizeof(req)); req.opcode = SD_OP_STAT_RECOVERY; wlen = 0; rlen = 0; ret = exec_req(fd, (struct sd_req *)&req, NULL, &wlen, &rlen); close(fd); if (!ret && rsp->result == SD_RES_SUCCESS) { addr_to_str(host, sizeof(host), node_list_entries[i].addr, node_list_entries[i].port); printf(raw_output ? "%d %s %d %d\n" : "%4d %-20s%5d%11d\n", i, host, node_list_entries[i].nr_vnodes, node_list_entries[i].zone); } } return EXIT_SUCCESS; }
static int cluster_info(int argc, char **argv) { int i, fd, ret; struct sd_vdi_req hdr; struct sd_vdi_rsp *rsp = (struct sd_vdi_rsp *)&hdr; unsigned rlen, wlen; struct epoch_log logs[8]; int nr_logs; time_t ti, ct; struct tm tm; char time_str[128]; fd = connect_to(sdhost, sdport); if (fd < 0) return EXIT_SYSFAIL; memset(&hdr, 0, sizeof(hdr)); hdr.opcode = SD_OP_STAT_CLUSTER; hdr.epoch = node_list_version; hdr.data_length = sizeof(logs); rlen = hdr.data_length; wlen = 0; ret = exec_req(fd, (struct sd_req *)&hdr, logs, &wlen, &rlen); close(fd); if (ret != 0) return EXIT_SYSFAIL; if (!raw_output) printf("Cluster status: "); if (rsp->result == SD_RES_SUCCESS) printf("running\n"); else printf("%s\n", sd_strerror(rsp->result)); if (!raw_output) { ct = logs[0].ctime >> 32; printf("\nCluster created at %s\n", ctime(&ct)); printf("Epoch Time Version\n"); }
int sd_write_object(uint64_t oid, uint64_t cow_oid, void *data, unsigned int datalen, uint64_t offset, uint32_t flags, int copies, int create) { struct sd_obj_req hdr; struct sd_obj_rsp *rsp = (struct sd_obj_rsp *)&hdr; int fd, ret; unsigned wlen = datalen, rlen; fd = connect_to(sdhost, sdport); if (fd < 0) { fprintf(stderr, "Failed to connect\n"); return SD_RES_EIO; } memset(&hdr, 0, sizeof(hdr)); hdr.epoch = node_list_version; if (create) hdr.opcode = SD_OP_CREATE_AND_WRITE_OBJ; else hdr.opcode = SD_OP_WRITE_OBJ; hdr.oid = oid; hdr.cow_oid = cow_oid; hdr.copies = copies; hdr.data_length = wlen; hdr.flags = (flags & ~SD_FLAG_CMD_IO_LOCAL) | SD_FLAG_CMD_WRITE; hdr.offset = offset; ret = exec_req(fd, (struct sd_req *)&hdr, data, &wlen, &rlen); close(fd); if (ret) { fprintf(stderr, "Failed to write object %lx\n", oid); return SD_RES_EIO; } if (rsp->result != SD_RES_SUCCESS) { fprintf(stderr, "Failed to write object %lx: %s\n", oid, sd_strerror(rsp->result)); return rsp->result; } return SD_RES_SUCCESS; }
/*
 * Send one request to node `nid` over a socket taken from the sockfd cache.
 *
 * Returns 0 when exec_req() reports success and -1 otherwise (including
 * when no cached socket could be obtained).
 */
int dog_exec_req(const struct node_id *nid, struct sd_req *hdr, void *buf)
{
	struct sockfd *cached = sockfd_cache_get(nid);
	int err;

	if (cached == NULL)
		return -1;

	/*
	 * UINT32_MAX is passed as the retry bound, i.e. dog effectively
	 * retries forever: it cannot learn the newest epoch, and some
	 * operations may take unexpectedly long.
	 */
	err = exec_req(cached->fd, hdr, buf, NULL, 0, UINT32_MAX);

	sockfd_cache_put(nid, cached);

	if (err)
		return -1;
	return 0;
}
/*
 * Open a fresh connection to host:port, run one request on it and close it.
 *
 * Returns -1 when connecting or the exchange fails; otherwise returns the
 * result code from the response, which overlays *hdr.
 */
int collie_exec_req(const char *host, int port, struct sd_req *hdr, void *data)
{
	struct sd_rsp *rsp = (struct sd_rsp *)hdr;
	int sock, err;

	sock = connect_to(host, port);
	if (sock < 0) {
		fprintf(stderr, "Failed to connect to %s:%d\n", host, port);
		return -1;
	}

	/*
	 * No retry callback and epoch 0 are passed: collie cannot learn the
	 * newest epoch, so it asks exec_req() to retry hard.
	 */
	err = exec_req(sock, hdr, data, NULL, 0);
	close(sock);

	return err ? -1 : rsp->result;
}
static int cluster_format(int argc, char **argv) { int fd, ret; struct sd_so_req hdr; struct sd_so_rsp *rsp = (struct sd_so_rsp *)&hdr; unsigned rlen, wlen; struct timeval tv; fd = connect_to(sdhost, sdport); if (fd < 0) return EXIT_SYSFAIL; gettimeofday(&tv, NULL); memset(&hdr, 0, sizeof(hdr)); hdr.opcode = SD_OP_MAKE_FS; hdr.copies = cluster_cmd_data.copies; if (cluster_cmd_data.nohalt) set_nohalt(&hdr.flags); hdr.epoch = node_list_version; hdr.ctime = (uint64_t) tv.tv_sec << 32 | tv.tv_usec * 1000; rlen = 0; wlen = 0; ret = exec_req(fd, (struct sd_req *)&hdr, NULL, &wlen, &rlen); close(fd); if (ret) { fprintf(stderr, "failed to connect\n"); return EXIT_SYSFAIL; } if (rsp->result != SD_RES_SUCCESS) { fprintf(stderr, "%s\n", sd_strerror(rsp->result)); return EXIT_FAILURE; } return EXIT_SUCCESS; }
static int list_store(void) { int fd, ret; struct sd_req hdr; struct sd_rsp *rsp = (struct sd_rsp *)&hdr; unsigned rlen, wlen; char buf[512] = { 0 }; fd = connect_to(sdhost, sdport); if (fd < 0) return EXIT_SYSFAIL; memset(&hdr, 0, sizeof(hdr)); wlen = 0; rlen = 512; hdr.opcode = SD_OP_GET_STORE_LIST; hdr.data_length = rlen; ret = exec_req(fd, &hdr, buf, &wlen, &rlen); close(fd); if (ret) { fprintf(stderr, "Failed to connect\n"); return EXIT_SYSFAIL; } if (rsp->result != SD_RES_SUCCESS) { fprintf(stderr, "Restore failed: %s\n", sd_strerror(rsp->result)); return EXIT_FAILURE; } printf("Available stores:\n"); printf("---------------------------------------\n"); printf("%s\n", buf); return EXIT_SYSFAIL; }
int sd_read_object(uint64_t oid, void *data, unsigned int datalen, uint64_t offset) { struct sd_obj_req hdr; struct sd_obj_rsp *rsp = (struct sd_obj_rsp *)&hdr; int fd, ret; unsigned wlen = 0, rlen = datalen; fd = connect_to(sdhost, sdport); if (fd < 0) { fprintf(stderr, "Failed to connect\n"); return SD_RES_EIO; } memset(&hdr, 0, sizeof(hdr)); hdr.epoch = node_list_version; hdr.opcode = SD_OP_READ_OBJ; hdr.oid = oid; hdr.flags = SD_FLAG_CMD_WEAK_CONSISTENCY; hdr.data_length = rlen; hdr.offset = offset; ret = exec_req(fd, (struct sd_req *)&hdr, data, &wlen, &rlen); close(fd); if (ret) { fprintf(stderr, "Failed to read object %lx\n", oid); return SD_RES_EIO; } if (rsp->result != SD_RES_SUCCESS) { fprintf(stderr, "Failed to read object %lx %s\n", oid, sd_strerror(rsp->result)); return rsp->result; } return SD_RES_SUCCESS; }
static int update_node_list(int max_nodes, uint32_t epoch) { int fd, ret; unsigned int size, wlen; char *buf = NULL; struct sd_node *ent; struct sd_node_req hdr; struct sd_node_rsp *rsp = (struct sd_node_rsp *)&hdr; fd = connect_to(sdhost, sdport); if (fd < 0) return -1; size = sizeof(*ent) * max_nodes; buf = zalloc(size); if (!buf) { ret = -1; goto out; } memset(&hdr, 0, sizeof(hdr)); hdr.opcode = SD_OP_GET_NODE_LIST; hdr.request_ver = epoch; hdr.data_length = size; wlen = 0; ret = exec_req(fd, (struct sd_req *)&hdr, buf, &wlen, &size); if (ret) { ret = -1; goto out; } if (rsp->result != SD_RES_SUCCESS) { fprintf(stderr, "Failed to update node list: %s\n", sd_strerror(rsp->result)); ret = -1; goto out; } nr_nodes = size / sizeof(*ent); if (nr_nodes == 0) { fprintf(stderr, "There are no active sheep daemons\n"); exit(EXIT_FAILURE); } /* FIXME */ if (nr_nodes > max_nodes) { ret = -1; goto out; } memcpy(node_list_entries, buf, size); nr_vnodes = nodes_to_vnodes(node_list_entries, nr_nodes, vnode_list_entries); node_list_version = hdr.epoch; master_idx = rsp->master_idx; out: if (buf) free(buf); if (fd >= 0) close(fd); return ret; }
/*
 * Run one request on the already-connected socket `sockfd`.
 *
 * Returns whatever exec_req() returns.
 */
int collie_exec_req(int sockfd, struct sd_req *hdr, void *data)
{
	int status;

	/*
	 * No retry callback and epoch 0 are passed: collie cannot learn the
	 * newest epoch, so it asks exec_req() to retry hard.
	 */
	status = exec_req(sockfd, hdr, data, NULL, 0);

	return status;
}
static int node_info(int argc, char **argv) { int i, ret, success = 0; uint64_t total_size = 0, total_avail = 0, total_vdi_size = 0; char total_str[21], avail_str[21], vdi_size_str[21]; if (!raw_output) printf("Id\tSize\tUsed\tUse%%\n"); for (i = 0; i < nr_nodes; i++) { char name[128]; int fd; unsigned wlen, rlen; struct sd_node_req req; struct sd_node_rsp *rsp = (struct sd_node_rsp *)&req; char store_str[21], free_str[21]; addr_to_str(name, sizeof(name), node_list_entries[i].addr, 0); fd = connect_to(name, node_list_entries[i].port); if (fd < 0) return 1; memset(&req, 0, sizeof(req)); req.opcode = SD_OP_STAT_SHEEP; req.epoch = node_list_version; wlen = 0; rlen = 0; ret = exec_req(fd, (struct sd_req *)&req, NULL, &wlen, &rlen); close(fd); size_to_str(rsp->store_size, store_str, sizeof(store_str)); size_to_str(rsp->store_size - rsp->store_free, free_str, sizeof(free_str)); if (!ret && rsp->result == SD_RES_SUCCESS) { printf(raw_output ? "%d %s %s %d%%\n" : "%2d\t%s\t%s\t%3d%%\n", i, store_str, free_str, (int)(((double)(rsp->store_size - rsp->store_free) / rsp->store_size) * 100)); success++; } total_size += rsp->store_size; total_avail += rsp->store_free; } if (success == 0) { fprintf(stderr, "Cannot get information from any nodes\n"); return EXIT_SYSFAIL; } if (parse_vdi(cal_total_vdi_size, SD_INODE_HEADER_SIZE, &total_vdi_size) < 0) return EXIT_SYSFAIL; size_to_str(total_size, total_str, sizeof(total_str)); size_to_str(total_size - total_avail, avail_str, sizeof(avail_str)); size_to_str(total_vdi_size, vdi_size_str, sizeof(vdi_size_str)); printf(raw_output ? "Total %s %s %d%% %s\n" : "Total\t%s\t%s\t%3d%%\n\nTotal virtual image size\t%s\n", total_str, avail_str, (int)(((double)(total_size - total_avail) / total_size) * 100), vdi_size_str); return EXIT_SUCCESS; }
static int recover_object_from_replica(uint64_t oid, struct sd_vnode *entry, uint32_t epoch, uint32_t tgt_epoch) { struct sd_req hdr; struct sd_rsp *rsp = (struct sd_rsp *)&hdr; char name[128]; unsigned wlen = 0, rlen; int fd, ret = -1; void *buf; struct siocb iocb = { 0 }; rlen = get_objsize(oid); buf = valloc(rlen); if (!buf) { eprintf("%m\n"); goto out; } if (vnode_is_local(entry)) { iocb.epoch = epoch; iocb.length = rlen; ret = sd_store->link(oid, &iocb, tgt_epoch); if (ret == SD_RES_SUCCESS) { ret = 0; goto done; } else { ret = -1; goto out; } } addr_to_str(name, sizeof(name), entry->nid.addr, 0); fd = connect_to(name, entry->nid.port); dprintf("%s, %d\n", name, entry->nid.port); if (fd < 0) { eprintf("failed to connect to %s:%"PRIu32"\n", name, entry->nid.port); ret = -1; goto out; } sd_init_req(&hdr, SD_OP_READ_PEER); hdr.epoch = epoch; hdr.flags = SD_FLAG_CMD_RECOVERY; hdr.data_length = rlen; hdr.obj.oid = oid; hdr.obj.tgt_epoch = tgt_epoch; ret = exec_req(fd, &hdr, buf, &wlen, &rlen); close(fd); if (ret != 0) { eprintf("res: %"PRIx32"\n", rsp->result); ret = -1; goto out; } rsp = (struct sd_rsp *)&hdr; if (rsp->result == SD_RES_SUCCESS) { iocb.epoch = epoch; iocb.length = rlen; iocb.buf = buf; ret = sd_store->atomic_put(oid, &iocb); if (ret != SD_RES_SUCCESS) { ret = -1; goto out; } } else { eprintf("failed, res: %"PRIx32"\n", rsp->result); ret = rsp->result; goto out; } done: dprintf("recovered oid %"PRIx64" from %d to epoch %d\n", oid, tgt_epoch, epoch); out: if (ret == SD_RES_SUCCESS) objlist_cache_insert(oid); free(buf); return ret; }
/*
 * Walk every VDI marked in-use by the daemon at sdhost:sdport and call
 * `func` on its inode.
 *
 * The in-use bitmap is fetched with SD_OP_READ_VDIS.  For each set bit the
 * inode header is read; when `size` exceeds SD_INODE_HEADER_SIZE, the part
 * after the header is read as well, capped at `size` minus the header.
 * VDIs whose inode cannot be read, or whose name is empty (deleted), are
 * skipped.
 *
 * Returns 0 after the walk, the negative value from connect_to() when the
 * connection fails, or the negative value from exec_req().
 */
int parse_vdi(vdi_parser_func_t func, size_t size, void *data)
{
	static struct sheepdog_inode inode;
	static DECLARE_BITMAP(vdi_inuse, SD_NR_VDIS);
	struct sd_req req;
	unsigned int rlen, wlen = 0;
	unsigned long vid;
	int fd, ret;

	fd = connect_to(sdhost, sdport);
	if (fd < 0)
		return fd;

	memset(&req, 0, sizeof(req));
	req.opcode = SD_OP_READ_VDIS;
	req.epoch = node_list_version;
	req.data_length = sizeof(vdi_inuse);
	rlen = sizeof(vdi_inuse);

	ret = exec_req(fd, &req, vdi_inuse, &wlen, &rlen);
	close(fd);
	if (ret < 0)
		return ret;

	for (vid = 0; vid < SD_NR_VDIS; vid++) {
		uint64_t oid;

		if (!test_bit(vid, vdi_inuse))
			continue;

		oid = vid_to_vdi_oid(vid);

		memset(&inode, 0, sizeof(inode));
		ret = sd_read_object(oid, &inode, SD_INODE_HEADER_SIZE, 0);
		if (ret != SD_RES_SUCCESS) {
			fprintf(stderr, "Failed to read inode header\n");
			continue;
		}

		/* this VDI has been deleted */
		if (inode.name[0] == '\0')
			continue;

		if (size > SD_INODE_HEADER_SIZE) {
			/* Read only as many data_vdi_id slots as the VDI uses. */
			rlen = DIV_ROUND_UP(inode.vdi_size, SD_DATA_OBJ_SIZE) *
			       sizeof(inode.data_vdi_id[0]);
			if (rlen > size - SD_INODE_HEADER_SIZE)
				rlen = size - SD_INODE_HEADER_SIZE;

			ret = sd_read_object(oid,
					     ((char *)&inode) + SD_INODE_HEADER_SIZE,
					     rlen, SD_INODE_HEADER_SIZE);
			if (ret != SD_RES_SUCCESS) {
				fprintf(stderr, "Failed to read inode\n");
				continue;
			}
		}

		func(inode.vdi_id, inode.name, inode.tag, inode.snap_id, 0,
		     &inode, data);
	}

	return 0;
}