int ploop_copy_init(struct ploop_disk_images_data *di, struct ploop_copy_param *param, struct ploop_copy_handle **h) { int ret, err; int blocksize; char *image = NULL; char *format = NULL; char device[64]; char partdev[64]; struct ploop_copy_handle *_h = NULL; int is_remote; char mnt[PATH_MAX] = ""; is_remote = is_fd_socket(param->ofd); if (is_remote < 0) { ploop_err(0, "Invalid output fd %d: must be a file, " "a pipe or a socket", param->ofd); return SYSEXIT_PARAM; } if (param->ofd == STDOUT_FILENO) ploop_set_verbose_level(PLOOP_LOG_NOSTDOUT); else if (param->ofd == STDERR_FILENO) ploop_set_verbose_level(PLOOP_LOG_NOCONSOLE); if (ploop_lock_dd(di)) return SYSEXIT_LOCK; if (ploop_find_dev_by_dd(di, device, sizeof(device))) { ploop_err(0, "Can't find running ploop device"); ret = SYSEXIT_SYS; goto err; } ret = get_image_info(device, &image, &format, &blocksize); if (ret) goto err; _h = alloc_ploop_copy_handle(S2B(blocksize)); if (_h == NULL) { ploop_err(0, "alloc_ploop_copy_handle"); ret = SYSEXIT_MALLOC; goto err; } _h->raw = strcmp(format, "raw") == 0; _h->ofd = param->ofd; _h->is_remote = is_remote; _h->async = param->async; _h->devfd = open(device, O_RDONLY|O_CLOEXEC); if (_h->devfd == -1) { ploop_err(errno, "Can't open device %s", device); ret = SYSEXIT_DEVICE; goto err; } ret = get_partition_device_name(device, partdev, sizeof(partdev)); if (ret) goto err; _h->partfd = open(partdev, O_RDONLY|O_CLOEXEC); if (_h->partfd == -1) { ploop_err(errno, "Can't open device %s", partdev); ret = SYSEXIT_DEVICE; goto err; } ret = SYSEXIT_OPEN; err = ploop_get_mnt_by_dev(device, mnt, sizeof(mnt)); if (err == -1) goto err; else if (err == 0) { _h->mntfd = open(mnt, O_RDONLY|O_NONBLOCK|O_DIRECTORY); if (_h->mntfd < 0) { ploop_err(errno, "Can't open %s", mnt); goto err; } } ploop_log(0, "Send image %s dev=%s mnt=%s fmt=%s blocksize=%d local=%d", image, device, mnt, format, blocksize, !is_remote); if (open_delta(&_h->idelta, image, O_RDONLY|O_DIRECT, OD_ALLOW_DIRTY)) { ret = SYSEXIT_OPEN; goto err; } ret = complete_running_operation(di, device); if (ret) goto err; _h->cl = register_cleanup_hook(cancel_sender, _h); pthread_mutex_lock(&_h->sd.wait_mutex); err: if (ret) { ploop_copy_release(_h); free_ploop_copy_handle(_h); } else *h = _h; free(image); ploop_unlock_dd(di); return ret; }
int ploop_copy_send(struct ploop_copy_send_param *arg) { struct delta idelta = { .fd = -1 }; int tracker_on = 0; int fs_frozen = 0; int devfd = -1; int mntfd = -1; int ret = 0; char *send_from = NULL; char *format = NULL; void *iobuf[2] = {}; int blocksize; __u64 cluster; __u64 pos; __u64 iterpos; __u64 trackpos; __u64 trackend; __u64 xferred; int iter; struct ploop_track_extent e; int i; pthread_t send_th = 0; struct send_data sd = { .mutex = PTHREAD_MUTEX_INITIALIZER, .cond = PTHREAD_COND_INITIALIZER, .cond_sent = PTHREAD_COND_INITIALIZER, }; if (!arg) return SYSEXIT_PARAM; sd.fd = arg->ofd; sd.is_pipe = is_fd_pipe(arg->ofd); if (sd.is_pipe < 0) { ploop_err(0, "Invalid output fd %d: must be a file, " "a pipe or a socket", arg->ofd); return SYSEXIT_PARAM; } if (arg->feedback_fd >= 0 && is_fd_pipe(arg->feedback_fd) != 1) { ploop_err(errno, "Invalid feedback fd %d: must be " "a pipe or a socket", arg->feedback_fd); return SYSEXIT_PARAM; } /* If data is to be send to stdout or stderr, * we have to disable logging to appropriate fd. * * As currently there's no way to disable just stderr, * so in this case we have to disable stdout as well. */ if (arg->ofd == STDOUT_FILENO) ploop_set_verbose_level(PLOOP_LOG_NOSTDOUT); else if (arg->ofd == STDERR_FILENO) ploop_set_verbose_level(PLOOP_LOG_NOCONSOLE); devfd = open(arg->device, O_RDONLY); if (devfd < 0) { ploop_err(errno, "Can't open device %s", arg->device); ret = SYSEXIT_DEVICE; goto done; } mntfd = open_mount_point(arg->device); if (mntfd < 0) { /* Error is printed by open_mount_point() */ ret = SYSEXIT_OPEN; goto done; } ret = get_image_info(arg->device, &send_from, &format, &blocksize); if (ret) goto done; cluster = S2B(blocksize); ret = SYSEXIT_MALLOC; for (i = 0; i < 2; i++) if (p_memalign(&iobuf[i], 4096, cluster)) goto done; ret = complete_running_operation(NULL, arg->device); if (ret) goto done; ret = ioctl_device(devfd, PLOOP_IOC_TRACK_INIT, &e); if (ret) goto done; tracker_on = 1; if (open_delta_simple(&idelta, send_from, O_RDONLY|O_DIRECT, OD_NOFLAGS)) { ret = SYSEXIT_OPEN; goto done; } ret = pthread_create(&send_th, NULL, send_thread, &sd); if (ret) { ploop_err(ret, "Can't create send thread"); ret = SYSEXIT_SYS; goto done; } ploop_log(-1, "Sending %s", send_from); trackend = e.end; for (pos = 0; pos < trackend; ) { int n; trackpos = pos + cluster; ret = ioctl_device(devfd, PLOOP_IOC_TRACK_SETPOS, &trackpos); if (ret) goto done; n = do_pread(cluster, pos); if (n == 0) /* EOF */ break; async_send(n, pos); pos += n; } /* First copy done */ iter = 1; iterpos = 0; xferred = 0; for (;;) { int err; err = ioctl(devfd, PLOOP_IOC_TRACK_READ, &e); if (err == 0) { //fprintf(stderr, "TRACK %llu-%llu\n", e.start, e.end); fflush(stdout); if (e.end > trackend) trackend = e.end; if (e.start < iterpos) iter++; iterpos = e.end; xferred += e.end - e.start; for (pos = e.start; pos < e.end; ) { int n; int copy = e.end - pos; if (copy > cluster) copy = cluster; if (pos + copy > trackpos) { trackpos = pos + copy; if (ioctl(devfd, PLOOP_IOC_TRACK_SETPOS, &trackpos)) { ploop_err(errno, "PLOOP_IOC_TRACK_SETPOS"); ret = SYSEXIT_DEVIOC; goto done; } } n = do_pread(copy, pos); if (n == 0) { ploop_err(0, "Unexpected EOF"); ret = SYSEXIT_READ; goto done; } async_send(n, pos); pos += n; } } else { if (errno == EAGAIN) /* no more dirty blocks */ break; ploop_err(errno, "PLOOP_IOC_TRACK_READ"); ret = SYSEXIT_DEVIOC; goto done; } if (iter > 10 || (iter > 1 && xferred > trackend)) break; } /* Live iterative transfers are done. Either we transferred * everything or iterations did not converge. In any case * now we must suspend VE disk activity. Now it is just * call of an external program (something sort of * "killall -9 writetest; sleep 1; umount /mnt2"), actual * implementation must be intergrated to vzctl/vzmigrate * and suspend VE with subsequent fsyncing FS. */ /* Send the sync command to receiving side. Since older ploop * might be present on the other side, we need to not break the * backward compatibility, so just send the first few (SYNC_MARK) * bytes of delta file contents. New ploop_receive() interprets * this as "sync me" command, while the old one just writes those * bytes which is useless but harmless. */ if (sd.is_pipe) { char buf[LEN_STATUS + 1] = {}; ret = do_pread(4096, 0); if (ret < SYNC_MARK) { ploop_err(errno, "Short read"); ret = SYSEXIT_READ; goto done; } TS("SEND 0 %d (sync)", SYNC_MARK); async_send(SYNC_MARK, 0); /* Now we should wait for the other side to finish syncing * before freezing the container, to optimize CT frozen time. */ if (arg->feedback_fd < 0) { /* No descriptor to receive a response back is given. * As ugly as it looks, let's just sleep for some time * hoping the other side will finish sync. */ TS("SLEEP 5"); sleep(5); goto sync_done; } /* Wait for feedback from the receiving side */ /* FIXME: use select/poll with a timeout */ if (read(arg->feedback_fd, buf, LEN_STATUS) != LEN_STATUS) { ploop_err(errno, "Can't read feedback"); ret = SYSEXIT_PROTOCOL; goto done; } if (strncmp(buf, STATUS_OK, LEN_STATUS) == 0) { goto sync_done; } else if (strncmp(buf, STATUS_FAIL, LEN_STATUS) == 0) { ploop_err(0, "Remote side reported sync failure"); ret = SYSEXIT_FSYNC; goto done; } else { ploop_err(0, "Got back feedback: %s", buf); ret = SYSEXIT_PROTOCOL; goto done; } } else { /* Writing to local file */ fdatasync(arg->ofd); } sync_done: /* Freeze the container */ TS("FLUSH"); ret = run_cmd(arg->flush_cmd); if (ret) goto done; /* Sync fs */ TS("SYNCFS"); if (sys_syncfs(mntfd)) { ploop_err(errno, "syncfs() failed"); ret = SYSEXIT_FSYNC; goto done; } /* Flush journal and freeze fs (this also clears the fs dirty bit) */ TS("FIFREEZE"); ret = ioctl_device(mntfd, FIFREEZE, 0); if (ret) goto done; fs_frozen = 1; TS("IOC_SYNC"); ret = ioctl_device(devfd, PLOOP_IOC_SYNC, 0); if (ret) goto done; iter = 1; iterpos = 0; for (;;) { int err; struct ploop_track_extent e; err = ioctl(devfd, PLOOP_IOC_TRACK_READ, &e); if (err == 0) { __u64 pos; //fprintf(stderr, "TRACK %llu-%llu\n", e.start, e.end); fflush(stdout); if (e.end > trackend) trackend = e.end; if (e.start < iterpos) iter++; iterpos = e.end; for (pos = e.start; pos < e.end; ) { int n; int copy = e.end - pos; if (copy > cluster) copy = cluster; if (pos + copy > trackpos) { trackpos = pos + copy; ret = ioctl(devfd, PLOOP_IOC_TRACK_SETPOS, &trackpos); if (ret) goto done; } TS("READ %llu %d", pos, copy); n = do_pread(copy, pos); if (n == 0) { ploop_err(0, "Unexpected EOF"); ret = SYSEXIT_READ; goto done; } TS("SEND %llu %d", pos, n); async_send(n, pos); pos += n; } } else { if (errno == EAGAIN) break; ploop_err(errno, "PLOOP_IOC_TRACK_READ"); ret = SYSEXIT_DEVIOC; goto done; } if (iter > 2) { ploop_err(0, "Too many iterations on frozen FS, aborting"); ret = SYSEXIT_LOOP; goto done; } } /* Must clear dirty flag on ploop1 image. */ if (strcmp(format, "ploop1") == 0) { int n; struct ploop_pvd_header *vh; TS("READ 0 4096"); n = do_pread(4096, 0); if (n < SECTOR_SIZE) { ploop_err(errno, "Short read"); ret = SYSEXIT_READ; goto done; } vh = iobuf[i]; vh->m_DiskInUse = 0; TS("SEND 0 %d (1st sector)", SECTOR_SIZE); async_send(SECTOR_SIZE, 0); } TS("IOCTL TRACK_STOP"); ret = ioctl(devfd, PLOOP_IOC_TRACK_STOP, 0); if (ret) goto done; tracker_on = 0; TS("SEND 0 0 (close)"); async_send(0, 0); pthread_join(send_th, NULL); send_th = 0; done: if (send_th) pthread_cancel(send_th); if (fs_frozen) (void)ioctl_device(mntfd, FITHAW, 0); if (tracker_on) (void)ioctl_device(devfd, PLOOP_IOC_TRACK_ABORT, 0); free(iobuf[0]); free(iobuf[1]); if (devfd >=0) close(devfd); if (mntfd >=0) close(mntfd); free(send_from); if (idelta.fd >= 0) close_delta(&idelta); TS("DONE"); return ret; } #undef do_pread #undef async_send /* Deprecated, please use ploop_copy_send() instead */ int ploop_send(const char *device, int ofd, const char *flush_cmd, int is_pipe) { struct ploop_copy_send_param s = { .device = device, .ofd = ofd, .flush_cmd = flush_cmd, }; return ploop_copy_send(&s); }
static int do_create_snapshot(struct ploop_disk_images_data *di, const char *guid, const char *snap_dir, int temporary) { int ret; int fd; char dev[64]; char snap_guid[UUID_SIZE]; char file_guid[UUID_SIZE]; char fname[PATH_MAX]; char conf[PATH_MAX]; char conf_tmp[PATH_MAX]; int online = 0; int n; off_t size; __u32 blocksize; int version; if (guid != NULL && !is_valid_guid(guid)) { ploop_err(0, "Incorrect guid %s", guid); return SYSEXIT_PARAM; } if (is_old_snapshot_format(di)) return SYSEXIT_PARAM; ret = gen_uuid_pair(snap_guid, sizeof(snap_guid), file_guid, sizeof(file_guid)); if (ret) { ploop_err(errno, "Can't generate uuid"); return ret; } if (guid != NULL) { if (find_snapshot_by_guid(di, guid) != -1) { ploop_err(0, "The snapshot %s already exist", guid); return SYSEXIT_PARAM; } strcpy(snap_guid, guid); } n = get_snapshot_count(di); if (n == -1) { return SYSEXIT_PARAM; } else if (n > 128-2) { /* The number of images limited by 128 so the snapshot limit 128 - base_image - one_reserverd */ ploop_err(errno, "Unable to create a snapshot." " The maximum number of snapshots (%d) has been reached", n-1); return SYSEXIT_PARAM; } ret = ploop_find_dev_by_dd(di, dev, sizeof(dev)); if (ret == -1) return SYSEXIT_SYS; else if (ret == 0) { online = 1; ret = complete_running_operation(di, dev); if (ret) return ret; } else { ret = get_image_param_offline(di, di->top_guid, &size, &blocksize, &version); if (ret == SYSEXIT_OPEN && errno == EBUSY) { /* repair top delta */ char *topdelta[] = {find_image_by_guid(di, di->top_guid), NULL}; blocksize = di->blocksize; ret = check_deltas(di, topdelta, 0, &blocksize); if (ret) return ret; ret = get_image_param_offline(di, di->top_guid, &size, &blocksize, &version); } if (ret) return ret; } ret = merge_temporary_snapshots(di); if (ret) return ret; if (snap_dir != NULL) { char *name; char *dir; dir = realpath(snap_dir, NULL); if (dir == NULL) { ploop_err(errno, "Error in realpath(%s)", snap_dir); return SYSEXIT_CREAT; } name = strrchr(di->images[0]->file, '/'); if (name != NULL) name++; else name = di->images[0]->file; snprintf(fname, sizeof(fname), "%s/%s.%s", dir, name, file_guid); free(dir); } else snprintf(fname, sizeof(fname), "%s.%s", di->images[0]->file, file_guid); ploop_di_change_guid(di, di->top_guid, snap_guid); if (temporary) ploop_di_set_temporary(di, snap_guid); ret = ploop_di_add_image(di, fname, TOPDELTA_UUID, snap_guid); if (ret) return ret; get_disk_descriptor_fname(di, conf, sizeof(conf)); snprintf(conf_tmp, sizeof(conf_tmp), "%s.tmp", conf); ret = ploop_store_diskdescriptor(conf_tmp, di); if (ret) return ret; if (!online) { // offline snapshot fd = create_snapshot_delta(fname, blocksize, size, version); if (fd < 0) { ret = SYSEXIT_CREAT; goto err; } close(fd); } else { // Always sync fs ret = create_snapshot(dev, fname, 1); if (ret) goto err; } if (rename(conf_tmp, conf)) { ploop_err(errno, "Can't rename %s %s", conf_tmp, conf); ret = SYSEXIT_RENAME; } if (ret && !online && unlink(fname)) ploop_err(errno, "Can't unlink %s", fname); ploop_log(0, "ploop %s %s has been successfully created", get_snap_str(temporary), snap_guid); err: if (ret && unlink(conf_tmp)) ploop_err(errno, "Can't unlink %s", conf_tmp); return ret; }
int do_create_snapshot(struct ploop_disk_images_data *di, const char *guid, const char *snap_dir, const char *cbt_uuid, int flags) { int ret, rc; int fd; char dev[64]; char snap_guid[UUID_SIZE]; char top_guid[UUID_SIZE]; char file_guid[UUID_SIZE]; char fname[PATH_MAX]; const char *prev_fname = NULL; char conf[PATH_MAX]; char conf_tmp[PATH_MAX]; int online = 0; int temporary = flags & SNAP_TYPE_TEMPORARY; int n; off_t size; __u32 blocksize; int version; uuid_t u; const __u8 *cbt_u = NULL; if (cbt_uuid != NULL) { ploop_log(0, "Create snapshot CBT uuid=%s", cbt_uuid); if (uuid_parse(cbt_uuid, u)) { ploop_log(-1, "Incorrect cbt uuid is specified %s", cbt_uuid); return SYSEXIT_PARAM; } cbt_u = u; } if (guid != NULL && !is_valid_guid(guid)) { ploop_err(0, "Incorrect guid %s", guid); return SYSEXIT_PARAM; } if (is_old_snapshot_format(di)) return SYSEXIT_PARAM; ret = gen_uuid_pair(snap_guid, sizeof(snap_guid), file_guid, sizeof(file_guid)); if (ret) return ret; if (di->vol && di->vol->parent) { ret = ploop_uuid_generate(top_guid, sizeof(top_guid)); if (ret) return ret; } else strcpy(top_guid, TOPDELTA_UUID); if (guid != NULL) { if (find_snapshot_by_guid(di, guid) != -1) { ploop_err(0, "The snapshot %s already exist", guid); return SYSEXIT_PARAM; } strcpy(snap_guid, guid); } n = get_snapshot_count(di); if (n == -1) { return SYSEXIT_PARAM; } else if (n > 128-2) { /* The number of images limited by 128 so the snapshot limit 128 - base_image - one_reserverd */ ploop_err(errno, "Unable to create a snapshot." " The maximum number of snapshots (%d) has been reached", n-1); return SYSEXIT_PARAM; } rc = ploop_find_dev_by_dd(di, dev, sizeof(dev)); if (rc == -1) return SYSEXIT_SYS; if (rc == 0) { if (flags & SNAP_TYPE_OFFLINE) { ret = get_image_param_online(dev, &size, &blocksize, &version); } else { online = 1; ret = complete_running_operation(di, dev); } if (ret) return ret; } else { ret = get_image_param_offline(di, di->top_guid, &size, &blocksize, &version); if (ret == SYSEXIT_OPEN && errno == EBUSY) { /* repair top delta */ char *topdelta[] = {find_image_by_guid(di, di->top_guid), NULL}; blocksize = di->blocksize; ret = check_deltas(di, topdelta, 0, &blocksize, NULL); if (ret) return ret; ret = get_image_param_offline(di, di->top_guid, &size, &blocksize, &version); } if (ret) return ret; } ret = merge_temporary_snapshots(di); if (ret) return ret; if (snap_dir != NULL) { char *name; char *dir; dir = realpath(snap_dir, NULL); if (dir == NULL) { ploop_err(errno, "Error in realpath(%s)", snap_dir); return SYSEXIT_CREAT; } name = strrchr(di->images[0]->file, '/'); if (name != NULL) name++; else name = di->images[0]->file; snprintf(fname, sizeof(fname), "%s/%s.%s", dir, name, file_guid); free(dir); } else snprintf(fname, sizeof(fname), "%s.%s", di->images[0]->file, file_guid); prev_fname = find_image_by_guid(di, di->top_guid); if (prev_fname == NULL) { ploop_err(0, "Unable to find image by uuid %s", di->top_guid); return SYSEXIT_PARAM; } ploop_di_change_guid(di, di->top_guid, snap_guid); if (temporary) ploop_di_set_temporary(di, snap_guid); ret = ploop_di_add_image(di, fname, top_guid, snap_guid); if (ret) return ret; get_disk_descriptor_fname(di, conf, sizeof(conf)); snprintf(conf_tmp, sizeof(conf_tmp), "%s.tmp", conf); ret = ploop_store_diskdescriptor(conf_tmp, di); if (ret) return ret; if (!online) { // offline snapshot ret = 0; fd = create_snapshot_delta(fname, blocksize, size, version); if (fd < 0) { ret = SYSEXIT_CREAT; goto err; } close(fd); if (cbt_u != NULL) ret = write_empty_cbt_to_image(fname, prev_fname, cbt_u); else if (di->mode != PLOOP_RAW_MODE) { if (rc == 0) ret = cbt_dump(di, dev, fname); else ret = ploop_move_cbt(fname, prev_fname); } if (ret) goto err; } else { // Always sync fs ret = create_snapshot(dev, fname, 1, cbt_u, prev_fname); if (ret) goto err; } if (rename(conf_tmp, conf)) { ploop_err(errno, "Can't rename %s %s", conf_tmp, conf); ret = SYSEXIT_RENAME; } if (ret && !online && unlink(fname)) ploop_err(errno, "Can't unlink %s", fname); ploop_log(0, "ploop %s %s has been successfully created", get_snap_str(temporary), snap_guid); err: if (ret && unlink(conf_tmp)) ploop_err(errno, "Can't unlink %s", conf_tmp); return ret; }