static struct ploop_copy_handle *alloc_ploop_copy_handle(int cluster) { struct ploop_copy_handle *h; h = calloc(1, sizeof(struct ploop_copy_handle)); if (h == NULL) return NULL; pthread_mutex_init(&h->sd.mutex, NULL); pthread_cond_init(&h->sd.cond, NULL); pthread_mutex_init(&h->sd.wait_mutex, NULL); pthread_cond_init(&h->sd.wait_cond, NULL); pthread_barrier_init(&h->sd.barrier, NULL, 2); h->devfd = h->ofd = h->mntfd = h->idelta.fd = -1; h->cluster = cluster; if (p_memalign(&h->iobuf[0], 4096, cluster)) goto err; if (p_memalign(&h->iobuf[1], 4096, cluster)) goto err; return h; err: free_ploop_copy_handle(h); return NULL; }
static int send_optional_header(struct ploop_copy_handle *copy_h) { int ret; struct ploop_pvd_header *vh; size_t block_size; struct ploop_pvd_ext_block_check *hc; struct ploop_pvd_ext_block_element_header *h; __u8 *block = NULL, *data; vh = (struct ploop_pvd_header *)copy_h->idelta.hdr0; block_size = vh->m_Sectors * SECTOR_SIZE; if (p_memalign((void **)&block, 4096, block_size)) return SYSEXIT_MALLOC; hc = (struct ploop_pvd_ext_block_check *)block; h = (struct ploop_pvd_ext_block_element_header *)(hc + 1); data = (__u8 *)(h + 1); h->magic = EXT_MAGIC_DIRTY_BITMAP; ret = save_dirty_bitmap(copy_h->devfd, ©_h->idelta, copy_h->eof_offset, data, &h->size, NULL, cbt_writer, copy_h); if (ret) { if (ret == SYSEXIT_NOCBT) ret = 0; goto out; } vh->m_DiskInUse = SIGNATURE_DISK_CLOSED_V21; vh->m_FormatExtensionOffset = (copy_h->eof_offset + SECTOR_SIZE - 1) / SECTOR_SIZE; if (send_buf(copy_h, vh, sizeof(*vh), 0)) { ploop_err(errno, "Can't write header"); ret = SYSEXIT_WRITE; goto out; } ploop_log(3, "Send extension header offset=%llu size=%d", vh->m_FormatExtensionOffset * SECTOR_SIZE, h->size); hc->m_Magic = FORMAT_EXTENSION_MAGIC; MD5((const unsigned char *)(hc + 1), block_size - sizeof(*hc), hc->m_Md5); if (send_buf(copy_h, block, block_size, vh->m_FormatExtensionOffset * SECTOR_SIZE)) { ploop_err(errno, "Can't write optional header"); ret = SYSEXIT_WRITE; goto out; } out: free(block); return ret; }
int ploop_copy_send(struct ploop_copy_send_param *arg) { struct delta idelta = { .fd = -1 }; int tracker_on = 0; int fs_frozen = 0; int devfd = -1; int mntfd = -1; int ret = 0; char *send_from = NULL; char *format = NULL; void *iobuf[2] = {}; int blocksize; __u64 cluster; __u64 pos; __u64 iterpos; __u64 trackpos; __u64 trackend; __u64 xferred; int iter; struct ploop_track_extent e; int i; pthread_t send_th = 0; struct send_data sd = { .mutex = PTHREAD_MUTEX_INITIALIZER, .cond = PTHREAD_COND_INITIALIZER, .cond_sent = PTHREAD_COND_INITIALIZER, }; if (!arg) return SYSEXIT_PARAM; sd.fd = arg->ofd; sd.is_pipe = is_fd_pipe(arg->ofd); if (sd.is_pipe < 0) { ploop_err(0, "Invalid output fd %d: must be a file, " "a pipe or a socket", arg->ofd); return SYSEXIT_PARAM; } if (arg->feedback_fd >= 0 && is_fd_pipe(arg->feedback_fd) != 1) { ploop_err(errno, "Invalid feedback fd %d: must be " "a pipe or a socket", arg->feedback_fd); return SYSEXIT_PARAM; } /* If data is to be send to stdout or stderr, * we have to disable logging to appropriate fd. * * As currently there's no way to disable just stderr, * so in this case we have to disable stdout as well. */ if (arg->ofd == STDOUT_FILENO) ploop_set_verbose_level(PLOOP_LOG_NOSTDOUT); else if (arg->ofd == STDERR_FILENO) ploop_set_verbose_level(PLOOP_LOG_NOCONSOLE); devfd = open(arg->device, O_RDONLY); if (devfd < 0) { ploop_err(errno, "Can't open device %s", arg->device); ret = SYSEXIT_DEVICE; goto done; } mntfd = open_mount_point(arg->device); if (mntfd < 0) { /* Error is printed by open_mount_point() */ ret = SYSEXIT_OPEN; goto done; } ret = get_image_info(arg->device, &send_from, &format, &blocksize); if (ret) goto done; cluster = S2B(blocksize); ret = SYSEXIT_MALLOC; for (i = 0; i < 2; i++) if (p_memalign(&iobuf[i], 4096, cluster)) goto done; ret = complete_running_operation(NULL, arg->device); if (ret) goto done; ret = ioctl_device(devfd, PLOOP_IOC_TRACK_INIT, &e); if (ret) goto done; tracker_on = 1; if (open_delta_simple(&idelta, send_from, O_RDONLY|O_DIRECT, OD_NOFLAGS)) { ret = SYSEXIT_OPEN; goto done; } ret = pthread_create(&send_th, NULL, send_thread, &sd); if (ret) { ploop_err(ret, "Can't create send thread"); ret = SYSEXIT_SYS; goto done; } ploop_log(-1, "Sending %s", send_from); trackend = e.end; for (pos = 0; pos < trackend; ) { int n; trackpos = pos + cluster; ret = ioctl_device(devfd, PLOOP_IOC_TRACK_SETPOS, &trackpos); if (ret) goto done; n = do_pread(cluster, pos); if (n == 0) /* EOF */ break; async_send(n, pos); pos += n; } /* First copy done */ iter = 1; iterpos = 0; xferred = 0; for (;;) { int err; err = ioctl(devfd, PLOOP_IOC_TRACK_READ, &e); if (err == 0) { //fprintf(stderr, "TRACK %llu-%llu\n", e.start, e.end); fflush(stdout); if (e.end > trackend) trackend = e.end; if (e.start < iterpos) iter++; iterpos = e.end; xferred += e.end - e.start; for (pos = e.start; pos < e.end; ) { int n; int copy = e.end - pos; if (copy > cluster) copy = cluster; if (pos + copy > trackpos) { trackpos = pos + copy; if (ioctl(devfd, PLOOP_IOC_TRACK_SETPOS, &trackpos)) { ploop_err(errno, "PLOOP_IOC_TRACK_SETPOS"); ret = SYSEXIT_DEVIOC; goto done; } } n = do_pread(copy, pos); if (n == 0) { ploop_err(0, "Unexpected EOF"); ret = SYSEXIT_READ; goto done; } async_send(n, pos); pos += n; } } else { if (errno == EAGAIN) /* no more dirty blocks */ break; ploop_err(errno, "PLOOP_IOC_TRACK_READ"); ret = SYSEXIT_DEVIOC; goto done; } if (iter > 10 || (iter > 1 && xferred > trackend)) break; } /* Live iterative transfers are done. Either we transferred * everything or iterations did not converge. In any case * now we must suspend VE disk activity. Now it is just * call of an external program (something sort of * "killall -9 writetest; sleep 1; umount /mnt2"), actual * implementation must be intergrated to vzctl/vzmigrate * and suspend VE with subsequent fsyncing FS. */ /* Send the sync command to receiving side. Since older ploop * might be present on the other side, we need to not break the * backward compatibility, so just send the first few (SYNC_MARK) * bytes of delta file contents. New ploop_receive() interprets * this as "sync me" command, while the old one just writes those * bytes which is useless but harmless. */ if (sd.is_pipe) { char buf[LEN_STATUS + 1] = {}; ret = do_pread(4096, 0); if (ret < SYNC_MARK) { ploop_err(errno, "Short read"); ret = SYSEXIT_READ; goto done; } TS("SEND 0 %d (sync)", SYNC_MARK); async_send(SYNC_MARK, 0); /* Now we should wait for the other side to finish syncing * before freezing the container, to optimize CT frozen time. */ if (arg->feedback_fd < 0) { /* No descriptor to receive a response back is given. * As ugly as it looks, let's just sleep for some time * hoping the other side will finish sync. */ TS("SLEEP 5"); sleep(5); goto sync_done; } /* Wait for feedback from the receiving side */ /* FIXME: use select/poll with a timeout */ if (read(arg->feedback_fd, buf, LEN_STATUS) != LEN_STATUS) { ploop_err(errno, "Can't read feedback"); ret = SYSEXIT_PROTOCOL; goto done; } if (strncmp(buf, STATUS_OK, LEN_STATUS) == 0) { goto sync_done; } else if (strncmp(buf, STATUS_FAIL, LEN_STATUS) == 0) { ploop_err(0, "Remote side reported sync failure"); ret = SYSEXIT_FSYNC; goto done; } else { ploop_err(0, "Got back feedback: %s", buf); ret = SYSEXIT_PROTOCOL; goto done; } } else { /* Writing to local file */ fdatasync(arg->ofd); } sync_done: /* Freeze the container */ TS("FLUSH"); ret = run_cmd(arg->flush_cmd); if (ret) goto done; /* Sync fs */ TS("SYNCFS"); if (sys_syncfs(mntfd)) { ploop_err(errno, "syncfs() failed"); ret = SYSEXIT_FSYNC; goto done; } /* Flush journal and freeze fs (this also clears the fs dirty bit) */ TS("FIFREEZE"); ret = ioctl_device(mntfd, FIFREEZE, 0); if (ret) goto done; fs_frozen = 1; TS("IOC_SYNC"); ret = ioctl_device(devfd, PLOOP_IOC_SYNC, 0); if (ret) goto done; iter = 1; iterpos = 0; for (;;) { int err; struct ploop_track_extent e; err = ioctl(devfd, PLOOP_IOC_TRACK_READ, &e); if (err == 0) { __u64 pos; //fprintf(stderr, "TRACK %llu-%llu\n", e.start, e.end); fflush(stdout); if (e.end > trackend) trackend = e.end; if (e.start < iterpos) iter++; iterpos = e.end; for (pos = e.start; pos < e.end; ) { int n; int copy = e.end - pos; if (copy > cluster) copy = cluster; if (pos + copy > trackpos) { trackpos = pos + copy; ret = ioctl(devfd, PLOOP_IOC_TRACK_SETPOS, &trackpos); if (ret) goto done; } TS("READ %llu %d", pos, copy); n = do_pread(copy, pos); if (n == 0) { ploop_err(0, "Unexpected EOF"); ret = SYSEXIT_READ; goto done; } TS("SEND %llu %d", pos, n); async_send(n, pos); pos += n; } } else { if (errno == EAGAIN) break; ploop_err(errno, "PLOOP_IOC_TRACK_READ"); ret = SYSEXIT_DEVIOC; goto done; } if (iter > 2) { ploop_err(0, "Too many iterations on frozen FS, aborting"); ret = SYSEXIT_LOOP; goto done; } } /* Must clear dirty flag on ploop1 image. */ if (strcmp(format, "ploop1") == 0) { int n; struct ploop_pvd_header *vh; TS("READ 0 4096"); n = do_pread(4096, 0); if (n < SECTOR_SIZE) { ploop_err(errno, "Short read"); ret = SYSEXIT_READ; goto done; } vh = iobuf[i]; vh->m_DiskInUse = 0; TS("SEND 0 %d (1st sector)", SECTOR_SIZE); async_send(SECTOR_SIZE, 0); } TS("IOCTL TRACK_STOP"); ret = ioctl(devfd, PLOOP_IOC_TRACK_STOP, 0); if (ret) goto done; tracker_on = 0; TS("SEND 0 0 (close)"); async_send(0, 0); pthread_join(send_th, NULL); send_th = 0; done: if (send_th) pthread_cancel(send_th); if (fs_frozen) (void)ioctl_device(mntfd, FITHAW, 0); if (tracker_on) (void)ioctl_device(devfd, PLOOP_IOC_TRACK_ABORT, 0); free(iobuf[0]); free(iobuf[1]); if (devfd >=0) close(devfd); if (mntfd >=0) close(mntfd); free(send_from); if (idelta.fd >= 0) close_delta(&idelta); TS("DONE"); return ret; } #undef do_pread #undef async_send /* Deprecated, please use ploop_copy_send() instead */ int ploop_send(const char *device, int ofd, const char *flush_cmd, int is_pipe) { struct ploop_copy_send_param s = { .device = device, .ofd = ofd, .flush_cmd = flush_cmd, }; return ploop_copy_send(&s); }
int ploop_copy_receiver(struct ploop_copy_receive_param *arg) { int ofd, ret; __u64 cluster = 0; void *iobuf = NULL; int n; struct pcopy_pkt_desc desc; if (!arg) return SYSEXIT_PARAM; if (is_fd_socket(arg->ifd) != 1) { ploop_err(errno, "Invalid input fd %d: must be " "a pipe or a socket", arg->ifd); return SYSEXIT_PARAM; } ofd = open(arg->file, O_WRONLY|O_CREAT|O_TRUNC, 0600); if (ofd < 0) { ploop_err(errno, "Can't open %s", arg->file); return SYSEXIT_CREAT; } ploop_dbg(3, "RCV start %s", arg->file); for (;;) { if (nread(arg->ifd, &desc, sizeof(desc)) < 0) { ploop_err(errno, "Error in nread(desc)"); ret = SYSEXIT_READ; goto out; } if (desc.marker != PCOPY_MARKER) { ploop_err(0, "Stream corrupted"); ret = SYSEXIT_PROTOCOL; goto out; } if (desc.size > cluster) { free(iobuf); iobuf = NULL; cluster = desc.size; if (p_memalign(&iobuf, 4096, cluster)) { ret = SYSEXIT_MALLOC; goto out; } } if (desc.size == 0) break; if (nread(arg->ifd, iobuf, desc.size)) { ploop_err(errno, "Error in nread data"); ret = SYSEXIT_READ; goto out; } ploop_log(3, "RCV type=%d len=%d pos=%" PRIu64, desc.type, desc.size, (uint64_t)desc.pos); ret = 0; switch (desc.type) { case PCOPY_PKT_DATA: case PCOPY_PKT_DATA_ASYNC: { n = TEMP_FAILURE_RETRY(pwrite(ofd, iobuf, desc.size, desc.pos)); if (n != desc.size) { if (n < 0) ploop_err(errno, "Error in pwrite"); else ploop_err(0, "Error: short pwrite"); ret = SYSEXIT_WRITE; goto out; } break; } case PCOPY_PKT_CMD: { unsigned int cmd = ((unsigned int *) iobuf)[0]; switch(cmd) { case PCOPY_CMD_SYNC: ret = data_sync(ofd); if (ret) goto out; break; default: ploop_err(0, "ploop_copy_receiver: unsupported command %d", cmd); ret = SYSEXIT_PARAM; } break; } default: ploop_err(0, "ploop_copy_receiver: unsupported command type%d", desc.type); ret = SYSEXIT_PARAM; break; } /* send reply */ if (desc.type != PCOPY_PKT_DATA_ASYNC && nwrite(arg->ifd, &ret, sizeof(int))) { ret = SYSEXIT_WRITE; ploop_err(errno, "failed to send reply"); goto out; } } ret = data_sync(ofd); if (ret) goto out; ploop_dbg(3, "RCV exited"); /* send final reply */ ret = 0; if (nwrite(arg->ifd, &ret, sizeof(int))) { ret = SYSEXIT_WRITE; ploop_err(errno, "failed to send reply"); goto out; } out: if (close(ofd)) { ploop_err(errno, "Error in close"); if (!ret) ret = SYSEXIT_WRITE; } if (ret) unlink(arg->file); free(iobuf); return ret; }
int ploop_copy_receive(struct ploop_copy_receive_param *arg) { int ofd, ret; __u64 cluster = 0; void *iobuf = NULL; if (!arg) return SYSEXIT_PARAM; if (is_fd_pipe(arg->ifd) != 1) { ploop_err(errno, "Invalid input fd %d: must be " "a pipe or a socket", arg->ifd); return SYSEXIT_PARAM; } if (arg->feedback_fd >= 0 && is_fd_pipe(arg->feedback_fd) != 1) { ploop_err(errno, "Invalid feedback fd %d: must be " "a pipe or a socket", arg->feedback_fd); return SYSEXIT_PARAM; } /* If feedback is to be send to stdout or stderr, * we have to disable logging to appropriate fd. * * As currently there's no way to disable just stderr, * so in this case we have to disable stdout as well. */ if (arg->feedback_fd == STDOUT_FILENO) ploop_set_verbose_level(PLOOP_LOG_NOSTDOUT); else if (arg->feedback_fd == STDERR_FILENO) ploop_set_verbose_level(PLOOP_LOG_NOCONSOLE); ofd = open(arg->file, O_WRONLY|O_CREAT|O_EXCL, 0600); if (ofd < 0) { ploop_err(errno, "Can't open %s", arg->file); return SYSEXIT_CREAT; } /* Read data */ for (;;) { int n; struct xfer_desc desc; if (nread(arg->ifd, &desc, sizeof(desc)) < 0) { ploop_err(errno, "Error in nread(desc)"); ret = SYSEXIT_READ; goto out; } if (desc.marker != PLOOPCOPY_MARKER) { ploop_err(0, "Stream corrupted"); ret = SYSEXIT_PROTOCOL; goto out; } if (desc.size > cluster) { free(iobuf); iobuf = NULL; cluster = desc.size; if (p_memalign(&iobuf, 4096, cluster)) { ret = SYSEXIT_MALLOC; goto out; } } if (desc.size == 0) break; if (nread(arg->ifd, iobuf, desc.size)) { ploop_err(errno, "Error in nread data"); ret = SYSEXIT_READ; goto out; } if (desc.size == SYNC_MARK) { int st; /* ignore received data, instead do sync */ st = fdatasync(ofd); if (arg->feedback_fd >= 0) { /* Tell the sending side how it went */ int w; w = write(arg->feedback_fd, st ? STATUS_FAIL : STATUS_OK, LEN_STATUS); /* check write error only if no error yet */ if (!st && w != LEN_STATUS) { ploop_err(errno, "Error in write(%d)", arg->feedback_fd); ret = SYSEXIT_WRITE; goto out; } } if (st) { ploop_err(errno, "Error in fdatasync()"); ret = SYSEXIT_WRITE; goto out; } continue; } n = pwrite(ofd, iobuf, desc.size, desc.pos); if (n != desc.size) { if (n < 0) ploop_err(errno, "Error in pwrite"); else ploop_err(0, "Error: short pwrite"); ret = SYSEXIT_WRITE; goto out; } } if (fdatasync(ofd)) { ploop_err(errno, "Error in fdatasync"); ret = SYSEXIT_WRITE; goto out; } ret = 0; out: if (close(ofd)) { ploop_err(errno, "Error in close"); if (!ret) ret = SYSEXIT_WRITE; } if (ret) unlink(arg->file); free(iobuf); return ret; }