Beispiel #1
0
static void balloon_deflate_page(VirtIOBalloon *balloon,
                                 MemoryRegion *mr, hwaddr offset)
{
    void *addr = memory_region_get_ram_ptr(mr) + offset;
    RAMBlock *rb;
    size_t rb_page_size;
    ram_addr_t ram_offset, host_page_base;
    void *host_addr;
    int ret;

    /* XXX is there a better way to get to the RAMBlock than via a
     * host address? */
    rb = qemu_ram_block_from_host(addr, false, &ram_offset);
    rb_page_size = qemu_ram_pagesize(rb);
    host_page_base = ram_offset & ~(rb_page_size - 1);

    if (balloon->pbp
        && rb == balloon->pbp->rb
        && host_page_base == balloon->pbp->base) {
        int subpages = rb_page_size / BALLOON_PAGE_SIZE;

        /*
         * This means the guest has asked to discard some of the 4kiB
         * subpages of a host page, but then changed its mind and
         * asked to keep them after all.  It's exceedingly unlikely
         * for a guest to do this in practice, but handle it anyway,
         * since getting it wrong could mean discarding memory the
         * guest is still using. */
        bitmap_clear(balloon->pbp->bitmap,
                     (ram_offset - balloon->pbp->base) / BALLOON_PAGE_SIZE,
                     subpages);

        if (bitmap_empty(balloon->pbp->bitmap, subpages)) {
            g_free(balloon->pbp);
            balloon->pbp = NULL;
        }
    }

    host_addr = (void *)((uintptr_t)addr & ~(rb_page_size - 1));

    /* When a page is deflated, we hint the whole host page it lives
     * on, since we can't do anything smaller */
    ret = qemu_madvise(host_addr, rb_page_size, QEMU_MADV_WILLNEED);
    if (ret != 0) {
        warn_report("Couldn't MADV_WILLNEED on balloon deflate: %s",
                    strerror(errno));
        /* Otherwise ignore, failing to page hint shouldn't be fatal */
    }
}
Beispiel #2
0
/*
 * Handle faults detected by the USERFAULT markings
 */
static void *postcopy_ram_fault_thread(void *opaque)
{
    MigrationIncomingState *mis = opaque;
    struct uffd_msg msg;
    int ret;
    size_t index;
    RAMBlock *rb = NULL;

    trace_postcopy_ram_fault_thread_entry();
    mis->last_rb = NULL; /* last RAMBlock we sent part of */
    qemu_sem_post(&mis->fault_thread_sem);

    struct pollfd *pfd;
    size_t pfd_len = 2 + mis->postcopy_remote_fds->len;

    pfd = g_new0(struct pollfd, pfd_len);

    pfd[0].fd = mis->userfault_fd;
    pfd[0].events = POLLIN;
    pfd[1].fd = mis->userfault_event_fd;
    pfd[1].events = POLLIN; /* Waiting for eventfd to go positive */
    trace_postcopy_ram_fault_thread_fds_core(pfd[0].fd, pfd[1].fd);
    for (index = 0; index < mis->postcopy_remote_fds->len; index++) {
        struct PostCopyFD *pcfd = &g_array_index(mis->postcopy_remote_fds,
                                                 struct PostCopyFD, index);
        pfd[2 + index].fd = pcfd->fd;
        pfd[2 + index].events = POLLIN;
        trace_postcopy_ram_fault_thread_fds_extra(2 + index, pcfd->idstr,
                                                  pcfd->fd);
    }

    while (true) {
        ram_addr_t rb_offset;
        int poll_result;

        /*
         * We're mainly waiting for the kernel to give us a faulting HVA,
         * however we can be told to quit via userfault_quit_fd which is
         * an eventfd
         */

        poll_result = poll(pfd, pfd_len, -1 /* Wait forever */);
        if (poll_result == -1) {
            error_report("%s: userfault poll: %s", __func__, strerror(errno));
            break;
        }

        if (pfd[1].revents) {
            uint64_t tmp64 = 0;

            /* Consume the signal */
            if (read(mis->userfault_event_fd, &tmp64, 8) != 8) {
                /* Nothing obviously nicer than posting this error. */
                error_report("%s: read() failed", __func__);
            }

            if (atomic_read(&mis->fault_thread_quit)) {
                trace_postcopy_ram_fault_thread_quit();
                break;
            }
        }

        if (pfd[0].revents) {
            poll_result--;
            ret = read(mis->userfault_fd, &msg, sizeof(msg));
            if (ret != sizeof(msg)) {
                if (errno == EAGAIN) {
                    /*
                     * if a wake up happens on the other thread just after
                     * the poll, there is nothing to read.
                     */
                    continue;
                }
                if (ret < 0) {
                    error_report("%s: Failed to read full userfault "
                                 "message: %s",
                                 __func__, strerror(errno));
                    break;
                } else {
                    error_report("%s: Read %d bytes from userfaultfd "
                                 "expected %zd",
                                 __func__, ret, sizeof(msg));
                    break; /* Lost alignment, don't know what we'd read next */
                }
            }
            if (msg.event != UFFD_EVENT_PAGEFAULT) {
                error_report("%s: Read unexpected event %ud from userfaultfd",
                             __func__, msg.event);
                continue; /* It's not a page fault, shouldn't happen */
            }

            rb = qemu_ram_block_from_host(
                     (void *)(uintptr_t)msg.arg.pagefault.address,
                     true, &rb_offset);
            if (!rb) {
                error_report("postcopy_ram_fault_thread: Fault outside guest: %"
                             PRIx64, (uint64_t)msg.arg.pagefault.address);
                break;
            }

            rb_offset &= ~(qemu_ram_pagesize(rb) - 1);
            trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address,
                                                qemu_ram_get_idstr(rb),
                                                rb_offset,
                                                msg.arg.pagefault.feat.ptid);
            mark_postcopy_blocktime_begin(
                    (uintptr_t)(msg.arg.pagefault.address),
                                msg.arg.pagefault.feat.ptid, rb);

            /*
             * Send the request to the source - we want to request one
             * of our host page sizes (which is >= TPS)
             */
            if (rb != mis->last_rb) {
                mis->last_rb = rb;
                migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
                                         rb_offset, qemu_ram_pagesize(rb));
            } else {
                /* Save some space */
                migrate_send_rp_req_pages(mis, NULL,
                                         rb_offset, qemu_ram_pagesize(rb));
            }
        }

        /* Now handle any requests from external processes on shared memory */
        /* TODO: May need to handle devices deregistering during postcopy */
        for (index = 2; index < pfd_len && poll_result; index++) {
            if (pfd[index].revents) {
                struct PostCopyFD *pcfd =
                    &g_array_index(mis->postcopy_remote_fds,
                                   struct PostCopyFD, index - 2);

                poll_result--;
                if (pfd[index].revents & POLLERR) {
                    error_report("%s: POLLERR on poll %zd fd=%d",
                                 __func__, index, pcfd->fd);
                    pfd[index].events = 0;
                    continue;
                }

                ret = read(pcfd->fd, &msg, sizeof(msg));
                if (ret != sizeof(msg)) {
                    if (errno == EAGAIN) {
                        /*
                         * if a wake up happens on the other thread just after
                         * the poll, there is nothing to read.
                         */
                        continue;
                    }
                    if (ret < 0) {
                        error_report("%s: Failed to read full userfault "
                                     "message: %s (shared) revents=%d",
                                     __func__, strerror(errno),
                                     pfd[index].revents);
                        /*TODO: Could just disable this sharer */
                        break;
                    } else {
                        error_report("%s: Read %d bytes from userfaultfd "
                                     "expected %zd (shared)",
                                     __func__, ret, sizeof(msg));
                        /*TODO: Could just disable this sharer */
                        break; /*Lost alignment,don't know what we'd read next*/
                    }
                }
                if (msg.event != UFFD_EVENT_PAGEFAULT) {
                    error_report("%s: Read unexpected event %ud "
                                 "from userfaultfd (shared)",
                                 __func__, msg.event);
                    continue; /* It's not a page fault, shouldn't happen */
                }
                /* Call the device handler registered with us */
                ret = pcfd->handler(pcfd, &msg);
                if (ret) {
                    error_report("%s: Failed to resolve shared fault on %zd/%s",
                                 __func__, index, pcfd->idstr);
                    /* TODO: Fail? Disable this sharer? */
                }
            }
        }
    }
    trace_postcopy_ram_fault_thread_exit();
    g_free(pfd);
    return NULL;
}
Beispiel #3
0
static void balloon_inflate_page(VirtIOBalloon *balloon,
                                 MemoryRegion *mr, hwaddr offset)
{
    void *addr = memory_region_get_ram_ptr(mr) + offset;
    RAMBlock *rb;
    size_t rb_page_size;
    int subpages;
    ram_addr_t ram_offset, host_page_base;

    /* XXX is there a better way to get to the RAMBlock than via a
     * host address? */
    rb = qemu_ram_block_from_host(addr, false, &ram_offset);
    rb_page_size = qemu_ram_pagesize(rb);
    host_page_base = ram_offset & ~(rb_page_size - 1);

    if (rb_page_size == BALLOON_PAGE_SIZE) {
        /* Easy case */

        ram_block_discard_range(rb, ram_offset, rb_page_size);
        /* We ignore errors from ram_block_discard_range(), because it
         * has already reported them, and failing to discard a balloon
         * page is not fatal */
        return;
    }

    /* Hard case
     *
     * We've put a piece of a larger host page into the balloon - we
     * need to keep track until we have a whole host page to
     * discard
     */
    warn_report_once(
"Balloon used with backing page size > 4kiB, this may not be reliable");

    subpages = rb_page_size / BALLOON_PAGE_SIZE;

    if (balloon->pbp
        && (rb != balloon->pbp->rb
            || host_page_base != balloon->pbp->base)) {
        /* We've partially ballooned part of a host page, but now
         * we're trying to balloon part of a different one.  Too hard,
         * give up on the old partial page */
        g_free(balloon->pbp);
        balloon->pbp = NULL;
    }

    if (!balloon->pbp) {
        /* Starting on a new host page */
        size_t bitlen = BITS_TO_LONGS(subpages) * sizeof(unsigned long);
        balloon->pbp = g_malloc0(sizeof(PartiallyBalloonedPage) + bitlen);
        balloon->pbp->rb = rb;
        balloon->pbp->base = host_page_base;
    }

    bitmap_set(balloon->pbp->bitmap,
               (ram_offset - balloon->pbp->base) / BALLOON_PAGE_SIZE,
               subpages);

    if (bitmap_full(balloon->pbp->bitmap, subpages)) {
        /* We've accumulated a full host page, we can actually discard
         * it now */

        ram_block_discard_range(rb, balloon->pbp->base, rb_page_size);
        /* We ignore errors from ram_block_discard_range(), because it
         * has already reported them, and failing to discard a balloon
         * page is not fatal */

        g_free(balloon->pbp);
        balloon->pbp = NULL;
    }
}