示例#1
0
文件: io.cpp 项目: yaochang/shore-mt
/**
 * Allocate the disk I/O buffer, or release it when size is 0.
 *
 * Behavior, in order:
 *  - A buffer exists and size == 0: the buffer is unmapped and RCOK is
 *    returned (buf_start is not touched on this path).
 *  - A buffer exists and size != 0: nothing is changed and fcINTERNAL is
 *    returned; the existing buffer has to be released first.
 *  - Otherwise buf_start is reset to 0 and one allocation routine is
 *    selected:
 *      WITHOUT_MMAP     -> set_bufsize_memalign (always)
 *      HAVE_HUGETLBFS   -> set_bufsize_huge when hugefs_path is set
 *      fallback         -> set_bufsize_normal
 *
 * @param size       requested buffer size in bytes; 0 means "release".
 * @param buf_start  in/out: cleared here, then handed to the selected
 *                   allocation routine.
 * @param use_normal_if_huge_fails
 *                   (only named when HAVE_HUGETLBFS is defined) when true,
 *                   a hugetlbfs failure falls back to set_bufsize_normal
 *                   instead of being returned to the caller.
 * @return the result code of the path taken.
 */
w_rc_t 
sthread_t::set_bufsize(size_t size, char *&buf_start /* in/out*/,
    bool 
#ifdef HAVE_HUGETLBFS
    // This argument is used only by the unit tests.
    use_normal_if_huge_fails /*=false*/
#endif
    )
{
    // Release request for an existing buffer.
    if (_disk_buffer && size == 0) {
        do_unmap();
        return RCOK;
    }

    // Refuse to replace a live buffer.
    if (_disk_buffer) {
        std::cerr << "Can't re-allocate disk buffer without disabling"
            << std::endl;
        return RC(fcINTERNAL);
    }

    buf_start = 0;

    long system_page_size = sysconf(_SC_PAGESIZE);

#ifdef WITHOUT_MMAP
    // If the user configured --without-mmap, then don't even 
    // bother with the mmap attempts below.
    return set_bufsize_memalign(size, buf_start, system_page_size);
#endif

#ifdef HAVE_HUGETLBFS
    // Ok, we have to have configured for hugefs AND we have to
    // have set a path for it.  If we have no path string,
    // we have chosen not to use hugetlbfs.  This is the result
    // of setting run-time options sm_hugetlbfs_path to "NULL".
    // So if we've set the path to "NULL", we will just use the 
    // "normal way".
    if(hugefs_path != NULL) {
        w_rc_t rc =  set_bufsize_huge(size, buf_start, system_page_size);
        if( !rc.is_error() ) {
#if W_DEBUG_LEVEL > 10
            cout << "Using hugetlbfs size " << size
                << " system_page_size " << system_page_size
                << " path " << hugefs_path << ". " << std::endl;
#endif
            return rc;
        }
        if(!use_normal_if_huge_fails)
        {
            // Caller did not ask for a fallback: report the failure.
            return rc;
        }
        // else, try the other way
        std::cerr << "Skipping hugetlbfs due to mmap failure: " << rc << std::endl;
    } else {
        cout << "Skipping hugetlbfs based on user option. " << std::endl;
    }
#endif
    return set_bufsize_normal(size, buf_start, system_page_size);
}
/*
 * Remove num_pages entries starting at `slot` from the page table described
 * by `pgtable`, flush the TLB as needed and clear mapping->mapping_info.
 *
 * Returns SYS_ERR_VM_MAP_SIZE when num_pages differs from the recorded
 * pte_count of `mapping`, the compile_vaddr() error for failures that are
 * not handled here, and SYS_ERR_OK otherwise.
 */
errval_t page_mappings_unmap(struct capability *pgtable, struct cte *mapping,
                             size_t slot, size_t num_pages)
{
    assert(type_is_vnode(pgtable->type));
    debug(SUBSYS_PAGING, "page_mappings_unmap(%zd pages)\n", num_pages);

    // fetch the entry's physical address and locate the table in kernel memory
    genpaddr_t entry_paddr;
    read_pt_entry(pgtable, slot, &entry_paddr, NULL, NULL);
    lvaddr_t table_base =
        local_phys_to_mem(gen_phys_to_local_phys(get_address(pgtable)));

    // reconstruct the virtual address of the first page
    // TODO: error checking
    genvaddr_t first_vaddr;
    bool need_flush = true;
    struct cte *table_cte = cte_for_cap(pgtable);
    errval_t err = compile_vaddr(table_cte, slot, &first_vaddr);
    if (err_is_fail(err)) {
        switch (err_no(err)) {
        case SYS_ERR_VNODE_NOT_INSTALLED:
            // tolerated: the full flush below is used instead
            debug(SUBSYS_PAGING, "couldn't reconstruct virtual address\n");
            break;
        case SYS_ERR_VNODE_SLOT_INVALID:
            if (table_cte->mapping_info.pte != 0) {
                return err;
            }
            debug(SUBSYS_PAGING, "unmapping in floating page table; not flushing TLB\n");
            need_flush = false;
            break;
        default:
            return err;
        }
    }

    // the caller must unmap exactly as many pages as were mapped
    if (mapping->mapping_info.pte_count != num_pages) {
        return SYS_ERR_VM_MAP_SIZE;
    }

    do_unmap(table_base, slot, num_pages);

    // flush TLB for unmapped pages if we got a valid virtual address
    // TODO: heuristic that decides if selective or full flush is more
    //       efficient?
    if (need_flush) {
        bool selective = (num_pages <= 1) && !err_is_fail(err);
        if (selective) {
            do_one_tlb_flush(first_vaddr);
        } else {
            do_full_tlb_flush();
        }
    }

    // wipe the bookkeeping for this mapping
    memset(&mapping->mapping_info, 0, sizeof(struct mapping_info));

    return SYS_ERR_OK;
}
示例#3
0
/*
 * Remove the page-table entries described by the mapping capability
 * `mapping` from the page table `pgtable`, then flush the TLB as needed.
 *
 * The slot and entry count come from mapping->cap.u.frame_mapping.
 * Returns the compile_vaddr() error for failures not handled here,
 * SYS_ERR_OK otherwise.
 */
errval_t page_mappings_unmap(struct capability *pgtable, struct cte *mapping)
{
    assert(type_is_vnode(pgtable->type));
    assert(type_is_mapping(mapping->cap.type));
    struct Frame_Mapping *fm = &mapping->cap.u.frame_mapping;
    debug(SUBSYS_PAGING, "page_mappings_unmap(%hu pages)\n", fm->pte_count);

    // locate the page table in kernel-accessible memory
    lvaddr_t table_base =
        local_phys_to_mem(gen_phys_to_local_phys(get_address(pgtable)));

    cslot_t first_slot = fm->entry;

    // reconstruct the virtual address of the first page
    // NOTE(review): first_vaddr is read below after a failed call; this
    // presumably relies on compile_vaddr() storing 0 on that path -- confirm.
    genvaddr_t first_vaddr;
    bool need_flush = true;
    struct cte *table_cte = cte_for_cap(pgtable);
    errval_t err = compile_vaddr(table_cte, first_slot, &first_vaddr);
    if (err_is_fail(err)) {
        if (err_no(err) == SYS_ERR_VNODE_NOT_INSTALLED && first_vaddr == 0) {
            debug(SUBSYS_PAGING, "unmapping in floating page table; not flushing TLB\n");
            need_flush = false;
        } else if (err_no(err) != SYS_ERR_VNODE_SLOT_INVALID) {
            return err;
        } else {
            // tolerated: the full flush below is used instead
            debug(SUBSYS_PAGING, "couldn't reconstruct virtual address\n");
        }
    }

    do_unmap(table_base, first_slot, fm->pte_count);

    // flush TLB for unmapped pages if we got a valid virtual address
    // TODO: heuristic that decides if selective or full flush is more
    //       efficient?
    if (need_flush) {
        bool selective = (fm->pte_count <= 1) && !err_is_fail(err);
        if (selective) {
            do_one_tlb_flush(first_vaddr);
        } else {
            do_full_tlb_flush();
        }
    }

    return SYS_ERR_OK;
}
示例#4
0
文件: io.cpp 项目: yaochang/shore-mt
/**
 * Convenience wrapper around the result-code overload of set_bufsize().
 *
 * @param size  requested buffer size in bytes; 0 releases the buffer.
 * @return the start of the buffer on success; NULL when size is 0 or when
 *         the allocation failed (the failure is printed to std::cerr
 *         because this interface cannot report it).
 */
char  *
sthread_t::set_bufsize(size_t size)
{
    // A zero size is a release request; there is no buffer to hand back.
    if (size == 0) {
        do_unmap();
        return NULL;
    }

    w_rc_t    e;
    // Initialized so a defined value is returned on every path.
    char    *start = NULL;

    e = set_bufsize(size, start);

    if (e.is_error()) {
        std::cerr << "Hidden Failure: set_bufsize(" << size << "):"
            << std::endl << e << std::endl;
        return 0;
    }

    return start;
}
示例#5
0
文件: attach.c 项目: nelhage/reptyr
/*
 * Run the pty-stealing sequence against process `pid`.
 *
 * Each step is executed in order and the first failing step aborts the
 * sequence. On success *pty receives steal.ptyfd; on failure the fd is
 * closed, the traced child is cleaned up and *pty is left untouched.
 *
 * Returns 0 on success, otherwise the error of the failing step.
 */
int steal_pty(pid_t pid, int *pty) {
    struct steal_pty_state steal = {};
    long page_size = sysconf(_SC_PAGE_SIZE);
    int err;

    err = preflight_check(pid);
    if (err != 0)
        goto out;

    err = get_terminal_state(&steal, pid);
    if (err != 0)
        goto out;

    err = setup_steal_socket(&steal);
    if (err != 0)
        goto out;

    debug("Listening on socket: %s", steal.addr_un.sun_path);
    debug("Attaching terminal emulator pid=%d", steal.emulator_pid);

    err = grab_pid(steal.emulator_pid, &steal.child, &steal.child_scratch);
    if (err != 0)
        goto out;

    debug("Attached to terminal emulator (pid %d)",
          (int)steal.emulator_pid);

    err = find_master_fd(&steal);
    if (err != 0) {
        error("Unable to find the fd for the pty!");
        goto out;
    }

    err = setup_steal_socket_child(&steal);
    if (err != 0)
        goto out;

    err = steal_child_pty(&steal);
    if (err != 0)
        goto out;

    err = steal_block_hup(&steal);
    if (err != 0)
        goto out;

    err = steal_cleanup_child(&steal);
    if (err != 0)
        goto out;

    /* Every step succeeded: skip the child-side failure cleanup. */
    goto out_no_child;

out:
    /* Failure path: drop the pty fd so it is not handed to the caller. */
    if (steal.ptyfd != 0) {
        close(steal.ptyfd);
        steal.ptyfd = 0;
    }

    if (steal.child_fd > 0)
        do_syscall(&steal.child, close, steal.child_fd, 0, 0, 0, 0, 0);

    if (steal.child_scratch > 0)
        do_unmap(&steal.child, steal.child_scratch, page_size);

    if (steal.child.state != ptrace_detached) {
        ptrace_restore_regs(&steal.child);
        ptrace_detach_child(&steal.child);
    }

out_no_child:
    /* Cleanup shared by the success and failure paths. */
    if (steal.sockfd > 0) {
        close(steal.sockfd);
        unlink(steal.addr_un.sun_path);
    }

    if (steal.tmpdir[0] != '\0') {
        rmdir(steal.tmpdir);
    }

    if (steal.ptyfd != 0)
        *pty = steal.ptyfd;

    free(steal.master_fds.fds);

    return err;
}
示例#6
0
文件: attach.c 项目: nelhage/reptyr
/*
 * Attach process `pid` to the terminal named by `pty`.
 *
 * The target is stopped and grabbed via grab_pid(); the pty path is copied
 * into a scratch page in the target, opened there, made the controlling
 * terminal (TIOCSCTTY) and dup2'ed over the target's terminal fds.
 *
 * pid         - process to attach.
 * pty         - path of the terminal to attach it to.
 * force_stdio - when non-zero, fds 0, 1 and 2 are replaced instead of the
 *               fds reported by get_child_tty_fds(); also allows attaching
 *               a target for which copy_tty_state() reports ENOTTY.
 *
 * Returns 0 on success, otherwise a positive error number.
 */
int attach_child(pid_t pid, const char *pty, int force_stdio) {
    struct ptrace_child child;
    child_addr_t scratch_page = -1;
    int *child_tty_fds = NULL, n_fds, child_fd, statfd = -1;
    int i;
    int err = 0;
    long page_size = sysconf(_SC_PAGE_SIZE);
#ifdef __linux__
    char stat_path[PATH_MAX];
#endif

    if ((err = check_pgroup(pid))) {
        return err;
    }

    if ((err = preflight_check(pid))) {
        return err;
    }

    debug("Using tty: %s", pty);

    /* Any copy_tty_state() failure other than ENOTTY-without-force is
     * tolerated and the attach continues. */
    if ((err = copy_tty_state(pid, pty))) {
        if (err == ENOTTY && !force_stdio) {
            error("Target is not connected to a terminal.\n"
                  "    Use -s to force attaching anyways.");
            return err;
        }
    }

#ifdef __linux__
    snprintf(stat_path, sizeof stat_path, "/proc/%d/stat", pid);
    statfd = open(stat_path, O_RDONLY);
    if (statfd < 0) {
        /* open() returns -1 on failure, so the reason is in errno, not in
         * the return value. Save it before anything can overwrite it. */
        err = errno;
        error("Unable to open %s: %s", stat_path, strerror(err));
        return err;
    }
#endif

    kill(pid, SIGTSTP);
    wait_for_stop(pid, statfd);

    if ((err = grab_pid(pid, &child, &scratch_page))) {
        goto out_cont;
    }

    if (force_stdio) {
        child_tty_fds = malloc(3 * sizeof(int));
        if (!child_tty_fds) {
            err = ENOMEM;
            goto out_unmap;
        }
        n_fds = 3;
        child_tty_fds[0] = 0;
        child_tty_fds[1] = 1;
        child_tty_fds[2] = 2;
    } else {
        child_tty_fds = get_child_tty_fds(&child, statfd, &n_fds);
        if (!child_tty_fds) {
            err = child.error;
            goto out_unmap;
        }
    }

    /* Place the pty path in the target so it can open it itself. */
    if (ptrace_memcpy_to_child(&child, scratch_page, pty, strlen(pty) + 1)) {
        err = child.error;
        error("Unable to memcpy the pty path to child.");
        goto out_free_fds;
    }

    child_fd = do_syscall(&child, openat,
                          -1, scratch_page, O_RDWR | O_NOCTTY,
                          0, 0, 0);
    if (child_fd < 0) {
        err = child_fd;
        error("Unable to open the tty in the child.");
        goto out_free_fds;
    }

    debug("Opened the new tty in the child: %d", child_fd);

    err = ignore_hup(&child, scratch_page);
    if (err < 0)
        goto out_close;

    err = do_syscall(&child, getsid, 0, 0, 0, 0, 0, 0);
    if (err != child.pid) {
        debug("Target is not a session leader, attempting to setsid.");
        err = do_setsid(&child);
    } else {
        do_syscall(&child, ioctl, child_tty_fds[0], TIOCNOTTY, 0, 0, 0, 0);
    }
    if (err < 0)
        goto out_close;

    err = do_syscall(&child, ioctl, child_fd, TIOCSCTTY, 1, 0, 0, 0);
    if (err != 0) { /* Seems to be returning >0 for error */
        error("Unable to set controlling terminal: %s", strerror(err));
        goto out_close;
    }

    debug("Set the controlling tty");

    /* Failures here are reported but do not abort the attach. */
    /* NOTE(review): errno below is the tracer's errno; presumably the
     * failure reason is in err instead -- confirm against do_dup2(). */
    for (i = 0; i < n_fds; i++) {
        err = do_dup2(&child, child_fd, child_tty_fds[i]);
        if (err < 0)
            error("Problem moving child fd number %d to new tty: %s", child_tty_fds[i], strerror(errno));
    }


    err = 0;

out_close:
    do_syscall(&child, close, child_fd, 0, 0, 0, 0, 0);
out_free_fds:
    free(child_tty_fds);

out_unmap:
    do_unmap(&child, scratch_page, page_size);

    ptrace_restore_regs(&child);
    ptrace_detach_child(&child);

    if (err == 0) {
        kill(child.pid, SIGSTOP);
        wait_for_stop(child.pid, statfd);
    }
    kill(child.pid, SIGWINCH);
out_cont:
    kill(child.pid, SIGCONT);
#ifdef __linux__
    close(statfd);
#endif

    /* Normalize to a positive error number (0 on success). */
    return err < 0 ? -err : err;
}
示例#7
0
文件: attach.c 项目: ag4ve/reptyr
/*
 * Attach process `pid` to the terminal named by `pty`.
 *
 * The target is stopped and ptrace-attached, a scratch page is mapped in
 * it with a remote mmap, the pty path is copied there and opened in the
 * target, made the controlling terminal (TIOCSCTTY) and dup2'ed over the
 * target's terminal fds.
 *
 * pid         - process to attach.
 * pty         - path of the terminal to attach it to.
 * force_stdio - when non-zero, fds 0, 1 and 2 are replaced instead of the
 *               fds reported by get_child_tty_fds(); also allows attaching
 *               a target for which copy_tty_state() reports ENOTTY.
 *
 * Returns 0 on success, otherwise a positive error number.
 */
int attach_child(pid_t pid, const char *pty, int force_stdio) {
    struct ptrace_child child;
    unsigned long scratch_page = -1;
    int *child_tty_fds = NULL, n_fds, child_fd, statfd;
    int i;
    int err = 0;
    long page_size = sysconf(_SC_PAGE_SIZE);
    char stat_path[PATH_MAX];
    long mmap_syscall;

    /* Any copy_tty_state() failure other than ENOTTY-without-force is
     * tolerated and the attach continues. */
    if ((err = copy_tty_state(pid, pty))) {
        if (err == ENOTTY && !force_stdio) {
            error("Target is not connected to a terminal.\n"
                  "    Use -s to force attaching anyways.");
            return err;
        }
    }

    snprintf(stat_path, sizeof stat_path, "/proc/%d/stat", pid);
    statfd = open(stat_path, O_RDONLY);
    if (statfd < 0) {
        /* open() returns -1 on failure, so the reason is in errno, not in
         * the return value. Save it before anything can overwrite it. */
        err = errno;
        error("Unable to open %s: %s", stat_path, strerror(err));
        return err;
    }

    kill(pid, SIGTSTP);
    wait_for_stop(pid, statfd);

    if (ptrace_attach_child(&child, pid)) {
        err = child.error;
        goto out_cont;
    }

    if (ptrace_advance_to_state(&child, ptrace_at_syscall)) {
        err = child.error;
        goto out_detach;
    }
    if (ptrace_save_regs(&child)) {
        err = child.error;
        goto out_detach;
    }

    /* Prefer mmap2 when the target's syscall table has it. */
    mmap_syscall = ptrace_syscall_numbers(&child)->nr_mmap2;
    if (mmap_syscall == -1)
        mmap_syscall = ptrace_syscall_numbers(&child)->nr_mmap;
    scratch_page = ptrace_remote_syscall(&child, mmap_syscall, 0,
                                         page_size, PROT_READ|PROT_WRITE,
                                         MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);

    /* Values in the top 1000 of the address range are negated errors. */
    if (scratch_page > (unsigned long)-1000) {
        err = -(signed long)scratch_page;
        /* Nothing was mapped, so there is nothing to unmap: scratch_page
         * holds an error value here, not an address. */
        goto out_restore;
    }

    debug("Allocated scratch page: %lx", scratch_page);

    if (force_stdio) {
        child_tty_fds = malloc(3 * sizeof(int));
        if (!child_tty_fds) {
            err = ENOMEM;
            goto out_unmap;
        }
        n_fds = 3;
        child_tty_fds[0] = 0;
        child_tty_fds[1] = 1;
        child_tty_fds[2] = 2;
    } else {
        child_tty_fds = get_child_tty_fds(&child, statfd, &n_fds);
        if (!child_tty_fds) {
            err = child.error;
            goto out_unmap;
        }
    }

    /* Place the pty path in the target so it can open it itself. */
    if (ptrace_memcpy_to_child(&child, scratch_page, pty, strlen(pty)+1)) {
        err = child.error;
        error("Unable to memcpy the pty path to child.");
        goto out_free_fds;
    }

    child_fd = do_syscall(&child, open,
                          scratch_page, O_RDWR|O_NOCTTY,
                          0, 0, 0, 0);
    if (child_fd < 0) {
        err = child_fd;
        error("Unable to open the tty in the child.");
        goto out_free_fds;
    }

    debug("Opened the new tty in the child: %d", child_fd);

    err = ignore_hup(&child, scratch_page);
    if (err < 0)
        goto out_close;

    err = do_syscall(&child, getsid, 0, 0, 0, 0, 0, 0);
    if (err != child.pid) {
        debug("Target is not a session leader, attempting to setsid.");
        err = do_setsid(&child);
    } else {
        do_syscall(&child, ioctl, child_tty_fds[0], TIOCNOTTY, 0, 0, 0, 0);
    }
    if (err < 0)
        goto out_close;

    err = do_syscall(&child, ioctl, child_fd, TIOCSCTTY, 0, 0, 0, 0);
    if (err < 0) {
        error("Unable to set controlling terminal.");
        goto out_close;
    }

    debug("Set the controlling tty");

    /* Results of the remote dup2 calls are not checked. */
    for (i = 0; i < n_fds; i++)
        do_syscall(&child, dup2, child_fd, child_tty_fds[i], 0, 0, 0, 0);


    err = 0;

 out_close:
    do_syscall(&child, close, child_fd, 0, 0, 0, 0, 0);
 out_free_fds:
    free(child_tty_fds);

 out_unmap:
    do_unmap(&child, scratch_page, page_size);

 out_restore:
    ptrace_restore_regs(&child);
 out_detach:
    ptrace_detach_child(&child);

    if (err == 0) {
        kill(child.pid, SIGSTOP);
        wait_for_stop(child.pid, statfd);
    }
    kill(child.pid, SIGWINCH);
 out_cont:
    kill(child.pid, SIGCONT);
    close(statfd);

    /* Normalize to a positive error number (0 on success). */
    return err < 0 ? -err : err;
}
示例#8
0
/*
 * Remove every mapping of the memory capability `mem`.
 *
 * Walks the MDB successors of `mem` that share its address, and for each
 * mapping capability that refers back to `mem`: clears its page-table
 * entries (when it has a vnode page table) and queues the mapping cap for
 * deletion. The queued caps are deleted after the walk, then the TLB is
 * flushed: a single-page flush when exactly one one-page mapping with a
 * known virtual address was removed, a full flush otherwise.
 *
 * Always returns SYS_ERR_OK; caps_delete() failures are only logged.
 */
errval_t unmap_capability(struct cte *mem)
{
    errval_t err;

    TRACE_CAP_MSG("unmapping", mem);

    genvaddr_t vaddr = 0;
    bool single_page_flush = false;
    int mapping_count = 0, unmap_count = 0;
    genpaddr_t faddr = get_address(&mem->cap);

    // iterate over all mappings associated with 'mem' and unmap them
    struct cte *next = mem;
    struct cte *to_delete = NULL;

    while ((next = mdb_successor(next)) && get_address(&next->cap) == faddr) {
        TRACE_CAP_MSG("looking at", next);
        if (next->cap.type == get_mapping_type(mem->cap.type) &&
            next->cap.u.frame_mapping.cap == &mem->cap)
        {
            TRACE_CAP_MSG("cleaning up mapping", next);
            mapping_count ++;

            // do unmap
            struct Frame_Mapping *mapping = &next->cap.u.frame_mapping;
            struct cte *pgtable = mapping->ptable;
            if (!pgtable) {
                debug(SUBSYS_PAGING, "mapping->ptable == 0: just deleting mapping\n");
                // no page table recorded: nothing to clear, only delete the cap
                goto delete_mapping;
            }
            if (!type_is_vnode(pgtable->cap.type)) {
                debug(SUBSYS_PAGING,
                        "mapping->ptable.type not vnode (%d): just deleting mapping\n",
                        mapping->ptable->cap.type);
                // recorded table is not a vnode: nothing to clear, only delete the cap
                goto delete_mapping;
            }

            lpaddr_t ptable_lp = gen_phys_to_local_phys(get_address(&pgtable->cap));
            lvaddr_t ptable_lv = local_phys_to_mem(ptable_lp);
            cslot_t slot = mapping->entry;

            // unmap
            do_unmap(ptable_lv, slot, mapping->pte_count);

            unmap_count ++;

            // TLB flush?
            if (unmap_count == 1) {
                err = compile_vaddr(pgtable, slot, &vaddr);
                if (err_is_ok(err) && mapping->pte_count == 1) {
                    single_page_flush = true;
                }
            } else {
                // A second mapping was removed: flushing the single page of
                // the first one would leave stale entries for the others.
                single_page_flush = false;
            }

delete_mapping:
            assert(!next->delete_node.next);
            // mark mapping cap for delete: cannot do delete here as it messes
            // up mdb_successor()
            next->delete_node.next = to_delete;
            to_delete = next;
        }
    }

    // delete mapping caps
    while (to_delete) {
        next = to_delete->delete_node.next;
        err = caps_delete(to_delete);
        if (err_is_fail(err)) {
            printk(LOG_NOTE, "caps_delete: %"PRIuERRV"\n", err);
        }
        to_delete = next;
    }

    TRACE_CAP_MSGF(mem, "unmapped %d/%d instances", unmap_count, mapping_count);

    // do TLB flush
    if (single_page_flush) {
        do_one_tlb_flush(vaddr);
    } else {
        do_full_tlb_flush();
    }

    return SYS_ERR_OK;
}
示例#9
0
文件: io.cpp 项目: yaochang/shore-mt
/**
 * Allocate the disk I/O buffer with an anonymous mmap.
 *
 * The region is first mapped with PROT_NONE, then the start address is
 * aligned and the useful part is made readable and writable with
 * mprotect. On success _disk_buffer and buf_start both point at the
 * aligned start and the buffer has been passed to clear().
 *
 * @param size              requested buffer size in bytes.
 * @param buf_start         in/out: receives the buffer start on success.
 * @param system_page_size  system page size, as obtained by the caller.
 * @return RCOK on success, fcMMAPFAILED when mmap or mprotect fails.
 */
w_rc_t sthread_t::set_bufsize_normal(
    size_t size, char *&buf_start /* in/out*/, long system_page_size)
{
    size_t requested_size = size; // save for asserts later

    // ***********************************************************
    //
    //  GET PAGE SIZES
    //
    // If the SM pagesize is larger than the largest system page size,
    // align everything on the former (safe and is less confusing).
    //
    // ***********************************************************
    long max_page_size = get_max_page_size(system_page_size);
    w_assert1(system_page_size <= max_page_size);
    long align_page_size = (SM_PAGESIZE > max_page_size)? SM_PAGESIZE : max_page_size;

    // ***********************************************************
    //
    //  GET FILE DESCRIPTOR FOR MMAP
    //
    // ***********************************************************
    int fd(-1); // must be -1 if not mapping to a file

    // ***********************************************************
    //
    //  GET FLAGS FOR MMAP
    //
    // If posix mmapped file are available, _POSIX_MAPPED_FILES is defined
    // in <unistd.h> to be > 0
    //
    // That should give you these flags:
    // MAP_FIXED, MAP_PRIVATE, MAP_NORESERVE, MAP_ANONYMOUS
    // If MAP_ANONYMOUS is not there, MAP_ANON might be.
    //
    // However... systems aren't exactly in sync here, so configure.ac
    // checks for each of these flags.
    //
    // ***********************************************************
    int flags1 = MAP_PRIVATE;
    size_t extra_align = align_page_size;
    size_t align_arg = 0;

#if HAVE_DECL_MAP_ANONYMOUS==1
    flags1  |= MAP_ANONYMOUS;
#elif HAVE_DECL_MAP_ANON==1
    flags1  |= MAP_ANON;
#else
#endif

#if HAVE_DECL_MAP_NORESERVE==1
    flags1  |= MAP_NORESERVE;
#endif

#if HAVE_DECL_MAP_ALIGN==1
    flags1 |= MAP_ALIGN;
    extra_align = 0;
    align_arg = align_page_size;
#endif
    
    // add the extra alignment to the size requested before alignment,
    // and then do our own alignment at the end In the case of
    // MAP_ALIGN this is unnecessary, and the extra alignment is zero.
    size += extra_align;
    // NOTE(review): the mmap below uses _disk_buffer_size, not size;
    // presumably align_bufsize() stores the aligned size there -- confirm.
    align_bufsize(size, system_page_size, align_page_size);

    // ***********************************************************
    //
    // FIRST MMAP: get a mapped region from the kernel.
    // If we are using hugetlbfs, fd will be >= 0 and
    // we won't have to do the remap -- the first mapping will
    // give us the best page sizes we can get.  In that case,
    // skip the first mmap and do exactly one "second mmap"
    //
    // ***********************************************************

    errno = 0;
    _disk_buffer = (char*) mmap((char*)align_arg, _disk_buffer_size,
               PROT_NONE,
               flags1,
               fd,   /* fd */
               0     /* off_t */
               );

    if (_disk_buffer == MAP_FAILED) {
        std::cerr 
            << __LINE__ << " " 
            << "mmap (size=" << _disk_buffer_size 
            << " = " << int(_disk_buffer_size/1024)
            << " KB ) returns " << long(_disk_buffer)
            << " errno is " <<  errno  << " " << strerror(errno)
            << " flags " <<  flags1  
            << " fd " <<  fd  
            << std::endl;
        // MAP_FAILED is not a null pointer: leaving it in place would make
        // later "if (_disk_buffer)" checks treat the failed mapping as a
        // live buffer. Reset only after the value has been logged.
        _disk_buffer = NULL;
        return RC(fcMMAPFAILED);
    }
#if W_DEBUG_LEVEL > 4
    else
    {
        std::cerr 
            << __LINE__ << " " 
            << "mmap SUCCESS! (size=" << _disk_buffer_size 
            << " = " << int(_disk_buffer_size/1024)
            << " KB ) returns " << long(_disk_buffer)
            << " errno is " <<  errno  << " " << strerror(errno)
            << " flags " <<  flags1  
            << " fd " <<  fd  
            << std::endl;
    }
#endif

    // ***********************************************************
    //
    // RE-MMAP: manually align the region and give the useful part R/W
    // permissions. 
    //
    // ***********************************************************
    _disk_buffer = (char*)alignon(_disk_buffer, align_page_size);
    // NOTE(review): the result of this alignon() is not used; confirm
    // whether requested_size was meant to be rounded up here.
    alignon(requested_size, system_page_size);
    if (mprotect(_disk_buffer, requested_size, PROT_READ|PROT_WRITE)) {
        std::cerr 
            << __LINE__ << " " 
            << "mprotect (addr=" << long(_disk_buffer)
            << ", size=" << requested_size << ") returns -1;"
            << " errno is " <<  errno  << " " << strerror(errno)
            << std::endl;
        do_unmap();
        return RC(fcMMAPFAILED);
    }
    
#ifdef HAVE_MEMCNTL
    struct memcntl_mha info;
    info.mha_cmd = MHA_MAPSIZE_VA;
    info.mha_flags = 0;
    info.mha_pagesize = max_page_size;
    // Ask the kernel to use the max page size here
    // (a failure is only reported; the allocation still succeeds)
    if(memcntl(_disk_buffer, requested_size, MC_HAT_ADVISE, (char *)&info, 0, 0) < 0)
       
        {
            std::cerr << "memcntl returns -1;"
                 << " errno is " <<  errno  << " " << strerror(errno)
                 << " requested size " <<  max_page_size  << std::endl;
        }
#endif

    align_for_sm(requested_size);
    buf_start = _disk_buffer;
    clear(buf_start, requested_size);
    return RCOK;
}