/* Dynamically load code from a file.  One day NaCl might provide a
   syscall that provides this functionality without needing to make a
   copy of the code.  offset and size do not need to be page-aligned.
   Returns 0 on success, -1 on failure.  */
int nacl_dyncode_map (int fd, void *dest, size_t offset, size_t size)
{
  size_t alignment_padding = offset & (getpagesize () - 1);
  uint8_t *mapping;

  if (alignment_padding == 0 && (size & (getpagesize () - 1)) == 0)
    {
      /* First try mmap using PROT_EXEC directly.  */
      mapping = __mmap (dest, size, PROT_READ | PROT_EXEC,
                        MAP_PRIVATE | MAP_FIXED, fd, offset);
      if (mapping == dest)
        return 0;
      else if (mapping != MAP_FAILED)
        /* Mapped to an unexpected location.  Unmap and fall back.  */
        __munmap (mapping, size);
    }

  /* Fall back: make a page-aligned read-only mapping of the region and
     copy it into the code area via the dyncode_create syscall.  */
  mapping = __mmap (NULL, size + alignment_padding, PROT_READ,
                    MAP_PRIVATE, fd, offset - alignment_padding);
  if (mapping == MAP_FAILED)
    return -1;

  int result = __nacl_dyncode_create (dest, mapping + alignment_padding, size);

  /* Tell Valgrind about this mapping.  */
  __nacl_dyncode_map_for_valgrind (dest, size, offset, mapping);

  /* BUG FIX: unmap the whole temporary mapping including the alignment
     padding; unmapping only SIZE bytes leaked the padding page whenever
     OFFSET was not page-aligned and SIZE was a page multiple.  */
  int munmap_result = __munmap (mapping, size + alignment_padding);
  if (result != 0 || munmap_result != 0)
    return -1;
  return 0;
}
void * mmap(void *addr, size_t length, int prot, int nflags, int fd, off_t offset) { int align = (nflags & MAP_ALIGNMENT_MASK) >> MAP_ALIGNMENT_SHIFT; long amask = (1L << align) - 1L; long off; void *mem; int flags = (nflags & MAP_SHARED ? LINUX_MAP_SHARED : 0) | (nflags & MAP_PRIVATE ? LINUX_MAP_PRIVATE : 0) | (nflags & MAP_FIXED ? LINUX_MAP_FIXED : 0) | (nflags & MAP_ANON ? LINUX_MAP_ANON : 0) | (nflags & MAP_STACK ? LINUX_MAP_STACK : 0); #ifdef HUGEPAGESIZE if (usehuge && length >= HUGEPAGESIZE && (align == 0 || (1L << align) <= HUGEPAGESIZE)) { mem = __mmap(addr, length, prot, flags | LINUX_MAP_HUGE(HUGEPAGESIZE), fd, offset); if (mem != MAP_FAILED) return mem; usehuge = 0; } #endif if (align == 0 || (1L << align) <= __pagesize) { return __mmap(addr, length, prot, flags, fd, offset); } /* do not support aligned file mappings */ if (fd != -1 || offset != 0) { errno = EINVAL; return MAP_FAILED; } mem = __mmap(addr, length * 2, prot, flags, -1, 0); if (mem == MAP_FAILED) { errno = ENOMEM; return MAP_FAILED; } off = (long) mem & amask; /* XXX we could just top and tail the allocations always */ if (off == 0) { /* we were lucky, just unmap the excess at end */ if (munmap((char *)mem + length, length) == -1) { errno = ENOMEM; return MAP_FAILED; } return mem; } if (munmap(mem, length * 2) == -1) { errno = ENOMEM; return MAP_FAILED; } return __mmap((char *)mem + length - off, length, prot, flags | MAP_FIXED, -1, 0); }
/* Enable access to I/O ports FROM .. FROM+NUM-1 by mapping the port
   region out of /dev/mem on first use.  Returns 0 on success, -1 with
   errno set on failure.  */
int _ioperm (unsigned long int from, unsigned long int num, int turn_on)
{
  if (! io.initdone && init_iosys () < 0)
    return -1;

  /* this test isn't as silly as it may look like; consider overflows! */
  if (from >= MAX_PORT || from + num > MAX_PORT)
    {
      __set_errno (EINVAL);
      return -1;
    }

  if (turn_on)
    {
      if (! io.base)
	{
	  int fd;
	  void *mapping;

	  fd = __open ("/dev/mem", O_RDWR);
	  if (fd < 0)
	    return -1;

	  /* BUG FIX: keep the mmap result in a temporary.  Storing the
	     raw result meant a failure left io.base equal to
	     (unsigned long) MAP_FAILED, which is non-zero, so every
	     later call skipped the mapping and reported bogus
	     success.  */
	  mapping = __mmap (0, MAX_PORT << io.shift,
			    PROT_READ | PROT_WRITE,
			    MAP_SHARED, fd, io.io_base);
	  __close (fd);
	  if (mapping == MAP_FAILED)
	    return -1;
	  io.base = (unsigned long int) mapping;
	}
    }

  return 0;
}
internal_function _dl_sysdep_read_whole_file (const char *file, size_t *sizep, int prot) { void *result = MAP_FAILED; struct stat64 st; int flags = O_RDONLY; #ifdef O_CLOEXEC flags |= O_CLOEXEC; #endif int fd = __open (file, flags); if (fd >= 0) { if (__fxstat64 (_STAT_VER, fd, &st) >= 0) { *sizep = st.st_size; /* No need to map the file if it is empty. */ if (*sizep != 0) /* Map a copy of the file contents. */ result = __mmap (NULL, *sizep, prot, #ifdef MAP_COPY MAP_COPY #else MAP_PRIVATE #endif #ifdef MAP_FILE | MAP_FILE #endif , fd, 0); } __close (fd); } return result; }
static void *intercept_mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset) { OPAL_PATCHER_BEGIN; void *result = 0; if (prot == PROT_NONE) { opal_mem_hooks_release_hook (start, length, true); } if (!original_mmap) { #if OPAL_MEMORY_PATCHER_HAVE___MMAP /* the darwin syscall returns an int not a long so call the underlying __mmap function */ result = __mmap (start, length, prot, flags, fd, offset); #else result = (void*)(intptr_t) memory_patcher_syscall(SYS_mmap, start, length, prot, flags, fd, offset); #endif // I thought we had some issue in the past with the above line for IA32, // like maybe syscall() wouldn't handle that many arguments. But just now // I used gcc -m32 and it worked on a recent system. But there's a possibility // that older ia32 systems may need some other code to make the above syscall. } else { result = original_mmap (start, length, prot, flags, fd, offset); } OPAL_PATCHER_END; return result; }
void __init_tls(size_t *aux) { unsigned char *p, *mem; size_t n; Phdr *phdr, *tls_phdr=0; size_t base = 0; libc.tls_size = sizeof(struct pthread); for (p=(void *)aux[AT_PHDR],n=aux[AT_PHNUM]; n; n--,p+=aux[AT_PHENT]) { phdr = (void *)p; if (phdr->p_type == PT_PHDR) base = aux[AT_PHDR] - phdr->p_vaddr; if (phdr->p_type == PT_TLS) tls_phdr = phdr; } if (!tls_phdr) return; image = (void *)(base + tls_phdr->p_vaddr); len = tls_phdr->p_filesz; size = tls_phdr->p_memsz; align = tls_phdr->p_align; size += (-size - (uintptr_t)image) & (align-1); if (align < 4*sizeof(size_t)) align = 4*sizeof(size_t); libc.tls_size = 2*sizeof(void *)+size+align+sizeof(struct pthread); mem = __mmap(0, libc.tls_size, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); if (!__install_initial_tls(__copy_tls(mem))) a_crash(); }
void *nacl_dyncode_alloc_fixed (void *dest, size_t code_size, size_t data_size, size_t data_offset) { /* TODO(eaeltsin): probably these alignment requirements are overly strict. If really so, support unaligned case. */ assert (dest == round_up_to_pagesize (dest)); assert (data_offset == round_up_to_pagesize (data_offset)); nacl_dyncode_alloc_init (); if (nacl_next_code > dest) { return NULL; } nacl_next_code = dest; code_size = round_up_to_pagesize (code_size); data_size = round_up_to_pagesize (data_size); if (data_size != 0) { size_t last_offset = nacl_next_data - nacl_next_code; if (data_offset > last_offset) { /* Leaves unused space in the data area. */ nacl_next_data += data_offset - last_offset; } else if (data_offset < last_offset) { /* Cannot move code. */ return NULL; } assert (nacl_next_code + data_offset == nacl_next_data); /* Check whether the data space is available and reserve it. MAP_FIXED cannot be used because it overwrites existing mappings. Instead, fail if returned value is different from address hint. */ void *mapped = __mmap (nacl_next_data, data_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (mapped == MAP_FAILED) { return NULL; } if (mapped != nacl_next_data) { __munmap (nacl_next_data, data_size); return NULL; } } nacl_next_data += data_size; nacl_next_code += code_size; return dest; }
/* Map PATHNAME read-only/shared and store its size in *SIZE.  Returns
   the mapping, or NULL (not MAP_FAILED) on any failure.  */
const char unsigned* __map_file(const char* pathname, size_t* size)
{
	struct stat st;
	const unsigned char* view = MAP_FAILED;

	int fd = __sys_open(pathname, O_RDONLY | O_CLOEXEC | O_NONBLOCK);
	if (fd < 0)
		return 0;

	if (__syscall(SYS_fstat, fd, &st) == 0) {
		*size = st.st_size;
		view = __mmap(0, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
	}
	__syscall(SYS_close, fd);

	if (view == MAP_FAILED)
		return 0;
	return view;
}
/* 64-bit-offset mmap implemented on top of the 32-bit-offset __mmap.
   Fails with EOVERFLOW when OFFSET does not fit in off_t.  */
__ptr_t __mmap64 (__ptr_t addr, size_t len, int prot, int flags, int fd, __off64_t offset)
{
  off_t narrow = (off_t) offset;

  /* Round-trip check: if narrowing changed the value, it does not fit.  */
  if ((__off64_t) narrow != offset)
    {
      /* We cannot do this since the offset is too large.  */
      __set_errno (EOVERFLOW);
      return MAP_FAILED;
    }
  return __mmap (addr, len, prot, flags, fd, narrow);
}
/* Common implementation of assertion failure: format and print the
   diagnostic, publish it in a mapped buffer for the abort machinery to
   pick up, then abort.  Never returns.  */
void
__assert_fail_base (const char *fmt, const char *assertion, const char *file,
		    unsigned int line, const char *function)
{
  char *str;

#ifdef FATAL_PREPARE
  FATAL_PREPARE;
#endif

  int total;
  /* NOTE(review): TOTAL is passed by address as a trailing format
     argument, so FMT is presumably expected to end with %n storing the
     printed length — confirm against the callers' format strings.  */
  if (__asprintf (&str, fmt, __progname, __progname[0] ? ": " : "",
		  file, line, function ? function : "",
		  function ? ": " : "", assertion, &total) >= 0)
    {
      /* Print the message.  */
      (void) __fxprintf (NULL, "%s", str);
      (void) fflush (stderr);

      /* Round the message size (plus NUL) up to whole pages for the
	 anonymous mapping below.  */
      total = (total + 1 + GLRO(dl_pagesize) - 1) & ~(GLRO(dl_pagesize) - 1);
      struct abort_msg_s *buf = __mmap (NULL, total, PROT_READ | PROT_WRITE,
					MAP_ANON | MAP_PRIVATE, -1, 0);
      if (__builtin_expect (buf != MAP_FAILED, 1))
	{
	  buf->size = total;
	  strcpy (buf->msg, str);

	  /* We have to free the old buffer since the application might
	     catch the SIGABRT signal.  */
	  struct abort_msg_s *old = atomic_exchange_acq (&__abort_msg, buf);
	  if (old != NULL)
	    __munmap (old, old->size);
	}

      free (str);
    }
  else
    {
      /* At least print a minimal message.  */
      static const char errstr[] = "Unexpected error.\n";
      __libc_write (STDERR_FILENO, errstr, sizeof (errstr) - 1);
    }

  abort ();
}
/* Allocate space for code and data simultaneously. This is a simple allocator that doesn't know how to deallocate. */ void *nacl_dyncode_alloc (size_t code_size, size_t data_size, size_t data_offset) { assert (data_offset == round_up_to_pagesize (data_offset)); nacl_dyncode_alloc_init (); code_size = round_up_to_pagesize (code_size); data_size = round_up_to_pagesize (data_size); if (data_size != 0) { size_t last_offset = nacl_next_data - nacl_next_code; if (data_offset > last_offset) { /* Leaves unused space in the data area. */ nacl_next_data += data_offset - last_offset; } else if (data_offset < last_offset) { /* Leaves unused space in the code area. */ nacl_next_code += last_offset - data_offset; } assert (nacl_next_code + data_offset == nacl_next_data); /* Check whether the data space is available and reserve it. MAP_FIXED cannot be used because it overwrites existing mappings. Instead, fail if returned value is different from address hint. TODO(mseaborn): Retry on failure or avoid failure by reserving a big chunk of address space at startup. */ void *mapped = __mmap (nacl_next_data, data_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (mapped == MAP_FAILED) { return NULL; } if (mapped != nacl_next_data) { __munmap (nacl_next_data, data_size); return NULL; } } void *code_addr = nacl_next_code; nacl_next_data += data_size; nacl_next_code += code_size; return code_addr; }
/*
 * mmap stub, with preemptory failures due to extra parameter checking
 * mandated for conformance.
 *
 * This is for UNIX03 only.
 */
void *
mmap(void *addr, size_t len, int prot, int flags, int fildes, off_t off)
{
	/*
	 * Preemptory failures mandated for conformance:
	 *
	 *	o off is not a multiple of the page size
	 *	o flags contains neither MAP_PRIVATE nor MAP_SHARED
	 *	o len is zero
	 */
	extern void cthread_set_errno_self(int);
	int misaligned_off = (off & PAGE_MASK) != 0;
	int has_private = (flags & MAP_PRIVATE) == MAP_PRIVATE;
	int has_shared = (flags & MAP_SHARED) == MAP_SHARED;

	if (misaligned_off || (!has_private && !has_shared) || len == 0) {
		cthread_set_errno_self(EINVAL);
		return MAP_FAILED;
	}
	return __mmap(addr, len, prot, flags, fildes, off);
}
/* Open the message catalog CAT_NAME.  If the name contains a slash or
   NLSPATH is NULL it is opened directly; otherwise NLSPATH is searched,
   expanding %N (catalog name), %L (locale value ENV_VAR), %l (language
   element), %t (territory element), %c (codeset element) and %%.  The
   first existing file wins.  The file is then mapped (or read when
   mmap is unavailable), validated and its lookup tables recorded in
   CATALOG.  Returns 0 on success, -1 on failure.  */
int
__open_catalog (const char *cat_name, const char *nlspath, const char *env_var,
		__nl_catd catalog)
{
  int fd = -1;
  struct stat64 st;
  int swapping;
  size_t cnt;
  size_t max_offset;
  size_t tab_size;
  const char *lastp;
  int result = -1;
  char *buf = NULL;

  if (strchr (cat_name, '/') != NULL || nlspath == NULL)
    fd = open_not_cancel_2 (cat_name, O_RDONLY);
  else
    {
      const char *run_nlspath = nlspath;
      /* Grow BUF so at least N more bytes fit; frees BUF and aborts the
	 whole call on allocation failure.  */
#define ENOUGH(n)							      \
  if (__glibc_unlikely (bufact + (n) >= bufmax))			      \
    {									      \
      char *old_buf = buf;						      \
      bufmax += (bufmax < 256 + (n)) ? 256 + (n) : bufmax;		      \
      buf = realloc (buf, bufmax);					      \
      if (__glibc_unlikely (buf == NULL))				      \
	{								      \
	  free (old_buf);						      \
	  return -1;							      \
	}								      \
    }

      /* The RUN_NLSPATH variable contains a colon separated list of
	 descriptions where we expect to find catalogs.  We have to
	 recognize certain % substitutions and stop when we found the
	 first existing file.  */
      size_t bufact;
      size_t bufmax = 0;
      size_t len;

      fd = -1;
      while (*run_nlspath != '\0')
	{
	  bufact = 0;

	  if (*run_nlspath == ':')
	    {
	      /* Leading colon or adjacent colons - treat same as %N.  */
	      len = strlen (cat_name);
	      ENOUGH (len);
	      memcpy (&buf[bufact], cat_name, len);
	      bufact += len;
	    }
	  else
	    while (*run_nlspath != ':' && *run_nlspath != '\0')
	      if (*run_nlspath == '%')
		{
		  const char *tmp;

		  ++run_nlspath;	/* We have seen the `%'.  */
		  switch (*run_nlspath++)
		    {
		    case 'N':
		      /* Use the catalog name.  */
		      len = strlen (cat_name);
		      ENOUGH (len);
		      memcpy (&buf[bufact], cat_name, len);
		      bufact += len;
		      break;
		    case 'L':
		      /* Use the current locale category value.  */
		      len = strlen (env_var);
		      ENOUGH (len);
		      memcpy (&buf[bufact], env_var, len);
		      bufact += len;
		      break;
		    case 'l':
		      /* Use language element of locale category value.  */
		      tmp = env_var;
		      do
			{
			  ENOUGH (1);
			  buf[bufact++] = *tmp++;
			}
		      while (*tmp != '\0' && *tmp != '_' && *tmp != '.');
		      break;
		    case 't':
		      /* Use territory element of locale category value.  */
		      tmp = env_var;
		      do
			++tmp;
		      while (*tmp != '\0' && *tmp != '_' && *tmp != '.');
		      if (*tmp == '_')
			{
			  ++tmp;
			  do
			    {
			      ENOUGH (1);
			      buf[bufact++] = *tmp++;
			    }
			  while (*tmp != '\0' && *tmp != '.');
			}
		      break;
		    case 'c':
		      /* Use code set element of locale category value.  */
		      tmp = env_var;
		      do
			++tmp;
		      while (*tmp != '\0' && *tmp != '.');
		      if (*tmp == '.')
			{
			  ++tmp;
			  do
			    {
			      ENOUGH (1);
			      buf[bufact++] = *tmp++;
			    }
			  while (*tmp != '\0');
			}
		      break;
		    case '%':
		      ENOUGH (1);
		      buf[bufact++] = '%';
		      break;
		    default:
		      /* Unknown variable: ignore this path element.  */
		      bufact = 0;
		      while (*run_nlspath != '\0' && *run_nlspath != ':')
			++run_nlspath;
		      break;
		    }
		}
	      else
		{
		  ENOUGH (1);
		  buf[bufact++] = *run_nlspath++;
		}

	  /* NUL-terminate the constructed candidate path.  */
	  ENOUGH (1);
	  buf[bufact] = '\0';

	  if (bufact != 0)
	    {
	      fd = open_not_cancel_2 (buf, O_RDONLY);
	      if (fd >= 0)
		break;
	    }

	  ++run_nlspath;
	}
    }

  /* Avoid dealing with directories and block devices */
  if (__builtin_expect (fd, 0) < 0)
    {
      free (buf);
      return -1;
    }

  if (__builtin_expect (__fxstat64 (_STAT_VER, fd, &st), 0) < 0)
    goto close_unlock_return;

  if (__builtin_expect (!S_ISREG (st.st_mode), 0)
      || (size_t) st.st_size < sizeof (struct catalog_obj))
    {
      /* `errno' is not set correctly but the file is not usable.
	 Use an reasonable error value.  */
      __set_errno (EINVAL);
      goto close_unlock_return;
    }

  catalog->file_size = st.st_size;
#ifdef _POSIX_MAPPED_FILES
# ifndef MAP_COPY
  /* Linux seems to lack read-only copy-on-write.  */
#  define MAP_COPY MAP_PRIVATE
# endif
# ifndef MAP_FILE
  /* Some systems do not have this flag; it is superfluous.  */
#  define MAP_FILE 0
# endif
  catalog->file_ptr =
    (struct catalog_obj *) __mmap (NULL, st.st_size, PROT_READ,
				   MAP_FILE|MAP_COPY, fd, 0);
  if (__builtin_expect (catalog->file_ptr != (struct catalog_obj *) MAP_FAILED,
			1))
    /* Tell the world we managed to mmap the file.  */
    catalog->status = mmapped;
  else
#endif /* _POSIX_MAPPED_FILES */
    {
      /* mmap failed perhaps because the system call is not
	 implemented.  Try to load the file.  */
      size_t todo;

      catalog->file_ptr = malloc (st.st_size);
      if (catalog->file_ptr == NULL)
	goto close_unlock_return;

      todo = st.st_size;
      /* Save read, handle partial reads.  */
      do
	{
	  size_t now = read_not_cancel (fd, (((char *) catalog->file_ptr)
					     + (st.st_size - todo)), todo);
	  if (now == 0 || now == (size_t) -1)
	    {
#ifdef EINTR
	      if (now == (size_t) -1 && errno == EINTR)
		continue;
#endif
	      free ((void *) catalog->file_ptr);
	      goto close_unlock_return;
	    }
	  todo -= now;
	}
      while (todo > 0);
      catalog->status = malloced;
    }

  /* Determine whether the file is a catalog file and if yes whether
     it is written using the correct byte order.  Else we have to swap
     the values.  */
  if (__glibc_likely (catalog->file_ptr->magic == CATGETS_MAGIC))
    swapping = 0;
  else if (catalog->file_ptr->magic == SWAPU32 (CATGETS_MAGIC))
    swapping = 1;
  else
    {
    invalid_file:
      /* Invalid file.  Free the resources and mark catalog as not
	 usable.  */
#ifdef _POSIX_MAPPED_FILES
      if (catalog->status == mmapped)
	__munmap ((void *) catalog->file_ptr, catalog->file_size);
      else
#endif /* _POSIX_MAPPED_FILES */
	free (catalog->file_ptr);
      goto close_unlock_return;
    }

  /* Byte-swap a 32-bit value only when the file's order differs.  */
#define SWAP(x) (swapping ? SWAPU32 (x) : (x))

  /* Get dimensions of the used hashing table.  */
  catalog->plane_size = SWAP (catalog->file_ptr->plane_size);
  catalog->plane_depth = SWAP (catalog->file_ptr->plane_depth);

  /* The file contains two versions of the pointer tables.  Pick the
     right one for the local byte order.  */
#if __BYTE_ORDER == __LITTLE_ENDIAN
  catalog->name_ptr = &catalog->file_ptr->name_ptr[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
  catalog->name_ptr = &catalog->file_ptr->name_ptr[catalog->plane_size
						   * catalog->plane_depth
						   * 3];
#else
# error Cannot handle __BYTE_ORDER byte order
#endif

  /* The rest of the file contains all the strings.  They are
     addressed relative to the position of the first string.  */
  catalog->strings =
    (const char *) &catalog->file_ptr->name_ptr[catalog->plane_size
						* catalog->plane_depth * 3 * 2];

  /* Determine the largest string offset mentioned in the table.  */
  max_offset = 0;
  tab_size = 3 * catalog->plane_size * catalog->plane_depth;
  for (cnt = 2; cnt < tab_size; cnt += 3)
    if (catalog->name_ptr[cnt] > max_offset)
      max_offset = catalog->name_ptr[cnt];

  /* Now we can check whether the file is large enough to contain the
     tables it says it contains.  */
  if ((size_t) st.st_size
      <= (sizeof (struct catalog_obj) + 2 * tab_size + max_offset))
    /* The last string is not contained in the file.  */
    goto invalid_file;

  lastp = catalog->strings + max_offset;
  /* NOTE(review): the remaining-bytes bound below mixes addition and
     subtraction without parentheses (unlike the parenthesized check
     just above) — verify this matches the catalog file layout.  */
  max_offset = (st.st_size
		- sizeof (struct catalog_obj) + 2 * tab_size + max_offset);

  /* Make sure the string the last offset points at is NUL-terminated
     within the file.  */
  while (*lastp != '\0')
    {
      if (--max_offset == 0)
	goto invalid_file;
      ++lastp;
    }

  /* We succeeded.  */
  result = 0;

  /* Release the lock again.  */
 close_unlock_return:
  close_not_cancel_no_status (fd);
  free (buf);

  return result;
}
/* load an EXOS_MAGIC binary */
/* Reads the a.out header of FD, then maps the text and data segments
   copy-on-write into environment E at the address recorded after the
   text segment.  A page shared between initialized data and bss is
   loaded by hand through a temporary page.  Returns 1 on success;
   errornf()/error() handle failures.  */
int __do_simple_load (int fd, struct Env *e)
{
  // struct Uenv cu;
  u_int start_text_addr, start_text_pg;
  struct exec hdr;
  u_int text_size, data_size, bss_size, overlap_size;
  u_int envid = e->env_id;

  /* read a.out headers */
  /* The word immediately after the text segment holds the entry/start
     address of the text.  */
  if (lseek(fd, 0, SEEK_SET) == -1 ||
      read(fd, &hdr, sizeof(hdr)) != sizeof(hdr) ||
      lseek(fd, sizeof(hdr) + hdr.a_text, SEEK_SET) == -1 ||
      read(fd, &start_text_addr, sizeof(start_text_addr))
	!= sizeof(start_text_addr)) {
    errornf("Invalid executable format.\n");
  }
  start_text_pg = PGROUNDDOWN(start_text_addr);
  text_size = hdr.a_text + sizeof(hdr);
  data_size = hdr.a_data;
  /* If the text does not end on a page boundary, move the tail of the
     text into the data segment so both are page-aligned.  */
  if (text_size % NBPG) {
    data_size += text_size % NBPG;
    text_size = PGROUNDDOWN(text_size);
  }
  bss_size = hdr.a_bss;

  if (!(data_size % NBPG))
    overlap_size = 0;
  else {
    /* read in the page that contains both bss and inited data */
    u_int temp_page;

    temp_page = (u_int)__malloc(NBPG);
    overlap_size = NBPG;
    /* NOTE(review): on failure this unmaps TEMP_PAGE even when the
       allocation itself failed (temp_page == 0), and TEMP_PAGE is not
       freed on that path — confirm error()/errornf() do not return.  */
    if (temp_page == 0 ||
	lseek(fd, text_size + PGROUNDDOWN(data_size), SEEK_SET) == -1 ||
	read(fd, (void*)temp_page, data_size % NBPG) != data_size % NBPG ||
	_exos_insert_pte (0, vpt[PGNO(temp_page)],
			  start_text_pg + text_size + PGROUNDDOWN(data_size),
			  0, envid, 0, NULL) != 0) {
      _exos_self_unmap_page(0, temp_page);
      error("Error mmaping text segment\n");
    }
    /* Zero the bss portion of the shared page, then drop our local
       mapping of it (the target environment keeps its copy).  */
    bzero((void*)temp_page + (data_size % NBPG), NBPG - (data_size % NBPG));
    _exos_self_unmap_page(0, temp_page);
    __free((void*)temp_page);
    bss_size -= NBPG - (data_size % NBPG);
    bss_size = PGROUNDUP(bss_size);
    data_size = PGROUNDDOWN(data_size);
    /* NOTE(review): OVERLAP_SIZE is set but never read afterwards.  */
  }

  /* mmap the text segment readonly */
  if ((u_int)__mmap((void*)start_text_pg, text_size, PROT_READ | PROT_EXEC,
		    MAP_FILE | MAP_FIXED | MAP_COPY, fd, (off_t)0, 0,
		    envid) != start_text_pg) {
    errornf("Error mmaping text segment\n");
  }

  /* mmap the data segment read/write */
  if ((u_int)__mmap((void*)(start_text_pg + text_size), data_size,
		    PROT_READ | PROT_WRITE | PROT_EXEC,
		    MAP_FILE | MAP_FIXED | MAP_COPY, fd, text_size,
		    (off_t)0, envid) != start_text_pg + text_size) {
    errornf("Error mmaping data segment\n");
  }

#if 0 /* we set up a stack page later on when setting up arguments */
  /* allocate a stack page */
  if (_exos_insert_pte (0, PG_U|PG_W|PG_P, USTACKTOP-NBPG, 0, envid, 0, NULL)
      < 0) {
    errornf("could not allocate stack\n");
  }
#endif

  /* set the entry point */
  assert(e->env_id == envid);
  e->env_tf.tf_eip = start_text_addr;

  return 1;
}
/* Load the gconv module cache file.  Returns 0 on success, -1 when no
   usable cache is available.  NOTE(review): the whole cache-loading
   body is currently compiled out with #if 0 and the GCONV_PATH check is
   commented out, so this function unconditionally returns -1 — confirm
   this disabling is intentional before re-enabling.  */
int
internal_function
__gconv_load_cache (void)
{
#if 0
  int fd;
  struct stat64 st;
  struct gconvcache_header *header;
#endif

  /* We cannot use the cache if the GCONV_PATH environment variable is
     set.  */
  // __gconv_path_envvar = getenv ("GCONV_PATH");
  // if (__gconv_path_envvar != NULL)
    return -1;

#if 0
  /* See whether the cache file exists.  */
  fd = __open (GCONV_MODULES_CACHE, O_RDONLY);
  if (__builtin_expect (fd, 0) == -1)
    /* Not available.  */
    return -1;

  /* Get information about the file.  */
  if (__builtin_expect (__fxstat64 (_STAT_VER, fd, &st), 0) < 0
      /* We do not have to start looking at the file if it cannot contain
	 at least the cache header.  */
      || (size_t) st.st_size < sizeof (struct gconvcache_header))
    {
    close_and_exit:
      __close (fd);
      return -1;
    }

  /* Make the file content available.  */
  cache_size = st.st_size;
#ifdef _POSIX_MAPPED_FILES
  gconv_cache = __mmap (NULL, cache_size, PROT_READ, MAP_SHARED, fd, 0);
  if (__builtin_expect (gconv_cache == MAP_FAILED, 0))
#endif
    {
      /* Fallback: read the whole file into a malloc'd buffer.  */
      size_t already_read;

      gconv_cache = malloc (cache_size);
      if (gconv_cache == NULL)
	goto close_and_exit;

      already_read = 0;
      do
	{
	  ssize_t n = __read (fd, (char *) gconv_cache + already_read,
			      cache_size - already_read);
	  if (__builtin_expect (n, 0) == -1)
	    {
	      free (gconv_cache);
	      gconv_cache = NULL;
	      goto close_and_exit;
	    }

	  already_read += n;
	}
      while (already_read < cache_size);

      cache_malloced = 1;
    }

  /* We don't need the file descriptor anymore.  */
  __close (fd);

  /* Check the consistency.  */
  header = (struct gconvcache_header *) gconv_cache;
  if (__builtin_expect (header->magic, GCONVCACHE_MAGIC) != GCONVCACHE_MAGIC
      || __builtin_expect (header->string_offset >= cache_size, 0)
      || __builtin_expect (header->hash_offset >= cache_size, 0)
      || __builtin_expect (header->hash_size == 0, 0)
      || __builtin_expect ((header->hash_offset
			    + header->hash_size * sizeof (struct hash_entry))
			   > cache_size, 0)
      || __builtin_expect (header->module_offset >= cache_size, 0)
      || __builtin_expect (header->otherconv_offset > cache_size, 0))
    {
      /* Corrupt or inconsistent cache: release it again.  */
      if (cache_malloced)
	{
	  free (gconv_cache);
	  cache_malloced = 0;
	}
#ifdef _POSIX_MAPPED_FILES
      else
	__munmap (gconv_cache, cache_size);
#endif
      gconv_cache = NULL;

      return -1;
    }

  /* That worked.  */
  return 0;
#endif
}
/* Try to get a file descriptor for the shared memory segment
   containing the database.  Asks the nscd daemon over a unix socket,
   receives the descriptor via SCM_RIGHTS, validates the database
   header, maps it and installs the new mapping in *MAPPEDP (dropping a
   reference on the previous one).  Returns the new record or
   NO_MAPPING.  */
static struct mapped_database *
get_mapping (request_type type, const char *key,
	     struct mapped_database **mappedp)
{
  struct mapped_database *result = NO_MAPPING;
#ifdef SCM_RIGHTS
  const size_t keylen = strlen (key) + 1;
  char resdata[keylen];
  int saved_errno = errno;

  int mapfd = -1;

  /* Send the request.  */
  struct iovec iov[2];
  request_header req;

  int sock = open_socket ();
  if (sock < 0)
    goto out;

  req.version = NSCD_VERSION;
  req.type = type;
  req.key_len = keylen;

  iov[0].iov_base = &req;
  iov[0].iov_len = sizeof (req);
  iov[1].iov_base = (void *) key;
  iov[1].iov_len = keylen;

  if (__builtin_expect (TEMP_FAILURE_RETRY (__writev (sock, iov, 2))
			!= iov[0].iov_len + iov[1].iov_len, 0))
    /* We cannot even write the request.  */
    goto out_close2;

  /* Room for the data sent along with the file descriptor.  We expect
     the key name back.  */
  iov[0].iov_base = resdata;
  iov[0].iov_len = keylen;

  union
  {
    struct cmsghdr hdr;
    char bytes[CMSG_SPACE (sizeof (int))];
  } buf;
  struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 1,
			.msg_control = buf.bytes,
			.msg_controllen = sizeof (buf) };
  struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);

  cmsg->cmsg_level = SOL_SOCKET;
  cmsg->cmsg_type = SCM_RIGHTS;
  cmsg->cmsg_len = CMSG_LEN (sizeof (int));

  /* This access is well-aligned since BUF is correctly aligned for an
     int and CMSG_DATA preserves this alignment.  */
  *(int *) CMSG_DATA (cmsg) = -1;

  msg.msg_controllen = cmsg->cmsg_len;

  if (wait_on_socket (sock) <= 0)
    goto out_close2;

# ifndef MSG_NOSIGNAL
#  define MSG_NOSIGNAL 0
# endif
  if (__builtin_expect (TEMP_FAILURE_RETRY (__recvmsg (sock, &msg,
						       MSG_NOSIGNAL))
			!= keylen, 0))
    goto out_close2;

  mapfd = *(int *) CMSG_DATA (cmsg);

  if (__builtin_expect (CMSG_FIRSTHDR (&msg)->cmsg_len
			!= CMSG_LEN (sizeof (int)), 0))
    goto out_close;

  /* Sanity-check the descriptor: correct key echoed back, regular file
     large enough for the persistent header.  */
  struct stat64 st;
  if (__builtin_expect (strcmp (resdata, key) != 0, 0)
      || __builtin_expect (fstat64 (mapfd, &st) != 0, 0)
      || __builtin_expect (st.st_size < sizeof (struct database_pers_head),
			   0))
    goto out_close;

  struct database_pers_head head;
  if (__builtin_expect (TEMP_FAILURE_RETRY (__pread (mapfd, &head,
						     sizeof (head), 0))
			!= sizeof (head), 0))
    goto out_close;

  if (__builtin_expect (head.version != DB_VERSION, 0)
      || __builtin_expect (head.header_size != sizeof (head), 0)
      /* This really should not happen but who knows, maybe the update
	 thread got stuck.  */
      || __builtin_expect (! head.nscd_certainly_running
			   && head.timestamp + MAPPING_TIMEOUT < time (NULL),
			   0))
    goto out_close;

  size_t size = (sizeof (head) + roundup (head.module * sizeof (ref_t), ALIGN)
		 + head.data_size);

  if (__builtin_expect (st.st_size < size, 0))
    goto out_close;

  /* The file is large enough, map it now.  */
  void *mapping = __mmap (NULL, size, PROT_READ, MAP_SHARED, mapfd, 0);
  if (__builtin_expect (mapping != MAP_FAILED, 1))
    {
      /* Allocate a record for the mapping.  */
      struct mapped_database *newp = malloc (sizeof (*newp));
      if (newp == NULL)
	{
	  /* Ugh, after all we went through the memory allocation failed.  */
	  __munmap (mapping, size);
	  goto out_close;
	}

      newp->head = mapping;
      newp->data = ((char *) mapping + head.header_size
		    + roundup (head.module * sizeof (ref_t), ALIGN));
      newp->mapsize = size;
      /* Set counter to 1 to show it is usable.  */
      newp->counter = 1;

      result = newp;
    }

 out_close:
  __close (mapfd);
 out_close2:
  __close (sock);
 out:
  __set_errno (saved_errno);
#endif /* SCM_RIGHTS */

  /* Publish the new mapping and drop our reference to the old one.  */
  struct mapped_database *oldval = *mappedp;
  *mappedp = result;

  if (oldval != NULL && atomic_decrement_val (&oldval->counter) == 0)
    __nscd_unmap (oldval);

  return result;
}


/* Return a referenced mapping for the TYPE/NAME database, refreshing
   it via get_mapping when it is missing or stale.  Returns NO_MAPPING
   when none can be obtained or the lock cannot be taken quickly.  */
struct mapped_database *
__nscd_get_map_ref (request_type type, const char *name,
		    struct locked_map_ptr *mapptr, int *gc_cyclep)
{
  struct mapped_database *cur = mapptr->mapped;

  /* NO_MAPPING is a permanent negative result; do not retry.  */
  if (cur == NO_MAPPING)
    return cur;

  /* Spin briefly on the map lock; give up after a few rounds rather
     than block.  */
  int cnt = 0;
  while (atomic_compare_and_exchange_val_acq (&mapptr->lock, 1, 0) != 0)
    {
      // XXX Best number of rounds?
      if (++cnt > 5)
	return NO_MAPPING;

      atomic_delay ();
    }

  cur = mapptr->mapped;

  if (__builtin_expect (cur != NO_MAPPING, 1))
    {
      /* If not mapped or timestamp not updated, request new map.  */
      if (cur == NULL
	  || (cur->head->nscd_certainly_running == 0
	      && cur->head->timestamp + MAPPING_TIMEOUT < time (NULL)))
	cur = get_mapping (type, name, &mapptr->mapped);

      if (__builtin_expect (cur != NO_MAPPING, 1))
	{
	  /* An odd gc_cycle means garbage collection is in progress;
	     the data cannot be trusted.  */
	  if (__builtin_expect (((*gc_cyclep = cur->head->gc_cycle) & 1) != 0,
				0))
	    cur = NO_MAPPING;
	  else
	    atomic_increment (&cur->counter);
	}
    }

  /* Release the lock.  */
  mapptr->lock = 0;

  return cur;
}


/* Look KEY (KEYLEN bytes) of request TYPE up in the mapped database's
   hash table.  Returns the data head on a usable in-bounds hit, else
   NULL.  NOTE(review): the WORK offsets and here->key/here->packet are
   followed without bounds checks against mapsize; a corrupt or
   concurrently collected database could walk outside the mapping —
   verify against the nscd protocol guarantees.  */
const struct datahead *
__nscd_cache_search (request_type type, const char *key, size_t keylen,
		     const struct mapped_database *mapped)
{
  unsigned long int hash = __nis_hash (key, keylen) % mapped->head->module;

  ref_t work = mapped->head->array[hash];
  while (work != ENDREF)
    {
      struct hashentry *here = (struct hashentry *) (mapped->data + work);

      if (type == here->type && keylen == here->len
	  && memcmp (key, mapped->data + here->key, keylen) == 0)
	{
	  /* We found the entry.  Increment the appropriate counter.  */
	  const struct datahead *dh
	    = (struct datahead *) (mapped->data + here->packet);

	  /* See whether we must ignore the entry or whether something
	     is wrong because garbage collection is in progress.  */
	  if (dh->usable
	      && ((char *) dh + dh->allocsize
		  <= (char *) mapped->head + mapped->mapsize))
	    return dh;
	}

      work = here->next;
    }

  return NULL;
}
/* Hooked mmap: emit the interception debug trace, then delegate
   unchanged to the underlying __mmap.  */
void *
mmap(void *addr, size_t len, int prot, int flags, int fildes, off_t off)
{
	_gfs_hook_debug_v(fputs("Hooking mmap\n", stderr));
	return __mmap(addr, len, prot, flags, fildes, off);
}
void *mmap (void *addr, size_t len, int prot, int flags, int fd, off_t offset) { u_int pageoff; caddr_t ret; off_t pos = offset; size_t size = len; struct Mmap *m; struct stat sb; struct file *filp; struct mmap_ustruct *mus; OSCALLENTER(OSCALL_mmap); if (!mmap_inited) mmap_init(); /* if given a bad fd then return */ if (fd != -1 && fstat (fd, &sb) < 0) { errno = EINVAL; OSCALLEXIT(OSCALL_mmap); return (caddr_t )-1; } if ((flags & MAP_COPY) && (flags & MAP_ANON)) flags &= ~MAP_COPY; /* OpenBSD 2.1 code */ /* * Align the file position to a page boundary, * and save its page offset component. */ pageoff = (pos & PGMASK); pos -= pageoff; /* Adjust size for rounding (on both ends). */ size += pageoff; /* low end... */ size = PGROUNDUP(size); /* hi end */ /* Do not allow mappings that cause address wrap... */ if ((ssize_t)size < 0) { errno = EINVAL; OSCALLEXIT(OSCALL_mmap); return (caddr_t)-1; } /* * Check for illegal addresses. Watch out for address wrap... */ if (flags & MAP_FIXED) { /* * The specified address must have the same remainder * as the file offset taken modulo NBPG, so it * should be aligned after adjustment by pageoff. */ addr -= pageoff; if ((u_int)addr & PGMASK) { errno = EINVAL; OSCALLEXIT(OSCALL_mmap); return (caddr_t)-1; } /* Address range must be all in user VM space. */ if (UTOP > 0 && (u_int)addr + size > UTOP) { errno = EINVAL; OSCALLEXIT(OSCALL_mmap); return (caddr_t)-1; } if ((u_int)addr > (u_int)addr + size) { errno = EINVAL; OSCALLEXIT(OSCALL_mmap); return (caddr_t)-1; } } if ((flags & MAP_ANON) == 0) { if (fd < 0 || fd > NR_OPEN || __current->fd[fd] == NULL) { errno = EBADF; OSCALLEXIT(OSCALL_mmap); return (caddr_t)-1; } /* * XXX hack to handle use of /dev/zero to map anon * memory (ala SunOS). */ if (S_ISCHR(__current->fd[fd]->f_mode) && mmap_iszerodev(__current->fd[fd]->f_dev)) { flags |= MAP_ANON; goto is_anon; } /* * Only files and cdevs are mappable, and cdevs does not * provide private mappings of any kind. 
*/ if (!S_ISREG(__current->fd[fd]->f_mode) && (!S_ISCHR(__current->fd[fd]->f_mode) || (flags & (MAP_PRIVATE|MAP_COPY)))) { errno = EINVAL; OSCALLEXIT(OSCALL_mmap); return (caddr_t)-1; } /* * Ensure that file and memory protections are * compatible. Note that we only worry about * writability if mapping is shared; in this case, * current and max prot are dictated by the open file. * XXX use the vnode instead? Problem is: what * credentials do we use for determination? * What if proc does a setuid? */ if (((__current->fd[fd]->f_flags & O_ACCMODE) == O_WRONLY) && (prot & PROT_READ)) { errno = EACCES; OSCALLEXIT(OSCALL_mmap); return (caddr_t)-1; } /* * If we are sharing potential changes (either via MAP_SHARED * or via the implicit sharing of character device mappings), * and we are trying to get write permission although we * opened it without asking for it, bail out. */ if (((flags & MAP_SHARED) != 0 || S_ISCHR(__current->fd[fd]->f_mode)) && ((__current->fd[fd]->f_flags & O_ACCMODE) == O_RDONLY) && (prot & PROT_WRITE) != 0) { errno = EACCES; OSCALLEXIT(OSCALL_mmap); return (caddr_t)-1; } } else { /* * (flags & MAP_ANON) == TRUE * Mapping blank space is trivial. */ if (fd != -1) { errno = EINVAL; OSCALLEXIT(OSCALL_mmap); return (caddr_t)-1; } is_anon: pos = 0; } if (size == 0) { OSCALLEXIT(OSCALL_mmap); return addr; } if (fd >= 0) filp = __current->fd[fd]; else filp = NULL; if ((flags & MAP_FIXED) == 0) { addr = __malloc(size); if (addr == NULL) { __free(addr); errno = ENOMEM; OSCALLEXIT(OSCALL_mmap); return (caddr_t)-1; } } mus = exos_pinned_malloc(sizeof(*mus)); if (mus == NULL) { if ((flags & MAP_FIXED) == 0) __free(addr); errno = ENOMEM; OSCALLEXIT(OSCALL_mmap); return (caddr_t)-1; } m = &(mus->m); m->mmap_addr = addr; m->mmap_len = size; m->mmap_prot = prot; m->mmap_flags = flags; m->mmap_offset = pos; m->mmap_filp = filp; m->mmap_dev = ((fd != -1) ? 
sb.st_dev : 0); LIST_INSERT_HEAD (&mmap_list, m, mmap_link); mus->mru.handler = mmap_fault_handler; mus->oldmru = mregion_get_ustruct(addr); /* XXX - check return value */ if (__vm_free_region((u_int)addr, size, 0) < 0 || mregion_alloc(addr, size, (struct mregion_ustruct*)mus) != 0) { if ((flags & MAP_FIXED) == 0) __free(addr); exos_pinned_free(mus); errno = ENOMEM; OSCALLEXIT(OSCALL_mmap); return (caddr_t)-1; } if (filp) { lock_filp(filp); filp_refcount_inc(filp); unlock_filp(filp); } if (flags & MAP_COPY) ret = __mmap(addr, size, prot, flags, fd, pos, 0, __envid); else ret = addr + pageoff; OSCALLEXIT(OSCALL_mmap); return ret; }
/* Spawn a new process executing PATH with the attributes described in
   *ATTRP.  Before running the process perform the actions described in
   FILE-ACTIONS.  EXEC is the exec-family function to use (__execve or
   __execvpe).  Returns 0 on success or an errno value on failure; on
   success *PID receives the child's process id.  */
static int
__spawnix (pid_t * pid, const char *file,
	   const posix_spawn_file_actions_t * file_actions,
	   const posix_spawnattr_t * attrp, char *const argv[],
	   char *const envp[], int xflags,
	   int (*exec) (const char *, char *const *, char *const *))
{
  pid_t new_pid;
  struct posix_spawn_args args;
  int ec;

  /* The pipe is used to report an errno value from the child back to
     the parent (close-on-exec, so a successful execve closes it).  */
  if (__pipe2 (args.pipe, O_CLOEXEC))
    return errno;

  /* To avoid imposing hard limits on posix_spawn{p} the total number of
     arguments is first calculated to allocate a mmap to hold all possible
     values.  */
  ptrdiff_t argc = 0;
  /* Linux allows at most max (0x7FFFFFFF, 1/4 stack size) arguments
     to be used in an execve call.  We limit to INT_MAX minus one due to
     the compatibility code that may execute a shell script
     (maybe_script_execute) where it will construct another argument list
     with an additional argument.  */
  ptrdiff_t limit = INT_MAX - 1;
  while (argv[argc++] != NULL)
    if (argc == limit)
      {
	errno = E2BIG;
	return errno;
      }

  /* The child's stack must be executable if the process's stacks are
     (PF_X in dl_stack_flags).  */
  int prot = (PROT_READ | PROT_WRITE
	      | ((GL (dl_stack_flags) & PF_X) ? PROT_EXEC : 0));

  /* Add a slack area for child's stack.  */
  size_t argv_size = (argc * sizeof (void *)) + 512;
  size_t stack_size = ALIGN_UP (argv_size, GLRO(dl_pagesize));
  void *stack = __mmap (NULL, stack_size, prot,
			MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
  if (__glibc_unlikely (stack == MAP_FAILED))
    {
      close_not_cancel (args.pipe[0]);
      close_not_cancel (args.pipe[1]);
      return errno;
    }

  /* Disable asynchronous cancellation.  */
  int cs = LIBC_CANCEL_ASYNC ();

  args.file = file;
  args.exec = exec;
  args.fa = file_actions;
  /* A null ATTRP is treated as a default-initialized attribute set.  */
  args.attr = attrp ? attrp : &(const posix_spawnattr_t) { 0 };
  args.argv = argv;
  args.argc = argc;
  args.envp = envp;
  args.xflags = xflags;

  /* Block all signals; the child restores the caller's mask itself.  */
  __sigprocmask (SIG_BLOCK, &SIGALL_SET, &args.oldmask);

  /* The clone flags used will create a new child that will run in the
     same memory space (CLONE_VM) and the execution of the calling thread
     will be suspended until the child calls execve or _exit.  These
     conditions are signaled below either by a pipe write (_exit with
     SPAWN_ERROR) or a successful execve.  Also since the calling thread
     execution will be suspended, there is no need for CLONE_SETTLS.
     Although parent and child share the same TLS namespace, there will
     be no concurrent access for TLS variables (errno for instance).  */
  new_pid = CLONE (__spawni_child, STACK (stack, stack_size), stack_size,
		   CLONE_VM | CLONE_VFORK | SIGCHLD, &args);

  close_not_cancel (args.pipe[1]);

  if (new_pid > 0)
    {
      /* A short read means the write side was closed by a successful
	 execve; otherwise the child reported an errno and we must reap
	 the dead child here.  */
      if (__read (args.pipe[0], &ec, sizeof ec) != sizeof ec)
	ec = 0;
      else
	__waitpid (new_pid, NULL, 0);
    }
  else
    ec = -new_pid;

  __munmap (stack, stack_size);

  close_not_cancel (args.pipe[0]);

  if (!ec && new_pid)
    *pid = new_pid;

  /* Restore the caller's signal mask and cancellation state.  */
  __sigprocmask (SIG_SETMASK, &args.oldmask, 0);

  LIBC_CANCEL_RESET (cs);

  return ec;
}

/* Spawn a new process executing PATH with the attributes described in
   *ATTRP.  Before running the process perform the actions described in
   FILE-ACTIONS.  */
int
__spawni (pid_t * pid, const char *file,
	  const posix_spawn_file_actions_t * acts,
	  const posix_spawnattr_t * attrp, char *const argv[],
	  char *const envp[], int xflags)
{
  /* SPAWN_XFLAGS_USE_PATH selects the PATH-searching exec variant
     (posix_spawnp vs. posix_spawn).  */
  return __spawnix (pid, file, acts, attrp, argv, envp, xflags,
		    xflags & SPAWN_XFLAGS_USE_PATH ? __execvpe : __execve);
}
/* Load an a.out executable from FD into environment ENVID.  _STATIC
   selects the statically-linked path (pages inserted into the target
   environment via temporary mappings) versus the dynamic path (mapped
   into the current address space, optionally demand-loaded).  Returns
   the program's start address, or 0 with errno set on failure.  */
u_int __load_prog_fd(int fd, int _static, u_int envid)
{
  u_int start_text_addr;
  struct exec hdr;
  u_int text_size, data_size, bss_size, overlap_size;
  u_int dynamic, start_text_pg;

  /* read a.out headers: the exec header, then the "dynamic" flag word
     and the start address stored just past the text segment */
  if (lseek(fd, 0, SEEK_SET) == -1 ||
      read(fd, &hdr, sizeof(hdr)) != sizeof(hdr) ||
      lseek(fd, sizeof(hdr) + hdr.a_text, SEEK_SET) == -1 ||
      read(fd, &dynamic, sizeof(dynamic)) != sizeof(dynamic) ||
      read(fd, &start_text_addr, sizeof(start_text_addr)) !=
      sizeof(start_text_addr)) {
    fprintf(stderr,"Invalid executable format.\n");
    errno = ENOEXEC;
    goto err;
  }
  start_text_pg = PGROUNDDOWN(start_text_addr);
  /* The header itself is mapped in with the text.  If the text does not
     end on a page boundary, the trailing partial page is treated as part
     of the data segment.  */
  text_size = hdr.a_text + sizeof(hdr);
  data_size = hdr.a_data;
  if (text_size % NBPG) {
    data_size += text_size % NBPG;
    text_size = PGROUNDDOWN(text_size);
  }
  bss_size = hdr.a_bss;

  if (_static) {
    if (!(data_size % NBPG))
      overlap_size = 0;
    else {
      /* read in the page that contains both bss and inited data,
	 zero its bss part, and insert it into the target environment */
      u_int temp_page;
      temp_page = (u_int)__malloc(NBPG);
      overlap_size = NBPG;
      if (temp_page == 0 ||
	  lseek(fd, text_size + PGROUNDDOWN(data_size), SEEK_SET) == -1 ||
	  read(fd, (void*)temp_page, data_size % NBPG) !=
	  data_size % NBPG ||
	  _exos_insert_pte(0, vpt[PGNO(temp_page)],
			   start_text_pg + text_size +
			   PGROUNDDOWN(data_size), 0, envid, 0,
			   NULL) != 0) {
	_exos_self_unmap_page(0, temp_page);
	__free((void*)temp_page);
	fprintf(stderr,"Error mmaping text segment\n");
	goto err;
      }
      bzero((void*)temp_page + (data_size % NBPG),
	    NBPG - (data_size % NBPG));
      _exos_self_unmap_page(0, temp_page);
      __free((void*)temp_page);
      /* The overlap page covers the first part of the bss; account for
	 it and round the remaining sizes to page boundaries.  */
      bss_size -= NBPG - (data_size % NBPG);
      bss_size = PGROUNDUP(bss_size);
      data_size = PGROUNDDOWN(data_size);
    }
    /* mmap the text segment readonly */
    /* NOTE(review): __mmap here is the ExOS-private 8-argument variant
       (fd, offset, extra, envid); the trailing-argument casts differ
       between this call and the next — presumably both mean offset
       followed by a zero flag word; verify against __mmap's prototype.  */
    if ((u_int)__mmap((void*)start_text_pg, text_size,
		      PROT_READ | PROT_EXEC,
		      MAP_FILE | MAP_FIXED | MAP_COPY, fd, (off_t)0, 0,
		      envid) != start_text_pg) {
      fprintf(stderr,"Error mmaping text segment\n");
      goto err;
    }
    /* mmap the data segment read/write */
    if ((u_int)__mmap((void*)(start_text_pg + text_size), data_size,
		      PROT_READ | PROT_WRITE | PROT_EXEC,
		      MAP_FILE | MAP_FIXED | MAP_COPY, fd, text_size,
		      (off_t)0, envid) != start_text_pg + text_size) {
      fprintf(stderr,"Error mmaping data segment\n");
      goto err;
    }
  } else { /* if dynamic... */
    u_int mflags;

    if (!(data_size % NBPG))
      overlap_size = 0;
    else {
      /* read in the page that contains both bss and inited data;
	 for the dynamic case we can map it directly into our own
	 address space */
      overlap_size = NBPG;
      if (_exos_self_insert_pte(0, PG_P | PG_W | PG_U,
				start_text_pg + text_size +
				PGROUNDDOWN(data_size), 0, NULL) < 0 ||
	  lseek(fd, text_size + PGROUNDDOWN(data_size), SEEK_SET) == -1 ||
	  read(fd, (void*)(start_text_pg + text_size +
			   PGROUNDDOWN(data_size)),
	       data_size % NBPG) != data_size % NBPG) {
	fprintf(stderr,"Error mmaping text segment\n");
	goto err;
      }
      /* Zero the bss portion of the overlap page.  */
      bzero((void*)(start_text_pg + text_size + data_size),
	    NBPG - (data_size % NBPG));
      bss_size -= NBPG - (data_size % NBPG);
      bss_size = PGROUNDUP(bss_size);
      data_size = PGROUNDDOWN(data_size);
    }
    /* mmap the text segment readonly */
    /* NO_DEMAND_LOAD forces an eager copy instead of demand paging.  */
    mflags = MAP_FILE | MAP_FIXED;
    if (getenv("NO_DEMAND_LOAD"))
      mflags |= MAP_COPY;
    else
      mflags |= MAP_SHARED;
    if ((u_int)mmap((void*)start_text_pg, text_size,
		    PROT_READ | PROT_EXEC, mflags, fd,
		    (off_t)0) != start_text_pg) {
      fprintf(stderr,"Error mmaping text segment\n");
      goto err;
    }
    /* mmap the data segment read/write (private, so writes do not go
       back to the file, unless we are already eager-copying) */
    if (!(mflags & MAP_COPY))
      mflags = MAP_FILE | MAP_FIXED | MAP_PRIVATE;
    if ((u_int)mmap((void*)(start_text_pg + text_size), data_size,
		    PROT_READ | PROT_WRITE | PROT_EXEC, mflags, fd,
		    (off_t)text_size) != start_text_pg + text_size) {
      fprintf(stderr,"Error mmaping data segment: %d\n", errno);
      goto err;
    }
    /* mmap the bss as demand zero'd */
    /* NOTE(review): (off_t)-1 is passed in the fd position here —
       presumably intended as fd = -1 for the anonymous mapping; the cast
       looks misplaced.  Behavior is the same on this ABI, but confirm.  */
    if ((u_int)mmap((void*)(start_text_pg + text_size + data_size +
			    overlap_size), bss_size,
		    PROT_READ | PROT_WRITE | PROT_EXEC,
		    MAP_ANON | MAP_FIXED | MAP_PRIVATE, (off_t)-1, 0) !=
	start_text_pg + text_size + data_size + overlap_size) {
      fprintf(stderr,"Error mmaping bss\n");
      goto err;
    }
  }

  return start_text_addr;

err:
  return 0;
}