/*
 * recvfrom: receive data from a socket, optionally reporting the sender.
 *
 * The data buffer must pass the write check. When src_addr is non-NULL,
 * addrlen and then *addrlen bytes at src_addr must pass it as well.
 *
 * Returns whatever the file's recvfrom handler returns, or
 *   -L_EFAULT   if a caller-supplied buffer fails the write check,
 *   -L_EBADF    if vfs_get() yields no file for sockfd,
 *   -L_ENOTSOCK if the file has no recvfrom handler.
 */
DEFINE_SYSCALL(recvfrom, int, sockfd, void *, buf, size_t, len, int, flags, struct sockaddr *, src_addr, int *, addrlen)
{
	log_info("recvfrom(%d, %p, %d, %x, %p, %p)", sockfd, buf, len, flags, src_addr, addrlen);

	if (!mm_check_write(buf, len))
		return -L_EFAULT;
	/* addrlen is validated before it is dereferenced for the src_addr check */
	if (src_addr
		&& (!mm_check_write(addrlen, sizeof(*addrlen)) || !mm_check_write(src_addr, *addrlen)))
		return -L_EFAULT;

	struct file *sock = vfs_get(sockfd);
	if (!sock)
		return -L_EBADF;

	int result = -L_ENOTSOCK;
	if (sock->op_vtable->recvfrom)
		result = sock->op_vtable->recvfrom(sock, buf, len, flags, src_addr, addrlen);
	else
		log_error("recvfrom() not implemented.");

	vfs_release(sock);
	return result;
}
/*
 * sched_getaffinity: report the CPU affinity mask of the calling thread.
 *
 * Only pid == 0 is supported. The caller's mask is cleared and a single bit
 * (CPU 0) is set.
 *
 * Returns sizeof(uintptr_t) on success, or
 *   -ESRCH  if pid is not 0,
 *   -EINVAL if cpusetsize is smaller than sizeof(uintptr_t), i.e. too small
 *           to hold the amount of data reported as written,
 *   -EFAULT if the mask buffer fails the write check.
 */
DEFINE_SYSCALL(sched_getaffinity, pid_t, pid, size_t, cpusetsize, uint8_t *, mask)
{
	log_info("sched_getaffinity(%d, %d, %p)\n", pid, cpusetsize, mask);
	if (pid != 0)
	{
		log_error("pid != 0.\n");
		return -ESRCH;
	}
	/* mask[0] is written unconditionally below and sizeof(uintptr_t) bytes are
	 * reported as written, so reject buffers that cannot hold that much. */
	if (cpusetsize < sizeof(uintptr_t))
		return -EINVAL;
	/* Validate and clear exactly the bytes the caller provided; never touch
	 * memory past cpusetsize. */
	if (!mm_check_write(mask, cpusetsize))
		return -EFAULT;
	for (size_t i = 0; i < cpusetsize; i++)
		mask[i] = 0;
	/* TODO: Applications (i.e. ffmpeg) use this to detect the number of cpus and enable multithreading
	 * on cpu with multiple cores.
	 * Since we do not support multithreading at the time, we just report back one bit to let them
	 * think we only have one core and give up multithreading.
	 */
	mask[0] = 1;
#if 0
	GROUP_AFFINITY affinity;
	GetThreadGroupAffinity(GetCurrentThread(), &affinity);
	int size = min(sizeof(uintptr_t), cpusetsize) * 8;
	for (int i = 0; i < size; i++)
		if (affinity.Mask & (1 << i))
			mask[i / 8] |= 1 << i;
#endif
	return sizeof(uintptr_t);
}
/*
 * clock_gettime: read the clock identified by clk_id into *tp.
 *
 * CLOCK_REALTIME is filled by filetime_to_unix_timespec() from the system
 * time. The three CLOCK_MONOTONIC variants are all derived from the
 * performance counter divided by its frequency.
 *
 * Returns 0 on success, -L_EFAULT if tp fails the write check, or -L_EINVAL
 * for any other clock id.
 */
DEFINE_SYSCALL(clock_gettime, int, clk_id, struct timespec *, tp)
{
	log_info("sys_clock_gettime(%d, 0x%p)", clk_id, tp);
	if (!mm_check_write(tp, sizeof(struct timespec)))
		return -L_EFAULT;

	if (clk_id == CLOCK_REALTIME)
	{
		FILETIME now;
		win7compat_GetSystemTimePreciseAsFileTime(&now);
		filetime_to_unix_timespec(&now, tp);
		return 0;
	}

	if (clk_id == CLOCK_MONOTONIC || clk_id == CLOCK_MONOTONIC_COARSE || clk_id == CLOCK_MONOTONIC_RAW)
	{
		LARGE_INTEGER frequency, ticks;
		QueryPerformanceFrequency(&frequency);
		QueryPerformanceCounter(&ticks);
		/* ticks / frequency gives seconds; scale to nanoseconds, then split */
		uint64_t elapsed_ns = (double)ticks.QuadPart / (double)frequency.QuadPart * NANOSECONDS_PER_SECOND;
		tp->tv_sec = elapsed_ns / NANOSECONDS_PER_SECOND;
		tp->tv_nsec = elapsed_ns % NANOSECONDS_PER_SECOND;
		return 0;
	}

	return -L_EINVAL;
}
/*
 * getrlimit: report the soft and hard limit for a resource.
 *
 * Soft and hard limits are always reported as the same value:
 *   RLIMIT_STACK  -> STACK_SIZE
 *   RLIMIT_NPROC  -> 65536 (fake value)
 *   RLIMIT_NOFILE -> MAX_FD_COUNT
 *
 * Returns 0 on success, -EFAULT if rlim fails the write check, or -EINVAL
 * for any other resource (rlim is left untouched in that case).
 */
DEFINE_SYSCALL(getrlimit, int, resource, struct rlimit *, rlim)
{
	log_info("getrlimit(%d, %p)\n", resource, rlim);
	if (!mm_check_write(rlim, sizeof(struct rlimit)))
		return -EFAULT;

	if (resource == RLIMIT_STACK)
	{
		rlim->rlim_cur = STACK_SIZE;
		rlim->rlim_max = STACK_SIZE;
	}
	else if (resource == RLIMIT_NPROC)
	{
		log_info("RLIMIT_NPROC: return fake result.\n");
		rlim->rlim_cur = 65536;
		rlim->rlim_max = 65536;
	}
	else if (resource == RLIMIT_NOFILE)
	{
		rlim->rlim_cur = MAX_FD_COUNT;
		rlim->rlim_max = MAX_FD_COUNT;
	}
	else
	{
		log_error("Unsupported resource: %d\n", resource);
		return -EINVAL;
	}
	return 0;
}
/*
 * getrandom: fill buf with buflen random bytes obtained from RtlGenRandom().
 *
 * The flags argument is only logged; it does not influence the result.
 *
 * Returns buflen on success, 0 if RtlGenRandom() reports failure, or
 * -L_EFAULT if buf fails the write check.
 */
DEFINE_SYSCALL(getrandom, void *, buf, size_t, buflen, unsigned int, flags)
{
	log_info("getrandom(%p, %d, %x)", buf, buflen, flags);
	if (!mm_check_write(buf, buflen))
		return -L_EFAULT;
	return RtlGenRandom(buf, buflen) ? buflen : 0;
}
/*
 * Check that a msghdr and every buffer it references pass the write check.
 *
 * Validates, in order: the header itself, the name buffer (if msg_namelen is
 * non-zero), the iovec array (if msg_iovlen is non-zero), the control buffer
 * (if msg_controllen is non-zero), and finally each iovec's data buffer.
 *
 * Returns 1 if everything is writable, 0 otherwise.
 */
static int mm_check_write_msghdr(struct msghdr *msg)
{
	if (!mm_check_write(msg, sizeof(struct msghdr)))
		return 0;
	if (msg->msg_namelen && !mm_check_write(msg->msg_name, msg->msg_namelen))
		return 0;
	if (msg->msg_iovlen && !mm_check_write(msg->msg_iov, sizeof(struct iovec) * msg->msg_iovlen))
		return 0;
	/* Must be a logical AND: with a bitwise '&' a failed check was only
	 * noticed when msg_controllen happened to be odd. */
	if (msg->msg_controllen && !mm_check_write(msg->msg_control, msg->msg_controllen))
		return 0;
	for (int i = 0; i < msg->msg_iovlen; i++)
	{
		log_info("iov %d: [%p, %p)", i, msg->msg_iov[i].iov_base, (uintptr_t)msg->msg_iov[i].iov_base + msg->msg_iov[i].iov_len);
		if (!mm_check_write(msg->msg_iov[i].iov_base, msg->msg_iov[i].iov_len))
			return 0;
	}
	return 1;
}
/*
 * nanosleep: suspend the calling thread for the interval given in *req.
 *
 * The interval is converted to 100-nanosecond units and handed to
 * NtDelayExecution() as a negative value. The rem buffer is validated when
 * supplied but is never written.
 *
 * Returns 0 after the delay call, or
 *   -L_EFAULT if req is unreadable or a non-NULL rem is unwritable,
 *   -L_EINVAL if tv_sec is negative or tv_nsec is outside [0, 999999999].
 */
DEFINE_SYSCALL(nanosleep, const struct timespec *, req, struct timespec *, rem)
{
	log_info("nanosleep(0x%p, 0x%p)", req, rem);
	if (!mm_check_read(req, sizeof(struct timespec)) || (rem && !mm_check_write(rem, sizeof(struct timespec))))
		return -L_EFAULT;
	if (req->tv_sec < 0 || req->tv_nsec < 0 || req->tv_nsec >= 1000000000)
		return -L_EINVAL;

	/* Work in 100ns ticks and clamp to the largest positive 64-bit value so
	 * that very long sleeps do not wrap around into a short or bogus delay. */
	const uint64_t max_ticks = 0x7FFFFFFFFFFFFFFFULL;
	uint64_t seconds = (uint64_t)req->tv_sec;
	uint64_t ticks;
	if (seconds > (max_ticks - 9999999ULL) / 10000000ULL)
		ticks = max_ticks;
	else
		ticks = seconds * 10000000ULL + (uint64_t)req->tv_nsec / 100ULL;

	LARGE_INTEGER delay_interval;
	delay_interval.QuadPart = -(int64_t)ticks;
	NtDelayExecution(FALSE, &delay_interval);
	return 0;
}
/*
 * accept4: accept a connection on a socket.
 *
 * addr (checked for sizeof(struct sockaddr) bytes) and addrlen are each
 * validated only when non-NULL.
 *
 * Returns whatever the file's accept4 handler returns, or
 *   -L_EFAULT   if a supplied pointer fails the write check,
 *   -L_EBADF    if vfs_get() yields no file for sockfd,
 *   -L_ENOTSOCK if the file has no accept4 handler.
 */
DEFINE_SYSCALL(accept4, int, sockfd, struct sockaddr *, addr, int *, addrlen, int, flags)
{
	log_info("accept4(%d, %p, %p, %d)", sockfd, addr, addrlen, flags);

	if ((addr && !mm_check_write(addr, sizeof(struct sockaddr)))
		|| (addrlen && !mm_check_write(addrlen, sizeof(int))))
		return -L_EFAULT;

	struct file *sock = vfs_get(sockfd);
	if (!sock)
		return -L_EBADF;

	int result = -L_ENOTSOCK;
	if (sock->op_vtable->accept4)
		result = sock->op_vtable->accept4(sock, addr, addrlen, flags);
	else
		log_error("accept4() not implemented.");

	vfs_release(sock);
	return result;
}
/*
 * getpeername: retrieve the address of the peer connected to a socket.
 *
 * addrlen must be writable, and addr must be writable for *addrlen bytes.
 *
 * Returns whatever the file's getpeername handler returns, or
 *   -L_EFAULT   if addrlen or addr fails the write check,
 *   -L_EBADF    if vfs_get() yields no file for sockfd,
 *   -L_ENOTSOCK if the file has no getpeername handler.
 */
DEFINE_SYSCALL(getpeername, int, sockfd, struct sockaddr *, addr, int *, addrlen)
{
	log_info("getpeername(%d, %p, %p)", sockfd, addr, addrlen);

	/* addrlen is validated first because it is dereferenced for the addr check */
	if (!mm_check_write(addrlen, sizeof(*addrlen)) || !mm_check_write(addr, *addrlen))
		return -L_EFAULT;

	struct file *sock = vfs_get(sockfd);
	if (!sock)
		return -L_EBADF;

	int result = -L_ENOTSOCK;
	if (sock->op_vtable->getpeername)
		result = sock->op_vtable->getpeername(sock, addr, addrlen);
	else
		log_error("getpeername() not implemented.");

	vfs_release(sock);
	return result;
}
/*
 * getsockopt: query a socket option.
 *
 * When optlen is non-NULL, both optlen and *optlen bytes at optval must pass
 * the write check. With a NULL optlen no validation is performed here.
 *
 * Returns whatever the file's getsockopt handler returns, or
 *   -L_EFAULT   if a buffer fails the write check,
 *   -L_EBADF    if vfs_get() yields no file for sockfd,
 *   -L_ENOTSOCK if the file has no getsockopt handler.
 */
DEFINE_SYSCALL(getsockopt, int, sockfd, int, level, int, optname, void *, optval, int *, optlen)
{
	log_info("getsockopt(%d, %d, %d, %p, %p)", sockfd, level, optname, optval, optlen);

	if (optlen)
	{
		if (!mm_check_write(optlen, sizeof(*optlen)))
			return -L_EFAULT;
		if (!mm_check_write(optval, *optlen))
			return -L_EFAULT;
	}

	struct file *sock = vfs_get(sockfd);
	if (!sock)
		return -L_EBADF;

	int result = -L_ENOTSOCK;
	if (sock->op_vtable->getsockopt)
		result = sock->op_vtable->getsockopt(sock, level, optname, optval, optlen);
	else
		log_error("getsockopt() not implemented.");

	vfs_release(sock);
	return result;
}
/*
 * time: return the current time in seconds as computed by
 * filetime_to_unix_sec() from the system FILETIME.
 *
 * If c is non-NULL the same value is also stored through it.
 *
 * Returns the time value, or -L_EFAULT if a non-NULL c fails the write check.
 */
DEFINE_SYSCALL(time, intptr_t *, c)
{
	log_info("time(%p)", c);
	/* The store below writes a full intptr_t, so validate that many bytes
	 * (sizeof(int) is too small where intptr_t is wider than int). */
	if (c && !mm_check_write(c, sizeof(*c)))
		return -L_EFAULT;
	FILETIME systime;
	GetSystemTimeAsFileTime(&systime);
	uint64_t t = filetime_to_unix_sec(&systime);
	if (c)
		*c = (intptr_t)t;
	return t;
}
/*
 * getrusage: resource usage query (no 'who' value is implemented yet).
 *
 * The usage buffer is zero-filled once it passes the write check, after
 * which every 'who' value is rejected.
 *
 * Returns -EFAULT if usage fails the write check, otherwise -EINVAL.
 */
DEFINE_SYSCALL(getrusage, int, who, struct rusage *, usage)
{
	log_info("getrusage(%d, %p)\n", who, usage);
	if (!mm_check_write(usage, sizeof(struct rusage)))
		return -EFAULT;
	ZeroMemory(usage, sizeof(struct rusage));

	/* No 'who' selector is supported at this point */
	log_error("Unhandled who: %d.\n", who);
	return -EINVAL;
}
DEFINE_SYSCALL(oldolduname, struct oldold_utsname *, buf) { if (!mm_check_write(buf, sizeof(struct oldold_utsname))) return -EFAULT; struct utsname newbuf; sys_uname(&newbuf); strncpy(buf->sysname, newbuf.sysname, __OLD_UTS_LEN + 1); strncpy(buf->nodename, newbuf.nodename, __OLD_UTS_LEN + 1); strncpy(buf->release, newbuf.release, __OLD_UTS_LEN + 1); strncpy(buf->version, newbuf.version, __OLD_UTS_LEN + 1); strncpy(buf->machine, newbuf.machine, __OLD_UTS_LEN + 1); return 0; }
/*
 * time: return the current time as seconds since the Unix epoch
 * (1970-01-01 00:00:00 UTC).
 *
 * If c is non-NULL the same value is also stored through it.
 *
 * Returns the time value, or -EFAULT if a non-NULL c fails the write check.
 */
DEFINE_SYSCALL(time, intptr_t *, c)
{
	log_info("time(%p)\n", c);
	/* The store below writes a full intptr_t, so validate that many bytes */
	if (c && !mm_check_write(c, sizeof(*c)))
		return -EFAULT;
	/* FILETIME counts 100ns intervals since 1601-01-01 UTC. Converting from it
	 * avoids hand-written calendar arithmetic (months, leap years). */
	FILETIME now;
	GetSystemTimeAsFileTime(&now);
	uint64_t ticks = ((uint64_t)now.dwHighDateTime << 32) | (uint64_t)now.dwLowDateTime;
	/* 116444736000000000 = 100ns intervals between 1601-01-01 and 1970-01-01 */
	uint64_t t = (ticks - 116444736000000000ULL) / 10000000ULL;
	if (c)
		*c = (intptr_t)t;
	return t;
}
/*
 * uname: fill *buf with a fixed identity that mimics a reasonable Linux
 * system. The machine field depends on whether this is a _WIN64 build.
 *
 * Returns 0 on success, or -EFAULT if buf fails the write check.
 */
DEFINE_SYSCALL(uname, struct utsname *, buf)
{
#ifdef _WIN64
	static const char machine_name[] = "x86_64";
#else
	static const char machine_name[] = "i686";
#endif

	log_info("sys_uname(%p)\n", buf);
	if (!mm_check_write(buf, sizeof(struct utsname)))
		return -EFAULT;

	strcpy(buf->sysname, "Linux");
	strcpy(buf->nodename, "ForeignLinux");
	strcpy(buf->release, "3.15.0");
	strcpy(buf->version, "3.15.0");
	strcpy(buf->machine, machine_name);
	strcpy(buf->domainname, "GNU/Linux");
	return 0;
}
/*
 * recv: receive data from a socket.
 *
 * Implemented through the file's recvfrom handler with NULL source address
 * and address length.
 *
 * Returns whatever the handler returns, or
 *   -L_EFAULT   if buf fails the write check,
 *   -L_EBADF    if vfs_get() yields no file for sockfd,
 *   -L_ENOTSOCK if the file has no recvfrom handler.
 */
DEFINE_SYSCALL(recv, int, sockfd, void *, buf, size_t, len, int, flags)
{
	log_info("recv(%d, %p, %d, %x)", sockfd, buf, len, flags);
	if (!mm_check_write(buf, len))
		return -L_EFAULT;

	struct file *sock = vfs_get(sockfd);
	if (!sock)
		return -L_EBADF;

	int result = -L_ENOTSOCK;
	if (sock->op_vtable->recvfrom)
		result = sock->op_vtable->recvfrom(sock, buf, len, flags, NULL, NULL);
	else
		log_error("recv() not implemented.");

	vfs_release(sock);
	return result;
}
/*
 * clock_getres: report the resolution of the clock identified by clk_id.
 *
 * CLOCK_REALTIME uses the third value returned by NtQueryTimerResolution(),
 * scaled by NANOSECONDS_PER_TICK. The CLOCK_MONOTONIC variants use the period
 * of the performance counter, i.e. NANOSECONDS_PER_SECOND divided by its
 * frequency, with a floor of 1 ns.
 *
 * Returns 0 on success, -L_EFAULT if res fails the write check, or -L_EINVAL
 * for any other clock id.
 */
DEFINE_SYSCALL(clock_getres, int, clk_id, struct timespec *, res)
{
	log_info("clock_getres(%d, 0x%p)", clk_id, res);
	if (!mm_check_write(res, sizeof(struct timespec)))
		return -L_EFAULT;
	switch (clk_id)
	{
	case CLOCK_REALTIME:
	{
		ULONG coarse, fine, actual;
		NtQueryTimerResolution(&coarse, &fine, &actual);
		uint64_t ns = (uint64_t)actual * NANOSECONDS_PER_TICK;
		res->tv_sec = ns / NANOSECONDS_PER_SECOND;
		res->tv_nsec = ns % NANOSECONDS_PER_SECOND;
		return 0;
	}
	case CLOCK_MONOTONIC:
	case CLOCK_MONOTONIC_COARSE:
	case CLOCK_MONOTONIC_RAW:
	{
		LARGE_INTEGER freq;
		QueryPerformanceFrequency(&freq);
		/* One counter period in nanoseconds. 1.0 / freq would be the period in
		 * seconds, which always truncates to zero. */
		uint64_t ns = 0;
		if (freq.QuadPart > 0)
			ns = NANOSECONDS_PER_SECOND / (uint64_t)freq.QuadPart;
		if (ns == 0)
		{
			/* Counter is finer than 1 ns (or frequency unavailable): report the minimum */
			res->tv_sec = 0;
			res->tv_nsec = 1;
		}
		else
		{
			res->tv_sec = ns / NANOSECONDS_PER_SECOND;
			res->tv_nsec = ns % NANOSECONDS_PER_SECOND;
		}
		return 0;
	}
	default:
		return -L_EINVAL;
	}
}
/*
 * sysinfo: report uptime and memory statistics.
 *
 * Memory figures come from GlobalMemoryStatusEx() and are expressed in units
 * of PAGE_SIZE (mem_unit is set accordingly). Uptime comes from
 * GetTickCount64(). Load averages and the process count are placeholders.
 *
 * Returns 0 on success, or -EFAULT if info fails the write check.
 */
DEFINE_SYSCALL(sysinfo, struct sysinfo *, info)
{
	log_info("sysinfo(%p)\n", info);
	if (!mm_check_write(info, sizeof(*info)))
		return -EFAULT;
	MEMORYSTATUSEX memory;
	/* Start from zeros so a failed query reports zeros rather than
	 * uninitialized stack contents. */
	ZeroMemory(&memory, sizeof(memory));
	/* dwLength must be set before the call, otherwise the call fails */
	memory.dwLength = sizeof(memory);
	if (!GlobalMemoryStatusEx(&memory))
		log_error("GlobalMemoryStatusEx() failed.\n");
	info->uptime = (intptr_t)(GetTickCount64() / 1000ULL);
	info->loads[0] = info->loads[1] = info->loads[2] = 0; /* TODO */
	info->totalram = memory.ullTotalPhys / PAGE_SIZE;
	info->freeram = memory.ullAvailPhys / PAGE_SIZE;
	info->sharedram = 0;
	info->bufferram = 0;
	info->totalswap = memory.ullTotalPageFile / PAGE_SIZE;
	info->freeswap = memory.ullAvailPageFile / PAGE_SIZE;
	info->procs = 100; /* TODO */
	info->totalhigh = 0;
	info->freehigh = 0;
	info->mem_unit = PAGE_SIZE;
	RtlSecureZeroMemory(info->_f, sizeof(info->_f));
	return 0;
}
static int dsp_ioctl(struct file *f, unsigned int cmd, unsigned long arg) { AcquireSRWLockExclusive(&f->rw_lock); int r = 0; struct dsp_file *dsp = (struct dsp_file *)f; switch (cmd) { case SNDCTL_DSP_RESET: { log_info("SNDCTL_DSP_RESET."); dsp_reset(dsp); break; } case SNDCTL_DSP_SPEED: { if (!mm_check_read((int *)arg, sizeof(int))) { r = -L_EFAULT; break; } int speed = *(int *)arg; log_info("SNDCTL_DSP_SPEED: %d", speed); DWORD old_speed = dsp->format.nSamplesPerSec; dsp->format.nSamplesPerSec = speed; if (!dsp_test_format(&dsp->format)) { log_warning("Speed not supported."); dsp->format.nSamplesPerSec = old_speed; r = -L_EINVAL; } break; } case SNDCTL_DSP_STEREO: { if (!mm_check_read((int *)arg, sizeof(int))) { r = -L_EFAULT; break; } int c = *(int *)arg; log_info("SNDCTL_DSP_STEREO: %d", c); if (c == 0) dsp->format.nChannels = 1; else if (c == 1) dsp->format.nChannels = 2; else { log_warning("Invalid argument (can only be 0 or 1)."); r = -L_EINVAL; } break; } case SNDCTL_DSP_SETFMT: { if (!mm_check_read((int *)arg, sizeof(int))) { r = -L_EFAULT; break; } int fmt = *(int *)arg; log_info("SNDCTL_DSP_SETFMT: 0x%x", fmt); if (fmt == AFMT_S16_LE) dsp->format.wBitsPerSample = 16; else if (fmt == AFMT_U8) dsp->format.wBitsPerSample = 8; else { log_warning("Invalid argument (can only be AFMT_S16_LE or AFMT_U8)."); r = -L_EINVAL; } break; } case SNDCTL_DSP_GETFMTS: { if (!mm_check_write((int *)arg, sizeof(int))) { r = -L_EFAULT; break; } log_info("SNDCTL_DSP_GETFMTS"); *(int *)arg = AFMT_U8 | AFMT_S16_LE; break; } } ReleaseSRWLockExclusive(&f->rw_lock); return r; }
static int load_elf(struct file *f, struct binfmt *binary) { struct elf_header *elf = binary->has_interpreter ? binary->interpreter : binary->executable; /* Load ELF header */ f->op_vtable->pread(f, &elf->eh, sizeof(Elf_Ehdr), 0); if (elf->eh.e_type != ET_EXEC && elf->eh.e_type != ET_DYN) { log_error("Only ET_EXEC and ET_DYN executables can be loaded."); return -L_EACCES; } #ifdef _WIN64 if (elf->eh.e_machine != EM_X86_64) { log_error("Not an x86_64 executable."); #else if (elf->eh.e_machine != EM_386) { log_error("Not an i386 executable."); #endif return -L_EACCES; } /* Load program header table */ size_t phsize = (size_t)elf->eh.e_phentsize * (size_t)elf->eh.e_phnum; char *pht = pht_storage; f->op_vtable->pread(f, pht, phsize, elf->eh.e_phoff); /* TODO */ /* Find virtual address range */ elf->low = 0xFFFFFFFF; elf->high = 0; for (int i = 0; i < elf->eh.e_phnum; i++) { Elf_Phdr *ph = (Elf_Phdr *)&pht[elf->eh.e_phentsize * i]; if (ph->p_type == PT_LOAD) { elf->low = min(elf->low, ph->p_vaddr); elf->high = max(elf->high, ph->p_vaddr + ph->p_memsz); log_info("PT_LOAD: vaddr %p, size %p", ph->p_vaddr, ph->p_memsz); } else if (ph->p_type == PT_DYNAMIC) log_info("PT_DYNAMIC: vaddr %p, size %p", ph->p_vaddr, ph->p_memsz); else if (ph->p_type == PT_PHDR) /* Patch phdr pointer in PT_PHDR, glibc uses it to determine load offset */ ph->p_vaddr = (size_t)pht; } /* Find virtual address range for ET_DYN executable */ elf->load_base = 0; if (elf->eh.e_type == ET_DYN) { size_t free_addr = mm_find_free_pages(elf->high - elf->low) * PAGE_SIZE; if (!free_addr) return -L_ENOMEM; elf->load_base = free_addr - elf->low; log_info("ET_DYN load offset: %p, real range [%p, %p)", elf->load_base, elf->load_base + elf->low, elf->load_base + elf->high); } #ifdef _WIN64 /* Unmap the pre-reserved executable region (see fork_init() for details) */ size_t region_start = 0x400000; VirtualFree(region_start, 0, MEM_RELEASE); /* This will silently fail if it's not the intended case */ #endif /* Map 
executable segments */ /* TODO: Directly use mmap() */ int load_base_set = 0; for (int i = 0; i < elf->eh.e_phnum; i++) { Elf_Phdr *ph = (Elf_Phdr *)&pht[elf->eh.e_phentsize * i]; if (ph->p_type == PT_LOAD) { size_t addr = ph->p_vaddr & 0xFFFFF000; size_t size = ph->p_memsz + (ph->p_vaddr & 0x00000FFF); off_t offset_pages = ph->p_offset / PAGE_SIZE; /* Note: In ET_DYN executables, all address are based upon elf->load_base. * But in ET_EXEC executables, all address are absolute. */ int prot = 0; if (ph->p_flags & PF_R) prot |= PROT_READ; if (ph->p_flags & PF_W) prot |= PROT_WRITE; if (ph->p_flags & PF_X) prot |= PROT_EXEC; if (elf->eh.e_type == ET_DYN) addr += elf->load_base; mm_mmap((void*)addr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED | MAP_POPULATE, 0, NULL, 0); char *vaddr = (char *)ph->p_vaddr; if (elf->eh.e_type == ET_DYN) vaddr += elf->load_base; mm_check_write(vaddr, ph->p_filesz); /* Populate the memory, otherwise pread() will fail */ f->op_vtable->pread(f, vaddr, ph->p_filesz, ph->p_offset); if (!binary->has_interpreter) /* This is not interpreter */ mm_update_brk((void*)(addr + size)); if (elf->eh.e_type == ET_EXEC && !load_base_set) { /* Record load base of first segment in ET_EXEC * load_base will be used in run() to calculate various auxiliary vector pointers */ load_base_set = 1; elf->load_base = addr; } } } /* Load interpreter if present */ for (int i = 0; i < elf->eh.e_phnum; i++) { Elf_Phdr *ph = (Elf_Phdr *)&pht[elf->eh.e_phentsize * i]; if (ph->p_type == PT_INTERP) { if (binary->has_interpreter) /* This is already an interpreter */ return -L_EACCES; /* Bad interpreter */ binary->has_interpreter = true; char path[MAX_PATH]; f->op_vtable->pread(f, path, ph->p_filesz, ph->p_offset); /* TODO */ path[ph->p_filesz] = 0; log_info("interpreter: %s", path); struct file *fi; int r = vfs_openat(AT_FDCWD, path, O_RDONLY, 0, 0, &fi); if (r < 0) return r; if (!winfs_is_winfile(fi)) { vfs_release(fi); return -L_EACCES; } 
r = load_elf(fi, binary); vfs_release(fi); if (r < 0) return -L_EACCES; /* Bad interpreter */ } } return 0; } #define MAX_SHEBANG_LINE 256 static int load_script(struct file *f, struct binfmt *binary) { /* Parse the shebang line */ int size = f->op_vtable->pread(f, binary->buffer_base, MAX_SHEBANG_LINE, 0); char *p = binary->buffer_base, *end = p + size; /* Skip shebang */ p += 2; /* Skip spaces */ while (p < end && *p == ' ') p++; if (p == end) return -L_EACCES; const char *executable = p; binary->argv0 = p; while (p < end && *p != ' ' && *p != '\n') p++; if (p == end) return -L_EACCES; if (*p == '\n') *p = 0; /* It has no argument */ else { *p++ = 0; while (p < end && *p == ' ') p++; if (p == end) return -L_EACCES; if (*p != '\n') { /* It has an argument */ binary->argv1 = p; while (p < end && *p != '\n') p++; if (p == end) return -L_EACCES; *p = 0; } } binary->replace_argv0 = TRUE; struct file *fe; int r = vfs_openat(AT_FDCWD, executable, O_RDONLY, 0, 0, &fe); if (r < 0) return r; if (!winfs_is_winfile(fe)) { vfs_release(fe); return -L_EACCES; } /* TODO: Recursive interpreters */ return load_elf(fe, binary); }