void server_forward_kmsg(
                Server *s,
                int priority,
                const char *identifier,
                const char *message,
                struct ucred *ucred) {

        struct iovec iovec[5];
        char header_priority[6], header_pid[16];
        int n = 0;
        char *ident_buf = NULL;

        assert(s);
        assert(priority >= 0);
        assert(priority <= 999);
        assert(message);

        if (_unlikely_(LOG_PRI(priority) > s->max_level_kmsg))
                return;

        if (_unlikely_(s->dev_kmsg_fd < 0))
                return;

        /* Never allow messages with kernel facility to be written to
         * kmsg, regardless of where the data comes from. */
        priority = syslog_fixup_facility(priority);

        /* First: priority field */
        snprintf(header_priority, sizeof(header_priority), "<%i>", priority);
        char_array_0(header_priority);
        IOVEC_SET_STRING(iovec[n++], header_priority);

        /* Second: identifier and PID */
        if (ucred) {
                if (!identifier) {
                        get_process_comm(ucred->pid, &ident_buf);
                        identifier = ident_buf;
                }

                snprintf(header_pid, sizeof(header_pid), "[%lu]: ", (unsigned long) ucred->pid);
                char_array_0(header_pid);

                if (identifier)
                        IOVEC_SET_STRING(iovec[n++], identifier);

                IOVEC_SET_STRING(iovec[n++], header_pid);
        } else if (identifier) {
                IOVEC_SET_STRING(iovec[n++], identifier);
                IOVEC_SET_STRING(iovec[n++], ": ");
        }

        /* Third: message */
        IOVEC_SET_STRING(iovec[n++], message);
        IOVEC_SET_STRING(iovec[n++], "\n");

        if (writev(s->dev_kmsg_fd, iovec, n) < 0)
                log_debug("Failed to write to /dev/kmsg for logging: %m");

        free(ident_buf);
}
void server_forward_kmsg(
                Server *s,
                int priority,
                const char *identifier,
                const char *message,
                const struct ucred *ucred) {

        _cleanup_free_ char *ident_buf = NULL;
        struct iovec iovec[5];
        char header_priority[DECIMAL_STR_MAX(priority) + 3],
             header_pid[STRLEN("[]: ") + DECIMAL_STR_MAX(pid_t) + 1];
        int n = 0;

        assert(s);
        assert(priority >= 0);
        assert(priority <= 999);
        assert(message);

        if (_unlikely_(LOG_PRI(priority) > s->max_level_kmsg))
                return;

        if (_unlikely_(s->dev_kmsg_fd < 0))
                return;

        /* Never allow messages with kernel facility to be written to
         * kmsg, regardless of where the data comes from. */
        priority = syslog_fixup_facility(priority);

        /* First: priority field */
        xsprintf(header_priority, "<%i>", priority);
        iovec[n++] = IOVEC_MAKE_STRING(header_priority);

        /* Second: identifier and PID */
        if (ucred) {
                if (!identifier) {
                        get_process_comm(ucred->pid, &ident_buf);
                        identifier = ident_buf;
                }

                xsprintf(header_pid, "["PID_FMT"]: ", ucred->pid);

                if (identifier)
                        iovec[n++] = IOVEC_MAKE_STRING(identifier);

                iovec[n++] = IOVEC_MAKE_STRING(header_pid);
        } else if (identifier) {
                iovec[n++] = IOVEC_MAKE_STRING(identifier);
                iovec[n++] = IOVEC_MAKE_STRING(": ");
        }

        /* Third: message */
        iovec[n++] = IOVEC_MAKE_STRING(message);
        iovec[n++] = IOVEC_MAKE_STRING("\n");

        if (writev(s->dev_kmsg_fd, iovec, n) < 0)
                log_debug_errno(errno, "Failed to write to /dev/kmsg for logging: %m");
}
int sigbus_pop(void **ret) {
        assert(ret);

        for (;;) {
                unsigned u, c;

                __sync_synchronize();
                c = n_sigbus_queue;

                if (_likely_(c == 0))
                        return 0;

                if (_unlikely_(c >= SIGBUS_QUEUE_MAX))
                        return -EOVERFLOW;

                for (u = 0; u < SIGBUS_QUEUE_MAX; u++) {
                        void *addr;

                        addr = sigbus_queue[u];
                        if (!addr)
                                continue;

                        if (__sync_bool_compare_and_swap(&sigbus_queue[u], addr, NULL)) {
                                __sync_fetch_and_sub(&n_sigbus_queue, 1);
                                *ret = addr;
                                return 1;
                        }
                }
        }
}
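/*
 * A minimal sketch of a consumer draining the queue above; the loop and
 * its handle_sigbus() callback are hypothetical, not part of the
 * original API.
 */
static void sigbus_drain(void (*handle_sigbus)(void *addr)) {
        void *addr;
        int r;

        /* sigbus_pop() yields 1 plus an address per queued fault, 0 once
         * the queue is drained, and -EOVERFLOW if events were dropped */
        while ((r = sigbus_pop(&addr)) > 0)
                handle_sigbus(addr);

        if (r == -EOVERFLOW)
                /* events were lost; the caller has to rescan all mappings */
                handle_sigbus(NULL);
}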
_public_ int sd_journal_send_with_location(const char *file, const char *line, const char *func, const char *format, ...) {
        int r, i, j;
        va_list ap;
        struct iovec *iov = NULL;
        char *f;

        va_start(ap, format);
        i = fill_iovec_sprintf(format, ap, 3, &iov);
        va_end(ap);

        if (_unlikely_(i < 0)) {
                r = i;
                goto finish;
        }

        ALLOCA_CODE_FUNC(f, func);

        IOVEC_SET_STRING(iov[0], file);
        IOVEC_SET_STRING(iov[1], line);
        IOVEC_SET_STRING(iov[2], f);

        r = sd_journal_sendv(iov, i);

finish:
        for (j = 3; j < i; j++)
                free(iov[j].iov_base);

        free(iov);

        return r;
}
/**
 * c_variant_beginv() - begin a new container
 * @cv:                 variant to operate on, or NULL
 * @containers:         containers to write, or NULL
 * @args:               additional parameters
 *
 * This begins writing a new container to @cv, moving the iterator into the
 * container for following writes. The containers to enter have to be specified
 * via @containers (if NULL, the next container is entered). Whenever you enter
 * a variant, you must specify the type for the entire variant as another
 * argument in @args.
 *
 * Valid elements for @containers are:
 *   'v' to begin a variant
 *   'm' to begin a maybe
 *   'a' to begin an array
 *   '(' to begin a tuple
 *   '{' to begin a pair
 *
 * It is a programming error to call this on a sealed variant.
 *
 * Return: 0 on success, negative error code on failure.
 */
_public_ int c_variant_beginv(CVariant *cv, const char *containers, va_list args) {
        CVariantLevel *level;
        const char *type;
        int r;

        if (_unlikely_(!cv))
                return -ENOTUNIQ;

        assert(!cv->sealed);

        if (containers) {
                for ( ; *containers; ++containers) {
                        type = NULL;

                        switch (*containers) {
                        case C_VARIANT_VARIANT:
                                type = va_arg(args, const char *);
                                /* fallthrough */
                        case C_VARIANT_MAYBE:
                        case C_VARIANT_ARRAY:
                        case C_VARIANT_TUPLE_OPEN:
                        case C_VARIANT_PAIR_OPEN:
                                r = c_variant_begin_one(cv, *containers, type);
                                if (r < 0)
                                        return r;
                                break;
                        default:
                                return c_variant_poison(cv, -EMEDIUMTYPE);
                        }
                }
        } else {
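/*
 * A minimal usage sketch for the writer API documented above, assuming
 * only the c_variant_beginv() entry point itself; the varargs shim
 * below is hypothetical and merely forwards its arguments.
 */
static int begin_containers(CVariant *cv, const char *containers, ...) {
        va_list args;
        int r;

        va_start(args, containers);
        r = c_variant_beginv(cv, containers, args);
        va_end(args);

        return r;
}

/* e.g., begin_containers(cv, "av", "u") enters the next array, then a
 * variant whose contained type is passed as the extra "u" argument, as
 * required by the contract documented above. */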
bool on_tty(void) {
        static int cached_on_tty = -1;

        if (_unlikely_(cached_on_tty < 0))
                cached_on_tty = isatty(STDOUT_FILENO) > 0;

        return cached_on_tty;
}
bool is_main_thread(void) {
        /* tri-state cache: 0 = unknown, 1 = main thread, -1 = other thread */
        static thread_local int cached = 0;

        if (_unlikely_(cached == 0))
                cached = getpid() == gettid() ? 1 : -1;

        return cached > 0;
}
int getenv_for_pid(pid_t pid, const char *field, char **_value) {
        _cleanup_fclose_ FILE *f = NULL;
        char *value = NULL;
        int r;
        bool done = false;
        size_t l;
        const char *path;

        assert(pid >= 0);
        assert(field);
        assert(_value);

        path = procfs_file_alloca(pid, "environ");

        f = fopen(path, "re");
        if (!f) {
                if (errno == ENOENT)
                        return -ESRCH;
                return -errno;
        }

        l = strlen(field);
        r = 0;

        do {
                char line[LINE_MAX];
                unsigned i;

                for (i = 0; i < sizeof(line)-1; i++) {
                        int c;

                        c = getc(f);
                        if (_unlikely_(c == EOF)) {
                                done = true;
                                break;
                        } else if (c == 0)
                                break;

                        line[i] = c;
                }
                line[i] = 0;

                if (memcmp(line, field, l) == 0 && line[l] == '=') {
                        value = strdup(line + l + 1);
                        if (!value)
                                return -ENOMEM;

                        r = 1;
                        break;
                }

        } while (!done);

        *_value = value;
        return r;
}
void* mempool_alloc_tile(struct mempool *mp) {
        unsigned i;

        /* When a tile is released we add it to the list and simply
         * place the next pointer at its offset 0. */

        assert(mp->tile_size >= sizeof(void*));
        assert(mp->at_least > 0);

        if (mp->freelist) {
                void *r;

                r = mp->freelist;
                mp->freelist = * (void**) mp->freelist;
                return r;
        }

        if (_unlikely_(!mp->first_pool) ||
            _unlikely_(mp->first_pool->n_used >= mp->first_pool->n_tiles)) {
                unsigned n;
                size_t size;
                struct pool *p;

                n = mp->first_pool ? mp->first_pool->n_tiles : 0;
                n = MAX(mp->at_least, n * 2);
                size = PAGE_ALIGN(ALIGN(sizeof(struct pool)) + n*mp->tile_size);
                n = (size - ALIGN(sizeof(struct pool))) / mp->tile_size;

                p = malloc(size);
                if (!p)
                        return NULL;

                p->next = mp->first_pool;
                p->n_tiles = n;
                p->n_used = 0;

                mp->first_pool = p;
        }

        i = mp->first_pool->n_used++;

        return ((uint8_t*) mp->first_pool) + ALIGN(sizeof(struct pool)) + i*mp->tile_size;
}
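/*
 * A sketch of the matching release path implied by the comment in
 * mempool_alloc_tile() above: the first word of the freed tile is
 * reused as the freelist link. Shown only to illustrate the offset-0
 * trick; the function name here is illustrative.
 */
static void mempool_release_tile(struct mempool *mp, void *p) {
        /* chain the old freelist head into the tile itself... */
        *(void**) p = mp->freelist;
        /* ...and make the tile the new head */
        mp->freelist = p;
}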
static int nftw_cb(
                const char *fpath,
                const struct stat *sb,
                int tflag,
                struct FTW *ftwbuf) {

        /* No need to label /dev twice in a row... */
        if (_unlikely_(ftwbuf->level == 0))
                return FTW_CONTINUE;

        label_fix(fpath, false, false);

        /* /run/initramfs is static data and big, no need to
         * dynamically relabel its contents at boot... */
        if (_unlikely_(ftwbuf->level == 1 && tflag == FTW_D && streq(fpath, "/run/initramfs")))
                return FTW_SKIP_SUBTREE;

        return FTW_CONTINUE;
}
int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
        const char *p;
        char *t;
        static __thread bool good = false;

        assert(controller);
        assert(fs);

        if (_unlikely_(!good)) {
                int r;

                r = path_is_mount_point("/sys/fs/cgroup", false);
                if (r <= 0)
                        return r < 0 ? r : -ENOENT;

                /* Cache this to save a few stat()s */
                good = true;
        }

        if (isempty(controller))
                return -EINVAL;

        /* This is a very minimal lookup from controller names to
         * paths. Since we have mounted most hierarchies ourselves,
         * this should be kinda safe, but eventually we might want to
         * extend this to have a fallback to actually check
         * /proc/mounts. Might need caching then. */
        if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
                p = "systemd";
        else if (startswith(controller, "name="))
                p = controller + 5;
        else
                p = controller;

        if (path && suffix)
                t = join("/sys/fs/cgroup/", p, "/", path, "/", suffix, NULL);
        else if (path)
                t = join("/sys/fs/cgroup/", p, "/", path, NULL);
        else if (suffix)
                t = join("/sys/fs/cgroup/", p, "/", suffix, NULL);
        else
                t = join("/sys/fs/cgroup/", p, NULL);
        if (!t)
                return -ENOMEM;

        path_kill_slashes(t);

        *fs = t;
        return 0;
}
static bool prefix_timestamp(void) {
        static int cached_printk_time = -1;

        if (_unlikely_(cached_printk_time < 0)) {
                _cleanup_free_ char *p = NULL;

                cached_printk_time =
                        read_one_line_file("/sys/module/printk/parameters/time", &p) >= 0 &&
                        parse_boolean(p) > 0;
        }

        return cached_printk_time;
}
static int c_variant_insert_vecs(CVariant *cv, size_t idx, size_t num) {
        struct iovec *v;
        size_t n;

        /*
         * This reallocates the iovec array and adds @num new vectors at
         * position @idx. All new vectors are reset to 0. We expect the caller
         * to be aware of front/end iterators and adjust them, in case the
         * reallocation moves them.
         *
         * Note that this might allocate more than @num vectors, as a reserve
         * for future requests. The caller must treat @num as a minimum.
         *
         * This also adjusts the trailing state-array, to actually reflect the
         * extended iovec array.
         */

        assert(idx <= cv->n_vecs);

        n = cv->n_vecs + num;
        if (_unlikely_(n < num || n > C_VARIANT_MAX_VECS))
                return c_variant_poison(cv, -ENOBUFS);

        /* allocate some more, to serve future requests */
        n = (n + 8 < C_VARIANT_MAX_VECS) ? n + 8 : C_VARIANT_MAX_VECS;
        num = n - cv->n_vecs;

        v = malloc(n * sizeof(*v) + n);
        if (!v)
                return c_variant_poison(cv, -ENOMEM);

        /* copy&extend trailing state-array */
        memcpy((char *)(v + n), (char *)(cv->vecs + cv->n_vecs), idx);
        memset((char *)(v + n) + idx, 0, num);
        memcpy((char *)(v + n) + idx + num,
               (char *)(cv->vecs + cv->n_vecs) + idx,
               (cv->n_vecs - idx));

        /* copy&extend actual iovec-array */
        memcpy(v, cv->vecs, idx * sizeof(*v));
        memset(v + idx, 0, num * sizeof(*v));
        memcpy(v + idx + num, cv->vecs + idx, (cv->n_vecs - idx) * sizeof(*v));

        if (cv->allocated_vecs)
                free(cv->vecs);
        else
                cv->allocated_vecs = true;

        cv->vecs = v;
        cv->n_vecs = n;
        return 0;
}
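/*
 * Layout note for the allocation above (relied upon in
 * c_variant_reserve() and c_variant_insert_one() below): a single block
 * holds the iovec array immediately followed by one state byte per
 * vector, recording whether that vector's iov_base is owned by the
 * variant and must be freed:
 *
 *   cv->vecs --> [ iovec 0 ] ... [ iovec n-1 ] [ byte 0 ] ... [ byte n-1 ]
 *                                              ^
 *                                              (char *)(cv->vecs + cv->n_vecs)
 */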
static int buffer_extendby(buffer_t *buf, size_t extby) {
        char *data;
        size_t newlen = _unlikely_(!buf->buflen && extby < 64)
                ? 64
                : buf->len + extby;

        if (newlen > buf->buflen) {
                newlen = next_power(newlen);

                data = realloc(buf->data, newlen);
                if (!data)
                        return -errno;

                buf->buflen = newlen;
                buf->data = data;
        }

        return 0;
}
int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
        const char *p;
        static __thread bool good = false;

        assert(fs);

        if (_unlikely_(!good)) {
                int r;

                r = path_is_mount_point("/sys/fs/cgroup", false);
                if (r <= 0)
                        return r < 0 ? r : -ENOENT;

                /* Cache this to save a few stat()s */
                good = true;
        }

        p = controller ? normalize_controller(controller) : NULL;

        return join_path(p, path, suffix, fs);
}
int loop_write(int fd, const void *buf, size_t nbytes, bool do_poll) {
        const uint8_t *p = buf;

        assert(fd >= 0);
        assert(buf);

        if (nbytes > (size_t) SSIZE_MAX)
                return -EINVAL;

        do {
                ssize_t k;

                k = write(fd, p, nbytes);
                if (k < 0) {
                        if (errno == EINTR)
                                continue;

                        if (errno == EAGAIN && do_poll) {
                                /* We knowingly ignore any return value here,
                                 * and expect that any error/EOF is reported
                                 * via write() */

                                (void) fd_wait_for_event(fd, POLLOUT, USEC_INFINITY);
                                continue;
                        }

                        return -errno;
                }

                if (_unlikely_(nbytes > 0 && k == 0)) /* Can't really happen */
                        return -EIO;

                assert((size_t) k <= nbytes);
                p += k;
                nbytes -= k;
        } while (nbytes > 0);

        return 0;
}
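/*
 * A minimal call-site sketch for loop_write() above; the fd and payload
 * are illustrative only.
 */
static int write_all_greeting(int fd) {
        /* returns 0 only once all bytes were written, a negative
         * errno-style code otherwise; short writes are retried */
        return loop_write(fd, "hello\n", 6, /* do_poll= */ false);
}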
static int c_variant_write_one(CVariant *cv, char basic, const void *arg, size_t n_arg) {
        CVariantLevel *level;
        CVariantType info;
        void *front;
        int r;

        assert(n_arg > 0);

        level = cv->state->levels + cv->state->i_levels;
        if (_unlikely_(level->n_type < 1))
                return c_variant_poison(cv, -EBADRQC);

        r = c_variant_signature_next(level->type, level->n_type, &info);
        assert(r == 1);

        r = c_variant_append(cv, basic, &info, 0, n_arg, &front, 0, NULL);
        if (r < 0)
                return r;

        memcpy(front, arg, n_arg);
        return 0;
}
_public_ int sd_journal_send(const char *format, ...) {
        int r, i, j;
        va_list ap;
        struct iovec *iov = NULL;

        va_start(ap, format);
        i = fill_iovec_sprintf(format, ap, 0, &iov);
        va_end(ap);

        if (_unlikely_(i < 0)) {
                r = i;
                goto finish;
        }

        r = sd_journal_sendv(iov, i);

finish:
        for (j = 0; j < i; j++)
                free(iov[j].iov_base);

        free(iov);

        return r;
}
_public_ int sd_journal_sendv(const struct iovec *iov, int n) {
        PROTECT_ERRNO;
        int fd, r;
        _cleanup_close_ int buffer_fd = -1;
        struct iovec *w;
        uint64_t *l;
        int i, j = 0;
        static const union sockaddr_union sa = {
                .un.sun_family = AF_UNIX,
                .un.sun_path = "/run/systemd/journal/socket",
        };
        struct msghdr mh = {
                .msg_name = (struct sockaddr*) &sa.sa,
                .msg_namelen = SOCKADDR_UN_LEN(sa.un),
        };
        ssize_t k;
        bool have_syslog_identifier = false;
        bool seal = true;

        assert_return(iov, -EINVAL);
        assert_return(n > 0, -EINVAL);

        w = newa(struct iovec, n * 5 + 3);
        l = newa(uint64_t, n);

        for (i = 0; i < n; i++) {
                char *c, *nl;

                if (_unlikely_(!iov[i].iov_base || iov[i].iov_len <= 1))
                        return -EINVAL;

                c = memchr(iov[i].iov_base, '=', iov[i].iov_len);
                if (_unlikely_(!c || c == iov[i].iov_base))
                        return -EINVAL;

                have_syslog_identifier = have_syslog_identifier ||
                        (c == (char *) iov[i].iov_base + 17 &&
                         startswith(iov[i].iov_base, "SYSLOG_IDENTIFIER"));

                nl = memchr(iov[i].iov_base, '\n', iov[i].iov_len);
                if (nl) {
                        if (_unlikely_(nl < c))
                                return -EINVAL;

                        /* Already includes a newline? Bummer, then
                         * let's write the variable name, then a
                         * newline, then the size (64bit LE), followed
                         * by the data and a final newline */

                        w[j].iov_base = iov[i].iov_base;
                        w[j].iov_len = c - (char*) iov[i].iov_base;
                        j++;

                        IOVEC_SET_STRING(w[j++], "\n");

                        l[i] = htole64(iov[i].iov_len - (c - (char*) iov[i].iov_base) - 1);
                        w[j].iov_base = &l[i];
                        w[j].iov_len = sizeof(uint64_t);
                        j++;

                        w[j].iov_base = c + 1;
                        w[j].iov_len = iov[i].iov_len - (c - (char*) iov[i].iov_base) - 1;
                        j++;

                } else
                        /* Nothing special? Then just add the line and
                         * append a newline */
                        w[j++] = iov[i];

                IOVEC_SET_STRING(w[j++], "\n");
        }

        if (!have_syslog_identifier &&
            string_is_safe(program_invocation_short_name)) {

                /* Implicitly add program_invocation_short_name, if it
                 * is not set explicitly. We only do this for
                 * program_invocation_short_name, and nothing else
                 * since everything else is much nicer to retrieve
                 * from the outside. */

                IOVEC_SET_STRING(w[j++], "SYSLOG_IDENTIFIER=");
                IOVEC_SET_STRING(w[j++], program_invocation_short_name);
                IOVEC_SET_STRING(w[j++], "\n");
        }

        fd = journal_fd();
        if (_unlikely_(fd < 0))
                return fd;

        mh.msg_iov = w;
        mh.msg_iovlen = j;

        k = sendmsg(fd, &mh, MSG_NOSIGNAL);
        if (k >= 0)
                return 0;

        /* Fail silently if the journal is not available */
        if (errno == ENOENT)
                return 0;

        if (errno != EMSGSIZE && errno != ENOBUFS)
                return -errno;

        /* Message doesn't fit... Let's dump the data in a memfd or
         * temporary file and just pass a file descriptor of it to the
         * other side.
         *
         * For the temporary files we use /dev/shm instead of /tmp
         * here, since we want this to be a tmpfs, and one that is
         * available from early boot on and where unprivileged users
         * can create files. */
        buffer_fd = memfd_new(NULL);
        if (buffer_fd < 0) {
                if (buffer_fd == -ENOSYS) {
                        buffer_fd = open_tmpfile_unlinkable("/dev/shm", O_RDWR | O_CLOEXEC);
                        if (buffer_fd < 0)
                                return buffer_fd;

                        seal = false;
                } else
                        return buffer_fd;
        }

        n = writev(buffer_fd, w, j);
        if (n < 0)
                return -errno;

        if (seal) {
                r = memfd_set_sealed(buffer_fd);
                if (r < 0)
                        return r;
        }

        r = send_one_fd_sa(fd, buffer_fd, mh.msg_name, mh.msg_namelen, 0);
        if (r == -ENOENT)
                /* Fail silently if the journal is not available */
                return 0;

        return r;
}

static int fill_iovec_perror_and_send(const char *message, int skip, struct iovec iov[]) {
        PROTECT_ERRNO;
        size_t n, k;

        k = isempty(message) ? 0 : strlen(message) + 2;
        n = 8 + k + 256 + 1;

        for (;;) {
                char buffer[n];
                char* j;

                errno = 0;
                j = strerror_r(_saved_errno_, buffer + 8 + k, n - 8 - k);
                if (errno == 0) {
                        char error[sizeof("ERRNO=")-1 + DECIMAL_STR_MAX(int) + 1];

                        if (j != buffer + 8 + k)
                                memmove(buffer + 8 + k, j, strlen(j)+1);

                        memcpy(buffer, "MESSAGE=", 8);

                        if (k > 0) {
                                memcpy(buffer + 8, message, k - 2);
                                memcpy(buffer + 8 + k - 2, ": ", 2);
                        }

                        xsprintf(error, "ERRNO=%i", _saved_errno_);

                        assert_cc(3 == LOG_ERR);
                        IOVEC_SET_STRING(iov[skip+0], "PRIORITY=3");
                        IOVEC_SET_STRING(iov[skip+1], buffer);
                        IOVEC_SET_STRING(iov[skip+2], error);

                        return sd_journal_sendv(iov, skip + 3);
                }

                if (errno != ERANGE)
                        return -errno;

                n *= 2;
        }
}

_public_ int sd_journal_perror(const char *message) {
        struct iovec iovec[3];

        return fill_iovec_perror_and_send(message, 0, iovec);
}

_public_ int sd_journal_stream_fd(const char *identifier, int priority, int level_prefix) {
        static const union sockaddr_union sa = {
                .un.sun_family = AF_UNIX,
                .un.sun_path = "/run/systemd/journal/stdout",
        };
        _cleanup_close_ int fd = -1;
        char *header;
        size_t l;
        int r;

        assert_return(priority >= 0, -EINVAL);
        assert_return(priority <= 7, -EINVAL);

        fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0);
        if (fd < 0)
                return -errno;

        r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
        if (r < 0)
                return -errno;

        if (shutdown(fd, SHUT_RD) < 0)
                return -errno;

        fd_inc_sndbuf(fd, SNDBUF_SIZE);

        if (!identifier)
                identifier = "";

        l = strlen(identifier);
        header = alloca(l + 1 + 1 + 2 + 2 + 2 + 2 + 2);

        memcpy(header, identifier, l);
        header[l++] = '\n';
        header[l++] = '\n'; /* unit id */
        header[l++] = '0' + priority;
        header[l++] = '\n';
        header[l++] = '0' + !!level_prefix;
        header[l++] = '\n';
        header[l++] = '0';
        header[l++] = '\n';
        header[l++] = '0';
        header[l++] = '\n';
        header[l++] = '0';
        header[l++] = '\n';

        r = loop_write(fd, header, l, false);
        if (r < 0)
                return r;

        r = fd;
        fd = -1;
        return r;
}

_public_ int sd_journal_print_with_location(int priority, const char *file, const char *line, const char *func, const char *format, ...) {
        int r;
        va_list ap;

        va_start(ap, format);
        r = sd_journal_printv_with_location(priority, file, line, func, format, ap);
        va_end(ap);

        return r;
}

_public_ int sd_journal_printv_with_location(int priority, const char *file, const char *line, const char *func, const char *format, va_list ap) {
        char buffer[8 + LINE_MAX], p[sizeof("PRIORITY=")-1 + DECIMAL_STR_MAX(int) + 1];
        struct iovec iov[5];
        char *f;

        assert_return(priority >= 0, -EINVAL);
        assert_return(priority <= 7, -EINVAL);
        assert_return(format, -EINVAL);

        xsprintf(p, "PRIORITY=%i", priority & LOG_PRIMASK);

        memcpy(buffer, "MESSAGE=", 8);
        vsnprintf(buffer+8, sizeof(buffer) - 8, format, ap);

        /* Strip trailing whitespace, keep prefixing whitespace */
        (void) strstrip(buffer);

        /* Suppress empty lines */
        if (isempty(buffer+8))
                return 0;

        /* func is initialized from __func__ which is not a macro, but
         * a static const char[], hence cannot easily be prefixed with
         * CODE_FUNC=, hence let's do it manually here. */
        ALLOCA_CODE_FUNC(f, func);

        zero(iov);
        IOVEC_SET_STRING(iov[0], buffer);
        IOVEC_SET_STRING(iov[1], p);
        IOVEC_SET_STRING(iov[2], file);
        IOVEC_SET_STRING(iov[3], line);
        IOVEC_SET_STRING(iov[4], f);

        return sd_journal_sendv(iov, ELEMENTSOF(iov));
}
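/*
 * A hedged illustration of the native-protocol framing implemented in
 * sd_journal_sendv() above. serialize_field() is hypothetical and
 * copies into a flat buffer only for clarity; the real code assembles
 * the same layout with iovecs and never copies the payload.
 */
static size_t serialize_field(char *out, const char *name, const char *value, size_t value_len) {
        size_t n = strlen(name);
        char *p = out;

        memcpy(p, name, n);
        p += n;

        if (memchr(value, '\n', value_len)) {
                /* binary-safe form: NAME '\n' <64-bit LE size> <data> '\n' */
                uint64_t sz = htole64(value_len);

                *(p++) = '\n';
                memcpy(p, &sz, sizeof(sz));
                p += sizeof(sz);
        } else
                /* simple form: NAME '=' <data> '\n' */
                *(p++) = '=';

        memcpy(p, value, value_len);
        p += value_len;
        *(p++) = '\n';

        return p - out;
}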
int mount_cgroup_controllers(char ***join_controllers) {
        _cleanup_set_free_free_ Set *controllers = NULL;
        int r;

        if (!cg_is_legacy_wanted())
                return 0;

        /* Mount all available cgroup controllers that are built into the kernel. */

        controllers = set_new(&string_hash_ops);
        if (!controllers)
                return log_oom();

        r = cg_kernel_controllers(controllers);
        if (r < 0)
                return log_error_errno(r, "Failed to enumerate cgroup controllers: %m");

        for (;;) {
                _cleanup_free_ char *options = NULL, *controller = NULL, *where = NULL;
                MountPoint p = {
                        .what = "cgroup",
                        .type = "cgroup",
                        .flags = MS_NOSUID|MS_NOEXEC|MS_NODEV,
                        .mode = MNT_IN_CONTAINER,
                };
                char ***k = NULL;

                controller = set_steal_first(controllers);
                if (!controller)
                        break;

                if (join_controllers)
                        for (k = join_controllers; *k; k++)
                                if (strv_find(*k, controller))
                                        break;

                if (k && *k) {
                        char **i, **j;

                        for (i = *k, j = *k; *i; i++) {

                                if (!streq(*i, controller)) {
                                        _cleanup_free_ char *t;

                                        t = set_remove(controllers, *i);
                                        if (!t) {
                                                free(*i);
                                                continue;
                                        }
                                }

                                *(j++) = *i;
                        }

                        *j = NULL;

                        options = strv_join(*k, ",");
                        if (!options)
                                return log_oom();
                } else {
                        options = controller;
                        controller = NULL;
                }

                where = strappend("/sys/fs/cgroup/", options);
                if (!where)
                        return log_oom();

                p.where = where;
                p.options = options;

                r = mount_one(&p, true);
                if (r < 0)
                        return r;

                if (r > 0 && k && *k) {
                        char **i;

                        for (i = *k; *i; i++) {
                                _cleanup_free_ char *t = NULL;

                                t = strappend("/sys/fs/cgroup/", *i);
                                if (!t)
                                        return log_oom();

                                r = symlink(options, t);
                                if (r < 0 && errno != EEXIST)
                                        return log_error_errno(errno, "Failed to create symlink %s: %m", t);
#ifdef SMACK_RUN_LABEL
                                r = mac_smack_copy(t, options);
                                if (r < 0 && r != -EOPNOTSUPP)
                                        return log_error_errno(r, "Failed to copy smack label from %s to %s: %m", options, t);
#endif
                        }
                }
        }

        /* Now that we mounted everything, let's make the tmpfs the
         * cgroup file systems are mounted into read-only. */
        (void) mount("tmpfs", "/sys/fs/cgroup", "tmpfs",
                     MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755");

        return 0;
}

#if defined(HAVE_SELINUX) || defined(HAVE_SMACK)
static int nftw_cb(
                const char *fpath,
                const struct stat *sb,
                int tflag,
                struct FTW *ftwbuf) {

        /* No need to label /dev twice in a row... */
        if (_unlikely_(ftwbuf->level == 0))
                return FTW_CONTINUE;

        label_fix(fpath, false, false);

        /* /run/initramfs is static data and big, no need to
         * dynamically relabel its contents at boot... */
        if (_unlikely_(ftwbuf->level == 1 && tflag == FTW_D && streq(fpath, "/run/initramfs")))
                return FTW_SKIP_SUBTREE;

        return FTW_CONTINUE;
}
#endif

int mount_setup(bool loaded_policy) {
        unsigned i;
        int r = 0;

        for (i = 0; i < ELEMENTSOF(mount_table); i ++) {
                int j;

                j = mount_one(mount_table + i, loaded_policy);
                if (j != 0 && r >= 0)
                        r = j;
        }

        if (r < 0)
                return r;

#if defined(HAVE_SELINUX) || defined(HAVE_SMACK)
        /* Nodes in devtmpfs and /run need to be manually updated for
         * the appropriate labels, after mounting. The other virtual
         * API file systems like /sys and /proc do not need that, they
         * use the same label for all their files. */
        if (loaded_policy) {
                usec_t before_relabel, after_relabel;
                char timespan[FORMAT_TIMESPAN_MAX];

                before_relabel = now(CLOCK_MONOTONIC);

                nftw("/dev", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
                nftw("/run", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);

                after_relabel = now(CLOCK_MONOTONIC);

                log_info("Relabelled /dev and /run in %s.",
                         format_timespan(timespan, sizeof(timespan), after_relabel - before_relabel, 0));
        }
#endif

        /* Create a few default symlinks, which are normally created
         * by udevd, but some scripts might need them before we start
         * udevd. */
        dev_setup(NULL, UID_INVALID, GID_INVALID);

        /* Mark the root directory as shared with regard to mount
         * propagation. The kernel defaults to "private", but we think
         * it makes more sense to have a default of "shared" so that
         * nspawn and the container tools work out of the box. If
         * specific setups need other settings they can reset the
         * propagation mode to private if needed. */
        if (detect_container() <= 0)
                if (mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL) < 0)
                        log_warning_errno(errno, "Failed to set up the root directory for shared mount propagation: %m");

        /* Create a few directories we always want around. Note that
         * sd_booted() checks for /run/systemd/system, so this mkdir
         * really needs to stay for good, otherwise software that
         * copied sd-daemon.c into their sources will misdetect
         * systemd. */
        mkdir_label("/run/systemd", 0755);
        mkdir_label("/run/systemd/system", 0755);
        mkdir_label("/run/systemd/inaccessible", 0000);

        return 0;
}
static int c_variant_insert_one(CVariant *cv, const char *type, const struct iovec *vecs, size_t n_vecs, size_t size) {
        CVariantLevel *level;
        CVariantType info;
        size_t n_type, i, idx;
        struct iovec *v;
        uint64_t frame;
        int r;

        level = cv->state->levels + cv->state->i_levels;
        if (_unlikely_(level->n_type < 1))
                return c_variant_poison(cv, -EBADRQC);

        r = c_variant_signature_next(level->type, level->n_type, &info);
        assert(r == 1);

        n_type = strlen(type);
        if (_unlikely_(n_type != info.n_type || strncmp(type, info.type, n_type)))
                return c_variant_poison(cv, -EBADRQC);
        if (_unlikely_(info.size > 0 && size != info.size))
                return c_variant_poison(cv, -EBADMSG);

        r = c_variant_append(cv, *type, &info, n_vecs + 1, 0, NULL, 0, NULL);
        if (r < 0)
                return r;

        /* make sure there are at least 'n_vecs + 1' unused vectors */
        assert(cv->n_vecs - level->v_front - level->v_tail - 2U >= n_vecs + 1U);

        /*
         * Clip the current front and prepare the next vector with the
         * remaining buffer space. Then insert the requested vectors in between
         * both and verify alignment restrictions.
         */

        v = cv->vecs + level->v_front;
        v[n_vecs + 1].iov_base = (char *)v->iov_base + level->i_front;
        v[n_vecs + 1].iov_len = v->iov_len - level->i_front;
        v->iov_len = level->i_front;

        for (i = 0; i < n_vecs; ++i) {
                idx = level->v_front + i + 1;
                if (((char *)(cv->vecs + cv->n_vecs))[idx]) {
                        ((char *)(cv->vecs + cv->n_vecs))[idx] = false;
                        free((cv->vecs + idx)->iov_base);
                }
                cv->vecs[idx] = vecs[i];
        }

        level->v_front += n_vecs + 1;
        level->i_front = 0;
        level->offset += size;

        /* see c_variant_end_one(); we have to update the framing offset */
        if (info.size < 1) {
                switch (level->enclosing) {
                case C_VARIANT_TUPLE_OPEN:
                case C_VARIANT_PAIR_OPEN:
                        /* last element never stores framing offsets */
                        if (level->n_type < 1)
                                break;

                        /* fallthrough */
                case C_VARIANT_ARRAY:
                        assert(level->i_tail >= 8);
                        assert(!(level->i_tail & 7));

                        v = cv->vecs + cv->n_vecs - level->v_tail - 1;
                        frame = level->offset;
                        memcpy((char *)v->iov_base + level->i_tail - 8, &frame, 8);
                        break;
                }
        }

        return 0;
}
static int c_variant_begin_one(CVariant *cv, char container, const char *variant) {
        CVariantLevel *next, *level;
        CVariantType info;
        size_t n_tail;
        void *tail;
        int r;

        r = c_variant_ensure_level(cv);
        if (r < 0)
                return r;

        if (container == C_VARIANT_VARIANT)
                n_tail = strlen(variant);
        else
                n_tail = 0;

        level = cv->state->levels + cv->state->i_levels;
        if (_unlikely_(level->n_type < 1))
                return c_variant_poison(cv, -EBADRQC);

        r = c_variant_signature_next(level->type, level->n_type, &info);
        assert(r == 1);

        r = c_variant_append(cv, container, &info, 0, 0, NULL, n_tail, &tail);
        if (r < 0)
                return r;

        c_variant_push_level(cv);

        next = cv->state->levels + cv->state->i_levels;
        next->size = info.size;
        next->i_tail = level->i_tail;
        next->v_tail = level->v_tail;
        /* wordsize is unused */
        next->enclosing = container;
        next->v_front = level->v_front;
        next->i_front = level->i_front;
        next->index = 0;
        next->offset = 0;

        switch (container) {
        case C_VARIANT_VARIANT:
                memcpy(tail, variant, n_tail);
                next->i_tail += n_tail;
                next->n_type = n_tail;
                next->index = n_tail;
                next->type = tail;
                break;
        case C_VARIANT_MAYBE:
        case C_VARIANT_ARRAY:
                next->n_type = info.n_type - 1;
                next->type = info.type + 1;
                break;
        case C_VARIANT_TUPLE_OPEN:
        case C_VARIANT_PAIR_OPEN:
                next->n_type = info.n_type - 2;
                next->type = info.type + 1;
                break;
        default:
                assert(0);
                break;
        }

        return 0;
}
int mount_cgroup_controllers(char ***join_controllers) {
        int r;
        char buf[LINE_MAX];
        _cleanup_set_free_free_ Set *controllers = NULL;
        _cleanup_fclose_ FILE *f;

        /* Mount all available cgroup controllers that are built into the kernel. */

        f = fopen("/proc/cgroups", "re");
        if (!f) {
                log_error("Failed to enumerate cgroup controllers: %m");
                return 0;
        }

        controllers = set_new(string_hash_func, string_compare_func);
        if (!controllers)
                return log_oom();

        /* Ignore the header line */
        (void) fgets(buf, sizeof(buf), f);

        for (;;) {
                char *controller;
                int enabled = 0;

                if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {

                        if (feof(f))
                                break;

                        log_error("Failed to parse /proc/cgroups.");
                        return -EIO;
                }

                if (!enabled) {
                        free(controller);
                        continue;
                }

                r = set_consume(controllers, controller);
                if (r < 0) {
                        log_error("Failed to add controller to set.");
                        return r;
                }
        }

        for (;;) {
                MountPoint p = {
                        .what = "cgroup",
                        .type = "cgroup",
                        .flags = MS_NOSUID|MS_NOEXEC|MS_NODEV,
                        .mode = MNT_IN_CONTAINER,
                };
                char ***k = NULL;
                _cleanup_free_ char *options = NULL, *controller;

                controller = set_steal_first(controllers);
                if (!controller)
                        break;

                if (join_controllers)
                        for (k = join_controllers; *k; k++)
                                if (strv_find(*k, controller))
                                        break;

                if (k && *k) {
                        char **i, **j;

                        for (i = *k, j = *k; *i; i++) {

                                if (!streq(*i, controller)) {
                                        _cleanup_free_ char *t;

                                        t = set_remove(controllers, *i);
                                        if (!t) {
                                                free(*i);
                                                continue;
                                        }
                                }

                                *(j++) = *i;
                        }

                        *j = NULL;

                        options = strv_join(*k, ",");
                        if (!options)
                                return log_oom();
                } else {
                        options = controller;
                        controller = NULL;
                }

                p.where = strappenda("/sys/fs/cgroup/", options);
                p.options = options;

                r = mount_one(&p, true);
                if (r < 0)
                        return r;

                if (r > 0 && k && *k) {
                        char **i;

                        for (i = *k; *i; i++) {
                                char *t = strappenda("/sys/fs/cgroup/", *i);

                                r = symlink(options, t);
                                if (r < 0 && errno != EEXIST) {
                                        log_error("Failed to create symlink %s: %m", t);
                                        return -errno;
                                }
                        }
                }
        }

        return 0;
}

static int nftw_cb(
                const char *fpath,
                const struct stat *sb,
                int tflag,
                struct FTW *ftwbuf) {

        /* No need to label /dev twice in a row... */
        if (_unlikely_(ftwbuf->level == 0))
                return FTW_CONTINUE;

        label_fix(fpath, false, false);

        /* /run/initramfs is static data and big, no need to
         * dynamically relabel its contents at boot... */
        if (_unlikely_(ftwbuf->level == 1 && tflag == FTW_D && streq(fpath, "/run/initramfs")))
                return FTW_SKIP_SUBTREE;

        return FTW_CONTINUE;
}

int mount_setup(bool loaded_policy) {
        int r;
        unsigned i;

        for (i = 0; i < ELEMENTSOF(mount_table); i ++) {
                r = mount_one(mount_table + i, true);
                if (r < 0)
                        return r;
        }

        /* Nodes in devtmpfs and /run need to be manually updated for
         * the appropriate labels, after mounting. The other virtual
         * API file systems like /sys and /proc do not need that, they
         * use the same label for all their files. */
        if (loaded_policy) {
                usec_t before_relabel, after_relabel;
                char timespan[FORMAT_TIMESPAN_MAX];

                before_relabel = now(CLOCK_MONOTONIC);

                nftw("/dev", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
                nftw("/run", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);

                after_relabel = now(CLOCK_MONOTONIC);

                log_info("Relabelled /dev and /run in %s.",
                         format_timespan(timespan, sizeof(timespan), after_relabel - before_relabel, 0));
        }

        /* Create a few default symlinks, which are normally created
         * by udevd, but some scripts might need them before we start
         * udevd. */
        dev_setup(NULL);

        /* Mark the root directory as shared with regard to mount
         * propagation. The kernel defaults to "private", but we think
         * it makes more sense to have a default of "shared" so that
         * nspawn and the container tools work out of the box. If
         * specific setups need other settings they can reset the
         * propagation mode to private if needed. */
        if (detect_container(NULL) <= 0)
                if (mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL) < 0)
                        log_warning("Failed to set up the root directory for shared mount propagation: %m");

        /* Create a few directories we always want around. Note that
         * sd_booted() checks for /run/systemd/system, so this mkdir
         * really needs to stay for good, otherwise software that
         * copied sd-daemon.c into their sources will misdetect
         * systemd. */
        mkdir_label("/run/systemd", 0755);
        mkdir_label("/run/systemd/system", 0755);
        mkdir_label("/run/systemd/inaccessible", 0000);

        return 0;
}
_public_ int sd_journal_sendv(const struct iovec *iov, int n) {
        PROTECT_ERRNO;
        int fd;
        struct iovec *w;
        uint64_t *l;
        int i, j = 0;
        struct sockaddr_un sa = {
                .sun_family = AF_UNIX,
                .sun_path = JOURNAL_RUNDIR "/socket",
        };
        struct msghdr mh = {
                .msg_name = &sa,
                .msg_namelen = offsetof(struct sockaddr_un, sun_path) + strlen(sa.sun_path),
        };
        ssize_t k;
        union {
                struct cmsghdr cmsghdr;
                uint8_t buf[CMSG_SPACE(sizeof(int))];
        } control;
        bool have_syslog_identifier = false;

        assert_return(iov, -EINVAL);
        assert_return(n > 0, -EINVAL);

        /* note: the original allocation was missing the parentheses
         * around the element count */
        w = alloca(sizeof(struct iovec) * (n * 5 + 3));
        l = alloca(sizeof(uint64_t) * n);

        for (i = 0; i < n; i++) {
                char *c, *nl;

                if (_unlikely_(!iov[i].iov_base || iov[i].iov_len <= 1))
                        return -EINVAL;

                c = memchr(iov[i].iov_base, '=', iov[i].iov_len);
                if (_unlikely_(!c || c == iov[i].iov_base))
                        return -EINVAL;

                have_syslog_identifier = have_syslog_identifier ||
                        (c == (char *) iov[i].iov_base + 17 &&
                         startswith(iov[i].iov_base, "SYSLOG_IDENTIFIER"));

                nl = memchr(iov[i].iov_base, '\n', iov[i].iov_len);
                if (nl) {
                        if (_unlikely_(nl < c))
                                return -EINVAL;

                        /* Already includes a newline? Bummer, then
                         * let's write the variable name, then a
                         * newline, then the size (64bit LE), followed
                         * by the data and a final newline */

                        w[j].iov_base = iov[i].iov_base;
                        w[j].iov_len = c - (char*) iov[i].iov_base;
                        j++;

                        IOVEC_SET_STRING(w[j++], "\n");

                        l[i] = htole64(iov[i].iov_len - (c - (char*) iov[i].iov_base) - 1);
                        w[j].iov_base = &l[i];
                        w[j].iov_len = sizeof(uint64_t);
                        j++;

                        w[j].iov_base = c + 1;
                        w[j].iov_len = iov[i].iov_len - (c - (char*) iov[i].iov_base) - 1;
                        j++;

                } else
                        /* Nothing special? Then just add the line and
                         * append a newline */
                        w[j++] = iov[i];

                IOVEC_SET_STRING(w[j++], "\n");
        }

        if (!have_syslog_identifier) {
                /* Implicitly add program_invocation_short_name, if it
                 * is not set explicitly. We only do this for
                 * program_invocation_short_name, and nothing else
                 * since everything else is much nicer to retrieve
                 * from the outside. */

                IOVEC_SET_STRING(w[j++], "SYSLOG_IDENTIFIER=");
                IOVEC_SET_STRING(w[j++], program_invocation_short_name);
                IOVEC_SET_STRING(w[j++], "\n");
        }

        fd = journal_fd();
        if (_unlikely_(fd < 0))
                return fd;

        mh.msg_iov = w;
        mh.msg_iovlen = j;

        k = sendmsg(fd, &mh, MSG_NOSIGNAL);
        if (k >= 0)
                return 0;

        /* Fail silently if the journal is not available */
        if (errno == ENOENT)
                return 0;

        if (errno != EMSGSIZE && errno != ENOBUFS)
                return -errno;

        return 0;
}

static int fill_iovec_perror_and_send(const char *message, int skip, struct iovec iov[]) {
        PROTECT_ERRNO;
        size_t n, k;

        k = isempty(message) ? 0 : strlen(message) + 2;
        n = 8 + k + 256 + 1;

        for (;;) {
                char buffer[n];
                char* j;

                errno = 0;
                j = strerror_r(_saved_errno_, buffer + 8 + k, n - 8 - k);
                if (errno == 0) {
                        char error[6 + 10 + 1]; /* for a 32bit value */

                        if (j != buffer + 8 + k)
                                memmove(buffer + 8 + k, j, strlen(j)+1);

                        memcpy(buffer, "MESSAGE=", 8);

                        if (k > 0) {
                                memcpy(buffer + 8, message, k - 2);
                                memcpy(buffer + 8 + k - 2, ": ", 2);
                        }

                        snprintf(error, sizeof(error), "ERRNO=%u", _saved_errno_);
                        char_array_0(error);

                        IOVEC_SET_STRING(iov[skip+0], "PRIORITY=3");
                        IOVEC_SET_STRING(iov[skip+1], buffer);
                        IOVEC_SET_STRING(iov[skip+2], error);

                        return sd_journal_sendv(iov, skip + 3);
                }

                if (errno != ERANGE)
                        return -errno;

                n *= 2;
        }
}

_public_ int sd_journal_perror(const char *message) {
        struct iovec iovec[3];

        return fill_iovec_perror_and_send(message, 0, iovec);
}
static int c_variant_append(CVariant *cv,
                            char element,
                            CVariantType *info,
                            size_t n_extra_vecs,
                            size_t n_front,
                            void **frontp,
                            size_t n_unaccounted_tail,
                            void **tailp) {
        CVariantLevel *level = cv->state->levels + cv->state->i_levels;
        bool need_frame = false;
        void *tail;
        int r;

        /*
         * Common helper to append one element of type @element: it verifies
         * @element matches the next expected type, reserves @n_front bytes of
         * aligned front buffer space plus any tail space needed for
         * framing-offset bookkeeping, and advances the level's type iterator.
         */

        if (_unlikely_(level->n_type < 1 || *level->type != element))
                return c_variant_poison(cv, -EBADRQC);

        assert(info->size == 0 || n_front == 0 || n_front == info->size);

        switch (level->enclosing) {
        case C_VARIANT_TUPLE_OPEN:
        case C_VARIANT_PAIR_OPEN:
                if (info->n_type >= level->n_type)
                        break;

                /* fallthrough */
        case C_VARIANT_ARRAY:
                need_frame = (info->size < 1);
                break;
        }

        /*
         * If we need to store a frame pointer, we *must* guarantee 8-byte
         * alignment and allocate an extra 8 bytes at the tail.
         * We always additionally allocate @n_unaccounted_tail bytes at the
         * tail, which have *NO* alignment guarantees. But those bytes are
         * *unaccounted*, that is, we immediately subtract them from the tail
         * marker of this level again.
         */
        r = c_variant_reserve(cv, n_extra_vecs,
                              info->alignment, n_front, frontp,
                              need_frame ? 3 : 0,
                              n_unaccounted_tail + (need_frame ? 8 : 0), &tail);
        if (r < 0)
                return r;

        /* de-account extra tail-space */
        assert(n_unaccounted_tail <= level->i_tail);
        level->i_tail -= n_unaccounted_tail;

        /* store frame */
        if (need_frame) {
                ++level->index;
                *(uint64_t *)tail = level->offset;
                tail = (char *)tail + 8;
        }

        switch (level->enclosing) {
        case C_VARIANT_ARRAY:
                break;
        case C_VARIANT_MAYBE:
                /* write maybe-marker for non-empty, dynamic maybes */
                if (info->size < 1)
                        ++level->index;

                /* fallthrough */
        default:
                level->type += info->n_type;
                level->n_type -= info->n_type;
                break;
        }

        if (tailp)
                *tailp = tail;
        return 0;
}
static int c_variant_reserve(CVariant *cv,
                             size_t n_extra_vecs,
                             size_t front_alignment,
                             size_t front_allocation,
                             void **frontp,
                             size_t tail_alignment,
                             size_t tail_allocation,
                             void **tailp) {
        CVariantLevel *level;
        size_t i, j, n, rem, n_front, n_tail;
        struct iovec *vec_front, *vec_tail;
        void *p;
        int r;

        /*
         * This advances the front and tail markers according to the requested
         * allocation size. If an alignment is given, the start is aligned
         * before the marker is advanced. If required, new buffer space is
         * allocated.
         *
         * On success, a pointer to the start of each reserved buffer space is
         * returned in @frontp and @tailp. On failure, both markers will stay
         * untouched.
         *
         * Note that front-alignment is always according to the global
         * alignment (i.e., it adheres to level->offset (and as such iov_base)
         * rather than level->i_front). But tail-alignment is always local-only
         * (adhering to level->i_tail). There is no global context for tail
         * space, so no way to align it as such.
         */

        /* both are mapped, hence cannot overflow size_t (with alignment) */
        assert(front_allocation + tail_allocation + 16 > front_allocation);

        level = cv->state->levels + cv->state->i_levels;
        n_front = front_allocation + ALIGN_TO(level->offset, 1 << front_alignment) - level->offset;
        n_tail = tail_allocation + ALIGN_TO(level->i_tail, 1 << tail_alignment) - level->i_tail;
        vec_front = cv->vecs + level->v_front;
        vec_tail = cv->vecs + cv->n_vecs - level->v_tail - 1;

        /*
         * If the remaining space is not enough to fulfill the request, search
         * through the unused vectors, in case there is unused buffer space
         * that is sufficient for the request. If we find one, move it directly
         * next to our current vector, so we can jump over.
         */
        if (n_front > vec_front->iov_len - level->i_front) {
                for (i = 1; vec_front + i < vec_tail; ++i) {
                        if (n_front > (vec_front + i)->iov_len)
                                continue;

                        c_variant_swap_vecs(cv, (vec_front + i) - cv->vecs, (vec_front + 1) - cv->vecs);
                        ++vec_front;
                        n_front = 0;
                        break;
                }
        } else if (n_front > 0) {
                /* fits into @vec_front */
                n_front = 0;
        }

        /* counter-part for tail-allocation */
        if (n_tail > vec_tail->iov_len - level->i_tail) {
                for (i = 1; vec_tail - i > vec_front; ++i) {
                        if (n_tail > (vec_tail - i)->iov_len)
                                continue;

                        c_variant_swap_vecs(cv, (vec_tail - i) - cv->vecs, (vec_tail - 1) - cv->vecs);
                        --vec_tail;
                        n_tail = 0;
                        break;
                }
        } else if (n_tail > 0) {
                /* fits into @vec_tail */
                n_tail = 0;
        }

        n = vec_tail - vec_front - 1;
        if (_unlikely_(n < n_extra_vecs + 2 * !!(n_front || n_tail))) {
                /* remember tail-index since realloc might move it */
                j = vec_front - cv->vecs;
                i = cv->n_vecs - (vec_tail - cv->vecs);

                r = c_variant_insert_vecs(cv, j + 1, n_extra_vecs + 2);
                if (r < 0)
                        return r;

                /* re-calculate vectors, as they might have moved */
                vec_front = cv->vecs + j;
                vec_tail = cv->vecs + cv->n_vecs - i;
        }

        /* if either is non-zero, we need a new buffer allocation */
        if (_unlikely_(n_front || n_tail)) {
                /*
                 * Now that we have the iovecs, we need the actual buffer
                 * space. We start with 2^12 bytes (4k / one page), and
                 * increase it for each allocated buffer by a factor of 2, up
                 * to an arbitrary limit of 2^31.
                 */
                n = 1 << (12 + ((cv->a_vecs > 19) ? 19 : cv->a_vecs));
                if (n < n_front + n_tail + 16)
                        n = n_front + n_tail + 16;

                p = malloc(n);
                if (!p) {
                        n = n_front + n_tail + 16;
                        p = malloc(n);
                        if (!p)
                                return c_variant_poison(cv, -ENOMEM);
                }

                /* count how often we allocated; protect against overflow */
                if (++cv->a_vecs < 1)
                        --cv->a_vecs;

                if (n_front) {
                        ++vec_front;
                        if (((char *)(cv->vecs + cv->n_vecs))[vec_front - cv->vecs])
                                free(vec_front->iov_base);
                        vec_front->iov_base = p;
                        vec_front->iov_len = n;
                        ((char *)(cv->vecs + cv->n_vecs))[vec_front - cv->vecs] = true;
                }

                if (n_tail) {
                        --vec_tail;
                        if (((char *)(cv->vecs + cv->n_vecs))[vec_tail - cv->vecs])
                                free(vec_tail->iov_base);
                        vec_tail->iov_base = p;
                        vec_tail->iov_len = n;
                        ((char *)(cv->vecs + cv->n_vecs))[vec_tail - cv->vecs] = true;
                }

                if (n_front && n_tail) {
                        /* if both allocated, we need to split properly */
                        rem = n - n_front - n_tail - 16;
                        vec_front->iov_len = n_front + 8 + (rem * C_VARIANT_FRONT_SHARE / 100);
                        vec_tail->iov_base = (char *)p + vec_front->iov_len;
                        vec_tail->iov_len = n - vec_front->iov_len;
                        ((char *)(cv->vecs + cv->n_vecs))[vec_tail - cv->vecs] = false;
                }
        }

        if (vec_front != cv->vecs + level->v_front) {
                /* vector was updated; clip previous and then advance */
                assert(vec_front - 1 == cv->vecs + level->v_front);
                (vec_front - 1)->iov_len = level->i_front;
                ++level->v_front;
                level->i_front = 0;

                /* front vectors must be aligned according to current offset */
                assert(vec_front->iov_base == ALIGN_PTR_TO(vec_front->iov_base, 8));
                n = level->offset & 7;
                vec_front->iov_base = (char *)vec_front->iov_base + n;
                vec_front->iov_len -= n;
        }

        if (vec_tail != cv->vecs + cv->n_vecs - level->v_tail - 1) {
                /* vector was updated; clip previous and then advance */
                assert(vec_tail + 1 == cv->vecs + cv->n_vecs - level->v_tail - 1);
                (vec_tail + 1)->iov_len = level->i_tail;
                ++level->v_tail;
                level->i_tail = 0;
        }

        /*
         * We are done! Apply alignment before returning a pointer to the
         * reserved space. Then advance the iterators, so the space is actually
         * reserved and will not get re-used.
         */
        n = ALIGN_TO(level->offset, 1 << front_alignment) - level->offset;
        memset((char *)vec_front->iov_base + level->i_front, 0, n);
        level->i_front += n;
        level->offset += n;

        level->i_tail = ALIGN_TO(level->i_tail, 1 << tail_alignment);

        if (frontp)
                *frontp = (char *)vec_front->iov_base + level->i_front;
        if (tailp)
                *tailp = (char *)vec_tail->iov_base + level->i_tail;

        level->i_front += front_allocation;
        level->offset += front_allocation;
        level->i_tail += tail_allocation;
        return 0;
}
static int c_variant_end_one(CVariant *cv) {
        CVariantLevel *prev, *level;
        size_t i, n, wz, rem;
        void *front, *tail;
        struct iovec *v;
        uint64_t frame;
        int r, step;

        if (_unlikely_(c_variant_on_root_level(cv)))
                return c_variant_poison(cv, -EBADRQC);

        prev = cv->state->levels + cv->state->i_levels;
        wz = c_variant_word_size(prev->offset, prev->index);

        switch (prev->enclosing) {
        case C_VARIANT_VARIANT:
                n = prev->index + 1;
                break;
        case C_VARIANT_MAYBE:
                n = !!(prev->index > 0);
                break;
        case C_VARIANT_ARRAY:
        case C_VARIANT_TUPLE_OPEN:
        case C_VARIANT_PAIR_OPEN:
                n = prev->index * (1 << wz);
                break;
        default:
                assert(0);
                return c_variant_poison(cv, -EFAULT);
        }

        if (prev->size < 1) {
                /*
                 * Variable-size container which requires 'n' additional front
                 * bytes for framing-offsets and other management data. No
                 * alignment is enforced, nor is trailing padding added.
                 */
                r = c_variant_reserve(cv, 0, 0, n, &front, 0, 0, &tail);
                if (r < 0)
                        return r;
        } else {
                /*
                 * Fixed-size container of size prev->size. We *must* ensure
                 * the container has a size multiple of its alignment, hence,
                 * we add trailing zero bytes as padding here.
                 */
                assert(!n);
                assert(prev->offset <= prev->size);
                n = prev->size - prev->offset;

                r = c_variant_reserve(cv, 0, 0, n, &front, 0, 0, &tail);
                if (r < 0)
                        return r;

                memset(front, 0, n);
        }

        c_variant_pop_level(cv);
        level = cv->state->levels + cv->state->i_levels;

        switch (prev->enclosing) {
        case C_VARIANT_VARIANT:
                *(char *)front = 0;
                memcpy((char *)front + 1, (char *)tail - prev->index, prev->index);
                break;
        case C_VARIANT_MAYBE:
                if (prev->index > 0)
                        *(char *)front = 0;
                break;
        case C_VARIANT_ARRAY:
        case C_VARIANT_TUPLE_OPEN:
        case C_VARIANT_PAIR_OPEN:
                /* backwards-iteration for arrays, to revert frame order */
                if (prev->enclosing == C_VARIANT_ARRAY) {
                        i = prev->index - 1;
                        step = -1;
                } else {
                        i = 0;
                        step = 1;
                }

                v = cv->vecs + cv->n_vecs - prev->v_tail - 1;
                rem = prev->i_tail;

                for (n = prev->index; n-- > 0; i += step) {
                        while (_unlikely_(rem < 8)) {
                                assert(rem == 0);
                                ++v;
                                rem = v->iov_len;
                                assert(!(rem & 7));
                        }

                        rem -= 8;
                        c_variant_word_store((char *)front + i * (1 << wz), wz,
                                             *(uint64_t *)((char *)v->iov_base + rem));
                }
                break;
        }

        /*
         * Advance parent level by the size of the completed child. Note that
         * the parent-level was already aligned correctly when entered. Hence,
         * prev->offset correctly reflects the difference in bytes between
         * both fronts.
         */
        level->v_front = prev->v_front;
        level->i_front = prev->i_front;
        level->offset += prev->offset;

        /*
         * If this was a dynamic-sized type, we must store the framing-offset
         * at the tail. Memory for it was already reserved when the container
         * was created, we just recover the pointer to it and write the now
         * known framing offset.
         * Note that for tuples we never write a framing offset for the last
         * type. This also guarantees that the root-level never writes framing
         * offsets (root-level can only be a single type, rather than a full
         * signature).
         * Containers with a single entry never store framing offsets. In those
         * cases we can skip the operation.
         *
         * This only stores our internal state-tracking at the tail buffer.
         * This is *not* the final serialized data. Only once the full
         * container is closed, the state-array is properly serialized.
         */
        if (prev->size < 1) {
                switch (level->enclosing) {
                case C_VARIANT_TUPLE_OPEN:
                case C_VARIANT_PAIR_OPEN:
                        /* last element never stores framing offsets */
                        if (level->n_type < 1)
                                break;

                        /* fallthrough */
                case C_VARIANT_ARRAY:
                        assert(level->i_tail >= 8);
                        assert(!(level->i_tail & 7));

                        v = cv->vecs + cv->n_vecs - level->v_tail - 1;
                        frame = level->offset;
                        memcpy((char *)v->iov_base + level->i_tail - 8, &frame, 8);
                        break;
                }
        }

        return 0;
}