Ejemplo n.º 1
0
/* Forwards a single log message to the kernel log buffer by writing one
 * record to the /dev/kmsg file descriptor kept in the server object.
 *
 * The record is laid out as "<PRIORITY>" followed by an optional identifier,
 * then "[PID]: " when sender credentials are known (or ": " when only an
 * identifier is known), the message text and a trailing newline.
 *
 * Nothing is written when the message's level exceeds s->max_level_kmsg or
 * when no /dev/kmsg descriptor is available. Write failures are only logged
 * at debug level. */
void server_forward_kmsg(
        Server *s,
        int priority,
        const char *identifier,
        const char *message,
        struct ucred *ucred) {

        char prio_field[6], pid_field[16];
        struct iovec parts[5];
        char *comm = NULL;
        int n_parts = 0;

        assert(s);
        assert(priority >= 0);
        assert(priority <= 999);
        assert(message);

        if (_unlikely_(LOG_PRI(priority) > s->max_level_kmsg) ||
            _unlikely_(s->dev_kmsg_fd < 0))
                return;

        /* Messages carrying the kernel facility must never reach kmsg, no
         * matter who sent them, hence rewrite the facility first. */
        priority = syslog_fixup_facility(priority);

        /* Part 1: the priority prefix */
        snprintf(prio_field, sizeof(prio_field), "<%i>", priority);
        char_array_0(prio_field);
        IOVEC_SET_STRING(parts[n_parts++], prio_field);

        /* Part 2: identifier and PID, depending on what we know */
        if (!ucred) {
                if (identifier) {
                        IOVEC_SET_STRING(parts[n_parts++], identifier);
                        IOVEC_SET_STRING(parts[n_parts++], ": ");
                }
        } else {
                if (!identifier) {
                        /* Fall back to the sender's comm name; on failure
                         * comm stays NULL and no identifier is emitted. */
                        get_process_comm(ucred->pid, &comm);
                        identifier = comm;
                }

                if (identifier)
                        IOVEC_SET_STRING(parts[n_parts++], identifier);

                snprintf(pid_field, sizeof(pid_field), "[%lu]: ", (unsigned long) ucred->pid);
                char_array_0(pid_field);
                IOVEC_SET_STRING(parts[n_parts++], pid_field);
        }

        /* Part 3: the message itself, terminated by a newline */
        IOVEC_SET_STRING(parts[n_parts++], message);
        IOVEC_SET_STRING(parts[n_parts++], "\n");

        if (writev(s->dev_kmsg_fd, parts, n_parts) < 0)
                log_debug("Failed to write to /dev/kmsg for logging: %m");

        free(comm);
}
Ejemplo n.º 2
0
/* Forwards a single log message to the kernel log buffer by writing one
 * record to the /dev/kmsg file descriptor kept in the server object.
 *
 * The record is laid out as "<PRIORITY>" followed by an optional identifier,
 * then "[PID]: " when sender credentials are known (or ": " when only an
 * identifier is known), the message text and a trailing newline.
 *
 * Nothing is written when the message's level exceeds s->max_level_kmsg or
 * when no /dev/kmsg descriptor is available. Write failures are only logged
 * at debug level. */
void server_forward_kmsg(
        Server *s,
        int priority,
        const char *identifier,
        const char *message,
        const struct ucred *ucred) {

        char prio_field[DECIMAL_STR_MAX(priority) + 3],
             pid_field[STRLEN("[]: ") + DECIMAL_STR_MAX(pid_t) + 1];
        _cleanup_free_ char *comm = NULL;
        struct iovec parts[5];
        int n_parts = 0;

        assert(s);
        assert(priority >= 0);
        assert(priority <= 999);
        assert(message);

        if (_unlikely_(LOG_PRI(priority) > s->max_level_kmsg) ||
            _unlikely_(s->dev_kmsg_fd < 0))
                return;

        /* Messages carrying the kernel facility must never reach kmsg, no
         * matter who sent them, hence rewrite the facility first. */
        priority = syslog_fixup_facility(priority);

        /* Part 1: the priority prefix */
        xsprintf(prio_field, "<%i>", priority);
        parts[n_parts++] = IOVEC_MAKE_STRING(prio_field);

        /* Part 2: identifier and PID, depending on what we know */
        if (!ucred) {
                if (identifier) {
                        parts[n_parts++] = IOVEC_MAKE_STRING(identifier);
                        parts[n_parts++] = IOVEC_MAKE_STRING(": ");
                }
        } else {
                if (!identifier) {
                        /* Fall back to the sender's comm name; on failure
                         * comm stays NULL and no identifier is emitted. */
                        get_process_comm(ucred->pid, &comm);
                        identifier = comm;
                }

                if (identifier)
                        parts[n_parts++] = IOVEC_MAKE_STRING(identifier);

                xsprintf(pid_field, "["PID_FMT"]: ", ucred->pid);
                parts[n_parts++] = IOVEC_MAKE_STRING(pid_field);
        }

        /* Part 3: the message itself, terminated by a newline */
        parts[n_parts++] = IOVEC_MAKE_STRING(message);
        parts[n_parts++] = IOVEC_MAKE_STRING("\n");

        if (writev(s->dev_kmsg_fd, parts, n_parts) < 0)
                log_debug_errno(errno, "Failed to write to /dev/kmsg for logging: %m");
}
Ejemplo n.º 3
0
/* Removes one entry from the SIGBUS address queue.
 *
 * Returns 1 and stores the removed address in *ret when an entry was
 * claimed, 0 when the entry counter reads zero, and -EOVERFLOW when the
 * counter has reached SIGBUS_QUEUE_MAX. */
int sigbus_pop(void **ret) {
        assert(ret);

        for (;;) {
                unsigned u, c;

                /* Issue a full memory barrier before sampling the counter */
                __sync_synchronize();
                c = n_sigbus_queue;

                if (_likely_(c == 0))
                        return 0;

                if (_unlikely_(c >= SIGBUS_QUEUE_MAX))
                        return -EOVERFLOW;

                /* Walk every slot looking for a non-NULL address */
                for (u = 0; u < SIGBUS_QUEUE_MAX; u++) {
                        void *addr;

                        addr = sigbus_queue[u];
                        if (!addr)
                                continue;

                        /* Atomically swap the slot back to NULL; if the slot
                         * no longer holds addr the swap fails and the scan
                         * simply moves on to the next slot. */
                        if (__sync_bool_compare_and_swap(&sigbus_queue[u], addr, NULL)) {
                                __sync_fetch_and_sub(&n_sigbus_queue, 1);
                                *ret = addr;
                                return 1;
                        }
                }
                /* The counter was non-zero but no slot could be claimed in
                 * this pass: re-read the counter and try again. */
        }
}
Ejemplo n.º 4
0
/* Sends a structured journal entry built from a printf-style list of field
 * format strings, with three leading fields reserved for the code location.
 *
 * @file and @line are stored as-is in the first two fields; @func is run
 * through ALLOCA_CODE_FUNC() before being stored in the third (presumably
 * to prefix it with the field name -- confirm against the macro).
 *
 * Returns the result of sd_journal_sendv(), or the negative value reported
 * by fill_iovec_sprintf() if formatting failed. */
_public_ int sd_journal_send_with_location(const char *file, const char *line, const char *func, const char *format, ...) {
        struct iovec *fields = NULL;
        char *code_func;
        va_list args;
        int n_fields, result, k;

        va_start(args, format);
        n_fields = fill_iovec_sprintf(format, args, 3, &fields);
        va_end(args);

        if (_unlikely_(n_fields < 0))
                result = n_fields;
        else {
                ALLOCA_CODE_FUNC(code_func, func);

                IOVEC_SET_STRING(fields[0], file);
                IOVEC_SET_STRING(fields[1], line);
                IOVEC_SET_STRING(fields[2], code_func);

                result = sd_journal_sendv(fields, n_fields);
        }

        /* Only the formatted fields (index 3 and up) are heap-allocated; the
         * three location fields point at caller or stack memory. With a
         * negative count this loop does not run at all. */
        for (k = 3; k < n_fields; k++)
                free(fields[k].iov_base);

        free(fields);

        return result;
}
Ejemplo n.º 5
0
/**
 * c_variant_beginv() - begin a new container
 * @cv:         variant to operate on, or NULL
 * @containers: containers to write, or NULL
 * @args:       additional parameters
 *
 * This begins writing a new container to @cv, moving the iterator into the
 * container for following writes. The containers to enter have to be specified
 * via @containers (if NULL, the next container is entered). Whenever you enter
 * a variant, you must specify the type for the entire variant as another
 * argument in @args.
 *
 * Valid elements for @containers are:
 *   'v' to begin a variant
 *   'm' to begin a maybe
 *   'a' to begin an array
 *   '(' to begin a tuple
 *   '{' to begin a pair
 *
 * It is a programming error to call this on a sealed variant.
 *
 * Return: 0 on success, negative error code on failure.
 */
_public_ int c_variant_beginv(CVariant *cv, const char *containers, va_list args) {
        CVariantLevel *level;
        const char *type;
        int r;

        if (_unlikely_(!cv))
                return -ENOTUNIQ;

        assert(!cv->sealed);

        if (containers) {
                for ( ; *containers; ++containers) {
                        type = NULL;
                        switch (*containers) {
                        case C_VARIANT_VARIANT:
                                type = va_arg(args, const char *);
                                /* fallthrough */
                        case C_VARIANT_MAYBE:
                        case C_VARIANT_ARRAY:
                        case C_VARIANT_TUPLE_OPEN:
                        case C_VARIANT_PAIR_OPEN:
                                r = c_variant_begin_one(cv, *containers, type);
                                if (r < 0)
                                        return r;
                                break;
                        default:
                                return c_variant_poison(cv, -EMEDIUMTYPE);
                        }
                }
        } else {
Ejemplo n.º 6
0
/* Tells whether standard output refers to a terminal. isatty() is consulted
 * on the first call only; later calls return the remembered answer. */
bool on_tty(void) {
        /* -1 means "not probed yet", otherwise 0 or 1 */
        static int stdout_is_tty = -1;

        if (_unlikely_(stdout_is_tty < 0)) {
                if (isatty(STDOUT_FILENO) > 0)
                        stdout_is_tty = 1;
                else
                        stdout_is_tty = 0;
        }

        return stdout_is_tty != 0;
}
Ejemplo n.º 7
0
/* Tells whether the calling thread is the process's main thread, i.e.
 * whether its thread ID equals the process ID. The answer is remembered in
 * a thread-local variable after the first call. */
bool is_main_thread(void) {
        /* 0: not determined yet, 1: main thread, -1: some other thread */
        static thread_local int verdict = 0;

        if (_unlikely_(verdict == 0)) {
                if (getpid() == gettid())
                        verdict = 1;
                else
                        verdict = -1;
        }

        return verdict > 0;
}
Ejemplo n.º 8
0
/* Looks up an environment variable of another process by scanning the
 * NUL-separated entries of its procfs "environ" file.
 *
 * @pid:    process to inspect (passed to procfs_file_alloca())
 * @field:  variable name to look for, without the '='
 * @_value: on success receives a newly allocated copy of the value (to be
 *          freed by the caller), or NULL if the variable is not set
 *
 * Returns 1 if the variable was found, 0 if it was not, -ESRCH if the
 * environ file does not exist, -ENOMEM on allocation failure, or another
 * negative errno if the file cannot be opened. *_value is only written on
 * the 0 and 1 return paths.
 *
 * Entries longer than LINE_MAX-1 bytes are truncated to that length; the
 * remainder of such an entry is discarded rather than being interpreted as
 * a separate variable. */
int getenv_for_pid(pid_t pid, const char *field, char **_value) {
        _cleanup_fclose_ FILE *f = NULL;
        char *value = NULL;
        int r;
        bool done = false;
        size_t l;
        const char *path;

        assert(pid >= 0);
        assert(field);
        assert(_value);

        path = procfs_file_alloca(pid, "environ");

        f = fopen(path, "re");
        if (!f) {
                if (errno == ENOENT)
                        return -ESRCH;
                return -errno;
        }

        l = strlen(field);
        r = 0;

        do {
                char line[LINE_MAX];
                unsigned i;

                /* Read one NUL-terminated entry, up to the buffer size */
                for (i = 0; i < sizeof(line)-1; i++) {
                        int c;

                        c = getc(f);
                        if (_unlikely_(c == EOF)) {
                                done = true;
                                break;
                        } else if (c == 0)
                                break;

                        line[i] = c;
                }
                line[i] = 0;

                /* The loop above only runs to completion when the buffer
                 * filled up before the entry ended. Skip the rest of that
                 * entry, so that its tail is not mistaken for the start of
                 * the next variable. */
                if (i == sizeof(line)-1)
                        for (;;) {
                                int c;

                                c = getc(f);
                                if (c == EOF) {
                                        done = true;
                                        break;
                                }
                                if (c == 0)
                                        break;
                        }

                /* strncmp() stops at the entry's terminating NUL, hence it
                 * never looks at uninitialized bytes of the buffer, and
                 * line[l] is only inspected once the entry is known to be at
                 * least l bytes long. */
                if (strncmp(line, field, l) == 0 && line[l] == '=') {
                        value = strdup(line + l + 1);
                        if (!value)
                                return -ENOMEM;

                        r = 1;
                        break;
                }

        } while (!done);

        *_value = value;
        return r;
}
Ejemplo n.º 9
0
/* Hands out one tile of mp->tile_size bytes from the memory pool.
 *
 * Released tiles are reused first. Otherwise the tile is carved out of the
 * newest pool; when there is no pool yet or the newest one is exhausted, a
 * new pool is allocated with at least mp->at_least tiles or twice the tile
 * count of the previous pool, whichever is larger.
 *
 * Returns NULL if allocating a new pool fails. */
void* mempool_alloc_tile(struct mempool *mp) {
        struct pool *pool;
        unsigned slot;

        assert(mp->tile_size >= sizeof(void*));
        assert(mp->at_least > 0);

        /* A released tile stores the pointer to the next free tile at its
         * offset 0, so popping the list head is all that is needed. */
        if (mp->freelist) {
                void *tile = mp->freelist;

                mp->freelist = * (void**) tile;
                return tile;
        }

        pool = mp->first_pool;

        if (_unlikely_(!pool) ||
            _unlikely_(pool->n_used >= pool->n_tiles)) {
                struct pool *fresh;
                unsigned n_tiles;
                size_t bytes;

                n_tiles = pool ? pool->n_tiles : 0;
                n_tiles = MAX(mp->at_least, n_tiles * 2);

                /* Round the allocation up to whole pages, then recompute how
                 * many tiles actually fit into it. */
                bytes = PAGE_ALIGN(ALIGN(sizeof(struct pool)) + n_tiles*mp->tile_size);
                n_tiles = (bytes - ALIGN(sizeof(struct pool))) / mp->tile_size;

                fresh = malloc(bytes);
                if (!fresh)
                        return NULL;

                fresh->next = pool;
                fresh->n_tiles = n_tiles;
                fresh->n_used = 0;

                mp->first_pool = fresh;
                pool = fresh;
        }

        slot = pool->n_used++;

        return ((uint8_t*) pool) + ALIGN(sizeof(struct pool)) + slot*mp->tile_size;
}
Ejemplo n.º 10
0
/* nftw() callback that fixes the label of each visited path.
 *
 * The root of the walk (level 0) is left alone. Every other path is passed
 * to label_fix(). The directory /run/initramfs is itself relabelled, but
 * the walk does not descend into it.
 *
 * Returns FTW_CONTINUE, or FTW_SKIP_SUBTREE for /run/initramfs. */
static int nftw_cb(
                const char *fpath,
                const struct stat *sb,
                int tflag,
                struct FTW *ftwbuf) {

        /* No need to label /dev twice in a row... */
        if (_unlikely_(ftwbuf->level == 0))
                return FTW_CONTINUE;

        label_fix(fpath, false, false);

        /* /run/initramfs is static data and big, no need to
         * dynamically relabel its contents at boot... */
        if (_unlikely_(ftwbuf->level == 1 &&
                      tflag == FTW_D &&
                      streq(fpath, "/run/initramfs")))
                return FTW_SKIP_SUBTREE;

        return FTW_CONTINUE;
}
Ejemplo n.º 11
0
/* Builds the file system path below /sys/fs/cgroup for a controller, with
 * an optional cgroup @path and an optional @suffix appended.
 *
 * On success stores a newly allocated string in *fs and returns 0. Returns
 * -ENOENT if /sys/fs/cgroup is not a mount point (or the error reported by
 * that check), -EINVAL for an empty controller name and -ENOMEM if the
 * string cannot be allocated. */
int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
        static __thread bool hierarchy_mounted = false;
        const char *dir;
        char *joined;

        assert(controller);
        assert(fs);

        /* The mount point check is done until it succeeds once per thread;
         * after that the positive result is reused to save a few stat()s. */
        if (_unlikely_(!hierarchy_mounted)) {
                int r = path_is_mount_point("/sys/fs/cgroup", false);

                if (r < 0)
                        return r;
                if (r == 0)
                        return -ENOENT;

                hierarchy_mounted = true;
        }

        if (isempty(controller))
                return -EINVAL;

        /* Minimal mapping from controller name to directory name: the
         * systemd controller lives in "systemd", named hierarchies drop
         * their "name=" prefix, everything else is used verbatim. /proc/mounts
         * is not consulted. */
        if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
                dir = "systemd";
        else if (startswith(controller, "name="))
                dir = controller + 5;
        else
                dir = controller;

        if (path)
                joined = suffix ?
                        join("/sys/fs/cgroup/", dir, "/", path, "/", suffix, NULL) :
                        join("/sys/fs/cgroup/", dir, "/", path, NULL);
        else
                joined = suffix ?
                        join("/sys/fs/cgroup/", dir, "/", suffix, NULL) :
                        join("/sys/fs/cgroup/", dir, NULL);

        if (!joined)
                return -ENOMEM;

        path_kill_slashes(joined);

        *fs = joined;
        return 0;
}
Ejemplo n.º 12
0
static bool prefix_timestamp(void) {

        static int cached_printk_time = -1;

        if (_unlikely_(cached_printk_time < 0)) {
                _cleanup_free_ char *p = NULL;

                cached_printk_time =
                        read_one_line_file("/sys/module/printk/parameters/time", &p) >= 0
                        && parse_boolean(p) > 0;
        }

        return cached_printk_time;
}
Ejemplo n.º 13
0
/* Grows the iovec array of @cv by inserting at least @num zeroed vectors at
 * position @idx.
 *
 * The vectors and their trailing state array (one byte per vector, stored
 * directly behind the vectors in the same allocation) are copied into a
 * fresh allocation. More than @num vectors may be inserted, as a reserve
 * for later requests, so the caller must treat @num as a minimum. Callers
 * that hold front/end iterators into the old array are expected to adjust
 * them afterwards.
 *
 * Returns 0 on success. On failure the variant is poisoned with -ENOBUFS
 * (vector limit exceeded) or -ENOMEM and the result of c_variant_poison()
 * is returned. */
static int c_variant_insert_vecs(CVariant *cv, size_t idx, size_t num) {
        struct iovec *old_vecs, *new_vecs;
        char *old_state, *new_state;
        size_t old_n, new_n, n_tail;

        assert(idx <= cv->n_vecs);

        old_n = cv->n_vecs;
        new_n = old_n + num;
        if (_unlikely_(new_n < num || new_n > C_VARIANT_MAX_VECS))
                return c_variant_poison(cv, -ENOBUFS);

        /* Reserve a few extra vectors, capped at the hard limit */
        if (new_n + 8 < C_VARIANT_MAX_VECS)
                new_n += 8;
        else
                new_n = C_VARIANT_MAX_VECS;

        num = new_n - old_n;
        n_tail = old_n - idx;

        new_vecs = malloc(new_n * sizeof(*new_vecs) + new_n);
        if (!new_vecs)
                return c_variant_poison(cv, -ENOMEM);

        old_vecs = cv->vecs;
        old_state = (char *)(old_vecs + old_n);
        new_state = (char *)(new_vecs + new_n);

        /* State array: head, zeroed gap, tail */
        memcpy(new_state, old_state, idx);
        memset(new_state + idx, 0, num);
        memcpy(new_state + idx + num, old_state + idx, n_tail);

        /* Vector array: head, zeroed gap, tail */
        memcpy(new_vecs, old_vecs, idx * sizeof(*new_vecs));
        memset(new_vecs + idx, 0, num * sizeof(*new_vecs));
        memcpy(new_vecs + idx + num, old_vecs + idx, n_tail * sizeof(*new_vecs));

        /* The previous array is only freed if it was heap-allocated by an
         * earlier call; from now on the array always is. */
        if (cv->allocated_vecs)
                free(old_vecs);
        else
                cv->allocated_vecs = true;

        cv->vecs = new_vecs;
        cv->n_vecs = new_n;
        return 0;
}
Ejemplo n.º 14
0
/*
 * Makes sure the buffer has room for @extby more bytes beyond buf->len.
 *
 * An unallocated buffer (buflen == 0) asked for fewer than 64 bytes is given
 * 64 bytes. Otherwise the required size is buf->len + extby. If that exceeds
 * the current allocation, the size is passed through next_power() and the
 * data is reallocated.
 *
 * Returns 0 on success, or -ENOMEM if the required size overflows size_t or
 * the reallocation fails. On failure the buffer is left untouched.
 */
static int buffer_extendby(buffer_t *buf, size_t extby)
{
    char *data;
    size_t newlen;

    if (_unlikely_(!buf->buflen && extby < 64)) {
        newlen = 64;
    } else {
        newlen = buf->len + extby;
        /* A wrapped sum would look small enough to "fit" and be reported as
         * success without growing anything. */
        if (newlen < extby)
            return -ENOMEM;
    }

    if (newlen > buf->buflen) {
        newlen = next_power(newlen);
        data = realloc(buf->data, newlen);
        /* Do not rely on errno here: if it were 0, the failure would be
         * returned as success. */
        if (!data)
            return -ENOMEM;

        buf->buflen = newlen;
        buf->data = data;
    }

    return 0;
}
Ejemplo n.º 15
0
/* Builds the cgroup file system path for @controller, @path and @suffix by
 * handing the normalized controller name to join_path().
 *
 * Returns -ENOENT if /sys/fs/cgroup is not a mount point (or the error
 * reported by that check); otherwise returns whatever join_path() returns.
 * A NULL controller is passed on to join_path() as NULL. */
int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
        static __thread bool hierarchy_mounted = false;
        const char *normalized = NULL;

        assert(fs);

        /* The mount point check is done until it succeeds once per thread;
         * after that the positive result is reused to save a few stat()s. */
        if (_unlikely_(!hierarchy_mounted)) {
                int r = path_is_mount_point("/sys/fs/cgroup", false);

                if (r < 0)
                        return r;
                if (r == 0)
                        return -ENOENT;

                hierarchy_mounted = true;
        }

        if (controller)
                normalized = normalize_controller(controller);

        return join_path(normalized, path, suffix, fs);
}
Ejemplo n.º 16
0
/* Writes exactly @nbytes bytes from @buf to @fd, calling write() again
 * after short writes and after EINTR.
 *
 * If @do_poll is true, EAGAIN is handled by waiting for the descriptor to
 * become writable and retrying; otherwise EAGAIN is returned like any other
 * error.
 *
 * Returns 0 once everything was written, -EINVAL if @nbytes exceeds
 * SSIZE_MAX, -EIO if write() reports zero bytes written while data is
 * pending, or the negative errno of a failed write(). */
int loop_write(int fd, const void *buf, size_t nbytes, bool do_poll) {
        const uint8_t *cursor = buf;

        assert(fd >= 0);
        assert(buf);

        if (nbytes > (size_t) SSIZE_MAX)
                return -EINVAL;

        do {
                ssize_t written = write(fd, cursor, nbytes);

                if (written < 0) {
                        if (errno == EINTR)
                                continue;

                        if (errno != EAGAIN || !do_poll)
                                return -errno;

                        /* The result of the wait is deliberately ignored:
                         * any error or EOF is expected to surface through
                         * the next write() call. */
                        (void) fd_wait_for_event(fd, POLLOUT, USEC_INFINITY);
                        continue;
                }

                /* Should not really happen */
                if (_unlikely_(nbytes > 0 && written == 0))
                        return -EIO;

                assert((size_t) written <= nbytes);

                cursor += written;
                nbytes -= written;
        } while (nbytes > 0);

        return 0;
}
Ejemplo n.º 17
0
/* Writes one value of @n_arg bytes, taken from @arg, at the current
 * position of the variant's current level.
 *
 * The variant is poisoned with -EBADRQC if the current level has no type
 * left to write. Otherwise space is requested through c_variant_append()
 * and the value is copied into it.
 *
 * Returns 0 on success, or the negative error reported by
 * c_variant_poison() or c_variant_append(). */
static int c_variant_write_one(CVariant *cv, char basic, const void *arg, size_t n_arg) {
        CVariantLevel *cur;
        CVariantType next_type;
        void *dest;
        int res;

        assert(n_arg > 0);

        cur = &cv->state->levels[cv->state->i_levels];
        if (_unlikely_(cur->n_type < 1))
                return c_variant_poison(cv, -EBADRQC);

        /* With at least one type character left, fetching the next type is
         * expected to always succeed. */
        res = c_variant_signature_next(cur->type, cur->n_type, &next_type);
        assert(res == 1);

        res = c_variant_append(cv, basic, &next_type, 0, n_arg, &dest, 0, NULL);
        if (res < 0)
                return res;

        memcpy(dest, arg, n_arg);
        return 0;
}
Ejemplo n.º 18
0
/* Sends a structured journal entry built from a printf-style list of field
 * format strings.
 *
 * Returns the result of sd_journal_sendv(), or the negative value reported
 * by fill_iovec_sprintf() if formatting failed. */
_public_ int sd_journal_send(const char *format, ...) {
        struct iovec *fields = NULL;
        va_list args;
        int n_fields, result, k;

        va_start(args, format);
        n_fields = fill_iovec_sprintf(format, args, 0, &fields);
        va_end(args);

        if (_unlikely_(n_fields < 0))
                result = n_fields;
        else
                result = sd_journal_sendv(fields, n_fields);

        /* Release the formatted fields; with a negative count this loop
         * does not run at all. */
        for (k = 0; k < n_fields; k++)
                free(fields[k].iov_base);

        free(fields);

        return result;
}
Ejemplo n.º 19
0
/* Submits one journal entry, given as an array of @n "NAME=value" fields,
 * to the socket /run/systemd/journal/socket.
 *
 * Every field must have a non-NULL base, be longer than one byte and
 * contain a '=' that is not its first byte; otherwise -EINVAL is returned.
 * Fields whose data contains a newline are serialized as the name, a
 * newline, the 64bit little-endian value length and the value; all other
 * fields are sent as they are. Each field is followed by a newline.
 *
 * If no field named SYSLOG_IDENTIFIER is supplied and
 * program_invocation_short_name passes string_is_safe(), a
 * SYSLOG_IDENTIFIER field carrying that name is appended.
 *
 * The entry is first sent with sendmsg(). If that fails with EMSGSIZE or
 * ENOBUFS, the data is written to a memfd (or, if memfd_new() returns
 * -ENOSYS, to an unlinked temporary file in /dev/shm) and the file
 * descriptor is sent instead.
 *
 * Returns 0 on success, and also when the send fails with ENOENT; otherwise
 * a negative errno-style error. */
_public_ int sd_journal_sendv(const struct iovec *iov, int n) {
        PROTECT_ERRNO;
        int fd, r;
        _cleanup_close_ int buffer_fd = -1;
        struct iovec *w;
        uint64_t *l;
        int i, j = 0;
        static const union sockaddr_union sa = {
                .un.sun_family = AF_UNIX,
                .un.sun_path = "/run/systemd/journal/socket",
        };
        struct msghdr mh = {
                .msg_name = (struct sockaddr*) &sa.sa,
                .msg_namelen = SOCKADDR_UN_LEN(sa.un),
        };
        ssize_t k;
        bool have_syslog_identifier = false;
        bool seal = true;

        assert_return(iov, -EINVAL);
        assert_return(n > 0, -EINVAL);

        /* Each input field expands to at most five output vectors (name,
         * newline, length, data, trailing newline); three more are needed
         * for the implicit SYSLOG_IDENTIFIER field. l[] holds the encoded
         * length of each field that needs one. */
        w = newa(struct iovec, n * 5 + 3);
        l = newa(uint64_t, n);

        for (i = 0; i < n; i++) {
                char *c, *nl;

                if (_unlikely_(!iov[i].iov_base || iov[i].iov_len <= 1))
                        return -EINVAL;

                /* c points at the '=' that separates name and value */
                c = memchr(iov[i].iov_base, '=', iov[i].iov_len);
                if (_unlikely_(!c || c == iov[i].iov_base))
                        return -EINVAL;

                /* The name is SYSLOG_IDENTIFIER if the first '=' sits at
                 * offset 17 and the field starts with that string */
                have_syslog_identifier = have_syslog_identifier ||
                        (c == (char *) iov[i].iov_base + 17 &&
                         startswith(iov[i].iov_base, "SYSLOG_IDENTIFIER"));

                nl = memchr(iov[i].iov_base, '\n', iov[i].iov_len);
                if (nl) {
                        /* A newline inside the field name is not allowed */
                        if (_unlikely_(nl < c))
                                return -EINVAL;

                        /* Already includes a newline? Bummer, then
                         * let's write the variable name, then a
                         * newline, then the size (64bit LE), followed
                         * by the data and a final newline */

                        w[j].iov_base = iov[i].iov_base;
                        w[j].iov_len = c - (char*) iov[i].iov_base;
                        j++;

                        IOVEC_SET_STRING(w[j++], "\n");

                        l[i] = htole64(iov[i].iov_len - (c - (char*) iov[i].iov_base) - 1);
                        w[j].iov_base = &l[i];
                        w[j].iov_len = sizeof(uint64_t);
                        j++;

                        w[j].iov_base = c + 1;
                        w[j].iov_len = iov[i].iov_len - (c - (char*) iov[i].iov_base) - 1;
                        j++;

                } else
                        /* Nothing special? Then just add the line and
                         * append a newline */
                        w[j++] = iov[i];

                IOVEC_SET_STRING(w[j++], "\n");
        }

        if (!have_syslog_identifier &&
            string_is_safe(program_invocation_short_name)) {

                /* Implicitly add program_invocation_short_name, if it
                 * is not set explicitly. We only do this for
                 * program_invocation_short_name, and nothing else
                 * since everything else is much nicer to retrieve
                 * from the outside. */

                IOVEC_SET_STRING(w[j++], "SYSLOG_IDENTIFIER=");
                IOVEC_SET_STRING(w[j++], program_invocation_short_name);
                IOVEC_SET_STRING(w[j++], "\n");
        }

        fd = journal_fd();
        if (_unlikely_(fd < 0))
                return fd;

        mh.msg_iov = w;
        mh.msg_iovlen = j;

        k = sendmsg(fd, &mh, MSG_NOSIGNAL);
        if (k >= 0)
                return 0;

        /* Fail silently if the journal is not available */
        if (errno == ENOENT)
                return 0;

        /* Only "message too large" style errors trigger the fallback below */
        if (errno != EMSGSIZE && errno != ENOBUFS)
                return -errno;

        /* Message doesn't fit... Let's dump the data in a memfd or
         * temporary file and just pass a file descriptor of it to the
         * other side.
         *
         * For the temporary files we use /dev/shm instead of /tmp
         * here, since we want this to be a tmpfs, and one that is
         * available from early boot on and where unprivileged users
         * can create files. */
        buffer_fd = memfd_new(NULL);
        if (buffer_fd < 0) {
                if (buffer_fd == -ENOSYS) {
                        buffer_fd = open_tmpfile_unlinkable("/dev/shm", O_RDWR | O_CLOEXEC);
                        if (buffer_fd < 0)
                                return buffer_fd;

                        /* A plain temporary file is not sealed */
                        seal = false;
                } else
                        return buffer_fd;
        }

        /* n is reused here to hold the writev() result */
        n = writev(buffer_fd, w, j);
        if (n < 0)
                return -errno;

        if (seal) {
                r = memfd_set_sealed(buffer_fd);
                if (r < 0)
                        return r;
        }

        r = send_one_fd_sa(fd, buffer_fd, mh.msg_name, mh.msg_namelen, 0);
        if (r == -ENOENT)
                /* Fail silently if the journal is not available */
                return 0;
        return r;
}

/* Builds the three fields "PRIORITY=3", "MESSAGE=[message: ]<error text>"
 * and "ERRNO=<number>" for the errno value in effect when this function was
 * entered, stores them in @iov starting at index @skip, and sends the first
 * skip + 3 entries of @iov with sd_journal_sendv().
 *
 * The text buffer starts out with room for 256 bytes of error text and is
 * doubled for as long as strerror_r() reports ERANGE.
 *
 * Returns the result of sd_journal_sendv(), or the negative errno set by
 * strerror_r() for failures other than ERANGE. */
static int fill_iovec_perror_and_send(const char *message, int skip, struct iovec iov[]) {
        PROTECT_ERRNO;
        size_t prefix_len, size;

        /* Room for the caller's message plus the ": " separator, if any */
        prefix_len = isempty(message) ? 0 : strlen(message) + 2;
        size = 8 + prefix_len + 256 + 1;

        for (;;) {
                char buffer[size];
                char errno_field[sizeof("ERRNO=")-1 + DECIMAL_STR_MAX(int) + 1];
                char *text_slot = buffer + 8 + prefix_len;
                char *text;

                errno = 0;
                text = strerror_r(_saved_errno_, text_slot, size - 8 - prefix_len);
                if (errno != 0) {
                        if (errno != ERANGE)
                                return -errno;

                        /* Buffer too small, try again with twice the room */
                        size *= 2;
                        continue;
                }

                /* strerror_r() may return a pointer to some other string
                 * instead of filling our buffer; copy it in place then. */
                if (text != text_slot)
                        memmove(text_slot, text, strlen(text)+1);

                memcpy(buffer, "MESSAGE=", 8);

                if (prefix_len > 0) {
                        memcpy(buffer + 8, message, prefix_len - 2);
                        memcpy(buffer + 8 + prefix_len - 2, ": ", 2);
                }

                xsprintf(errno_field, "ERRNO=%i", _saved_errno_);

                assert_cc(3 == LOG_ERR);
                IOVEC_SET_STRING(iov[skip+0], "PRIORITY=3");
                IOVEC_SET_STRING(iov[skip+1], buffer);
                IOVEC_SET_STRING(iov[skip+2], errno_field);

                return sd_journal_sendv(iov, skip + 3);
        }
}

/* Logs the current errno to the journal, optionally prefixed by @message.
 * The three fields are built and sent by fill_iovec_perror_and_send(),
 * whose result is returned. */
_public_ int sd_journal_perror(const char *message) {
        struct iovec fields[3];

        return fill_iovec_perror_and_send(message, 0, fields);
}

/* Opens a stream connection to /run/systemd/journal/stdout and sends the
 * initial header describing the stream.
 *
 * @identifier:   identifier string for the stream; NULL is treated as ""
 * @priority:     priority digit written to the header, 0..7
 * @level_prefix: written to the header as '1' if non-zero, '0' otherwise
 *
 * Returns the connected file descriptor on success (ownership passes to the
 * caller), -EINVAL for an out-of-range priority, or a negative errno-style
 * error if connecting or writing the header fails. */
_public_ int sd_journal_stream_fd(const char *identifier, int priority, int level_prefix) {
        static const union sockaddr_union sa = {
                .un.sun_family = AF_UNIX,
                .un.sun_path = "/run/systemd/journal/stdout",
        };
        _cleanup_close_ int fd = -1;
        char *header, *cursor;
        size_t ident_len;
        int ret;

        assert_return(priority >= 0, -EINVAL);
        assert_return(priority <= 7, -EINVAL);

        fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0);
        if (fd < 0)
                return -errno;

        if (connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0)
                return -errno;

        /* Only the writing direction of the stream is kept open */
        if (shutdown(fd, SHUT_RD) < 0)
                return -errno;

        fd_inc_sndbuf(fd, SNDBUF_SIZE);

        if (!identifier)
                identifier = "";

        /* The header is a sequence of newline-terminated lines: identifier,
         * an empty unit id, the priority digit, the level-prefix flag and
         * three more lines that are always "0". */
        ident_len = strlen(identifier);
        header = alloca(ident_len + 1 + 1 + 2 + 2 + 2 + 2 + 2);

        memcpy(header, identifier, ident_len);
        cursor = header + ident_len;
        *cursor++ = '\n';
        *cursor++ = '\n'; /* unit id */
        *cursor++ = '0' + priority;
        *cursor++ = '\n';
        *cursor++ = '0' + !!level_prefix;
        *cursor++ = '\n';
        *cursor++ = '0';
        *cursor++ = '\n';
        *cursor++ = '0';
        *cursor++ = '\n';
        *cursor++ = '0';
        *cursor++ = '\n';

        ret = loop_write(fd, header, cursor - header, false);
        if (ret < 0)
                return ret;

        /* Hand the descriptor to the caller and keep the cleanup handler
         * from closing it. */
        ret = fd;
        fd = -1;
        return ret;
}

/* Variadic front end for sd_journal_printv_with_location(): collects the
 * format arguments into a va_list, forwards everything unchanged and
 * returns that function's result. */
_public_ int sd_journal_print_with_location(int priority, const char *file, const char *line, const char *func, const char *format, ...) {
        va_list args;
        int result;

        va_start(args, format);
        result = sd_journal_printv_with_location(priority, file, line, func, format, args);
        va_end(args);

        return result;
}

/* Formats a printf-style message and sends it to the journal together with
 * its priority and code location.
 *
 * The formatted text is limited to the LINE_MAX-sized buffer and trailing
 * whitespace is stripped. A message that is empty after stripping is not
 * sent and 0 is returned.
 *
 * Returns -EINVAL if @priority is outside 0..7 or @format is NULL,
 * otherwise the result of sd_journal_sendv(). */
_public_ int sd_journal_printv_with_location(int priority, const char *file, const char *line, const char *func, const char *format, va_list ap) {
        char message_field[8 + LINE_MAX], priority_field[sizeof("PRIORITY=")-1 + DECIMAL_STR_MAX(int) + 1];
        struct iovec fields[5];
        char *code_func;

        assert_return(priority >= 0, -EINVAL);
        assert_return(priority <= 7, -EINVAL);
        assert_return(format, -EINVAL);

        memcpy(message_field, "MESSAGE=", 8);
        vsnprintf(message_field+8, sizeof(message_field) - 8, format, ap);

        /* Drop trailing whitespace; leading whitespace of the message text
         * is kept, since the buffer starts with the field name. */
        (void) strstrip(message_field);

        /* Do not log empty lines */
        if (isempty(message_field+8))
                return 0;

        xsprintf(priority_field, "PRIORITY=%i", priority & LOG_PRIMASK);

        /* func comes from __func__, which is a static const char[] rather
         * than a macro and so cannot be prefixed with CODE_FUNC= at compile
         * time; do it at runtime instead. */
        ALLOCA_CODE_FUNC(code_func, func);

        zero(fields);
        IOVEC_SET_STRING(fields[0], message_field);
        IOVEC_SET_STRING(fields[1], priority_field);
        IOVEC_SET_STRING(fields[2], file);
        IOVEC_SET_STRING(fields[3], line);
        IOVEC_SET_STRING(fields[4], code_func);

        return sd_journal_sendv(fields, ELEMENTSOF(fields));
}
Ejemplo n.º 20
0
/* Mounts one legacy cgroup hierarchy below /sys/fs/cgroup for every
 * controller reported by cg_kernel_controllers().
 *
 * @join_controllers is an optional NULL-terminated list of string lists;
 * controllers that appear in the same list are mounted together in a single
 * hierarchy. Entries of a list that are not in the set of remaining
 * controllers are freed and removed from the list, i.e. the lists are
 * modified in place.
 *
 * Does nothing and returns 0 if cg_is_legacy_wanted() is false. Otherwise
 * returns 0 on success or a negative error; the final read-only remount of
 * /sys/fs/cgroup is best-effort and its result is ignored. */
int mount_cgroup_controllers(char ***join_controllers) {
        _cleanup_set_free_free_ Set *controllers = NULL;
        int r;

        if (!cg_is_legacy_wanted())
                return 0;

        /* Mount all available cgroup controllers that are built into the kernel. */

        controllers = set_new(&string_hash_ops);
        if (!controllers)
                return log_oom();

        r = cg_kernel_controllers(controllers);
        if (r < 0)
                return log_error_errno(r, "Failed to enumerate cgroup controllers: %m");

        /* Take controllers out of the set one by one until it is empty */
        for (;;) {
                _cleanup_free_ char *options = NULL, *controller = NULL, *where = NULL;
                MountPoint p = {
                        .what = "cgroup",
                        .type = "cgroup",
                        .flags = MS_NOSUID|MS_NOEXEC|MS_NODEV,
                        .mode = MNT_IN_CONTAINER,
                };
                char ***k = NULL;

                controller = set_steal_first(controllers);
                if (!controller)
                        break;

                /* Look for a join list that mentions this controller; if
                 * there is none, k ends up NULL or pointing at the list
                 * terminator. */
                if (join_controllers)
                        for (k = join_controllers; *k; k++)
                                if (strv_find(*k, controller))
                                        break;

                if (k && *k) {
                        char **i, **j;

                        /* Compact the join list in place: i reads, j writes */
                        for (i = *k, j = *k; *i; i++) {

                                if (!streq(*i, controller)) {
                                        _cleanup_free_ char *t;

                                        /* Take the co-mounted controller out
                                         * of the set as well. If it is not in
                                         * the set, drop it from the list. */
                                        t = set_remove(controllers, *i);
                                        if (!t) {
                                                free(*i);
                                                continue;
                                        }
                                }

                                *(j++) = *i;
                        }

                        *j = NULL;

                        options = strv_join(*k, ",");
                        if (!options)
                                return log_oom();
                } else {
                        /* Not joined with anything: the controller name is
                         * the mount option; ownership moves to options. */
                        options = controller;
                        controller = NULL;
                }

                where = strappend("/sys/fs/cgroup/", options);
                if (!where)
                        return log_oom();

                p.where = where;
                p.options = options;

                r = mount_one(&p, true);
                if (r < 0)
                        return r;

                /* For a joined hierarchy for which mount_one() returned a
                 * positive value, create one symlink per controller that
                 * points at the joined directory. */
                if (r > 0 && k && *k) {
                        char **i;

                        for (i = *k; *i; i++) {
                                _cleanup_free_ char *t = NULL;

                                t = strappend("/sys/fs/cgroup/", *i);
                                if (!t)
                                        return log_oom();

                                /* An already existing symlink is fine */
                                r = symlink(options, t);
                                if (r < 0 && errno != EEXIST)
                                        return log_error_errno(errno, "Failed to create symlink %s: %m", t);
#ifdef SMACK_RUN_LABEL
                                r = mac_smack_copy(t, options);
                                if (r < 0 && r != -EOPNOTSUPP)
                                        return log_error_errno(r, "Failed to copy smack label from %s to %s: %m", options, t);
#endif
                        }
                }
        }

        /* Now that we mounted everything, let's make the tmpfs the
         * cgroup file systems are mounted into read-only. */
        (void) mount("tmpfs", "/sys/fs/cgroup", "tmpfs", MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755");

        return 0;
}

#if defined(HAVE_SELINUX) || defined(HAVE_SMACK)
static int nftw_cb(
                const char *fpath,
                const struct stat *sb,
                int tflag,
                struct FTW *ftwbuf) {

        bool at_initramfs;

        /* Level 0 is the root of the walk itself; it does not need
         * to be labelled a second time, so leave it alone. */
        if (_unlikely_(ftwbuf->level == 0))
                return FTW_CONTINUE;

        label_fix(fpath, false, false);

        /* The contents of /run/initramfs are big and static: the
         * directory entry itself was handled above, but there is no
         * point in descending into it at boot. */
        at_initramfs = ftwbuf->level == 1 &&
                       tflag == FTW_D &&
                       streq(fpath, "/run/initramfs");
        if (_unlikely_(at_initramfs))
                return FTW_SKIP_SUBTREE;

        return FTW_CONTINUE;
}
#endif

int mount_setup(bool loaded_policy) {
        int r = 0;
        unsigned i;

        /* Walk the whole table even if an entry fails. Once a
         * negative result has been recorded it sticks; until then r
         * follows the most recent non-zero result. */
        for (i = 0; i < ELEMENTSOF(mount_table); i++) {
                int k = mount_one(&mount_table[i], loaded_policy);

                if (r >= 0 && k != 0)
                        r = k;
        }

        if (r < 0)
                return r;

#if defined(HAVE_SELINUX) || defined(HAVE_SMACK)
        /* devtmpfs and /run carry per-node labels which have to be
         * fixed up by hand once they are mounted. Virtual API file
         * systems such as /sys and /proc use a single label for all
         * of their files and hence need no such treatment. */
        if (loaded_policy) {
                char buf[FORMAT_TIMESPAN_MAX];
                usec_t start, elapsed;

                start = now(CLOCK_MONOTONIC);

                nftw("/dev", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
                nftw("/run", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);

                elapsed = now(CLOCK_MONOTONIC) - start;

                log_info("Relabelled /dev and /run in %s.",
                         format_timespan(buf, sizeof(buf), elapsed, 0));
        }
#endif

        /* udevd normally creates a handful of default symlinks, but
         * scripts may want them before udevd is running, so set
         * them up right away. */
        dev_setup(NULL, UID_INVALID, GID_INVALID);

        /* Switch the root directory to "shared" mount propagation.
         * The kernel default is "private", but "shared" lets nspawn
         * and the container tools work out of the box; setups that
         * need something else can still reset the propagation mode.
         * This is skipped when a container is detected. */
        if (detect_container() <= 0 &&
            mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL) < 0)
                log_warning_errno(errno, "Failed to set up the root directory for shared mount propagation: %m");

        /* Directories that must always exist. sd_booted() tests for
         * /run/systemd/system, so that mkdir has to stay for good:
         * software carrying its own copy of sd-daemon.c would
         * otherwise fail to detect systemd. */
        mkdir_label("/run/systemd", 0755);
        mkdir_label("/run/systemd/system", 0755);
        mkdir_label("/run/systemd/inaccessible", 0000);

        return 0;
}
Ejemplo n.º 21
0
static int c_variant_insert_one(CVariant *cv, const char *type, const struct iovec *vecs, size_t n_vecs, size_t size) {
        CVariantLevel *level;
        CVariantType info;
        struct iovec *front, *rest, *tail_vec;
        size_t type_len, k, slot;
        uint64_t frame;
        char *flags;
        int r;

        level = &cv->state->levels[cv->state->i_levels];
        if (_unlikely_(level->n_type < 1))
                return c_variant_poison(cv, -EBADRQC);

        r = c_variant_signature_next(level->type, level->n_type, &info);
        assert(r == 1);

        /* the caller-provided type must be exactly the next type of this level */
        type_len = strlen(type);
        if (_unlikely_(type_len != info.n_type))
                return c_variant_poison(cv, -EBADRQC);
        if (_unlikely_(strncmp(type, info.type, type_len) != 0))
                return c_variant_poison(cv, -EBADRQC);

        /* fixed-size types must be inserted with exactly their size */
        if (_unlikely_(info.size > 0 && size != info.size))
                return c_variant_poison(cv, -EBADMSG);

        r = c_variant_append(cv, *type, &info, n_vecs + 1, 0, NULL, 0, NULL);
        if (r < 0)
                return r;

        /* at least 'n_vecs + 1' vectors must be unused at this point */
        assert(cv->n_vecs - level->v_front - level->v_tail - 2U >= n_vecs + 1U);

        /*
         * Cut the current front vector at the write position and hand the
         * rest of its buffer to the vector that will follow the inserted
         * ones. The caller's vectors are then placed in between.
         */
        front = cv->vecs + level->v_front;
        rest = front + n_vecs + 1;
        rest->iov_base = (char *)front->iov_base + level->i_front;
        rest->iov_len = front->iov_len - level->i_front;
        front->iov_len = level->i_front;

        /*
         * One flag byte per vector is kept right behind the vector array;
         * if the flag of a slot is set, its old buffer is freed and the
         * flag cleared before the slot is overwritten.
         */
        flags = (char *)(cv->vecs + cv->n_vecs);
        for (k = 0; k < n_vecs; ++k) {
                slot = level->v_front + 1 + k;
                if (flags[slot]) {
                        flags[slot] = false;
                        free(cv->vecs[slot].iov_base);
                }
                cv->vecs[slot] = vecs[k];
        }

        level->v_front += n_vecs + 1;
        level->i_front = 0;
        level->offset += size;

        /* fixed-size elements carry no framing offset; nothing left to do */
        if (info.size >= 1)
                return 0;

        /* see c_variant_end_one(); the framing offset has to be updated */
        switch (level->enclosing) {
        case C_VARIANT_TUPLE_OPEN:
        case C_VARIANT_PAIR_OPEN:
                /* the last element never stores a framing offset */
                if (level->n_type < 1)
                        break;
                /* fallthrough */
        case C_VARIANT_ARRAY:
                assert(level->i_tail >= 8);
                assert(!(level->i_tail & 7));

                /* overwrite the most recent 8-byte frame slot in the tail */
                tail_vec = cv->vecs + cv->n_vecs - level->v_tail - 1;
                frame = level->offset;
                memcpy((char *)tail_vec->iov_base + level->i_tail - 8, &frame, sizeof(frame));
                break;
        }

        return 0;
}
Ejemplo n.º 22
0
static int c_variant_begin_one(CVariant *cv, char container, const char *variant) {
        CVariantLevel *parent, *child;
        CVariantType info;
        size_t n_tail = 0;
        void *tail;
        int r;

        r = c_variant_ensure_level(cv);
        if (r < 0)
                return r;

        /* only variants need tail space: it holds a copy of their type string */
        if (container == C_VARIANT_VARIANT)
                n_tail = strlen(variant);

        parent = &cv->state->levels[cv->state->i_levels];
        if (_unlikely_(parent->n_type < 1))
                return c_variant_poison(cv, -EBADRQC);

        r = c_variant_signature_next(parent->type, parent->n_type, &info);
        assert(r == 1);

        r = c_variant_append(cv, container, &info, 0, 0, NULL, n_tail, &tail);
        if (r < 0)
                return r;

        c_variant_push_level(cv);
        child = &cv->state->levels[cv->state->i_levels];

        /* the new level continues at the front/tail positions of its parent */
        child->enclosing = container;
        child->size = info.size;
        child->v_front = parent->v_front;
        child->i_front = parent->i_front;
        child->v_tail = parent->v_tail;
        child->i_tail = parent->i_tail;
        child->offset = 0;
        child->index = 0;
        /* wordsize is unused */

        switch (container) {
        case C_VARIANT_VARIANT:
                /* the type of the new level is the copy placed in the tail space */
                memcpy(tail, variant, n_tail);
                child->type = tail;
                child->n_type = n_tail;
                child->index = n_tail;
                child->i_tail += n_tail;
                break;
        case C_VARIANT_MAYBE:
        case C_VARIANT_ARRAY:
                /* skip the single leading container character */
                child->type = info.type + 1;
                child->n_type = info.n_type - 1;
                break;
        case C_VARIANT_TUPLE_OPEN:
        case C_VARIANT_PAIR_OPEN:
                /* skip the opening character and drop the closing one */
                child->type = info.type + 1;
                child->n_type = info.n_type - 2;
                break;
        default:
                assert(0);
                break;
        }

        return 0;
}
Ejemplo n.º 23
0
int mount_cgroup_controllers(char ***join_controllers) {
    int r;
    char buf[LINE_MAX];
    _cleanup_set_free_free_ Set *controllers = NULL;
    _cleanup_fclose_ FILE *f;

    /* Mount all available cgroup controllers that are built into the kernel. */

    f = fopen("/proc/cgroups", "re");
    if (!f) {
        log_error("Failed to enumerate cgroup controllers: %m");
        return 0;
    }

    controllers = set_new(string_hash_func, string_compare_func);
    if (!controllers)
        return log_oom();

    /* Ignore the header line */
    (void) fgets(buf, sizeof(buf), f);

    for (;;) {
        char *controller;
        int enabled = 0;

        if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {

            if (feof(f))
                break;

            log_error("Failed to parse /proc/cgroups.");
            return -EIO;
        }

        if (!enabled) {
            free(controller);
            continue;
        }

        r = set_consume(controllers, controller);
        if (r < 0) {
            log_error("Failed to add controller to set.");
            return r;
        }
    }

    for (;;) {
        MountPoint p = {
            .what = "cgroup",
            .type = "cgroup",
            .flags = MS_NOSUID|MS_NOEXEC|MS_NODEV,
            .mode = MNT_IN_CONTAINER,
        };
        char ***k = NULL;
        _cleanup_free_ char *options = NULL, *controller;

        controller = set_steal_first(controllers);
        if (!controller)
            break;

        if (join_controllers)
            for (k = join_controllers; *k; k++)
                if (strv_find(*k, controller))
                    break;

        if (k && *k) {
            char **i, **j;

            for (i = *k, j = *k; *i; i++) {

                if (!streq(*i, controller)) {
                    char _cleanup_free_ *t;

                    t = set_remove(controllers, *i);
                    if (!t) {
                        free(*i);
                        continue;
                    }
                }

                *(j++) = *i;
            }

            *j = NULL;

            options = strv_join(*k, ",");
            if (!options)
                return log_oom();
        } else {
            options = controller;
            controller = NULL;
        }

        p.where = strappenda("/sys/fs/cgroup/", options);
        p.options = options;

        r = mount_one(&p, true);
        if (r < 0)
            return r;

        if (r > 0 && k && *k) {
            char **i;

            for (i = *k; *i; i++) {
                char *t = strappenda("/sys/fs/cgroup/", *i);

                r = symlink(options, t);
                if (r < 0 && errno != EEXIST) {
                    log_error("Failed to create symlink %s: %m", t);
                    return -errno;
                }
            }
        }
    }

    return 0;
}

static int nftw_cb(
    const char *fpath,
    const struct stat *sb,
    int tflag,
    struct FTW *ftwbuf) {

    bool at_initramfs;

    /* Level 0 is the root of the walk itself; it does not need
     * to be labelled a second time, so leave it alone. */
    if (_unlikely_(ftwbuf->level == 0))
        return FTW_CONTINUE;

    label_fix(fpath, false, false);

    /* The contents of /run/initramfs are big and static: the
     * directory entry itself was handled above, but there is no
     * point in descending into it at boot. */
    at_initramfs = ftwbuf->level == 1 &&
                   tflag == FTW_D &&
                   streq(fpath, "/run/initramfs");
    if (_unlikely_(at_initramfs))
        return FTW_SKIP_SUBTREE;

    return FTW_CONTINUE;
}

int mount_setup(bool loaded_policy) {
    unsigned i;

    /* Mount the table entries in order; the first failure aborts. */
    for (i = 0; i < ELEMENTSOF(mount_table); i++) {
        int r = mount_one(&mount_table[i], true);

        if (r < 0)
            return r;
    }

    /* devtmpfs and /run carry per-node labels which have to be
     * fixed up by hand once they are mounted. Virtual API file
     * systems such as /sys and /proc use a single label for all
     * of their files and hence need no such treatment. */
    if (loaded_policy) {
        char buf[FORMAT_TIMESPAN_MAX];
        usec_t start, elapsed;

        start = now(CLOCK_MONOTONIC);

        nftw("/dev", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
        nftw("/run", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);

        elapsed = now(CLOCK_MONOTONIC) - start;

        log_info("Relabelled /dev and /run in %s.",
                 format_timespan(buf, sizeof(buf), elapsed, 0));
    }

    /* udevd normally creates a handful of default symlinks, but
     * scripts may want them before udevd is running, so set
     * them up right away. */
    dev_setup(NULL);

    /* Switch the root directory to "shared" mount propagation.
     * The kernel default is "private", but "shared" lets nspawn
     * and the container tools work out of the box; setups that
     * need something else can still reset the propagation mode.
     * This is skipped when a container is detected. */
    if (detect_container(NULL) <= 0 &&
        mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL) < 0)
        log_warning("Failed to set up the root directory for shared mount propagation: %m");

    /* Directories that must always exist. sd_booted() tests for
     * /run/systemd/system, so that mkdir has to stay for good:
     * software carrying its own copy of sd-daemon.c would
     * otherwise fail to detect systemd. */
    mkdir_label("/run/systemd", 0755);
    mkdir_label("/run/systemd/system", 0755);
    mkdir_label("/run/systemd/inaccessible", 0000);

    return 0;
}
Ejemplo n.º 24
0
/*
 * Sends one journal entry, made up of the n fields in iov, to the journal
 * socket as a single datagram.
 *
 * Every field must have the form "NAME=value": it needs more than one byte
 * and a '=' that is not the first byte. A field whose value contains a
 * newline is sent in binary framing (name, newline, 64bit little-endian
 * length, data, newline); all other fields are sent as-is followed by a
 * newline. If no field named SYSLOG_IDENTIFIER is present, one is added
 * from program_invocation_short_name.
 *
 * Returns -EINVAL for invalid arguments or malformed fields, the negative
 * value of journal_fd() if no socket is available, and -errno if sendmsg()
 * fails. ENOENT, EMSGSIZE and ENOBUFS from sendmsg() are not reported: in
 * those cases the entry is dropped and 0 is returned, as on success.
 */
_public_ int sd_journal_sendv(const struct iovec *iov, int n) {
        PROTECT_ERRNO;
        int fd;
        struct iovec *w;
        uint64_t *l;
        int i, j = 0;
        struct sockaddr_un sa = {
                .sun_family = AF_UNIX,
                .sun_path = JOURNAL_RUNDIR "/socket",
        };
        struct msghdr mh = {
                .msg_name = &sa,
                .msg_namelen = offsetof(struct sockaddr_un, sun_path) + strlen(sa.sun_path),
        };
        ssize_t k;
        bool have_syslog_identifier = false;

        assert_return(iov, -EINVAL);
        assert_return(n > 0, -EINVAL);

        /* Up to five output vectors per field (binary framing), plus three
         * for the implicit SYSLOG_IDENTIFIER field. The "+ 3" has to count
         * vectors, not bytes, or the writes below overrun the buffer. */
        w = alloca(sizeof(struct iovec) * ((size_t) n * 5 + 3));
        l = alloca(sizeof(uint64_t) * n);

        for (i = 0; i < n; i++) {
                char *c, *nl;

                if (_unlikely_(!iov[i].iov_base || iov[i].iov_len <= 1))
                        return -EINVAL;

                /* c points at the '=' that separates name and value */
                c = memchr(iov[i].iov_base, '=', iov[i].iov_len);
                if (_unlikely_(!c || c == iov[i].iov_base))
                        return -EINVAL;

                /* 17 is the length of "SYSLOG_IDENTIFIER": the name has to
                 * match exactly, not merely as a prefix */
                have_syslog_identifier = have_syslog_identifier ||
                        (c == (char *) iov[i].iov_base + 17 &&
                         startswith(iov[i].iov_base, "SYSLOG_IDENTIFIER"));

                nl = memchr(iov[i].iov_base, '\n', iov[i].iov_len);
                if (nl) {
                        /* a newline inside the field name is never valid */
                        if (_unlikely_(nl < c))
                                return -EINVAL;

                        /* Already includes a newline? Bummer, then
                         * let's write the variable name, then a
                         * newline, then the size (64bit LE), followed
                         * by the data and a final newline */

                        w[j].iov_base = iov[i].iov_base;
                        w[j].iov_len = c - (char*) iov[i].iov_base;
                        j++;

                        IOVEC_SET_STRING(w[j++], "\n");

                        /* the length lives in l[] so that it stays valid
                         * until sendmsg() has been called */
                        l[i] = htole64(iov[i].iov_len - (c - (char*) iov[i].iov_base) - 1);
                        w[j].iov_base = &l[i];
                        w[j].iov_len = sizeof(uint64_t);
                        j++;

                        w[j].iov_base = c + 1;
                        w[j].iov_len = iov[i].iov_len - (c - (char*) iov[i].iov_base) - 1;
                        j++;

                } else
                        /* Nothing special? Then just add the line and
                         * append a newline */
                        w[j++] = iov[i];

                IOVEC_SET_STRING(w[j++], "\n");
        }

        if (!have_syslog_identifier) {

                /* Implicitly add program_invocation_short_name, if it
                 * is not set explicitly. We only do this for
                 * program_invocation_short_name, and nothing else
                 * since everything else is much nicer to retrieve
                 * from the outside. */

                IOVEC_SET_STRING(w[j++], "SYSLOG_IDENTIFIER=");
                IOVEC_SET_STRING(w[j++], program_invocation_short_name);
                IOVEC_SET_STRING(w[j++], "\n");
        }

        fd = journal_fd();
        if (_unlikely_(fd < 0))
                return fd;

        mh.msg_iov = w;
        mh.msg_iovlen = j;

        k = sendmsg(fd, &mh, MSG_NOSIGNAL);
        if (k >= 0)
                return 0;

        /* Fail silently if the journal is not available */
        if (errno == ENOENT)
                return 0;

        if (errno != EMSGSIZE && errno != ENOBUFS)
                return -errno;

        /* The entry did not fit into a single datagram. Nothing further is
         * attempted here: the entry is dropped and success is reported. */
        return 0;
}

static int fill_iovec_perror_and_send(const char *message, int skip, struct iovec iov[]) {
        PROTECT_ERRNO;
        size_t prefix_len, size;

        /* "<message>: " takes strlen(message) + 2 bytes; nothing at all
         * if no message was passed */
        prefix_len = isempty(message) ? 0 : strlen(message) + 2;

        /* "MESSAGE=" (8 bytes) + prefix + room for the error text + NUL */
        size = 8 + prefix_len + 256 + 1;

        /* Retry with a buffer twice as large for as long as strerror_r()
         * reports ERANGE. */
        for (;; size *= 2) {
                char line[size];
                char errno_field[6 + 10 + 1]; /* for a 32bit value */
                char *text_at = line + 8 + prefix_len;
                char *text;

                errno = 0;
                text = strerror_r(_saved_errno_, text_at, size - 8 - prefix_len);
                if (errno != 0) {
                        if (errno != ERANGE)
                                return -errno;

                        continue;
                }

                /* the returned string is not necessarily inside our buffer;
                 * copy it into place if it is somewhere else */
                if (text != text_at)
                        memmove(text_at, text, strlen(text) + 1);

                memcpy(line, "MESSAGE=", 8);

                if (prefix_len > 0) {
                        memcpy(line + 8, message, prefix_len - 2);
                        memcpy(line + 8 + prefix_len - 2, ": ", 2);
                }

                snprintf(errno_field, sizeof(errno_field), "ERRNO=%u", _saved_errno_);
                char_array_0(errno_field);

                /* the first 'skip' entries of iov are left untouched */
                IOVEC_SET_STRING(iov[skip+0], "PRIORITY=3");
                IOVEC_SET_STRING(iov[skip+1], line);
                IOVEC_SET_STRING(iov[skip+2], errno_field);

                return sd_journal_sendv(iov, skip + 3);
        }
}

_public_ int sd_journal_perror(const char *message) {
        /* room for exactly the three fields filled in by
         * fill_iovec_perror_and_send(); none are skipped */
        struct iovec fields[3];
        int r;

        r = fill_iovec_perror_and_send(message, 0, fields);
        return r;
}
Ejemplo n.º 25
0
/*
 * Accounts for one element that is about to be written at the current level
 * and reserves the buffer space it needs.
 *
 * @element must match the first character of the type remaining at the
 * current level, otherwise the variant is poisoned with -EBADRQC. @info
 * describes the element (its size, alignment and type length are used).
 * @n_extra_vecs, @n_front and @frontp are handed to c_variant_reserve()
 * unchanged. @n_unaccounted_tail bytes of tail space are reserved but
 * subtracted from the tail marker again. @tailp may be NULL; otherwise it
 * receives a pointer to the reserved tail space, located behind the frame
 * slot if one was written.
 *
 * Returns 0 on success, otherwise the error of c_variant_poison() or
 * c_variant_reserve().
 */
static int c_variant_append(CVariant *cv,
                            char element,
                            CVariantType *info,
                            size_t n_extra_vecs,
                            size_t n_front,
                            void **frontp,
                            size_t n_unaccounted_tail,
                            void **tailp) {
        CVariantLevel *level = cv->state->levels + cv->state->i_levels;
        bool need_frame = false;
        void *tail;
        int r;

        if (_unlikely_(level->n_type < 1 || *level->type != element))
                return c_variant_poison(cv, -EBADRQC);

        assert(info->size == 0 || n_front == 0 || n_front == info->size);

        /*
         * Dynamic-sized elements of arrays always need a frame. In tuples
         * and pairs they need one unless the element uses up all of the
         * remaining type, i.e. it is the last one.
         */
        switch (level->enclosing) {
        case C_VARIANT_TUPLE_OPEN:
        case C_VARIANT_PAIR_OPEN:
                if (info->n_type >= level->n_type)
                        break;
                /* fallthrough */
        case C_VARIANT_ARRAY:
                need_frame = (info->size < 1);
                break;
        }

        /*
         * If we need to store a frame pointer, we *must* guarantee 8-byte
         * alignment and allocate an extra 8 bytes at the tail.
         * We always additionally allocate @n_unaccounted_tail bytes at the
         * tail, which have *NO* alignment guarantees. But those bytes are
         * *unaccounted*, that is, we immediately subtract them from the tail
         * marker of this level again.
         */
        r = c_variant_reserve(cv, n_extra_vecs,
                              info->alignment, n_front, frontp,
                              need_frame ? 3 : 0,
                              n_unaccounted_tail + (need_frame ? 8 : 0),
                              &tail);
        if (r < 0)
                return r;

        /* de-account extra tail-space */
        assert(n_unaccounted_tail <= level->i_tail);
        level->i_tail -= n_unaccounted_tail;

        /* store frame */
        if (need_frame) {
                uint64_t frame = level->offset;

                ++level->index;
                /*
                 * Copy the frame bytewise. Tail space is only aligned
                 * relative to the tail marker, so the address itself is not
                 * guaranteed to be suitably aligned for a direct uint64_t
                 * store.
                 */
                memcpy(tail, &frame, sizeof(frame));
                tail = (char *)tail + 8;
        }

        switch (level->enclosing) {
        case C_VARIANT_ARRAY:
                /* all array elements share one type; do not advance it */
                break;
        case C_VARIANT_MAYBE:
                /* write maybe-marker for non-empty, dynamic maybes */
                if (info->size < 1)
                        ++level->index;
                /* fallthrough */
        default:
                /* this element's type is consumed; move on to the next */
                level->type += info->n_type;
                level->n_type -= info->n_type;
                break;
        }

        if (tailp)
                *tailp = tail;
        return 0;
}
Ejemplo n.º 26
0
/*
 * Reserves @front_allocation bytes at the front and @tail_allocation bytes
 * at the tail of the current level, allocating new buffers if the current
 * ones are too small.
 *
 * @front_alignment and @tail_alignment are given as powers of two (the
 * alignment in bytes is 1 << value). @n_extra_vecs is the number of unused
 * vectors that must remain available between the front and tail vectors.
 * @frontp and @tailp may be NULL; otherwise they receive the start of the
 * respective reserved space.
 *
 * Returns 0 on success, otherwise the error of c_variant_insert_vecs() or
 * of c_variant_poison() if no memory could be allocated.
 */
static int c_variant_reserve(CVariant *cv,
                             size_t n_extra_vecs,
                             size_t front_alignment,
                             size_t front_allocation,
                             void **frontp,
                             size_t tail_alignment,
                             size_t tail_allocation,
                             void **tailp) {
        CVariantLevel *level;
        size_t i, j, n, rem, n_front, n_tail;
        struct iovec *vec_front, *vec_tail;
        void *p;
        int r;

        /*
         * This advances the front and tail markers according to the requested
         * allocation size. If an alignment is given, the start is aligned
         * before the marker is advanced. If required, new buffer space is
         * allocated.
         *
         * On success, a pointer to the start of each reserved buffer space is
         * returned in @frontp and @tailp. On failure, both markers will stay
         * untouched.
         *
         * Note that front-alignment is always according to the global
         * alignment (i.e., it adheres to level->offset (and as such iov_base)
         * rather than level->i_front). But tail-alignment is always local-only
         * (adhering to level->i_tail). There is no global context for tail
         * space, so no way to align it as such.
         */

        /* both are mapped, hence cannot overflow size_t (with alignment) */
        assert(front_allocation + tail_allocation + 16 > front_allocation);

        /* n_front / n_tail: requested bytes including alignment padding */
        level = cv->state->levels + cv->state->i_levels;
        n_front = front_allocation + ALIGN_TO(level->offset, 1 << front_alignment) - level->offset;
        n_tail = tail_allocation + ALIGN_TO(level->i_tail, 1 << tail_alignment) - level->i_tail;
        vec_front = cv->vecs + level->v_front;
        vec_tail = cv->vecs + cv->n_vecs - level->v_tail - 1;

        /*
         * If the remaining space is not enough to fulfill the request, search
         * through the unused vectors, in case there is unused buffer space
         * that is sufficient for the request. If we find one, move it directly
         * next to our current vector, so we can jump over.
         *
         * From here on, a non-zero n_front / n_tail means "a new buffer has
         * to be allocated for this side".
         */
        if (n_front > vec_front->iov_len - level->i_front) {
                for (i = 1; vec_front + i < vec_tail; ++i) {
                        if (n_front > (vec_front + i)->iov_len)
                                continue;

                        c_variant_swap_vecs(cv,
                                            (vec_front + i) - cv->vecs,
                                            (vec_front + 1) - cv->vecs);
                        ++vec_front;
                        n_front = 0;
                        break;
                }
        } else if (n_front > 0) {
                /* fits into @vec_front */
                n_front = 0;
        }

        /* counter-part for tail-allocation */
        if (n_tail > vec_tail->iov_len - level->i_tail) {
                for (i = 1; vec_tail - i > vec_front; ++i) {
                        if (n_tail > (vec_tail - i)->iov_len)
                                continue;

                        c_variant_swap_vecs(cv,
                                            (vec_tail - i) - cv->vecs,
                                            (vec_tail - 1) - cv->vecs);
                        --vec_tail;
                        n_tail = 0;
                        break;
                }
        } else if (n_tail > 0) {
                /* fits into @vec_tail */
                n_tail = 0;
        }

        /* make sure enough unused vectors remain between front and tail */
        n = vec_tail - vec_front - 1;
        if (_unlikely_(n < n_extra_vecs + 2 * !!(n_front || n_tail))) {
                /* remember tail-index since realloc might move it */
                j = vec_front - cv->vecs;
                i = cv->n_vecs - (vec_tail - cv->vecs);

                r = c_variant_insert_vecs(cv, j + 1, n_extra_vecs + 2);
                if (r < 0)
                        return r;

                /* re-calculate vectors, as they might have moved */
                vec_front = cv->vecs + j;
                vec_tail = cv->vecs + cv->n_vecs - i;
        }

        /* if either is non-zero, we need a new buffer allocation */
        if (_unlikely_(n_front || n_tail)) {
                /*
                 * Now that we have the iovecs, we need the actual buffer
                 * space. We start with 2^12 bytes (4k / one page), and
                 * increase it for each allocated buffer by a factor of 2, up
                 * to an arbitrary limit of 2^31.
                 *
                 * The shift is done in size_t: with a plain int, shifting 1
                 * by 31 overflows.
                 */
                n = (size_t)1 << (12 + ((cv->a_vecs > 19) ? 19 : cv->a_vecs));
                if (n < n_front + n_tail + 16)
                        n = n_front + n_tail + 16;

                p = malloc(n);
                if (!p) {
                        /* retry with the bare minimum before giving up */
                        n = n_front + n_tail + 16;
                        p = malloc(n);
                        if (!p)
                                return c_variant_poison(cv, -ENOMEM);
                }

                /* count how often we allocated; protect against overflow */
                if (++cv->a_vecs < 1)
                        --cv->a_vecs;

                /*
                 * The bytes right behind the vector array are per-vector
                 * flags; a set flag marks a vector whose iov_base is freed
                 * when it gets replaced.
                 */
                if (n_front) {
                        ++vec_front;
                        if (((char *)(cv->vecs + cv->n_vecs))[vec_front - cv->vecs])
                                free(vec_front->iov_base);

                        vec_front->iov_base = p;
                        vec_front->iov_len = n;
                        ((char *)(cv->vecs + cv->n_vecs))[vec_front - cv->vecs] = true;
                }

                if (n_tail) {
                        --vec_tail;
                        if (((char *)(cv->vecs + cv->n_vecs))[vec_tail - cv->vecs])
                                free(vec_tail->iov_base);

                        vec_tail->iov_base = p;
                        vec_tail->iov_len = n;
                        ((char *)(cv->vecs + cv->n_vecs))[vec_tail - cv->vecs] = true;
                }

                if (n_front && n_tail) {
                        /*
                         * if both allocated, we need to split properly; the
                         * tail vector then points into the front vector's
                         * buffer, so its flag is cleared again
                         */
                        rem = n - n_front - n_tail - 16;
                        vec_front->iov_len = n_front + 8 + (rem * C_VARIANT_FRONT_SHARE / 100);
                        vec_tail->iov_base = (char *)p + vec_front->iov_len;
                        vec_tail->iov_len = n - vec_front->iov_len;
                        ((char *)(cv->vecs + cv->n_vecs))[vec_tail - cv->vecs] = false;
                }
        }

        if (vec_front != cv->vecs + level->v_front) {
                /* vector was updated; clip previous and then advance */
                assert(vec_front - 1 == cv->vecs + level->v_front);

                (vec_front - 1)->iov_len = level->i_front;
                ++level->v_front;
                level->i_front = 0;

                /* front vectors must be aligned according to current offset */
                assert(vec_front->iov_base == ALIGN_PTR_TO(vec_front->iov_base, 8));
                n = level->offset & 7;
                vec_front->iov_base = (char *)vec_front->iov_base + n;
                vec_front->iov_len -= n;
        }

        if (vec_tail != cv->vecs + cv->n_vecs - level->v_tail - 1) {
                /* vector was updated; clip previous and then advance */
                assert(vec_tail + 1 == cv->vecs + cv->n_vecs - level->v_tail - 1);

                (vec_tail + 1)->iov_len = level->i_tail;
                ++level->v_tail;
                level->i_tail = 0;
        }

        /*
         * We are done! Apply alignment before returning a pointer to the
         * reserved space. Then advance the iterators, so the space is actually
         * reserved and will not get re-used.
         */

        /* front padding is zero-filled and counts towards the offset */
        n = ALIGN_TO(level->offset, 1 << front_alignment) - level->offset;
        memset((char *)vec_front->iov_base + level->i_front, 0, n);
        level->i_front += n;
        level->offset += n;
        level->i_tail = ALIGN_TO(level->i_tail, 1 << tail_alignment);

        if (frontp)
                *frontp = (char *)vec_front->iov_base + level->i_front;
        if (tailp)
                *tailp = (char *)vec_tail->iov_base + level->i_tail;

        level->i_front += front_allocation;
        level->offset += front_allocation;
        level->i_tail += tail_allocation;

        return 0;
}
Ejemplo n.º 27
0
/*
 * Close the container on the current (innermost) level of @cv.
 *
 * The function reserves the trailing front bytes the container needs
 * (management data for variable-size containers, zero padding for fixed-size
 * ones), pops the level, writes the management data into the reserved front
 * space, advances the parent level past the finished child and, if required,
 * records the child's end offset in the parent's tail buffer.
 *
 * Returns 0 on success. If called on the root level, the variant is poisoned
 * with -EBADRQC and the result of c_variant_poison() is returned. A negative
 * result of c_variant_reserve() is passed through unchanged.
 */
static int c_variant_end_one(CVariant *cv) {
        CVariantLevel *child, *parent;
        size_t slot, pending, n_front, wz, avail;
        void *front, *tail;
        struct iovec *vec;
        uint64_t frame, word;
        int r, stride, is_fixed;

        /* the root level is not a container, there is nothing to close */
        if (_unlikely_(c_variant_on_root_level(cv)))
                return c_variant_poison(cv, -EBADRQC);

        child = cv->state->levels + cv->state->i_levels;
        wz = c_variant_word_size(child->offset, child->index);

        /* number of management bytes that go behind the container payload */
        if (child->enclosing == C_VARIANT_VARIANT) {
                n_front = child->index + 1;
        } else if (child->enclosing == C_VARIANT_MAYBE) {
                n_front = (child->index > 0) ? 1 : 0;
        } else if (child->enclosing == C_VARIANT_ARRAY ||
                   child->enclosing == C_VARIANT_TUPLE_OPEN ||
                   child->enclosing == C_VARIANT_PAIR_OPEN) {
                n_front = child->index * (1 << wz);
        } else {
                assert(0);
                return c_variant_poison(cv, -EFAULT);
        }

        /*
         * Variable-size containers (size < 1) need 'n_front' additional front
         * bytes for framing-offsets and other management data. No alignment
         * is enforced, nor is trailing padding added.
         *
         * Fixed-size containers never carry management data, but they *must*
         * have a size that matches child->size. Hence, the remaining bytes
         * are reserved and zeroed as trailing padding.
         */
        is_fixed = !(child->size < 1);
        if (is_fixed) {
                assert(!n_front);
                assert(child->offset <= child->size);

                n_front = child->size - child->offset;
        }

        r = c_variant_reserve(cv, 0, 0, n_front, &front, 0, 0, &tail);
        if (r < 0)
                return r;

        if (is_fixed)
                memset(front, 0, n_front);

        /* leave the container; from here on 'child' is the closed level */
        c_variant_pop_level(cv);
        parent = cv->state->levels + cv->state->i_levels;

        if (child->enclosing == C_VARIANT_VARIANT) {
                /*
                 * A zero byte, followed by the child->index bytes located
                 * right in front of the tail pointer (presumably the type
                 * string of the variant's child; not visible from here).
                 */
                *(char *)front = 0;
                memcpy((char *)front + 1, (char *)tail - child->index, child->index);
        } else if (child->enclosing == C_VARIANT_MAYBE) {
                /* a single zero byte, only if one was reserved above */
                if (child->index > 0)
                        *(char *)front = 0;
        } else if (child->enclosing == C_VARIANT_ARRAY ||
                   child->enclosing == C_VARIANT_TUPLE_OPEN ||
                   child->enclosing == C_VARIANT_PAIR_OPEN) {
                /*
                 * Copy the 8-byte frames tracked in the tail buffers into the
                 * front as words of size (1 << wz). Arrays fill the slots
                 * backwards to reverse the frame order; tuples and pairs fill
                 * them forwards. For an empty array 'slot' wraps around, but
                 * it is never used since the loop does not run.
                 */
                stride = (child->enclosing == C_VARIANT_ARRAY) ? -1 : 1;
                if (stride < 0)
                        slot = child->index - 1;
                else
                        slot = 0;

                vec = cv->vecs + cv->n_vecs - child->v_tail - 1;
                avail = child->i_tail;

                pending = child->index;
                while (pending > 0) {
                        --pending;

                        /* current tail vector exhausted; move to the next one */
                        while (_unlikely_(avail < 8)) {
                                assert(avail == 0);
                                ++vec;
                                avail = vec->iov_len;
                                assert(!(avail & 7));
                        }

                        avail -= 8;
                        word = *(uint64_t *)((char *)vec->iov_base + avail);
                        c_variant_word_store((char *)front + slot * (1 << wz), wz, word);

                        slot += stride;
                }
        }

        /*
         * Advance the parent level by the size of the completed child. The
         * parent level was already aligned correctly when the child was
         * entered, so child->offset correctly reflects the distance in bytes
         * between both fronts.
         */
        parent->v_front = child->v_front;
        parent->i_front = child->i_front;
        parent->offset += child->offset;

        /*
         * If the closed child was a dynamic-sized type, its framing-offset
         * must be stored at the parent's tail. Memory for it was already
         * reserved when the container was created, we just recover the
         * pointer to it and write the now known framing offset.
         *
         * Tuples and pairs never store a framing offset for their last type
         * (n_type < 1). This also guarantees that the root level never writes
         * framing offsets. Parents that are neither array, tuple nor pair
         * store nothing at all.
         *
         * This only stores our internal state-tracking at the tail buffer.
         * This is *not* the final serialized data. Only once the parent
         * container itself is closed, the state-array is properly serialized.
         */
        if (!(child->size < 1))
                return 0;

        if (parent->enclosing == C_VARIANT_TUPLE_OPEN ||
            parent->enclosing == C_VARIANT_PAIR_OPEN) {
                /* last element never stores framing offsets */
                if (parent->n_type < 1)
                        return 0;
        } else if (parent->enclosing != C_VARIANT_ARRAY) {
                return 0;
        }

        assert(parent->i_tail >= 8);
        assert(!(parent->i_tail & 7));

        vec = cv->vecs + cv->n_vecs - parent->v_tail - 1;
        frame = parent->offset;
        memcpy((char *)vec->iov_base + parent->i_tail - 8, &frame, sizeof(frame));

        return 0;
}