/*
 * Stop per-thread perf event collection: detach the main-loop snapshot
 * callback, then disable, unmap and close every active event fd owned
 * by this thread. Slots are zeroed so a later enable starts clean.
 */
static void
disable_events (perfmon_main_t * pm)
{
  vlib_main_t *vm = vlib_get_main ();
  u32 thread = vm->thread_index;
  int ev;

  /* Stop main loop collection */
  vm->vlib_node_runtime_perf_counter_cb = 0;

  for (ev = 0; ev < pm->n_active; ev++)
    {
      int fd = pm->pm_fds[ev][thread];
      void *mmap_page = pm->perf_event_pages[ev][thread];

      if (fd == 0)
	continue;

      if (ioctl (fd, PERF_EVENT_IOC_DISABLE, 0) < 0)
	clib_unix_warning ("disable ioctl");

      /* Only hardware events have an mmap'ed metadata page */
      if (mmap_page && munmap (mmap_page, pm->page_size) < 0)
	clib_unix_warning ("munmap");

      (void) close (fd);
      pm->pm_fds[ev][thread] = 0;
    }
}
/**
 * Initialize memfd segment slave
 *
 * Subtly different than svm_slave_init. The caller needs to acquire
 * a usable file descriptor for the memfd segment e.g. via
 * vppinfra/socket.c:default_socket_recvmsg
 */
int
ssvm_slave_init_memfd (ssvm_private_t * memfd)
{
  clib_mem_vm_map_t mapa = { 0 };
  ssvm_shared_header_t *sh;
  uword page_size;

  memfd->i_am_master = 0;

  /* One page is enough to inspect the shared header below. */
  page_size = clib_mem_get_fd_page_size (memfd->fd);
  if (!page_size)
    {
      clib_unix_warning ("page size unknown");
      return SSVM_API_ERROR_MMAP;
    }

  /*
   * Map the segment once, to look at the shared header
   */
  mapa.fd = memfd->fd;
  mapa.size = page_size;
  if (clib_mem_vm_ext_map (&mapa))
    {
      clib_unix_warning ("slave research mmap (fd %d)", mapa.fd);
      close (memfd->fd);
      return SSVM_API_ERROR_MMAP;
    }

  sh = mapa.addr;
  /* The master recorded the segment's intended VA and full size. */
  memfd->requested_va = sh->ssvm_va;
  memfd->ssvm_size = sh->ssvm_size;
  /* Drop the one-page scout mapping before the full-size remap. */
  clib_mem_vm_free (sh, page_size);

  /*
   * Remap the segment at the 'right' address
   */
  /* mapa.fd is still memfd->fd from the first mapping. */
  mapa.requested_va = memfd->requested_va;
  mapa.size = memfd->ssvm_size;
  if (clib_mem_vm_ext_map (&mapa))
    {
      clib_unix_warning ("slave final mmap");
      close (memfd->fd);
      return SSVM_API_ERROR_MMAP;
    }

  sh = mapa.addr;
  /* Announce our pid to the master via the shared header. */
  sh->slave_pid = getpid ();
  memfd->sh = sh;
  return 0;
}
/**
 * Transmit one buffer chain as a single UDP datagram via sendmsg(2).
 *
 * Gathers the buffer chain into msm->iovecs, asserts it fits in the
 * transport MTU, and retries while sendmsg returns EAGAIN. A short or
 * failed send is logged (not returned as an error); retry counts are
 * recorded in the event log.
 *
 * @param msm           socket main
 * @param socket        datagram socket fd
 * @param tx_addr       destination address
 * @param buffer_index  head of the vlib buffer chain to send
 * @return always 0 (errors are warned, not propagated)
 */
static clib_error_t *
sendmsg_helper (mc_socket_main_t * msm,
		int socket, struct sockaddr_in *tx_addr, u32 buffer_index)
{
  vlib_main_t *vm = msm->mc_main.vlib_main;
  struct msghdr h;
  word n_bytes, n_bytes_tx, n_retries;

  memset (&h, 0, sizeof (h));
  h.msg_name = tx_addr;
  h.msg_namelen = sizeof (tx_addr[0]);

  /* Reuse the iovec vector across calls; reset length only. */
  if (msm->iovecs)
    _vec_len (msm->iovecs) = 0;

  n_bytes = append_buffer_index_to_iovec (vm, buffer_index, &msm->iovecs);
  ASSERT (n_bytes <= msm->mc_main.transport.max_packet_size);
  if (n_bytes > msm->mc_main.transport.max_packet_size)
    /* fixed typo: "interace" -> "interface" */
    clib_error ("sending packet larger than interface MTU %d bytes", n_bytes);

  h.msg_iov = msm->iovecs;
  h.msg_iovlen = vec_len (msm->iovecs);

  /* Spin on EAGAIN; count retries for the event log. */
  n_retries = 0;
  while ((n_bytes_tx = sendmsg (socket, &h, /* flags */ 0)) != n_bytes
	 && errno == EAGAIN)
    n_retries++;
  if (n_bytes_tx != n_bytes)
    {
      clib_unix_warning ("sendmsg");
      return 0;
    }
  if (n_retries)
    {
      ELOG_TYPE_DECLARE (e) =
      {
	.format = "sendmsg-helper: %d retries",
	.format_args = "i4",
      };
      struct
      {
	u32 retries;
      } *ed = 0;

      ed = ELOG_DATA (&vm->elog_main, e);
      ed->retries = n_retries;
    }
  return 0;
}

/**
 * Transmit a buffer on the multicast socket for the given transport type.
 * The buffer is freed here except for user request-to-relay traffic,
 * whose ownership stays with the caller.
 */
static clib_error_t *
tx_buffer (void *transport, mc_transport_type_t type, u32 buffer_index)
{
  mc_socket_main_t *msm = (mc_socket_main_t *) transport;
  vlib_main_t *vm = msm->mc_main.vlib_main;
  mc_multicast_socket_t *ms = &msm->multicast_sockets[type];
  clib_error_t *error;

  error = sendmsg_helper (msm, ms->socket, &ms->tx_addr, buffer_index);
  if (type != MC_TRANSPORT_USER_REQUEST_TO_RELAY)
    vlib_buffer_free_one (vm, buffer_index);
  return error;
}
/*
 * Main-loop snapshot callback: read up to two active perf counters for
 * this thread into *c0 / *c1. Hardware counters are read with rdpmc;
 * counters without an rdpmc index (marked ~0) are read via read(2) on
 * the event fd. On a failed read, collection is disabled entirely.
 */
static void
read_current_perf_counters (vlib_main_t * vm, u64 * c0, u64 * c1)
{
  perfmon_main_t *pm = &perfmon_main;
  uword thread = vm->thread_index;
  int ev;

  *c0 = 0;
  *c1 = 0;

  for (ev = 0; ev < pm->n_active; ev++)
    {
      u64 *dst = (ev == 0) ? c0 : c1;

      if (pm->rdpmc_indices[ev][thread] != ~0)
	{
	  /* Fast path: user-mode counter read */
	  *dst = clib_rdpmc ((int) pm->rdpmc_indices[ev][thread]);
	  continue;
	}

      /* Slow path: software / non-rdpmc counter, read via syscall */
      {
	u64 value;
	if (read (pm->pm_fds[ev][thread], &value, sizeof (value))
	    != sizeof (value))
	  {
	    clib_unix_warning ("counter read failed, disable collection...");
	    vm->vlib_node_runtime_perf_counter_cb = 0;
	    return;
	  }
	*dst = value;
      }
    }
}
/*
 * Allocate a page-aligned stack for the given thread and write-protect
 * its bottom page as a stack-overflow guard. Returns the stack base.
 */
u8 *
vlib_thread_stack_init (uword thread_index)
{
  uword page_bytes = clib_mem_get_page_size ();
  u8 *stack;

  ASSERT (thread_index < vec_len (vlib_thread_stacks));

  stack = clib_mem_alloc_aligned (VLIB_THREAD_STACK_SIZE, page_bytes);
  vlib_thread_stacks[thread_index] = stack;

  /*
   * Disallow writes to the bottom page of the stack, to
   * catch stack overflows.
   */
  if (mprotect (stack, page_bytes, PROT_READ) < 0)
    clib_unix_warning ("thread stack");

  return stack;
}
/**
 * Initialize segment in a private heap
 */
int
ssvm_master_init_private (ssvm_private_t * ssvm)
{
  ssvm_shared_header_t *sh;
  u32 pagesize = clib_mem_get_page_size ();
  u32 rnd_size = 0;
  u8 *heap;

  /* Round the requested size up to a whole number of pages;
     the clib_max guards against wrap on the +(pagesize-1) add.
     NOTE(review): rnd_size is u32 — a very large ssvm_size would
     truncate; confirm upstream size limits. */
  rnd_size = clib_max (ssvm->ssvm_size + (pagesize - 1), ssvm->ssvm_size);
  rnd_size &= ~(pagesize - 1);

#if USE_DLMALLOC == 0
  {
    mheap_t *heap_header;

    heap = mheap_alloc (0, rnd_size);
    if (heap == 0)
      {
	clib_unix_warning ("mheap alloc");
	return -1;
      }
    /* Heap is shared between threads */
    heap_header = mheap_header (heap);
    heap_header->flags |= MHEAP_FLAG_THREAD_SAFE;
  }
#else
  heap = create_mspace (rnd_size, 1 /* locked */ );
#endif

  ssvm->ssvm_size = rnd_size;
  ssvm->i_am_master = 1;
  ssvm->my_pid = getpid ();
  /* Private segment: no fixed virtual address requested. */
  ssvm->requested_va = ~0;

  /* Allocate a [sic] shared memory header, in process memory... */
  sh = clib_mem_alloc_aligned (sizeof (*sh), CLIB_CACHE_LINE_BYTES);
  ssvm->sh = sh;

  clib_memset (sh, 0, sizeof (*sh));
  sh->heap = heap;
  sh->ssvm_va = pointer_to_uword (heap);
  sh->type = SSVM_SEGMENT_PRIVATE;

  return 0;
}
/*
 * Tear down a shm-backed ssvm segment: unlink the /dev/shm backing
 * file, release the name vectors and unmap the segment itself.
 */
void
ssvm_delete_shm (ssvm_private_t * ssvm)
{
  u8 *backing_file;

  backing_file = format (0, "/dev/shm/%s%c", ssvm->name, 0);

  if (CLIB_DEBUG > 1)
    clib_warning ("[%d] unlinking ssvm (%s) backing file '%s'",
		  getpid (), ssvm->name, backing_file);

  /* Throw away the backing file */
  if (unlink ((char *) backing_file) < 0)
    clib_unix_warning ("unlink segment '%s'", ssvm->name);

  vec_free (backing_file);
  vec_free (ssvm->name);

  munmap ((void *) ssvm->requested_va, ssvm->ssvm_size);
}
static void setup_signal_handlers (void) { uword i; struct sigaction sa; for (i = 1; i < 32; i++) { clib_memset (&sa, 0, sizeof (sa)); sa.sa_sigaction = (void *) unix_signal_handler; sa.sa_flags = SA_SIGINFO; switch (i) { /* these signals take the default action */ case SIGABRT: case SIGKILL: case SIGSTOP: case SIGUSR1: case SIGUSR2: continue; /* ignore SIGPIPE, SIGCHLD */ case SIGPIPE: case SIGCHLD: sa.sa_sigaction = (void *) SIG_IGN; break; /* catch and handle all other signals */ default: break; } if (sigaction (i, &sa, 0) < 0) return clib_unix_warning (0, "sigaction %U", format_signal, i); } }
/*
 * Parse the "unix { ... }" startup configuration section, then perform
 * the unix-level process setup it implies: signal handlers, pidfile,
 * daemonization and syslog for non-interactive runs.
 * Returns 0 on success or a clib error describing the bad input.
 */
static clib_error_t *
unix_config (vlib_main_t * vm, unformat_input_t * input)
{
  unix_main_t *um = &unix_main;
  clib_error_t *error = 0;
  gid_t gid;
  int pidfd = -1;

  /* Defaults */
  um->cli_pager_buffer_limit = UNIX_CLI_DEFAULT_PAGER_LIMIT;
  um->cli_history_limit = UNIX_CLI_DEFAULT_HISTORY;

  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
    {
      char *cli_prompt;
      if (unformat (input, "interactive"))
	um->flags |= UNIX_FLAG_INTERACTIVE;
      else if (unformat (input, "nodaemon"))
	um->flags |= UNIX_FLAG_NODAEMON;
      else if (unformat (input, "cli-prompt %s", &cli_prompt))
	vlib_unix_cli_set_prompt (cli_prompt);
      else
	if (unformat (input, "cli-listen %s", &um->cli_listen_socket.config))
	;
      else if (unformat (input, "runtime-dir %s", &um->runtime_dir))
	;
      else if (unformat (input, "cli-line-mode"))
	um->cli_line_mode = 1;
      else if (unformat (input, "cli-no-banner"))
	um->cli_no_banner = 1;
      else if (unformat (input, "cli-no-pager"))
	um->cli_no_pager = 1;
      else if (unformat (input, "poll-sleep-usec %d", &um->poll_sleep_usec))
	;
      else if (unformat (input, "cli-pager-buffer-limit %d",
			 &um->cli_pager_buffer_limit))
	;
      else
	if (unformat (input, "cli-history-limit %d", &um->cli_history_limit))
	;
      else if (unformat (input, "coredump-size"))
	{
	  uword coredump_size = 0;
	  if (unformat (input, "unlimited"))
	    {
	      coredump_size = RLIM_INFINITY;
	    }
	  else
	    if (!unformat (input, "%U", unformat_memory_size,
			   &coredump_size))
	    {
	      return clib_error_return (0,
					"invalid coredump-size parameter `%U'",
					format_unformat_error, input);
	    }
	  const struct rlimit new_limit = { coredump_size, coredump_size };
	  if (0 != setrlimit (RLIMIT_CORE, &new_limit))
	    {
	      clib_unix_warning ("prlimit() failed");
	    }
	}
      else if (unformat (input, "full-coredump"))
	{
	  int fd;

	  /* Include more mapping types in the coredump (see
	     core(5) coredump_filter bits). */
	  fd = open ("/proc/self/coredump_filter", O_WRONLY);
	  if (fd >= 0)
	    {
	      if (write (fd, "0x6f\n", 5) != 5)
		clib_unix_warning ("coredump filter write failed!");
	      close (fd);
	    }
	  else
	    clib_unix_warning ("couldn't open /proc/self/coredump_filter");
	}
      else if (unformat (input, "startup-config %s",
			 &um->startup_config_filename))
	;
      else if (unformat (input, "exec %s", &um->startup_config_filename))
	;
      else if (unformat (input, "log %s", &um->log_filename))
	{
	  um->log_fd = open ((char *) um->log_filename,
			     O_CREAT | O_WRONLY | O_APPEND, 0644);
	  if (um->log_fd < 0)
	    {
	      clib_warning ("couldn't open log '%s'\n", um->log_filename);
	      um->log_fd = 0;
	    }
	  else
	    {
	      u8 *lv = 0;
	      lv = format (0, "%U: ***** Start: PID %d *****\n",
			   format_timeval, 0 /* current bat-time */ ,
			   0 /* current bat-format */ ,
			   getpid ());
	      {
		int rv __attribute__ ((unused)) =
		  write (um->log_fd, lv, vec_len (lv));
	      }
	      vec_free (lv);
	    }
	}
      else if (unformat (input, "gid %U", unformat_unix_gid, &gid))
	{
	  if (setegid (gid) == -1)
	    return clib_error_return_unix (0, "setegid");
	}
      else if (unformat (input, "pidfile %s", &um->pidfile))
	;
      else
	return clib_error_return (0, "unknown input `%U'",
				  format_unformat_error, input);
    }

  if (um->runtime_dir == 0)
    {
      uid_t uid = geteuid ();
      /* NOTE(review): `00` is octal zero — works, but plain `0`
         would be clearer. Root gets /run, others /run/user/<uid>. */
      if (uid == 00)
	um->runtime_dir = format (0, "/run/%s%c",
				  vlib_default_runtime_dir, 0);
      else
	um->runtime_dir = format (0, "/run/user/%u/%s%c", uid,
				  vlib_default_runtime_dir, 0);
    }

  error = setup_signal_handlers (um);
  if (error)
    return error;

  if (um->pidfile)
    {
      /* Validate/canonicalize the pidfile path, then create it now so
         a later open failure aborts startup early. */
      if ((error =
	   vlib_unix_validate_runtime_file (um,
					    (char *) um->pidfile,
					    &um->pidfile)))
	return error;
      if (((pidfd = open ((char *) um->pidfile,
			  O_CREAT | O_WRONLY | O_TRUNC, 0644)) < 0))
	{
	  return clib_error_return_unix (0, "open");
	}
    }

  if (!(um->flags & UNIX_FLAG_INTERACTIVE))
    {
      openlog (vm->name, LOG_CONS | LOG_PERROR | LOG_PID, LOG_DAEMON);
      clib_error_register_handler (unix_error_handler, um);

      /* NOTE(review): the clib_error_return result below is discarded —
         a daemon() failure is silently ignored; looks like a missing
         `return`. Confirm intent before changing. */
      if (!(um->flags & UNIX_FLAG_NODAEMON)
	  && daemon ( /* chdir to / */ 0,
		     /* stdin/stdout/stderr -> /dev/null */ 0) < 0)
	clib_error_return (0, "daemon () fails");
    }

  if (pidfd >= 0)
    {
      /* Record our pid in the pidfile created above. */
      u8 *lv = format (0, "%d", getpid ());
      if (write (pidfd, (char *) lv, vec_len (lv)) != vec_len (lv))
	{
	  vec_free (lv);
	  close (pidfd);
	  return clib_error_return_unix (0, "write");
	}
      vec_free (lv);
      close (pidfd);
    }

  um->unix_config_complete = 1;

  return 0;
}
/**
 * Prepare kernel event tracing via debugfs and start it.
 *
 * Disables tracing, clears the trace buffer, selects @a kernel_tracer
 * as the current tracer, samples CLOCK_MONOTONIC alongside the cpu
 * timer (kernel timestamps are monotonic, not realtime), then
 * re-enables tracing. All failures warn and bail without side effects
 * beyond elog_init.
 *
 * Fixes: the data_fd error path leaked enable_fd; removed unused
 * locals (ts2, realtime, monotonic, freq, secs_per_clock).
 *
 * @param em             event log main to initialize
 * @param kernel_tracer  tracer name written to current_tracer (required)
 * @param n_events       requested event count (currently overridden, see fixme)
 */
void
kelog_init (elog_main_t * em, char *kernel_tracer, u32 n_events)
{
  int enable_fd, current_tracer_fd, data_fd;
  int len;
  struct timespec ts;
  char *trace_enable = "/debug/tracing/tracing_enabled";
  char *current_tracer = "/debug/tracing/current_tracer";
  char *trace_data = "/debug/tracing/trace";

  ASSERT (kernel_tracer);

  /*$$$$ fixme */
  n_events = 1 << 18;

  /* init first so we won't hurt ourselves if we bail */
  elog_init (em, n_events);

  enable_fd = open (trace_enable, O_RDWR);
  if (enable_fd < 0)
    {
      clib_warning ("Couldn't open %s", trace_enable);
      return;
    }
  /* disable kernel tracing */
  if (write (enable_fd, "0\n", 2) != 2)
    {
      clib_unix_warning ("disable tracing");
      close (enable_fd);
      return;
    }

  /*
   * open + clear the data buffer.
   * see .../linux/kernel/trace/trace.c:tracing_open()
   */
  data_fd = open (trace_data, O_RDWR | O_TRUNC);
  if (data_fd < 0)
    {
      clib_warning ("Couldn't open+clear %s", trace_data);
      close (enable_fd);	/* was leaked on this path */
      return;
    }
  close (data_fd);

  /* configure tracing */
  current_tracer_fd = open (current_tracer, O_RDWR);
  if (current_tracer_fd < 0)
    {
      clib_warning ("Couldn't open %s", current_tracer);
      close (enable_fd);
      return;
    }

  len = strlen (kernel_tracer);

  if (write (current_tracer_fd, kernel_tracer, len) != len)
    {
      clib_unix_warning ("configure trace");
      close (current_tracer_fd);
      close (enable_fd);
      return;
    }
  close (current_tracer_fd);

  /*
   * The kernel event log uses CLOCK_MONOTONIC timestamps,
   * not CLOCK_REALTIME timestamps. These differ by a constant
   * but the constant is not available in user mode.
   * This estimate will be off by one syscall round-trip.
   */
  clib_time_init (&em->cpu_timer);
  em->init_time.cpu = em->cpu_timer.init_cpu_time;
  syscall (SYS_clock_gettime, CLOCK_MONOTONIC, &ts);

  /* enable kernel tracing */
  if (write (enable_fd, "1\n", 2) != 2)
    {
      clib_unix_warning ("enable tracing");
      close (enable_fd);
      return;
    }

  close (enable_fd);
}
/*
 * Open and start perf counters for the next one or two queued events on
 * this thread, record how each one will be read (rdpmc vs. read(2)),
 * and enable the main-loop snapshot callback.
 */
static void
enable_current_events (perfmon_main_t * pm)
{
  struct perf_event_attr pe;
  int fd;
  struct perf_event_mmap_page *p = 0;
  perfmon_event_config_t *c;
  vlib_main_t *vm = vlib_get_main ();
  u32 my_thread_index = vm->thread_index;
  u32 index;
  int i, limit = 1;
  int cpu;

  /* Collect two events at a time when a second one is queued. */
  if ((pm->current_event + 1) < vec_len (pm->single_events_to_collect))
    limit = 2;

  for (i = 0; i < limit; i++)
    {
      c = vec_elt_at_index (pm->single_events_to_collect,
			    pm->current_event + i);

      memset (&pe, 0, sizeof (struct perf_event_attr));
      pe.type = c->pe_type;
      pe.size = sizeof (struct perf_event_attr);
      pe.config = c->pe_config;
      pe.disabled = 1;		/* start stopped; enabled via ioctl below */
      pe.pinned = 1;		/* event must always be on the PMU */

      /*
       * Note: excluding the kernel makes the
       * (software) context-switch counter read 0...
       */
      if (pe.type != PERF_TYPE_SOFTWARE)
	{
	  /* Exclude kernel and hypervisor */
	  pe.exclude_kernel = 1;
	  pe.exclude_hv = 1;
	}

      cpu = vm->cpu_id;

      /* pid 0 == calling thread, pinned to this worker's cpu */
      fd = perf_event_open (&pe, 0, cpu, -1, 0);
      if (fd == -1)
	{
	  clib_unix_warning ("event open: type %d config %d", c->pe_type,
			     c->pe_config);
	  return;
	}

      if (pe.type != PERF_TYPE_SOFTWARE)
	{
	  /* Map the perf metadata page to obtain the rdpmc index. */
	  p = mmap (0, pm->page_size, PROT_READ, MAP_SHARED, fd, 0);
	  if (p == MAP_FAILED)
	    {
	      clib_unix_warning ("mmap");
	      close (fd);
	      return;
	    }
	}
      else
	p = 0;			/* software events have no mmap page */

      if (ioctl (fd, PERF_EVENT_IOC_RESET, 0) < 0)
	clib_unix_warning ("reset ioctl");

      if (ioctl (fd, PERF_EVENT_IOC_ENABLE, 0) < 0)
	clib_unix_warning ("enable ioctl");

      /*
       * Software event counters - and others not capable of being
       * read via the "rdpmc" instruction - will be read
       * by system calls.
       */
      /* Short-circuit keeps p from being dereferenced when it is 0. */
      if (pe.type == PERF_TYPE_SOFTWARE || p->cap_user_rdpmc == 0)
	index = ~0;		/* ~0 => read via read(2), see reader fn */
      else
	index = p->index - 1;	/* mmap-page index is 1-based for rdpmc */

      pm->rdpmc_indices[i][my_thread_index] = index;
      pm->perf_event_pages[i][my_thread_index] = (void *) p;
      pm->pm_fds[i][my_thread_index] = fd;
    }

  pm->n_active = i;
  /* Enable the main loop counter snapshot mechanism */
  vm->vlib_node_runtime_perf_counter_cb = read_current_perf_counters;
}
/*
 * Create a shm-backed ssvm segment as master: create and size the
 * /dev/shm backing file, map it (optionally at a randomized requested
 * VA), initialize the shared header and carve a heap out of the
 * remainder of the segment. Returns 0 or an SSVM_API_ERROR_* code.
 */
int
ssvm_master_init_shm (ssvm_private_t * ssvm)
{
  int ssvm_fd;
#if USE_DLMALLOC == 0
  int mh_flags = MHEAP_FLAG_DISABLE_VM | MHEAP_FLAG_THREAD_SAFE;
#endif
  clib_mem_vm_map_t mapa = { 0 };
  u8 junk = 0, *ssvm_filename;
  ssvm_shared_header_t *sh;
  uword page_size, requested_va = 0;
  void *oldheap;

  if (ssvm->ssvm_size == 0)
    return SSVM_API_ERROR_NO_SIZE;

  if (CLIB_DEBUG > 1)
    clib_warning ("[%d] creating segment '%s'", getpid (), ssvm->name);

  ASSERT (vec_c_string_is_terminated (ssvm->name));

  /* Remove any stale backing file before O_EXCL create below. */
  ssvm_filename = format (0, "/dev/shm/%s%c", ssvm->name, 0);
  unlink ((char *) ssvm_filename);
  vec_free (ssvm_filename);

  ssvm_fd = shm_open ((char *) ssvm->name, O_RDWR | O_CREAT | O_EXCL, 0777);
  if (ssvm_fd < 0)
    {
      clib_unix_warning ("create segment '%s'", ssvm->name);
      return SSVM_API_ERROR_CREATE_FAILURE;
    }

  if (fchmod (ssvm_fd, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP) < 0)
    clib_unix_warning ("ssvm segment chmod");
  if (svm_get_root_rp ())
    {
      /* TODO: is this really needed? */
      svm_main_region_t *smr = svm_get_root_rp ()->data_base;
      if (fchown (ssvm_fd, smr->uid, smr->gid) < 0)
	clib_unix_warning ("ssvm segment chown");
    }

  /* Extend the backing file to ssvm_size by seeking past the end
     and writing one byte. */
  if (lseek (ssvm_fd, ssvm->ssvm_size, SEEK_SET) < 0)
    {
      clib_unix_warning ("lseek");
      close (ssvm_fd);
      return SSVM_API_ERROR_SET_SIZE;
    }

  if (write (ssvm_fd, &junk, 1) != 1)
    {
      clib_unix_warning ("set ssvm size");
      close (ssvm_fd);
      return SSVM_API_ERROR_SET_SIZE;
    }

  page_size = clib_mem_get_fd_page_size (ssvm_fd);
  if (ssvm->requested_va)
    {
      requested_va = ssvm->requested_va;
      /* ASLR-style jitter on the requested address. */
      clib_mem_vm_randomize_va (&requested_va, min_log2 (page_size));
    }

  mapa.requested_va = requested_va;
  mapa.size = ssvm->ssvm_size;
  mapa.fd = ssvm_fd;
  if (clib_mem_vm_ext_map (&mapa))
    {
      clib_unix_warning ("mmap");
      close (ssvm_fd);
      return SSVM_API_ERROR_MMAP;
    }
  close (ssvm_fd);

  /* Shared header lives in the segment's first page. */
  sh = mapa.addr;
  sh->master_pid = ssvm->my_pid;
  sh->ssvm_size = ssvm->ssvm_size;
  sh->ssvm_va = pointer_to_uword (sh);
  sh->type = SSVM_SEGMENT_SHM;
#if USE_DLMALLOC == 0
  sh->heap = mheap_alloc_with_flags (((u8 *) sh) + page_size,
				     ssvm->ssvm_size - page_size, mh_flags);
#else
  sh->heap = create_mspace_with_base (((u8 *) sh) + page_size,
				      ssvm->ssvm_size - page_size,
				      1 /* locked */ );
  mspace_disable_expand (sh->heap);
#endif

  /* Copy the name into the segment heap so slaves can see it.
     NOTE(review): the trailing 0 arg is unused by "%s" — looks like
     "%s%c" was intended to NUL-terminate; confirm against readers. */
  oldheap = ssvm_push_heap (sh);
  sh->name = format (0, "%s", ssvm->name, 0);
  ssvm_pop_heap (oldheap);

  ssvm->sh = sh;
  ssvm->my_pid = getpid ();
  ssvm->i_am_master = 1;

  /* The application has to set set sh->ready... */
  return 0;
}
/**
 * Attach to a master-created shm ssvm segment as slave.
 *
 * Polls (1s intervals, bounded by attach_timeout) until the backing
 * shm object exists and has nonzero size, maps one page to read the
 * shared header, waits for the master to set sh->ready, then remaps
 * the full segment at the master's recorded virtual address.
 *
 * Fixes: the sh->ready wait loop busy-spun with no sleep, so
 * attach_timeout counted loop iterations instead of seconds (unlike
 * the first wait loop) — add sleep(1). Local `struct stat stat`
 * renamed to `st` to stop shadowing the type/function name.
 *
 * @return 0 on success, SSVM_API_ERROR_* on timeout or mmap failure
 */
int
ssvm_slave_init_shm (ssvm_private_t * ssvm)
{
  struct stat st;
  int ssvm_fd = -1;
  ssvm_shared_header_t *sh;

  ASSERT (vec_c_string_is_terminated (ssvm->name));
  ssvm->i_am_master = 0;

  /* Wait for the master to create and size the backing object. */
  while (ssvm->attach_timeout-- > 0)
    {
      if (ssvm_fd < 0)
	ssvm_fd = shm_open ((char *) ssvm->name, O_RDWR, 0777);
      if (ssvm_fd < 0)
	{
	  sleep (1);
	  continue;
	}
      if (fstat (ssvm_fd, &st) < 0)
	{
	  sleep (1);
	  continue;
	}

      if (st.st_size > 0)
	goto map_it;
    }
  clib_warning ("slave timeout");
  return SSVM_API_ERROR_SLAVE_TIMEOUT;

map_it:
  /* Map one page to read the shared header. */
  sh = (void *) mmap (0, MMAP_PAGESIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
		      ssvm_fd, 0);
  if (sh == MAP_FAILED)
    {
      clib_unix_warning ("slave research mmap");
      close (ssvm_fd);
      return SSVM_API_ERROR_MMAP;
    }

  /* Wait for the master to finish initializing the segment. */
  while (ssvm->attach_timeout-- > 0)
    {
      if (sh->ready)
	goto re_map_it;
      sleep (1);		/* bug fix: previously a busy-wait */
    }
  close (ssvm_fd);
  munmap (sh, MMAP_PAGESIZE);
  clib_warning ("slave timeout 2");
  return SSVM_API_ERROR_SLAVE_TIMEOUT;

re_map_it:
  ssvm->requested_va = sh->ssvm_va;
  ssvm->ssvm_size = sh->ssvm_size;
  munmap (sh, MMAP_PAGESIZE);

  /* Remap the whole segment at the master's address. */
  sh = ssvm->sh = (void *) mmap ((void *) ssvm->requested_va,
				 ssvm->ssvm_size,
				 PROT_READ | PROT_WRITE,
				 MAP_SHARED | MAP_FIXED, ssvm_fd, 0);

  if (sh == MAP_FAILED)
    {
      clib_unix_warning ("slave final mmap");
      close (ssvm_fd);
      return SSVM_API_ERROR_MMAP;
    }
  sh->slave_pid = getpid ();
  return 0;
}