static int _stp_data_open_trace(struct inode *inode, struct file *file) { struct _stp_iterator *iter = inode->i_private; #ifdef STP_BULKMODE int cpu_file = iter->cpu_file; #endif /* We only allow for one reader per cpu */ dbug_trans(1, "trace attach\n"); #ifdef STP_BULKMODE if (!cpumask_test_cpu(cpu_file, _stp_relay_data.trace_reader_cpumask)) cpumask_set_cpu(cpu_file, _stp_relay_data.trace_reader_cpumask); else { dbug_trans(1, "returning EBUSY\n"); return -EBUSY; } #else if (!cpumask_empty(_stp_relay_data.trace_reader_cpumask)) { dbug_trans(1, "returning EBUSY\n"); return -EBUSY; } cpumask_setall(_stp_relay_data.trace_reader_cpumask); #endif file->private_data = inode->i_private; return 0; }
/**
 * _stp_transport_close - close ctl and relayfs channels
 *
 * This is called automatically when the module is unloaded.
 *
 */
static void _stp_transport_close(void)
{
	dbug_trans(1, "%d: ************** transport_close *************\n",
		   current->pid);
	/* Teardown order matters: first stop probes and notify
	 * (without sending STP_EXIT, argument 0), then remove the
	 * control channel, close the transport filesystem, and only
	 * then free the print buffers the earlier steps may still
	 * have flushed into. */
	_stp_cleanup_and_exit(0);
	_stp_unregister_ctl_channel();
	_stp_transport_fs_close();
	_stp_print_cleanup();	/* free print buffers */
	_stp_mem_debug_done();
	dbug_trans(1, "---- CLOSED ----\n");
}
/* Ask userspace (stapio) to initiate shutdown.  Idempotent: only the
 * first call sends the STP_REQUEST_EXIT message. */
static void _stp_request_exit(void)
{
	static int exit_requested = 0;	/* one-shot latch */

	if (exit_requested)
		return;
	exit_requested = 1;

	dbug_trans(1, "ctl_send STP_REQUEST_EXIT\n");
	/* Called from the timer when _stp_exit_flag has been been set.
	 * So safe to immediately notify any readers. */
	_stp_ctl_send_notify(STP_REQUEST_EXIT, NULL, 0);
	dbug_trans(1, "done with ctl_send STP_REQUEST_EXIT\n");
}
static int __stp_alloc_ring_buffer(void) { int i; unsigned long buffer_size = _stp_bufsize * 1024 * 1024; if (!alloc_cpumask_var(&_stp_relay_data.trace_reader_cpumask, (GFP_KERNEL & ~__GFP_WAIT))) goto fail; cpumask_clear(_stp_relay_data.trace_reader_cpumask); if (buffer_size == 0) { dbug_trans(1, "using default buffer size...\n"); buffer_size = _stp_nsubbufs * _stp_subbuf_size; } dbug_trans(1, "using buffer size %lu...\n", buffer_size); /* The number passed to ring_buffer_alloc() is per cpu. Our * 'buffer_size' is a total number of bytes to allocate. So, * we need to divide buffer_size by the number of cpus. */ buffer_size /= num_online_cpus(); dbug_trans(1, "%lu\n", buffer_size); _stp_relay_data.rb = ring_buffer_alloc(buffer_size, 0); if (!_stp_relay_data.rb) goto fail; /* Increment _stp_allocated_memory and _stp_allocated_net_memory to approximately account for buffers allocated by ring_buffer_alloc. */ { #ifndef DIV_ROUND_UP #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) #endif u64 relay_pages; relay_pages = DIV_ROUND_UP (buffer_size, PAGE_SIZE); if (relay_pages < 2) relay_pages = 2; relay_pages *= num_online_cpus(); _stp_allocated_net_memory += relay_pages * PAGE_SIZE; _stp_allocated_memory += relay_pages * PAGE_SIZE; } dbug_trans(0, "size = %lu\n", ring_buffer_size(_stp_relay_data.rb)); return 0; fail: __stp_free_ring_buffer(); return -ENOMEM; }
/* Copy one ring-buffer event's payload to a userspace buffer.
 * Returns the number of bytes copied, -EFAULT on bad pointers or a
 * failed copy, and -EBUSY when the entry is larger than 'cnt'
 * (partial entries are never delivered).
 *
 * NOTE(review): the return type is size_t (unsigned) yet negative
 * errno values are returned; callers evidently compare after casting
 * to a signed type -- confirm before changing the signature.
 *
 * Fixes: '%zu' is the correct format specifier for size_t (was %lu,
 * which is wrong on 32-bit); the DEBUG_TRANS tail-dump no longer
 * computes an out-of-bounds pointer when entry->len < 5. */
size_t _stp_event_to_user(struct ring_buffer_event *event, char __user *ubuf,
			  size_t cnt)
{
	int ret;
	struct _stp_data_entry *entry;

	dbug_trans(1, "event(%p), ubuf(%p), cnt(%zu)\n", event, ubuf, cnt);
	if (event == NULL || ubuf == NULL) {
		dbug_trans(1, "returning -EFAULT(1)\n");
		return -EFAULT;
	}

	entry = ring_buffer_event_data(event);
	if (entry == NULL) {
		dbug_trans(1, "returning -EFAULT(2)\n");
		return -EFAULT;
	}

	/* We don't do partial entries - just fail. */
	if (entry->len > cnt) {
		dbug_trans(1, "returning -EBUSY\n");
		return -EBUSY;
	}

#if defined(DEBUG_TRANS) && (DEBUG_TRANS >= 2)
	if (entry->len >= 5) {
		/* Show the first and last 5 bytes of the payload. */
		char *last = entry->buf + (entry->len - 5);
		dbug_trans2("copying %.5s...%.5s\n", entry->buf, last);
	}
#endif

	if (cnt > entry->len)
		cnt = entry->len;
	ret = copy_to_user(ubuf, entry->buf, cnt);
	if (ret) {
		dbug_trans(1, "returning -EFAULT(3)\n");
		return -EFAULT;
	}

	return cnt;
}
/*
 * Called when stapio opens the control channel.
 */
static void _stp_attach(void)
{
	dbug_trans(1, "attach\n");

	/* Remember which process is attached. */
	_stp_pid = current->pid;

	/* While a reader is attached, do not overwrite unread data. */
	_stp_transport_data_fs_overwrite(0);

	/* Arm the periodic control-channel work timer.  All fields are
	 * filled in before add_timer() publishes the timer. */
	init_timer(&_stp_ctl_work_timer);
	_stp_ctl_work_timer.function = _stp_ctl_work_callback;
	_stp_ctl_work_timer.data = 0;
	_stp_ctl_work_timer.expires = jiffies + STP_CTL_TIMER_INTERVAL;
	add_timer(&_stp_ctl_work_timer);
}
/*
 * Called when stapio closes the control channel.
 */
static void _stp_detach(void)
{
	dbug_trans(1, "detach\n");
	/* No stapio process attached any more. */
	_stp_pid = 0;

	/* Unless we are already shutting down, allow the transport to
	 * overwrite old unread data while nobody is reading. */
	if (!_stp_exit_flag)
		_stp_transport_data_fs_overwrite(1);

	/* Stop the control-channel work timer (synchronously, so its
	 * callback cannot still be running) and wake any readers
	 * blocked on the control wait queue so they notice the
	 * detach. */
	del_timer_sync(&_stp_ctl_work_timer);
	wake_up_interruptible(&_stp_ctl_wq);
}
/* Handle the STP_START message from stapio: run the module's probe
 * initialization exactly once and report the result back on the
 * control channel via STP_START. */
static void _stp_handle_start(struct _stp_msg_start *st)
{
	int handle_startup;

	/* Decide under the transport mutex whether this is the first
	 * (and only permitted) start, and that exit has not already
	 * begun. */
	mutex_lock(&_stp_transport_mutex);
	handle_startup = (! _stp_start_called && ! _stp_exit_called);
	_stp_start_called = 1;
	mutex_unlock(&_stp_transport_mutex);

	if (handle_startup) {
		dbug_trans(1, "stp_handle_start\n");

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) // linux commit #5f4352fb
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,29) // linux commit #9be260a6
#ifdef STAPCONF_VM_AREA
		{ /* PR9740: workaround for kernel valloc bug. */
		  /* PR14611: not required except within above kernel range. */
			void *dummy;
#ifdef STAPCONF_VM_AREA_PTE
			dummy = alloc_vm_area (PAGE_SIZE, NULL);
#else
			dummy = alloc_vm_area (PAGE_SIZE);
#endif
			free_vm_area (dummy);
		}
#endif
#endif
#endif

		_stp_target = st->target;
		/* Run the stap-generated probe initialization; its
		 * result is returned to stapio in st->res. */
		st->res = systemtap_module_init();
		if (st->res == 0)
			_stp_probes_started = 1;

		/* Register the module notifier. */
		if (!_stp_module_notifier_active) {
			int rc = register_module_notifier(& _stp_module_notifier_nb);
			if (rc == 0)
				_stp_module_notifier_active = 1;
			else
				_stp_warn ("Cannot register module notifier (%d)\n",
					   rc);
		}

		/* Called from the user context in response to a proc
		   file write (in _stp_ctl_write_cmd), so may notify
		   the reader directly. */
		_stp_ctl_send_notify(STP_START, st, sizeof(*st));

		/* Register the panic notifier. */
#if STP_TRANSPORT_VERSION == 2
		atomic_notifier_chain_register(&panic_notifier_list,
					       &_stp_module_panic_notifier_nb);
#endif
	}
}
/* Decide how a trace-pipe read should proceed: 1 when events are
 * pending, 0 to (re)wait, -EAGAIN for non-blocking readers with no
 * data, -EINTR when a signal is pending. */
static ssize_t _stp_tracing_wait_pipe(struct file *filp)
{
	struct _stp_iterator *iter = filp->private_data;

	if (atomic_read(&iter->nr_events) != 0) {
		/* Data is available right now. */
		dbug_trans(1, "returning 1\n");
		return 1;
	}

	if (filp->f_flags & O_NONBLOCK) {
		dbug_trans(1, "returning -EAGAIN\n");
		return -EAGAIN;
	}

	if (signal_pending(current)) {
		dbug_trans(1, "returning -EINTR\n");
		return -EINTR;
	}

	/* Nothing to read yet; caller should wait. */
	dbug_trans(1, "returning 0\n");
	return 0;
}
/* Release handler for a trace data file: drop the reader claim taken
 * in _stp_data_open_trace().
 *
 * Fix: 'iter' is now declared only under STP_BULKMODE; in non-bulk
 * builds it was unused and produced an unused-variable warning. */
static int _stp_data_release_trace(struct inode *inode, struct file *file)
{
#ifdef STP_BULKMODE
	struct _stp_iterator *iter = inode->i_private;
#endif

	dbug_trans(1, "trace detach\n");
#ifdef STP_BULKMODE
	/* Release this cpu's reader slot. */
	cpumask_clear_cpu(iter->cpu_file, _stp_relay_data.trace_reader_cpumask);
#else
	/* Release the single global reader claim. */
	cpumask_clear(_stp_relay_data.trace_reader_cpumask);
#endif

	return 0;
}
/* Tear down the procfs control-channel entries created at
 * registration time. */
static void _stp_unregister_ctl_channel_fs(void)
{
#ifdef STP_BULKMODE
	char buf[32];
	int i;
	struct proc_dir_entry *de;

	dbug_trans(1, "unregistering procfs\n");
	/* Free the private data hung off each per-cpu entry before
	 * the entries themselves are removed.
	 * NOTE(review): walking _stp_proc_root->subdir directly relies
	 * on proc_dir_entry internals being visible, which newer
	 * kernels hide -- confirm against the targeted kernel range. */
	for (de = _stp_proc_root->subdir; de; de = de->next)
		_stp_kfree(de->data);
	/* Remove the per-cpu trace files, named "0", "1", ... */
	stp_for_each_cpu(i) {
		sprintf(buf, "%d", i);
		remove_proc_entry(buf, _stp_proc_root);
	}
	remove_proc_entry("bufsize", _stp_proc_root);
#endif /* STP_BULKMODE */
	remove_proc_entry(".cmd", _stp_proc_root);
	_stp_rmdir_proc_module();
}
static void _stp_buffer_iter_finish(struct _stp_iterator *iter) { #ifdef STP_BULKMODE int cpu_file = iter->cpu_file; if (iter->buffer_iter[cpu_file]) { ring_buffer_read_finish(iter->buffer_iter[cpu_file]); iter->buffer_iter[cpu_file] = NULL; } #else int cpu; for_each_possible_cpu(cpu) { if (iter->buffer_iter[cpu]) { ring_buffer_read_finish(iter->buffer_iter[cpu]); iter->buffer_iter[cpu] = NULL; } } #endif dbug_trans(0, "iterator(s) finished\n"); }
/** * _stp_transport_init() is called from the module initialization. * It does the bare minimum to exchange commands with staprun */ static int _stp_transport_init(void) { dbug_trans(1, "transport_init\n"); #ifdef STAPCONF_TASK_UID _stp_uid = current->uid; _stp_gid = current->gid; #else #ifdef CONFIG_UIDGID_STRICT_TYPE_CHECKS _stp_uid = from_kuid_munged(current_user_ns(), current_uid()); _stp_gid = from_kgid_munged(current_user_ns(), current_gid()); #else _stp_uid = current_uid(); _stp_gid = current_gid(); #endif #endif /* PR13489, missing inode-uprobes symbol-export workaround */ #if !defined(STAPCONF_TASK_USER_REGSET_VIEW_EXPORTED) && !defined(STAPCONF_UTRACE_REGSET) /* RHEL5 era utrace */ kallsyms_task_user_regset_view = (void*) kallsyms_lookup_name ("task_user_regset_view"); /* There exist interesting kernel versions without task_user_regset_view(), like ARM before 3.0. For these kernels, uprobes etc. are out of the question, but plain kernel stap works fine. All we have to accomplish is have the loc2c runtime code compile. For that, it's enough to leave this pointer zero. 
*/ if (kallsyms_task_user_regset_view == NULL) { ; } #endif #if defined(CONFIG_UPROBES) // i.e., kernel-embedded uprobes #if !defined(STAPCONF_UPROBE_REGISTER_EXPORTED) kallsyms_uprobe_register = (void*) kallsyms_lookup_name ("uprobe_register"); if (kallsyms_uprobe_register == NULL) { kallsyms_uprobe_register = (void*) kallsyms_lookup_name ("register_uprobe"); } if (kallsyms_uprobe_register == NULL) { printk(KERN_ERR "%s can't resolve uprobe_register!", THIS_MODULE->name); goto err0; } #endif #if !defined(STAPCONF_UPROBE_UNREGISTER_EXPORTED) kallsyms_uprobe_unregister = (void*) kallsyms_lookup_name ("uprobe_unregister"); if (kallsyms_uprobe_unregister == NULL) { kallsyms_uprobe_unregister = (void*) kallsyms_lookup_name ("unregister_uprobe"); } if (kallsyms_uprobe_unregister == NULL) { printk(KERN_ERR "%s can't resolve uprobe_unregister!", THIS_MODULE->name); goto err0; } #endif #if !defined(STAPCONF_UPROBE_GET_SWBP_ADDR_EXPORTED) kallsyms_uprobe_get_swbp_addr = (void*) kallsyms_lookup_name ("uprobe_get_swbp_addr"); if (kallsyms_uprobe_get_swbp_addr == NULL) { printk(KERN_ERR "%s can't resolve uprobe_get_swbp_addr!", THIS_MODULE->name); goto err0; } #endif #endif #ifdef RELAY_GUEST /* Guest scripts use relay only for reporting warnings and errors */ _stp_subbuf_size = 65536; _stp_nsubbufs = 2; #endif if (_stp_bufsize) { unsigned size = _stp_bufsize * 1024 * 1024; _stp_subbuf_size = 65536; while (size / _stp_subbuf_size > 64 && _stp_subbuf_size < 1024 * 1024) { _stp_subbuf_size <<= 1; } _stp_nsubbufs = size / _stp_subbuf_size; dbug_trans(1, "Using %d subbufs of size %d\n", _stp_nsubbufs, _stp_subbuf_size); } if (_stp_transport_fs_init(THIS_MODULE->name) != 0) goto err0; /* create control channel */ if (_stp_register_ctl_channel() < 0) goto err1; /* create print buffers */ if (_stp_print_init() < 0) goto err2; /* start transport */ _stp_transport_data_fs_start(); /* Signal stapio to send us STP_START back. This is an historic convention. 
This was called STP_TRANSPORT_INFO and had a payload that described the transport buffering, this is no longer the case. Called during module initialization time, so safe to immediately notify reader we are ready. */ _stp_ctl_send_notify(STP_TRANSPORT, NULL, 0); dbug_trans(1, "returning 0...\n"); return 0; err3: _stp_print_cleanup(); err2: _stp_unregister_ctl_channel(); err1: _stp_transport_fs_close(); err0: return -1; }
/* when someone does /sbin/rmmod on a loaded systemtap module. */ static void _stp_cleanup_and_exit(int send_exit) { int handle_exit; int start_finished; mutex_lock(&_stp_transport_mutex); handle_exit = (_stp_start_called && ! _stp_exit_called); _stp_exit_called = 1; mutex_unlock(&_stp_transport_mutex); /* Note, we can be sure that the startup sequence has finished if handle_exit is true because it depends on _stp_start_called being set to true. _stp_start_called can only be set to true in _stp_handle_start() in response to a _STP_START message on the control channel. Only one writer can have the control channel open at a time, so the whole startup sequence in _stp_handle_start() has to be completed before another message can be send. _stp_cleanup_and_exit() can only be called through either a _STP_EXIT message, which cannot arrive while _STP_START is still being handled, or when the module is unloaded. The module can only be unloaded when there are no more users that keep the control channel open. */ if (handle_exit) { int failures; /* Unregister the module notifier. */ if (_stp_module_notifier_active) { _stp_module_notifier_active = 0; (void) unregister_module_notifier(& _stp_module_notifier_nb); /* -ENOENT is possible, if we were not already registered */ } dbug_trans(1, "cleanup_and_exit (%d)\n", send_exit); _stp_exit_flag = 1; if (_stp_probes_started) { dbug_trans(1, "calling systemtap_module_exit\n"); /* tell the stap-generated code to unload its probes, etc */ systemtap_module_exit(); dbug_trans(1, "done with systemtap_module_exit\n"); } failures = atomic_read(&_stp_transport_failures); if (failures) _stp_warn("There were %d transport failures.\n", failures); dbug_trans(1, "*** calling _stp_transport_data_fs_stop ***\n"); _stp_transport_data_fs_stop(); dbug_trans(1, "ctl_send STP_EXIT\n"); if (send_exit) { /* send_exit is only set to one if called from _stp_ctl_write_cmd() in response to a write to the proc cmd file, so in user context. 
It is safe to immediately notify the reader. */ _stp_ctl_send_notify(STP_EXIT, NULL, 0); } dbug_trans(1, "done with ctl_send STP_EXIT\n"); /* Unregister the panic notifier. */ #if STP_TRANSPORT_VERSION == 2 atomic_notifier_chain_unregister(&panic_notifier_list, &_stp_module_panic_notifier_nb); #endif } }