/** * rvt_driver_cq_init - Init cq resources on behalf of driver * @rdi: rvt dev structure * * Return: 0 on success */ int rvt_driver_cq_init(struct rvt_dev_info *rdi) { int ret = 0; int cpu; struct task_struct *task; if (rdi->worker) return 0; spin_lock_init(&rdi->n_cqs_lock); rdi->worker = kzalloc(sizeof(*rdi->worker), GFP_KERNEL); if (!rdi->worker) return -ENOMEM; init_kthread_worker(rdi->worker); task = kthread_create_on_node( kthread_worker_fn, rdi->worker, rdi->dparms.node, "%s", rdi->dparms.cq_name); if (IS_ERR(task)) { kfree(rdi->worker); rdi->worker = NULL; return PTR_ERR(task); } set_user_nice(task, MIN_NICE); cpu = cpumask_first(cpumask_of_node(rdi->dparms.node)); kthread_bind(task, cpu); wake_up_process(task); return ret; }
/* * Create or destroy enough new threads to make the number * of threads the given number. If `pool' is non-NULL, applies * only to threads in that pool, otherwise round-robins between * all pools. Caller must ensure that mutual exclusion between this and * server startup or shutdown. * * Destroying threads relies on the service threads filling in * rqstp->rq_task, which only the nfs ones do. Assumes the serv * has been created using svc_create_pooled(). * * Based on code that used to be in nfsd_svc() but tweaked * to be pool-aware. */ int svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) { struct svc_rqst *rqstp; struct task_struct *task; struct svc_pool *chosen_pool; int error = 0; unsigned int state = serv->sv_nrthreads-1; int node; if (pool == NULL) { /* The -1 assumes caller has done a svc_get() */ nrservs -= (serv->sv_nrthreads-1); } else { spin_lock_bh(&pool->sp_lock); nrservs -= pool->sp_nrthreads; spin_unlock_bh(&pool->sp_lock); } /* create new threads */ while (nrservs > 0) { nrservs--; chosen_pool = choose_pool(serv, pool, &state); node = svc_pool_map_get_node(chosen_pool->sp_id); rqstp = svc_prepare_thread(serv, chosen_pool, node); if (IS_ERR(rqstp)) { error = PTR_ERR(rqstp); break; } __module_get(serv->sv_ops->svo_module); task = kthread_create_on_node(serv->sv_ops->svo_function, rqstp, node, "%s", serv->sv_name); if (IS_ERR(task)) { error = PTR_ERR(task); module_put(serv->sv_ops->svo_module); svc_exit_thread(rqstp); break; } rqstp->rq_task = task; if (serv->sv_nrpools > 1) svc_pool_map_set_cpumask(task, chosen_pool->sp_id); svc_sock_update_bufs(serv); wake_up_process(task); } /* destroy old threads */ while (nrservs < 0 && (task = choose_victim(serv, pool, &state)) != NULL) { send_sig(SIGINT, task, 1); nrservs++; } return error; }
/*
 * Initialise the per-CPU completion task state for @cpu in @pool and create
 * (but do not wake) its "ehca_comp/<cpu>" kernel thread on that CPU's node.
 *
 * The value returned by kthread_create_on_node() is stored in cct->task and
 * returned unchanged, so on failure both hold an ERR_PTR value; callers are
 * expected to test the result with IS_ERR().
 */
static struct task_struct *create_comp_task(struct ehca_comp_pool *pool,
					    int cpu)
{
	struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
	struct task_struct *kthr;

	spin_lock_init(&cct->task_lock);
	INIT_LIST_HEAD(&cct->cq_list);
	init_waitqueue_head(&cct->wait_queue);

	kthr = kthread_create_on_node(comp_task, cct, cpu_to_node(cpu),
				      "ehca_comp/%d", cpu);
	cct->task = kthr;

	return kthr;
}
/*
 * Module init hook for the ts test code.
 *
 * Logs that it was called and returns 0. The thread-creation code after the
 * first return statement is currently never executed.
 */
static int __init ts_test_init(void)
{
	struct task_struct *test_task;

	TS_LOG_INFO("ts_test_init called here\n");

	/*
	 * NOTE(review): this unconditional return makes everything below
	 * unreachable, so the test thread is never created. It looks like a
	 * deliberate way of disabling the test - confirm, then either drop
	 * the dead code or remove this return.
	 */
	return 0;

	test_task = kthread_create_on_node(ts_test_thread, NULL,
					   cpu_to_node(2), "ts_test_thread");
	if (!IS_ERR(test_task)) {
		wake_up_process(test_task);
		return 0;
	}

	/* Creation failure is only logged; init still reports success. */
	TS_LOG_ERR("create ts_thread failed\n");
	return 0;
}
/** * kthread_create_on_cpu - Create a cpu bound kthread * @threadfn: the function to run until signal_pending(current). * @data: data ptr for @threadfn. * @cpu: The cpu on which the thread should be bound, * @namefmt: printf-style name for the thread. Format is restricted * to "name.*%u". Code fills in cpu number. * * Description: This helper function creates and names a kernel thread * The thread will be woken and put into park mode. */ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data), void *data, unsigned int cpu, const char *namefmt) { struct task_struct *p; p = kthread_create_on_node(threadfn, data, cpu_to_mem(cpu), namefmt, cpu); if (IS_ERR(p)) return p; set_bit(KTHREAD_IS_PER_CPU, &to_kthread(p)->flags); to_kthread(p)->cpu = cpu; /* Park the thread to get it out of TASK_UNINTERRUPTIBLE state */ kthread_park(p); return p; }
int pfq_start_all_tx_threads(void) { int err = 0; if (tx_thread_nr) { int n; printk(KERN_INFO "[PFQ] starting %d Tx thread(s)...\n", tx_thread_nr); for(n = 0; n < tx_thread_nr; n++) { struct pfq_thread_tx_data *data = &pfq_thread_tx_pool[n]; data->id = n; data->cpu = tx_affinity[n]; data->node = cpu_online(tx_affinity[n]) ? cpu_to_node(tx_affinity[n]) : NUMA_NO_NODE; data->task = kthread_create_on_node(pfq_tx_thread, data, data->node, "kpfq/%d:%d", n, data->cpu); if (IS_ERR(data->task)) { printk(KERN_INFO "[PFQ] kernel_thread: create failed on cpu %d!\n", data->cpu); err = PTR_ERR(data->task); data->task = NULL; return err; } kthread_bind(data->task, data->cpu); pr_devel("[PFQ] created Tx[%d] kthread on cpu %d...\n", data->id, data->cpu); wake_up_process(data->task); } } return err; }
static int clamp_thread(void *arg) { int cpunr = (unsigned long)arg; DEFINE_TIMER(wakeup_timer, noop_timer, 0, 0); static const struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2, }; unsigned int count = 0; unsigned int target_ratio; set_bit(cpunr, cpu_clamping_mask); set_freezable(); init_timer_on_stack(&wakeup_timer); sched_setscheduler(current, SCHED_FIFO, ¶m); while (true == clamping && !kthread_should_stop() && cpu_online(cpunr)) { int sleeptime; unsigned long target_jiffies; unsigned int guard; unsigned int compensated_ratio; int interval; /* jiffies to sleep for each attempt */ unsigned int duration_jiffies = msecs_to_jiffies(duration); unsigned int window_size_now; try_to_freeze(); /* * make sure user selected ratio does not take effect until * the next round. adjust target_ratio if user has changed * target such that we can converge quickly. */ target_ratio = set_target_ratio; guard = 1 + target_ratio/20; window_size_now = window_size; count++; /* * systems may have different ability to enter package level * c-states, thus we need to compensate the injected idle ratio * to achieve the actual target reported by the HW. */ compensated_ratio = target_ratio + get_compensation(target_ratio); if (compensated_ratio <= 0) compensated_ratio = 1; interval = duration_jiffies * 100 / compensated_ratio; /* align idle time */ target_jiffies = roundup(jiffies, interval); sleeptime = target_jiffies - jiffies; if (sleeptime <= 0) sleeptime = 1; schedule_timeout_interruptible(sleeptime); /* * only elected controlling cpu can collect stats and update * control parameters. */ if (cpunr == control_cpu && !(count%window_size_now)) { should_skip = powerclamp_adjust_controls(target_ratio, guard, window_size_now); smp_mb(); } if (should_skip) continue; target_jiffies = jiffies + duration_jiffies; mod_timer(&wakeup_timer, target_jiffies); if (unlikely(local_softirq_pending())) continue; /* * stop tick sched during idle time, interrupts are still * allowed. 
thus jiffies are updated properly. */ preempt_disable(); /* mwait until target jiffies is reached */ while (time_before(jiffies, target_jiffies)) { unsigned long ecx = 1; unsigned long eax = target_mwait; /* * REVISIT: may call enter_idle() to notify drivers who * can save power during cpu idle. same for exit_idle() */ local_touch_nmi(); stop_critical_timings(); mwait_idle_with_hints(eax, ecx); start_critical_timings(); atomic_inc(&idle_wakeup_counter); } preempt_enable(); } del_timer_sync(&wakeup_timer); clear_bit(cpunr, cpu_clamping_mask); return 0; } /* * 1 HZ polling while clamping is active, useful for userspace * to monitor actual idle ratio. */ static void poll_pkg_cstate(struct work_struct *dummy); static DECLARE_DELAYED_WORK(poll_pkg_cstate_work, poll_pkg_cstate); static void poll_pkg_cstate(struct work_struct *dummy) { static u64 msr_last; static u64 tsc_last; static unsigned long jiffies_last; u64 msr_now; unsigned long jiffies_now; u64 tsc_now; u64 val64; msr_now = pkg_state_counter(); tsc_now = rdtsc(); jiffies_now = jiffies; /* calculate pkg cstate vs tsc ratio */ if (!msr_last || !tsc_last) pkg_cstate_ratio_cur = 1; else { if (tsc_now - tsc_last) { val64 = 100 * (msr_now - msr_last); do_div(val64, (tsc_now - tsc_last)); pkg_cstate_ratio_cur = val64; } } /* update record */ msr_last = msr_now; jiffies_last = jiffies_now; tsc_last = tsc_now; if (true == clamping) schedule_delayed_work(&poll_pkg_cstate_work, HZ); } static int start_power_clamp(void) { unsigned long cpu; struct task_struct *thread; set_target_ratio = clamp(set_target_ratio, 0U, MAX_TARGET_RATIO - 1); /* prevent cpu hotplug */ get_online_cpus(); /* prefer BSP */ control_cpu = 0; if (!cpu_online(control_cpu)) control_cpu = smp_processor_id(); clamping = true; schedule_delayed_work(&poll_pkg_cstate_work, 0); /* start one thread per online cpu */ for_each_online_cpu(cpu) { struct task_struct **p = per_cpu_ptr(powerclamp_thread, cpu); thread = kthread_create_on_node(clamp_thread, (void *) 
cpu, cpu_to_node(cpu), "kidle_inject/%ld", cpu); /* bind to cpu here */ if (likely(!IS_ERR(thread))) { kthread_bind(thread, cpu); wake_up_process(thread); *p = thread; } } put_online_cpus(); return 0; }
/*
 * debugfs "run" write handler: start a perf run and wait for it to finish.
 *
 * Waits (interruptibly) for the link to come up, takes run_mutex without
 * blocking, clamps the perf_threads / seg_order / run_order settings, then
 * creates and wakes perf->perf_threads "ntb_perf <i>" kernel threads on the
 * NUMA node of the NTB PCI device. It then waits, interruptibly, until
 * perf->tdone reaches the thread count before cleaning the threads up.
 *
 * Returns @count on completion, -ENOLINK if the wait for the link was
 * interrupted, -EINVAL if no threads are configured, -EBUSY if a run is
 * already holding run_mutex, and -ENXIO if a thread could not be created.
 */
static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf,
				 size_t count, loff_t *offp)
{
	struct perf_ctx *perf = filp->private_data;
	int node, i;
	/*
	 * The wait queue head lives on this function's stack, so it must be
	 * declared with the _ONSTACK variant to be initialised correctly
	 * when lockdep is enabled.
	 */
	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);

	if (wait_event_interruptible(perf->link_wq, perf->link_is_up))
		return -ENOLINK;

	if (perf->perf_threads == 0)
		return -EINVAL;

	/* Only one run at a time; do not queue up behind a running test. */
	if (!mutex_trylock(&perf->run_mutex))
		return -EBUSY;

	perf_clear_thread_status(perf);

	if (perf->perf_threads > MAX_THREADS) {
		perf->perf_threads = MAX_THREADS;
		pr_info("Reset total threads to: %u\n", MAX_THREADS);
	}

	/* no greater than 1M */
	if (seg_order > MAX_SEG_ORDER) {
		seg_order = MAX_SEG_ORDER;
		pr_info("Fix seg_order to %u\n", seg_order);
	}

	if (run_order < seg_order) {
		run_order = seg_order;
		pr_info("Fix run_order to %u\n", run_order);
	}

	node = dev_to_node(&perf->ntb->pdev->dev);
	atomic_set(&perf->tdone, 0);

	/* launch kernel thread */
	for (i = 0; i < perf->perf_threads; i++) {
		struct pthr_ctx *pctx;

		pctx = &perf->pthr_ctx[i];
		atomic_set(&pctx->dma_sync, 0);
		pctx->perf = perf;
		pctx->wq = &wq;
		pctx->thread =
			kthread_create_on_node(ntb_perf_thread,
					       (void *)pctx,
					       node, "ntb_perf %d", i);
		if (IS_ERR(pctx->thread)) {
			/* Do not leave an ERR_PTR for threads_cleanup(). */
			pctx->thread = NULL;
			goto err;
		} else {
			wake_up_process(pctx->thread);
		}
	}

	wait_event_interruptible(wq,
		atomic_read(&perf->tdone) == perf->perf_threads);

	threads_cleanup(perf);
	mutex_unlock(&perf->run_mutex);
	return count;

err:
	threads_cleanup(perf);
	mutex_unlock(&perf->run_mutex);
	return -ENXIO;
}
int pfq_setsockopt(struct socket *sock, int level, int optname, char __user * optval, #if(LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31)) unsigned #endif int optlen) { struct pfq_sock *so = pfq_sk(sock->sk); struct pfq_rx_opt * ro; struct pfq_tx_opt * to; bool found = true; if (so == NULL) return -EINVAL; ro = &so->rx_opt; to = &so->tx_opt; switch(optname) { case Q_SO_TOGGLE_QUEUE: { int active; if (optlen != sizeof(active)) return -EINVAL; if (copy_from_user(&active, optval, optlen)) return -EFAULT; if (active) { if (!so->mem_addr) { struct pfq_queue_hdr * queue; /* alloc queue memory */ if (pfq_shared_queue_alloc(so, pfq_queue_total_mem(so)) < 0) { return -ENOMEM; } /* so->mem_addr and so->mem_size are correctly configured */ /* initialize queues headers */ queue = (struct pfq_queue_hdr *)so->mem_addr; /* initialize rx queue header */ queue->rx.data = (1L << 24); queue->rx.poll_wait = 0; queue->rx.size = so->rx_opt.size; queue->rx.slot_size = so->rx_opt.slot_size; queue->tx.producer.index = 0; queue->tx.producer.cache = 0; queue->tx.consumer.index = 0; queue->tx.consumer.cache = 0; queue->tx.size_mask = so->tx_opt.size - 1; queue->tx.max_len = so->tx_opt.maxlen; queue->tx.size = so->tx_opt.size; queue->tx.slot_size = so->tx_opt.slot_size; /* update the queues base_addr */ so->rx_opt.base_addr = so->mem_addr + sizeof(struct pfq_queue_hdr); so->tx_opt.base_addr = so->mem_addr + sizeof(struct pfq_queue_hdr) + pfq_queue_mpdb_mem(so); /* commit both the queues */ smp_wmb(); so->rx_opt.queue_ptr = &queue->rx; so->tx_opt.queue_ptr = &queue->tx; pr_devel("[PFQ|%d] queue: rx_size:%d rx_slot_size:%d tx_size:%d tx_slot_size:%d\n", so->id, queue->rx.size, queue->rx.slot_size, queue->tx.size, queue->tx.slot_size); } } else { if (so->tx_opt.thread) { pr_devel("[PFQ|%d] stopping TX thread...\n", so->id); kthread_stop(so->tx_opt.thread); so->tx_opt.thread = NULL; } msleep(Q_GRACE_PERIOD); pfq_shared_queue_free(so); } } break; case Q_SO_GROUP_BIND: { struct pfq_binding bind; if 
(optlen != sizeof(struct pfq_binding)) return -EINVAL; if (copy_from_user(&bind, optval, optlen)) return -EFAULT; CHECK_GROUP_ACCES(so->id, bind.gid, "add binding"); pfq_devmap_update(map_set, bind.if_index, bind.hw_queue, bind.gid); } break; case Q_SO_GROUP_UNBIND: { struct pfq_binding bind; if (optlen != sizeof(struct pfq_binding)) return -EINVAL; if (copy_from_user(&bind, optval, optlen)) return -EFAULT; CHECK_GROUP_ACCES(so->id, bind.gid, "remove binding"); pfq_devmap_update(map_reset, bind.if_index, bind.hw_queue, bind.gid); } break; case Q_SO_EGRESS_BIND: { struct pfq_binding info; if (optlen != sizeof(info)) return -EINVAL; if (copy_from_user(&info, optval, optlen)) return -EFAULT; rcu_read_lock(); if (!dev_get_by_index_rcu(sock_net(&so->sk), info.if_index)) { rcu_read_unlock(); pr_devel("[PFQ|%d] TX bind: invalid if_index:%d\n", so->id, info.if_index); return -EPERM; } rcu_read_unlock(); if (info.hw_queue < -1) { pr_devel("[PFQ|%d] TX bind: invalid queue:%d\n", so->id, info.hw_queue); return -EPERM; } so->egress_index = info.if_index; so->egress_queue = info.hw_queue; pr_devel("[PFQ|%d] egress bind: if_index:%d hw_queue:%d\n", so->id, so->egress_index, so->egress_queue); } break; case Q_SO_EGRESS_UNBIND: { so->egress_index = 0; so->egress_queue = 0; pr_devel("[PFQ|%d] egress unbind.\n", so->id); } break; case Q_SO_SET_RX_TSTAMP: { int tstamp; if (optlen != sizeof(so->rx_opt.tstamp)) return -EINVAL; if (copy_from_user(&tstamp, optval, optlen)) return -EFAULT; tstamp = tstamp ? 
1 : 0; /* update the timestamp_enabled counter */ atomic_add(tstamp - so->rx_opt.tstamp, ×tamp_enabled); so->rx_opt.tstamp = tstamp; pr_devel("[PFQ|%d] timestamp_enabled counter: %d\n", so->id, atomic_read(×tamp_enabled)); } break; case Q_SO_SET_RX_CAPLEN: { typeof(so->rx_opt.caplen) caplen; if (optlen != sizeof(caplen)) return -EINVAL; if (copy_from_user(&caplen, optval, optlen)) return -EFAULT; if (caplen > (size_t)cap_len) { pr_devel("[PFQ|%d] invalid caplen:%zu (max: %d)\n", so->id, caplen, cap_len); return -EPERM; } so->rx_opt.caplen = caplen; so->rx_opt.slot_size = MPDB_QUEUE_SLOT_SIZE(so->rx_opt.caplen); pr_devel("[PFQ|%d] caplen:%zu -> slot_size:%zu\n", so->id, so->rx_opt.caplen, so->rx_opt.slot_size); } break; case Q_SO_SET_RX_SLOTS: { typeof(so->rx_opt.size) slots; if (optlen != sizeof(slots)) return -EINVAL; if (copy_from_user(&slots, optval, optlen)) return -EFAULT; if (slots > (size_t)rx_queue_slots) { pr_devel("[PFQ|%d] invalid rx slots:%zu (max: %d)\n", so->id, slots, rx_queue_slots); return -EPERM; } so->rx_opt.size = slots; pr_devel("[PFQ|%d] rx_queue_slots:%zu\n", so->id, so->rx_opt.size); } break; case Q_SO_SET_TX_MAXLEN: { typeof (so->tx_opt.maxlen) maxlen; if (optlen != sizeof(maxlen)) return -EINVAL; if (copy_from_user(&maxlen, optval, optlen)) return -EFAULT; if (maxlen > (size_t)max_len) { pr_devel("[PFQ|%d] invalid maxlen:%zu (max: %d)\n", so->id, maxlen, max_len); return -EPERM; } so->tx_opt.maxlen = maxlen; so->tx_opt.slot_size = SPSC_QUEUE_SLOT_SIZE(so->tx_opt.maxlen); /* max_len: max length */ pr_devel("[PFQ|%d] tx_slot_size:%zu\n", so->id, so->rx_opt.slot_size); } break; case Q_SO_SET_TX_SLOTS: { typeof (so->tx_opt.size) slots; if (optlen != sizeof(slots)) return -EINVAL; if (copy_from_user(&slots, optval, optlen)) return -EFAULT; if (slots & (slots-1)) { pr_devel("[PFQ|%d] tx slots must be a power of two.\n", so->id); return -EINVAL; } if (slots > (size_t)tx_queue_slots) { pr_devel("[PFQ|%d] invalid tx slots:%zu (max: %d)\n", so->id, 
slots, tx_queue_slots); return -EPERM; } so->tx_opt.size = slots; pr_devel("[PFQ|%d] tx_queue_slots:%zu\n", so->id, so->tx_opt.size); } break; case Q_SO_GROUP_LEAVE: { int gid; if (optlen != sizeof(gid)) return -EINVAL; if (copy_from_user(&gid, optval, optlen)) return -EFAULT; if (pfq_leave_group(gid, so->id) < 0) { return -EFAULT; } pr_devel("[PFQ|%d] leave: gid:%d\n", so->id, gid); } break; case Q_SO_GROUP_FPROG: { struct pfq_fprog fprog; if (optlen != sizeof(fprog)) return -EINVAL; if (copy_from_user(&fprog, optval, optlen)) return -EFAULT; CHECK_GROUP_ACCES(so->id, fprog.gid, "group fprog"); if (fprog.fcode.len > 0) /* set the filter */ { struct sk_filter *filter = pfq_alloc_sk_filter(&fprog.fcode); if (filter == NULL) { pr_devel("[PFQ|%d] fprog error: alloc_sk_filter for gid:%d\n", so->id, fprog.gid); return -EINVAL; } __pfq_set_group_filter(fprog.gid, filter); pr_devel("[PFQ|%d] fprog: gid:%d (fprog len %d bytes)\n", so->id, fprog.gid, fprog.fcode.len); } else /* reset the filter */ { __pfq_set_group_filter(fprog.gid, NULL); pr_devel("[PFQ|%d] fprog: gid:%d (resetting filter)\n", so->id, fprog.gid); } } break; case Q_SO_GROUP_VLAN_FILT_TOGGLE: { struct pfq_vlan_toggle vlan; if (optlen != sizeof(vlan)) return -EINVAL; if (copy_from_user(&vlan, optval, optlen)) return -EFAULT; CHECK_GROUP_ACCES(so->id, vlan.gid, "group vlan filt toggle"); __pfq_toggle_group_vlan_filters(vlan.gid, vlan.toggle); pr_devel("[PFQ|%d] vlan filters %s for gid:%d\n", so->id, (vlan.toggle ? 
"enabled" : "disabled"), vlan.gid); } break; case Q_SO_GROUP_VLAN_FILT: { struct pfq_vlan_toggle filt; if (optlen != sizeof(filt)) return -EINVAL; if (copy_from_user(&filt, optval, optlen)) return -EFAULT; CHECK_GROUP_ACCES(so->id, filt.gid, "group vlan filt"); if (filt.vid < -1 || filt.vid > 4094) { pr_devel("[PFQ|%d] vlan_set error: gid:%d invalid vid:%d!\n", so->id, filt.gid, filt.vid); return -EINVAL; } if (!__pfq_vlan_filters_enabled(filt.gid)) { pr_devel("[PFQ|%d] vlan_set error: vlan filters disabled for gid:%d!\n", so->id, filt.gid); return -EPERM; } if (filt.vid == -1) /* any */ { int i; for(i = 1; i < 4095; i++) __pfq_set_group_vlan_filter(filt.gid, filt.toggle, i); } else { __pfq_set_group_vlan_filter(filt.gid, filt.toggle, filt.vid); } pr_devel("[PFQ|%d] vlan_set filter vid %d for gid:%d\n", so->id, filt.vid, filt.gid); } break; case Q_SO_TX_THREAD_BIND: { struct pfq_binding info; if (optlen != sizeof(info)) return -EINVAL; if (copy_from_user(&info, optval, optlen)) return -EFAULT; rcu_read_lock(); if (!dev_get_by_index_rcu(sock_net(&so->sk), info.if_index)) { rcu_read_unlock(); pr_devel("[PFQ|%d] TX bind: invalid if_index:%d\n", so->id, info.if_index); return -EPERM; } rcu_read_unlock(); if (info.hw_queue < -1) { pr_devel("[PFQ|%d] TX bind: invalid queue:%d\n", so->id, info.hw_queue); return -EPERM; } to->if_index = info.if_index; to->hw_queue = info.hw_queue; pr_devel("[PFQ|%d] TX bind: if_index:%d hw_queue:%d\n", so->id, to->if_index, to->hw_queue); } break; case Q_SO_TX_THREAD_START: { int cpu; if (to->thread) { pr_devel("[PFQ|%d] TX thread already created on cpu %d!\n", so->id, to->cpu); return -EPERM; } if (to->if_index == -1) { pr_devel("[PFQ|%d] socket TX not bound to any device!\n", so->id); return -EPERM; } if (to->queue_ptr == NULL) { pr_devel("[PFQ|%d] socket not enabled!\n", so->id); return -EPERM; } if (optlen != sizeof(cpu)) return -EINVAL; if (copy_from_user(&cpu, optval, optlen)) return -EFAULT; if (cpu < -1 || (cpu > -1 && 
!cpu_online(cpu))) { pr_devel("[PFQ|%d] invalid cpu (%d)!\n", so->id, cpu); return -EPERM; } to->cpu = cpu; pr_devel("[PFQ|%d] creating TX thread on cpu %d -> if_index:%d hw_queue:%d\n", so->id, to->cpu, to->if_index, to->hw_queue); to->thread = kthread_create_on_node(pfq_tx_thread, so, to->cpu == -1 ? -1 : cpu_to_node(to->cpu), "pfq_tx_%d", so->id); if (IS_ERR(to->thread)) { printk(KERN_INFO "[PFQ] kernel_thread() create failed on cpu %d!\n", to->cpu); return PTR_ERR(to->thread); } if (to->cpu != -1) kthread_bind(to->thread, to->cpu); } break; case Q_SO_TX_THREAD_STOP: { pr_devel("[PFQ|%d] stopping TX thread...\n", so->id); if (!to->thread) { pr_devel("[PFQ|%d] TX thread not running!\n", so->id); return -EPERM; } kthread_stop(to->thread); to->thread = NULL; pr_devel("[PFQ|%d] stop TX thread: done.\n", so->id); } break; case Q_SO_TX_THREAD_WAKEUP: { if (to->if_index == -1) { pr_devel("[PFQ|%d] socket TX not bound to any device!\n", so->id); return -EPERM; } if (!to->thread) { pr_devel("[PFQ|%d] TX thread not running!\n", so->id); return -EPERM; } wake_up_process(to->thread); } break; case Q_SO_TX_QUEUE_FLUSH: { struct net_device *dev; if (to->if_index == -1) { pr_devel("[PFQ|%d] socket TX not bound to any device!\n", so->id); return -EPERM; } if (to->thread && to->thread->state == TASK_RUNNING) { pr_devel("[PFQ|%d] TX thread is running!\n", so->id); return -EPERM; } if (to->queue_ptr == NULL) { pr_devel("[PFQ|%d] socket not enabled!\n", so->id); return -EPERM; } dev = dev_get_by_index(sock_net(&so->sk), to->if_index); if (!dev) { pr_devel("[PFQ|%d] No such device (if_index = %d)\n", so->id, to->if_index); return -EPERM; } pfq_tx_queue_flush(to, dev, get_cpu(), NUMA_NO_NODE); put_cpu(); dev_put(dev); } break; case Q_SO_GROUP_FUNCTION: { struct pfq_group_computation tmp; struct pfq_computation_descr *descr; size_t psize, ucsize; struct pfq_computation_tree *comp; void *context; if (optlen != sizeof(tmp)) return -EINVAL; if (copy_from_user(&tmp, optval, optlen)) 
return -EFAULT; CHECK_GROUP_ACCES(so->id, tmp.gid, "group computation"); if (copy_from_user(&psize, tmp.prog, sizeof(size_t))) return -EFAULT; pr_devel("[PFQ|%d] computation size: %zu\n", so->id, psize); ucsize = sizeof(size_t) * 2 + psize * sizeof(struct pfq_functional_descr); descr = kmalloc(ucsize, GFP_KERNEL); if (descr == NULL) { pr_devel("[PFQ|%d] computation: out of memory!\n", so->id); return -ENOMEM; } if (copy_from_user(descr, tmp.prog, ucsize)) { pr_devel("[PFQ|%d] computation: copy_from_user error!\n", so->id); kfree(descr); return -EFAULT; } /* print user computation */ pr_devel_computation_descr(descr); /* ensure the correctness of the specified functional computation */ if (pfq_validate_computation_descr(descr) < 0) { pr_devel("[PFQ|%d] invalid expression!\n", so->id); return -EFAULT; } /* allocate context */ context = pfq_context_alloc(descr); if (context == NULL) { pr_devel("[PFQ|%d] context: alloc error!\n", so->id); kfree(descr); return -EFAULT; } /* allocate struct pfq_computation_tree */ comp = pfq_computation_alloc(descr); if (comp == NULL) { pr_devel("[PFQ|%d] computation: alloc error!\n", so->id); kfree(context); kfree(descr); return -EFAULT; } /* link the functional computation */ if (pfq_computation_rtlink(descr, comp, context) < 0) { pr_devel("[PFQ|%d] computation aborted!", so->id); kfree(context); kfree(descr); kfree(comp); return -EPERM; } /* print executable tree data structure */ pr_devel_computation_tree(comp); /* exec init functions */ if (pfq_computation_init(comp) < 0) { pr_devel("[PFQ|%d] computation initialization aborted!", so->id); kfree(context); kfree(descr); kfree(comp); return -EPERM; } /* set the new program */ if (pfq_set_group_prog(tmp.gid, comp, context) < 0) { pr_devel("[PFQ|%d] set group program error!\n", so->id); kfree(context); kfree(descr); kfree(comp); return -EPERM; } kfree(descr); return 0; } break; default: { found = false; } break; } return found ? 0 : sock_setsockopt(sock, level, optname, optval, optlen); }
int pfq_setsockopt(struct socket *sock, int level, int optname, char __user * optval, #if(LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31)) unsigned #endif int optlen) { struct pfq_sock *so = pfq_sk(sock->sk); bool found = true; if (so == NULL) return -EINVAL; switch(optname) { case Q_SO_ENABLE: { unsigned long addr; int err = 0; if (optlen != sizeof(addr)) return -EINVAL; if (copy_from_user(&addr, optval, optlen)) return -EFAULT; err = pfq_shared_queue_enable(so, addr); if (err < 0) { printk(KERN_INFO "[PFQ|%d] enable error!\n", so->id.value); return err; } return 0; } break; case Q_SO_DISABLE: { int err = 0; size_t n; for(n = 0; n < so->tx_opt.num_queues; n++) { if (so->tx_opt.queue[n].task) { pr_devel("[PFQ|%d] stopping Tx[%zu] thread@%p\n", so->id.value, n, so->tx_opt.queue[n].task); kthread_stop(so->tx_opt.queue[n].task); so->tx_opt.queue[n].task = NULL; } } err = pfq_shared_queue_disable(so); if (err < 0) { printk(KERN_INFO "[PFQ|%d] disable error!\n", so->id.value); return err; } } break; case Q_SO_GROUP_BIND: { struct pfq_binding bind; pfq_gid_t gid; if (optlen != sizeof(struct pfq_binding)) return -EINVAL; if (copy_from_user(&bind, optval, optlen)) return -EFAULT; gid.value = bind.gid; if (!pfq_has_joined_group(gid, so->id)) { printk(KERN_INFO "[PFQ|%d] add bind: gid=%d not joined!\n", so->id.value, bind.gid); return -EACCES; } rcu_read_lock(); if (!dev_get_by_index_rcu(sock_net(&so->sk), bind.if_index)) { rcu_read_unlock(); printk(KERN_INFO "[PFQ|%d] bind: invalid if_index=%d!\n", so->id.value, bind.if_index); return -EACCES; } rcu_read_unlock(); pfq_devmap_update(map_set, bind.if_index, bind.hw_queue, gid); } break; case Q_SO_GROUP_UNBIND: { struct pfq_binding bind; pfq_gid_t gid; if (optlen != sizeof(struct pfq_binding)) return -EINVAL; if (copy_from_user(&bind, optval, optlen)) return -EFAULT; gid.value = bind.gid; if (!pfq_has_joined_group(gid, so->id)) { printk(KERN_INFO "[PFQ|%d] remove bind: gid=%d not joined!\n", so->id.value, bind.gid); return 
-EACCES; } rcu_read_lock(); if (!dev_get_by_index_rcu(sock_net(&so->sk), bind.if_index)) { rcu_read_unlock(); printk(KERN_INFO "[PFQ|%d] unbind: invalid if_index=%d\n", so->id.value, bind.if_index); return -EPERM; } rcu_read_unlock(); pfq_devmap_update(map_reset, bind.if_index, bind.hw_queue, gid); } break; case Q_SO_EGRESS_BIND: { struct pfq_binding info; if (optlen != sizeof(info)) return -EINVAL; if (copy_from_user(&info, optval, optlen)) return -EFAULT; rcu_read_lock(); if (!dev_get_by_index_rcu(sock_net(&so->sk), info.if_index)) { rcu_read_unlock(); printk(KERN_INFO "[PFQ|%d] egress bind: invalid if_index=%d\n", so->id.value, info.if_index); return -EPERM; } rcu_read_unlock(); if (info.hw_queue < -1) { printk(KERN_INFO "[PFQ|%d] egress bind: invalid queue=%d\n", so->id.value, info.hw_queue); return -EPERM; } so->egress_type = pfq_endpoint_device; so->egress_index = info.if_index; so->egress_queue = info.hw_queue; pr_devel("[PFQ|%d] egress bind: device if_index=%d hw_queue=%d\n", so->id.value, so->egress_index, so->egress_queue); } break; case Q_SO_EGRESS_UNBIND: { so->egress_type = pfq_endpoint_socket; so->egress_index = 0; so->egress_queue = 0; pr_devel("[PFQ|%d] egress unbind.\n", so->id.value); } break; case Q_SO_SET_RX_TSTAMP: { int tstamp; if (optlen != sizeof(so->rx_opt.tstamp)) return -EINVAL; if (copy_from_user(&tstamp, optval, optlen)) return -EFAULT; tstamp = tstamp ? 
1 : 0; so->rx_opt.tstamp = tstamp; pr_devel("[PFQ|%d] timestamp enabled.\n", so->id.value); } break; case Q_SO_SET_RX_CAPLEN: { typeof(so->rx_opt.caplen) caplen; if (optlen != sizeof(caplen)) return -EINVAL; if (copy_from_user(&caplen, optval, optlen)) return -EFAULT; if (caplen > (size_t)cap_len) { printk(KERN_INFO "[PFQ|%d] invalid caplen=%zu (max %d)\n", so->id.value, caplen, cap_len); return -EPERM; } so->rx_opt.caplen = caplen; so->rx_opt.slot_size = Q_MPDB_QUEUE_SLOT_SIZE(so->rx_opt.caplen); pr_devel("[PFQ|%d] caplen=%zu, slot_size=%zu\n", so->id.value, so->rx_opt.caplen, so->rx_opt.slot_size); } break; case Q_SO_SET_RX_SLOTS: { typeof(so->rx_opt.queue_size) slots; if (optlen != sizeof(slots)) return -EINVAL; if (copy_from_user(&slots, optval, optlen)) return -EFAULT; if (slots > (size_t)max_queue_slots) { printk(KERN_INFO "[PFQ|%d] invalid Rx slots=%zu (max %d)\n", so->id.value, slots, max_queue_slots); return -EPERM; } so->rx_opt.queue_size = slots; pr_devel("[PFQ|%d] rx_queue slots=%zu\n", so->id.value, so->rx_opt.queue_size); } break; case Q_SO_SET_TX_SLOTS: { typeof (so->tx_opt.queue_size) slots; if (optlen != sizeof(slots)) return -EINVAL; if (copy_from_user(&slots, optval, optlen)) return -EFAULT; if (slots > (size_t)max_queue_slots) { printk(KERN_INFO "[PFQ|%d] invalid Tx slots=%zu (max %d)\n", so->id.value, slots, max_queue_slots); return -EPERM; } so->tx_opt.queue_size = slots; pr_devel("[PFQ|%d] tx_queue slots=%zu\n", so->id.value, so->tx_opt.queue_size); } break; case Q_SO_GROUP_LEAVE: { pfq_gid_t gid; if (optlen != sizeof(gid.value)) return -EINVAL; if (copy_from_user(&gid.value, optval, optlen)) return -EFAULT; if (pfq_leave_group(gid, so->id) < 0) return -EFAULT; pr_devel("[PFQ|%d] leave: gid=%d\n", so->id.value, gid.value); } break; case Q_SO_GROUP_FPROG: { struct pfq_fprog fprog; pfq_gid_t gid; if (optlen != sizeof(fprog)) return -EINVAL; if (copy_from_user(&fprog, optval, optlen)) return -EFAULT; gid.value = fprog.gid; if 
(!pfq_has_joined_group(gid, so->id)) { /* don't set the first and return */ return 0; } if (fprog.fcode.len > 0) { /* set the filter */ struct sk_filter *filter; if (fprog.fcode.len == 1) { /* check for dummey BPF_CLASS == BPF_RET */ if (BPF_CLASS(fprog.fcode.filter[0].code) == BPF_RET) { pr_devel("[PFQ|%d] fprog: BPF_RET optimized out!\n", so->id.value); return 0; } } filter = pfq_alloc_sk_filter(&fprog.fcode); if (filter == NULL) { printk(KERN_INFO "[PFQ|%d] fprog error: alloc_sk_filter for gid=%d\n", so->id.value, fprog.gid); return -EINVAL; } pfq_set_group_filter(gid, filter); pr_devel("[PFQ|%d] fprog: gid=%d (fprog len %d bytes)\n", so->id.value, fprog.gid, fprog.fcode.len); } else { /* reset the filter */ pfq_set_group_filter(gid, NULL); pr_devel("[PFQ|%d] fprog: gid=%d (resetting filter)\n", so->id.value, fprog.gid); } } break; case Q_SO_GROUP_VLAN_FILT_TOGGLE: { struct pfq_vlan_toggle vlan; pfq_gid_t gid; if (optlen != sizeof(vlan)) return -EINVAL; if (copy_from_user(&vlan, optval, optlen)) return -EFAULT; gid.value = vlan.gid; if (!pfq_has_joined_group(gid, so->id)) { printk(KERN_INFO "[PFQ|%d] vlan filter toggle: gid=%d not joined!\n", so->id.value, vlan.gid); return -EACCES; } pfq_toggle_group_vlan_filters(gid, vlan.toggle); pr_devel("[PFQ|%d] vlan filters %s for gid=%d\n", so->id.value, (vlan.toggle ? 
"enabled" : "disabled"), vlan.gid); } break; case Q_SO_GROUP_VLAN_FILT: { struct pfq_vlan_toggle filt; pfq_gid_t gid; if (optlen != sizeof(filt)) return -EINVAL; if (copy_from_user(&filt, optval, optlen)) return -EFAULT; gid.value = filt.gid; if (!pfq_has_joined_group(gid, so->id)) { printk(KERN_INFO "[PFQ|%d] vlan filter: gid=%d not joined!\n", so->id.value, filt.gid); return -EACCES; } if (filt.vid < -1 || filt.vid > 4094) { printk(KERN_INFO "[PFQ|%d] vlan error: invalid vid=%d for gid=%d!\n", so->id.value, filt.vid, filt.gid); return -EINVAL; } if (!pfq_vlan_filters_enabled(gid)) { printk(KERN_INFO "[PFQ|%d] vlan error: vlan filters disabled for gid=%d!\n", so->id.value, filt.gid); return -EPERM; } if (filt.vid == -1) { /* any */ int i; for(i = 1; i < 4095; i++) { pfq_set_group_vlan_filter(gid, filt.toggle, i); } } else { pfq_set_group_vlan_filter(gid, filt.toggle, filt.vid); } pr_devel("[PFQ|%d] vlan filter vid %d set for gid=%d\n", so->id.value, filt.vid, filt.gid); } break; case Q_SO_TX_BIND: { struct pfq_binding info; size_t i; if (optlen != sizeof(info)) return -EINVAL; if (copy_from_user(&info, optval, optlen)) return -EFAULT; if (so->tx_opt.num_queues >= Q_MAX_TX_QUEUES) { printk(KERN_INFO "[PFQ|%d] Tx bind: max number of queues exceeded!\n", so->id.value); return -EPERM; } rcu_read_lock(); if (!dev_get_by_index_rcu(sock_net(&so->sk), info.if_index)) { rcu_read_unlock(); printk(KERN_INFO "[PFQ|%d] Tx bind: invalid if_index=%d\n", so->id.value, info.if_index); return -EPERM; } rcu_read_unlock(); if (info.hw_queue < -1) { printk(KERN_INFO "[PFQ|%d] Tx bind: invalid queue=%d\n", so->id.value, info.hw_queue); return -EPERM; } i = so->tx_opt.num_queues; if (info.cpu < -1) { printk(KERN_INFO "[PFQ|%d] Tx[%zu] thread: invalid cpu (%d)!\n", so->id.value, i, info.cpu); return -EPERM; } so->tx_opt.queue[i].if_index = info.if_index; so->tx_opt.queue[i].hw_queue = info.hw_queue; so->tx_opt.queue[i].cpu = info.cpu; so->tx_opt.num_queues++; pr_devel("[PFQ|%d] Tx[%zu] 
bind: if_index=%d hw_queue=%d cpu=%d\n", so->id.value, i, so->tx_opt.queue[i].if_index, so->tx_opt.queue[i].hw_queue, info.cpu); } break; case Q_SO_TX_UNBIND: { size_t n; for(n = 0; n < Q_MAX_TX_QUEUES; ++n) { so->tx_opt.queue[n].if_index = -1; so->tx_opt.queue[n].hw_queue = -1; so->tx_opt.queue[n].cpu = -1; } } break; case Q_SO_TX_FLUSH: { int queue, err = 0; size_t n; if (optlen != sizeof(queue)) return -EINVAL; if (copy_from_user(&queue, optval, optlen)) return -EFAULT; if (pfq_get_tx_queue(&so->tx_opt, 0) == NULL) { printk(KERN_INFO "[PFQ|%d] Tx queue flush: socket not enabled!\n", so->id.value); return -EPERM; } if (queue < -1 || (queue > 0 && queue >= so->tx_opt.num_queues)) { printk(KERN_INFO "[PFQ|%d] Tx queue flush: bad queue %d (num_queue=%zu)!\n", so->id.value, queue, so->tx_opt.num_queues); return -EPERM; } if (queue != -1) { pr_devel("[PFQ|%d] flushing Tx queue %d...\n", so->id.value, queue); return pfq_queue_flush(so, queue); } for(n = 0; n < so->tx_opt.num_queues; n++) { if (pfq_queue_flush(so, n) != 0) { printk(KERN_INFO "[PFQ|%d] Tx[%zu] queue flush: flush error (if_index=%d)!\n", so->id.value, n, so->tx_opt.queue[n].if_index); err = -EPERM; } } if (err) return err; } break; case Q_SO_TX_ASYNC: { int toggle, err = 0; size_t n; if (optlen != sizeof(toggle)) return -EINVAL; if (copy_from_user(&toggle, optval, optlen)) return -EFAULT; if (toggle) { size_t started = 0; if (pfq_get_tx_queue(&so->tx_opt, 0) == NULL) { printk(KERN_INFO "[PFQ|%d] Tx queue flush: socket not enabled!\n", so->id.value); return -EPERM; } /* start Tx kernel threads */ for(n = 0; n < Q_MAX_TX_QUEUES; n++) { struct pfq_thread_data *data; int node; if (so->tx_opt.queue[n].if_index == -1) break; if (so->tx_opt.queue[n].cpu == Q_NO_KTHREAD) continue; if (so->tx_opt.queue[n].task) { printk(KERN_INFO "[PFQ|%d] kernel_thread: Tx[%zu] thread already running!\n", so->id.value, n); continue; } data = kmalloc(sizeof(struct pfq_thread_data), GFP_KERNEL); if (!data) { printk(KERN_INFO 
"[PFQ|%d] kernel_thread: could not allocate thread_data! Failed starting thread on cpu %d!\n", so->id.value, so->tx_opt.queue[n].cpu); err = -EPERM; continue; } data->so = so; data->id = n; node = cpu_online(so->tx_opt.queue[n].cpu) ? cpu_to_node(so->tx_opt.queue[n].cpu) : NUMA_NO_NODE; pr_devel("[PFQ|%d] creating Tx[%zu] thread on cpu %d: if_index=%d hw_queue=%d\n", so->id.value, n, so->tx_opt.queue[n].cpu, so->tx_opt.queue[n].if_index, so->tx_opt.queue[n].hw_queue); so->tx_opt.queue[n].task = kthread_create_on_node(pfq_tx_thread, data, node, "pfq_tx_%d#%zu", so->id.value, n); if (IS_ERR(so->tx_opt.queue[n].task)) { printk(KERN_INFO "[PFQ|%d] kernel_thread: create failed on cpu %d!\n", so->id.value, so->tx_opt.queue[n].cpu); err = PTR_ERR(so->tx_opt.queue[n].task); so->tx_opt.queue[n].task = NULL; kfree (data); continue; } /* bind the thread */ kthread_bind(so->tx_opt.queue[n].task, so->tx_opt.queue[n].cpu); /* start it */ wake_up_process(so->tx_opt.queue[n].task); started++; } if (started == 0) { printk(KERN_INFO "[PFQ|%d] no kernel thread started!\n", so->id.value); err = -EPERM; } } else { /* stop running threads */ for(n = 0; n < so->tx_opt.num_queues; n++) { if (so->tx_opt.queue[n].task) { pr_devel("[PFQ|%d] stopping Tx[%zu] kernel thread@%p\n", so->id.value, n, so->tx_opt.queue[n].task); kthread_stop(so->tx_opt.queue[n].task); so->tx_opt.queue[n].task = NULL; } } } return err; } break; case Q_SO_GROUP_FUNCTION: { struct pfq_computation_descr *descr = NULL; struct pfq_computation_tree *comp = NULL; struct pfq_group_computation tmp; size_t psize, ucsize; void *context = NULL; pfq_gid_t gid; int err = 0; if (optlen != sizeof(tmp)) return -EINVAL; if (copy_from_user(&tmp, optval, optlen)) return -EFAULT; gid.value = tmp.gid; if (!pfq_has_joined_group(gid, so->id)) { printk(KERN_INFO "[PFQ|%d] group computation: gid=%d not joined!\n", so->id.value, tmp.gid); return -EACCES; } if (copy_from_user(&psize, tmp.prog, sizeof(size_t))) return -EFAULT; pr_devel("[PFQ|%d] 
computation size: %zu\n", so->id.value, psize); ucsize = sizeof(size_t) * 2 + psize * sizeof(struct pfq_functional_descr); descr = kmalloc(ucsize, GFP_KERNEL); if (descr == NULL) { printk(KERN_INFO "[PFQ|%d] computation: out of memory!\n", so->id.value); return -ENOMEM; } if (copy_from_user(descr, tmp.prog, ucsize)) { printk(KERN_INFO "[PFQ|%d] computation: copy_from_user error!\n", so->id.value); err = -EFAULT; goto error; } /* print user computation */ pr_devel_computation_descr(descr); /* check the correctness of computation */ if (pfq_check_computation_descr(descr) < 0) { printk(KERN_INFO "[PFQ|%d] invalid expression!\n", so->id.value); err = -EFAULT; goto error; } /* allocate context */ context = pfq_context_alloc(descr); if (context == NULL) { printk(KERN_INFO "[PFQ|%d] context: alloc error!\n", so->id.value); err = -EFAULT; goto error; } /* allocate a pfq_computation_tree */ comp = pfq_computation_alloc(descr); if (comp == NULL) { printk(KERN_INFO "[PFQ|%d] computation: alloc error!\n", so->id.value); err = -EFAULT; goto error; } /* link functions of computation */ if (pfq_computation_rtlink(descr, comp, context) < 0) { printk(KERN_INFO "[PFQ|%d] computation aborted!", so->id.value); err = -EPERM; goto error; } /* print executable tree data structure */ pr_devel_computation_tree(comp); /* run init functions */ if (pfq_computation_init(comp) < 0) { printk(KERN_INFO "[PFQ|%d] initialization of computation aborted!", so->id.value); pfq_computation_fini(comp); err = -EPERM; goto error; } /* enable functional program */ if (pfq_set_group_prog(gid, comp, context) < 0) { printk(KERN_INFO "[PFQ|%d] set group program error!\n", so->id.value); err = -EPERM; goto error; } kfree(descr); return 0; error: kfree(comp); kfree(context); kfree(descr); return err; } break; default: { found = false; } break; } return found ? 0 : sock_setsockopt(sock, level, optname, optval, optlen); }
/*
 * debugfs_run_write - debugfs write handler that toggles a perf run
 * @filp:  open file; ->private_data holds the struct perf_ctx
 * @ubuf:  user buffer (its contents are not read)
 * @count: number of bytes the caller asked to write
 * @offp:  file offset (not used)
 *
 * Any write acts as a toggle: if a run is flagged as in progress the
 * threads are torn down via threads_cleanup(); otherwise up to
 * perf->perf_threads kernel threads running ntb_perf_thread() are created
 * on the NUMA node of the NTB PCI device and woken.
 *
 * Before starting, the tunables are sanitised: perf_threads is capped at
 * MAX_THREADS, seg_order is capped at MAX_SEG_ORDER and run_order is raised
 * to at least seg_order.
 *
 * Return: @count on success (start or stop), -ENOLINK if the link is down,
 * -EINVAL if no threads are configured, -ENXIO if a thread could not be
 * created or the run flag was cleared while threads were being started.
 */
static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf,
				 size_t count, loff_t *offp)
{
	struct perf_ctx *perf = filp->private_data;
	int node, i;

	/*
	 * Report a real error instead of 0: a write handler that returns 0
	 * for a non-empty buffer makes retrying callers spin forever.
	 */
	if (!perf->link_is_up)
		return -ENOLINK;

	if (perf->perf_threads == 0)
		return -EINVAL;

	/*
	 * NOTE(review): tsync presumably tracks live worker threads; when it
	 * reads zero a stale run flag is dropped so this write starts a new
	 * run instead of stopping one - confirm against ntb_perf_thread().
	 */
	if (atomic_read(&perf->tsync) == 0)
		perf->run = false;

	if (perf->run) {
		threads_cleanup(perf);
	} else {
		perf->run = true;

		if (perf->perf_threads > MAX_THREADS) {
			perf->perf_threads = MAX_THREADS;
			pr_info("Reset total threads to: %u\n", MAX_THREADS);
		}

		/* no greater than 1M */
		if (seg_order > MAX_SEG_ORDER) {
			seg_order = MAX_SEG_ORDER;
			pr_info("Fix seg_order to %u\n", seg_order);
		}

		if (run_order < seg_order) {
			run_order = seg_order;
			pr_info("Fix run_order to %u\n", run_order);
		}

		node = dev_to_node(&perf->ntb->pdev->dev);

		/* launch kernel threads */
		for (i = 0; i < perf->perf_threads; i++) {
			struct pthr_ctx *pctx;

			pctx = &perf->pthr_ctx[i];
			atomic_set(&pctx->dma_sync, 0);
			pctx->perf = perf;
			pctx->thread =
				kthread_create_on_node(ntb_perf_thread,
						       (void *)pctx,
						       node, "ntb_perf %d", i);
			if (IS_ERR(pctx->thread)) {
				/* never leave an ERR_PTR behind for cleanup */
				pctx->thread = NULL;
				goto err;
			}

			wake_up_process(pctx->thread);

			/*
			 * NOTE(review): run is only cleared outside this
			 * function once set above; presumably a worker that
			 * failed early did it - confirm. Bail out without
			 * spawning the remaining threads.
			 */
			if (perf->run == false)
				return -ENXIO;
		}
	}

	return count;

err:
	threads_cleanup(perf);
	return -ENXIO;
}