/**
 * eth_process_reclaim - processes packets that have completed sending
 */
void eth_process_reclaim(void)
{
	int i;
	struct eth_tx_queue *txq;

	for (i = 0; i < percpu_get(eth_num_queues); i++) {
		txq = percpu_get(eth_txqs[i]);
		txq->cap = eth_tx_reclaim(txq);
	}
}
/**
 * set_current_queue_by_index - selects the current RX queue by index
 * @index: the queue index on this CPU
 *
 * Returns 0 if successful, otherwise 1 if the index is out of range.
 */
int set_current_queue_by_index(unsigned int index)
{
	if (index >= percpu_get(eth_num_queues)) {
		unset_current_queue();
		return 1;
	}

	set_current_queue(percpu_get(eth_rxqs)[index]);
	return 0;
}
/**
 * eth_process_poll - polls HW for new packets
 *
 * Returns the number of new packets received.
 */
int eth_process_poll(void)
{
	int i, count = 0;
	struct eth_rx_queue *rxq;

	for (i = 0; i < percpu_get(eth_num_queues); i++) {
		rxq = percpu_get(eth_rxqs[i]);
		count += eth_rx_poll(rxq);
	}

	return count;
}
/*
 * Check the disassembly and ensure that GCC does not cache a
 * read result in registers for other reads.
 */
uint64_t percpu_inspect_caching(void)
{
	uint32_t x32;

	x32 = percpu_get(x32);
	x32 = percpu_get(x32);
	x32 = percpu_get(x32);
	x32 = percpu_get(x32);
	x32 = percpu_get(x32);

	return x32;
}
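/*
 * A minimal sketch of how such an accessor can be written so the
 * test above holds; this is an assumption for illustration, not
 * this codebase's actual percpu_get() definition. The "volatile"
 * qualifier forces GCC to re-issue the gs-relative load on every
 * use instead of caching the first result in a register.
 */
#define PERCPU_GET_SKETCH(var)				\
({							\
	typeof(var) __v;				\
	asm volatile("mov %%gs:%1, %0"			\
		     : "=r" (__v)			\
		     : "m" (var));			\
	__v;						\
})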
/**
 * eth_process_send - processes packets pending to be sent
 */
void eth_process_send(void)
{
	int i, nr;
	struct eth_tx_queue *txq;

	for (i = 0; i < percpu_get(eth_num_queues); i++) {
		txq = percpu_get(eth_txqs[i]);

		nr = eth_tx_xmit(txq, txq->len, txq->bufs);
		if (unlikely(nr != txq->len))
			panic("transmit buffer size mismatch\n");

		txq->len = 0;
	}
}
void logk(int level, const char *fmt, ...)
{
	va_list ptr;
	char buf[MAX_LOG_LEN];
	time_t ts;
	off_t off = 0;

	if (level > max_loglevel)
		return;

	if (!log_is_early_boot) {
		snprintf(buf, 9, "CPU %02d| ", percpu_get(cpu_id));
		off = strlen(buf);
	}

	time(&ts);
	off += strftime(buf + off, 32, "%H:%M:%S ", localtime(&ts));

	snprintf(buf + off, 6, "<%d>: ", level);
	off = strlen(buf);

	va_start(ptr, fmt);
	vsnprintf(buf + off, MAX_LOG_LEN - off, fmt, ptr);
	va_end(ptr);

	printf("%s", buf);
}
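/*
 * A usage sketch for logk(); LOG_INFO is an assumed level constant,
 * not necessarily one this codebase defines. After early boot, each
 * line is prefixed with the CPU id, a timestamp, and the level,
 * e.g. "CPU 02| 14:31:07 <3>: ...".
 */
static void logk_usage_sketch(int nr_packets)
{
	logk(LOG_INFO, "eth: received %d packets\n", nr_packets);
}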
/*
 * Check the disassembly output of the commands below and ensure
 * that GCC sizes the resulting opcodes correctly ('mov{q,l,w,b}').
 */
uint64_t percpu_inspect_size(void)
{
	uint64_t x64;

	x64 = percpu_get(x64);
	x64 = percpu_get(x32);
	x64 = percpu_get(x16);
	x64 = percpu_get(x8);

	percpu_set(x64, 0x1111);
	percpu_set(x32, 5);
	percpu_set(x16, 0xdead);
	percpu_set(x8, 0xff);

	return x64;
}
int memp_init_cpu(void)
{
	int cpu = percpu_get(cpu_id);

	if (mempool_create(&percpu_get(pbuf_mempool), &pbuf_ds,
			   MEMPOOL_SANITY_PERCPU, cpu))
		return 1;
	if (mempool_create(&percpu_get(pbuf_with_payload_mempool),
			   &pbuf_with_payload_ds, MEMPOOL_SANITY_PERCPU, cpu))
		return 1;
	if (mempool_create(&percpu_get(tcp_pcb_mempool), &tcp_pcb_ds,
			   MEMPOOL_SANITY_PERCPU, cpu))
		return 1;
	if (mempool_create(&percpu_get(tcp_seg_mempool), &tcp_seg_ds,
			   MEMPOOL_SANITY_PERCPU, cpu))
		return 1;

	return 0;
}
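/*
 * A round-trip sketch against one of the pools initialized above.
 * The mempool_alloc()/mempool_free() usage mirrors mem_malloc() and
 * mem_free() below; the helper itself is illustrative only.
 */
static int pbuf_roundtrip_sketch(void)
{
	void *buf = mempool_alloc(&percpu_get(pbuf_mempool));

	if (!buf)
		return -ENOMEM;	/* per-CPU pool exhausted */

	/* ... fill the buffer and hand it to the TX path ... */

	mempool_free(&percpu_get(pbuf_mempool), buf);
	return 0;
}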
/**
 * cpu_init_one - initializes a CPU core
 * @cpu: the CPU core number
 *
 * Typically one should call this right after creating a new thread.
 * Initialization includes binding the thread to the appropriate core,
 * setting up per-cpu memory, and enabling Dune.
 *
 * Returns 0 if successful, otherwise a negative errno value.
 */
int cpu_init_one(unsigned int cpu)
{
	int ret;
	cpu_set_t mask;
	unsigned int tmp, numa_node;
	void *pcpu;

	if (cpu >= cpu_count)
		return -EINVAL;

	CPU_ZERO(&mask);
	CPU_SET(cpu, &mask);
	ret = sched_setaffinity(0, sizeof(mask), &mask);
	if (ret)
		return -EPERM;

	ret = syscall(SYS_getcpu, &tmp, &numa_node, NULL);
	if (ret)
		return -ENOSYS;

	if (cpu != tmp) {
		log_err("cpu: couldn't migrate to the correct core\n");
		return -EINVAL;
	}

	pcpu = cpu_init_percpu(cpu, numa_node);
	if (!pcpu)
		return -ENOMEM;

	ret = dune_enter_ex(pcpu);
	if (ret) {
		log_err("cpu: failed to initialize Dune\n");
		return ret;
	}

	percpu_get(cpu_id) = cpu;
	percpu_get(cpu_numa_node) = numa_node;

	log_is_early_boot = false;
	log_info("cpu: started core %d, numa node %d\n", cpu, numa_node);

	return 0;
}
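/*
 * A hedged sketch of a per-CPU thread entry point built on
 * cpu_init_one(); the thread plumbing is an assumption, only
 * cpu_init_one() itself is from this file.
 */
static void *cpu_entry_sketch(void *arg)
{
	unsigned int cpu = (unsigned int) (uintptr_t) arg;

	/* Pin to @cpu, map per-cpu memory, and enter Dune. */
	if (cpu_init_one(cpu))
		return NULL;

	/* ... run this core's main loop ... */
	return NULL;
}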
/*
 * Quickly disable system interrupts upon entrance! Now the
 * kernel is in an inconsistent state; just gracefully stop
 * the machine and halt :-(
 *
 * An interesting case we faced from not disabling interrupts
 * early on (disabling them at the function end instead) was
 * having other threads scheduled between printk() and the
 * interrupt-disable, scrolling away the caller's panic
 * message and losing that information FOREVER.
 */
void __no_return panic(const char *fmt, ...)
{
	va_list args;
	int n;

	/* NOTE! Do not put anything above this */
	local_irq_disable();

	/*
	 * NOTE! Manually ensure that all the functions called
	 * below are free of any asserts or panics.
	 */

	/* Avoid concurrent panic()s: the first call holds the most
	 * important facts; the rest are usually side effects. */
	if (!spin_trylock(&panic_lock))
		goto halt;

	/* If other cores are alive, send them a fixed IPI, which
	 * intentionally avoids interrupting cores with IF=0 until
	 * they re-accept interrupts. Why?
	 *
	 * An interrupted critical region could deadlock our panic
	 * code if we tried to acquire the same lock. The other
	 * cores may also not yet be in long mode before they enable
	 * interrupts (e.g. in the 16-bit SMP trampoline step).
	 *
	 * IPIs are sent only if more than one core is alive: we
	 * might be at so early a stage that our APIC registers
	 * are not yet memory-mapped, leading to memory faults if
	 * accessed locally!
	 *
	 * If destination CPUs were alive but have not yet initialized
	 * their local APICs, they will not be able to catch this
	 * IPI and will continue execution normally. Beware.
	 */
	if (smpboot_get_nr_alive_cpus() > 1)
		apic_broadcast_ipi(APIC_DELMOD_FIXED, HALT_CPU_IPI_VECTOR);

	va_start(args, fmt);
	n = vsnprintf(buf, sizeof(buf) - 1, fmt, args);
	va_end(args);

	buf[n] = 0;
	printk("\nCPU#%d-PANIC: %s", percpu_get(apic_id), buf);

	/* Since the other cores are stopped only after they re-
	 * accept interrupts, they may print on-screen and scroll
	 * away our message. Acquire all screen locks, forever. */
	printk_bust_all_locks();

halt:
	halt();
}
/**
 * queue_init_one - initializes a queue
 * @rx_queue: the RX queue to initialize
 *
 * Returns 0 if successful, otherwise -ENOMEM on failure.
 */
int queue_init_one(struct eth_rx_queue *rx_queue)
{
	void *pqueue;

	pqueue = queue_init_perqueue(percpu_get(cpu_numa_node));
	if (!pqueue)
		return -ENOMEM;

	rx_queue->perqueue_offset = pqueue;
	return 0;
}
/*
 * Black-box testing of the per-CPU accessors.
 */
void percpu_run_tests(void)
{
	int id;
	uintptr_t self, gs;

	id = percpu_get(apic_id);
	self = percpu_get(self);
	gs = get_gs();

	printk("_PerCPU#%d: area address: self = 0x%lx, %%gs = 0x%lx\n",
	       id, self, gs);
	if (self != gs)
		panic("_PerCPU#%d: self reference '0x%lx' != %%gs", id, self);

	*percpu_addr(x64) = 0x6464646464646464;
	percpu_set(x32, 0x32323232);
	percpu_set(x16, 0x1616);
	percpu_set(x8, 0x8);

	printk("_PerCPU#%d: x64 address = 0x%lx, val = 0x%lx\n", id,
	       percpu_addr(x64), percpu_get(x64));
	printk("_PerCPU#%d: x32 address = 0x%lx, val = 0x%x\n", id,
	       percpu_addr(x32), percpu_get(x32));
	printk("_PerCPU#%d: x16 address = 0x%lx, val = 0x%x\n", id,
	       percpu_addr(x16), percpu_get(x16));
	printk("_PerCPU#%d: x8  address = 0x%lx, val = 0x%x\n", id,
	       percpu_addr(x8), percpu_get(x8));
}
/**
 * cpu_do_bookkeeping - runs periodic per-cpu tasks
 */
void cpu_do_bookkeeping(void)
{
	struct cpu_runlist *rlist = &percpu_get(runlist);
	struct cpu_runner *runner;

	if (rlist->next_runner) {
		spin_lock(&rlist->lock);
		runner = rlist->next_runner;
		rlist->next_runner = NULL;
		spin_unlock(&rlist->lock);

		do {
			struct cpu_runner *last = runner;
			runner->func(runner->data);
			runner = runner->next;
			free(last);
		} while (runner);
	}
}
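/*
 * A hedged sketch of the producer side: a hypothetical helper that
 * queues work for cpu_do_bookkeeping() to run. The runlist and
 * runner field names match this file; the helper is an assumption.
 */
static int cpu_queue_runner_sketch(struct cpu_runlist *rlist,
				   void (*func)(void *), void *data)
{
	struct cpu_runner *runner = malloc(sizeof(*runner));

	if (!runner)
		return -ENOMEM;

	runner->func = func;
	runner->data = data;

	spin_lock(&rlist->lock);
	runner->next = rlist->next_runner;
	rlist->next_runner = runner;
	spin_unlock(&rlist->lock);

	return 0;
}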
/*
 * Check the disassembly and ensure that GCC preserves the program
 * order of the interleaved per-CPU reads and writes.
 */
uint64_t percpu_inspect_order(void)
{
	uint64_t u, v, w, x, y, z;

	u = percpu_get(x64);
	percpu_set(x64, 0xdead);
	v = percpu_get(x64);
	percpu_set(x64, 0xbeef);
	w = percpu_get(x64);
	percpu_set(x64, 0xcafe);
	x = percpu_get(x64);
	percpu_set(x64, 0xbabe);
	y = percpu_get(x64);
	z = percpu_get(x16);

	return z;
}
void mem_free(void *ptr)
{
	mempool_free(&percpu_get(pbuf_with_payload_mempool), ptr);
}
void *mem_malloc(size_t size)
{
	LWIP_ASSERT("mem_malloc", size <= PBUF_WITH_PAYLOAD_SIZE);
	return mempool_alloc(&percpu_get(pbuf_with_payload_mempool));
}
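/*
 * A minimal usage sketch of the allocator pair above; the size and
 * fill are illustrative. Any request up to PBUF_WITH_PAYLOAD_SIZE
 * draws a fixed-size element from the per-CPU payload pool.
 */
static void mem_roundtrip_sketch(void)
{
	void *p = mem_malloc(128);

	if (p) {
		memset(p, 0, 128);
		mem_free(p);
	}
}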
/**
 * eth_process_recv - processes pending received packets
 *
 * Returns true if there are no remaining packets.
 */
int eth_process_recv(void)
{
	int i, count = 0;
	bool empty;
	unsigned long min_timestamp = -1;
	unsigned long timestamp;
	int value;
	double val;
	struct metrics_accumulator *this_metrics_acc = &percpu_get(metrics_acc);

	/*
	 * We round robin through each queue one packet at
	 * a time for fairness, and stop when all queues are
	 * empty or the batch limit is hit. We're okay with
	 * going a little over the batch limit if it means
	 * we're not favoring one queue over another.
	 */
	do {
		empty = true;
		for (i = 0; i < percpu_get(eth_num_queues); i++) {
			struct eth_rx_queue *rxq = percpu_get(eth_rxqs[i]);
			struct mbuf *pos = rxq->head;
			if (pos)
				min_timestamp = min(min_timestamp,
						    pos->timestamp);
			if (!eth_process_recv_queue(rxq)) {
				count++;
				empty = false;
			}
		}
	} while (!empty && count < eth_rx_max_batch);

	timestamp = rdtsc();
	this_metrics_acc->count++;
	value = count ? (timestamp - min_timestamp) / cycles_per_us : 0;
	this_metrics_acc->queuing_delay += value;
	this_metrics_acc->batch_size += count;
	if (timestamp - this_metrics_acc->timestamp >
	    (long) cycles_per_us * METRICS_PERIOD_US) {
		if (this_metrics_acc->batch_size)
			val = (double) this_metrics_acc->queuing_delay /
			      this_metrics_acc->batch_size;
		else
			val = 0;
		EMA_UPDATE(cp_shmem->cpu_metrics[percpu_get(cpu_nr)].queuing_delay,
			   val, EMA_SMOOTH_FACTOR);
		if (this_metrics_acc->count)
			val = (double) this_metrics_acc->batch_size /
			      this_metrics_acc->count;
		else
			val = 0;
		EMA_UPDATE(cp_shmem->cpu_metrics[percpu_get(cpu_nr)].batch_size,
			   val, EMA_SMOOTH_FACTOR);
		this_metrics_acc->timestamp = timestamp;
		this_metrics_acc->count = 0;
		this_metrics_acc->queuing_delay = 0;
		this_metrics_acc->batch_size = 0;
	}

	KSTATS_PACKETS_INC(count);
	KSTATS_BATCH_INC(count);
#ifdef ENABLE_KSTATS
	int backlog = 0;
	for (i = 0; i < percpu_get(eth_num_queues); i++) {
		struct eth_rx_queue *rxq = percpu_get(eth_rxqs[i]);
		backlog += rxq->len;
	}
	backlog = div_up(backlog, eth_rx_max_batch);
	KSTATS_BACKLOG_INC(backlog);
#endif

	return empty;
}
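/*
 * EMA_UPDATE above folds each sample into an exponential moving
 * average. A common definition (an assumption, not lifted from this
 * codebase) weights the new sample by the smoothing factor:
 */
#define EMA_UPDATE_SKETCH(avg, val, factor)			\
	((avg) = (avg) * (1.0 - (factor)) + (val) * (factor))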
void unset_current_queue(void)
{
	percpu_get(current_perqueue) = NULL;
}
void set_current_queue(struct eth_rx_queue *rx_queue)
{
	percpu_get(current_perqueue) = rx_queue->perqueue_offset;
}
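/*
 * A usage sketch tying the queue selectors together: visit each of
 * this CPU's RX queues in turn. The loop bound mirrors the ones in
 * the processing functions above; the wrapper itself is illustrative.
 */
static void for_each_rx_queue_sketch(void)
{
	unsigned int i;

	for (i = 0; i < percpu_get(eth_num_queues); i++) {
		if (set_current_queue_by_index(i))
			break;	/* index out of range */

		/* ... per-queue work against the current queue ... */
	}

	unset_current_queue();
}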