void
mutex_enter(mutex_t *mp)
{
	mutex_t imp = *mp;

	/*static int f;
	if (f++ == 70) {
		int c = smp_processor_id();
		unsigned long x = 0;
		for (x = 0; x < 4000000000UL; x++) {
			if (c != smp_processor_id())
				break;
		}
		dtrace_printf("FIRST CPU SW: %d x=%lu\n", c, x);
	}*/

	/***********************************************/
	/*   Try and detect a nested call from this    */
	/*   cpu whilst the mutex is held.             */
	/***********************************************/
	if (mp->m_count && mp->m_type && mp->m_cpu == smp_processor_id()) {
		dtrace_printf("%p mutex...fail in mutex_enter count=%d type=%d\n",
			mp, mp->m_count, mp->m_type);
	}

	cnt_mtx2++;
	mutex_enter_common(mp, FALSE);

	if (disable_ints && irqs_disabled()) {
		dtrace_printf("%p: mutex_enter with irqs disabled fl:%lx level:%d cpu:%d\n",
			mp, mp->m_flags, mp->m_level, mp->m_cpu);
		dtrace_printf("orig: init=%d fl:%lx cpu:%d\n",
			imp.m_initted, imp.m_flags, imp.m_cpu);
	}
}
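/***********************************************/
/*   Illustrative only: a typical caller       */
/*   brackets a short critical section with    */
/*   mutex_enter()/mutex_exit(). mutex_exit    */
/*   is assumed to be defined elsewhere in     */
/*   this module; the names below are made     */
/*   up for the example, not real call sites.  */
/***********************************************/
static mutex_t example_mtx;	/* hypothetical lock */

static void
example_critical_section(void)
{
	mutex_enter(&example_mtx);
	/* ... touch shared dtrace state ... */
	mutex_exit(&example_mtx);
}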
void
mutex_dump(mutex_t *mp)
{
	dtrace_printf("mutex: %p initted=%d count=%p flags=%lx cpu=%d type=%d level=%d\n",
		mp, mp->m_initted, mp->m_count, mp->m_flags,
		mp->m_cpu, mp->m_type, mp->m_level);
}
static int
instr_is_patched(char *name, uint8_t *addr)
{
	instr_probe_t *fbt = instr_probetab[INSTR_ADDR2NDX(addr)];

	for (; fbt != NULL; fbt = fbt->insp_hashnext) {
		if (fbt->insp_patchpoint == addr) {
			dtrace_printf("fbt:dup patch: %p %s\n", addr, name);
			return 1;
		}
	}
	return 0;
}
static int
fbt_is_patched(char *name, instr_t *addr)
{
	fbt_probe_t *fbt = fbt_probetab[FBT_ADDR2NDX(addr)];

	for (; fbt != NULL; fbt = fbt->fbtp_hashnext) {
		if (fbt->fbtp_patchpoint == addr) {
			dtrace_printf("fbt:dup patch: %p %s\n", addr, name);
			return 1;
		}
	}
	return 0;
}
int
dtrace_profile_init(void)
{
	int	ret;

	ret = misc_register(&profile_dev);
	if (ret) {
		printk(KERN_WARNING "dtrace-profile: Unable to register misc device\n");
		return ret;
	}

	initted = TRUE;
	profile_attach();

	dtrace_printf("profile loaded: /dev/dtrace_profile available\n");
	return 0;
}
int
sdt_init(void)
{
	int	ret;

	ret = misc_register(&sdt_dev);
	if (ret) {
		printk(KERN_WARNING "sdt: Unable to register misc device\n");
		return ret;
	}

	sdt_attach();

	dtrace_printf("sdt loaded: /dev/sdt now available\n");
	initted = 1;
	return 0;
}
void
dtrace_xcall1(processorid_t cpu, dtrace_xcall_t func, void *arg)
{
	/***********************************************/
	/*   Just track re-entrancy events - we will   */
	/*   be lockless in dtrace_xcall2.             */
	/***********************************************/
	if (in_xcall >= 0 && (cnt_xcall0 < 500 || (cnt_xcall0 % 50) == 0)) {
		dtrace_printf("x_call: re-entrant call in progress (%d) other=%d.\n",
			cnt_xcall0, in_xcall);
		cnt_xcall0++;
	}
	in_xcall = smp_processor_id();

	//int flags = dtrace_interrupt_disable();
	dtrace_xcall2(cpu, func, arg);
	//dtrace_interrupt_enable(flags);
	in_xcall = -1;
}
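/***********************************************/
/*   Sketch of the typical caller: a           */
/*   dtrace_sync()-style broadcast which runs  */
/*   dtrace_sync_func on every cpu and         */
/*   returns when all have answered. This is   */
/*   an assumed usage pattern, shown for       */
/*   context; the real dtrace_sync() may be    */
/*   routed through a wrapper rather than      */
/*   calling dtrace_xcall1() directly.         */
/***********************************************/
static void
example_sync(void)
{
	dtrace_xcall1(DTRACE_CPUALL, (dtrace_xcall_t) dtrace_sync_func, NULL);
}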
static void
send_ipi_interrupt(cpumask_t *mask, int vector)
{
# if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
	/***********************************************/
	/*   There's 'flat' and there's 'cluster'.     */
	/*   The cluster functions handle more than 8  */
	/*   cpus. The flat ones do not, since the     */
	/*   APIC only has room for an 8-bit cpu mask. */
	/***********************************************/
	static void (*send_IPI_mask)(cpumask_t, int);

	if (send_IPI_mask == NULL)
		send_IPI_mask = get_proc_addr("cluster_send_IPI_mask");
	if (send_IPI_mask == NULL)
		dtrace_printf("HELP ON send_ipi_interrupt!\n");
	else
		send_IPI_mask(*mask, vector);
# elif LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 28)
	/***********************************************/
	/*   Issue with GPL/inlined function.          */
	/***********************************************/
	{
	void send_IPI_mask_sequence(cpumask_t mask, int vector);
	static void (*send_IPI_mask_sequence_ptr)(cpumask_t, int);

	if (send_IPI_mask_sequence_ptr == NULL)
		send_IPI_mask_sequence_ptr = get_proc_addr("send_IPI_mask_sequence");
	send_IPI_mask_sequence_ptr(*mask, vector);
	}
# elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28)
	send_IPI_mask(*mask, vector);
# else
	if (x_apic == NULL) {
		static void (*flat_send_IPI_mask)(cpumask_t *, int);

		if (flat_send_IPI_mask == NULL)
			flat_send_IPI_mask = get_proc_addr("flat_send_IPI_mask");
		if (flat_send_IPI_mask) {
			flat_send_IPI_mask(mask, vector);
			return;
		}
		dtrace_linux_panic("x_apic is null - giving up\n");
		return;
	}
	x_apic->send_IPI_mask(mask, vector);
# endif
}
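/***********************************************/
/*   get_proc_addr() resolves unexported       */
/*   kernel symbols by name. A minimal sketch  */
/*   of the idea, assuming                     */
/*   kallsyms_lookup_name() (from              */
/*   <linux/kallsyms.h>) is usable on the      */
/*   target kernel; the module's real lookup   */
/*   may well differ.                          */
/***********************************************/
static void *
example_get_proc_addr(const char *name)
{
	return (void *) kallsyms_lookup_name(name);
}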
void
dtrace_xcall2(processorid_t cpu, dtrace_xcall_t func, void *arg)
{
	int	c;
	int	cpu_id = smp_processor_id();
	int	cpus_todo = 0;
# if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 24)
	typedef struct cpumask cpumask_t;
	//#define cpu_set(c, mask) cpumask_set_cpu(c, &(mask))
	//#define cpus_clear(mask) cpumask_clear(&mask)
# endif
	cpumask_t mask;

	/***********************************************/
	/*   If we had an internal panic, stop doing   */
	/*   xcalls. Shouldn't happen, but useful      */
	/*   during debugging so we can diagnose what  */
	/*   caused the panic.                         */
	/***********************************************/
	if (dtrace_shutdown)
		return;

	/***********************************************/
	/*   Special case - just 'us'.                 */
	/***********************************************/
	cnt_xcall1++;
	if (cpu_id == cpu) {
		local_irq_disable();
		//dtrace_printf("[%d] sole cnt=%lu\n", smp_processor_id(), cnt_xcall1);
		func(arg);
		local_irq_enable();
		return;
	}

	/***********************************************/
	/*   Set up the cpu mask to do just the        */
	/*   relevant cpu.                             */
	/***********************************************/
	if (cpu != DTRACE_CPUALL) {
		//dtrace_printf("just me %d %d\n", cpu_id, cpu);
		cpu = 1 << cpu;
	}

	//dtrace_printf("xcall %d f=%p\n", cpu_id, func);
	cnt_xcall2++;
	if (xcall_levels[cpu_id]++)
		cnt_xcall3++;

	/***********************************************/
	/*   Set up the rendezvous with the other      */
	/*   targeted cpus. We use a nearly square     */
	/*   NCPU*NCPU matrix to allow for any cpu to  */
	/*   wait for any other. We have two slots     */
	/*   per cpu - because we may be in an         */
	/*   interrupt.                                */
	/*                                             */
	/*   The interrupt slave will service all      */
	/*   queued calls - sometimes it will be       */
	/*   lucky and see multiple, especially if we  */
	/*   are heavily loaded.                       */
	/***********************************************/
	cpus_clear(mask);
	for (c = 0; c < nr_cpus; c++) {
		struct xcalls *xc = &xcalls[cpu_id][c];
		unsigned int cnt;

		/***********************************************/
		/*   Don't set ourselves - we don't want our   */
		/*   cpu to be taking an IPI interrupt and     */
		/*   doing the work twice. We inline           */
		/*   ourselves below.                          */
		/***********************************************/
		if ((cpu & (1 << c)) == 0 || c == cpu_id)
			continue;

		/***********************************************/
		/*   Is this safe? We want to avoid an IPI     */
		/*   call if the other cpu is idle/not doing   */
		/*   dtrace work. If that's the case and we    */
		/*   are calling dtrace_sync, then we can      */
		/*   avoid the xcall.                          */
		/***********************************************/
		if ((void *) func == (void *) dtrace_sync_func &&
		    cpu_core[c].cpuc_probe_level == 0) {
			cpu &= ~(1 << c);
			cnt_xcall7++;
			continue;
		}

		//dtrace_printf("xcall %p\n", func);
		xc->xc_func = func;
		xc->xc_arg = arg;

		/***********************************************/
		/*   Spinlock in case the interrupt hasn't     */
		/*   fired. This should be very rare, and      */
		/*   when it happens, we would be hanging for  */
		/*   100m iterations (about 1s). We reduce     */
		/*   the chance of a hit by using the          */
		/*   NCPU*NCPU*2 array approach. These things  */
		/*   happen when buffers are full or user is   */
		/*   ^C-ing dtrace.                            */
		/***********************************************/
		for (cnt = 0;
		    dtrace_cas32((void *) &xc->xc_state, XC_WORKING, XC_WORKING) == XC_WORKING;
		    cnt++) {
			/***********************************************/
			/*   Avoid noise for tiny windows.             */
			/***********************************************/
			if ((cnt == 0 && xcall_debug) || (!xcall_debug && cnt == 50)) {
				dtrace_printf("[%d] cpu%d in wrong state (state=%d)\n",
					smp_processor_id(), c, xc->xc_state);
			}
			// xcall_slave2();
			if (cnt == 100 * 1000 * 1000) {
				dtrace_printf("[%d] cpu%d - busting lock\n",
					smp_processor_id(), c);
				break;
			}
		}
		if ((cnt && xcall_debug) || (!xcall_debug && cnt > 50)) {
			dtrace_printf("[%d] cpu%d in wrong state (state=%d) %u cycles\n",
				smp_processor_id(), c, xc->xc_state, cnt);
		}

		/***********************************************/
		/*   As soon as we set xc_state and BEFORE     */
		/*   the apic call, the cpu may see the        */
		/*   change since it may be taking an IPI      */
		/*   interrupt for someone else. We need to    */
		/*   be careful with barriers (I think -       */
		/*   although the clflush/wmb may be           */
		/*   redundant).                               */
		/***********************************************/
		xc->xc_state = XC_WORKING;
		// clflush(&xc->xc_state);
		// smp_wmb();
		cpu_set(c, mask);
		cpus_todo++;
	}

	smp_mb();

	/***********************************************/
	/*   Now tell the other cpus to do some work.  */
	/***********************************************/
	if (cpus_todo)
		send_ipi_interrupt(&mask, ipi_vector);

	/***********************************************/
	/*   Check for ourselves.                      */
	/***********************************************/
	if (cpu & (1 << cpu_id))
		func(arg);

	if (xcall_debug)
		dtrace_printf("[%d] getting ready.... (%ld) mask=%x func=%p\n",
			smp_processor_id(), cnt_xcall1, *(int *) &mask, func);

	/***********************************************/
	/*   Wait for the cpus we invoked the IPI on.  */
	/*   Cycle thru the cpus, to avoid mutual      */
	/*   deadlock between one cpu trying to call   */
	/*   us whilst we are calling them.            */
	/***********************************************/
	while (cpus_todo > 0) {
		for (c = 0; c < nr_cpus && cpus_todo > 0; c++) {
			xcall_slave2();
			if (c == cpu_id || (cpu & (1 << c)) == 0)
				continue;

			/***********************************************/
			/*   Wait a little while for this cpu to       */
			/*   respond before going on to the next one.  */
			/***********************************************/
			if (ack_wait(c, 100)) {
				cpus_todo--;
				cpu &= ~(1 << c);
			}
		}
	}
	// smp_mb();

	xcall_levels[cpu_id]--;
}
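/***********************************************/
/*   Illustrative sketch of the consumer side  */
/*   of the rendezvous above: each cpu scans   */
/*   its own column of the xcalls[][] matrix,  */
/*   runs any pending call, and acks by        */
/*   putting the slot back to XC_IDLE, which   */
/*   is what ack_wait() polls for. This        */
/*   approximates what xcall_slave2() does;    */
/*   it is not the actual implementation.      */
/***********************************************/
static void
example_xcall_slave(void)
{
	int	me = smp_processor_id();
	int	c;

	for (c = 0; c < nr_cpus; c++) {
		struct xcalls *xc = &xcalls[c][me];

		if (xc->xc_state != XC_WORKING)
			continue;
		xc->xc_func(xc->xc_arg);
		smp_mb();		/* publish results before the ack */
		xc->xc_state = XC_IDLE;
	}
}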
int
ack_wait(int c, int attempts)
{
	unsigned long cnt = 0;
	int	cnt1 = 0;
	volatile struct xcalls *xc = &xcalls[smp_processor_id()][c];

	/***********************************************/
	/*   Avoid holding on to a stale cache line.   */
	/***********************************************/
	while (dtrace_cas32((void *) &xc->xc_state, XC_WORKING, XC_WORKING) != XC_IDLE) {
		if (attempts-- <= 0)
			return 0;
		barrier();

		/***********************************************/
		/*   Be HT friendly.                           */
		/***********************************************/
		// smt_pause();

		cnt_xcall6++;

		/***********************************************/
		/*   Keep track of the max.                    */
		/***********************************************/
		if (cnt > cnt_xcall5)
			cnt_xcall5 = cnt;

		/***********************************************/
		/*   On my Dual Core 2.26GHz system, we are    */
		/*   seeing counters in the range of hundreds  */
		/*   to maybe 2,000,000 for more extreme       */
		/*   cases (this is inside a VM). During       */
		/*   debugging, we found problems with the     */
		/*   two cores not seeing each other --        */
		/*   possibly because I wasn't doing the       */
		/*   right things to ensure memory barriers    */
		/*   were in place.                            */
		/*                                             */
		/*   We don't want to wait forever because     */
		/*   that will crash/hang your machine, but    */
		/*   we do need to give up if it's taken far   */
		/*   too long.                                 */
		/***********************************************/
		// if (cnt++ == 50 * 1000 * 1000UL) {
		if (cnt++ == 1 * 1000 * 1000UL) {
			cnt = 0;
			cnt_xcall4++;

			if (cnt1 == 0) {
				/***********************************************/
				/*   Looks like we are having trouble getting  */
				/*   the interrupt, so try for an NMI.         */
				/***********************************************/
				cpumask_t mask;
				cpus_clear(mask);
				cpu_set(c, mask);
				// nmi_masks[c] = 1;
				// send_ipi_interrupt(&mask, 2); //NMI_VECTOR);
			}

			if (1) {
				// set_console_on(1);
				dtrace_printf("ack_wait cpu=%d xcall %staking too long! c=%d [xcall1=%lu]\n",
					smp_processor_id(), cnt1 ? "STILL " : "",
					c, cnt_xcall1);
				//dump_stack();
				// set_console_on(0);
			}

			if (cnt1++ > 3) {
				dump_xcalls();
				dtrace_linux_panic("xcall taking too long");
				break;
			}
		}
	}

	if (xcall_debug) {
		dtrace_printf("[%d] ack_wait finished c=%d cnt=%lu (max=%lu)\n",
			smp_processor_id(), c, cnt, cnt_xcall5);
	}
	return 1;
}
void
mutex_enter_common(mutex_t *mp, int dflag)
{
	unsigned long flags;
	unsigned int cnt;

	if (!mp->m_initted) {
		/***********************************************/
		/*   Special debug: detect a dynamic mutex     */
		/*   being used (one coming from a kmalloc     */
		/*   type block of memory) vs the statically   */
		/*   defined ones.                             */
		/***********************************************/
		if (mp->m_initted != 2) {
			dtrace_printf("initting a mutex\n");
			dump_stack();
		}
		dmutex_init(mp);
	}

	/***********************************************/
	/*   Check for recursive mutex. There's a      */
	/*   number of scenarios.                      */
	/*                                             */
	/*   Non-intr followed by an intr: we have to  */
	/*   allow the intr.                           */
	/*                                             */
	/*   Non-intr followed by non-intr: normal     */
	/*   recursive mutex.                          */
	/*                                             */
	/*   Intr followed by an intr: shouldn't       */
	/*   happen.                                   */
	/*                                             */
	/*   We mustn't allow ourselves to be put on   */
	/*   another cpu, else we will lose track of   */
	/*   which cpu has the mutex.                  */
	/*                                             */
	/*   Now that the mutex code is working, we    */
	/*   mustn't allow recursive mutexes. This     */
	/*   causes problems for two dtrace user       */
	/*   space apps running at the same time.      */
	/*   Turn off for now. Later on, we can        */
	/*   delete the code below.                    */
	/***********************************************/
	if (0 && mp->m_count && mp->m_cpu == smp_processor_id()) {
		static int x;
		if (x++ < 4 || (x < 1000000 && (x % 5000) == 0))
			dtrace_printf("%p mutex recursive, dflag=%d %d [%d]\n",
				mp, dflag, mp->m_type, x);
		mp->m_level++;
		return;
	}

	if (disable_ints && dflag)
		flags = dtrace_interrupt_disable();
	else
		flags = dtrace_interrupt_get();

	for (cnt = 0; dtrace_casptr(&mp->m_count, 0, (void *) 1) == (void *) 1; ) {
		/***********************************************/
		/*   We are waiting for the lock. Someone      */
		/*   else has it. Someone else might be        */
		/*   waiting for us (xcall), so occasionally   */
		/*   empty the xcall queue for us.             */
		/***********************************************/
		if ((cnt++ % 100) == 0)
			xcall_slave2();

		/***********************************************/
		/*   If we are running in the upper half of    */
		/*   the kernel, periodically let the          */
		/*   scheduler run, to avoid deadlock when     */
		/*   running N+1 copies of dtrace on an N CPU  */
		/*   system.                                   */
		/***********************************************/
		if (/*!dflag &&*/ (cnt % 2000) == 0)
			schedule();

		/***********************************************/
		/*   If we start locking up the kernel, let    */
		/*   the user know something bad is            */
		/*   happening. Probably pointless if the      */
		/*   mutex is working correctly.               */
		/***********************************************/
		if ((cnt % (500 * 1000 * 1000)) == 0) {
			dtrace_printf("mutex_enter: taking a long time to grab lock mtx3=%llu\n",
				cnt_mtx3);
			cnt_mtx3++;
		}
	}
	//preempt_disable();
	mp->m_flags = flags;
	mp->m_cpu = smp_processor_id();
	mp->m_level = 1;
	mp->m_type = dflag;
}
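/***********************************************/
/*   Sketch of the matching release path, for  */
/*   symmetry: the exit side must undo what    */
/*   mutex_enter_common() set up - clear the   */
/*   ownership fields, drop the lock word,     */
/*   and restore the saved interrupt flags.    */
/*   This is an approximation, not the         */
/*   module's real mutex_exit().               */
/***********************************************/
static void
example_mutex_exit(mutex_t *mp)
{
	unsigned long flags = mp->m_flags;
	int	dflag = mp->m_type;

	mp->m_type = 0;
	mp->m_level = 0;
	smp_mb();			/* order stores before the release */
	mp->m_count = 0;		/* drop the lock word */
	if (disable_ints && dflag)
		dtrace_interrupt_enable(flags);
}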