Example #1
RTDECL(int) RTMpOnOthers(PFNRTMPWORKER pfnWorker, void *pvUser1, void *pvUser2)
{
    /* Will panic if there are no rendezvousing CPUs, so check up front. */
    if (RTMpGetOnlineCount() > 1)
    {
        RTMPARGS    Args;

        Args.pfnWorker = pfnWorker;
        Args.pvUser1 = pvUser1;
        Args.pvUser2 = pvUser2;
        Args.idCpu = RTMpCpuId();
        Args.cHits = 0;
        // XXX: is _sync needed?
        call_all_cpus_sync(rtmpOnOthersHaikuWrapper, &Args);
    }
    return VINF_SUCCESS;
}
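For context, a worker handed to these RTMpOn* wrappers follows the PFNRTMPWORKER shape. A minimal usage sketch; the worker and counter below are illustrative, not part of the original source:

#include <iprt/mp.h>
#include <iprt/asm.h>

/* Illustrative worker: atomically counts how many CPUs ran it. */
static DECLCALLBACK(void) exampleWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
{
    ASMAtomicIncU32((uint32_t volatile *)pvUser1);
    NOREF(idCpu); NOREF(pvUser2);
}

/* Run the worker on every online CPU except the current one. */
static int exampleRunOnOthers(void)
{
    uint32_t cHits = 0;
    return RTMpOnOthers(exampleWorker, &cHits, NULL);
}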
Example #2
RTDECL(int) RTMpOnSpecific(RTCPUID idCpu, PFNRTMPWORKER pfnWorker, void *pvUser1, void *pvUser2)
{
    IPRT_LINUX_SAVE_EFL_AC();
    int rc;
    RTMPARGS Args;

    RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
    Args.pfnWorker = pfnWorker;
    Args.pvUser1 = pvUser1;
    Args.pvUser2 = pvUser2;
    Args.idCpu = idCpu;
    Args.cHits = 0;

    if (!RTMpIsCpuPossible(idCpu))
        return VERR_CPU_NOT_FOUND;

    RTThreadPreemptDisable(&PreemptState);
    if (idCpu != RTMpCpuId())
    {
        if (RTMpIsCpuOnline(idCpu))
        {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27)
            rc = smp_call_function_single(idCpu, rtmpLinuxWrapper, &Args, 1 /* wait */);
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19)
            rc = smp_call_function_single(idCpu, rtmpLinuxWrapper, &Args, 0 /* retry */, 1 /* wait */);
#else /* older kernels */
            rc = smp_call_function(rtmpOnSpecificLinuxWrapper, &Args, 0 /* retry */, 1 /* wait */);
#endif /* older kernels */
            Assert(rc == 0);
            rc = Args.cHits ? VINF_SUCCESS : VERR_CPU_OFFLINE;
        }
        else
            rc = VERR_CPU_OFFLINE;
    }
    else
    {
        rtmpLinuxWrapper(&Args);
        rc = VINF_SUCCESS;
    }
    RTThreadPreemptRestore(&PreemptState);

    IPRT_LINUX_RESTORE_EFL_AC();
    return rc;
}
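The function distinguishes three outcomes. A small illustrative helper mapping them, assuming iprt/errcore.h for the status code definitions:

#include <iprt/errcore.h>

/* Illustrative: the status codes a caller of RTMpOnSpecific should handle. */
static const char *exampleDescribeOnSpecificRc(int rc)
{
    switch (rc)
    {
        case VINF_SUCCESS:       return "worker ran on the requested CPU";
        case VERR_CPU_OFFLINE:   return "CPU exists but is offline";
        case VERR_CPU_NOT_FOUND: return "CPU is not possible on this system";
        default:                 return "unexpected status";
    }
}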
Example #3
RTDECL(PRTCPUSET) RTMpGetOnlineSet(PRTCPUSET pSet)
{
#ifdef CONFIG_SMP
    RTCPUID idCpu;

    RTCpuSetEmpty(pSet);
    idCpu = RTMpGetMaxCpuId();
    do
    {
        if (RTMpIsCpuOnline(idCpu))
            RTCpuSetAdd(pSet, idCpu);
    } while (idCpu-- > 0);
#else
    RTCpuSetEmpty(pSet);
    RTCpuSetAdd(pSet, RTMpCpuId());
#endif
    return pSet;
}
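Callers walk the returned set by set index. A minimal sketch; note that IPRT also provides RTCpuSetCount for exactly this, so the loop is purely illustrative:

#include <iprt/mp.h>
#include <iprt/cpuset.h>

/* Count the online CPUs by enumerating the set returned above. */
static uint32_t exampleCountOnlineCpus(void)
{
    RTCPUSET OnlineSet;
    uint32_t cOnline = 0;
    int      iCpu;
    RTMpGetOnlineSet(&OnlineSet);
    for (iCpu = 0; iCpu < RTCPUSET_MAX_CPUS; iCpu++)
        if (RTCpuSetIsMemberByIndex(&OnlineSet, iCpu))
            cOnline++;
    return cOnline;
}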
Example #4
RTDECL(void) RTThreadPreemptRestore(PRTTHREADPREEMPTSTATE pState)
{
    AssertPtr(pState);
    Assert(pState->u32Reserved == 42);
    pState->u32Reserved = 0;
    RT_ASSERT_PREEMPT_CPUID_RESTORE(pState);

    RTCPUID idCpu = RTMpCpuId();
    if (RT_UNLIKELY(idCpu < RT_ELEMENTS(g_aPreemptHacks)))
    {
        Assert(g_aPreemptHacks[idCpu].cRecursion > 0);
        if (--g_aPreemptHacks[idCpu].cRecursion == 0)
        {
            lck_spin_t *pSpinLock = g_aPreemptHacks[idCpu].pSpinLock;
            if (pSpinLock)
                lck_spin_unlock(pSpinLock);
            else
                AssertFailed();
        }
    }
}
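The u32Reserved == 42 assertion implies the state cookie must come from a matching RTThreadPreemptDisable call on the same thread. A minimal pairing sketch:

#include <iprt/thread.h>

/* Each disable sets the cookie (asserted as 42 above); each restore clears it. */
static void examplePinnedSection(void)
{
    RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
    RTThreadPreemptDisable(&PreemptState);
    /* ... work that must not migrate off the current CPU ... */
    RTThreadPreemptRestore(&PreemptState);
}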
Example #5
/**
 * The slave DPC callback for an omni timer.
 *
 * @param   pDpc                The DPC object.
 * @param   pvUser              Pointer to the sub-timer.
 * @param   SystemArgument1     Some system stuff.
 * @param   SystemArgument2     Some system stuff.
 */
static void _stdcall rtTimerNtOmniSlaveCallback(IN PKDPC pDpc, IN PVOID pvUser, IN PVOID SystemArgument1, IN PVOID SystemArgument2)
{
    PRTTIMERNTSUBTIMER pSubTimer = (PRTTIMERNTSUBTIMER)pvUser;
    PRTTIMER pTimer = pSubTimer->pParent;

    AssertPtr(pTimer);
#ifdef RT_STRICT
    if (KeGetCurrentIrql() < DISPATCH_LEVEL)
        RTAssertMsg2Weak("rtTimerNtOmniSlaveCallback: Irql=%d expected >=%d\n", KeGetCurrentIrql(), DISPATCH_LEVEL);
    int iCpuSelf = RTMpCpuIdToSetIndex(RTMpCpuId());
    if (pSubTimer - &pTimer->aSubTimers[0] != iCpuSelf)
        RTAssertMsg2Weak("rtTimerNtOmniSlaveCallback: iCpuSelf=%d pSubTimer=%p / %d\n", iCpuSelf, pSubTimer, pSubTimer - &pTimer->aSubTimers[0]);
#endif

    /*
     * Check that we haven't been suspended before doing the callout.
     */
    if (    !ASMAtomicUoReadBool(&pTimer->fSuspended)
        &&  pTimer->u32Magic == RTTIMER_MAGIC)
        pTimer->pfnTimer(pTimer, pTimer->pvUser, ++pSubTimer->iTick);

    NOREF(pDpc); NOREF(SystemArgument1); NOREF(SystemArgument2);
}
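The RT_STRICT block above relies on each sub-timer DPC being bound to its own CPU, so a sub-timer's position in the array equals the caller's CPU set index. Stated as a tiny sketch, using the types from the code above:

/* The invariant asserted above: sub-timer array position == CPU set index. */
static int exampleSubTimerCpuIndex(PRTTIMER pTimer, PRTTIMERNTSUBTIMER pSubTimer)
{
    return (int)(pSubTimer - &pTimer->aSubTimers[0]);
}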
Example #6
RTDECL(int) RTMpOnSpecific(RTCPUID idCpu, PFNRTMPWORKER pfnWorker, void *pvUser1, void *pvUser2)
{
    /*
     * Don't try to mess with an offline CPU.
     */
    if (!RTMpIsCpuOnline(idCpu))
        return !RTMpIsCpuPossible(idCpu)
              ? VERR_CPU_NOT_FOUND
              : VERR_CPU_OFFLINE;

    /*
     * Use the broadcast IPI routine if there are no more than two CPUs online,
     * or if the current IRQL is unsuitable for KeWaitForSingleObject.
     */
    int rc;
    uint32_t cHits = 0;
    if (   g_pfnrtKeIpiGenericCall
        && (   RTMpGetOnlineCount() <= 2
            || KeGetCurrentIrql()   > APC_LEVEL)
       )
    {
        rc = rtMpCallUsingBroadcastIpi(pfnWorker, pvUser1, pvUser2, rtmpNtOnSpecificBroadcastIpiWrapper,
                                       idCpu, NIL_RTCPUID, &cHits);
        if (RT_SUCCESS(rc))
        {
            if (cHits == 1)
                return VINF_SUCCESS;
            rc = cHits == 0 ? VERR_CPU_OFFLINE : VERR_CPU_IPE_1;
        }
        return rc;
    }

#if 0
    rc = rtMpCallUsingDpcs(pfnWorker, pvUser1, pvUser2, RT_NT_CPUID_SPECIFIC, idCpu, NIL_RTCPUID, &cHits);
    if (RT_SUCCESS(rc))
    {
        if (cHits == 1)
            return VINF_SUCCESS;
        rc = cHits == 0 ? VERR_CPU_OFFLINE : VERR_CPU_IPE_1;
    }
    return rc;

#else
    /*
     * Initialize the argument package and the objects within it.
     * The package is reference counted to avoid unnecessary spinning while
     * synchronizing cleanup, and to prevent stack corruption.
     */
    PRTMPNTONSPECIFICARGS pArgs = (PRTMPNTONSPECIFICARGS)ExAllocatePoolWithTag(NonPagedPool, sizeof(*pArgs), (ULONG)'RTMp');
    if (!pArgs)
        return VERR_NO_MEMORY;
    pArgs->cRefs                  = 2;
    pArgs->fExecuting             = false;
    pArgs->fDone                  = false;
    pArgs->CallbackArgs.pfnWorker = pfnWorker;
    pArgs->CallbackArgs.pvUser1   = pvUser1;
    pArgs->CallbackArgs.pvUser2   = pvUser2;
    pArgs->CallbackArgs.idCpu     = idCpu;
    pArgs->CallbackArgs.cHits     = 0;
    pArgs->CallbackArgs.cRefs     = 2;
    KeInitializeEvent(&pArgs->DoneEvt, SynchronizationEvent, FALSE /* not signalled */);
    KeInitializeDpc(&pArgs->Dpc, rtMpNtOnSpecificDpcWrapper, pArgs);
    KeSetImportanceDpc(&pArgs->Dpc, HighImportance);
    KeSetTargetProcessorDpc(&pArgs->Dpc, (int)idCpu);

    /*
     * Disable preemption while we check the current processor and insert the DPC.
     */
    KIRQL bOldIrql;
    KeRaiseIrql(DISPATCH_LEVEL, &bOldIrql);
    ASMCompilerBarrier(); /* paranoia */

    if (RTMpCpuId() == idCpu)
    {
        /* Just execute the callback on the current CPU. */
        pfnWorker(idCpu, pvUser1, pvUser2);
        KeLowerIrql(bOldIrql);

        ExFreePool(pArgs);
        return VINF_SUCCESS;
    }

    /* Different CPU, so queue it if the CPU is still online. */
    if (RTMpIsCpuOnline(idCpu))
    {
        BOOLEAN fRc = KeInsertQueueDpc(&pArgs->Dpc, 0, 0);
        Assert(fRc);
        KeLowerIrql(bOldIrql);

        uint64_t const nsRealWaitTS = RTTimeNanoTS();

        /*
         * Wait actively for a while in case the CPU/thread responds quickly.
         */
        uint32_t cLoopsLeft = 0x20000;
        while (cLoopsLeft-- > 0)
        {
            if (pArgs->fDone)
            {
                rtMpNtOnSpecificRelease(pArgs);
                return VINF_SUCCESS;
            }
            ASMNopPause();
        }

        /*
         * It didn't respond, so wait on the event object, poking the CPU if it's slow.
         */
        LARGE_INTEGER Timeout;
        Timeout.QuadPart = -10000; /* 1ms */
        NTSTATUS rcNt = KeWaitForSingleObject(&pArgs->DoneEvt, Executive, KernelMode, FALSE /* Alertable */, &Timeout);
        if (rcNt == STATUS_SUCCESS)
        {
            rtMpNtOnSpecificRelease(pArgs);
            return VINF_SUCCESS;
        }

        /* If it hasn't responded yet, maybe poke it and wait some more. */
        if (rcNt == STATUS_TIMEOUT)
        {
#ifndef IPRT_TARGET_NT4
            if (   !pArgs->fExecuting
                && (   g_pfnrtMpPokeCpuWorker == rtMpPokeCpuUsingHalSendSoftwareInterrupt
                    || g_pfnrtMpPokeCpuWorker == rtMpPokeCpuUsingHalReqestIpiW7Plus
                    || g_pfnrtMpPokeCpuWorker == rtMpPokeCpuUsingHalReqestIpiPreW7))
                RTMpPokeCpu(idCpu);
#endif

            Timeout.QuadPart = -1280000; /* 128ms */
            rcNt = KeWaitForSingleObject(&pArgs->DoneEvt, Executive, KernelMode, FALSE /* Alertable */, &Timeout);
            if (rcNt == STATUS_SUCCESS)
            {
                rtMpNtOnSpecificRelease(pArgs);
                return VINF_SUCCESS;
            }
        }

        /*
         * Something weird is happening; try to bail out.
         */
        if (KeRemoveQueueDpc(&pArgs->Dpc))
        {
            ExFreePool(pArgs); /* DPC was still queued, so we can return without further ado. */
            LogRel(("RTMpOnSpecific(%#x): Not processed after %llu ns: rcNt=%#x\n", idCpu, RTTimeNanoTS() - nsRealWaitTS, rcNt));
        }
        else
        {
            /* DPC is running, wait a good while for it to complete. */
            LogRel(("RTMpOnSpecific(%#x): Still running after %llu ns: rcNt=%#x\n", idCpu, RTTimeNanoTS() - nsRealWaitTS, rcNt));

            Timeout.QuadPart = -30*1000*1000*10; /* 30 seconds */
            rcNt = KeWaitForSingleObject(&pArgs->DoneEvt, Executive, KernelMode, FALSE /* Alertable */, &Timeout);
            if (rcNt != STATUS_SUCCESS)
                LogRel(("RTMpOnSpecific(%#x): Giving up on running worker after %llu ns: rcNt=%#x\n", idCpu, RTTimeNanoTS() - nsRealWaitTS, rcNt));
        }
        rc = RTErrConvertFromNtStatus(rcNt);
    }
    else
    {
        /* CPU is offline. */
        KeLowerIrql(bOldIrql);
        rc = !RTMpIsCpuPossible(idCpu) ? VERR_CPU_NOT_FOUND : VERR_CPU_OFFLINE;
    }

    rtMpNtOnSpecificRelease(pArgs);
    return rc;
#endif
}
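rtMpNtOnSpecificRelease is not shown in this excerpt. Given the cRefs = 2 setup above (one reference for the caller, one for the DPC), a plausible shape for it would be the following sketch; this is an assumption, not the actual VirtualBox code:

#include <iprt/asm.h>

/* Hypothetical sketch: drop one reference, free the package when it hits zero. */
static void exampleArgsRelease(PRTMPNTONSPECIFICARGS pArgs)
{
    if (ASMAtomicDecU32(&pArgs->cRefs) == 0)
        ExFreePool(pArgs);
}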
Example #7
/**
 * Get the timestamp frequency.
 *
 * @returns Number of ticks per second.
 * @param   pVM     The cross context VM structure.
 */
VMMDECL(uint64_t) TMCpuTicksPerSecond(PVM pVM)
{
    if (   pVM->tm.s.enmTSCMode == TMTSCMODE_REAL_TSC_OFFSET
        && g_pSUPGlobalInfoPage->u32Mode != SUPGIPMODE_INVARIANT_TSC)
    {
#ifdef IN_RING3
        uint64_t cTSCTicksPerSecond = SUPGetCpuHzFromGip(g_pSUPGlobalInfoPage);
#elif defined(IN_RING0)
        uint64_t cTSCTicksPerSecond = SUPGetCpuHzFromGipBySetIndex(g_pSUPGlobalInfoPage, RTMpCpuIdToSetIndex(RTMpCpuId()));
#else
        uint64_t cTSCTicksPerSecond = SUPGetCpuHzFromGipBySetIndex(g_pSUPGlobalInfoPage, VMMGetCpu(pVM)->iHostCpuSet);
#endif
        if (RT_LIKELY(cTSCTicksPerSecond != ~(uint64_t)0))
            return cTSCTicksPerSecond;
    }
    return pVM->tm.s.cTSCTicksPerSecond;
}
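With the frequency in hand, TSC tick deltas convert to wall-clock time. A sketch using IPRT's overflow-safe multiply-divide helper, assuming the frequency fits in 32 bits (i.e. below ~4.3 GHz):

#include <iprt/asm-math.h>

/* Convert a TSC tick delta to nanoseconds: cTicks * 10^9 / Hz. */
static uint64_t exampleTicksToNs(PVM pVM, uint64_t cTicks)
{
    return ASMMultU64ByU32DivByU32(cTicks, UINT32_C(1000000000),
                                   (uint32_t)TMCpuTicksPerSecond(pVM));
}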
Example #8
/**
 * The slow case for SUPReadTsc where we need to apply deltas.
 *
 * Must only be called when deltas are applicable, so please do not call it
 * directly.
 *
 * @returns TSC with delta applied.
 * @param   pGip        Pointer to the GIP.
 *
 * @remarks May be called with interrupts disabled in ring-0!  This is why the
 *          ring-0 code doesn't attempt to figure out the delta.
 *
 * @internal
 */
SUPDECL(uint64_t) SUPReadTscWithDelta(PSUPGLOBALINFOPAGE pGip)
{
    uint64_t            uTsc;
    uint16_t            iGipCpu;
    AssertCompile(RT_IS_POWER_OF_TWO(RTCPUSET_MAX_CPUS));
    AssertCompile(RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx) >= RTCPUSET_MAX_CPUS);
    Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO);

    /*
     * Read the TSC and get the corresponding aCPUs index.
     */
#ifdef IN_RING3
    if (pGip->fGetGipCpu & SUPGIPGETCPU_RDTSCP_MASK_MAX_SET_CPUS)
    {
        /* RDTSCP gives us all we need, no loops/cli. */
        uint32_t iCpuSet;
        uTsc      = ASMReadTscWithAux(&iCpuSet);
        iCpuSet  &= RTCPUSET_MAX_CPUS - 1;
        iGipCpu   = pGip->aiCpuFromCpuSetIdx[iCpuSet];
    }
    else if (pGip->fGetGipCpu & SUPGIPGETCPU_IDTR_LIMIT_MASK_MAX_SET_CPUS)
    {
        /* Storing the IDTR is normally very quick, but we need to loop. */
        uint32_t cTries = 0;
        for (;;)
        {
            uint16_t cbLim = ASMGetIdtrLimit();
            uTsc = ASMReadTSC();
            if (RT_LIKELY(ASMGetIdtrLimit() == cbLim))
            {
                uint16_t iCpuSet = cbLim - 256 * (ARCH_BITS == 64 ? 16 : 8);
                iCpuSet &= RTCPUSET_MAX_CPUS - 1;
                iGipCpu  = pGip->aiCpuFromCpuSetIdx[iCpuSet];
                break;
            }
            if (cTries >= 16)
            {
                iGipCpu = UINT16_MAX;
                break;
            }
            cTries++;
        }
    }
    else
    {
        /* Get APIC ID via the slow CPUID instruction, requires looping. */
        uint32_t cTries = 0;
        for (;;)
        {
            uint8_t idApic = ASMGetApicId();
            uTsc = ASMReadTSC();
            if (RT_LIKELY(ASMGetApicId() == idApic))
            {
                iGipCpu = pGip->aiCpuFromApicId[idApic];
                break;
            }
            if (cTries >= 16)
            {
                iGipCpu = UINT16_MAX;
                break;
            }
            cTries++;
        }
    }
#elif defined(IN_RING0)
    /* Ring-0: Use RTMpCpuId(), no loops. */
    RTCCUINTREG uFlags  = ASMIntDisableFlags();
    int         iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
    if (RT_LIKELY((unsigned)iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)))
        iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet];
    else
        iGipCpu = UINT16_MAX;
    uTsc = ASMReadTSC();
    ASMSetFlags(uFlags);

#elif defined(IN_RC)
    /* Raw-mode context: We can get the host CPU set index via VMCPU, no loops. */
    RTCCUINTREG uFlags  = ASMIntDisableFlags(); /* Interrupts are already disabled, but play it safe. */
    uint32_t    iCpuSet = VMMGetCpu(&g_VM)->iHostCpuSet;
    if (RT_LIKELY(iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)))
        iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet];
    else
        iGipCpu = UINT16_MAX;
    uTsc = ASMReadTSC();
    ASMSetFlags(uFlags);
#else
# error "IN_RING3, IN_RC or IN_RING0 must be defined!"
#endif

    /*
     * If the delta is valid, apply it.
     */
    if (RT_LIKELY(iGipCpu < pGip->cCpus))
    {
        int64_t iTscDelta = pGip->aCPUs[iGipCpu].i64TSCDelta;
        if (RT_LIKELY(iTscDelta != INT64_MAX))
            return uTsc - iTscDelta;

#ifdef IN_RING3
        /*
         * The delta needs calculating, call supdrv to get the TSC.
         */
        int rc = SUPR3ReadTsc(&uTsc, NULL);
        if (RT_SUCCESS(rc))
            return uTsc;
        AssertMsgFailed(("SUPR3ReadTsc -> %Rrc\n", rc));
        uTsc = ASMReadTSC();
#endif /* IN_RING3 */
    }

    /*
     * This shouldn't happen, especially not in ring-3 and raw-mode context.
     * But if it does, return something that's half useful.
     */
    AssertMsgFailed(("iGipCpu=%d (%#x) cCpus=%d fGetGipCpu=%#x\n", iGipCpu, iGipCpu, pGip->cCpus, pGip->fGetGipCpu));
    return uTsc;
}
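The RDTSCP fast path works because the host seeds IA32_TSC_AUX with the CPU set index, which the code above masks and uses as a lookup-table index. In isolation, and under that assumption:

#include <iprt/asm-amd64-x86.h>
#include <iprt/cpuset.h>

/* One instruction yields both the TSC and (via TSC_AUX) the CPU set index. */
static uint64_t exampleReadTscAndCpuSetIndex(uint32_t *piCpuSet)
{
    uint64_t uTsc = ASMReadTscWithAux(piCpuSet);
    *piCpuSet &= RTCPUSET_MAX_CPUS - 1; /* mask as the lookup table expects */
    return uTsc;
}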
Example #9
/**
 * Wrapper between the native Linux per-CPU callbacks and PFNRTMPWORKER.
 *
 * @param   pvInfo      Pointer to the RTMPARGS package.
 */
static void rtmpLinuxWrapper(void *pvInfo)
{
    PRTMPARGS pArgs = (PRTMPARGS)pvInfo;
    ASMAtomicIncU32(&pArgs->cHits);
    pArgs->pfnWorker(RTMpCpuId(), pArgs->pvUser1, pArgs->pvUser2);
}
Example #10
RTDECL(bool) RTMpIsCpuOnline(RTCPUID idCpu)
{
    /* Generic uniprocessor fallback: the only online CPU is the caller's own. */
    return RTMpCpuId() == idCpu;
}
Example #11
/**
 * The timer callback for an omni-timer.
 *
 * This is responsible for queueing the DPCs for the other CPUs and for
 * performing the callback on the CPU on which it is called.
 *
 * @param   pDpc                The DPC object.
 * @param   pvUser              Pointer to the sub-timer.
 * @param   SystemArgument1     Some system stuff.
 * @param   SystemArgument2     Some system stuff.
 */
static void _stdcall rtTimerNtOmniMasterCallback(IN PKDPC pDpc, IN PVOID pvUser, IN PVOID SystemArgument1, IN PVOID SystemArgument2)
{
    PRTTIMERNTSUBTIMER pSubTimer = (PRTTIMERNTSUBTIMER)pvUser;
    PRTTIMER pTimer = pSubTimer->pParent;
    int iCpuSelf = RTMpCpuIdToSetIndex(RTMpCpuId());

    AssertPtr(pTimer);
#ifdef RT_STRICT
    if (KeGetCurrentIrql() < DISPATCH_LEVEL)
        RTAssertMsg2Weak("rtTimerNtOmniMasterCallback: Irql=%d expected >=%d\n", KeGetCurrentIrql(), DISPATCH_LEVEL);
    if (pSubTimer - &pTimer->aSubTimers[0] != iCpuSelf)
        RTAssertMsg2Weak("rtTimerNtOmniMasterCallback: iCpuSelf=%d pSubTimer=%p / %d\n", iCpuSelf, pSubTimer, pSubTimer - &pTimer->aSubTimers[0]);
#endif

    /*
     * Check that we haven't been suspended before scheduling the other DPCs
     * and doing the callout.
     */
    if (    !ASMAtomicUoReadBool(&pTimer->fSuspended)
        &&  pTimer->u32Magic == RTTIMER_MAGIC)
    {
        RTCPUSET    OnlineSet;
        RTMpGetOnlineSet(&OnlineSet);

        ASMAtomicWriteHandle(&pSubTimer->hActiveThread, RTThreadNativeSelf());

        if (pTimer->u64NanoInterval)
        {
            /*
             * Recurring timer.
             */
            for (int iCpu = 0; iCpu < RTCPUSET_MAX_CPUS; iCpu++)
                if (    RTCpuSetIsMemberByIndex(&OnlineSet, iCpu)
                    &&  iCpuSelf != iCpu)
                    KeInsertQueueDpc(&pTimer->aSubTimers[iCpu].NtDpc, 0, 0);

            uint64_t iTick = ++pSubTimer->iTick;
            rtTimerNtRearmInternval(pTimer, iTick, &pTimer->aSubTimers[RTMpCpuIdToSetIndex(pTimer->idCpu)].NtDpc);
            pTimer->pfnTimer(pTimer, pTimer->pvUser, iTick);
        }
        else
        {
            /*
             * Single-shot timers get complicated with regard to fSuspended maintenance.
             */
            uint32_t cCpus = 0;
            for (int iCpu = 0; iCpu < RTCPUSET_MAX_CPUS; iCpu++)
                if (RTCpuSetIsMemberByIndex(&OnlineSet, iCpu))
                    cCpus++;
            ASMAtomicAddS32(&pTimer->cOmniSuspendCountDown, cCpus);

            for (int iCpu = 0; iCpu < RTCPUSET_MAX_CPUS; iCpu++)
                if (    RTCpuSetIsMemberByIndex(&OnlineSet, iCpu)
                    &&  iCpuSelf != iCpu)
                    if (!KeInsertQueueDpc(&pTimer->aSubTimers[iCpu].NtDpc, 0, 0))
                        ASMAtomicDecS32(&pTimer->cOmniSuspendCountDown); /* already queued and counted. */

            if (ASMAtomicDecS32(&pTimer->cOmniSuspendCountDown) <= 0)
                ASMAtomicWriteBool(&pTimer->fSuspended, true);

            pTimer->pfnTimer(pTimer, pTimer->pvUser, ++pSubTimer->iTick);
        }

        ASMAtomicWriteHandle(&pSubTimer->hActiveThread, NIL_RTNATIVETHREAD);
    }

    NOREF(pDpc); NOREF(SystemArgument1); NOREF(SystemArgument2);
}
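The single-shot branch uses a countdown so that whichever CPU finishes last flips fSuspended: the master pre-adds one count per online CPU, un-counts any DPC it fails to queue, and every CPU decrements after its callout. The per-CPU step, pulled out as a sketch with field types mirroring the code above:

#include <iprt/asm.h>

/* Each CPU decrements once after firing; the last one marks the timer suspended. */
static void exampleOmniSuspendStep(int32_t volatile *pcCountDown, bool volatile *pfSuspended)
{
    if (ASMAtomicDecS32(pcCountDown) <= 0)
        ASMAtomicWriteBool(pfSuspended, true);
}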
Example #12
RTDECL(int) RTMpOnPair(RTCPUID idCpu1, RTCPUID idCpu2, uint32_t fFlags, PFNRTMPWORKER pfnWorker, void *pvUser1, void *pvUser2)
{
    IPRT_LINUX_SAVE_EFL_AC();
    int rc;
    RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;

    AssertReturn(idCpu1 != idCpu2, VERR_INVALID_PARAMETER);
    AssertReturn(!(fFlags & ~RTMPON_F_VALID_MASK), VERR_INVALID_FLAGS);

    /*
     * Check that both CPUs are online before doing the broadcast call.
     */
    RTThreadPreemptDisable(&PreemptState);
    if (   RTMpIsCpuOnline(idCpu1)
        && RTMpIsCpuOnline(idCpu2))
    {
        /*
         * Use the smp_call_function variant taking a cpu mask where available,
         * falling back on broadcast with filter.  Slight snag: if one of the
         * CPUs is the one we're running on, we must do the call and the
         * post-call wait ourselves.
         */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27)
        cpumask_t   DstCpuMask;
#endif
        RTCPUID     idCpuSelf = RTMpCpuId();
        bool const  fCallSelf = idCpuSelf == idCpu1 || idCpuSelf == idCpu2;
        RTMPARGS    Args;
        Args.pfnWorker = pfnWorker;
        Args.pvUser1 = pvUser1;
        Args.pvUser2 = pvUser2;
        Args.idCpu   = idCpu1;
        Args.idCpu2  = idCpu2;
        Args.cHits   = 0;

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28)
        cpumask_clear(&DstCpuMask);
        cpumask_set_cpu(idCpu1, &DstCpuMask);
        cpumask_set_cpu(idCpu2, &DstCpuMask);
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27)
        cpus_clear(DstCpuMask);
        cpu_set(idCpu1, DstCpuMask);
        cpu_set(idCpu2, DstCpuMask);
#endif

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
        smp_call_function_many(&DstCpuMask, rtmpLinuxWrapperPostInc, &Args, !fCallSelf /* wait */);
        rc = 0;
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28)
        rc = smp_call_function_many(&DstCpuMask, rtmpLinuxWrapperPostInc, &Args, !fCallSelf /* wait */);
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27)
        rc = smp_call_function_mask(DstCpuMask, rtmpLinuxWrapperPostInc, &Args, !fCallSelf /* wait */);
#else /* older kernels */
        rc = smp_call_function(rtMpLinuxOnPairWrapper, &Args, 0 /* retry */, !fCallSelf /* wait */);
#endif /* older kernels */
        Assert(rc == 0);

        /* Call ourselves if necessary and wait for the other party to be done. */
        if (fCallSelf)
        {
            uint32_t cLoops = 0;
            rtmpLinuxWrapper(&Args);
            while (ASMAtomicReadU32(&Args.cHits) < 2)
            {
                if ((cLoops & 0x1ff) == 0 && !RTMpIsCpuOnline(idCpuSelf == idCpu1 ? idCpu2 : idCpu1))
                    break;
                cLoops++;
                ASMNopPause();
            }
        }

        Assert(Args.cHits <= 2);
        if (Args.cHits == 2)
            rc = VINF_SUCCESS;
        else if (Args.cHits == 1)
            rc = VERR_NOT_ALL_CPUS_SHOWED;
        else if (Args.cHits == 0)
            rc = VERR_CPU_OFFLINE;
        else
            rc = VERR_CPU_IPE_1;
    }
    /*
     * A CPU must be present to be considered just offline.
     */
    else if (   RTMpIsCpuPresent(idCpu1)
             && RTMpIsCpuPresent(idCpu2))
        rc = VERR_CPU_OFFLINE;
    else
        rc = VERR_CPU_NOT_FOUND;
    RTThreadPreemptRestore(&PreemptState);
    IPRT_LINUX_RESTORE_EFL_AC();
    return rc;
}
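A typical invocation pairs two specific CPUs. A hedged usage sketch; the worker is illustrative, and RTMPON_F_CONCURRENT_EXEC is assumed to be within RTMPON_F_VALID_MASK as iprt/mp.h defines it:

#include <iprt/mp.h>

/* Illustrative worker; RTMpOnPair runs it on both target CPUs. */
static DECLCALLBACK(void) examplePairWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
{
    NOREF(idCpu); NOREF(pvUser1); NOREF(pvUser2);
}

/* Sketch: request concurrent execution of the worker on two specific CPUs. */
static int exampleOnPair(RTCPUID idCpu1, RTCPUID idCpu2)
{
    return RTMpOnPair(idCpu1, idCpu2, RTMPON_F_CONCURRENT_EXEC,
                      examplePairWorker, NULL, NULL);
}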