RTDECL(void) RTSpinlockAcquire(RTSPINLOCK Spinlock)
{
    PRTSPINLOCKINTERNAL pThis = (PRTSPINLOCKINTERNAL)Spinlock;
    AssertMsg(pThis && pThis->u32Magic == RTSPINLOCK_MAGIC,
              ("pThis=%p u32Magic=%#x\n", pThis, pThis ? pThis->u32Magic : 0));

    KIRQL SavedIrql;
    if (pThis->fFlags & RTSPINLOCK_FLAGS_INTERRUPT_SAFE)
    {
#ifndef RTSPINLOCK_NT_HACK_NOIRQ
        RTCCUINTREG fIntSaved = ASMGetFlags();
        ASMIntDisable();
        KeAcquireSpinLock(&pThis->Spinlock, &SavedIrql);
        pThis->fIntSaved = fIntSaved;
#else
        SavedIrql = KeGetCurrentIrql();
        if (SavedIrql < DISPATCH_LEVEL)
        {
            KeRaiseIrql(DISPATCH_LEVEL, &SavedIrql);
            Assert(SavedIrql < DISPATCH_LEVEL);
        }
        RTCCUINTREG fIntSaved = ASMGetFlags();
        ASMIntDisable();

        if (!ASMAtomicCmpXchgU32(&pThis->u32Hack, RTSPINLOCK_NT_HACK_NOIRQ_TAKEN, RTSPINLOCK_NT_HACK_NOIRQ_FREE))
        {
            while (!ASMAtomicCmpXchgU32(&pThis->u32Hack, RTSPINLOCK_NT_HACK_NOIRQ_TAKEN, RTSPINLOCK_NT_HACK_NOIRQ_FREE))
                ASMNopPause();
        }

        pThis->fIntSaved = fIntSaved;
#endif
    }
    else
        KeAcquireSpinLock(&pThis->Spinlock, &SavedIrql);
    pThis->SavedIrql = SavedIrql;
}
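A minimal usage sketch (not from the original sources): pairing the acquire above with RTSpinlockRelease around a short critical region. The g_hSpinlock/g_cCounter names and the init helper are illustrative assumptions.

static RTSPINLOCK g_hSpinlock = NIL_RTSPINLOCK;   /* assumed global, created once */
static uint32_t   g_cCounter  = 0;

static int exampleInit(void)
{
    /* Interrupt-safe variant so the lock may be taken from ISR-like contexts. */
    return RTSpinlockCreate(&g_hSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "example");
}

static void exampleBump(void)
{
    RTSpinlockAcquire(g_hSpinlock);
    g_cCounter++;                   /* keep the critical region short */
    RTSpinlockRelease(g_hSpinlock);
}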
/**
 * Worker for supdrvOSMsrProberModify.
 */
static DECLCALLBACK(void) supdrvLnxMsrProberModifyOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
{
    PSUPMSRPROBER               pReq    = (PSUPMSRPROBER)pvUser1;
    register uint32_t           uMsr    = pReq->u.In.uMsr;
    bool const                  fFaster = pReq->u.In.enmOp == SUPMSRPROBEROP_MODIFY_FASTER;
    uint64_t                    uBefore;
    uint64_t                    uWritten;
    uint64_t                    uAfter;
    int                         rcBefore, rcWrite, rcAfter, rcRestore;
    RTCCUINTREG                 fOldFlags;

    /* Initialize result variables. */
    uBefore = uWritten = uAfter    = 0;
    rcWrite = rcAfter  = rcRestore = -EIO;

    /*
     * Do the job.
     */
    fOldFlags = ASMIntDisableFlags();
    ASMCompilerBarrier(); /* paranoia */
    if (!fFaster)
        ASMWriteBackAndInvalidateCaches();

    rcBefore = rdmsrl_safe(uMsr, &uBefore);
    if (rcBefore >= 0)
    {
        register uint64_t uRestore = uBefore;
        uWritten  = uRestore;
        uWritten &= pReq->u.In.uArgs.Modify.fAndMask;
        uWritten |= pReq->u.In.uArgs.Modify.fOrMask;

        rcWrite   = wrmsr_safe(uMsr, RT_LODWORD(uWritten), RT_HIDWORD(uWritten));
        rcAfter   = rdmsrl_safe(uMsr, &uAfter);
        rcRestore = wrmsr_safe(uMsr, RT_LODWORD(uRestore), RT_HIDWORD(uRestore));

        if (!fFaster)
        {
            ASMWriteBackAndInvalidateCaches();
            ASMReloadCR3();
            ASMNopPause();
        }
    }

    ASMCompilerBarrier(); /* paranoia */
    ASMSetFlags(fOldFlags);

    /*
     * Write out the results.
     */
    pReq->u.Out.uResults.Modify.uBefore    = uBefore;
    pReq->u.Out.uResults.Modify.uWritten   = uWritten;
    pReq->u.Out.uResults.Modify.uAfter     = uAfter;
    pReq->u.Out.uResults.Modify.fBeforeGp  = rcBefore  != 0;
    pReq->u.Out.uResults.Modify.fModifyGp  = rcWrite   != 0;
    pReq->u.Out.uResults.Modify.fAfterGp   = rcAfter   != 0;
    pReq->u.Out.uResults.Modify.fRestoreGp = rcRestore != 0;
    RT_ZERO(pReq->u.Out.uResults.Modify.afReserved);
}
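For context (an illustrative note, not from the original sources): the fAndMask/fOrMask pair above implements newValue = (oldValue & fAndMask) | fOrMask, so a caller can clear and set individual MSR bits in one operation. A hedged sketch of the caller-side setup, reusing the field names from the worker above; the enum value and MSR choice are assumptions:

    SUPMSRPROBER Req;
    RT_ZERO(Req);
    Req.u.In.uMsr                  = 0xc0000080;            /* example: MSR_K6_EFER */
    Req.u.In.enmOp                 = SUPMSRPROBEROP_MODIFY; /* assumed non-FASTER op */
    Req.u.In.uArgs.Modify.fAndMask = ~RT_BIT_64(0);         /* clear bit 0 */
    Req.u.In.uArgs.Modify.fOrMask  = RT_BIT_64(11);         /* set bit 11 */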
RTDECL(int) RTMpOnAll(PFNRTMPWORKER pfnWorker, void *pvUser1, void *pvUser2)
{
    IPRT_LINUX_SAVE_EFL_AC();
    int rc;
    RTMPARGS Args;
    RTCPUSET OnlineSet;
    RTCPUID  idCpu;
    uint32_t cLoops;

    RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;

    Args.pfnWorker  = pfnWorker;
    Args.pvUser1    = pvUser1;
    Args.pvUser2    = pvUser2;
    Args.idCpu      = NIL_RTCPUID;
    Args.cHits      = 0;

    RTThreadPreemptDisable(&PreemptState);
    RTMpGetOnlineSet(&OnlineSet);
    Args.pWorkerSet = &OnlineSet;
    idCpu = RTMpCpuId();

    if (RTCpuSetCount(&OnlineSet) > 1)
    {
        /* Fire the function on all other CPUs without waiting for completion. */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27)
        rc = smp_call_function(rtmpLinuxAllWrapper, &Args, 0 /* wait */);
#else
        rc = smp_call_function(rtmpLinuxAllWrapper, &Args, 0 /* retry */, 0 /* wait */);
#endif
        Assert(!rc); NOREF(rc);
    }

    /* Fire the function on this CPU. */
    Args.pfnWorker(idCpu, Args.pvUser1, Args.pvUser2);
    RTCpuSetDel(Args.pWorkerSet, idCpu);

    /* Wait for all of them to finish. */
    cLoops = 64000;
    while (!RTCpuSetIsEmpty(Args.pWorkerSet))
    {
        /* Periodically check if any CPU in the wait set has gone offline, if so update the wait set. */
        if (!cLoops--)
        {
            RTCPUSET OnlineSetNow;
            RTMpGetOnlineSet(&OnlineSetNow);
            RTCpuSetAnd(Args.pWorkerSet, &OnlineSetNow);

            cLoops = 64000;
        }

        ASMNopPause();
    }

    RTThreadPreemptRestore(&PreemptState);
    IPRT_LINUX_RESTORE_EFL_AC();
    return VINF_SUCCESS;
}
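A hedged usage sketch (illustrative, not from the original sources): running a tiny worker on every online CPU with RTMpOnAll. The worker signature matches PFNRTMPWORKER as used above; the counter name is an assumption.

static DECLCALLBACK(void) exampleCountWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
{
    NOREF(idCpu); NOREF(pvUser2);
    ASMAtomicIncU32((uint32_t volatile *)pvUser1);  /* one hit per CPU */
}

static void exampleCountCpus(void)
{
    uint32_t volatile cHits = 0;
    int rc = RTMpOnAll(exampleCountWorker, (void *)&cHits, NULL);
    if (RT_SUCCESS(rc))
    {
        /* cHits now holds the number of CPUs the worker ran on. */
    }
}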
/**
 * Prepare non-blocking mode.
 *
 * @returns VINF_SUCCESS
 * @retval  VERR_WRONG_ORDER
 * @retval  VERR_INTERNAL_ERROR_4
 *
 * @param   pThis               The pipe handle.
 */
static int rtPipeTryNonBlocking(RTPIPEINTERNAL *pThis)
{
    /*
     * Update the state.
     */
    for (;;)
    {
        uint32_t        u32State    = ASMAtomicReadU32(&pThis->u32State);
        uint32_t const  u32StateOld = u32State;
        uint32_t const  cUsers      = (u32State & RTPIPE_POSIX_USERS_MASK);

        if (!(u32State & RTPIPE_POSIX_BLOCKING))
        {
            AssertReturn(cUsers < RTPIPE_POSIX_USERS_MASK / 2, VERR_INTERNAL_ERROR_4);
            u32State &= ~RTPIPE_POSIX_USERS_MASK;
            u32State |= cUsers + 1;
            if (ASMAtomicCmpXchgU32(&pThis->u32State, u32State, u32StateOld))
            {
                if (u32State & RTPIPE_POSIX_SWITCHING)
                    break;
                return VINF_SUCCESS;
            }
        }
        else if (cUsers == 0)
        {
            u32State = 1 | RTPIPE_POSIX_SWITCHING;
            if (ASMAtomicCmpXchgU32(&pThis->u32State, u32State, u32StateOld))
                break;
        }
        else
            return VERR_WRONG_ORDER;
        ASMNopPause();
    }

    /*
     * Do the switching.
     */
    int fFlags = fcntl(pThis->fd, F_GETFL, 0);
    if (fFlags != -1)
    {
        if (    (fFlags & O_NONBLOCK)
            ||  fcntl(pThis->fd, F_SETFL, fFlags | O_NONBLOCK) != -1)
        {
            ASMAtomicBitClear(&pThis->u32State, RTPIPE_POSIX_SWITCHING_BIT);
            return VINF_SUCCESS;
        }
    }

    ASMAtomicDecU32(&pThis->u32State);
    return RTErrConvertFromErrno(errno);
}
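A hedged note (not from the original sources): the function above treats u32State as a packed word with a user count in the low bits and two flag bits on top. Plausible definitions consistent with that usage, though the real header may differ:

/* Low 30 bits: number of threads currently inside read/write. */
#define RTPIPE_POSIX_USERS_MASK     UINT32_C(0x3fffffff)
/* Set while the fd is still in blocking mode. */
#define RTPIPE_POSIX_BLOCKING       UINT32_C(0x40000000)
/* Set while a thread is switching the fd to O_NONBLOCK. */
#define RTPIPE_POSIX_SWITCHING      UINT32_C(0x80000000)
#define RTPIPE_POSIX_SWITCHING_BIT  31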
/**
 * Yield the critical section if someone is waiting on it.
 *
 * When yielding, we'll leave the critical section and try to make sure the
 * other waiting threads get a chance to enter before we reclaim it.
 *
 * @retval  true if yielded.
 * @retval  false if not yielded.
 * @param   pCritSect           The critical section.
 */
VMMR3DECL(bool) PDMR3CritSectYield(PPDMCRITSECT pCritSect)
{
    AssertPtrReturn(pCritSect, false);
    AssertReturn(pCritSect->s.Core.u32Magic == RTCRITSECT_MAGIC, false);
    Assert(pCritSect->s.Core.NativeThreadOwner == RTThreadNativeSelf());
    Assert(!(pCritSect->s.Core.fFlags & RTCRITSECT_FLAGS_NOP));

    /* No recursion allowed here. */
    int32_t const cNestings = pCritSect->s.Core.cNestings;
    AssertReturn(cNestings == 1, false);

    int32_t const cLockers  = ASMAtomicReadS32(&pCritSect->s.Core.cLockers);
    if (cLockers < cNestings)
        return false;

#ifdef PDMCRITSECT_STRICT
    RTLOCKVALSRCPOS const SrcPos = pCritSect->s.Core.pValidatorRec->SrcPos;
#endif
    PDMCritSectLeave(pCritSect);

    /*
     * If we're lucky, then one of the waiters has entered the lock already.
     * We spin a little while in the hope that this happens, so we can avoid the
     * yield detour.
     */
    if (ASMAtomicUoReadS32(&pCritSect->s.Core.cNestings) == 0)
    {
        int cLoops = 20;
        while (   cLoops > 0
               && ASMAtomicUoReadS32(&pCritSect->s.Core.cNestings) == 0
               && ASMAtomicUoReadS32(&pCritSect->s.Core.cLockers)  >= 0)
        {
            ASMNopPause();
            cLoops--;
        }
        if (cLoops == 0)
            RTThreadYield();
    }

#ifdef PDMCRITSECT_STRICT
    int rc = PDMCritSectEnterDebug(pCritSect, VERR_IGNORED,
                                   SrcPos.uId, SrcPos.pszFile, SrcPos.uLine, SrcPos.pszFunction);
#else
    int rc = PDMCritSectEnter(pCritSect, VERR_IGNORED);
#endif
    AssertLogRelRC(rc);
    return true;
}
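A hedged usage sketch (illustrative, not from the original sources): a long-running loop that holds a PDM critical section and periodically offers it to waiters via the yield API above. The function and loop body are assumptions.

static void exampleDrainQueue(PPDMCRITSECT pCritSect, uint32_t cItems)
{
    int rc = PDMCritSectEnter(pCritSect, VERR_IGNORED);
    AssertRC(rc);
    for (uint32_t i = 0; i < cItems; i++)
    {
        /* ... process one item under the lock ... */
        if ((i & 0xff) == 0xff)
            PDMR3CritSectYield(pCritSect);  /* give waiters a chance; re-enters before returning */
    }
    PDMCritSectLeave(pCritSect);
}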
RTDECL(int) RTMpOnSpecific(RTCPUID idCpu, PFNRTMPWORKER pfnWorker, void *pvUser1, void *pvUser2)
{
    /*
     * Don't try mess with an offline CPU.
     */
    if (!RTMpIsCpuOnline(idCpu))
        return !RTMpIsCpuPossible(idCpu)
              ? VERR_CPU_NOT_FOUND
              : VERR_CPU_OFFLINE;

    /*
     * Use the broadcast IPI routine if there are no more than two CPUs online,
     * or if the current IRQL is unsuitable for KeWaitForSingleObject.
     */
    int rc;
    uint32_t cHits = 0;
    if (   g_pfnrtKeIpiGenericCall
        && (   RTMpGetOnlineCount() <= 2
            || KeGetCurrentIrql()   > APC_LEVEL)
       )
    {
        rc = rtMpCallUsingBroadcastIpi(pfnWorker, pvUser1, pvUser2, rtmpNtOnSpecificBroadcastIpiWrapper,
                                       idCpu, NIL_RTCPUID, &cHits);
        if (RT_SUCCESS(rc))
        {
            if (cHits == 1)
                return VINF_SUCCESS;
            rc = cHits == 0 ? VERR_CPU_OFFLINE : VERR_CPU_IPE_1;
        }
        return rc;
    }

#if 0
    rc = rtMpCallUsingDpcs(pfnWorker, pvUser1, pvUser2, RT_NT_CPUID_SPECIFIC, idCpu, NIL_RTCPUID, &cHits);
    if (RT_SUCCESS(rc))
    {
        if (cHits == 1)
            return VINF_SUCCESS;
        rc = cHits == 0 ? VERR_CPU_OFFLINE : VERR_CPU_IPE_1;
    }
    return rc;

#else
    /*
     * Initialize the argument package and the objects within it.
     * The package is reference counted to avoid unnecessary spinning to
     * synchronize cleanup and prevent stack corruption.
     */
    PRTMPNTONSPECIFICARGS pArgs = (PRTMPNTONSPECIFICARGS)ExAllocatePoolWithTag(NonPagedPool, sizeof(*pArgs), (ULONG)'RTMp');
    if (!pArgs)
        return VERR_NO_MEMORY;
    pArgs->cRefs                  = 2;
    pArgs->fExecuting             = false;
    pArgs->fDone                  = false;
    pArgs->CallbackArgs.pfnWorker = pfnWorker;
    pArgs->CallbackArgs.pvUser1   = pvUser1;
    pArgs->CallbackArgs.pvUser2   = pvUser2;
    pArgs->CallbackArgs.idCpu     = idCpu;
    pArgs->CallbackArgs.cHits     = 0;
    pArgs->CallbackArgs.cRefs     = 2;
    KeInitializeEvent(&pArgs->DoneEvt, SynchronizationEvent, FALSE /* not signalled */);
    KeInitializeDpc(&pArgs->Dpc, rtMpNtOnSpecificDpcWrapper, pArgs);
    KeSetImportanceDpc(&pArgs->Dpc, HighImportance);
    KeSetTargetProcessorDpc(&pArgs->Dpc, (int)idCpu);

    /*
     * Disable preemption while we check the current processor and insert the DPC.
     */
    KIRQL bOldIrql;
    KeRaiseIrql(DISPATCH_LEVEL, &bOldIrql);
    ASMCompilerBarrier(); /* paranoia */

    if (RTMpCpuId() == idCpu)
    {
        /* Just execute the callback on the current CPU. */
        pfnWorker(idCpu, pvUser1, pvUser2);
        KeLowerIrql(bOldIrql);

        ExFreePool(pArgs);
        return VINF_SUCCESS;
    }

    /* Different CPU, so queue it if the CPU is still online. */
    if (RTMpIsCpuOnline(idCpu))
    {
        BOOLEAN fRc = KeInsertQueueDpc(&pArgs->Dpc, 0, 0);
        Assert(fRc);
        KeLowerIrql(bOldIrql);

        uint64_t const nsRealWaitTS = RTTimeNanoTS();

        /*
         * Wait actively for a while in case the CPU/thread responds quickly.
         */
        uint32_t cLoopsLeft = 0x20000;
        while (cLoopsLeft-- > 0)
        {
            if (pArgs->fDone)
            {
                rtMpNtOnSpecificRelease(pArgs);
                return VINF_SUCCESS;
            }
            ASMNopPause();
        }

        /*
         * It didn't respond, so wait on the event object, poking the CPU if it's slow.
         */
        LARGE_INTEGER Timeout;
        Timeout.QuadPart = -10000; /* 1ms */
        NTSTATUS rcNt = KeWaitForSingleObject(&pArgs->DoneEvt, Executive, KernelMode, FALSE /* Alertable */, &Timeout);
        if (rcNt == STATUS_SUCCESS)
        {
            rtMpNtOnSpecificRelease(pArgs);
            return VINF_SUCCESS;
        }

        /* If it hasn't responded yet, maybe poke it and wait some more. */
        if (rcNt == STATUS_TIMEOUT)
        {
#ifndef IPRT_TARGET_NT4
            if (   !pArgs->fExecuting
                && (   g_pfnrtMpPokeCpuWorker == rtMpPokeCpuUsingHalSendSoftwareInterrupt
                    || g_pfnrtMpPokeCpuWorker == rtMpPokeCpuUsingHalReqestIpiW7Plus
                    || g_pfnrtMpPokeCpuWorker == rtMpPokeCpuUsingHalReqestIpiPreW7))
                RTMpPokeCpu(idCpu);
#endif

            Timeout.QuadPart = -1280000; /* 128ms */
            rcNt = KeWaitForSingleObject(&pArgs->DoneEvt, Executive, KernelMode, FALSE /* Alertable */, &Timeout);
            if (rcNt == STATUS_SUCCESS)
            {
                rtMpNtOnSpecificRelease(pArgs);
                return VINF_SUCCESS;
            }
        }

        /*
         * Something weird is happening; try to bail out.
         */
        if (KeRemoveQueueDpc(&pArgs->Dpc))
        {
            ExFreePool(pArgs); /* DPC was still queued, so we can return without further ado. */
            LogRel(("RTMpOnSpecific(%#x): Not processed after %llu ns: rcNt=%#x\n", idCpu, RTTimeNanoTS() - nsRealWaitTS, rcNt));
        }
        else
        {
            /* DPC is running, wait a good while for it to complete. */
            LogRel(("RTMpOnSpecific(%#x): Still running after %llu ns: rcNt=%#x\n", idCpu, RTTimeNanoTS() - nsRealWaitTS, rcNt));

            Timeout.QuadPart = -30*1000*1000*10; /* 30 seconds */
            rcNt = KeWaitForSingleObject(&pArgs->DoneEvt, Executive, KernelMode, FALSE /* Alertable */, &Timeout);
            if (rcNt != STATUS_SUCCESS)
                LogRel(("RTMpOnSpecific(%#x): Giving up on running worker after %llu ns: rcNt=%#x\n", idCpu, RTTimeNanoTS() - nsRealWaitTS, rcNt));
        }
        rc = RTErrConvertFromNtStatus(rcNt);
    }
    else
    {
        /* CPU is offline. */
        KeLowerIrql(bOldIrql);
        rc = !RTMpIsCpuPossible(idCpu) ? VERR_CPU_NOT_FOUND : VERR_CPU_OFFLINE;
    }

    rtMpNtOnSpecificRelease(pArgs);
    return rc;
#endif
}
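A hedged usage sketch (illustrative, not from the original sources): reading the TSC on one particular CPU via RTMpOnSpecific. The worker and helper names are assumptions.

static DECLCALLBACK(void) exampleReadTscWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
{
    NOREF(idCpu); NOREF(pvUser2);
    *(uint64_t *)pvUser1 = ASMReadTSC();    /* runs on the requested CPU */
}

static int exampleReadTscOnCpu(RTCPUID idCpu, uint64_t *puTsc)
{
    int rc = RTMpOnSpecific(idCpu, exampleReadTscWorker, puTsc, NULL);
    /* VERR_CPU_OFFLINE / VERR_CPU_NOT_FOUND are normal failures here. */
    return rc;
}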
/**
 * Common worker for the debug and normal APIs.
 *
 * @returns VINF_SUCCESS if entered successfully.
 * @returns rcBusy when encountering a busy critical section in GC/R0.
 * @returns VERR_SEM_DESTROYED if the critical section is dead.
 *
 * @param   pCritSect           The PDM critical section to enter.
 * @param   rcBusy              The status code to return when we're in GC or R0
 *                              and the section is busy.
 */
DECL_FORCE_INLINE(int) pdmCritSectEnter(PPDMCRITSECT pCritSect, int rcBusy, PCRTLOCKVALSRCPOS pSrcPos)
{
    Assert(pCritSect->s.Core.cNestings < 8);  /* useful to catch incorrect locking */
    Assert(pCritSect->s.Core.cNestings >= 0);

    /*
     * If the critical section has already been destroyed, then inform the caller.
     */
    AssertMsgReturn(pCritSect->s.Core.u32Magic == RTCRITSECT_MAGIC,
                    ("%p %RX32\n", pCritSect, pCritSect->s.Core.u32Magic),
                    VERR_SEM_DESTROYED);

    /*
     * See if we're lucky.
     */
    /* NOP ... */
    if (pCritSect->s.Core.fFlags & RTCRITSECT_FLAGS_NOP)
        return VINF_SUCCESS;

    RTNATIVETHREAD hNativeSelf = pdmCritSectGetNativeSelf(pCritSect);
    /* ... not owned ... */
    if (ASMAtomicCmpXchgS32(&pCritSect->s.Core.cLockers, 0, -1))
        return pdmCritSectEnterFirst(pCritSect, hNativeSelf, pSrcPos);

    /* ... or nested. */
    if (pCritSect->s.Core.NativeThreadOwner == hNativeSelf)
    {
        ASMAtomicIncS32(&pCritSect->s.Core.cLockers);
        ASMAtomicIncS32(&pCritSect->s.Core.cNestings);
        Assert(pCritSect->s.Core.cNestings > 1);
        return VINF_SUCCESS;
    }

    /*
     * Spin for a bit without incrementing the counter.
     */
    /** @todo Move this to cfgm variables since it doesn't make sense to spin on UNI
     *        cpu systems. */
    int32_t cSpinsLeft = CTX_SUFF(PDMCRITSECT_SPIN_COUNT_);
    while (cSpinsLeft-- > 0)
    {
        if (ASMAtomicCmpXchgS32(&pCritSect->s.Core.cLockers, 0, -1))
            return pdmCritSectEnterFirst(pCritSect, hNativeSelf, pSrcPos);
        ASMNopPause();
        /** @todo Should use monitor/mwait on e.g. &cLockers here, possibly with a
           cli'ed pendingpreemption check up front using sti w/ instruction fusing
           for avoiding races. Hmm ... This is assuming the other party is actually
           executing code on another CPU ... which we could keep track of if we
           wanted. */
    }

#ifdef IN_RING3
    /*
     * Take the slow path.
     */
    return pdmR3R0CritSectEnterContended(pCritSect, hNativeSelf, pSrcPos);

#else
# ifdef IN_RING0
    /** @todo If preemption is disabled it means we're in VT-x/AMD-V context
     *        and would be better off switching out of that while waiting for
     *        the lock.  Several of the locks jump back to ring-3 just to
     *        get the lock, the ring-3 code will then call the kernel to do
     *        the lock wait and when the call returns it will call ring-0
     *        again and resume via setjmp style.  Not very efficient. */
#  if 0
    if (ASMIntAreEnabled()) /** @todo this can be handled as well by changing
                             * callers not prepared for longjmp/blocking to
                             * use PDMCritSectTryEnter. */
    {
        /*
         * Leave HWACCM context while waiting if necessary.
         */
        int rc;
        if (RTThreadPreemptIsEnabled(NIL_RTTHREAD))
        {
            STAM_REL_COUNTER_ADD(&pCritSect->s.StatContentionRZLock,    1000000);
            rc = pdmR3R0CritSectEnterContended(pCritSect, hNativeSelf, pSrcPos);
        }
        else
        {
            STAM_REL_COUNTER_ADD(&pCritSect->s.StatContentionRZLock, 1000000000);
            PVM     pVM   = pCritSect->s.CTX_SUFF(pVM);
            PVMCPU  pVCpu = VMMGetCpu(pVM);
            HWACCMR0Leave(pVM, pVCpu);
            RTThreadPreemptRestore(NIL_RTTHREAD, ????);

            rc = pdmR3R0CritSectEnterContended(pCritSect, hNativeSelf, pSrcPos);

            RTThreadPreemptDisable(NIL_RTTHREAD, ????);
            HWACCMR0Enter(pVM, pVCpu);
        }
        return rc;
    }
#  else
    /*
     * Preemption hasn't been disabled, so we can block here in ring-0.
     */
    if (   RTThreadPreemptIsEnabled(NIL_RTTHREAD)
        && ASMIntAreEnabled())
        return pdmR3R0CritSectEnterContended(pCritSect, hNativeSelf, pSrcPos);
#  endif
#endif /* IN_RING0 */

    STAM_REL_COUNTER_INC(&pCritSect->s.StatContentionRZLock);

    /*
     * Call ring-3 to acquire the critical section?
     */
    if (rcBusy == VINF_SUCCESS)
    {
        PVM     pVM   = pCritSect->s.CTX_SUFF(pVM); AssertPtr(pVM);
        PVMCPU  pVCpu = VMMGetCpu(pVM);             AssertPtr(pVCpu);
        return VMMRZCallRing3(pVM, pVCpu, VMMCALLRING3_PDM_CRIT_SECT_ENTER, MMHyperCCToR3(pVM, pCritSect));
    }

    /*
     * Return busy.
     */
    LogFlow(("PDMCritSectEnter: locked => R3 (%Rrc)\n", rcBusy));
    return rcBusy;
#endif /* !IN_RING3 */
}
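A hedged usage sketch (illustrative, not from the original sources): how ring-0/RC callers typically use the rcBusy contract of the enter API above. The device function and the choice of VERR_SEM_BUSY are assumptions.

static int exampleDeviceMmioWrite(PPDMCRITSECT pCritSect)
{
    /* In R0/RC, ask for VERR_SEM_BUSY instead of blocking; the caller then
       defers the access to ring-3, where blocking is allowed. */
    int rc = PDMCritSectEnter(pCritSect, VERR_SEM_BUSY);
    if (rc == VERR_SEM_BUSY)
        return rc;                  /* retried from ring-3 by the caller */
    AssertRCReturn(rc, rc);

    /* ... touch device state ... */

    PDMCritSectLeave(pCritSect);
    return VINF_SUCCESS;
}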
RTDECL(void) RTSpinlockAcquire(RTSPINLOCK Spinlock)
{
    PRTSPINLOCKINTERNAL pThis = (PRTSPINLOCKINTERNAL)Spinlock;
    AssertMsg(pThis && pThis->u32Magic == RTSPINLOCK_GEN_MAGIC,
              ("pThis=%p u32Magic=%08x\n", pThis, pThis ? (int)pThis->u32Magic : 0));

    if (pThis->fFlags & RTSPINLOCK_FLAGS_INTERRUPT_SAFE)
    {
#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
        uint32_t fIntSaved = ASMGetFlags();
#endif

#if RT_CFG_SPINLOCK_GENERIC_DO_SLEEP
        for (;;)
        {
# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
            ASMIntDisable();
# endif
            for (int c = RT_CFG_SPINLOCK_GENERIC_DO_SLEEP; c > 0; c--)
            {
                if (ASMAtomicCmpXchgU32(&pThis->fLocked, 1, 0))
                {
# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
                    pThis->fIntSaved = fIntSaved;
# endif
                    return;
                }
                ASMNopPause();
            }
# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
            ASMSetFlags(fIntSaved);
# endif
            RTThreadYield();
        }
#else
        for (;;)
        {
# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
            ASMIntDisable();
# endif
            if (ASMAtomicCmpXchgU32(&pThis->fLocked, 1, 0))
            {
# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
                pThis->fIntSaved = fIntSaved;
# endif
                return;
            }
# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
            ASMSetFlags(fIntSaved);
# endif
            ASMNopPause();
        }
#endif
    }
    else
    {
#if RT_CFG_SPINLOCK_GENERIC_DO_SLEEP
        for (;;)
        {
            for (int c = RT_CFG_SPINLOCK_GENERIC_DO_SLEEP; c > 0; c--)
            {
                if (ASMAtomicCmpXchgU32(&pThis->fLocked, 1, 0))
                    return;
                ASMNopPause();
            }
            RTThreadYield();
        }
#else
        while (!ASMAtomicCmpXchgU32(&pThis->fLocked, 1, 0))
            ASMNopPause();
#endif
    }
}
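Illustrative only (not IPRT code): the generic acquire above is a plain test-and-set spinlock with pause hints. The same pattern in portable C11, with assumed type and function names:

#include <stdatomic.h>

typedef struct { atomic_uint fLocked; } spin_t;

static void spin_lock(spin_t *p)
{
    unsigned expected = 0;
    /* Weak CAS in a loop; a pause hint reduces bus traffic while spinning. */
    while (!atomic_compare_exchange_weak_explicit(&p->fLocked, &expected, 1,
                                                  memory_order_acquire,
                                                  memory_order_relaxed))
    {
        expected = 0;   /* a failed CAS updates 'expected'; reset it */
        /* equivalent of ASMNopPause(), e.g. _mm_pause() on x86 */
    }
}

static void spin_unlock(spin_t *p)
{
    atomic_store_explicit(&p->fLocked, 0, memory_order_release);
}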
int main(int argc, char **argv)
{
    RTR3InitExe(argc, &argv, 0);

    /*
     * Parse args
     */
    static const RTGETOPTDEF g_aOptions[] =
    {
        { "--iterations",       'i', RTGETOPT_REQ_INT32 },
        { "--hex",              'h', RTGETOPT_REQ_NOTHING },
        { "--decimal",          'd', RTGETOPT_REQ_NOTHING },
        { "--spin",             's', RTGETOPT_REQ_NOTHING },
        { "--reference",        'r', RTGETOPT_REQ_UINT64 },  /* reference value of CpuHz, display the
                                                              * CpuHz deviation in a separate column. */
    };

    uint32_t cIterations = 40;
    bool fHex = true;
    bool fSpin = false;
    int ch;
    uint64_t uCpuHzRef = 0;
    uint64_t uCpuHzOverallDeviation = 0;
    int64_t  iCpuHzMaxDeviation = 0;
    int32_t cCpuHzOverallDevCnt = 0;
    RTGETOPTUNION ValueUnion;
    RTGETOPTSTATE GetState;
    RTGetOptInit(&GetState, argc, argv, g_aOptions, RT_ELEMENTS(g_aOptions), 1, RTGETOPTINIT_FLAGS_NO_STD_OPTS);
    while ((ch = RTGetOpt(&GetState, &ValueUnion)))
    {
        switch (ch)
        {
            case 'i':
                cIterations = ValueUnion.u32;
                break;

            case 'd':
                fHex = false;
                break;

            case 'h':
                fHex = true;
                break;

            case 's':
                fSpin = true;
                break;

            case 'r':
                uCpuHzRef = ValueUnion.u64;
                break;

            default:
                return RTGetOptPrintError(ch, &ValueUnion);
        }
    }

    /*
     * Init
     */
    PSUPDRVSESSION pSession = NIL_RTR0PTR;
    int rc = SUPR3Init(&pSession);
    if (RT_SUCCESS(rc))
    {
        if (g_pSUPGlobalInfoPage)
        {
            RTPrintf("tstGIP-2: cCpus=%d  u32UpdateHz=%RU32  u32UpdateIntervalNS=%RU32  u64NanoTSLastUpdateHz=%RX64  u64CpuHz=%RU64  uCpuHzRef=%RU64  u32Mode=%d (%s) u32Version=%#x\n",
                     g_pSUPGlobalInfoPage->cCpus,
                     g_pSUPGlobalInfoPage->u32UpdateHz,
                     g_pSUPGlobalInfoPage->u32UpdateIntervalNS,
                     g_pSUPGlobalInfoPage->u64NanoTSLastUpdateHz,
                     g_pSUPGlobalInfoPage->u64CpuHz,
                     uCpuHzRef,
                     g_pSUPGlobalInfoPage->u32Mode,
                     SUPGetGIPModeName(g_pSUPGlobalInfoPage),
                     g_pSUPGlobalInfoPage->u32Version);
            RTPrintf(fHex
                     ? "tstGIP-2:     it: u64NanoTS        delta     u64TSC           UpIntTSC H  TransId      CpuHz      %sTSC Interval History...\n"
                     : "tstGIP-2:     it: u64NanoTS        delta     u64TSC             UpIntTSC H    TransId      CpuHz      %sTSC Interval History...\n",
                     uCpuHzRef ? "  CpuHz deviation  " : "");
            static SUPGIPCPU s_aaCPUs[2][256];
            for (uint32_t i = 0; i < cIterations; i++)
            {
                /* copy the data */
                memcpy(&s_aaCPUs[i & 1][0], &g_pSUPGlobalInfoPage->aCPUs[0], g_pSUPGlobalInfoPage->cCpus * sizeof(g_pSUPGlobalInfoPage->aCPUs[0]));

                /* display it & find something to spin on. */
                uint32_t u32TransactionId = 0;
                uint32_t volatile *pu32TransactionId = NULL;
                for (unsigned iCpu = 0; iCpu < g_pSUPGlobalInfoPage->cCpus; iCpu++)
                    if (    g_pSUPGlobalInfoPage->aCPUs[iCpu].u64CpuHz > 0
                        &&  g_pSUPGlobalInfoPage->aCPUs[iCpu].u64CpuHz != _4G + 1)
                    {
                        char szCpuHzDeviation[32];
                        PSUPGIPCPU pPrevCpu = &s_aaCPUs[!(i & 1)][iCpu];
                        PSUPGIPCPU pCpu = &s_aaCPUs[i & 1][iCpu];
                        if (uCpuHzRef)
                        {
                            int64_t iCpuHzDeviation = pCpu->u64CpuHz - uCpuHzRef;
                            uint64_t uCpuHzDeviation = RT_ABS(iCpuHzDeviation);
                            if (uCpuHzDeviation > 999999999)
                                RTStrPrintf(szCpuHzDeviation, sizeof(szCpuHzDeviation), "%17s  ", "?");
                            else
                            {
                                /* Wait until the history validation code takes effect. */
                                if (pCpu->u32TransactionId > 23 + (8 * 2) + 1)
                                {
                                    if (RT_ABS(iCpuHzDeviation) > RT_ABS(iCpuHzMaxDeviation))
                                        iCpuHzMaxDeviation = iCpuHzDeviation;
                                    uCpuHzOverallDeviation += uCpuHzDeviation;
                                    cCpuHzOverallDevCnt++;
                                }
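                                /* uPct is the deviation in thousandths of a percent
                                   (x * 100000 / ref); the +5 rounds half-up at the
                                   last displayed digit, e.g. 55 -> "0.05%". */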
                                uint32_t uPct = (uint32_t)(uCpuHzDeviation * 100000 / uCpuHzRef + 5);
                                RTStrPrintf(szCpuHzDeviation, sizeof(szCpuHzDeviation), "%10RI64%3d.%02d%%  ",
                                            iCpuHzDeviation, uPct / 1000, (uPct % 1000) / 10);
                            }
                        }
                        else
                            szCpuHzDeviation[0] = '\0';
                        RTPrintf(fHex
                                 ? "tstGIP-2: %4d/%d: %016llx %09llx %016llx %08x %d %08x %15llu %s%08x %08x %08x %08x %08x %08x %08x %08x (%d)\n"
                                 : "tstGIP-2: %4d/%d: %016llu %09llu %016llu %010u %d %010u %15llu %s%08x %08x %08x %08x %08x %08x %08x %08x (%d)\n",
                                 i, iCpu,
                                 pCpu->u64NanoTS,
                                 i ? pCpu->u64NanoTS - pPrevCpu->u64NanoTS : 0,
                                 pCpu->u64TSC,
                                 pCpu->u32UpdateIntervalTSC,
                                 pCpu->iTSCHistoryHead,
                                 pCpu->u32TransactionId,
                                 pCpu->u64CpuHz,
                                 szCpuHzDeviation,
                                 pCpu->au32TSCHistory[0],
                                 pCpu->au32TSCHistory[1],
                                 pCpu->au32TSCHistory[2],
                                 pCpu->au32TSCHistory[3],
                                 pCpu->au32TSCHistory[4],
                                 pCpu->au32TSCHistory[5],
                                 pCpu->au32TSCHistory[6],
                                 pCpu->au32TSCHistory[7],
                                 pCpu->cErrors);
                        if (!pu32TransactionId)
                        {
                            pu32TransactionId = &g_pSUPGlobalInfoPage->aCPUs[iCpu].u32TransactionId;
                            u32TransactionId = pCpu->u32TransactionId;
                        }
                    }

                /* wait a bit / spin */
                if (!fSpin)
                    RTThreadSleep(9);
                else
                {
                    if (pu32TransactionId)
                    {
                        uint32_t uTmp;
                        while (   u32TransactionId == (uTmp = *pu32TransactionId)
                               || (uTmp & 1))
                            ASMNopPause();
                    }
                    else
                        RTThreadSleep(1);
                }
            }

            /*
             * Display TSC deltas.
             *
             * First iterate over the APIC ID array to get a mostly consistent CPU ID to APIC ID mapping.
             * Then iterate over the offline CPUs. It is possible that there's a race between the online/offline
             * states between the two iterations, but that cannot be helped from ring-3 anyway and isn't a biggie.
             */
            RTPrintf("tstGIP-2: TSC deltas:\n");
            RTPrintf("tstGIP-2:  idApic: i64TSCDelta\n");
            for (unsigned i = 0; i < RT_ELEMENTS(g_pSUPGlobalInfoPage->aiCpuFromApicId); i++)
            {
                uint16_t iCpu = g_pSUPGlobalInfoPage->aiCpuFromApicId[i];
                if (iCpu != UINT16_MAX)
                {
                    RTPrintf("tstGIP-2: %7d: %lld\n", g_pSUPGlobalInfoPage->aCPUs[iCpu].idApic,
                             g_pSUPGlobalInfoPage->aCPUs[iCpu].i64TSCDelta);
                }
            }

            for (unsigned iCpu = 0; iCpu < g_pSUPGlobalInfoPage->cCpus; iCpu++)
                if (g_pSUPGlobalInfoPage->aCPUs[iCpu].idApic == UINT16_MAX)
                    RTPrintf("tstGIP-2: offline: %lld\n", g_pSUPGlobalInfoPage->aCPUs[iCpu].i64TSCDelta);

            RTPrintf("tstGIP-2: enmUseTscDelta=%d  fGetGipCpu=%#x\n",
                     g_pSUPGlobalInfoPage->enmUseTscDelta, g_pSUPGlobalInfoPage->fGetGipCpu);
            if (   uCpuHzRef
                && cCpuHzOverallDevCnt)
            {
                uint32_t uPct    = (uint32_t)(uCpuHzOverallDeviation * 100000 / cCpuHzOverallDevCnt / uCpuHzRef + 5);
                RTPrintf("tstGIP-2: Average CpuHz deviation: %d.%02d%%\n",
                         uPct / 1000, (uPct % 1000) / 10);

                uint32_t uMaxPct = (uint32_t)(RT_ABS(iCpuHzMaxDeviation) * 100000 / uCpuHzRef + 5);
                RTPrintf("tstGIP-2: Maximum CpuHz deviation: %d.%02d%% (%RI64 ticks)\n",
                         uMaxPct / 1000, (uMaxPct % 1000) / 10, iCpuHzMaxDeviation);
            }
        }
        else
        {
            RTPrintf("tstGIP-2: g_pSUPGlobalInfoPage is NULL\n");
            rc = -1;
        }

        SUPR3Term(false /*fForced*/);
    }
    else
        RTPrintf("tstGIP-2: SUPR3Init failed: %Rrc\n", rc);
    return !!rc;
}
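A hypothetical invocation of the test above, using only the options defined in its RTGETOPTDEF table (the reference CpuHz value is an arbitrary example):

    tstGIP-2 --iterations 10 --spin --reference 2494000000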
RTDECL(int) RTMpOnPair(RTCPUID idCpu1, RTCPUID idCpu2, uint32_t fFlags, PFNRTMPWORKER pfnWorker, void *pvUser1, void *pvUser2)
{
    IPRT_LINUX_SAVE_EFL_AC();
    int rc;
    RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;

    AssertReturn(idCpu1 != idCpu2, VERR_INVALID_PARAMETER);
    AssertReturn(!(fFlags & ~RTMPON_F_VALID_MASK), VERR_INVALID_FLAGS);

    /*
     * Check that both CPUs are online before doing the broadcast call.
     */
    RTThreadPreemptDisable(&PreemptState);
    if (   RTMpIsCpuOnline(idCpu1)
        && RTMpIsCpuOnline(idCpu2))
    {
        /*
         * Use the smp_call_function variant taking a cpu mask where available,
         * falling back on broadcast with filter.  Slight snag if one of the
         * CPUs is the one we're running on, we must do the call and the post
         * call wait ourselves.
         */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27)
        cpumask_t   DstCpuMask;
#endif
        RTCPUID     idCpuSelf = RTMpCpuId();
        bool const  fCallSelf = idCpuSelf == idCpu1 || idCpuSelf == idCpu2;
        RTMPARGS    Args;
        Args.pfnWorker = pfnWorker;
        Args.pvUser1 = pvUser1;
        Args.pvUser2 = pvUser2;
        Args.idCpu   = idCpu1;
        Args.idCpu2  = idCpu2;
        Args.cHits   = 0;

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28)
        cpumask_clear(&DstCpuMask);
        cpumask_set_cpu(idCpu1, &DstCpuMask);
        cpumask_set_cpu(idCpu2, &DstCpuMask);
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27)
        cpus_clear(DstCpuMask);
        cpu_set(idCpu1, DstCpuMask);
        cpu_set(idCpu2, DstCpuMask);
#endif

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
        smp_call_function_many(&DstCpuMask, rtmpLinuxWrapperPostInc, &Args, !fCallSelf /* wait */);
        rc = 0;
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28)
        rc = smp_call_function_many(&DstCpuMask, rtmpLinuxWrapperPostInc, &Args, !fCallSelf /* wait */);
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27)
        rc = smp_call_function_mask(DstCpuMask, rtmpLinuxWrapperPostInc, &Args, !fCallSelf /* wait */);
#else /* older kernels */
        rc = smp_call_function(rtMpLinuxOnPairWrapper, &Args, 0 /* retry */, !fCallSelf /* wait */);
#endif /* older kernels */
        Assert(rc == 0);

        /* Call ourselves if necessary and wait for the other party to be done. */
        if (fCallSelf)
        {
            uint32_t cLoops = 0;
            rtmpLinuxWrapper(&Args);
            while (ASMAtomicReadU32(&Args.cHits) < 2)
            {
                if ((cLoops & 0x1ff) == 0 && !RTMpIsCpuOnline(idCpuSelf == idCpu1 ? idCpu2 : idCpu1))
                    break;
                cLoops++;
                ASMNopPause();
            }
        }

        Assert(Args.cHits <= 2);
        if (Args.cHits == 2)
            rc = VINF_SUCCESS;
        else if (Args.cHits == 1)
            rc = VERR_NOT_ALL_CPUS_SHOWED;
        else if (Args.cHits == 0)
            rc = VERR_CPU_OFFLINE;
        else
            rc = VERR_CPU_IPE_1;
    }
    /*
     * A CPU must be present to be considered just offline.
     */
    else if (   RTMpIsCpuPresent(idCpu1)
             && RTMpIsCpuPresent(idCpu2))
        rc = VERR_CPU_OFFLINE;
    else
        rc = VERR_CPU_NOT_FOUND;
    RTThreadPreemptRestore(&PreemptState);
    IPRT_LINUX_RESTORE_EFL_AC();
    return rc;
}
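A hedged usage sketch (illustrative, not from the original sources): running a worker on two specific CPUs at once, e.g. for a cross-CPU handshake. The worker is an assumption; the cHits semantics follow the code above.

static DECLCALLBACK(void) examplePairWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
{
    NOREF(pvUser2);
    /* Record which CPU ran last; a real user would synchronize on pvUser1. */
    ASMAtomicWriteU32((uint32_t volatile *)pvUser1, (uint32_t)idCpu);
}

static int examplePair(RTCPUID idCpu1, RTCPUID idCpu2)
{
    uint32_t volatile uLast = UINT32_MAX;
    return RTMpOnPair(idCpu1, idCpu2, 0 /*fFlags*/, examplePairWorker,
                      (void *)&uLast, NULL);
}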
RTDECL(int) RTSemSpinMutexRequest(RTSEMSPINMUTEX hSpinMtx)
{
    RTSEMSPINMUTEXINTERNAL *pThis = hSpinMtx;
    RTNATIVETHREAD          hSelf = RTThreadNativeSelf();
    RTSEMSPINMUTEXSTATE     State;
    bool                    fRc;
    int                     rc;

    Assert(hSelf != NIL_RTNATIVETHREAD);
    RTSEMSPINMUTEX_VALIDATE_RETURN(pThis);

    /*
     * Check context, disable preemption and save flags if necessary.
     */
    rc = rtSemSpinMutexEnter(&State, pThis);
    if (RT_FAILURE(rc))
        return rc;

    /*
     * Try take the ownership.
     */
    ASMAtomicIncS32(&pThis->cLockers);
    ASMAtomicCmpXchgHandle(&pThis->hOwner, hSelf, NIL_RTNATIVETHREAD, fRc);
    if (!fRc)
    {
        uint32_t cSpins;

        /*
         * It's busy. Check if it's an attempt at nested access.
         */
        if (RT_UNLIKELY(pThis->hOwner == hSelf))
        {
            AssertMsgFailed(("%p attempt at nested access\n", pThis));
            rtSemSpinMutexLeave(&State);
            return VERR_SEM_NESTED;
        }

        /*
         * Return if we're in interrupt context and the semaphore isn't
         * configured to be interrupt safe.
         */
        if (rc == VINF_SEM_BAD_CONTEXT)
        {
            rtSemSpinMutexLeave(&State);
            return VERR_SEM_BAD_CONTEXT;
        }

        /*
         * Ok, we have to wait.
         */
        if (State.fSpin)
        {
            for (cSpins = 0; ; cSpins++)
            {
                ASMAtomicCmpXchgHandle(&pThis->hOwner, hSelf, NIL_RTNATIVETHREAD, fRc);
                if (fRc)
                    break;
                ASMNopPause();
                if (RT_UNLIKELY(pThis->u32Magic != RTSEMSPINMUTEX_MAGIC))
                {
                    rtSemSpinMutexLeave(&State);
                    return VERR_SEM_DESTROYED;
                }

                /*
                 * "Yield" once in a while. This may lower our IRQL/PIL, which
                 * may allow us to be preempted, and it will certainly stop the
                 * hammering of hOwner for a little while.
                 */
                if ((cSpins & 0x7f) == 0x1f)
                {
                    rtSemSpinMutexLeave(&State);
                    rtSemSpinMutexEnter(&State, pThis);
                    Assert(State.fSpin);
                }
            }
        }
        else
        {
            for (cSpins = 0;; cSpins++)
            {
                ASMAtomicCmpXchgHandle(&pThis->hOwner, hSelf, NIL_RTNATIVETHREAD, fRc);
                if (fRc)
                    break;
                ASMNopPause();
                if (RT_UNLIKELY(pThis->u32Magic != RTSEMSPINMUTEX_MAGIC))
                {
                    rtSemSpinMutexLeave(&State);
                    return VERR_SEM_DESTROYED;
                }

                if ((cSpins & 15) == 15) /* spin a bit before going to sleep (again). */
                {
                    rtSemSpinMutexLeave(&State);

                    rc = RTSemEventWait(pThis->hEventSem, RT_INDEFINITE_WAIT);
                    ASMCompilerBarrier();
                    if (RT_SUCCESS(rc))
                        AssertReturn(pThis->u32Magic == RTSEMSPINMUTEX_MAGIC, VERR_SEM_DESTROYED);
                    else if (rc == VERR_INTERRUPTED)
                        AssertRC(rc);       /* shouldn't happen */
                    else
                    {
                        AssertRC(rc);
                        return rc;
                    }

                    rc = rtSemSpinMutexEnter(&State, pThis);
                    AssertRCReturn(rc, rc);
                    Assert(!State.fSpin);
                }
            }
        }
    }

    /*
     * We're the semaphore owner.
     */
    pThis->SavedState = State;
    Assert(pThis->hOwner == hSelf);
    return VINF_SUCCESS;
}
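A hedged usage sketch (illustrative, not from the original sources): creating and using a spin mutex with the request API above. The flag name follows the public IPRT semaphore API; treat the details as assumptions.

static int exampleWithSpinMutex(void)
{
    RTSEMSPINMUTEX hSpinMtx;
    int rc = RTSemSpinMutexCreate(&hSpinMtx, RTSEMSPINMUTEX_FLAGS_IRQ_SAFE);
    if (RT_SUCCESS(rc))
    {
        rc = RTSemSpinMutexRequest(hSpinMtx);
        if (RT_SUCCESS(rc))
        {
            /* ... short critical region ... */
            RTSemSpinMutexRelease(hSpinMtx);
        }
        RTSemSpinMutexDestroy(hSpinMtx);
    }
    return rc;
}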