VOID
MiCheckPfn (
    )

/*++

Routine Description:

    This routine checks each physical page in the PFN database to ensure
    it is in the proper state.

Arguments:

    None.

Return Value:

    None.

Environment:

    Kernel mode, APCs disabled.

--*/

{
    PMMPFN Pfn1;
    PFN_NUMBER Link, Previous;
    ULONG i;
    PMMPTE PointerPte;
    KIRQL PreviousIrql;
    KIRQL OldIrql;
    USHORT ValidCheck[4];
    USHORT ValidPage[4];
    PMMPFN PfnX;

    ValidCheck[0] = ValidCheck[1] = ValidCheck[2] = ValidCheck[3] = 0;
    ValidPage[0] = ValidPage[1] = ValidPage[2] = ValidPage[3] = 0;

    if (CheckPfnBitMap == NULL) {
        MiCreateBitMap (&CheckPfnBitMap,
                        MmNumberOfPhysicalPages,
                        NonPagedPool);
    }
    RtlClearAllBits (CheckPfnBitMap);

    //
    // Walk free list.
    //

    KeRaiseIrql (APC_LEVEL, &PreviousIrql);
    LOCK_PFN (OldIrql);

    Previous = MM_EMPTY_LIST;
    Link = MmFreePageListHead.Flink;
    for (i=0; i < MmFreePageListHead.Total; i++) {
        if (Link == MM_EMPTY_LIST) {
            DbgPrint("free list total count wrong\n");
            UNLOCK_PFN (OldIrql);
            KeLowerIrql (PreviousIrql);
            return;
        }
        RtlSetBits (CheckPfnBitMap, (ULONG)Link, 1L);
        Pfn1 = MI_PFN_ELEMENT(Link);
        if (Pfn1->u3.e2.ReferenceCount != 0) {
            DbgPrint("non zero reference count on free list\n");
            MiFormatPfn(Pfn1);
        }
        if (Pfn1->u3.e1.PageLocation != FreePageList) {
            DbgPrint("page location not freelist\n");
            MiFormatPfn(Pfn1);
        }
        if (Pfn1->u2.Blink != Previous) {
            DbgPrint("bad blink on free list\n");
            MiFormatPfn(Pfn1);
        }
        Previous = Link;
        Link = Pfn1->u1.Flink;
    }
    if (Link != MM_EMPTY_LIST) {
        DbgPrint("free list total count wrong\n");
        Pfn1 = MI_PFN_ELEMENT(Link);
        MiFormatPfn(Pfn1);
    }

    //
    // Walk zeroed list.
    //

    Previous = MM_EMPTY_LIST;
    Link = MmZeroedPageListHead.Flink;
    for (i=0; i < MmZeroedPageListHead.Total; i++) {
        if (Link == MM_EMPTY_LIST) {
            DbgPrint("zero list total count wrong\n");
            UNLOCK_PFN (OldIrql);
            KeLowerIrql (PreviousIrql);
            return;
        }
        RtlSetBits (CheckPfnBitMap, (ULONG)Link, 1L);
        Pfn1 = MI_PFN_ELEMENT(Link);
        if (Pfn1->u3.e2.ReferenceCount != 0) {
            DbgPrint("non zero reference count on zero list\n");
            MiFormatPfn(Pfn1);
        }
        if (Pfn1->u3.e1.PageLocation != ZeroedPageList) {
            DbgPrint("page location not zerolist\n");
            MiFormatPfn(Pfn1);
        }
        if (Pfn1->u2.Blink != Previous) {
            DbgPrint("bad blink on zero list\n");
            MiFormatPfn(Pfn1);
        }
        Previous = Link;
        Link = Pfn1->u1.Flink;
    }
    if (Link != MM_EMPTY_LIST) {
        DbgPrint("zero list total count wrong\n");
        Pfn1 = MI_PFN_ELEMENT(Link);
        MiFormatPfn(Pfn1);
    }

    //
    // Walk Bad list.
    //

    Previous = MM_EMPTY_LIST;
    Link = MmBadPageListHead.Flink;
    for (i=0; i < MmBadPageListHead.Total; i++) {
        if (Link == MM_EMPTY_LIST) {
            DbgPrint("Bad list total count wrong\n");
            UNLOCK_PFN (OldIrql);
            KeLowerIrql (PreviousIrql);
            return;
        }
        RtlSetBits (CheckPfnBitMap, (ULONG)Link, 1L);
        Pfn1 = MI_PFN_ELEMENT(Link);
        if (Pfn1->u3.e2.ReferenceCount != 0) {
            DbgPrint("non zero reference count on Bad list\n");
            MiFormatPfn(Pfn1);
        }
        if (Pfn1->u3.e1.PageLocation != BadPageList) {
            DbgPrint("page location not Badlist\n");
            MiFormatPfn(Pfn1);
        }
        if (Pfn1->u2.Blink != Previous) {
            DbgPrint("bad blink on Bad list\n");
            MiFormatPfn(Pfn1);
        }
        Previous = Link;
        Link = Pfn1->u1.Flink;
    }
    if (Link != MM_EMPTY_LIST) {
        DbgPrint("Bad list total count wrong\n");
        Pfn1 = MI_PFN_ELEMENT(Link);
        MiFormatPfn(Pfn1);
    }

    //
    // Walk Standby list.
    //

    Previous = MM_EMPTY_LIST;
    Link = MmStandbyPageListHead.Flink;
    for (i=0; i < MmStandbyPageListHead.Total; i++) {
        if (Link == MM_EMPTY_LIST) {
            DbgPrint("Standby list total count wrong\n");
            UNLOCK_PFN (OldIrql);
            KeLowerIrql (PreviousIrql);
            return;
        }
        RtlSetBits (CheckPfnBitMap, (ULONG)Link, 1L);
        Pfn1 = MI_PFN_ELEMENT(Link);
        if (Pfn1->u3.e2.ReferenceCount != 0) {
            DbgPrint("non zero reference count on Standby list\n");
            MiFormatPfn(Pfn1);
        }
        if (Pfn1->u3.e1.PageLocation != StandbyPageList) {
            DbgPrint("page location not Standbylist\n");
            MiFormatPfn(Pfn1);
        }
        if (Pfn1->u2.Blink != Previous) {
            DbgPrint("bad blink on Standby list\n");
            MiFormatPfn(Pfn1);
        }

        //
        // Check to see if referenced PTE is okay.
        //

        if (MI_IS_PFN_DELETED (Pfn1)) {
            DbgPrint("Invalid pteaddress in standby list\n");
            MiFormatPfn(Pfn1);
        }
        else {
            OldIrql = 99;
            if ((Pfn1->u3.e1.PrototypePte == 1) &&
                (MmIsAddressValid (Pfn1->PteAddress))) {
                PointerPte = Pfn1->PteAddress;
            }
            else {
                PointerPte = MiMapPageInHyperSpace(Pfn1->PteFrame, &OldIrql);
                PointerPte = (PMMPTE)((ULONG_PTR)PointerPte +
                                        MiGetByteOffset(Pfn1->PteAddress));
            }

            if (MI_GET_PAGE_FRAME_FROM_TRANSITION_PTE (PointerPte) != Link) {
                DbgPrint("Invalid PFN - PTE address is wrong in standby list\n");
                MiFormatPfn(Pfn1);
                MiFormatPte(PointerPte);
            }
            if (PointerPte->u.Soft.Transition == 0) {
                DbgPrint("Pte not in transition for page on standby list\n");
                MiFormatPfn(Pfn1);
                MiFormatPte(PointerPte);
            }
            if (OldIrql != 99) {
                MiUnmapPageInHyperSpace (OldIrql);
                OldIrql = 99;
            }
        }

        Previous = Link;
        Link = Pfn1->u1.Flink;
    }
    if (Link != MM_EMPTY_LIST) {
        DbgPrint("Standby list total count wrong\n");
        Pfn1 = MI_PFN_ELEMENT(Link);
        MiFormatPfn(Pfn1);
    }

    //
    // Walk Modified list.
    //

    Previous = MM_EMPTY_LIST;
    Link = MmModifiedPageListHead.Flink;
    for (i=0; i < MmModifiedPageListHead.Total; i++) {
        if (Link == MM_EMPTY_LIST) {
            DbgPrint("Modified list total count wrong\n");
            UNLOCK_PFN (OldIrql);
            KeLowerIrql (PreviousIrql);
            return;
        }
        RtlSetBits (CheckPfnBitMap, (ULONG)Link, 1L);
        Pfn1 = MI_PFN_ELEMENT(Link);
        if (Pfn1->u3.e2.ReferenceCount != 0) {
            DbgPrint("non zero reference count on Modified list\n");
            MiFormatPfn(Pfn1);
        }
        if (Pfn1->u3.e1.PageLocation != ModifiedPageList) {
            DbgPrint("page location not Modifiedlist\n");
            MiFormatPfn(Pfn1);
        }
        if (Pfn1->u2.Blink != Previous) {
            DbgPrint("bad blink on Modified list\n");
            MiFormatPfn(Pfn1);
        }

        //
        // Check to see if referenced PTE is okay.
        //

        if (MI_IS_PFN_DELETED (Pfn1)) {
            DbgPrint("Invalid pteaddress in modified list\n");
            MiFormatPfn(Pfn1);
        }
        else {
            if ((Pfn1->u3.e1.PrototypePte == 1) &&
                (MmIsAddressValid (Pfn1->PteAddress))) {
                PointerPte = Pfn1->PteAddress;
            }
            else {
                PointerPte = MiMapPageInHyperSpace(Pfn1->PteFrame, &OldIrql);
                PointerPte = (PMMPTE)((ULONG_PTR)PointerPte +
                                        MiGetByteOffset(Pfn1->PteAddress));
            }

            if (MI_GET_PAGE_FRAME_FROM_TRANSITION_PTE (PointerPte) != Link) {
                DbgPrint("Invalid PFN - PTE address is wrong in modified list\n");
                MiFormatPfn(Pfn1);
                MiFormatPte(PointerPte);
            }
            if (PointerPte->u.Soft.Transition == 0) {
                DbgPrint("Pte not in transition for page on modified list\n");
                MiFormatPfn(Pfn1);
                MiFormatPte(PointerPte);
            }
            if (OldIrql != 99) {
                MiUnmapPageInHyperSpace (OldIrql);
                OldIrql = 99;
            }
        }

        Previous = Link;
        Link = Pfn1->u1.Flink;
    }
    if (Link != MM_EMPTY_LIST) {
        DbgPrint("Modified list total count wrong\n");
        Pfn1 = MI_PFN_ELEMENT(Link);
        MiFormatPfn(Pfn1);
    }

    //
    // All non active pages have been scanned.  Locate the
    // active pages and make sure they are consistent.
    //

    //
    // set bit zero as page zero is reserved for now
    //

    RtlSetBits (CheckPfnBitMap, 0L, 1L);

    Link = RtlFindClearBitsAndSet (CheckPfnBitMap, 1L, 0);
    while (Link != 0xFFFFFFFF) {
        Pfn1 = MI_PFN_ELEMENT (Link);

        //
        // Make sure the PTE address is okay
        //

        if ((Pfn1->PteAddress >= (PMMPTE)HYPER_SPACE) &&
            (Pfn1->u3.e1.PrototypePte == 0)) {
            DbgPrint("pfn with illegal pte address\n");
            MiFormatPfn(Pfn1);
            break;
        }

        if (Pfn1->PteAddress < (PMMPTE)PTE_BASE) {
            DbgPrint("pfn with illegal pte address\n");
            MiFormatPfn(Pfn1);
            break;
        }

#if defined(_IA64_)

        //
        // ignore PTEs mapped to IA64 kernel BAT.
        //

        if (MI_IS_PHYSICAL_ADDRESS(MiGetVirtualAddressMappedByPte(Pfn1->PteAddress))) {
            goto NoCheck;
        }

#endif // _IA64_

#ifdef _ALPHA_

        //
        // ignore ptes mapped to ALPHA's 32-bit superpage.
        //

        if ((Pfn1->PteAddress > (PMMPTE)(ULONG_PTR)0xc0100000) &&
            (Pfn1->PteAddress < (PMMPTE)(ULONG_PTR)0xc0180000)) {
            goto NoCheck;
        }

#endif //ALPHA

        //
        // Check to make sure the referenced PTE is for this page.
        //

        if ((Pfn1->u3.e1.PrototypePte == 1) &&
            (MmIsAddressValid (Pfn1->PteAddress))) {
            PointerPte = Pfn1->PteAddress;
        }
        else {
            PointerPte = MiMapPageInHyperSpace(Pfn1->PteFrame, &OldIrql);
            PointerPte = (PMMPTE)((ULONG_PTR)PointerPte +
                                    MiGetByteOffset(Pfn1->PteAddress));
        }

        if (MI_GET_PAGE_FRAME_FROM_PTE (PointerPte) != Link) {
            DbgPrint("Invalid PFN - PTE address is wrong in active list\n");
            MiFormatPfn(Pfn1);
            MiFormatPte(PointerPte);
        }
        if (PointerPte->u.Hard.Valid == 0) {

            //
            // if the page is a page table page it could be out of
            // the working set yet a transition page is keeping it
            // around in memory (ups the share count).
            //

            if ((Pfn1->PteAddress < (PMMPTE)PDE_BASE) ||
                (Pfn1->PteAddress > (PMMPTE)PDE_TOP)) {

                DbgPrint("Pte not valid for page on active list\n");
                MiFormatPfn(Pfn1);
                MiFormatPte(PointerPte);
            }
        }

        if (Pfn1->u3.e2.ReferenceCount != 1) {
            DbgPrint("refcount not 1\n");
            MiFormatPfn(Pfn1);
        }

        //
        // Check to make sure the PTE count for the frame is okay.
        //

        if (Pfn1->u3.e1.PrototypePte == 1) {
            PfnX = MI_PFN_ELEMENT(Pfn1->PteFrame);
            for (i = 0; i < 4; i++) {
                if (ValidPage[i] == 0) {
                    ValidPage[i] = (USHORT)Pfn1->PteFrame;
                }
                if (ValidPage[i] == (USHORT)Pfn1->PteFrame) {
                    ValidCheck[i] += 1;
                    break;
                }
            }
        }

        if (OldIrql != 99) {
            MiUnmapPageInHyperSpace (OldIrql);
            OldIrql = 99;
        }

#if defined(_ALPHA_) || defined(_IA64_)
NoCheck:
#endif

        Link = RtlFindClearBitsAndSet (CheckPfnBitMap, 1L, 0);
    }

    for (i = 0; i < 4; i++) {
        if (ValidPage[i] == 0) {
            break;
        }
        PfnX = MI_PFN_ELEMENT(ValidPage[i]);
    }

    UNLOCK_PFN (OldIrql);
    KeLowerIrql (PreviousIrql);
    return;
}
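//
// Illustrative sketch (not part of Mm): each list walk above follows one
// pattern - traverse Total links from the head, checking that every node's
// back link names the node just visited and that the walk ends exactly at
// the list terminator.  A minimal user-mode rendition of that audit with
// toy types (the TOY_* names are hypothetical), compiled out here:
//

#if 0

#include <stdio.h>

#define TOY_EMPTY ((unsigned)-1)

typedef struct _TOY_NODE {
    unsigned Flink;             // index of next node, or TOY_EMPTY
    unsigned Blink;             // index of previous node, or TOY_EMPTY
} TOY_NODE;

static int
ToyAuditList (TOY_NODE *Nodes, unsigned Head, unsigned Total)
{
    unsigned Link = Head;
    unsigned Previous = TOY_EMPTY;
    unsigned i;

    for (i = 0; i < Total; i += 1) {
        if (Link == TOY_EMPTY) {
            printf ("list total count wrong (list too short)\n");
            return 0;
        }
        if (Nodes[Link].Blink != Previous) {
            printf ("bad blink at node %u\n", Link);
            return 0;
        }
        Previous = Link;
        Link = Nodes[Link].Flink;
    }

    //
    // After Total hops the terminator must be reached, exactly as the
    // check after each per-list loop above requires.
    //

    if (Link != TOY_EMPTY) {
        printf ("list total count wrong (list too long)\n");
        return 0;
    }
    return 1;
}

#endif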
NTSTATUS
MmRemovePhysicalMemory (
    IN PPHYSICAL_ADDRESS StartAddress,
    IN OUT PLARGE_INTEGER NumberOfBytes
    )

/*++

Routine Description:

    This routine attempts to remove the specified physical address range
    from the system.

Arguments:

    StartAddress - Supplies the starting physical address.

    NumberOfBytes - Supplies a pointer to the number of bytes being removed.

Return Value:

    NTSTATUS.

Environment:

    Kernel mode.  PASSIVE level.  No locks held.

--*/

{
    ULONG i;
    ULONG Additional;
    PFN_NUMBER Page;
    PFN_NUMBER LastPage;
    PFN_NUMBER OriginalLastPage;
    PFN_NUMBER start;
    PFN_NUMBER PagesReleased;
    PMMPFN Pfn1;
    PMMPFN StartPfn;
    PMMPFN EndPfn;
    KIRQL OldIrql;
    PFN_NUMBER StartPage;
    PFN_NUMBER EndPage;
    PFN_COUNT NumberOfPages;
    SPFN_NUMBER MaxPages;
    PFN_NUMBER PageFrameIndex;
    PFN_NUMBER RemovedPages;
    LOGICAL Inserted;
    NTSTATUS Status;
    PMMPTE PointerPte;
    PMMPTE EndPte;
    PVOID VirtualAddress;
    PPHYSICAL_MEMORY_DESCRIPTOR OldPhysicalMemoryBlock;
    PPHYSICAL_MEMORY_DESCRIPTOR NewPhysicalMemoryBlock;
    PPHYSICAL_MEMORY_RUN NewRun;
    LOGICAL PfnDatabaseIsPhysical;

    ASSERT (KeGetCurrentIrql() == PASSIVE_LEVEL);
    ASSERT (BYTE_OFFSET(NumberOfBytes->LowPart) == 0);
    ASSERT (BYTE_OFFSET(StartAddress->LowPart) == 0);

    if (MI_IS_PHYSICAL_ADDRESS(MmPfnDatabase)) {

        //
        // The system must be configured for dynamic memory addition.  This is
        // not strictly required to remove the memory, but it's better to check
        // for it now under the assumption that the administrator is probably
        // going to want to add this range of memory back in - better to give
        // the error now and refuse the removal than to refuse the addition
        // later.
        //

        if (MmDynamicPfn == FALSE) {
            return STATUS_NOT_SUPPORTED;
        }

        PfnDatabaseIsPhysical = TRUE;
    }
    else {
        PfnDatabaseIsPhysical = FALSE;
    }

    StartPage = (PFN_NUMBER)(StartAddress->QuadPart >> PAGE_SHIFT);
    NumberOfPages = (PFN_COUNT)(NumberOfBytes->QuadPart >> PAGE_SHIFT);

    EndPage = StartPage + NumberOfPages;

    if (EndPage - 1 > MmHighestPossiblePhysicalPage) {

        //
        // Truncate the request into something that can be mapped by the PFN
        // database.
        //

        EndPage = MmHighestPossiblePhysicalPage + 1;
        NumberOfPages = (PFN_COUNT)(EndPage - StartPage);
    }

    //
    // The range cannot wrap.
    //

    if (StartPage >= EndPage) {
        return STATUS_INVALID_PARAMETER_1;
    }

    StartPfn = MI_PFN_ELEMENT (StartPage);
    EndPfn = MI_PFN_ELEMENT (EndPage);

    ExAcquireFastMutex (&MmDynamicMemoryMutex);

#if DBG
    MiDynmemData[0] += 1;
#endif

    //
    // Decrease all commit limits to reflect the removed memory.
    //

    ExAcquireSpinLock (&MmChargeCommitmentLock, &OldIrql);

    ASSERT (MmTotalCommitLimit <= MmTotalCommitLimitMaximum);

    if ((NumberOfPages + 100 > MmTotalCommitLimit - MmTotalCommittedPages) ||
        (MmTotalCommittedPages > MmTotalCommitLimit)) {

#if DBG
        MiDynmemData[1] += 1;
#endif
        ExReleaseSpinLock (&MmChargeCommitmentLock, OldIrql);
        ExReleaseFastMutex (&MmDynamicMemoryMutex);
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    MmTotalCommitLimit -= NumberOfPages;
    MmTotalCommitLimitMaximum -= NumberOfPages;

    ExReleaseSpinLock (&MmChargeCommitmentLock, OldIrql);

    //
    // Check for outstanding promises that cannot be broken.
    //

    LOCK_PFN (OldIrql);

    MaxPages = MI_NONPAGABLE_MEMORY_AVAILABLE() - 100;

    if ((SPFN_NUMBER)NumberOfPages > MaxPages) {
#if DBG
        MiDynmemData[2] += 1;
#endif
        UNLOCK_PFN (OldIrql);
        Status = STATUS_INSUFFICIENT_RESOURCES;
        goto giveup2;
    }

    MmResidentAvailablePages -= NumberOfPages;
    MmNumberOfPhysicalPages -= NumberOfPages;

    //
    // The range must be contained in a single entry.  It is permissible for
    // it to be part of a single entry, but it must not cross multiple entries.
    //

    Additional = (ULONG)-2;

    start = 0;
    do {

        Page = MmPhysicalMemoryBlock->Run[start].BasePage;
        LastPage = Page + MmPhysicalMemoryBlock->Run[start].PageCount;

        if ((StartPage >= Page) && (EndPage <= LastPage)) {
            if ((StartPage == Page) && (EndPage == LastPage)) {
                Additional = (ULONG)-1;
            }
            else if ((StartPage == Page) || (EndPage == LastPage)) {
                Additional = 0;
            }
            else {
                Additional = 1;
            }
            break;
        }

        start += 1;

    } while (start != MmPhysicalMemoryBlock->NumberOfRuns);

    if (Additional == (ULONG)-2) {
#if DBG
        MiDynmemData[3] += 1;
#endif
        MmResidentAvailablePages += NumberOfPages;
        MmNumberOfPhysicalPages += NumberOfPages;
        UNLOCK_PFN (OldIrql);
        Status = STATUS_CONFLICTING_ADDRESSES;
        goto giveup2;
    }

    for (Pfn1 = StartPfn; Pfn1 < EndPfn; Pfn1 += 1) {
        Pfn1->u3.e1.RemovalRequested = 1;
    }

    //
    // The free and zero lists must be pruned now before releasing the PFN
    // lock otherwise if another thread allocates the page from these lists,
    // the allocation will clear the RemovalRequested flag forever.
    //

    RemovedPages = MiRemovePhysicalPages (StartPage, EndPage);

    if (RemovedPages != NumberOfPages) {

#if DBG
retry:
#endif

        Pfn1 = StartPfn;

        InterlockedIncrement (&MiDelayPageFaults);

        for (i = 0; i < 5; i += 1) {

            UNLOCK_PFN (OldIrql);

            //
            // Attempt to move pages to the standby list.  Note that only
            // the pages with RemovalRequested set are moved.
            //

            MiTrimRemovalPagesOnly = TRUE;

            MiEmptyAllWorkingSets ();

            MiTrimRemovalPagesOnly = FALSE;

            MiFlushAllPages ();

            KeDelayExecutionThread (KernelMode, FALSE, &MmHalfSecond);

            LOCK_PFN (OldIrql);

            RemovedPages += MiRemovePhysicalPages (StartPage, EndPage);

            if (RemovedPages == NumberOfPages) {
                break;
            }

            //
            // RemovedPages doesn't include pages that were freed directly to
            // the bad page list via MiDecrementReferenceCount.  So use the
            // above check purely as an optimization - and walk here when
            // necessary.
            //

            for ( ; Pfn1 < EndPfn; Pfn1 += 1) {
                if (Pfn1->u3.e1.PageLocation != BadPageList) {
                    break;
                }
            }

            if (Pfn1 == EndPfn) {
                RemovedPages = NumberOfPages;
                break;
            }
        }

        InterlockedDecrement (&MiDelayPageFaults);
    }

    if (RemovedPages != NumberOfPages) {
#if DBG
        MiDynmemData[4] += 1;
        if (MiShowStuckPages != 0) {

            RemovedPages = 0;
            for (Pfn1 = StartPfn; Pfn1 < EndPfn; Pfn1 += 1) {
                if (Pfn1->u3.e1.PageLocation != BadPageList) {
                    RemovedPages += 1;
                }
            }

            ASSERT (RemovedPages != 0);

            DbgPrint("MmRemovePhysicalMemory : could not get %d of %d pages\n",
                RemovedPages, NumberOfPages);

            if (MiShowStuckPages & 0x2) {

                ULONG PfnsPrinted;
                ULONG EnoughShown;
                PMMPFN FirstPfn;
                PFN_COUNT PfnCount;

                PfnCount = 0;
                PfnsPrinted = 0;
                EnoughShown = 100;

                if (MiShowStuckPages & 0x4) {
                    EnoughShown = (ULONG)-1;
                }

                DbgPrint("Stuck PFN list: ");
                for (Pfn1 = StartPfn; Pfn1 < EndPfn; Pfn1 += 1) {
                    if (Pfn1->u3.e1.PageLocation != BadPageList) {
                        if (PfnCount == 0) {
                            FirstPfn = Pfn1;
                        }
                        PfnCount += 1;
                    }
                    else {
                        if (PfnCount != 0) {
                            DbgPrint("%x -> %x ; ", FirstPfn - MmPfnDatabase,
                                (FirstPfn - MmPfnDatabase) + PfnCount - 1);
                            PfnsPrinted += 1;
                            if (PfnsPrinted == EnoughShown) {
                                break;
                            }
                            PfnCount = 0;
                        }
                    }
                }
                if (PfnCount != 0) {
                    DbgPrint("%x -> %x ; ", FirstPfn - MmPfnDatabase,
                        (FirstPfn - MmPfnDatabase) + PfnCount - 1);
                }
                DbgPrint("\n");
            }
            if (MiShowStuckPages & 0x8) {
                DbgBreakPoint ();
            }
            if (MiShowStuckPages & 0x10) {
                goto retry;
            }
        }
#endif
        UNLOCK_PFN (OldIrql);
        Status = STATUS_NO_MEMORY;
        goto giveup;
    }

#if DBG
    for (Pfn1 = StartPfn; Pfn1 < EndPfn; Pfn1 += 1) {
        ASSERT (Pfn1->u3.e1.PageLocation == BadPageList);
    }
#endif

    //
    // All the pages in the range have been removed.  Update the physical
    // memory blocks and other associated housekeeping.
    //

    if (Additional == 0) {

        //
        // The range can be split off from an end of an existing chunk so no
        // pool growth or shrinkage is required.
        //

        NewPhysicalMemoryBlock = MmPhysicalMemoryBlock;
        OldPhysicalMemoryBlock = NULL;
    }
    else {

        //
        // The range cannot be split off from an end of an existing chunk so
        // pool growth or shrinkage is required.
        //

        UNLOCK_PFN (OldIrql);

        i = (sizeof(PHYSICAL_MEMORY_DESCRIPTOR) +
             (sizeof(PHYSICAL_MEMORY_RUN) *
                        (MmPhysicalMemoryBlock->NumberOfRuns + Additional)));

        NewPhysicalMemoryBlock = ExAllocatePoolWithTag (NonPagedPool,
                                                        i,
                                                        ' mM');

        if (NewPhysicalMemoryBlock == NULL) {
            Status = STATUS_INSUFFICIENT_RESOURCES;
#if DBG
            MiDynmemData[5] += 1;
#endif
            goto giveup;
        }

        OldPhysicalMemoryBlock = MmPhysicalMemoryBlock;
        RtlZeroMemory (NewPhysicalMemoryBlock, i);

        LOCK_PFN (OldIrql);
    }

    //
    // Remove or split the requested range from the existing memory block.
    //

    NewPhysicalMemoryBlock->NumberOfRuns =
        MmPhysicalMemoryBlock->NumberOfRuns + Additional;

    NewPhysicalMemoryBlock->NumberOfPages =
        MmPhysicalMemoryBlock->NumberOfPages - NumberOfPages;

    NewRun = &NewPhysicalMemoryBlock->Run[0];
    start = 0;
    Inserted = FALSE;

    do {

        Page = MmPhysicalMemoryBlock->Run[start].BasePage;
        LastPage = Page + MmPhysicalMemoryBlock->Run[start].PageCount;

        if (Inserted == FALSE) {

            if ((StartPage >= Page) && (EndPage <= LastPage)) {

                if ((StartPage == Page) && (EndPage == LastPage)) {
                    ASSERT (Additional == -1);
                    start += 1;
                    continue;
                }
                else if ((StartPage == Page) || (EndPage == LastPage)) {
                    ASSERT (Additional == 0);
                    if (StartPage == Page) {
                        MmPhysicalMemoryBlock->Run[start].BasePage +=
                            NumberOfPages;
                    }
                    MmPhysicalMemoryBlock->Run[start].PageCount -=
                        NumberOfPages;
                }
                else {
                    ASSERT (Additional == 1);

                    OriginalLastPage = LastPage;

                    MmPhysicalMemoryBlock->Run[start].PageCount =
                        StartPage - MmPhysicalMemoryBlock->Run[start].BasePage;

                    *NewRun = MmPhysicalMemoryBlock->Run[start];
                    NewRun += 1;

                    NewRun->BasePage = EndPage;
                    NewRun->PageCount = OriginalLastPage - EndPage;
                    NewRun += 1;

                    start += 1;
                    continue;
                }

                Inserted = TRUE;
            }
        }

        *NewRun = MmPhysicalMemoryBlock->Run[start];
        NewRun += 1;
        start += 1;

    } while (start != MmPhysicalMemoryBlock->NumberOfRuns);

    //
    // Repoint the MmPhysicalMemoryBlock at the new chunk.
    // Free the old block after releasing the PFN lock.
    //

    MmPhysicalMemoryBlock = NewPhysicalMemoryBlock;

    if (EndPage - 1 == MmHighestPhysicalPage) {
        MmHighestPhysicalPage = StartPage - 1;
    }

    //
    // Throw away all the removed pages that are currently enqueued.
    //

    for (Pfn1 = StartPfn; Pfn1 < EndPfn; Pfn1 += 1) {

        ASSERT (Pfn1->u3.e1.PageLocation == BadPageList);
        ASSERT (Pfn1->u3.e1.RemovalRequested == 1);

        MiUnlinkPageFromList (Pfn1);

        ASSERT (Pfn1->u1.Flink == 0);
        ASSERT (Pfn1->u2.Blink == 0);
        ASSERT (Pfn1->u3.e2.ReferenceCount == 0);
        ASSERT64 (Pfn1->UsedPageTableEntries == 0);

        Pfn1->PteAddress = PFN_REMOVED;
        Pfn1->u3.e2.ShortFlags = 0;
        Pfn1->OriginalPte.u.Long = ZeroKernelPte.u.Long;
        Pfn1->PteFrame = 0;
    }

    //
    // Now that the removed pages have been discarded, eliminate the PFN
    // entries that mapped them.  Straddling entries left over from an
    // adjacent earlier removal are not collapsed at this point.
    //

    PagesReleased = 0;

    if (PfnDatabaseIsPhysical == FALSE) {

        VirtualAddress = (PVOID)ROUND_TO_PAGES(MI_PFN_ELEMENT(StartPage));
        PointerPte = MiGetPteAddress (VirtualAddress);
        EndPte = MiGetPteAddress (PAGE_ALIGN(MI_PFN_ELEMENT(EndPage)));

        while (PointerPte < EndPte) {
            PageFrameIndex = MI_GET_PAGE_FRAME_FROM_PTE (PointerPte);
            Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
            ASSERT (Pfn1->u2.ShareCount == 1);
            ASSERT (Pfn1->u3.e2.ReferenceCount == 1);
            Pfn1->u2.ShareCount = 0;
            MI_SET_PFN_DELETED (Pfn1);
#if DBG
            Pfn1->u3.e1.PageLocation = StandbyPageList;
#endif //DBG
            MiDecrementReferenceCount (PageFrameIndex);

            KeFlushSingleTb (VirtualAddress,
                             TRUE,
                             TRUE,
                             (PHARDWARE_PTE)PointerPte,
                             ZeroKernelPte.u.Flush);

            PagesReleased += 1;
            PointerPte += 1;
            VirtualAddress = (PVOID)((PCHAR)VirtualAddress + PAGE_SIZE);
        }

        MmResidentAvailablePages += PagesReleased;
    }

#if DBG
    MiDynmemData[6] += 1;
#endif

    UNLOCK_PFN (OldIrql);

    if (PagesReleased != 0) {
        MiReturnCommitment (PagesReleased);
    }

    ExReleaseFastMutex (&MmDynamicMemoryMutex);

    if (OldPhysicalMemoryBlock != NULL) {
        ExFreePool (OldPhysicalMemoryBlock);
    }

    NumberOfBytes->QuadPart = (ULONGLONG)NumberOfPages * PAGE_SIZE;

    return STATUS_SUCCESS;

giveup:

    //
    // All the pages in the range were not obtained.  Back everything out.
    //

    PageFrameIndex = StartPage;
    Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);

    LOCK_PFN (OldIrql);

    while (PageFrameIndex < EndPage) {

        ASSERT (Pfn1->u3.e1.RemovalRequested == 1);

        Pfn1->u3.e1.RemovalRequested = 0;

        if ((Pfn1->u3.e1.PageLocation == BadPageList) &&
            (Pfn1->u3.e1.ParityError == 0)) {

            MiUnlinkPageFromList (Pfn1);
            MiInsertPageInList (MmPageLocationList[FreePageList],
                                PageFrameIndex);
        }

        Pfn1 += 1;
        PageFrameIndex += 1;
    }

    MmResidentAvailablePages += NumberOfPages;
    MmNumberOfPhysicalPages += NumberOfPages;

    UNLOCK_PFN (OldIrql);

giveup2:

    ExAcquireSpinLock (&MmChargeCommitmentLock, &OldIrql);
    MmTotalCommitLimit += NumberOfPages;
    MmTotalCommitLimitMaximum += NumberOfPages;
    ExReleaseSpinLock (&MmChargeCommitmentLock, OldIrql);

    ExReleaseFastMutex (&MmDynamicMemoryMutex);

    return Status;
}
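//
// Illustrative sketch (not part of Mm): the Additional computation above
// classifies how the removal range [StartPage, EndPage) overlays a run
// [Base, Base + Count) and therefore how the run table length changes:
// -1 when the run disappears entirely, 0 when the range is trimmed off
// either end, +1 when an interior removal splits the run in two.  A
// standalone rendition (the Toy* names are hypothetical), compiled out:
//

#if 0

static int
ToyRunAdjustment (
    unsigned long Base,
    unsigned long Count,
    unsigned long StartPage,
    unsigned long EndPage
    )
{
    unsigned long LastPage = Base + Count;

    if ((StartPage < Base) || (EndPage > LastPage)) {
        return -2;          // not contained in this run
    }
    if ((StartPage == Base) && (EndPage == LastPage)) {
        return -1;          // exact match - the run disappears
    }
    if ((StartPage == Base) || (EndPage == LastPage)) {
        return 0;           // trimmed off one end - run count unchanged
    }
    return 1;               // interior removal - the run splits in two
}

#endif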
NTSTATUS
MiCcPutPagesInTransition (
    IN PMI_READ_INFO MiReadInfo
    )

/*++

Routine Description:

    This routine allocates physical memory for the specified read-list and
    puts all the pages in transition (so collided faults from other threads
    for these same pages remain coherent).  I/O for any pages not already
    resident is issued here.  The caller must wait for its completion.

Arguments:

    MiReadInfo - Supplies a pointer to the read-list.

Return Value:

    STATUS_SUCCESS - all the pages were already resident, reference counts
                     have been applied and no I/O needs to be waited for.

    STATUS_ISSUE_PAGING_IO - the I/O has been issued and the caller must wait.

    Various other failure status values indicate the operation failed.

Environment:

    Kernel mode.  PASSIVE_LEVEL.

--*/

{
    NTSTATUS status;
    PMMPTE LocalPrototypePte;
    PVOID StartingVa;
    PFN_NUMBER MdlPages;
    KIRQL OldIrql;
    MMPTE PteContents;
    PFN_NUMBER PageFrameIndex;
    PFN_NUMBER ResidentAvailableCharge;
    PPFN_NUMBER IoPage;
    PPFN_NUMBER ApiPage;
    PPFN_NUMBER Page;
    PPFN_NUMBER DestinationPage;
    ULONG PageColor;
    PMMPTE PointerPte;
    PMMPTE *ProtoPteArray;
    PMMPTE *EndProtoPteArray;
    PFN_NUMBER DummyPage;
    PMDL Mdl;
    PMDL FreeMdl;
    PMMPFN PfnProto;
    PMMPFN Pfn1;
    PMMPFN DummyPfn1;
    ULONG i;
    PFN_NUMBER DummyTrim;
    ULONG NumberOfPagesNeedingIo;
    MMPTE TempPte;
    PMMPTE PointerPde;
    PEPROCESS CurrentProcess;
    PMMINPAGE_SUPPORT InPageSupport;
    PKPRCB Prcb;

    ASSERT (KeGetCurrentIrql() == PASSIVE_LEVEL);

    MiReadInfo->DummyPagePfn = NULL;

    FreeMdl = NULL;
    CurrentProcess = PsGetCurrentProcess();

    PfnProto = NULL;
    PointerPde = NULL;

    InPageSupport = MiReadInfo->InPageSupport;

    Mdl = MI_EXTRACT_PREFETCH_MDL (InPageSupport);
    ASSERT (Mdl == MiReadInfo->IoMdl);

    IoPage = (PPFN_NUMBER)(Mdl + 1);
    ApiPage = (PPFN_NUMBER)(MiReadInfo->ApiMdl + 1);

    StartingVa = (PVOID)((PCHAR)Mdl->StartVa + Mdl->ByteOffset);

    MdlPages = ADDRESS_AND_SIZE_TO_SPAN_PAGES (StartingVa, Mdl->ByteCount);

    if (MdlPages + 1 > MAXUSHORT) {

        //
        // The PFN ReferenceCount for the dummy page could wrap, refuse the
        // request.
        //

        return STATUS_INSUFFICIENT_RESOURCES;
    }

    NumberOfPagesNeedingIo = 0;

    ProtoPteArray = (PMMPTE *)InPageSupport->BasePte;
    EndProtoPteArray = ProtoPteArray + MdlPages;

    ASSERT (*ProtoPteArray != NULL);

    LOCK_PFN (OldIrql);

    //
    // Ensure sufficient pages exist for the transfer plus the dummy page.
    //

    if (((SPFN_NUMBER)MdlPages > (SPFN_NUMBER)(MmAvailablePages - MM_HIGH_LIMIT)) ||
        (MI_NONPAGEABLE_MEMORY_AVAILABLE() <= (SPFN_NUMBER)MdlPages)) {

        UNLOCK_PFN (OldIrql);

        return STATUS_INSUFFICIENT_RESOURCES;
    }

    //
    // Charge resident available immediately as the PFN lock may get released
    // and reacquired below before all the pages have been locked down.
    // Note the dummy page is immediately charged separately.
    //

    MI_DECREMENT_RESIDENT_AVAILABLE (MdlPages, MM_RESAVAIL_ALLOCATE_BUILDMDL);

    ResidentAvailableCharge = MdlPages;

    //
    // Allocate a dummy page to map discarded pages that aren't skipped.
    //

    DummyPage = MiRemoveAnyPage (0);
    Pfn1 = MI_PFN_ELEMENT (DummyPage);

    ASSERT (Pfn1->u2.ShareCount == 0);
    ASSERT (Pfn1->u3.e2.ReferenceCount == 0);

    MiInitializePfnForOtherProcess (DummyPage, MI_PF_DUMMY_PAGE_PTE, 0);

    //
    // Give the page a containing frame so MiIdentifyPfn won't crash.
    //

    Pfn1->u4.PteFrame = PsInitialSystemProcess->Pcb.DirectoryTableBase[0] >> PAGE_SHIFT;

    //
    // Always bias the reference count by 1 and charge for this locked page
    // up front so the myriad increments and decrements don't get slowed
    // down with needless checking.
    //

    Pfn1->u3.e1.PrototypePte = 0;

    MI_ADD_LOCKED_PAGE_CHARGE (Pfn1);

    Pfn1->u3.e1.ReadInProgress = 1;

    MiReadInfo->DummyPagePfn = Pfn1;

    DummyPfn1 = Pfn1;

    DummyPfn1->u3.e2.ReferenceCount =
        (USHORT)(DummyPfn1->u3.e2.ReferenceCount + MdlPages);

    //
    // Properly initialize the inpage support block fields we overloaded.
    //

    InPageSupport->BasePte = *ProtoPteArray;

    //
    // Build the proper InPageSupport and MDL to describe this run.
    //

    for ( ; ProtoPteArray < EndProtoPteArray;
            ProtoPteArray += 1, IoPage += 1, ApiPage += 1) {

        //
        // Fill the MDL entry for this RLE.
        //

        PointerPte = *ProtoPteArray;

        ASSERT (PointerPte != NULL);

        //
        // The PointerPte better be inside a prototype PTE allocation
        // so that subsequent page trims update the correct PTEs.
        //

        ASSERT (((PointerPte >= (PMMPTE)MmPagedPoolStart) &&
                 (PointerPte <= (PMMPTE)MmPagedPoolEnd)) ||
                ((PointerPte >= (PMMPTE)MmSpecialPoolStart) &&
                 (PointerPte <= (PMMPTE)MmSpecialPoolEnd)));

        //
        // Check the state of this prototype PTE now that the PFN lock is held.
        // If the page is not resident, the PTE must be put in transition with
        // read in progress before the PFN lock is released.
        //

        //
        // Lock page containing prototype PTEs in memory by
        // incrementing the reference count for the page.
        // Unlock any page locked earlier containing prototype PTEs if
        // the containing page is not the same for both.
        //

        if (PfnProto != NULL) {

            if (PointerPde != MiGetPteAddress (PointerPte)) {

                ASSERT (PfnProto->u3.e2.ReferenceCount > 1);
                MI_REMOVE_LOCKED_PAGE_CHARGE_AND_DECREF (PfnProto);
                PfnProto = NULL;
            }
        }

        if (PfnProto == NULL) {

            ASSERT (!MI_IS_PHYSICAL_ADDRESS (PointerPte));

            PointerPde = MiGetPteAddress (PointerPte);

            if (PointerPde->u.Hard.Valid == 0) {
                MiMakeSystemAddressValidPfn (PointerPte, OldIrql);
            }

            PfnProto = MI_PFN_ELEMENT (PointerPde->u.Hard.PageFrameNumber);
            MI_ADD_LOCKED_PAGE_CHARGE (PfnProto);
            ASSERT (PfnProto->u3.e2.ReferenceCount > 1);
        }

recheck:

        PteContents = *PointerPte;

        // LWFIX: are zero or dzero ptes possible here ?
        ASSERT (PteContents.u.Long != 0);

        if (PteContents.u.Hard.Valid == 1) {
            PageFrameIndex = MI_GET_PAGE_FRAME_FROM_PTE (&PteContents);
            Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
            ASSERT (Pfn1->u3.e1.PrototypePte == 1);
            MI_ADD_LOCKED_PAGE_CHARGE (Pfn1);
            *ApiPage = PageFrameIndex;
            *IoPage = DummyPage;
            continue;
        }

        if ((PteContents.u.Soft.Prototype == 0) &&
            (PteContents.u.Soft.Transition == 1)) {

            //
            // The page is in transition.  If there is an inpage still in
            // progress, wait for it to complete.  Reference the PFN and
            // then march on.
            //

            PageFrameIndex = MI_GET_PAGE_FRAME_FROM_TRANSITION_PTE (&PteContents);

            Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);
            ASSERT (Pfn1->u3.e1.PrototypePte == 1);

            if (Pfn1->u4.InPageError) {

                //
                // There was an in-page read error and there are other
                // threads colliding for this page, delay to let the
                // other threads complete and then retry.
                //

                UNLOCK_PFN (OldIrql);
                KeDelayExecutionThread (KernelMode,
                                        FALSE,
                                        (PLARGE_INTEGER)&MmHalfSecond);
                LOCK_PFN (OldIrql);
                goto recheck;
            }

            if (Pfn1->u3.e1.ReadInProgress) {
                // LWFIX - start with temp\aw.c
            }

            //
            // PTE refers to a normal transition PTE.
            //

            ASSERT ((SPFN_NUMBER)MmAvailablePages >= 0);

            if (MmAvailablePages == 0) {

                //
                // This can only happen if the system is utilizing a hardware
                // compression cache.  This ensures that only a safe amount
                // of the compressed virtual cache is directly mapped so that
                // if the hardware gets into trouble, we can bail it out.
                //

                UNLOCK_PFN (OldIrql);
                KeDelayExecutionThread (KernelMode,
                                        FALSE,
                                        (PLARGE_INTEGER)&MmHalfSecond);
                LOCK_PFN (OldIrql);
                goto recheck;
            }

            //
            // The PFN reference count will be 1 already here if the
            // modified writer has begun a write of this page.  Otherwise
            // it's ordinarily 0.
            //

            MI_ADD_LOCKED_PAGE_CHARGE_FOR_MODIFIED_PAGE (Pfn1);

            *IoPage = DummyPage;
            *ApiPage = PageFrameIndex;
            continue;
        }

        // LWFIX: need to handle protos that are now pagefile (or dzero)
        // backed - prefetching it from the file here would cause us to lose
        // the contents.  Note this can happen for session-space images
        // as we back modified (ie: for relocation fixups or IAT
        // updated) portions from the pagefile.  remove the assert below too.
        ASSERT (PteContents.u.Soft.Prototype == 1);

        if ((MmAvailablePages < MM_HIGH_LIMIT) &&
            (MiEnsureAvailablePageOrWait (NULL, OldIrql))) {

            //
            // Had to wait so recheck all state.
            //

            goto recheck;
        }

        NumberOfPagesNeedingIo += 1;

        //
        // Allocate a physical page.
        //

        PageColor = MI_PAGE_COLOR_VA_PROCESS (
                        MiGetVirtualAddressMappedByPte (PointerPte),
                        &CurrentProcess->NextPageColor);

        PageFrameIndex = MiRemoveAnyPage (PageColor);

        Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);

        ASSERT (Pfn1->u3.e2.ReferenceCount == 0);
        ASSERT (Pfn1->u2.ShareCount == 0);
        ASSERT (PointerPte->u.Hard.Valid == 0);

        //
        // Initialize read-in-progress PFN.
        //

        MiInitializePfn (PageFrameIndex, PointerPte, 0);

        //
        // These pieces of MiInitializePfn initialization are overridden
        // here as these pages are only going into prototype
        // transition and not into any page tables.
        //

        Pfn1->u3.e1.PrototypePte = 1;
        Pfn1->u2.ShareCount -= 1;
        ASSERT (Pfn1->u2.ShareCount == 0);
        Pfn1->u3.e1.PageLocation = ZeroedPageList;
        Pfn1->u3.e2.ReferenceCount -= 1;
        ASSERT (Pfn1->u3.e2.ReferenceCount == 0);
        MI_ADD_LOCKED_PAGE_CHARGE_FOR_MODIFIED_PAGE (Pfn1);

        //
        // Initialize the I/O specific fields.
        //

        Pfn1->u1.Event = &InPageSupport->Event;
        Pfn1->u3.e1.ReadInProgress = 1;
        ASSERT (Pfn1->u4.InPageError == 0);

        //
        // Increment the PFN reference count in the control area for
        // the subsection.
        //

        MiReadInfo->ControlArea->NumberOfPfnReferences += 1;

        //
        // Put the prototype PTE into the transition state.
        //

        MI_MAKE_TRANSITION_PTE (TempPte,
                                PageFrameIndex,
                                PointerPte->u.Soft.Protection,
                                PointerPte);

        MI_WRITE_INVALID_PTE (PointerPte, TempPte);

        *IoPage = PageFrameIndex;
        *ApiPage = PageFrameIndex;
    }

    //
    // If all the pages were resident, dereference the dummy page references
    // now and notify our caller that I/O is not necessary.
    //

    if (NumberOfPagesNeedingIo == 0) {
        ASSERT (DummyPfn1->u3.e2.ReferenceCount > MdlPages);

        DummyPfn1->u3.e2.ReferenceCount =
            (USHORT)(DummyPfn1->u3.e2.ReferenceCount - MdlPages);

        //
        // Unlock page containing prototype PTEs.
        //

        if (PfnProto != NULL) {
            ASSERT (PfnProto->u3.e2.ReferenceCount > 1);
            MI_REMOVE_LOCKED_PAGE_CHARGE_AND_DECREF (PfnProto);
        }

        UNLOCK_PFN (OldIrql);

        //
        // Return the upfront resident available charge as the
        // individual charges have all been made at this point.
        //

        MI_INCREMENT_RESIDENT_AVAILABLE (ResidentAvailableCharge,
                                         MM_RESAVAIL_FREE_BUILDMDL_EXCESS);

        return STATUS_SUCCESS;
    }

    //
    // Carefully trim leading dummy pages.
    //

    Page = (PPFN_NUMBER)(Mdl + 1);

    DummyTrim = 0;
    for (i = 0; i < MdlPages - 1; i += 1) {
        if (*Page == DummyPage) {
            DummyTrim += 1;
            Page += 1;
        }
        else {
            break;
        }
    }

    if (DummyTrim != 0) {

        Mdl->Size = (USHORT)(Mdl->Size - (DummyTrim * sizeof(PFN_NUMBER)));
        Mdl->ByteCount -= (ULONG)(DummyTrim * PAGE_SIZE);
        ASSERT (Mdl->ByteCount != 0);
        InPageSupport->ReadOffset.QuadPart += (DummyTrim * PAGE_SIZE);
        DummyPfn1->u3.e2.ReferenceCount =
            (USHORT)(DummyPfn1->u3.e2.ReferenceCount - DummyTrim);

        //
        // Shuffle down the PFNs in the MDL.
        // Recalculate BasePte to adjust for the shuffle.
        //

        Pfn1 = MI_PFN_ELEMENT (*Page);

        ASSERT (Pfn1->PteAddress->u.Hard.Valid == 0);
        ASSERT ((Pfn1->PteAddress->u.Soft.Prototype == 0) &&
                (Pfn1->PteAddress->u.Soft.Transition == 1));

        InPageSupport->BasePte = Pfn1->PteAddress;

        DestinationPage = (PPFN_NUMBER)(Mdl + 1);

        do {
            *DestinationPage = *Page;
            DestinationPage += 1;
            Page += 1;
            i += 1;
        } while (i < MdlPages);

        MdlPages -= DummyTrim;
    }

    //
    // Carefully trim trailing dummy pages.
    //

    ASSERT (MdlPages != 0);

    Page = (PPFN_NUMBER)(Mdl + 1) + MdlPages - 1;

    if (*Page == DummyPage) {

        ASSERT (MdlPages >= 2);

        //
        // Trim the last page specially as it may be a partial page.
        //

        Mdl->Size -= sizeof(PFN_NUMBER);
        if (BYTE_OFFSET(Mdl->ByteCount) != 0) {
            Mdl->ByteCount &= ~(PAGE_SIZE - 1);
        }
        else {
            Mdl->ByteCount -= PAGE_SIZE;
        }
        ASSERT (Mdl->ByteCount != 0);
        DummyPfn1->u3.e2.ReferenceCount -= 1;

        //
        // Now trim any other trailing pages.
        //

        Page -= 1;
        DummyTrim = 0;
        while (Page != ((PPFN_NUMBER)(Mdl + 1))) {
            if (*Page != DummyPage) {
                break;
            }
            DummyTrim += 1;
            Page -= 1;
        }
        if (DummyTrim != 0) {
            ASSERT (Mdl->Size > (USHORT)(DummyTrim * sizeof(PFN_NUMBER)));
            Mdl->Size = (USHORT)(Mdl->Size - (DummyTrim * sizeof(PFN_NUMBER)));
            Mdl->ByteCount -= (ULONG)(DummyTrim * PAGE_SIZE);
            DummyPfn1->u3.e2.ReferenceCount =
                (USHORT)(DummyPfn1->u3.e2.ReferenceCount - DummyTrim);
        }

        ASSERT (MdlPages > DummyTrim + 1);
        MdlPages -= (DummyTrim + 1);

#if DBG
        StartingVa = (PVOID)((PCHAR)Mdl->StartVa + Mdl->ByteOffset);

        ASSERT (MdlPages == ADDRESS_AND_SIZE_TO_SPAN_PAGES(StartingVa,
                                                           Mdl->ByteCount));
#endif
    }

    //
    // If the MDL is not already embedded in the inpage block, see if its
    // final size qualifies it - if so, embed it now.
    //

    if ((Mdl != &InPageSupport->Mdl) &&
        (Mdl->ByteCount <= (MM_MAXIMUM_READ_CLUSTER_SIZE + 1) * PAGE_SIZE)){

#if DBG
        RtlFillMemoryUlong (&InPageSupport->Page[0],
                            (MM_MAXIMUM_READ_CLUSTER_SIZE+1) * sizeof (PFN_NUMBER),
                            0xf1f1f1f1);
#endif

        RtlCopyMemory (&InPageSupport->Mdl, Mdl, Mdl->Size);

        FreeMdl = Mdl;

        Mdl = &InPageSupport->Mdl;

        ASSERT (((ULONG_PTR)Mdl & (sizeof(QUAD) - 1)) == 0);
        InPageSupport->u1.e1.PrefetchMdlHighBits = ((ULONG_PTR)Mdl >> 3);
    }
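//
// Illustrative sketch (not part of Mm): the dummy-page trimming above
// shrinks the MDL from both ends.  A simplified standalone rendition of
// the same arithmetic over a bare PFN array (the Toy* names are
// hypothetical; the real code additionally rounds a partial final page),
// compiled out here:
//

#if 0

#define TOY_PAGE_SIZE 0x1000
#define TOY_DUMMY ((unsigned long)-1)

static void
ToyTrimDummies (
    unsigned long *Page,
    unsigned long *PageCount,
    unsigned long long *ByteCount,
    unsigned long long *ReadOffset
    )
{
    unsigned long n = *PageCount;
    unsigned long Lead = 0;
    unsigned long i;

    //
    // Leading dummies advance the read offset and shorten the transfer;
    // always leave at least one entry.
    //

    while ((Lead < n - 1) && (Page[Lead] == TOY_DUMMY)) {
        Lead += 1;
    }
    *ReadOffset += (unsigned long long)Lead * TOY_PAGE_SIZE;
    *ByteCount -= (unsigned long long)Lead * TOY_PAGE_SIZE;

    //
    // Shuffle the surviving frames down, as the MDL shuffle above does.
    //

    for (i = 0; Lead + i < n; i += 1) {
        Page[i] = Page[Lead + i];
    }
    n -= Lead;

    //
    // Trailing dummies only shorten the transfer.
    //

    while ((n > 1) && (Page[n - 1] == TOY_DUMMY)) {
        n -= 1;
        *ByteCount -= TOY_PAGE_SIZE;
    }

    *PageCount = n;
}

#endif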
BOOLEAN
MmCreateProcessAddressSpace (
    IN ULONG MinimumWorkingSetSize,
    IN PEPROCESS NewProcess,
    OUT PULONG_PTR DirectoryTableBase
    )

/*++

Routine Description:

    This routine creates an address space which maps the system
    portion and contains a hyper space entry.

Arguments:

    MinimumWorkingSetSize - Supplies the minimum working set size for
                            this address space.  This value is only used
                            to ensure that ample physical pages exist
                            to create this process.

    NewProcess - Supplies a pointer to the process object being created.

    DirectoryTableBase - Returns the value of the newly created
                         address space's Page Directory (PD) page and
                         hyper space page.

Return Value:

    Returns TRUE if an address space was successfully created, FALSE
    if ample physical pages do not exist.

Environment:

    Kernel mode.  APCs Disabled.

--*/

{
    LOGICAL FlushTbNeeded;
    PFN_NUMBER PageDirectoryIndex;
    PFN_NUMBER HyperSpaceIndex;
    PFN_NUMBER PageContainingWorkingSet;
    PFN_NUMBER VadBitMapPage;
    MMPTE TempPte;
    MMPTE TempPte2;
    PEPROCESS CurrentProcess;
    KIRQL OldIrql;
    PMMPFN Pfn1;
    ULONG Color;
    PMMPTE PointerPte;
    ULONG PdeOffset;
    PMMPTE MappingPte;
    PMMPTE PointerFillPte;
    PMMPTE CurrentAddressSpacePde;
    ULONG TopQuad;
    MMPTE TopPte;
    PPAE_ENTRY PaeVa;
    ULONG i;
    PFN_NUMBER PageDirectories[PD_PER_SYSTEM];

    FlushTbNeeded = FALSE;

    //
    // Charge commitment for the page directory pages, working set page table
    // page, and working set list.  If Vad bitmap lookups are enabled, then
    // charge for a page or two for that as well.
    //

    if (MiChargeCommitment (MM_PROCESS_COMMIT_CHARGE, NULL) == FALSE) {
        return FALSE;
    }

    CurrentProcess = PsGetCurrentProcess ();

    NewProcess->NextPageColor = (USHORT) (RtlRandom (&MmProcessColorSeed));
    KeInitializeSpinLock (&NewProcess->HyperSpaceLock);

    TopQuad = MiPaeAllocate (&PaeVa);

    if (TopQuad == 0) {
        MiReturnCommitment (MM_PROCESS_COMMIT_CHARGE);
        return FALSE;
    }

    TempPte = ValidPdePde;
    MI_SET_GLOBAL_STATE (TempPte, 0);

    //
    // Get the PFN lock to get physical pages.
    //

    LOCK_PFN (OldIrql);

    //
    // Check to make sure the physical pages are available.
    //

    if (MI_NONPAGEABLE_MEMORY_AVAILABLE() <= (SPFN_NUMBER)MinimumWorkingSetSize){
        UNLOCK_PFN (OldIrql);
        MiPaeFree (PaeVa);
        MiReturnCommitment (MM_PROCESS_COMMIT_CHARGE);

        //
        // Indicate no directory base was allocated.
        //

        return FALSE;
    }

    MM_TRACK_COMMIT (MM_DBG_COMMIT_PROCESS_CREATE, MM_PROCESS_COMMIT_CHARGE);

    MI_DECREMENT_RESIDENT_AVAILABLE (MinimumWorkingSetSize,
                                     MM_RESAVAIL_ALLOCATE_CREATE_PROCESS);

    //
    // Allocate the page directory pages.
    //

    for (i = 0; i < PD_PER_SYSTEM; i += 1) {

        if (MmAvailablePages < MM_HIGH_LIMIT) {
            MiEnsureAvailablePageOrWait (NULL, OldIrql);
        }

        Color = MI_PAGE_COLOR_PTE_PROCESS (PDE_BASE,
                                           &CurrentProcess->NextPageColor);

        PageDirectories[i] = MiRemoveZeroPageMayReleaseLocks (Color, OldIrql);

        Pfn1 = MI_PFN_ELEMENT (PageDirectories[i]);
        if (Pfn1->u3.e1.CacheAttribute != MiCached) {
            Pfn1->u3.e1.CacheAttribute = MiCached;
            FlushTbNeeded = TRUE;
        }
    }

    //
    // Initialize the parent page directory entries.
    //

    TopPte.u.Long = TempPte.u.Long & ~MM_PAE_PDPTE_MASK;

    for (i = 0; i < PD_PER_SYSTEM; i += 1) {
        TopPte.u.Hard.PageFrameNumber = PageDirectories[i];
        PaeVa->PteEntry[i].u.Long = TopPte.u.Long;
    }

    NewProcess->PaeTop = (PVOID) PaeVa;
    DirectoryTableBase[0] = TopQuad;

    //
    // Allocate the hyper space page table page.
    //

    if (MmAvailablePages < MM_HIGH_LIMIT) {
        MiEnsureAvailablePageOrWait (NULL, OldIrql);
    }

    Color = MI_PAGE_COLOR_PTE_PROCESS (MiGetPdeAddress(HYPER_SPACE),
                                       &CurrentProcess->NextPageColor);

    HyperSpaceIndex = MiRemoveZeroPageMayReleaseLocks (Color, OldIrql);

    Pfn1 = MI_PFN_ELEMENT (HyperSpaceIndex);
    if (Pfn1->u3.e1.CacheAttribute != MiCached) {
        Pfn1->u3.e1.CacheAttribute = MiCached;
        FlushTbNeeded = TRUE;
    }

    //
    // Unlike DirectoryTableBase[0], the HyperSpaceIndex is stored as an
    // absolute PFN and does not need to be below 4GB.
    //

    DirectoryTableBase[1] = HyperSpaceIndex;

    //
    // Remove page(s) for the VAD bitmap.
    //

    if (MmAvailablePages < MM_HIGH_LIMIT) {
        MiEnsureAvailablePageOrWait (NULL, OldIrql);
    }

    Color = MI_PAGE_COLOR_VA_PROCESS (MmWorkingSetList,
                                      &CurrentProcess->NextPageColor);

    VadBitMapPage = MiRemoveZeroPageMayReleaseLocks (Color, OldIrql);

    Pfn1 = MI_PFN_ELEMENT (VadBitMapPage);
    if (Pfn1->u3.e1.CacheAttribute != MiCached) {
        Pfn1->u3.e1.CacheAttribute = MiCached;
        FlushTbNeeded = TRUE;
    }

    //
    // Remove a page for the working set list.
    //

    if (MmAvailablePages < MM_HIGH_LIMIT) {
        MiEnsureAvailablePageOrWait (NULL, OldIrql);
    }

    Color = MI_PAGE_COLOR_VA_PROCESS (MmWorkingSetList,
                                      &CurrentProcess->NextPageColor);

    PageContainingWorkingSet = MiRemoveZeroPageMayReleaseLocks (Color, OldIrql);

    Pfn1 = MI_PFN_ELEMENT (PageContainingWorkingSet);
    if (Pfn1->u3.e1.CacheAttribute != MiCached) {
        Pfn1->u3.e1.CacheAttribute = MiCached;
        FlushTbNeeded = TRUE;
    }

    UNLOCK_PFN (OldIrql);

    if (FlushTbNeeded == TRUE) {
        MI_FLUSH_TB_FOR_CACHED_ATTRIBUTE ();
    }

    ASSERT (NewProcess->AddressSpaceInitialized == 0);
    PS_SET_BITS (&NewProcess->Flags, PS_PROCESS_FLAGS_ADDRESS_SPACE1);
    ASSERT (NewProcess->AddressSpaceInitialized == 1);

    NewProcess->Vm.MinimumWorkingSetSize = MinimumWorkingSetSize;

    NewProcess->WorkingSetPage = PageContainingWorkingSet;

    //
    // Initialize the PTEs for hyperspace and the VAD bitmap mapping.
    //

    TempPte.u.Hard.PageFrameNumber = VadBitMapPage;

    MappingPte = MiReserveSystemPtes (1, SystemPteSpace);

    if (MappingPte != NULL) {

        MI_MAKE_VALID_KERNEL_PTE (TempPte2,
                                  HyperSpaceIndex,
                                  MM_READWRITE,
                                  MappingPte);

        MI_SET_PTE_DIRTY (TempPte2);

        MI_WRITE_VALID_PTE (MappingPte, TempPte2);

        PointerPte = MiGetVirtualAddressMappedByPte (MappingPte);
    }
    else {
        PointerPte = MiMapPageInHyperSpace (CurrentProcess,
                                            HyperSpaceIndex,
                                            &OldIrql);
    }

    PointerPte[MiGetPteOffset(VAD_BITMAP_SPACE)] = TempPte;

    TempPte.u.Hard.PageFrameNumber = PageContainingWorkingSet;
    PointerPte[MiGetPteOffset(MmWorkingSetList)] = TempPte;

    if (MappingPte != NULL) {
        MiReleaseSystemPtes (MappingPte, 1, SystemPteSpace);
    }
    else {
        MiUnmapPageInHyperSpace (CurrentProcess, PointerPte, OldIrql);
    }

    //
    // Set the PTE address in the PFN for the page directory page.
    //

    Pfn1 = MI_PFN_ELEMENT (PageDirectories[PD_PER_SYSTEM - 1]);

    Pfn1->PteAddress = (PMMPTE)PDE_BASE;

    //
    // Add the new process to our internal list prior to filling any
    // system PDEs so if a system PDE changes (large page map or unmap)
    // it can mark this process for a subsequent update.
    //

    ASSERT (NewProcess->Pcb.DirectoryTableBase[0] == 0);

    LOCK_EXPANSION (OldIrql);

    InsertTailList (&MmProcessList, &NewProcess->MmProcessLinks);

    UNLOCK_EXPANSION (OldIrql);

    //
    // Map the page directory page in hyperspace.
    //

    MappingPte = MiReserveSystemPtes (1, SystemPteSpace);

    if (MappingPte != NULL) {

        MI_MAKE_VALID_KERNEL_PTE (TempPte2,
                                  PageDirectories[PD_PER_SYSTEM - 1],
                                  MM_READWRITE,
                                  MappingPte);

        MI_SET_PTE_DIRTY (TempPte2);

        MI_WRITE_VALID_PTE (MappingPte, TempPte2);

        PointerPte = MiGetVirtualAddressMappedByPte (MappingPte);
    }
    else {
        PointerPte = MiMapPageInHyperSpace (CurrentProcess,
                                            PageDirectories[PD_PER_SYSTEM - 1],
                                            &OldIrql);
    }

    CurrentAddressSpacePde = MiGetPdeAddress (0xC0000000);

    //
    // Copy the entire page directory page for the highest GB so all the
    // kernel mappings are inherited.
    //

    RtlCopyMemory (PointerPte, CurrentAddressSpacePde, PAGE_SIZE);

    //
    // Recursively map each page directory page so it points to itself.
    //

    for (i = 0; i < PD_PER_SYSTEM; i += 1) {
        TempPte.u.Hard.PageFrameNumber = PageDirectories[i];
        PointerPte[i] = TempPte;
    }

    //
    // Map the working set page table page.
    //

    PdeOffset = MiGetPdeOffset (HYPER_SPACE);
    TempPte.u.Hard.PageFrameNumber = HyperSpaceIndex;
    PointerPte[PdeOffset] = TempPte;

    //
    // Zero the remaining page directory range used to map the working
    // set list and its hash.
    //

    PdeOffset += 1;

    ASSERT (MiGetPdeOffset (MmHyperSpaceEnd) >= PdeOffset);

    MiZeroMemoryPte (&PointerPte[PdeOffset],
                     (MiGetPdeOffset (MmHyperSpaceEnd) - PdeOffset + 1));

    //
    // The page directory page is now initialized.
    //

    if (MappingPte != NULL) {
        MiReleaseSystemPtes (MappingPte, 1, SystemPteSpace);
    }
    else {
        MiUnmapPageInHyperSpace (CurrentProcess, PointerPte, OldIrql);
    }

    //
    // Map all the virtual space in the 2GB->3GB range when it's not user
    // space.  This includes kernel/HAL code & data, the PFN database,
    // initial nonpaged pool, any extra system PTE or system cache areas,
    // system views and session space.
    //

    if (MmSystemRangeStart < (PVOID) 0xC0000000) {

        PageDirectoryIndex =
            MI_GET_PAGE_FRAME_FROM_PTE (&PaeVa->PteEntry[PD_PER_SYSTEM - 2]);

        MappingPte = MiReserveSystemPtes (1, SystemPteSpace);

        if (MappingPte != NULL) {

            MI_MAKE_VALID_KERNEL_PTE (TempPte2,
                                      PageDirectoryIndex,
                                      MM_READWRITE,
                                      MappingPte);

            MI_SET_PTE_DIRTY (TempPte2);

            MI_WRITE_VALID_PTE (MappingPte, TempPte2);

            PointerPte = MiGetVirtualAddressMappedByPte (MappingPte);
        }
        else {
            PointerPte = MiMapPageInHyperSpace (CurrentProcess,
                                                PageDirectoryIndex,
                                                &OldIrql);
        }

        PdeOffset = MiGetPdeOffset (MmSystemRangeStart);
        PointerFillPte = &PointerPte[PdeOffset];
        CurrentAddressSpacePde = MiGetPdeAddress (MmSystemRangeStart);

        RtlCopyMemory (PointerFillPte,
                       CurrentAddressSpacePde,
                       PAGE_SIZE - PdeOffset * sizeof (MMPTE));

        if (MappingPte != NULL) {
            MiReleaseSystemPtes (MappingPte, 1, SystemPteSpace);
        }
        else {
            MiUnmapPageInHyperSpace (CurrentProcess, PointerPte, OldIrql);
        }
    }

    InterlockedExchangeAddSizeT (&MmProcessCommit, MM_PROCESS_COMMIT_CHARGE);

    //
    // Up the session space reference count.
    //

    MiSessionAddProcess (NewProcess);

    return TRUE;
}
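//
// Illustrative sketch (not part of Mm): the loop above that stores
// PageDirectories[i] into PointerPte[i] builds the classic recursive
// self-map - once each page directory page maps itself, every PTE in the
// address space becomes visible through a fixed window.  Assuming the
// conventional x86 PAE layout (PTE_BASE at 0xC0000000, 8-byte entries),
// the window address of the PTE mapping any VA is computed as below
// (compiled out; the TOY_* names are hypothetical):
//

#if 0

#include <stdint.h>

#define TOY_PTE_BASE 0xC0000000u

static uint32_t
ToyPteAddressFor (uint32_t Va)
{
    //
    // One 8-byte PAE PTE per 4K page; the self-map compresses all the
    // page tables into the window starting at TOY_PTE_BASE.
    //

    return TOY_PTE_BASE + ((Va >> 12) << 3);
}

#endif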
LOGICAL
MiSetDirtyBit (
    IN PVOID FaultingAddress,
    IN PMMPTE PointerPte,
    IN ULONG PfnHeld
    )

/*++

Routine Description:

    This routine sets the dirty bit in the specified PTE and the modify bit
    in the corresponding PFN element.  If any page file space is allocated,
    it is deallocated.

Arguments:

    FaultingAddress - Supplies the faulting address.

    PointerPte - Supplies a pointer to the corresponding valid PTE.

    PfnHeld - Supplies TRUE if the PFN lock is already held.

Return Value:

    TRUE if action was taken, FALSE if not.

Environment:

    Kernel mode, APCs disabled, working set pushlock held.

--*/

{
    MMPTE TempPte;
    PFN_NUMBER PageFrameIndex;
    PMMPFN Pfn1;

    //
    // The page is NOT copy on write, update the PTE setting both the
    // dirty bit and the accessed bit.  Note that as this PTE is in
    // the TB, the TB must be flushed.
    //

    TempPte = *PointerPte;

    PageFrameIndex = MI_GET_PAGE_FRAME_FROM_PTE (&TempPte);

    //
    // This may be a PTE from a rotate physical frame so there may be no
    // corresponding PFN for it.
    //

    if (!MI_IS_PFN (PageFrameIndex)) {
        return FALSE;
    }

    MI_SET_PTE_DIRTY (TempPte);
    MI_SET_ACCESSED_IN_PTE (&TempPte, 1);
    MI_WRITE_VALID_PTE_NEW_PROTECTION (PointerPte, TempPte);

    //
    // Check state of PFN lock and if not held, don't update PFN database.
    //

    if (PfnHeld) {

        Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);

        //
        // Set the modified field in the PFN database, also, if the physical
        // page is currently in a paging file, free up the page file space
        // as the contents are now worthless.
        //

        if ((Pfn1->OriginalPte.u.Soft.Prototype == 0) &&
            (Pfn1->u3.e1.WriteInProgress == 0)) {

            //
            // This page is in page file format, deallocate the page file
            // space.
            //

            MiReleasePageFileSpace (Pfn1->OriginalPte);

            //
            // Change original PTE to indicate no page file space is reserved,
            // otherwise the space will be deallocated when the PTE is
            // deleted.
            //

            Pfn1->OriginalPte.u.Soft.PageFileHigh = 0;
        }

        MI_SET_MODIFIED (Pfn1, 1, 0x17);
    }

    //
    // The TB entry must be flushed as the valid PTE with the dirty bit clear
    // has been fetched into the TB.  If it isn't flushed, another fault
    // is generated as the dirty bit is not set in the cached TB entry.
    //

    KeFillEntryTb (FaultingAddress);

    return TRUE;
}
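//
// Illustrative sketch (not part of Mm): the two PTE updates above amount
// to ORing in the accessed and dirty bits before the stale TB entry is
// flushed.  In terms of a raw IA-32 PTE value (accessed is bit 5, dirty
// is bit 6), compiled out here (the TOY_* names are hypothetical):
//

#if 0

#include <stdint.h>

#define TOY_PTE_ACCESSED (1ull << 5)
#define TOY_PTE_DIRTY    (1ull << 6)

static uint64_t
ToySetDirtyAccessed (uint64_t Pte)
{
    return Pte | TOY_PTE_ACCESSED | TOY_PTE_DIRTY;
}

#endif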
LOGICAL
FASTCALL
MiCopyOnWrite (
    IN PVOID FaultingAddress,
    IN PMMPTE PointerPte
    )

/*++

Routine Description:

    This routine performs a copy on write operation for the specified
    virtual address.

Arguments:

    FaultingAddress - Supplies the virtual address which caused the fault.

    PointerPte - Supplies the pointer to the PTE which caused the page fault.

Return Value:

    Returns TRUE if the page was actually split, FALSE if not.

Environment:

    Kernel mode, APCs disabled, working set mutex held.

--*/

{
    MMPTE TempPte;
    MMPTE TempPte2;
    PMMPTE MappingPte;
    PFN_NUMBER PageFrameIndex;
    PFN_NUMBER NewPageIndex;
    PVOID CopyTo;
    PVOID CopyFrom;
    KIRQL OldIrql;
    PMMPFN Pfn1;
    PEPROCESS CurrentProcess;
    PMMCLONE_BLOCK CloneBlock;
    PMMCLONE_DESCRIPTOR CloneDescriptor;
    WSLE_NUMBER WorkingSetIndex;
    LOGICAL FakeCopyOnWrite;
    PMMWSL WorkingSetList;
    PVOID SessionSpace;
    PLIST_ENTRY NextEntry;
    PIMAGE_ENTRY_IN_SESSION Image;

    //
    // This is called from MmAccessFault; the PointerPte is valid
    // and the working set mutex ensures it cannot change state.
    //
    // Capture the PTE contents to TempPte.
    //

    TempPte = *PointerPte;
    ASSERT (TempPte.u.Hard.Valid == 1);

    PageFrameIndex = MI_GET_PAGE_FRAME_FROM_PTE (&TempPte);
    Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);

    //
    // Check to see if this is a prototype PTE with copy on write enabled.
    //

    FakeCopyOnWrite = FALSE;
    CurrentProcess = PsGetCurrentProcess ();
    CloneBlock = NULL;

    if (FaultingAddress >= (PVOID) MmSessionBase) {

        WorkingSetList = MmSessionSpace->Vm.VmWorkingSetList;
        ASSERT (Pfn1->u3.e1.PrototypePte == 1);
        SessionSpace = (PVOID) MmSessionSpace;

        MM_SESSION_SPACE_WS_LOCK_ASSERT ();

        if (MmSessionSpace->ImageLoadingCount != 0) {

            NextEntry = MmSessionSpace->ImageList.Flink;

            while (NextEntry != &MmSessionSpace->ImageList) {

                Image = CONTAINING_RECORD (NextEntry,
                                           IMAGE_ENTRY_IN_SESSION,
                                           Link);

                if ((FaultingAddress >= Image->Address) &&
                    (FaultingAddress <= Image->LastAddress)) {

                    if (Image->ImageLoading) {

                        ASSERT (Pfn1->u3.e1.PrototypePte == 1);

                        TempPte.u.Hard.CopyOnWrite = 0;
                        TempPte.u.Hard.Write = 1;

                        //
                        // The page is no longer copy on write, update the PTE
                        // setting both the dirty bit and the accessed bit.
                        //
                        // Even though the page's current backing is the image
                        // file, the modified writer will convert it to
                        // pagefile backing when it notices the change later.
                        //

                        MI_SET_PTE_DIRTY (TempPte);
                        MI_SET_ACCESSED_IN_PTE (&TempPte, 1);

                        MI_WRITE_VALID_PTE_NEW_PROTECTION (PointerPte, TempPte);

                        //
                        // The TB entry must be flushed as the valid PTE with
                        // the dirty bit clear has been fetched into the TB.  If
                        // it isn't flushed, another fault is generated as the
                        // dirty bit is not set in the cached TB entry.
                        //

                        MI_FLUSH_SINGLE_TB (FaultingAddress, TRUE);

                        return FALSE;
                    }

                    break;
                }

                NextEntry = NextEntry->Flink;
            }
        }
    }
    else {
        WorkingSetList = MmWorkingSetList;
        SessionSpace = NULL;

        //
        // If a fork operation is in progress, block until the fork is
        // completed, then retry the whole operation as the state of
        // everything may have changed between when the mutexes were
        // released and reacquired.
        //

        if (CurrentProcess->ForkInProgress != NULL) {
            if (MiWaitForForkToComplete (CurrentProcess) == TRUE) {
                return FALSE;
            }
        }

        if (TempPte.u.Hard.CopyOnWrite == 0) {

            //
            // This is a fork page which is being made private in order
            // to change the protection of the page.
            // Do not make the page writable.
            //

            FakeCopyOnWrite = TRUE;
        }
    }

    WorkingSetIndex = MiLocateWsle (FaultingAddress,
                                    WorkingSetList,
                                    Pfn1->u1.WsIndex,
                                    FALSE);

    //
    // The page must be copied into a new page.
    //

    LOCK_PFN (OldIrql);

    if ((MmAvailablePages < MM_HIGH_LIMIT) &&
        (MiEnsureAvailablePageOrWait (
                    SessionSpace != NULL ? HYDRA_PROCESS : CurrentProcess,
                    OldIrql))) {

        //
        // A wait operation was performed to obtain an available
        // page and the working set mutex and PFN lock have
        // been released and various things may have changed for
        // the worse.  Rather than examine all the conditions again,
        // return and if things are still proper, the fault will
        // be taken again.
        //

        UNLOCK_PFN (OldIrql);
        return FALSE;
    }

    //
    // This must be a prototype PTE.  Perform the copy on write.
    //

    ASSERT (Pfn1->u3.e1.PrototypePte == 1);

    //
    // A page is being copied and made private, the global state of
    // the shared page needs to be updated at this point on certain
    // hardware.  This is done by ORing the dirty bit into the modify bit in
    // the PFN element.
    //
    // Note that a session page cannot be dirty (no POSIX-style forking is
    // supported for these drivers).
    //

    if (SessionSpace != NULL) {

        ASSERT ((TempPte.u.Hard.Valid == 1) && (TempPte.u.Hard.Write == 0));
        ASSERT (!MI_IS_PTE_DIRTY (TempPte));

        NewPageIndex = MiRemoveAnyPage (
                            MI_GET_PAGE_COLOR_FROM_SESSION(MmSessionSpace));
    }
    else {
        MI_CAPTURE_DIRTY_BIT_TO_PFN (PointerPte, Pfn1);
        CloneBlock = (PMMCLONE_BLOCK) Pfn1->PteAddress;

        //
        // Get a new page with the same color as this page.
        //

        NewPageIndex = MiRemoveAnyPage (
                            MI_PAGE_COLOR_PTE_PROCESS(PageFrameIndex,
                                            &CurrentProcess->NextPageColor));
    }

    MiInitializeCopyOnWritePfn (NewPageIndex,
                                PointerPte,
                                WorkingSetIndex,
                                WorkingSetList);

    UNLOCK_PFN (OldIrql);

    InterlockedIncrement (&KeGetCurrentPrcb ()->MmCopyOnWriteCount);

    CopyFrom = PAGE_ALIGN (FaultingAddress);

    MappingPte = MiReserveSystemPtes (1, SystemPteSpace);

    if (MappingPte != NULL) {

        MI_MAKE_VALID_KERNEL_PTE (TempPte2,
                                  NewPageIndex,
                                  MM_READWRITE,
                                  MappingPte);

        MI_SET_PTE_DIRTY (TempPte2);

        if (Pfn1->u3.e1.CacheAttribute == MiNonCached) {
            MI_DISABLE_CACHING (TempPte2);
        }
        else if (Pfn1->u3.e1.CacheAttribute == MiWriteCombined) {
            MI_SET_PTE_WRITE_COMBINE (TempPte2);
        }

        MI_WRITE_VALID_PTE (MappingPte, TempPte2);

        CopyTo = MiGetVirtualAddressMappedByPte (MappingPte);
    }
    else {
        CopyTo = MiMapPageInHyperSpace (CurrentProcess,
                                        NewPageIndex,
                                        &OldIrql);
    }

    KeCopyPage (CopyTo, CopyFrom);

    if (MappingPte != NULL) {
        MiReleaseSystemPtes (MappingPte, 1, SystemPteSpace);
    }
    else {
        MiUnmapPageInHyperSpace (CurrentProcess, CopyTo, OldIrql);
    }

    if (!FakeCopyOnWrite) {

        //
        // If the page was really a copy on write page, make it
        // accessed, dirty and writable.  Also, clear the copy-on-write
        // bit in the PTE.
        //

        MI_SET_PTE_DIRTY (TempPte);
        TempPte.u.Hard.Write = 1;
        MI_SET_ACCESSED_IN_PTE (&TempPte, 1);
        TempPte.u.Hard.CopyOnWrite = 0;
    }

    //
    // Regardless of whether the page was really a copy on write,
    // the frame field of the PTE must be updated.
    //

    TempPte.u.Hard.PageFrameNumber = NewPageIndex;

    //
    // If the modify bit is set in the PFN database for the
    // page, the data cache must be flushed.  This is due to the
    // fact that this process may have been cloned and the cache
    // still contains stale data destined for the page we are
    // going to remove.
    //

    ASSERT (TempPte.u.Hard.Valid == 1);

    MI_WRITE_VALID_PTE_NEW_PAGE (PointerPte, TempPte);

    //
    // Flush the TB entry for this page.
    //

    if (SessionSpace == NULL) {

        MI_FLUSH_SINGLE_TB (FaultingAddress, FALSE);

        //
        // Increment the number of private pages.
        //

        CurrentProcess->NumberOfPrivatePages += 1;
    }
    else {

        MI_FLUSH_SINGLE_TB (FaultingAddress, TRUE);

        ASSERT (Pfn1->u3.e1.PrototypePte == 1);
    }

    //
    // Decrement the share count for the page which was copied
    // as this PTE no longer refers to it.
    //

    LOCK_PFN (OldIrql);

    MiDecrementShareCount (Pfn1, PageFrameIndex);

    if (SessionSpace == NULL) {

        CloneDescriptor = MiLocateCloneAddress (CurrentProcess,
                                                (PVOID)CloneBlock);

        if (CloneDescriptor != NULL) {

            //
            // Decrement the reference count for the clone block,
            // note that this could release and reacquire the mutexes.
            //

            MiDecrementCloneBlockReference (CloneDescriptor,
                                            CloneBlock,
                                            CurrentProcess,
                                            NULL,
                                            OldIrql);
        }
    }

    UNLOCK_PFN (OldIrql);
    return TRUE;
}
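//
// Illustrative sketch (not part of Mm): stripped of the PFN, working set
// and TB bookkeeping, the copy-on-write resolution above is - allocate a
// fresh page, copy the shared contents, repoint the faulting mapping at
// the private copy, then drop one reference on the original.  A user-mode
// analogy with toy types (the TOY_* names are hypothetical), compiled out:
//

#if 0

#include <stdlib.h>
#include <string.h>

#define TOY_PAGE_SIZE 0x1000

typedef struct _TOY_PAGE {
    unsigned RefCount;
    unsigned char Data[TOY_PAGE_SIZE];
} TOY_PAGE;

static TOY_PAGE *
ToyBreakCow (TOY_PAGE **Mapping)
{
    TOY_PAGE *Shared = *Mapping;
    TOY_PAGE *Private;

    if (Shared->RefCount == 1) {
        return Shared;          // already private - nothing to split
    }

    Private = malloc (sizeof (*Private));
    if (Private == NULL) {
        return NULL;            // caller retries, as the fault path does
    }

    memcpy (Private->Data, Shared->Data, TOY_PAGE_SIZE);
    Private->RefCount = 1;

    *Mapping = Private;         // repoint, then release the original
    Shared->RefCount -= 1;

    return Private;
}

#endif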
VOID
MiProcessValidPteList (
    IN PMMPTE *ValidPteList,
    IN ULONG Count
    )

/*++

Routine Description:

    This routine flushes the specified range of valid PTEs.

Arguments:

    ValidPteList - Supplies a pointer to an array of PTEs to flush.

    Count - Supplies the count of the number of elements in the array.

Return Value:

    None.

Environment:

    Kernel mode, APCs disabled, WorkingSetMutex and AddressCreation mutexes
    held.

--*/

{
    ULONG i;
    MMPTE_FLUSH_LIST PteFlushList;
    MMPTE PteContents;
    PMMPFN Pfn1;
    PMMPFN Pfn2;
    PFN_NUMBER PageFrameIndex;
    PFN_NUMBER PageTableFrameIndex;
    KIRQL OldIrql;

    i = 0;
    PteFlushList.Count = Count;

    if (Count < MM_MAXIMUM_FLUSH_COUNT) {

        do {
            PteFlushList.FlushVa[i] =
                MiGetVirtualAddressMappedByPte (ValidPteList[i]);
            i += 1;
        } while (i != Count);
        i = 0;
    }

    LOCK_PFN (OldIrql);

    do {
        PteContents = *ValidPteList[i];
        ASSERT (PteContents.u.Hard.Valid == 1);

        PageFrameIndex = MI_GET_PAGE_FRAME_FROM_PTE(&PteContents);
        Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);

        //
        // Decrement the share and valid counts of the page table
        // page which maps this PTE.
        //

        PageTableFrameIndex = Pfn1->u4.PteFrame;
        Pfn2 = MI_PFN_ELEMENT (PageTableFrameIndex);
        MiDecrementShareCountInline (Pfn2, PageTableFrameIndex);

        MI_SET_PFN_DELETED (Pfn1);

        //
        // Decrement the share count for the physical page.  As the page
        // is private it will be put on the free list.
        //

        MiDecrementShareCount (Pfn1, PageFrameIndex);

        MI_WRITE_INVALID_PTE (ValidPteList[i], MmDecommittedPte);

        i += 1;

    } while (i != Count);

    MiFlushPteList (&PteFlushList);

    UNLOCK_PFN (OldIrql);
    return;
}
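//
// Illustrative sketch (not part of Mm): note the batching convention used
// above - per-VA flush addresses are recorded only while the count fits
// the flush list; a count beyond the limit is the signal to flush the
// entire TB rather than issue per-address invalidations.  A toy rendition
// (the TOY_* names are hypothetical), compiled out here:
//

#if 0

#define TOY_MAXIMUM_FLUSH_COUNT 16

typedef struct _TOY_FLUSH_LIST {
    unsigned Count;
    void *FlushVa[TOY_MAXIMUM_FLUSH_COUNT];
} TOY_FLUSH_LIST;

static void
ToyRecordFlush (TOY_FLUSH_LIST *List, void *Va)
{
    if (List->Count < TOY_MAXIMUM_FLUSH_COUNT) {
        List->FlushVa[List->Count] = Va;
    }

    //
    // Counting past the limit tells the flush routine to dump the whole
    // TB instead of walking the (overflowed) address list.
    //

    List->Count += 1;
}

#endif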
VOID
MiSetDirtyBit (
    IN PVOID FaultingAddress,
    IN PMMPTE PointerPte,
    IN ULONG PfnHeld
    )

/*++

Routine Description:

    This routine sets the dirty bit in the specified PTE and the modify bit
    in the corresponding PFN element.  If any page file space is allocated,
    it is deallocated.

Arguments:

    FaultingAddress - Supplies the faulting address.

    PointerPte - Supplies a pointer to the corresponding valid PTE.

    PfnHeld - Supplies TRUE if the PFN mutex is already held.

Return Value:

    None.

Environment:

    Kernel mode, APCs disabled, working set mutex held.

--*/

{
    MMPTE TempPte;
    PFN_NUMBER PageFrameIndex;
    PMMPFN Pfn1;
    KIRQL OldIrql;

    //
    // The TB entry must be flushed as the valid PTE with the dirty bit clear
    // has been fetched into the TB.  If it isn't flushed, another fault
    // is generated as the dirty bit is not set in the cached TB entry.
    //

    // KiFlushSingleDataTb( FaultingAddress );
    __dtbis( FaultingAddress );

    //
    // The page is NOT copy on write, update the PTE setting both the
    // dirty bit and the accessed bit.  Note that as this PTE is in
    // the TB, the TB must be flushed.
    //

    PageFrameIndex = MI_GET_PAGE_FRAME_FROM_PTE(PointerPte);
    Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);

    TempPte = *PointerPte;
    TempPte.u.Hard.FaultOnWrite = 0;
    MI_SET_ACCESSED_IN_PTE (&TempPte, 1);
    *PointerPte = TempPte;

    //
    // If the PFN database lock is not held, then do not update the
    // PFN database.
    //

    if (PfnHeld) {

        //
        // Set the modified field in the PFN database, also, if the physical
        // page is currently in a paging file, free up the page file space
        // as the contents are now worthless.
        //

        if ((Pfn1->OriginalPte.u.Soft.Prototype == 0) &&
            (Pfn1->u3.e1.WriteInProgress == 0)) {

            //
            // This page is in page file format, deallocate the page file
            // space.
            //

            MiReleasePageFileSpace (Pfn1->OriginalPte);

            //
            // Change original PTE to indicate no page file space is reserved,
            // otherwise the space will be deallocated when the PTE is
            // deleted.
            //

            Pfn1->OriginalPte.u.Soft.PageFileHigh = 0;
        }

        Pfn1->u3.e1.Modified = 1;
    }

    return;
}
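//
// Illustrative sketch (not part of Mm): the Alpha variant above emulates a
// hardware dirty bit with a fault-on-write PTE bit - the first store to a
// clean page faults, and the handler clears fault-on-write while the
// modification is recorded in software.  A minimal rendition (the bit
// position and Toy* names are hypothetical), compiled out here:
//

#if 0

#include <stdint.h>

#define TOY_PTE_FAULT_ON_WRITE (1u << 7)    // hypothetical bit position

typedef struct _TOY_SOFT_PFN {
    unsigned Modified;
} TOY_SOFT_PFN;

static uint32_t
ToyEmulateDirty (uint32_t Pte, TOY_SOFT_PFN *Pfn)
{
    Pte &= ~TOY_PTE_FAULT_ON_WRITE;         // further stores won't fault
    Pfn->Modified = 1;                      // dirtiness tracked in software
    return Pte;
}

#endif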