BOOLEAN
MmCreateProcessAddressSpace (
    IN ULONG MinimumWorkingSetSize,
    IN PEPROCESS NewProcess,
    OUT PULONG_PTR DirectoryTableBase
    )

/*++

Routine Description:

    Builds the initial address space for a process that is being created.
    Four zeroed physical pages are obtained: a page directory page, a hyper
    space page table page, a VAD bitmap page and a working set list page.
    The page directory receives a copy of the current address space's
    system PDEs, a PDE for the hyper space page table and a PDE that maps
    the page directory onto itself.

Arguments:

    MinimumWorkingSetSize - Supplies the minimum working set size for the
                            new address space.  It is compared against the
                            available nonpageable memory and charged
                            against resident available pages.

    NewProcess - Supplies the process object being created.

    DirectoryTableBase - Receives two entries: element 0 is initialized
                         from the page directory page and element 1 from
                         the hyper space page table page.

Return Value:

    TRUE if the address space was created.  FALSE if commitment could not
    be charged or if ample physical pages do not exist.

Environment:

    Kernel mode.  APCs Disabled.

--*/

{
    KIRQL OldIrql;
    ULONG PageColor;
    ULONG PdeIndex;
    ULONG LastHyperPdeIndex;
    LOGICAL FlushTbNeeded;
    PEPROCESS CurrentProcess;
    PMMPFN Pfn1;
    PMMPTE MappingPte;
    PMMPTE MappedTable;
    MMPTE TemplatePte;
    MMPTE MapperPte;
    PFN_NUMBER PageDirectoryIndex;
    PFN_NUMBER HyperSpaceIndex;
    PFN_NUMBER VadBitMapPage;
    PFN_NUMBER PageContainingWorkingSet;

    //
    // Commitment for all of the pages this routine consumes is charged
    // up front; give up immediately if the charge is refused.
    //

    if (!MiChargeCommitment (MM_PROCESS_COMMIT_CHARGE, NULL)) {
        return FALSE;
    }

    FlushTbNeeded = FALSE;
    CurrentProcess = PsGetCurrentProcess ();

    NewProcess->NextPageColor = (USHORT) (RtlRandom (&MmProcessColorSeed));
    KeInitializeSpinLock (&NewProcess->HyperSpaceLock);

    //
    // Physical pages are handed out under the PFN lock.
    //

    LOCK_PFN (OldIrql);

    //
    // Refuse the request if the nonpageable memory that remains does not
    // exceed the requested minimum working set.  The commitment charged
    // above is handed back and no directory base is produced.
    //

    if (MI_NONPAGEABLE_MEMORY_AVAILABLE() <= (SPFN_NUMBER)MinimumWorkingSetSize) {
        UNLOCK_PFN (OldIrql);
        MiReturnCommitment (MM_PROCESS_COMMIT_CHARGE);
        return FALSE;
    }

    MM_TRACK_COMMIT (MM_DBG_COMMIT_PROCESS_CREATE, MM_PROCESS_COMMIT_CHARGE);

    MI_DECREMENT_RESIDENT_AVAILABLE (MinimumWorkingSetSize,
                                     MM_RESAVAIL_ALLOCATE_CREATE_PROCESS);

    //
    // Page 1 of 4: the page directory page.  Each page taken below is
    // forced to the cached attribute; if any page had a different
    // attribute a TB flush is issued once the PFN lock is dropped.
    //

    if (MmAvailablePages < MM_HIGH_LIMIT) {
        MiEnsureAvailablePageOrWait (NULL, OldIrql);
    }

    PageColor = MI_PAGE_COLOR_PTE_PROCESS (PDE_BASE,
                                           &CurrentProcess->NextPageColor);

    PageDirectoryIndex = MiRemoveZeroPageMayReleaseLocks (PageColor, OldIrql);

    Pfn1 = MI_PFN_ELEMENT (PageDirectoryIndex);

    if (Pfn1->u3.e1.CacheAttribute != MiCached) {
        Pfn1->u3.e1.CacheAttribute = MiCached;
        FlushTbNeeded = TRUE;
    }

    //
    // Page 2 of 4: the hyper space page table page.
    //

    if (MmAvailablePages < MM_HIGH_LIMIT) {
        MiEnsureAvailablePageOrWait (NULL, OldIrql);
    }

    PageColor = MI_PAGE_COLOR_PTE_PROCESS (MiGetPdeAddress(HYPER_SPACE),
                                           &CurrentProcess->NextPageColor);

    HyperSpaceIndex = MiRemoveZeroPageMayReleaseLocks (PageColor, OldIrql);

    Pfn1 = MI_PFN_ELEMENT (HyperSpaceIndex);

    if (Pfn1->u3.e1.CacheAttribute != MiCached) {
        Pfn1->u3.e1.CacheAttribute = MiCached;
        FlushTbNeeded = TRUE;
    }

    //
    // Page 3 of 4: the VAD bitmap page.
    //

    if (MmAvailablePages < MM_HIGH_LIMIT) {
        MiEnsureAvailablePageOrWait (NULL, OldIrql);
    }

    PageColor = MI_PAGE_COLOR_VA_PROCESS (MmWorkingSetList,
                                          &CurrentProcess->NextPageColor);

    VadBitMapPage = MiRemoveZeroPageMayReleaseLocks (PageColor, OldIrql);

    Pfn1 = MI_PFN_ELEMENT (VadBitMapPage);

    if (Pfn1->u3.e1.CacheAttribute != MiCached) {
        Pfn1->u3.e1.CacheAttribute = MiCached;
        FlushTbNeeded = TRUE;
    }

    //
    // Page 4 of 4: the working set list page.
    //

    if (MmAvailablePages < MM_HIGH_LIMIT) {
        MiEnsureAvailablePageOrWait (NULL, OldIrql);
    }

    PageColor = MI_PAGE_COLOR_VA_PROCESS (MmWorkingSetList,
                                          &CurrentProcess->NextPageColor);

    PageContainingWorkingSet = MiRemoveZeroPageMayReleaseLocks (PageColor,
                                                                OldIrql);

    Pfn1 = MI_PFN_ELEMENT (PageContainingWorkingSet);

    if (Pfn1->u3.e1.CacheAttribute != MiCached) {
        Pfn1->u3.e1.CacheAttribute = MiCached;
        FlushTbNeeded = TRUE;
    }

    UNLOCK_PFN (OldIrql);

    if (FlushTbNeeded) {
        MI_FLUSH_TB_FOR_CACHED_ATTRIBUTE ();
    }

    //
    // Record in the process object that its address space now exists and
    // publish the page frames to the caller.
    //

    ASSERT (NewProcess->AddressSpaceInitialized == 0);
    PS_SET_BITS (&NewProcess->Flags, PS_PROCESS_FLAGS_ADDRESS_SPACE1);
    ASSERT (NewProcess->AddressSpaceInitialized == 1);

    NewProcess->Vm.MinimumWorkingSetSize = MinimumWorkingSetSize;
    NewProcess->WorkingSetPage = PageContainingWorkingSet;

    INITIALIZE_DIRECTORY_TABLE_BASE (&DirectoryTableBase[0], PageDirectoryIndex);
    INITIALIZE_DIRECTORY_TABLE_BASE (&DirectoryTableBase[1], HyperSpaceIndex);

    //
    // Fill in the hyper space page table.  The page is made addressable
    // through a reserved system PTE when one can be had, otherwise through
    // the current process' hyperspace.
    //

    TemplatePte = ValidPdePde;
    MI_SET_GLOBAL_STATE (TemplatePte, 0);

    MappingPte = MiReserveSystemPtes (1, SystemPteSpace);

    if (MappingPte == NULL) {
        MappedTable = MiMapPageInHyperSpace (CurrentProcess,
                                             HyperSpaceIndex,
                                             &OldIrql);
    }
    else {
        MI_MAKE_VALID_KERNEL_PTE (MapperPte,
                                  HyperSpaceIndex,
                                  MM_READWRITE,
                                  MappingPte);

        MI_SET_PTE_DIRTY (MapperPte);
        MI_WRITE_VALID_PTE (MappingPte, MapperPte);

        MappedTable = MiGetVirtualAddressMappedByPte (MappingPte);
    }

    //
    // Two entries are written: one for the VAD bitmap page and one for
    // the working set list page.
    //

    TemplatePte.u.Hard.PageFrameNumber = VadBitMapPage;
    MappedTable[MiGetPteOffset(VAD_BITMAP_SPACE)] = TemplatePte;

    TemplatePte.u.Hard.PageFrameNumber = PageContainingWorkingSet;
    MappedTable[MiGetPteOffset(MmWorkingSetList)] = TemplatePte;

    if (MappingPte == NULL) {
        MiUnmapPageInHyperSpace (CurrentProcess, MappedTable, OldIrql);
    }
    else {
        MiReleaseSystemPtes (MappingPte, 1, SystemPteSpace);
    }

    //
    // The PFN entry of the page directory page records PDE_BASE as its
    // PTE address.
    //

    Pfn1 = MI_PFN_ELEMENT (PageDirectoryIndex);
    Pfn1->PteAddress = (PMMPTE)PDE_BASE;

    //
    // Prepare the PDE that will point at the hyper space page table.
    //

    TemplatePte = ValidPdePde;
    TemplatePte.u.Hard.PageFrameNumber = HyperSpaceIndex;
    MI_SET_GLOBAL_STATE (TemplatePte, 0);

    //
    // The new process goes on the internal process list before any system
    // PDEs are copied, so that a system PDE change (large page map or
    // unmap) can mark this process for a subsequent update.
    //

    ASSERT (NewProcess->Pcb.DirectoryTableBase[0] == 0);

    LOCK_EXPANSION (OldIrql);
    InsertTailList (&MmProcessList, &NewProcess->MmProcessLinks);
    UNLOCK_EXPANSION (OldIrql);

    //
    // Make the page directory page addressable, again preferring a system
    // PTE and falling back to hyperspace.
    //

    MappingPte = MiReserveSystemPtes (1, SystemPteSpace);

    if (MappingPte == NULL) {
        MappedTable = MiMapPageInHyperSpace (CurrentProcess,
                                             PageDirectoryIndex,
                                             &OldIrql);
    }
    else {
        MI_MAKE_VALID_KERNEL_PTE (MapperPte,
                                  PageDirectoryIndex,
                                  MM_READWRITE,
                                  MappingPte);

        MI_SET_PTE_DIRTY (MapperPte);
        MI_WRITE_VALID_PTE (MappingPte, MapperPte);

        MappedTable = MiGetVirtualAddressMappedByPte (MappingPte);
    }

    //
    // Copy the PDEs from MmSystemRangeStart through the end of the page
    // directory out of the current address space.
    //

    PdeIndex = MiGetPdeOffset (MmSystemRangeStart);

    RtlCopyMemory (&MappedTable[PdeIndex],
                   MiGetPdeAddress (MmSystemRangeStart),
                   PAGE_SIZE - PdeIndex * sizeof (MMPTE));

    //
    // Install the PDE for the hyper space (working set) page table page.
    //

    PdeIndex = MiGetPdeOffset (HYPER_SPACE);
    MappedTable[PdeIndex] = TemplatePte;

    //
    // Zero the rest of the page directory range that maps the working set
    // list and its hash, through the PDE for MmHyperSpaceEnd inclusive.
    //

    PdeIndex += 1;
    LastHyperPdeIndex = MiGetPdeOffset (MmHyperSpaceEnd);

    ASSERT (LastHyperPdeIndex >= PdeIndex);

    MiZeroMemoryPte (&MappedTable[PdeIndex],
                     (LastHyperPdeIndex - PdeIndex + 1));

    //
    // Point the PTE_BASE slot of the page directory back at the page
    // directory itself (the recursive mapping).
    //

    TemplatePte.u.Hard.PageFrameNumber = PageDirectoryIndex;
    MappedTable[MiGetPdeOffset(PTE_BASE)] = TemplatePte;

    if (MappingPte == NULL) {
        MiUnmapPageInHyperSpace (CurrentProcess, MappedTable, OldIrql);
    }
    else {
        MiReleaseSystemPtes (MappingPte, 1, SystemPteSpace);
    }

    InterlockedExchangeAddSizeT (&MmProcessCommit, MM_PROCESS_COMMIT_CHARGE);

    //
    // Up the session space reference count.
    //

    MiSessionAddProcess (NewProcess);

    return TRUE;
}
NTSTATUS MiCcPutPagesInTransition ( IN PMI_READ_INFO MiReadInfo ) /*++ Routine Description: This routine allocates physical memory for the specified read-list and puts all the pages in transition (so collided faults from other threads for these same pages remain coherent). I/O for any pages not already resident are issued here. The caller must wait for their completion. Arguments: MiReadInfo - Supplies a pointer to the read-list. Return Value: STATUS_SUCCESS - all the pages were already resident, reference counts have been applied and no I/O needs to be waited for. STATUS_ISSUE_PAGING_IO - the I/O has been issued and the caller must wait. Various other failure status values indicate the operation failed. Environment: Kernel mode. PASSIVE_LEVEL. --*/ { NTSTATUS status; PMMPTE LocalPrototypePte; PVOID StartingVa; PFN_NUMBER MdlPages; KIRQL OldIrql; MMPTE PteContents; PFN_NUMBER PageFrameIndex; PFN_NUMBER ResidentAvailableCharge; PPFN_NUMBER IoPage; PPFN_NUMBER ApiPage; PPFN_NUMBER Page; PPFN_NUMBER DestinationPage; ULONG PageColor; PMMPTE PointerPte; PMMPTE *ProtoPteArray; PMMPTE *EndProtoPteArray; PFN_NUMBER DummyPage; PMDL Mdl; PMDL FreeMdl; PMMPFN PfnProto; PMMPFN Pfn1; PMMPFN DummyPfn1; ULONG i; PFN_NUMBER DummyTrim; ULONG NumberOfPagesNeedingIo; MMPTE TempPte; PMMPTE PointerPde; PEPROCESS CurrentProcess; PMMINPAGE_SUPPORT InPageSupport; PKPRCB Prcb; ASSERT (KeGetCurrentIrql() == PASSIVE_LEVEL); MiReadInfo->DummyPagePfn = NULL; FreeMdl = NULL; CurrentProcess = PsGetCurrentProcess(); PfnProto = NULL; PointerPde = NULL; InPageSupport = MiReadInfo->InPageSupport; Mdl = MI_EXTRACT_PREFETCH_MDL (InPageSupport); ASSERT (Mdl == MiReadInfo->IoMdl); IoPage = (PPFN_NUMBER)(Mdl + 1); ApiPage = (PPFN_NUMBER)(MiReadInfo->ApiMdl + 1); StartingVa = (PVOID)((PCHAR)Mdl->StartVa + Mdl->ByteOffset); MdlPages = ADDRESS_AND_SIZE_TO_SPAN_PAGES (StartingVa, Mdl->ByteCount); if (MdlPages + 1 > MAXUSHORT) { // // The PFN ReferenceCount for the dummy page could wrap, refuse the // 
request. // return STATUS_INSUFFICIENT_RESOURCES; } NumberOfPagesNeedingIo = 0; ProtoPteArray = (PMMPTE *)InPageSupport->BasePte; EndProtoPteArray = ProtoPteArray + MdlPages; ASSERT (*ProtoPteArray != NULL); LOCK_PFN (OldIrql); // // Ensure sufficient pages exist for the transfer plus the dummy page. // if (((SPFN_NUMBER)MdlPages > (SPFN_NUMBER)(MmAvailablePages - MM_HIGH_LIMIT)) || (MI_NONPAGEABLE_MEMORY_AVAILABLE() <= (SPFN_NUMBER)MdlPages)) { UNLOCK_PFN (OldIrql); return STATUS_INSUFFICIENT_RESOURCES; } // // Charge resident available immediately as the PFN lock may get released // and reacquired below before all the pages have been locked down. // Note the dummy page is immediately charged separately. // MI_DECREMENT_RESIDENT_AVAILABLE (MdlPages, MM_RESAVAIL_ALLOCATE_BUILDMDL); ResidentAvailableCharge = MdlPages; // // Allocate a dummy page to map discarded pages that aren't skipped. // DummyPage = MiRemoveAnyPage (0); Pfn1 = MI_PFN_ELEMENT (DummyPage); ASSERT (Pfn1->u2.ShareCount == 0); ASSERT (Pfn1->u3.e2.ReferenceCount == 0); MiInitializePfnForOtherProcess (DummyPage, MI_PF_DUMMY_PAGE_PTE, 0); // // Give the page a containing frame so MiIdentifyPfn won't crash. // Pfn1->u4.PteFrame = PsInitialSystemProcess->Pcb.DirectoryTableBase[0] >> PAGE_SHIFT; // // Always bias the reference count by 1 and charge for this locked page // up front so the myriad increments and decrements don't get slowed // down with needless checking. // Pfn1->u3.e1.PrototypePte = 0; MI_ADD_LOCKED_PAGE_CHARGE (Pfn1); Pfn1->u3.e1.ReadInProgress = 1; MiReadInfo->DummyPagePfn = Pfn1; DummyPfn1 = Pfn1; DummyPfn1->u3.e2.ReferenceCount = (USHORT)(DummyPfn1->u3.e2.ReferenceCount + MdlPages); // // Properly initialize the inpage support block fields we overloaded. // InPageSupport->BasePte = *ProtoPteArray; // // Build the proper InPageSupport and MDL to describe this run. 
// for (; ProtoPteArray < EndProtoPteArray; ProtoPteArray += 1, IoPage += 1, ApiPage += 1) { // // Fill the MDL entry for this RLE. // PointerPte = *ProtoPteArray; ASSERT (PointerPte != NULL); // // The PointerPte better be inside a prototype PTE allocation // so that subsequent page trims update the correct PTEs. // ASSERT (((PointerPte >= (PMMPTE)MmPagedPoolStart) && (PointerPte <= (PMMPTE)MmPagedPoolEnd)) || ((PointerPte >= (PMMPTE)MmSpecialPoolStart) && (PointerPte <= (PMMPTE)MmSpecialPoolEnd))); // // Check the state of this prototype PTE now that the PFN lock is held. // If the page is not resident, the PTE must be put in transition with // read in progress before the PFN lock is released. // // // Lock page containing prototype PTEs in memory by // incrementing the reference count for the page. // Unlock any page locked earlier containing prototype PTEs if // the containing page is not the same for both. // if (PfnProto != NULL) { if (PointerPde != MiGetPteAddress (PointerPte)) { ASSERT (PfnProto->u3.e2.ReferenceCount > 1); MI_REMOVE_LOCKED_PAGE_CHARGE_AND_DECREF (PfnProto); PfnProto = NULL; } } if (PfnProto == NULL) { ASSERT (!MI_IS_PHYSICAL_ADDRESS (PointerPte)); PointerPde = MiGetPteAddress (PointerPte); if (PointerPde->u.Hard.Valid == 0) { MiMakeSystemAddressValidPfn (PointerPte, OldIrql); } PfnProto = MI_PFN_ELEMENT (PointerPde->u.Hard.PageFrameNumber); MI_ADD_LOCKED_PAGE_CHARGE (PfnProto); ASSERT (PfnProto->u3.e2.ReferenceCount > 1); } recheck: PteContents = *PointerPte; // LWFIX: are zero or dzero ptes possible here ? ASSERT (PteContents.u.Long != 0); if (PteContents.u.Hard.Valid == 1) { PageFrameIndex = MI_GET_PAGE_FRAME_FROM_PTE (&PteContents); Pfn1 = MI_PFN_ELEMENT (PageFrameIndex); ASSERT (Pfn1->u3.e1.PrototypePte == 1); MI_ADD_LOCKED_PAGE_CHARGE (Pfn1); *ApiPage = PageFrameIndex; *IoPage = DummyPage; continue; } if ((PteContents.u.Soft.Prototype == 0) && (PteContents.u.Soft.Transition == 1)) { // // The page is in transition. 
If there is an inpage still in // progress, wait for it to complete. Reference the PFN and // then march on. // PageFrameIndex = MI_GET_PAGE_FRAME_FROM_TRANSITION_PTE (&PteContents); Pfn1 = MI_PFN_ELEMENT (PageFrameIndex); ASSERT (Pfn1->u3.e1.PrototypePte == 1); if (Pfn1->u4.InPageError) { // // There was an in-page read error and there are other // threads colliding for this page, delay to let the // other threads complete and then retry. // UNLOCK_PFN (OldIrql); KeDelayExecutionThread (KernelMode, FALSE, (PLARGE_INTEGER)&MmHalfSecond); LOCK_PFN (OldIrql); goto recheck; } if (Pfn1->u3.e1.ReadInProgress) { // LWFIX - start with temp\aw.c } // // PTE refers to a normal transition PTE. // ASSERT ((SPFN_NUMBER)MmAvailablePages >= 0); if (MmAvailablePages == 0) { // // This can only happen if the system is utilizing a hardware // compression cache. This ensures that only a safe amount // of the compressed virtual cache is directly mapped so that // if the hardware gets into trouble, we can bail it out. // UNLOCK_PFN (OldIrql); KeDelayExecutionThread (KernelMode, FALSE, (PLARGE_INTEGER)&MmHalfSecond); LOCK_PFN (OldIrql); goto recheck; } // // The PFN reference count will be 1 already here if the // modified writer has begun a write of this page. Otherwise // it's ordinarily 0. // MI_ADD_LOCKED_PAGE_CHARGE_FOR_MODIFIED_PAGE (Pfn1); *IoPage = DummyPage; *ApiPage = PageFrameIndex; continue; } // LWFIX: need to handle protos that are now pagefile (or dzero) // backed - prefetching it from the file here would cause us to lose // the contents. Note this can happen for session-space images // as we back modified (ie: for relocation fixups or IAT // updated) portions from the pagefile. remove the assert below too. ASSERT (PteContents.u.Soft.Prototype == 1); if ((MmAvailablePages < MM_HIGH_LIMIT) && (MiEnsureAvailablePageOrWait (NULL, OldIrql))) { // // Had to wait so recheck all state. // goto recheck; } NumberOfPagesNeedingIo += 1; // // Allocate a physical page. 
// PageColor = MI_PAGE_COLOR_VA_PROCESS ( MiGetVirtualAddressMappedByPte (PointerPte), &CurrentProcess->NextPageColor); PageFrameIndex = MiRemoveAnyPage (PageColor); Pfn1 = MI_PFN_ELEMENT (PageFrameIndex); ASSERT (Pfn1->u3.e2.ReferenceCount == 0); ASSERT (Pfn1->u2.ShareCount == 0); ASSERT (PointerPte->u.Hard.Valid == 0); // // Initialize read-in-progress PFN. // MiInitializePfn (PageFrameIndex, PointerPte, 0); // // These pieces of MiInitializePfn initialization are overridden // here as these pages are only going into prototype // transition and not into any page tables. // Pfn1->u3.e1.PrototypePte = 1; Pfn1->u2.ShareCount -= 1; ASSERT (Pfn1->u2.ShareCount == 0); Pfn1->u3.e1.PageLocation = ZeroedPageList; Pfn1->u3.e2.ReferenceCount -= 1; ASSERT (Pfn1->u3.e2.ReferenceCount == 0); MI_ADD_LOCKED_PAGE_CHARGE_FOR_MODIFIED_PAGE (Pfn1); // // Initialize the I/O specific fields. // Pfn1->u1.Event = &InPageSupport->Event; Pfn1->u3.e1.ReadInProgress = 1; ASSERT (Pfn1->u4.InPageError == 0); // // Increment the PFN reference count in the control area for // the subsection. // MiReadInfo->ControlArea->NumberOfPfnReferences += 1; // // Put the prototype PTE into the transition state. // MI_MAKE_TRANSITION_PTE (TempPte, PageFrameIndex, PointerPte->u.Soft.Protection, PointerPte); MI_WRITE_INVALID_PTE (PointerPte, TempPte); *IoPage = PageFrameIndex; *ApiPage = PageFrameIndex; } // // If all the pages were resident, dereference the dummy page references // now and notify our caller that I/O is not necessary. // if (NumberOfPagesNeedingIo == 0) { ASSERT (DummyPfn1->u3.e2.ReferenceCount > MdlPages); DummyPfn1->u3.e2.ReferenceCount = (USHORT)(DummyPfn1->u3.e2.ReferenceCount - MdlPages); // // Unlock page containing prototype PTEs. 
// if (PfnProto != NULL) { ASSERT (PfnProto->u3.e2.ReferenceCount > 1); MI_REMOVE_LOCKED_PAGE_CHARGE_AND_DECREF (PfnProto); } UNLOCK_PFN (OldIrql); // // Return the upfront resident available charge as the // individual charges have all been made at this point. // MI_INCREMENT_RESIDENT_AVAILABLE (ResidentAvailableCharge, MM_RESAVAIL_FREE_BUILDMDL_EXCESS); return STATUS_SUCCESS; } // // Carefully trim leading dummy pages. // Page = (PPFN_NUMBER)(Mdl + 1); DummyTrim = 0; for (i = 0; i < MdlPages - 1; i += 1) { if (*Page == DummyPage) { DummyTrim += 1; Page += 1; } else { break; } } if (DummyTrim != 0) { Mdl->Size = (USHORT)(Mdl->Size - (DummyTrim * sizeof(PFN_NUMBER))); Mdl->ByteCount -= (ULONG)(DummyTrim * PAGE_SIZE); ASSERT (Mdl->ByteCount != 0); InPageSupport->ReadOffset.QuadPart += (DummyTrim * PAGE_SIZE); DummyPfn1->u3.e2.ReferenceCount = (USHORT)(DummyPfn1->u3.e2.ReferenceCount - DummyTrim); // // Shuffle down the PFNs in the MDL. // Recalculate BasePte to adjust for the shuffle. // Pfn1 = MI_PFN_ELEMENT (*Page); ASSERT (Pfn1->PteAddress->u.Hard.Valid == 0); ASSERT ((Pfn1->PteAddress->u.Soft.Prototype == 0) && (Pfn1->PteAddress->u.Soft.Transition == 1)); InPageSupport->BasePte = Pfn1->PteAddress; DestinationPage = (PPFN_NUMBER)(Mdl + 1); do { *DestinationPage = *Page; DestinationPage += 1; Page += 1; i += 1; } while (i < MdlPages); MdlPages -= DummyTrim; } // // Carefully trim trailing dummy pages. // ASSERT (MdlPages != 0); Page = (PPFN_NUMBER)(Mdl + 1) + MdlPages - 1; if (*Page == DummyPage) { ASSERT (MdlPages >= 2); // // Trim the last page specially as it may be a partial page. // Mdl->Size -= sizeof(PFN_NUMBER); if (BYTE_OFFSET(Mdl->ByteCount) != 0) { Mdl->ByteCount &= ~(PAGE_SIZE - 1); } else { Mdl->ByteCount -= PAGE_SIZE; } ASSERT (Mdl->ByteCount != 0); DummyPfn1->u3.e2.ReferenceCount -= 1; // // Now trim any other trailing pages. 
// Page -= 1; DummyTrim = 0; while (Page != ((PPFN_NUMBER)(Mdl + 1))) { if (*Page != DummyPage) { break; } DummyTrim += 1; Page -= 1; } if (DummyTrim != 0) { ASSERT (Mdl->Size > (USHORT)(DummyTrim * sizeof(PFN_NUMBER))); Mdl->Size = (USHORT)(Mdl->Size - (DummyTrim * sizeof(PFN_NUMBER))); Mdl->ByteCount -= (ULONG)(DummyTrim * PAGE_SIZE); DummyPfn1->u3.e2.ReferenceCount = (USHORT)(DummyPfn1->u3.e2.ReferenceCount - DummyTrim); } ASSERT (MdlPages > DummyTrim + 1); MdlPages -= (DummyTrim + 1); #if DBG StartingVa = (PVOID)((PCHAR)Mdl->StartVa + Mdl->ByteOffset); ASSERT (MdlPages == ADDRESS_AND_SIZE_TO_SPAN_PAGES(StartingVa, Mdl->ByteCount)); #endif } // // If the MDL is not already embedded in the inpage block, see if its // final size qualifies it - if so, embed it now. // if ((Mdl != &InPageSupport->Mdl) && (Mdl->ByteCount <= (MM_MAXIMUM_READ_CLUSTER_SIZE + 1) * PAGE_SIZE)){ #if DBG RtlFillMemoryUlong (&InPageSupport->Page[0], (MM_MAXIMUM_READ_CLUSTER_SIZE+1) * sizeof (PFN_NUMBER), 0xf1f1f1f1); #endif RtlCopyMemory (&InPageSupport->Mdl, Mdl, Mdl->Size); FreeMdl = Mdl; Mdl = &InPageSupport->Mdl; ASSERT (((ULONG_PTR)Mdl & (sizeof(QUAD) - 1)) == 0); InPageSupport->u1.e1.PrefetchMdlHighBits = ((ULONG_PTR)Mdl >> 3); }
LOGICAL
FASTCALL
MiCopyOnWrite (
    IN PVOID FaultingAddress,
    IN PMMPTE PointerPte
    )

/*++

Routine Description:

    Handles a copy on write fault on the supplied virtual address.  In the
    normal case a new physical page is obtained, the contents of the
    faulting page are copied into it, the PTE is switched to the new page
    and the share count of the original page is decremented.

    For a session space address inside an image that is still being
    loaded, no copy is made: the existing PTE is simply made writable.

Arguments:

    FaultingAddress - Supplies the virtual address which caused the fault.

    PointerPte - Supplies the pointer to the PTE which caused the page
                 fault.

Return Value:

    TRUE if the page was actually split, FALSE if not.

Environment:

    Kernel mode, APCs disabled, working set mutex held.

--*/

{
    KIRQL OldIrql;
    LOGICAL FakeCopyOnWrite;
    MMPTE PteContents;
    MMPTE MapperPte;
    PMMPTE MappingPte;
    PMMPFN Pfn1;
    PFN_NUMBER PageFrameIndex;
    PFN_NUMBER NewPageIndex;
    PVOID SourceVa;
    PVOID TargetVa;
    PVOID SessionSpace;
    PEPROCESS CurrentProcess;
    PMMWSL WorkingSetList;
    WSLE_NUMBER WorkingSetIndex;
    PMMCLONE_BLOCK CloneBlock;
    PMMCLONE_DESCRIPTOR CloneDescriptor;
    PLIST_ENTRY Entry;
    PIMAGE_ENTRY_IN_SESSION Image;

    //
    // The caller is MmAccessFault: the PTE is valid and the working set
    // mutex keeps it from changing state, so a snapshot of it is taken.
    //

    PteContents = *PointerPte;

    ASSERT (PteContents.u.Hard.Valid == 1);

    PageFrameIndex = MI_GET_PAGE_FRAME_FROM_PTE (&PteContents);
    Pfn1 = MI_PFN_ELEMENT (PageFrameIndex);

    CurrentProcess = PsGetCurrentProcess ();
    CloneBlock = NULL;
    FakeCopyOnWrite = FALSE;

    if (FaultingAddress >= (PVOID) MmSessionBase) {

        //
        // Session space fault.
        //

        WorkingSetList = MmSessionSpace->Vm.VmWorkingSetList;
        ASSERT (Pfn1->u3.e1.PrototypePte == 1);
        SessionSpace = (PVOID) MmSessionSpace;

        MM_SESSION_SPACE_WS_LOCK_ASSERT ();

        if (MmSessionSpace->ImageLoadingCount != 0) {

            //
            // At least one session image is being loaded; look for the
            // image that contains the faulting address.
            //

            for (Entry = MmSessionSpace->ImageList.Flink;
                 Entry != &MmSessionSpace->ImageList;
                 Entry = Entry->Flink) {

                Image = CONTAINING_RECORD (Entry,
                                           IMAGE_ENTRY_IN_SESSION,
                                           Link);

                if ((FaultingAddress < Image->Address) ||
                    (FaultingAddress > Image->LastAddress)) {

                    continue;
                }

                if (!Image->ImageLoading) {

                    //
                    // The containing image is not loading, so this is an
                    // ordinary copy on write.
                    //

                    break;
                }

                ASSERT (Pfn1->u3.e1.PrototypePte == 1);

                //
                // The page stops being copy on write in place: the PTE is
                // rewritten as writable with both the dirty and accessed
                // bits set.
                //
                // Although the page is currently backed by the image file,
                // the modified writer will convert it to pagefile backing
                // when it notices the change later.
                //

                PteContents.u.Hard.CopyOnWrite = 0;
                PteContents.u.Hard.Write = 1;

                MI_SET_PTE_DIRTY (PteContents);
                MI_SET_ACCESSED_IN_PTE (&PteContents, 1);

                MI_WRITE_VALID_PTE_NEW_PROTECTION (PointerPte, PteContents);

                //
                // The valid PTE with the dirty bit clear has been fetched
                // into the TB, so the entry must be flushed or another
                // fault would be generated from the stale TB entry.
                //

                MI_FLUSH_SINGLE_TB (FaultingAddress, TRUE);

                return FALSE;
            }
        }
    }
    else {

        //
        // Process (non-session) fault.
        //

        WorkingSetList = MmWorkingSetList;
        SessionSpace = NULL;

        //
        // When a fork is in progress, wait for it to finish and then have
        // the whole operation retried, because everything may have changed
        // while the mutexes were released.
        //

        if ((CurrentProcess->ForkInProgress != NULL) &&
            (MiWaitForForkToComplete (CurrentProcess) == TRUE)) {

            return FALSE;
        }

        //
        // Without the copy on write bit this is a fork page being made
        // private only so that its protection can be changed; such a page
        // must not be made writable.
        //

        if (PteContents.u.Hard.CopyOnWrite == 0) {
            FakeCopyOnWrite = TRUE;
        }
    }

    WorkingSetIndex = MiLocateWsle (FaultingAddress,
                                    WorkingSetList,
                                    Pfn1->u1.WsIndex,
                                    FALSE);

    //
    // A new page is needed to hold the private copy.
    //

    LOCK_PFN (OldIrql);

    if ((MmAvailablePages < MM_HIGH_LIMIT) &&
        (MiEnsureAvailablePageOrWait (SessionSpace != NULL ? HYDRA_PROCESS : CurrentProcess,
                                      OldIrql))) {

        //
        // Obtaining an available page required a wait, during which the
        // working set mutex and PFN lock were released.  Instead of
        // re-validating everything, return; the fault is simply taken
        // again if it still applies.
        //

        UNLOCK_PFN (OldIrql);
        return FALSE;
    }

    //
    // Only prototype PTE backed pages are copied on write.
    //

    ASSERT (Pfn1->u3.e1.PrototypePte == 1);

    if (SessionSpace == NULL) {

        //
        // The page is about to become private, so on certain hardware the
        // PTE dirty bit must first be ORed into the modify bit of the
        // shared page's PFN element.
        //

        MI_CAPTURE_DIRTY_BIT_TO_PFN (PointerPte, Pfn1);

        CloneBlock = (PMMCLONE_BLOCK) Pfn1->PteAddress;

        //
        // Get a new page with the same color as this page.
        //

        NewPageIndex = MiRemoveAnyPage (
                            MI_PAGE_COLOR_PTE_PROCESS(PageFrameIndex,
                                                      &CurrentProcess->NextPageColor));
    }
    else {

        //
        // A session page cannot be dirty (no POSIX-style forking is
        // supported for these drivers).
        //

        ASSERT ((PteContents.u.Hard.Valid == 1) && (PteContents.u.Hard.Write == 0));
        ASSERT (!MI_IS_PTE_DIRTY (PteContents));

        NewPageIndex = MiRemoveAnyPage (MI_GET_PAGE_COLOR_FROM_SESSION(MmSessionSpace));
    }

    MiInitializeCopyOnWritePfn (NewPageIndex,
                                PointerPte,
                                WorkingSetIndex,
                                WorkingSetList);

    UNLOCK_PFN (OldIrql);

    InterlockedIncrement (&KeGetCurrentPrcb ()->MmCopyOnWriteCount);

    //
    // Map the new page (through a system PTE when one is available,
    // otherwise through hyperspace) and copy the faulting page into it.
    // The temporary mapping takes on the noncached or write combined
    // attribute recorded in the original page's PFN entry.
    //

    SourceVa = PAGE_ALIGN (FaultingAddress);

    MappingPte = MiReserveSystemPtes (1, SystemPteSpace);

    if (MappingPte == NULL) {
        TargetVa = MiMapPageInHyperSpace (CurrentProcess,
                                          NewPageIndex,
                                          &OldIrql);
    }
    else {
        MI_MAKE_VALID_KERNEL_PTE (MapperPte,
                                  NewPageIndex,
                                  MM_READWRITE,
                                  MappingPte);

        MI_SET_PTE_DIRTY (MapperPte);

        if (Pfn1->u3.e1.CacheAttribute == MiNonCached) {
            MI_DISABLE_CACHING (MapperPte);
        }
        else if (Pfn1->u3.e1.CacheAttribute == MiWriteCombined) {
            MI_SET_PTE_WRITE_COMBINE (MapperPte);
        }

        MI_WRITE_VALID_PTE (MappingPte, MapperPte);

        TargetVa = MiGetVirtualAddressMappedByPte (MappingPte);
    }

    KeCopyPage (TargetVa, SourceVa);

    if (MappingPte == NULL) {
        MiUnmapPageInHyperSpace (CurrentProcess, TargetVa, OldIrql);
    }
    else {
        MiReleaseSystemPtes (MappingPte, 1, SystemPteSpace);
    }

    if (FakeCopyOnWrite == FALSE) {

        //
        // A genuine copy on write page becomes accessed, dirty and
        // writable, and loses its copy-on-write bit.
        //

        MI_SET_PTE_DIRTY (PteContents);
        PteContents.u.Hard.Write = 1;
        MI_SET_ACCESSED_IN_PTE (&PteContents, 1);
        PteContents.u.Hard.CopyOnWrite = 0;
    }

    //
    // Genuine or not, the PTE must now refer to the new page frame.
    //

    PteContents.u.Hard.PageFrameNumber = NewPageIndex;

    //
    // If the modify bit is set in the PFN database for the page, the data
    // cache must be flushed, since this process may have been cloned and
    // the cache may still hold stale data destined for the page being
    // removed.
    //

    ASSERT (PteContents.u.Hard.Valid == 1);

    MI_WRITE_VALID_PTE_NEW_PAGE (PointerPte, PteContents);

    //
    // Flush the TB entry for this page.  A process page also bumps the
    // count of private pages.
    //

    if (SessionSpace != NULL) {
        MI_FLUSH_SINGLE_TB (FaultingAddress, TRUE);

        ASSERT (Pfn1->u3.e1.PrototypePte == 1);
    }
    else {
        MI_FLUSH_SINGLE_TB (FaultingAddress, FALSE);

        CurrentProcess->NumberOfPrivatePages += 1;
    }

    //
    // This PTE no longer refers to the page that was copied, so its share
    // count is dropped.
    //

    LOCK_PFN (OldIrql);

    MiDecrementShareCount (Pfn1, PageFrameIndex);

    if (SessionSpace == NULL) {

        CloneDescriptor = MiLocateCloneAddress (CurrentProcess,
                                                (PVOID)CloneBlock);

        if (CloneDescriptor != NULL) {

            //
            // Drop the clone block reference; note that this could release
            // and reacquire the mutexes.
            //

            MiDecrementCloneBlockReference (CloneDescriptor,
                                            CloneBlock,
                                            CurrentProcess,
                                            NULL,
                                            OldIrql);
        }
    }

    UNLOCK_PFN (OldIrql);

    return TRUE;
}