Error PDBFile::parseFileHeaders() { std::error_code EC; MemoryBufferRef BufferRef = *Context->Buffer; // Make sure the file is sufficiently large to hold a super block. // Do this before attempting to read the super block. if (BufferRef.getBufferSize() < sizeof(SuperBlock)) return make_error<RawError>(raw_error_code::corrupt_file, "Does not contain superblock"); Context->SB = reinterpret_cast<const SuperBlock *>(BufferRef.getBufferStart()); const SuperBlock *SB = Context->SB; // Check the magic bytes. if (memcmp(SB->MagicBytes, Magic, sizeof(Magic)) != 0) return make_error<RawError>(raw_error_code::corrupt_file, "MSF magic header doesn't match"); // We don't support blocksizes which aren't a multiple of four bytes. if (SB->BlockSize % sizeof(support::ulittle32_t) != 0) return make_error<RawError>(raw_error_code::corrupt_file, "Block size is not multiple of 4."); switch (SB->BlockSize) { case 512: case 1024: case 2048: case 4096: break; default: // An invalid block size suggests a corrupt PDB file. return make_error<RawError>(raw_error_code::corrupt_file, "Unsupported block size."); } if (BufferRef.getBufferSize() % SB->BlockSize != 0) return make_error<RawError>(raw_error_code::corrupt_file, "File size is not a multiple of block size"); // We don't support directories whose sizes aren't a multiple of four bytes. if (SB->NumDirectoryBytes % sizeof(support::ulittle32_t) != 0) return make_error<RawError>(raw_error_code::corrupt_file, "Directory size is not multiple of 4."); // The number of blocks which comprise the directory is a simple function of // the number of bytes it contains. uint64_t NumDirectoryBlocks = getNumDirectoryBlocks(); // The block map, as we understand it, is a block which consists of a list of // block numbers. // It is unclear what would happen if the number of blocks couldn't fit on a // single block. 
if (NumDirectoryBlocks > SB->BlockSize / sizeof(support::ulittle32_t)) return make_error<RawError>(raw_error_code::corrupt_file, "Too many directory blocks."); // Make sure the directory block array fits within the file. if (auto EC = checkOffset(BufferRef, getDirectoryBlockArray())) return EC; return Error::success(); }
// Returns false if size is greater than the buffer size. And sets ec. static bool checkSize(MemoryBufferRef M, std::error_code &EC, uint64_t Size) { if (M.getBufferSize() < Size) { EC = object_error::unexpected_eof; return false; } return true; }
void notifyObjectCompiled(const Module *M, MemoryBufferRef Obj) override { const std::string &ModuleID = M->getModuleIdentifier(); std::string CacheName; if (!getCacheFilename(ModuleID, CacheName)) return; if (!CacheDir.empty()) { // Create user-defined cache dir. SmallString<128> dir(sys::path::parent_path(CacheName)); sys::fs::create_directories(Twine(dir)); } std::error_code EC; raw_fd_ostream outfile(CacheName, EC, sys::fs::F_None); outfile.write(Obj.getBufferStart(), Obj.getBufferSize()); outfile.close(); }
// Parse the module summary index out of an IR file and return the summary // index object if found, or nullptr if not. Expected<std::unique_ptr<ModuleSummaryIndex>> llvm::getModuleSummaryIndexForFile(StringRef Path) { ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = MemoryBuffer::getFileOrSTDIN(Path); std::error_code EC = FileOrErr.getError(); if (EC) return errorCodeToError(EC); MemoryBufferRef BufferRef = (FileOrErr.get())->getMemBufferRef(); if (IgnoreEmptyThinLTOIndexFile && !BufferRef.getBufferSize()) return nullptr; Expected<std::unique_ptr<object::ModuleSummaryIndexObjectFile>> ObjOrErr = object::ModuleSummaryIndexObjectFile::create(BufferRef); if (!ObjOrErr) return ObjOrErr.takeError(); object::ModuleSummaryIndexObjectFile &Obj = **ObjOrErr; return Obj.takeIndex(); }
std::error_code PDBFile::parseFileHeaders() { std::error_code EC; MemoryBufferRef BufferRef = *Context->Buffer; Context->SB = reinterpret_cast<const SuperBlock *>(BufferRef.getBufferStart()); const SuperBlock *SB = Context->SB; // Make sure the file is sufficiently large to hold a super block. if (BufferRef.getBufferSize() < sizeof(SuperBlock)) return std::make_error_code(std::errc::illegal_byte_sequence); // Check the magic bytes. if (memcmp(SB->MagicBytes, Magic, sizeof(Magic)) != 0) return std::make_error_code(std::errc::illegal_byte_sequence); // We don't support blocksizes which aren't a multiple of four bytes. if (SB->BlockSize == 0 || SB->BlockSize % sizeof(support::ulittle32_t) != 0) return std::make_error_code(std::errc::not_supported); // We don't support directories whose sizes aren't a multiple of four bytes. if (SB->NumDirectoryBytes % sizeof(support::ulittle32_t) != 0) return std::make_error_code(std::errc::not_supported); // The number of blocks which comprise the directory is a simple function of // the number of bytes it contains. uint64_t NumDirectoryBlocks = getNumDirectoryBlocks(); // The block map, as we understand it, is a block which consists of a list of // block numbers. // It is unclear what would happen if the number of blocks couldn't fit on a // single block. if (NumDirectoryBlocks > SB->BlockSize / sizeof(support::ulittle32_t)) return std::make_error_code(std::errc::illegal_byte_sequence); return std::error_code(); }
/// Builds the per-member header, payload and padding records for a new
/// archive, appending to \p StringTable and \p SymNames along the way.
///
/// \param StringTable   Stream handed to printMemberHeader for member names.
/// \param SymNames      Stream handed to getSymbols for symbol names.
/// \param Kind          Archive flavour being produced.
/// \param Thin          When true, member payloads are left empty.
/// \param Deterministic Whether deterministic output was requested.
/// \param NewMembers    Members to lay out, in archive order.
/// \returns One MemberData per member, or the first error from getSymbols.
static Expected<std::vector<MemberData>>
computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
                  object::Archive::Kind Kind, bool Thin, bool Deterministic,
                  ArrayRef<NewArchiveMember> NewMembers) {
  // Source of padding bytes; every returned Padding is a prefix of this.
  static char PaddingData[8] = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'};

  // The symbol table is not counted in this running offset. Only the value
  // mod 8 matters, and the symbol table is aligned to a multiple of 8 bytes.
  uint64_t Offset = 0;

  std::vector<MemberData> Members;
  bool SawObject = false;

  // Long member names are deduplicated in the string table so that earlier
  // name offsets can be reused. This especially saves space for COFF import
  // libraries, where all members have the same name.
  StringMap<uint64_t> KnownNames;

  // Unique timestamps are a special case to improve debugging on Darwin.
  //
  // The Darwin linker does not link debug info into the final binary.
  // Instead, it emits N_OSO entries in the output binary's symbol table that
  // refer to the linked-in object files. The debugger can follow those
  // references and read debug data straight from the object files, or
  // 'dsymutil' can link that debug data into a dSYM bundle for the debugger
  // to load instead.
  //
  // For a plain object file, an N_OSO entry holds the absolute path to the
  // file and the file's timestamp. For an archive member, the path looks like
  // "/absolute/path/to/archive.a(member.o)" and the timestamp is the
  // member's, not the archive's.
  //
  // That does not always identify a member uniquely: an archive can contain
  // several entries with the same filename (commonly when the original
  // objects lived in different directories). The timestamp is then the only
  // distinguishing feature, so lookup cannot find the right object when two
  // members share both name and timestamp.
  //
  // In addition, timestamp==0 is special: it causes the timestamp to be
  // ignored as a match criterion.
  //
  // Outside deterministic mode this "usually" works, because the objects
  // were probably created at different times.
  //
  // To ameliorate the problem, in deterministic mode (the default) on Darwin
  // every entry with a duplicated name gets a unique non-zero timestamp. The
  // result is still deterministic: the timestamp depends only on the order of
  // the files in the resulting archive.
  //
  // See also the functions that perform the lookup:
  //   in lldb: ObjectContainerBSDArchive::Archive::FindObject()
  //   in llvm/tools/dsymutil: BinaryHolder::GetArchiveMemberBuffers().
  const bool UniqueTimestamps = Deterministic && isDarwin(Kind);
  std::map<StringRef, unsigned> FilenameCount;
  if (UniqueTimestamps) {
    // First count occurrences of each name ...
    for (const NewArchiveMember &Member : NewMembers)
      ++FilenameCount[Member.MemberName];
    // ... then turn each count into the starting timestamp for that name:
    // 1 for duplicated names, 0 for names that occur once.
    for (auto &NameAndCount : FilenameCount) {
      if (NameAndCount.second > 1)
        NameAndCount.second = 1;
      else
        NameAndCount.second = 0;
    }
  }

  for (const NewArchiveMember &Member : NewMembers) {
    std::string Header;
    raw_string_ostream HeaderOS(Header);

    MemoryBufferRef Buf = Member.Buf->getMemBufferRef();
    StringRef Data = "";
    if (!Thin)
      Data = Buf.getBuffer();

    // ld64 expects members to be 8-byte aligned for 64-bit content and at
    // least 4-byte aligned for 32-bit content. Use the larger alignment
    // uniformly; this matches cctools and keeps ld64 happy with the archives
    // generated here.
    unsigned MemberPadding = 0;
    if (isDarwin(Kind))
      MemberPadding = OffsetToAlignment(Data.size(), 8);
    unsigned TailPadding = OffsetToAlignment(Data.size() + MemberPadding, 2);
    StringRef Padding(PaddingData, MemberPadding + TailPadding);

    sys::TimePoint<std::chrono::seconds> ModTime = Member.ModTime;
    if (UniqueTimestamps)
      // Increment timestamp for each file of a given name.
      ModTime = sys::toTimePoint(FilenameCount[Member.MemberName]++);

    printMemberHeader(HeaderOS, Offset, StringTable, KnownNames, Kind, Thin,
                      Member, ModTime, Buf.getBufferSize() + MemberPadding);
    HeaderOS.flush();

    Expected<std::vector<unsigned>> Symbols =
        getSymbols(Buf, SymNames, SawObject);
    if (auto E = Symbols.takeError())
      return std::move(E);

    // Advance the offset before Header is moved into the result.
    Offset += Header.size() + Data.size() + Padding.size();
    Members.push_back({std::move(*Symbols), std::move(Header), Data, Padding});
  }

  // If there are no symbols, emit an empty symbol table to satisfy Solaris
  // tools, older versions of which expect a symbol table in a non-empty
  // archive regardless of whether it contains any symbols.
  if (SawObject && SymNames.tell() == 0)
    SymNames << '\0' << '\0' << '\0';

  return Members;
}
Error PDBFile::parseStreamData() { assert(Context && Context->SB); bool SeenNumStreams = false; uint32_t NumStreams = 0; uint32_t StreamIdx = 0; uint64_t DirectoryBytesRead = 0; MemoryBufferRef M = *Context->Buffer; const SuperBlock *SB = Context->SB; auto DirectoryBlocks = getDirectoryBlockArray(); // The structure of the directory is as follows: // struct PDBDirectory { // uint32_t NumStreams; // uint32_t StreamSizes[NumStreams]; // uint32_t StreamMap[NumStreams][]; // }; // // Empty streams don't consume entries in the StreamMap. for (uint32_t DirectoryBlockAddr : DirectoryBlocks) { uint64_t DirectoryBlockOffset = blockToOffset(DirectoryBlockAddr, SB->BlockSize); auto DirectoryBlock = makeArrayRef(reinterpret_cast<const support::ulittle32_t *>( M.getBufferStart() + DirectoryBlockOffset), SB->BlockSize / sizeof(support::ulittle32_t)); if (auto EC = checkOffset(M, DirectoryBlock)) return EC; // We read data out of the directory four bytes at a time. Depending on // where we are in the directory, the contents may be: the number of streams // in the directory, a stream's size, or a block in the stream map. for (uint32_t Data : DirectoryBlock) { // Don't read beyond the end of the directory. if (DirectoryBytesRead == SB->NumDirectoryBytes) break; DirectoryBytesRead += sizeof(Data); // This data must be the number of streams if we haven't seen it yet. if (!SeenNumStreams) { NumStreams = Data; SeenNumStreams = true; continue; } // This data must be a stream size if we have not seen them all yet. if (Context->StreamSizes.size() < NumStreams) { // It seems like some streams have their set to -1 when their contents // are not present. Treat them like empty streams for now. if (Data == UINT32_MAX) Context->StreamSizes.push_back(0); else Context->StreamSizes.push_back(Data); continue; } // This data must be a stream block number if we have seen all of the // stream sizes. std::vector<uint32_t> *StreamBlocks = nullptr; // Figure out which stream this block number belongs to. 
while (StreamIdx < NumStreams) { uint64_t NumExpectedStreamBlocks = bytesToBlocks(Context->StreamSizes[StreamIdx], SB->BlockSize); StreamBlocks = &Context->StreamMap[StreamIdx]; if (NumExpectedStreamBlocks > StreamBlocks->size()) break; ++StreamIdx; } // It seems this block doesn't belong to any stream? The stream is either // corrupt or something more mysterious is going on. if (StreamIdx == NumStreams) return make_error<RawError>(raw_error_code::corrupt_file, "Orphaned block found?"); uint64_t BlockOffset = blockToOffset(Data, getBlockSize()); if (BlockOffset + getBlockSize() < BlockOffset) return make_error<RawError>(raw_error_code::corrupt_file, "Bogus stream block number"); if (BlockOffset + getBlockSize() > M.getBufferSize()) return make_error<RawError>(raw_error_code::corrupt_file, "Stream block number is out of bounds"); StreamBlocks->push_back(Data); } } if (Context->StreamSizes.size() != NumStreams) return make_error<RawError>( raw_error_code::corrupt_file, "The directory has fewer streams then expected"); for (uint32_t I = 0; I != NumStreams; ++I) { uint64_t NumExpectedStreamBlocks = bytesToBlocks(getStreamByteSize(I), getBlockSize()); size_t NumStreamBlocks = getStreamBlockList(I).size(); if (NumExpectedStreamBlocks != NumStreamBlocks) return make_error<RawError>(raw_error_code::corrupt_file, "The number of stream blocks is not " "sufficient for the size of this stream"); } // We should have read exactly SB->NumDirectoryBytes bytes. assert(DirectoryBytesRead == SB->NumDirectoryBytes); return Error::success(); }