// Stores the unit U in the output corpus directory under a file named after
// Hash(U). Does nothing when no output corpus directory is configured.
// At verbosity 2 and above the destination path is reported on stderr.
void Fuzzer::WriteToOutputCorpus(const Unit &U) {
  if (!Options.OutputCorpus.empty()) {
    const std::string Path = DirPlusFile(Options.OutputCorpus, Hash(U));
    WriteToFile(U, Path);
    const bool Verbose = Options.Verbosity >= 2;
    if (Verbose) {
      std::cerr << "Written to " << Path << std::endl;
    }
  }
}
void ListFilesInDirRecursive(const std::string &Dir, long *Epoch, Vector<std::string> *V, bool TopDir) { auto E = GetEpoch(Dir); if (Epoch) if (E && *Epoch >= E) return; DIR *D = opendir(Dir.c_str()); if (!D) { Printf("No such directory: %s; exiting\n", Dir.c_str()); exit(1); } while (auto E = readdir(D)) { std::string Path = DirPlusFile(Dir, E->d_name); if (E->d_type == DT_REG || E->d_type == DT_LNK || (E->d_type == DT_UNKNOWN && IsFile(Path))) V->push_back(Path); else if ((E->d_type == DT_DIR || (E->d_type == DT_UNKNOWN && IsDirectory(Path))) && *E->d_name != '.') ListFilesInDirRecursive(Path, Epoch, V, false); } closedir(D); if (Epoch && TopDir) *Epoch = E; }
// Stores the unit U in the output corpus directory under a file named after
// Hash(U). Does nothing when no output corpus directory is configured.
// At verbosity 2 and above the destination path is logged via Printf.
void Fuzzer::WriteToOutputCorpus(const Unit &U) {
  if (!Options.OutputCorpus.empty()) {
    const std::string Path = DirPlusFile(Options.OutputCorpus, Hash(U));
    WriteToFile(U, Path);
    const bool Verbose = Options.Verbosity >= 2;
    if (Verbose) {
      Printf("Written to %s\n", Path.c_str());
    }
  }
}
// Writes every unit of the in-memory corpus into the output corpus directory,
// one file per unit named after Hash(U). Does nothing when no output corpus
// directory is configured. With non-zero verbosity a summary line is logged.
void Fuzzer::SaveCorpus() {
  const auto &OutDir = Options.OutputCorpus;
  if (OutDir.empty())
    return;

  for (const auto &U : Corpus) {
    const std::string Path = DirPlusFile(OutDir, Hash(U));
    WriteToFile(U, Path);
  }

  if (Options.Verbosity != 0) {
    Printf("Written corpus of %zd files to %s\n", Corpus.size(),
           OutDir.c_str());
  }
}
// Writes every unit of the in-memory corpus into the output corpus directory,
// one file per unit named after Hash(U). Does nothing when no output corpus
// directory is configured. With non-zero verbosity a summary line goes to
// stderr.
void Fuzzer::SaveCorpus() {
  const auto &OutDir = Options.OutputCorpus;
  if (OutDir.empty())
    return;

  for (const auto &U : Corpus) {
    const std::string Path = DirPlusFile(OutDir, Hash(U));
    WriteToFile(U, Path);
  }

  if (Options.Verbosity != 0) {
    std::cerr << "Written corpus of " << Corpus.size() << " files to "
              << OutDir << "\n";
  }
}
void ReadDirToVectorOfUnits(const char *Path, std::vector<Unit> *V, long *Epoch) { long E = Epoch ? *Epoch : 0; for (auto &X : ListFilesInDir(Path, Epoch)) { auto FilePath = DirPlusFile(Path, X); if (Epoch && GetEpoch(FilePath) < E) continue; V->push_back(FileToVector(FilePath)); } }
void ReadDirToVectorOfUnits(const char *Path, std::vector<Unit> *V, long *Epoch) { long E = Epoch ? *Epoch : 0; auto Files = ListFilesInDir(Path, Epoch); for (size_t i = 0; i < Files.size(); i++) { auto &X = Files[i]; auto FilePath = DirPlusFile(Path, X); if (Epoch && GetEpoch(FilePath) < E) continue; if ((i % 1000) == 0 && i) Printf("Loaded %zd/%zd files from %s\n", i, Files.size(), Path); V->push_back(FileToVector(FilePath)); } }
void ReadDirToVectorOfUnits(const char *Path, std::vector<Unit> *V, long *Epoch, size_t MaxSize) { long E = Epoch ? *Epoch : 0; auto Files = ListFilesInDir(Path, Epoch); size_t NumLoaded = 0; for (size_t i = 0; i < Files.size(); i++) { auto &X = Files[i]; auto FilePath = DirPlusFile(Path, X); if (Epoch && GetEpoch(FilePath) < E) continue; NumLoaded++; if ((NumLoaded & (NumLoaded - 1)) == 0 && NumLoaded >= 1024) Printf("Loaded %zd/%zd files from %s\n", NumLoaded, Files.size(), Path); V->push_back(FileToVector(FilePath, MaxSize)); } }
// Walks the directory tree rooted at Dir and invokes the supplied callbacks.
//
// DirPreCallback   called with Dir before the directory is opened.
// DirPostCallback  called with Dir after the directory has been fully
//                  processed; NOT called when the directory cannot be opened.
// FileCallback     called with the path of every file encountered.
//
// Sub-directories whose names start with '.' are not entered.
void IterateDirRecursive(const std::string &Dir,
                         void (*DirPreCallback)(const std::string &Dir),
                         void (*DirPostCallback)(const std::string &Dir),
                         void (*FileCallback)(const std::string &Dir)) {
  DirPreCallback(Dir);

  DIR *Handle = opendir(Dir.c_str());
  if (Handle == nullptr)
    return;

  for (auto Entry = readdir(Handle); Entry; Entry = readdir(Handle)) {
    std::string Path = DirPlusFile(Dir, Entry->d_name);
    const auto Type = Entry->d_type;

    // Regular files and symlinks are taken directly; when the file system
    // does not report a type, IsFile() is consulted instead.
    const bool LooksLikeFile = Type == DT_REG || Type == DT_LNK ||
                               (Type == DT_UNKNOWN && IsFile(Path));
    if (LooksLikeFile) {
      FileCallback(Path);
      continue;
    }

    const bool LooksLikeDir =
        Type == DT_DIR || (Type == DT_UNKNOWN && IsDirectory(Path));
    // Names beginning with '.' (including "." and "..") are never entered.
    if (LooksLikeDir && Entry->d_name[0] != '.')
      IterateDirRecursive(Path, DirPreCallback, DirPostCallback, FileCallback);
  }
  closedir(Handle);

  DirPostCallback(Dir);
}
// Loads every file listed by ListFilesInDir(Path) and appends its contents
// to *V as one Unit per file.
void ReadDirToVectorOfUnits(const char *Path, std::vector<Unit> *V) {
  const auto Names = ListFilesInDir(Path);
  for (const auto &Name : Names) {
    const auto FilePath = DirPlusFile(Path, Name);
    V->push_back(FileToVector(FilePath));
  }
}
int CollectDataFlow(const std::string &DFTBinary, const std::string &DirPath, const Vector<SizedFile> &CorporaFiles) { Printf("INFO: collecting data flow: bin: %s dir: %s files: %zd\n", DFTBinary.c_str(), DirPath.c_str(), CorporaFiles.size()); MkDir(DirPath); auto Temp = TempPath(".dft"); for (auto &F : CorporaFiles) { // For every input F we need to collect the data flow and the coverage. // Data flow collection may fail if we request too many DFSan tags at once. // So, we start from requesting all tags in range [0,Size) and if that fails // we then request tags in [0,Size/2) and [Size/2, Size), and so on. // Function number => DFT. std::unordered_map<size_t, Vector<uint8_t>> DFTMap; std::unordered_set<std::string> Cov; std::queue<std::pair<size_t, size_t>> Q; Q.push({0, F.Size}); while (!Q.empty()) { auto R = Q.front(); Printf("\n\n\n********* Trying: [%zd, %zd)\n", R.first, R.second); Q.pop(); Command Cmd; Cmd.addArgument(DFTBinary); Cmd.addArgument(std::to_string(R.first)); Cmd.addArgument(std::to_string(R.second)); Cmd.addArgument(F.File); Cmd.addArgument(Temp); Printf("CMD: %s\n", Cmd.toString().c_str()); if (ExecuteCommand(Cmd)) { // DFSan has failed, collect tags for two subsets. if (R.second - R.first >= 2) { size_t Mid = (R.second + R.first) / 2; Q.push({R.first, Mid}); Q.push({Mid, R.second}); } } else { Printf("********* Success: [%zd, %zd)\n", R.first, R.second); std::ifstream IF(Temp); std::string L; while (std::getline(IF, L, '\n')) { // Data flow collection has succeeded. // Merge the results with the other runs. if (L.empty()) continue; if (L[0] == 'C') { // Take coverage lines as is, they will be the same in all attempts. Cov.insert(L); } else if (L[0] == 'F') { size_t FunctionNum = 0; std::string DFTString; if (ParseDFTLine(L, &FunctionNum, &DFTString)) { auto &DFT = DFTMap[FunctionNum]; if (DFT.empty()) { // Haven't seen this function before, take DFT as is. 
DFT = DFTStringToVector(DFTString); } else if (DFT.size() == DFTString.size()) { // Have seen this function already, merge DFTs. DFTStringAppendToVector(&DFT, DFTString); } } } } } } auto OutPath = DirPlusFile(DirPath, Hash(FileToVector(F.File))); // Dump combined DFT to disk. Printf("Producing DFT for %s\n", OutPath.c_str()); std::ofstream OF(OutPath); for (auto &DFT: DFTMap) OF << "F" << DFT.first << " " << DFT.second << std::endl; for (auto &C : Cov) OF << C << std::endl; } RemoveFile(Temp); // Write functions.txt. Command Cmd; Cmd.addArgument(DFTBinary); Cmd.setOutputFile(DirPlusFile(DirPath, "functions.txt")); ExecuteCommand(Cmd); return 0; }
bool DataFlowTrace::Init(const std::string &DirPath, std::string *FocusFunction, Random &Rand) { if (DirPath.empty()) return false; Printf("INFO: DataFlowTrace: reading from '%s'\n", DirPath.c_str()); Vector<SizedFile> Files; GetSizedFilesFromDir(DirPath, &Files); std::string L; size_t FocusFuncIdx = SIZE_MAX; Vector<std::string> FunctionNames; // Read functions.txt std::ifstream IF(DirPlusFile(DirPath, kFunctionsTxt)); size_t NumFunctions = 0; while (std::getline(IF, L, '\n')) { FunctionNames.push_back(L); NumFunctions++; if (*FocusFunction == L) FocusFuncIdx = NumFunctions - 1; } if (*FocusFunction == "auto") { // AUTOFOCUS works like this: // * reads the coverage data from the DFT files. // * assigns weights to functions based on coverage. // * chooses a random function according to the weights. ReadCoverage(DirPath); auto Weights = Coverage.FunctionWeights(NumFunctions); Vector<double> Intervals(NumFunctions + 1); std::iota(Intervals.begin(), Intervals.end(), 0); auto Distribution = std::piecewise_constant_distribution<double>( Intervals.begin(), Intervals.end(), Weights.begin()); FocusFuncIdx = static_cast<size_t>(Distribution(Rand)); *FocusFunction = FunctionNames[FocusFuncIdx]; assert(FocusFuncIdx < NumFunctions); Printf("INFO: AUTOFOCUS: %zd %s\n", FocusFuncIdx, FunctionNames[FocusFuncIdx].c_str()); for (size_t i = 0; i < NumFunctions; i++) { if (!Weights[i]) continue; Printf(" [%zd] W %g\tBB-tot %u\tBB-cov %u\tEntryFreq %u:\t%s\n", i, Weights[i], Coverage.GetNumberOfBlocks(i), Coverage.GetNumberOfCoveredBlocks(i), Coverage.GetCounter(i, 0), FunctionNames[i].c_str()); } } if (!NumFunctions || FocusFuncIdx == SIZE_MAX || Files.size() <= 1) return false; // Read traces. 
size_t NumTraceFiles = 0; size_t NumTracesWithFocusFunction = 0; for (auto &SF : Files) { auto Name = Basename(SF.File); if (Name == kFunctionsTxt) continue; NumTraceFiles++; // Printf("=== %s\n", Name.c_str()); std::ifstream IF(SF.File); while (std::getline(IF, L, '\n')) { size_t FunctionNum = 0; std::string DFTString; if (ParseDFTLine(L, &FunctionNum, &DFTString) && FunctionNum == FocusFuncIdx) { NumTracesWithFocusFunction++; if (FunctionNum >= NumFunctions) return ParseError("N is greater than the number of functions", L); Traces[Name] = DFTStringToVector(DFTString); // Print just a few small traces. if (NumTracesWithFocusFunction <= 3 && DFTString.size() <= 16) Printf("%s => |%s|\n", Name.c_str(), std::string(DFTString).c_str()); break; // No need to parse the following lines. } } } assert(NumTraceFiles == Files.size() - 1); Printf("INFO: DataFlowTrace: %zd trace files, %zd functions, " "%zd traces with focus function\n", NumTraceFiles, NumFunctions, NumTracesWithFocusFunction); return true; }
// Builds the path for the entry called Name inside the directory returned by
// TmpDir().
std::string SharedMemoryRegion::Path(const char *Name) {
  auto Directory = TmpDir();
  return DirPlusFile(Directory, Name);
}
void DataFlowTrace::Init(const std::string &DirPath, const std::string &FocusFunction) { if (DirPath.empty()) return; const char *kFunctionsTxt = "functions.txt"; Printf("INFO: DataFlowTrace: reading from '%s'\n", DirPath.c_str()); Vector<SizedFile> Files; GetSizedFilesFromDir(DirPath, &Files); std::string L; // Read functions.txt std::ifstream IF(DirPlusFile(DirPath, kFunctionsTxt)); size_t FocusFuncIdx = SIZE_MAX; size_t NumFunctions = 0; while (std::getline(IF, L, '\n')) { NumFunctions++; if (FocusFunction == L) FocusFuncIdx = NumFunctions - 1; } if (!NumFunctions || FocusFuncIdx == SIZE_MAX || Files.size() <= 1) return; // Read traces. size_t NumTraceFiles = 0; size_t NumTracesWithFocusFunction = 0; for (auto &SF : Files) { auto Name = Basename(SF.File); if (Name == kFunctionsTxt) continue; auto ParseError = [&](const char *Err) { Printf("DataFlowTrace: parse error: %s\n File: %s\n Line: %s\n", Err, Name.c_str(), L.c_str()); }; NumTraceFiles++; // Printf("=== %s\n", Name.c_str()); std::ifstream IF(SF.File); while (std::getline(IF, L, '\n')) { size_t SpacePos = L.find(' '); if (SpacePos == std::string::npos) return ParseError("no space in the trace line"); if (L.empty() || L[0] != 'F') return ParseError("the trace line doesn't start with 'F'"); size_t N = std::atol(L.c_str() + 1); if (N >= NumFunctions) return ParseError("N is greater than the number of functions"); if (N == FocusFuncIdx) { NumTracesWithFocusFunction++; const char *Beg = L.c_str() + SpacePos + 1; const char *End = L.c_str() + L.size(); assert(Beg < End); size_t Len = End - Beg; Vector<uint8_t> V(Len); for (size_t I = 0; I < Len; I++) { if (Beg[I] != '0' && Beg[I] != '1') ParseError("the trace should contain only 0 or 1"); V[I] = Beg[I] == '1'; } Traces[Name] = V; // Print just a few small traces. if (NumTracesWithFocusFunction <= 3 && Len <= 16) Printf("%s => |%s|\n", Name.c_str(), L.c_str() + SpacePos + 1); break; // No need to parse the following lines. 
} } } assert(NumTraceFiles == Files.size() - 1); Printf("INFO: DataFlowTrace: %zd trace files, %zd functions, " "%zd traces with focus function\n", NumTraceFiles, NumFunctions, NumTracesWithFocusFunction); }