static std::vector<std::string> parseCommandLine(int argc, char *argv[], P2::Engine &engine, Settings &settings) { using namespace Sawyer::CommandLine; std::string purpose = "generates low-level source code from a binary specimen"; std::string description = "This command generates a C source file from a binary specimen. The binary specimen is parsed, disassembled, " "and partitioned into functions, basic blocks, and instructions. These are then traversed to build C source " "code which is emitted to standard output."; Parser parser = engine.commandLineParser(purpose, description); SwitchGroup tool("Tool-specific switches"); tool.insert(Switch("trace-generation") .intrinsicValue(true, settings.generator.traceRiscOps) .doc("Cause the source generation phase to emit information about the basic RISC-like steps performed for " "each instruction. This can preserve a developer's sanity because the C expressions often become large, " "deeply nested, and not always intuitive about from whence each part came. The @s{no-trace-generation} " "switch turns this off. The default is to " + std::string(settings.generator.traceRiscOps?"":"not ") + "show this information.")); tool.insert(Switch("no-trace-generation") .key("trace-generation") .intrinsicValue(false, settings.generator.traceRiscOps) .hidden(true)); tool.insert(Switch("trace-instructions") .intrinsicValue(true, settings.generator.traceInsnExecution) .doc("Cause the generated source to contain extra \"printf\" calls to emit each instruction as it is " "processed. The @s{no-trace-instructions} switch turns this off. 
The default is to " + std::string(settings.generator.traceInsnExecution?"":"not ") + "add these printf calls.")); tool.insert(Switch("no-trace-instructions") .key("trace-instructions") .intrinsicValue(false, settings.generator.traceInsnExecution) .hidden(true)); tool.insert(Switch("ip") .longName("instruction-pointer") .argument("address", nonNegativeIntegerParser(settings.generator.initialInstructionPointer)) .doc("Initial value for the instruction pointer. The default is to not initialize the instruction pointer.")); tool.insert(Switch("sp") .longName("stack-pointer") .argument("address", nonNegativeIntegerParser(settings.generator.initialStackPointer)) .doc("Initial value for the stack pointer. The default is to not initialize the stack pointer.")); tool.insert(Switch("allocate-memory") .argument("size", nonNegativeIntegerParser(settings.generator.allocateMemoryArray)) .doc("Causes the global \"mem\" array to be allocated instead of being declared \"extern\". The switch " "argument is the amount of memory to allocate. If the argument is zero, then the memory array is " "allocated to be just large enough to hold the value at the maximum initialized address. The default " "is to not allocate the array.")); return parser.with(tool).parse(argc, argv).apply().unreachedArgs(); }
// Parses the command line, validates the optional LLVM version, and builds the AST for the binary specimen.
//
// Parameters:
//   argc, argv  the program's command-line, passed to the Sawyer parser obtained from the partitioner engine.
//   settings    receives the raw "--llvm" argument (llvmVersionString) and, when that argument is present, the
//               encoded version number (llvmVersion = 1000000*major + 1000*minor + patch).
//
// Returns the project obtained from SageInterface::getProject() after the engine has built the AST.
//
// The process exits with status 1 (after a FATAL diagnostic) when no specimen is named on the command line, when the
// LLVM version string is malformed, or when no project exists after the AST is built.
SgProject* buildAst(int argc, char *argv[], Settings &settings) {
    using namespace Sawyer::CommandLine;
    P2::Engine engine;

    // Parse the command-line
    Parser p = engine.commandLineParser("transcode to LLVM", "Convert an ELF/PE specimen to LLVM assembly for testing.");
    SwitchGroup tool("Tool-specific switches");
    tool.insert(Switch("llvm")
                .argument("version", anyParser(settings.llvmVersionString))
                .doc("Version number for LLVM.  The version number is a doublet or triplet of integers such as \"3.5\" or "
                     "\"3.5.0\" and indicates which dialect of assembly should be emitted. The LLVM assembly syntax, being "
                     "mostly an LLVM internal language, changes in incompatible ways between LLVM versions. This transcoder "
                     "supports only certain versions (e.g., 3.5.0 and 3.7.0 as of December 2015)."));

    std::vector<std::string> specimen = p.with(tool).parse(argc, argv).apply().unreachedArgs();
    if (specimen.empty()) {
        ::mlog[FATAL] <<"no binary specimen; see --help for usage\n";
        exit(1);
    }

    // Parse the LLVM version number specified on the command-line.  One to three dot-separated components are
    // accepted (a lone major number was accepted historically, so it still is).  Each component must be a plain
    // decimal number in the range 0 through 999 so that the encoding below is unambiguous and cannot overflow.
    if (!settings.llvmVersionString.empty()) {
        const char *cursor = settings.llvmVersionString.c_str();
        int component[3] = {0, 0, 0};
        int nComponents = 0;
        bool wellFormed = true;
        while (wellFormed) {
            // strtol skips leading white space and accepts a sign; insist on a digit so neither sneaks through.
            if (*cursor < '0' || *cursor > '9') {
                wellFormed = false;
                break;
            }
            char *rest = NULL;
            errno = 0;
            const long value = strtol(cursor, &rest, 10);
            if (errno != 0 || value > 999) {
                wellFormed = false;
                break;
            }
            component[nComponents++] = static_cast<int>(value);
            cursor = rest;
            if ('\0' == *cursor)
                break;                                  // consumed the whole string
            if ('.' != *cursor || nComponents >= 3) {
                wellFormed = false;                     // trailing garbage or too many components
                break;
            }
            ++cursor;                                   // skip the '.' and parse the next component
        }
        if (!wellFormed) {
            ::mlog[FATAL] <<"invalid LLVM version \"" <<settings.llvmVersionString <<"\"; expected up to three"
                          <<" dot-separated integers in the range 0-999, such as \"3.5\" or \"3.5.0\"\n";
            exit(1);
        }
        settings.llvmVersion = 1000000 * component[0] + 1000 * component[1] + component[2];
    }

    // Parse, load, disassemble, and partition the specimen.
    (void) engine.buildAst(specimen);
    SgProject *project = SageInterface::getProject();
    if (!project) {
        ::mlog[FATAL] <<"This tool only supports ELF/PE specimens.\n";
        exit(1);
    }

    return project;
}
static std::vector<std::string> parseCommandLine(int argc, char *argv[], P2::Engine &engine, Settings &settings) { using namespace Sawyer::CommandLine; std::string purpose = "decode encoded strings"; std::string description = "Demonstrates the use of ROSE instruction semantics and ability to start execution an an arbitrary address and " "machine state. The @s{decoder} switch is required--it is the entry address of a string decoding function. This " "analysis finds all statically-detected calls to that function, obtains three arguments from the call's basic block, " "and calls the function. The third argument is used as the address of a buffer where the decoded string is stored, " "and the string will be printed as the result.\n\n" "This tool can also run in a mode where the calls are synthesized by varying the first of three arguments."; Parser parser = engine.commandLineParser(purpose, description); parser.doc("Bugs", "z", "Being a demo, this tool is not very flexible when it comes to how the decrypted string is located or " "what argument values are used in the synthesis mode."); SwitchGroup sg("Tool-specific switches"); sg.name("tool"); sg.insert(Switch("decoder") .argument("address", nonNegativeIntegerParser(settings.decoderVa)) .doc("Virtual address of the string decoding function.")); sg.insert(Switch("stack") .argument("address", nonNegativeIntegerParser(settings.stackVa)) .doc("Initial value for the stack pointer. The default is " + StringUtility::addrToString(settings.stackVa) + ".")); sg.insert(Switch("trace-insns") .intrinsicValue(true, settings.traceInsns) .doc("Cause instructions to be printed to standard error as they are executed. The @s{no-trace-insns} switch " "turns tracing off. 
The default is to " + std::string(settings.traceInsns?"":"not ") + "show tracing.")); sg.insert(Switch("no-trace-insns") .key("trace-insns") .intrinsicValue(false, settings.traceInsns) .hidden(true)); sg.insert(Switch("trace-semantics") .intrinsicValue(true, settings.traceSemantics) .doc("Cause instruction semantics (the RISC-like operations) to be printed to standard error as they are " "executed. The @s{no-trace-semantics} switch turns tracing off. The default is to " + std::string(settings.traceSemantics?"":"not ") + "show tracing.")); sg.insert(Switch("no-trace-semantics") .key("trace-semantics") .intrinsicValue(false, settings.traceSemantics) .hidden(true)); sg.insert(Switch("insn-limit") .argument("n", nonNegativeIntegerParser(settings.insnLimit)) .doc("Maximum number of instructions to execute per decoder call before giving up. The default is " + StringUtility::plural(settings.insnLimit, "instructions") + ".")); sg.insert(Switch("show-call") .argument("n", nonNegativeIntegerParser(settings.showCall)) .doc("Show calls to the decryption function along with their arguments. The @v{n} specifies how many arguments " "(each being the natural length of a word) to display. If @v{n} is zero then call information is not " "displayed. The default is " + StringUtility::plural(settings.showCall, "arguments") + ".")); sg.insert(Switch("synthesized") .intrinsicValue(true, settings.synthesized) .doc("Synthesize calls from scratch instead of looking for existing calls.")); return parser.with(sg).parse(argc, argv).apply().unreachedArgs(); }
// Parse command-line and apply to settings. static std::vector<std::string> parseCommandLine(int argc, char *argv[], P2::Engine &engine, Settings &settings) { using namespace Sawyer::CommandLine; std::string purpose = "finds similar functions"; std::string description = "This tool attempts to correlate functions in one binary specimen with related functions in the other specimen. " "It does so by parsing, loading, disassembling, and partitioning each specimen to obtain a list of functions. " "Then it computes a syntactic distance between all pairs of functions using a specified distance metric " "(see @s{metric}) to create an edge-weighted, bipartite graph. Finally, a minimum weight perfect matching is " "found using the Kuhn-Munkres algorithm. The answer is output as a list of function correlations and their " "distance from each other. The specimens need not have the same number of functions, in which case one of " "the specimens will have null functions inserted to make them the same size. The distance between a null " "function and some other function is always zero regardless of metric.\n\n" "The specimens can be specified as two files or resources, or multiple files and/or resources per specimen. When " "more than two arguments are specified, a \"--\" must separate the files and resources of the first secimen from " "those of the second."; Parser parser = engine.commandLineParser(purpose, description); parser.doc("Synopsis", "@prop{programName} [@v{switches}] @v{specimen1} [--] @v{specimen2}"); SwitchGroup tool("Switches for this tool"); tool.name("tool"); tool.insert(Switch("metric") .argument("name", enumParser(settings.metric) ->with("tree", METRIC_TREE) ->with("linear", METRIC_LINEAR) ->with("insn", METRIC_INSN) ->with("size", METRIC_SIZE) ->with("sizeaddr", METRIC_SIZE_ADDR)) .doc("Metric to use when comparing two functions. 
The following metrics are implemented:" "@named{linear}{The \"linear\" method creates a list consisting of AST node types and, in the case " "of SgAsmInstruction nodes, the instruction kind (e.g., \"x86_pop\", \"x86_mov\", etc) for each function. " "It then computes an edit distance for any pair of lists by using the Levenshtein algorithm and normalizes " "the edit cost according to the size of the lists that were compared.}" "@named{insn}{This is the same as the \"linear\" method but it computes the edit distance for only " "the instruction types without considering their operands.}" "@named{tree}{The \"tree\" method is similar to the \"linear\" method but restricts edit operations " "according to the depth of the nodes in the functions' ASTs. This method is orders of magnitude slower " "than the \"linear\" method and doesn't seem to give better results.}" "@named{size}{Uses difference in AST size as the distance metric. The difference between two functions " "is the absolute value of the difference in the size of their ASTs. This is easily the fastest metric.}" "@named{sizeaddr}{Uses difference in AST size and difference in entry address as the distance metric. " "Functions are sorted into a vector according to their entry address and the difference in vector index " "contributes to the distance between two functions.}" "The default metric is \"" + metricName(settings.metric) + "\".")); tool.insert(Switch("list") .intrinsicValue(true, settings.listPairings) .doc("Produce a listing that indicates how functions in the first specimen map into functions into the " "second specimen. The default is to " + std::string(settings.listPairings?"":"not ") + " show " "this information. The @s{no-list} switch is the inverse. 
Regardless of whether the pairings are " "listed, the output will contain summary information.")); tool.insert(Switch("no-list") .key("list") .intrinsicValue(false, settings.listPairings) .hidden(true)); return parser.expandIncludedFiles(parser.with(tool).parse(argc, argv).apply().unreachedArgs()); }