Exemple #1
0
static std::vector<std::string>
parseCommandLine(int argc, char *argv[], P2::Engine &engine, Settings &settings) {
    using namespace Sawyer::CommandLine;

    std::string purpose = "generates low-level source code from a binary specimen";
    std::string description =
        "This command generates a C source file from a binary specimen. The binary specimen is parsed, disassembled, "
        "and partitioned into functions, basic blocks, and instructions. These are then traversed to build C source "
        "code which is emitted to standard output.";

    Parser parser = engine.commandLineParser(purpose, description);

    SwitchGroup tool("Tool-specific switches");
    tool.insert(Switch("trace-generation")
                .intrinsicValue(true, settings.generator.traceRiscOps)
                .doc("Cause the source generation phase to emit information about the basic RISC-like steps performed for "
                     "each instruction. This can preserve a developer's sanity because the C expressions often become large, "
                     "deeply nested, and not always intuitive about from whence each part came. The @s{no-trace-generation} "
                     "switch turns this off.  The default is to " + std::string(settings.generator.traceRiscOps?"":"not ") +
                     "show this information."));
    tool.insert(Switch("no-trace-generation")
                .key("trace-generation")
                .intrinsicValue(false, settings.generator.traceRiscOps)
                .hidden(true));

    tool.insert(Switch("trace-instructions")
                .intrinsicValue(true, settings.generator.traceInsnExecution)
                .doc("Cause the generated source to contain extra \"printf\" calls to emit each instruction as it is "
                     "processed. The @s{no-trace-instructions} switch turns this off. The default is to " +
                     std::string(settings.generator.traceInsnExecution?"":"not ") + "add these printf calls."));
    tool.insert(Switch("no-trace-instructions")
                .key("trace-instructions")
                .intrinsicValue(false, settings.generator.traceInsnExecution)
                .hidden(true));

    tool.insert(Switch("ip")
                .longName("instruction-pointer")
                .argument("address", nonNegativeIntegerParser(settings.generator.initialInstructionPointer))
                .doc("Initial value for the instruction pointer.  The default is to not initialize the instruction pointer."));

    tool.insert(Switch("sp")
                .longName("stack-pointer")
                .argument("address", nonNegativeIntegerParser(settings.generator.initialStackPointer))
                .doc("Initial value for the stack pointer. The default is to not initialize the stack pointer."));

    tool.insert(Switch("allocate-memory")
                .argument("size", nonNegativeIntegerParser(settings.generator.allocateMemoryArray))
                .doc("Causes the global \"mem\" array to be allocated instead of being declared \"extern\". The switch "
                     "argument is the amount of memory to allocate. If the argument is zero, then the memory array is "
                     "allocated to be just large enough to hold the value at the maximum initialized address. The default "
                     "is to not allocate the array."));

    return parser.with(tool).parse(argc, argv).apply().unreachedArgs();
}
// Parse a dotted LLVM version string into its integer encoding.
//
// Accepts one to three decimal components separated by single dots ("3", "3.5", "3.5.0"). Each component must
// consist only of digits and be at most 999 so that components cannot spill into one another in the encoding
// 1000000*major + 1000*minor + patch. On success the encoded value is stored in 'version' and true is returned;
// on any malformed input (empty component, sign, whitespace, trailing text, too many components, out-of-range
// value) 'version' is left untouched and false is returned.
static bool
parseLlvmVersion(const std::string &str, int &version /*out*/) {
    int parts[3] = {0, 0, 0};
    const char *s = str.c_str();
    for (size_t i = 0; i < 3; ++i) {
        // strtol would silently accept leading whitespace and a sign, so insist on a digit first.
        if (*s < '0' || *s > '9')
            return false;
        char *rest = NULL;
        errno = 0;
        const long n = strtol(s, &rest, 10);
        if (errno != 0 || n > 999)
            return false;
        parts[i] = static_cast<int>(n);
        s = rest;
        if ('\0' == *s) {
            version = 1000000 * parts[0] + 1000 * parts[1] + parts[2];
            return true;
        }
        if ('.' != *s)
            return false;
        ++s;                                            // skip the dot; a component must follow
    }
    return false;                                       // more than three components
}

// Parse the command line, record the requested LLVM version in settings, and build the AST for the specimen.
//
// The positional arguments left over after switch parsing are passed to the engine as the specimen. If
// settings.llvmVersionString is non-empty after parsing, it is converted to settings.llvmVersion using the
// encoding 1000000*major + 1000*minor + patch. The process is terminated with exit(1) after logging a fatal
// message when no specimen is given, when the LLVM version is malformed, or when SageInterface::getProject()
// returns null after the engine has run.
//
// Returns the non-null project obtained from SageInterface::getProject().
SgProject*
buildAst(int argc, char *argv[], Settings &settings) {
    using namespace Sawyer::CommandLine;
    P2::Engine engine;

    // Parse the command-line
    Parser p = engine.commandLineParser("transcode to LLVM", "Convert an ELF/PE specimen to LLVM assembly for testing.");
    SwitchGroup tool("Tool-specific switches");
    tool.insert(Switch("llvm")
                .argument("version", anyParser(settings.llvmVersionString))
                .doc("Version number for LLVM.  The version number is a doublet or triplet of integers such as \"3.5\" or "
                     "\"3.5.0\" and indicates which dialect of assembly should be emitted. The LLVM assembly syntax, being "
                     "mostly an LLVM internal language, changes in incompatible ways between LLVM versions. This transcoder "
                     "supports only certain versions (e.g., 3.5.0 and 3.7.0 as of December 2015)."));

    std::vector<std::string> specimen = p.with(tool).parse(argc, argv).apply().unreachedArgs();
    if (specimen.empty()) {
        ::mlog[FATAL] <<"no binary specimen; see --help for usage\n";
        exit(1);
    }

    // Parse the LLVM version number specified on the command-line. A malformed version is a usage error rather
    // than something to be silently turned into an arbitrary number.
    if (!settings.llvmVersionString.empty()) {
        int version = 0;
        if (!parseLlvmVersion(settings.llvmVersionString, version)) {
            ::mlog[FATAL] <<"invalid LLVM version \"" <<settings.llvmVersionString <<"\"; see --help for usage\n";
            exit(1);
        }
        settings.llvmVersion = version;
    }

    // Parse, load, disassemble, and partition the specimen. The return value is not needed because the project
    // is looked up through SageInterface below.
    (void) engine.buildAst(specimen);
    SgProject *project = SageInterface::getProject();
    if (!project) {
        ::mlog[FATAL] <<"This tool only supports ELF/PE specimens.\n";
        exit(1);
    }

    return project;
}
// Parse the command line for the string-decoder demo.
//
// Obtains a parser from the engine, adds a "Bugs" documentation section and the tool-specific switches (each
// bound to a member of settings), then parses argv and applies the results. Switch documentation that mentions a
// default is built from the values found in settings at the time this function is called. The return value is
// whatever unreachedArgs() reports after parsing.
static std::vector<std::string>
parseCommandLine(int argc, char *argv[], P2::Engine &engine, Settings &settings) {
    using namespace Sawyer::CommandLine;
    std::string purpose = "decode encoded strings";
    std::string description =
        "Demonstrates the use of ROSE instruction semantics and ability to start execution at an arbitrary address and "
        "machine state. The @s{decoder} switch is required--it is the entry address of a string decoding function. This "
        "analysis finds all statically-detected calls to that function, obtains three arguments from the call's basic block, "
        "and calls the function.  The third argument is used as the address of a buffer where the decoded string is stored, "
        "and the string will be printed as the result.\n\n"
        "This tool can also run in a mode where the calls are synthesized by varying the first of three arguments.";

    Parser parser = engine.commandLineParser(purpose, description);
    parser.doc("Bugs", "z",
               "Being a demo, this tool is not very flexible when it comes to how the decrypted string is located or "
               "what argument values are used in the synthesis mode.");

    SwitchGroup sg("Tool-specific switches");
    sg.name("tool");

    // Addresses supplied by the user.
    sg.insert(Switch("decoder")
              .argument("address", nonNegativeIntegerParser(settings.decoderVa))
              .doc("Virtual address of the string decoding function."));

    sg.insert(Switch("stack")
              .argument("address", nonNegativeIntegerParser(settings.stackVa))
              .doc("Initial value for the stack pointer.  The default is " +
                   StringUtility::addrToString(settings.stackVa) + "."));

    // Boolean tracing switches; each hidden "no-" variant shares the key and storage of its positive form.
    sg.insert(Switch("trace-insns")
              .intrinsicValue(true, settings.traceInsns)
              .doc("Cause instructions to be printed to standard error as they are executed. The @s{no-trace-insns} switch "
                   "turns tracing off. The default is to " + std::string(settings.traceInsns?"":"not ") +
                   "show tracing."));
    sg.insert(Switch("no-trace-insns")
              .key("trace-insns")
              .intrinsicValue(false, settings.traceInsns)
              .hidden(true));

    sg.insert(Switch("trace-semantics")
              .intrinsicValue(true, settings.traceSemantics)
              .doc("Cause instruction semantics (the RISC-like operations) to be printed to standard error as they are "
                   "executed. The @s{no-trace-semantics} switch turns tracing off. The default is to " +
                   std::string(settings.traceSemantics?"":"not ") + "show tracing."));
    sg.insert(Switch("no-trace-semantics")
              .key("trace-semantics")
              .intrinsicValue(false, settings.traceSemantics)
              .hidden(true));

    // Limits and output controls.
    sg.insert(Switch("insn-limit")
              .argument("n", nonNegativeIntegerParser(settings.insnLimit))
              .doc("Maximum number of instructions to execute per decoder call before giving up. The default is " +
                   StringUtility::plural(settings.insnLimit, "instructions") + "."));

    sg.insert(Switch("show-call")
              .argument("n", nonNegativeIntegerParser(settings.showCall))
              .doc("Show calls to the decryption function along with their arguments.  The @v{n} specifies how many arguments "
                   "(each being the natural length of a word) to display. If @v{n} is zero then call information is not "
                   "displayed.  The default is " + StringUtility::plural(settings.showCall, "arguments") + "."));

    sg.insert(Switch("synthesized")
              .intrinsicValue(true, settings.synthesized)
              .doc("Synthesize calls from scratch instead of looking for existing calls."));

    return parser.with(sg).parse(argc, argv).apply().unreachedArgs();
}
// Parse command-line and apply to settings.
//
// Obtains a parser from the engine, overrides its "Synopsis" section, and registers the tool-specific switches
// (the distance metric and the pairing-list toggle), each bound to a member of settings. Documentation that
// mentions a default is built from the values found in settings at the time this function is called. After
// parsing and applying the results, the unreached arguments are passed through parser.expandIncludedFiles() and
// the result of that call is returned.
static std::vector<std::string>
parseCommandLine(int argc, char *argv[], P2::Engine &engine, Settings &settings) {
    using namespace Sawyer::CommandLine;

    std::string purpose = "finds similar functions";
    std::string description =
        "This tool attempts to correlate functions in one binary specimen with related functions in the other specimen. "
        "It does so by parsing, loading, disassembling, and partitioning each specimen to obtain a list of functions. "
        "Then it computes a syntactic distance between all pairs of functions using a specified distance metric "
        "(see @s{metric}) to create an edge-weighted, bipartite graph.  Finally, a minimum weight perfect matching is "
        "found using the Kuhn-Munkres algorithm.  The answer is output as a list of function correlations and their "
        "distance from each other.  The specimens need not have the same number of functions, in which case one of "
        "the specimens will have null functions inserted to make them the same size.  The distance between a null "
        "function and some other function is always zero regardless of metric.\n\n"

        "The specimens can be specified as two files or resources, or multiple files and/or resources per specimen. When "
        "more than two arguments are specified, a \"--\" must separate the files and resources of the first specimen from "
        "those of the second.";

    Parser parser = engine.commandLineParser(purpose, description);
    parser.doc("Synopsis", "@prop{programName} [@v{switches}] @v{specimen1} [--] @v{specimen2}");

    SwitchGroup tool("Switches for this tool");
    tool.name("tool");

    // Distance metric selection; the accepted names map onto the METRIC_* enumerators.
    tool.insert(Switch("metric")
                .argument("name", enumParser(settings.metric)
                          ->with("tree", METRIC_TREE)
                          ->with("linear", METRIC_LINEAR)
                          ->with("insn", METRIC_INSN)
                          ->with("size", METRIC_SIZE)
                          ->with("sizeaddr", METRIC_SIZE_ADDR))
                .doc("Metric to use when comparing two functions.  The following metrics are implemented:"

                     "@named{linear}{The \"linear\" method creates a list consisting of AST node types and, in the case "
                     "of SgAsmInstruction nodes, the instruction kind (e.g., \"x86_pop\", \"x86_mov\", etc) for each function. "
                     "It then computes an edit distance for any pair of lists by using the Levenshtein algorithm and normalizes "
                     "the edit cost according to the size of the lists that were compared.}"

                     "@named{insn}{This is the same as the \"linear\" method but it computes the edit distance for only "
                     "the instruction types without considering their operands.}"

                     "@named{tree}{The \"tree\" method is similar to the \"linear\" method but restricts edit operations "
                     "according to the depth of the nodes in the functions' ASTs.  This method is orders of magnitude slower "
                     "than the \"linear\" method and doesn't seem to give better results.}"

                     "@named{size}{Uses difference in AST size as the distance metric.  The difference between two functions "
                     "is the absolute value of the difference in the size of their ASTs. This is easily the fastest metric.}"

                     "@named{sizeaddr}{Uses difference in AST size and difference in entry address as the distance metric. "
                     "Functions are sorted into a vector according to their entry address and the difference in vector index "
                     "contributes to the distance between two functions.}"

                     "The default metric is \"" + metricName(settings.metric) + "\"."));

    // Pairing list toggle; the hidden "no-list" variant shares the key and storage of "list". The negation word
    // already carries its own trailing space, so the text that follows it must not start with one.
    tool.insert(Switch("list")
                .intrinsicValue(true, settings.listPairings)
                .doc("Produce a listing that indicates how functions in the first specimen map into functions in the "
                     "second specimen.  The default is to " + std::string(settings.listPairings?"":"not ") + "show "
                     "this information.  The @s{no-list} switch is the inverse.  Regardless of whether the pairings are "
                     "listed, the output will contain summary information."));
    tool.insert(Switch("no-list")
                .key("list")
                .intrinsicValue(false, settings.listPairings)
                .hidden(true));


    return parser.expandIncludedFiles(parser.with(tool).parse(argc, argv).apply().unreachedArgs());
}