コード例 #1
0
ファイル: bbbuf.c プロジェクト: Arunpreet/dynamorio
/* Walks the instruction list looking for a general-purpose register that is
 * written before any instruction has been seen reading it.
 *
 * Returns that register, or DR_REG_NULL when the list ends without finding
 * one or when a syscall/interrupt instruction is reached first.  Within a
 * single instruction the read test comes first, so a register that is both
 * read and written by the same instruction is treated as read.
 */
static reg_id_t
bb_find_dead_reg(instrlist_t *ilist)
{
    bool seen_read[DR_NUM_GPR_REGS] = { false };
    instr_t *cur = instrlist_first(ilist);

    while (cur != NULL) {
        int idx;

        /* Give up as soon as a syscall or interrupt is encountered. */
        if (instr_is_syscall(cur) || instr_is_interrupt(cur))
            return DR_REG_NULL;

        for (idx = 0; idx < DR_NUM_GPR_REGS; idx++) {
            reg_id_t reg = (reg_id_t)(DR_REG_START_GPR + idx);

            /* Once a register has been read it can never be reported. */
            if (seen_read[idx])
                continue;
            if (instr_reads_from_reg(cur, reg)) {
                seen_read[idx] = true;
                continue;
            }
            if (instr_writes_to_exact_reg(cur, reg))
                return reg;
#ifdef X64
            /* in x64, update on 32-bit register kills the whole register */
            if (instr_writes_to_exact_reg(cur, reg_64_to_32(reg)))
                return reg;
#endif
        }
        cur = instr_get_next(cur);
    }
    return DR_REG_NULL;
}
コード例 #2
0
ファイル: syscall.dll.c プロジェクト: Arunpreet/dynamorio
/* Basic-block event.
 *
 * A block whose tag address equals start_pc or stop_pc switches the global
 * `monitoring` flag on or off (and logs that to STDERR); such a block is not
 * instrumented.  For every other block, a clean call to at_syscall is
 * inserted in front of each syscall instruction.
 *
 * Always returns DR_EMIT_DEFAULT.
 */
static dr_emit_flags_t
bb_event(void* drcontext, void *tag, instrlist_t *bb, bool for_trace, bool translating)
{
    app_pc block_start = dr_fragment_app_pc(tag);
    instr_t *cur;
    instr_t *following;

    if (block_start == start_pc) {
        dr_fprintf(STDERR, "starting syscall monitoring\n");
        monitoring = true;
        return DR_EMIT_DEFAULT;
    }
    if (block_start == stop_pc) {
        dr_fprintf(STDERR, "stopping syscall monitoring\n");
        monitoring = false;
        return DR_EMIT_DEFAULT;
    }

    cur = instrlist_first(bb);
    while (cur != NULL) {
        /* Fetch the successor before the list is modified. */
        following = instr_get_next(cur);

        /* Insert a callback to at_syscall before every system call */
        if (instr_is_syscall(cur))
            dr_insert_clean_call(drcontext, bb, cur, at_syscall, false, 0);

        cur = following;
    }
    return DR_EMIT_DEFAULT;
}
コード例 #3
0
/* Basic-block event that replaces a getpid system call with a constant.
 *
 * While walking the block, the most recent immediate moved into EAX by an
 * OP_mov_imm instruction is remembered in in_eax.  When a syscall
 * instruction is reached and in_eax equals SYS_getpid, the syscall is
 * removed from the list and destroyed, and a "mov $-7 -> %eax" carrying the
 * syscall's application pc as its translation is inserted in its place.
 *
 * The operand kinds are checked before opnd_get_reg()/opnd_get_immed_int()
 * are called, so an OP_mov_imm whose operands are not a plain register and
 * immediate integer is simply ignored rather than queried with the wrong
 * accessor.
 *
 * Always returns DR_EMIT_DEFAULT.
 */
static
dr_emit_flags_t bb_event(void* drcontext, void *tag, instrlist_t* bb,
                         bool for_trace, bool translating)
{
    instr_t *instr;
    instr_t *next_instr;
    /* All-ones sentinel: no immediate has been seen going into EAX yet. */
    reg_t in_eax = -1;

    for (instr = instrlist_first(bb); instr != NULL; instr = next_instr) {
        /* Saved up front because instr may be removed and destroyed below. */
        next_instr = instr_get_next(instr);
        if (instr_get_opcode(instr) == OP_mov_imm &&
            opnd_is_reg(instr_get_dst(instr, 0)) &&
            opnd_get_reg(instr_get_dst(instr, 0)) == REG_EAX &&
            opnd_is_immed_int(instr_get_src(instr, 0)))
            in_eax = opnd_get_immed_int(instr_get_src(instr, 0));
        if (instr_is_syscall(instr) &&
            in_eax == SYS_getpid) {
            instr_t *myval = INSTR_CREATE_mov_imm
                (drcontext, opnd_create_reg(REG_EAX), OPND_CREATE_INT32(-7));
            instr_set_translation(myval, instr_get_app_pc(instr));
            instrlist_preinsert(bb, instr, myval);
            instrlist_remove(bb, instr);
            instr_destroy(drcontext, instr);
        }
    }
    return DR_EMIT_DEFAULT;
}
コード例 #4
0
ファイル: drx.c プロジェクト: boolking/dynamorio
/* Reports whether the arithmetic flags can be treated as dead at `where`.
 *
 * Scans forward from `where`:
 *  - a syscall or interrupt, or any instruction reading an arithmetic flag,
 *    yields false;
 *  - an instruction that writes all arithmetic flags (without reading any)
 *    yields true;
 *  - a control-transfer instruction yields false unless it is an application
 *    unconditional branch or direct call whose pc target equals the
 *    application pc of the next (application) instruction in the list, in
 *    which case scanning continues past it.
 * Reaching the end of the list yields false.
 */
DR_EXPORT
bool
drx_aflags_are_dead(instr_t *where)
{
    instr_t *cur;

    for (cur = where; cur != NULL; cur = instr_get_next(cur)) {
        uint aflags;
        instr_t *fallthrough;
        opnd_t target;

        /* we treat syscall/interrupt as aflags read */
        if (instr_is_syscall(cur) || instr_is_interrupt(cur))
            return false;

        aflags = instr_get_arith_flags(cur, DR_QUERY_DEFAULT);
        if (TESTANY(EFLAGS_READ_ARITH, aflags))
            return false;
        if (TESTALL(EFLAGS_WRITE_ARITH, aflags))
            return true;

        if (!instr_is_cti(cur))
            continue;

        /* Only app ubr / direct call can be followed; anything else has an
         * unknown target, so assume aflags is live.
         */
        if (!instr_is_app(cur) ||
            (!instr_is_ubr(cur) && !instr_is_call_direct(cur)))
            return false;

        fallthrough = instr_get_next(cur);
        target = instr_get_target(cur);
        if (fallthrough == NULL || !instr_is_app(fallthrough) ||
            !opnd_is_pc(target) ||
            opnd_get_pc(target) != instr_get_app_pc(fallthrough))
            return false;
        /* continue on elision */
    }
    return false;
}
コード例 #5
0
ファイル: winsysnums.c プロジェクト: AVGirl/dynamorio
/* Decides whether `instr` is the instruction that performs the system call
 * of a wrapper.  Returns true if so, false otherwise.
 * - found_eax: whether the caller has seen "mov imm => %eax"
 * - found_edx: whether the caller has seen "mov $0x7ffe0300 => %edx"
 *              (see the ASSUMPTION note in the body).
 */
static bool
process_syscall_instr(void *dcontext, instr_t *instr, bool found_eax, bool found_edx)
{
    /* ASSUMPTION: a mov imm of 0x7ffe0300 into edx followed by an
     * indirect call via edx is a system call on XP and later
     * On XP SP1 it's call *edx, while on XP SP2 it's call *(edx)
     * For wow it's a call through fs.
     * FIXME - core exports various is_*_syscall routines (such as
     * instr_is_wow64_syscall()) which we could use here instead of
     * duplicating if they were more flexible about when they could
     * be called (instr_is_wow64_syscall() for ex. asserts if not
     * in a wow process).
     */

    /* int 2e or x64 or win8 sysenter */
    if (instr_is_syscall(instr) && found_eax &&
        (expect_int2e || expect_x64 || expect_sysenter))
        return true;

    /* sysenter case */
    if (expect_sysenter && found_edx && found_eax &&
        instr_is_call_indirect(instr)) {
        opnd_t tgt = instr_get_target(instr);

        /* XP SP{0,1}, 2003 SP0: call *edx */
        if (opnd_is_reg(tgt) && opnd_get_reg(tgt) == REG_EDX)
            return true;
        /* XP SP2, 2003 SP1: call *(edx) */
        if (opnd_is_base_disp(tgt) &&
            opnd_get_base(tgt) == REG_EDX &&
            opnd_get_index(tgt) == REG_NULL &&
            opnd_get_disp(tgt) == 0)
            return true;
    }

    /* wow case
     * we don't require found_ecx b/c win8 does not use ecx
     */
    if (expect_wow && found_eax && instr_is_call_indirect(instr)) {
        opnd_t tgt = instr_get_target(instr);

        if (opnd_is_far_base_disp(tgt) &&
            opnd_get_base(tgt) == REG_NULL &&
            opnd_get_index(tgt) == REG_NULL &&
            opnd_get_segment(tgt) == SEG_FS)
            return true;
        /* win10 has imm in edx and a near call */
        if (found_edx)
            return true;
    }

    return false;
}
コード例 #6
0
ファイル: stats.c プロジェクト: FirstBlue/dynamorio
/* Analysis event, handed the instruction list of the whole basic block.
 *
 * Tallies the application instructions in `bb`, how many of them are
 * floating-point convert/math operations, and how many are syscalls.  The
 * tallies are stored in a per_bb_data_t obtained from dr_thread_alloc() and
 * the pointer to it is written through `user_data`.
 *
 * Always returns DR_EMIT_DEFAULT.
 */
static dr_emit_flags_t
event_analyze_bb(void *drcontext, void *tag, instrlist_t *bb,
                 bool for_trace, bool translating, void **user_data)
{
    per_bb_data_t *summary = dr_thread_alloc(drcontext, sizeof(*summary));
    uint instr_total = 0;
    uint flop_total = 0;
    uint syscall_total = 0;
    instr_t *cur = instrlist_first_app(bb);

    while (cur != NULL) {
        dr_fp_type_t kind;

        instr_total++;
        if (instr_is_floating_ex(cur, &kind)) {
            /* We exclude loads and stores (and reg-reg moves) and state
             * preservation: only convert and math operations are counted.
             */
            if (kind == DR_FP_CONVERT || kind == DR_FP_MATH) {
#ifdef VERBOSE
                dr_print_instr(drcontext, STDOUT, cur, "Found flop: ");
#endif
                flop_total++;
            }
        }
        if (instr_is_syscall(cur))
            syscall_total++;

        cur = instr_get_next_app(cur);
    }

    summary->num_instrs = instr_total;
    summary->num_flops = flop_total;
    summary->num_syscalls = syscall_total;
    *(per_bb_data_t **)user_data = summary;

    return DR_EMIT_DEFAULT;
}
コード例 #7
0
ファイル: winsysnums.c プロジェクト: AVGirl/dynamorio
/* Decodes the code at `entry` as a candidate system call wrapper.
 *
 * - entry: first byte to decode; NULL makes the function return false
 *          immediately (info is then left untouched).
 * - info:  output.  sysnum, num_args and fixup_index are all reset to -1
 *          before decoding.  sysnum receives the first immediate moved into
 *          eax; fixup_index receives the first immediate moved into ecx, or
 *          0 when "xor ecx, ecx" is seen.  The first return instruction is
 *          handed to process_ret() together with info (presumably to fill in
 *          num_args -- process_ret is not visible here, confirm).
 * - img:   image the wrapper belongs to; used only to recognize a wow64
 *          "mov imm => edx" whose immediate lies inside the image.
 *
 * Returns whether a syscall was recognized (the last value assigned to
 * found_syscall); returns false on failure.
 */
static bool
decode_syscall_num(void *dcontext, byte *entry, syscall_info_t *info, LOADED_IMAGE *img)
{
    /* FIXME: would like to fail gracefully rather than have a DR assertion
     * on non-code! => use DEBUG=0 INTERNAL=1 DR build!
     */
    bool found_syscall = false, found_eax = false, found_edx = false, found_ecx = false;
    bool found_ret = false;
    byte *pc, *pre_pc;
    int num_instr = 0;
    instr_t *instr;
    byte *preferred = get_preferred_base(img);
    if (entry == NULL)
        return false;
    info->num_args = -1; /* if find sysnum but not args */
    info->sysnum = -1;
    info->fixup_index = -1;
    /* One instr_t is reused for every decoded instruction and destroyed at exit. */
    instr = instr_create(dcontext);
    pc = entry;
    /* FIXME - we don't support decoding 64bit instructions in 32bit mode, but I want
     * this to work on 32bit machines.  Hack fix based on the wrapper pattern, we skip
     * the first instruction (mov r10, rcx) here, the rest should decode ok.
     * Xref PR 236203. */
    if (expect_x64 && *pc == 0x4c && *(pc+1) == 0x8b && *(pc+2) == 0xd1)
        pc += 3;
    while (true) {
        instr_reset(dcontext, instr);
        /* Remember where this instruction started so verbose output can show it. */
        pre_pc = pc;
        pc = decode(dcontext, pc, instr);
        if (verbose) {
            instr_set_translation(instr, pre_pc);
            dr_print_instr(dcontext, STDOUT, instr, "");
        }
        /* Stop on a decode failure. */
        if (pc == NULL || !instr_valid(instr))
            break;
        if (instr_is_syscall(instr) || instr_is_call_indirect(instr)) {
            /* If we see a syscall instr or an indirect call which is not syscall,
             * we assume this is not a syscall wrapper.
             */
            found_syscall = process_syscall_instr(dcontext, instr, found_eax, found_edx);
            if (!found_syscall)
                break; /* assume not a syscall wrapper, give up gracefully */
        } else if (instr_is_return(instr)) {
            /* we must break on return to avoid case like win8 x86
             * which has sysenter callee adjacent-"inlined"
             *     ntdll!NtYieldExecution:
             *     77d7422c b801000000  mov     eax,1
             *     77d74231 e801000000  call    ntdll!NtYieldExecution+0xb (77d74237)
             *     77d74236 c3          ret
             *     77d74237 8bd4        mov     edx,esp
             *     77d74239 0f34        sysenter
             *     77d7423b c3          ret
             */
            if (!found_ret) {
                process_ret(instr, info);
                found_ret = true;
            }
            break;
        } else if (instr_get_opcode(instr) == OP_call) {
            /* Direct call: let process_syscall_call() look at the callee. */
            found_syscall = process_syscall_call(dcontext, pc, instr,
                                                 found_eax, found_edx);
            /* If we see a call and it is not a sysenter callee,
             * we assume this is not a syscall wrapper.
             */
            if (!found_syscall)
                break; /* assume not a syscall wrapper, give up gracefully */
        } else if (instr_is_cti(instr)) {
            /* We expect only ctis like ret or ret imm, syscall, and call, which are
             * handled above. Give up gracefully if we hit any other cti.
             * XXX: what about jmp to shared ret (seen in the past on some syscalls)?
             */
            /* Update: win10 TH2 1511 x64 has a cti:
             *   ntdll!NtContinue:
             *   00007ff9`13185630 4c8bd1          mov     r10,rcx
             *   00007ff9`13185633 b843000000      mov     eax,43h
             *   00007ff9`13185638 f604250803fe7f01 test    byte ptr [SharedUserData+0x308 (00000000`7ffe0308)],1
             *   00007ff9`13185640 7503            jne     ntdll!NtContinue+0x15 (00007ff9`13185645)
             *   00007ff9`13185642 0f05            syscall
             *   00007ff9`13185644 c3              ret
             *   00007ff9`13185645 cd2e            int     2Eh
             *   00007ff9`13185647 c3              ret
             */
            /* pc already points past the branch, so "pc + 3" is the address
             * right after the 2-byte syscall and 1-byte ret that follow it.
             */
            if (expect_x64 && instr_is_cbr(instr) &&
                opnd_get_pc(instr_get_target(instr)) == pc + 3/*syscall;ret*/) {
                /* keep going */
            } else
                break;
        } else if ((!found_eax || !found_edx || !found_ecx) &&
                   instr_get_opcode(instr) == OP_mov_imm &&
                   opnd_is_reg(instr_get_dst(instr, 0))) {
            /* Only the first qualifying immediate for each of eax/edx/ecx is recorded. */
            if (!found_eax && opnd_get_reg(instr_get_dst(instr, 0)) == REG_EAX) {
                info->sysnum = (int) opnd_get_immed_int(instr_get_src(instr, 0));
                found_eax = true;
            } else if (!found_edx && opnd_get_reg(instr_get_dst(instr, 0)) == REG_EDX) {
                uint imm = (uint) opnd_get_immed_int(instr_get_src(instr, 0));
                if (imm == 0x7ffe0300 ||
                    /* On Win10 the immed is ntdll!Wow64SystemServiceCall */
                    (expect_wow && imm > (ptr_uint_t)preferred &&
                     imm < (ptr_uint_t)preferred + img->SizeOfImage))
                    found_edx = true;
            } else if (!found_ecx && opnd_get_reg(instr_get_dst(instr, 0)) == REG_ECX) {
                found_ecx = true;
                info->fixup_index = (int) opnd_get_immed_int(instr_get_src(instr, 0));
            }
        } else if (instr_get_opcode(instr) == OP_xor &&
                   opnd_is_reg(instr_get_src(instr, 0)) &&
                   opnd_get_reg(instr_get_src(instr, 0)) == REG_ECX &&
                   opnd_is_reg(instr_get_dst(instr, 0)) &&
                   opnd_get_reg(instr_get_dst(instr, 0)) == REG_ECX) {
            /* xor to 0 */
            found_ecx = true;
            info->fixup_index = 0;
        }
        num_instr++;
        if (num_instr > MAX_INSTRS_BEFORE_SYSCALL) /* wrappers should be short! */
            break; /* avoid weird cases like NPXEMULATORTABLE */
    }
    instr_destroy(dcontext, instr);
    return found_syscall;
}
コード例 #8
0
ファイル: winsysnums.c プロジェクト: AVGirl/dynamorio
/* Follows a direct call found inside a candidate wrapper and reports whether
 * its callee performs the system call.
 * - next_pc:   address of the instruction after `call`
 * - call:      the OP_call instruction; its target must be a pc operand
 * - found_eax: whether the caller has seen "mov imm => %eax"
 * - found_edx: whether the caller has seen "mov $0x7ffe0300 => %edx",
 *              xref the comment in process_syscall_instr.
 * Returns false when the target is not strictly after next_pc or lies more
 * than MAX_SYSENTER_CALLEE_OFFSET bytes beyond it; otherwise returns what
 * process_syscall_instr() says about the first syscall / indirect call in
 * the callee, or false if decoding fails, another cti is hit, or the
 * instruction budget runs out first.
 */
static bool
process_syscall_call(void *dcontext, byte *next_pc, instr_t *call,
                     bool found_eax, bool found_edx)
{
    instr_t callee_instr;
    byte *cur_pc;
    int decoded = 0;
    bool result = false;

    assert(instr_get_opcode(call) == OP_call && opnd_is_pc(instr_get_target(call)));
    cur_pc = opnd_get_pc(instr_get_target(call));
    /* assuming the call won't go backward, and won't go far forward either */
    if (cur_pc <= next_pc || cur_pc > next_pc + MAX_SYSENTER_CALLEE_OFFSET)
        return false;

    /* handle win8 x86 which has sysenter callee adjacent-"inlined"
     *     ntdll!NtYieldExecution:
     *     77d7422c b801000000  mov     eax,1
     *     77d74231 e801000000  call    ntdll!NtYieldExecution+0xb (77d74237)
     *     77d74236 c3          ret
     *     77d74237 8bd4        mov     edx,esp
     *     77d74239 0f34        sysenter
     *     77d7423b c3          ret
     *
     * or DrMem-i#1366-c#2
     *     USER32!NtUserCreateWindowStation:
     *     75caea7a b841110000  mov     eax,0x1141
     *     75caea7f e838000000  call    user32!...+0xd (75caeabc)
     *     75caea84 c22000      ret     0x20
     *     ...
     *     USER32!GetWindowStationName:
     *     75caea8c 8bff        mov     edi,edi
     *     75caea8e 55          push    ebp
     *     ...
     *     75caeabc 8bd4        mov     edx,esp
     *     75caeabe 0f34        sysenter
     *     75caeac0 c3          ret
     *
     * We expect the win8 x86 sysenter adjacent "inlined" callee to be as
     * simple as
     *     75caeabc 8bd4        mov     edx,esp
     *     75caeabe 0f34        sysenter
     *     75caeac0 c3          ret
     */
    instr_init(dcontext, &callee_instr);
    for (;;) {
        instr_reset(dcontext, &callee_instr);
        cur_pc = decode(dcontext, cur_pc, &callee_instr);
        if (verbose)
            dr_print_instr(dcontext, STDOUT, &callee_instr, "");
        if (cur_pc == NULL || !instr_valid(&callee_instr))
            break;
        if (instr_is_syscall(&callee_instr) ||
            instr_is_call_indirect(&callee_instr)) {
            result = process_syscall_instr(dcontext, &callee_instr,
                                           found_eax, found_edx);
            break;
        }
        /* Any other control transfer ends the search. */
        if (instr_is_cti(&callee_instr))
            break;
        /* Stop once the callee instruction budget is exceeded. */
        if (++decoded > MAX_INSTRS_SYSENTER_CALLEE)
            break;
    }
    instr_free(dcontext, &callee_instr);
    return result;
}
コード例 #9
0
ファイル: winsysnums.c プロジェクト: Arunpreet/dynamorio
/* Decodes the code at `entry` as a candidate system call wrapper.
 *
 * - entry: first byte to decode; NULL makes the function return false
 *          immediately (info is then left untouched).
 * - info:  output.  sysnum, num_args and fixup_index are all reset to -1
 *          before decoding.  sysnum receives the first immediate moved into
 *          eax; fixup_index receives the first immediate moved into ecx, or
 *          0 when "xor ecx, ecx" is seen.  Return instructions are handed to
 *          process_ret() together with info (presumably to fill in num_args
 *          -- process_ret is not visible here, confirm).
 *
 * Returns whether a syscall instruction pattern was recognized; returns
 * false on failure.
 */
static bool
decode_syscall_num(void *dcontext, byte *entry, syscall_info_t *info)
{
    /* FIXME: would like to fail gracefully rather than have a DR assertion
     * on non-code! => use DEBUG=0 INTERNAL=1 DR build!
     */
    bool found_syscall = false, found_eax = false, found_edx = false, found_ecx = false;
    bool found_ret = false;
    byte *pc;
    int num_instr = 0;
    instr_t *instr;
    if (entry == NULL)
        return false;
    info->num_args = -1; /* if find sysnum but not args */
    info->sysnum = -1;
    info->fixup_index = -1;
    /* One instr_t is reused for every decoded instruction and destroyed at exit. */
    instr = instr_create(dcontext);
    pc = entry;
    /* FIXME - we don't support decoding 64bit instructions in 32bit mode, but I want
     * this to work on 32bit machines.  Hack fix based on the wrapper pattern, we skip
     * the first instruction (mov r10, rcx) here, the rest should decode ok.
     * Xref PR 236203. */
    if (expect_x64 && *pc == 0x4c && *(pc+1) == 0x8b && *(pc+2) == 0xd1)
        pc += 3;
    while (true) {
        instr_reset(dcontext, instr);
        pc = decode(dcontext, pc, instr);
        if (verbose)
            dr_print_instr(dcontext, STDOUT, instr, "");
        /* Stop on a decode failure. */
        if (pc == NULL || !instr_valid(instr))
            break;
        /* ASSUMPTION: a mov imm of 0x7ffe0300 into edx followed by an
         * indirect call via edx is a system call on XP and later
         * On XP SP1 it's call *edx, while on XP SP2 it's call *(edx)
         * For wow it's a call through fs.
         * FIXME - core exports various is_*_syscall routines (such as
         * instr_is_wow64_syscall()) which we could use here instead of
         * duplicating if they were more flexible about when they could
         * be called (instr_is_wow64_syscall() for ex. asserts if not
         * in a wow process).
         */
        if (/* int 2e or x64 or win8 sysenter */
            (instr_is_syscall(instr) && found_eax && (expect_int2e || expect_x64 || expect_sysenter)) ||
            /* sysenter case */
            (expect_sysenter && found_edx && found_eax &&
             instr_is_call_indirect(instr) &&
             /* XP SP{0,1}, 2003 SP0: call *edx */
             ((opnd_is_reg(instr_get_target(instr)) &&
               opnd_get_reg(instr_get_target(instr)) == REG_EDX) ||
              /* XP SP2, 2003 SP1: call *(edx) */
              (opnd_is_base_disp(instr_get_target(instr)) &&
               opnd_get_base(instr_get_target(instr)) == REG_EDX &&
               opnd_get_index(instr_get_target(instr)) == REG_NULL &&
               opnd_get_disp(instr_get_target(instr)) == 0))) ||
            /* wow case
             * we don't require found_ecx b/c win8 does not use ecx
             */
            (expect_wow && found_eax &&
             instr_is_call_indirect(instr) &&
             opnd_is_far_base_disp(instr_get_target(instr)) &&
             opnd_get_base(instr_get_target(instr)) == REG_NULL &&
             opnd_get_index(instr_get_target(instr)) == REG_NULL &&
             opnd_get_segment(instr_get_target(instr)) == SEG_FS)) {
            /* Decoding continues after the syscall; the loop normally ends at
             * the following return.
             */
            found_syscall = true;
        } else if (instr_is_return(instr)) {
            if (!found_ret) {
                process_ret(instr, info);
                found_ret = true;
            }
            break;
        } else if (instr_is_cti(instr)) {
            if (instr_get_opcode(instr) == OP_call) {
                /* handle win8 x86 which has sysenter callee adjacent-"inlined"
                 *     ntdll!NtYieldExecution:
                 *     77d7422c b801000000      mov     eax,1
                 *     77d74231 e801000000      call    ntdll!NtYieldExecution+0xb (77d74237)
                 *     77d74236 c3              ret
                 *     77d74237 8bd4            mov     edx,esp
                 *     77d74239 0f34            sysenter
                 *     77d7423b c3              ret
                 */
                byte *tgt;
                assert(opnd_is_pc(instr_get_target(instr)));
                tgt = opnd_get_pc(instr_get_target(instr));
                /* we expect only ret or ret imm, and possibly some nops (in gdi32).
                 * XXX: what about jmp to shared ret (seen in the past on some syscalls)?
                 */
                /* Only follow a forward call landing at most 16 bytes past the call. */
                if (tgt > pc && tgt <= pc + 16) {
                    bool ok = false;
                    /* Walk the bytes between the call and its target; they must
                     * decode to returns and nops only, ending exactly at tgt.
                     * This reuses the outer loop's pc, instr and num_instr.
                     * NOTE(review): unlike the outer loop, there is no
                     * pc == NULL / instr_valid() check after decode() here;
                     * it appears to rely on an undecodable instruction being
                     * neither a return nor a nop -- confirm.
                     */
                    do {
                        if (pc == tgt) {
                            ok = true;
                            break;
                        }
                        instr_reset(dcontext, instr);
                        pc = decode(dcontext, pc, instr);
                        if (verbose)
                            dr_print_instr(dcontext, STDOUT, instr, "");
                        if (instr_is_return(instr)) {
                            process_ret(instr, info);
                            found_ret = true;
                        } else if (!instr_is_nop(instr))
                            break;
                        num_instr++;
                    } while (num_instr <= MAX_INSTRS_BEFORE_SYSCALL);
                    /* Resume the outer loop at the call target. */
                    if (ok)
                        continue;
                }
            }
            /* assume not a syscall wrapper if we hit a cti */
            break; /* give up gracefully */
        } else if ((!found_eax || !found_edx || !found_ecx) &&
            instr_get_opcode(instr) == OP_mov_imm &&
            opnd_is_reg(instr_get_dst(instr, 0))) {
            /* Only the first qualifying immediate for each of eax/edx/ecx is recorded. */
            if (!found_eax && opnd_get_reg(instr_get_dst(instr, 0)) == REG_EAX) {
                info->sysnum = (int) opnd_get_immed_int(instr_get_src(instr, 0));
                found_eax = true;
            } else if (!found_edx && opnd_get_reg(instr_get_dst(instr, 0)) == REG_EDX) {
                int imm = (int) opnd_get_immed_int(instr_get_src(instr, 0));
                if (imm == 0x7ffe0300)
                    found_edx = true;
            } else if (!found_ecx && opnd_get_reg(instr_get_dst(instr, 0)) == REG_ECX) {
                found_ecx = true;
                info->fixup_index = (int) opnd_get_immed_int(instr_get_src(instr, 0));
            }
        } else if (instr_get_opcode(instr) == OP_xor &&
                   opnd_is_reg(instr_get_src(instr, 0)) &&
                   opnd_get_reg(instr_get_src(instr, 0)) == REG_ECX &&
                   opnd_is_reg(instr_get_dst(instr, 0)) &&
                   opnd_get_reg(instr_get_dst(instr, 0)) == REG_ECX) {
            /* xor to 0 */
            found_ecx = true;
            info->fixup_index = 0;
        }
        num_instr++;
        if (num_instr > MAX_INSTRS_BEFORE_SYSCALL) /* wrappers should be short! */
            break; /* avoid weird cases like NPXEMULATORTABLE */
    }
    instr_destroy(dcontext, instr);
    return found_syscall;
}
コード例 #10
0
ファイル: build_ldstex.c プロジェクト: FirstBlue/dynamorio
/* Here we attempt to combine a loop involving ldex (load exclusive) and
 * stex (store exclusive) into an OP_ldstex macro-instruction. The algorithm
 * is roughly this:
 *
 * Decode up to (2 * N) instructions while:
 * - none of them are indirect branches or system calls
 * - none of them is a direct branch out of these (2 * N) instructions
 * - none of them is OP_xx (to be safe)
 * - there is, or might yet be, both ldex and stex in the first N
 * - none of them is a non-branch PC-relative instruction: ADR, ADRP,
 *   PC-relative PRFM, literal load (this last condition could be removed
 *   if we mangled such instructions as we encountered them)
 *
 * To save time, give up if the first instruction is neither ldex nor stex
 * and there is no branch to it.
 * Take a sub-block containing both ldex and stex from the first N instructions.
 * Expand this sub-block to a minimal single-entry single-exit block.
 * Give up if the sub-block grows beyond N instructions.
 * Finally, give up if the sub-block does not contain the first instruction.
 * Also give up if the sub-block uses all of X0-X5 and the stolen register
 * because we would be unable to mangle such a block.
 *
 * Parameters:
 * - pc_:          address the instruction bytes are read from
 * - orig_pc_:     address passed to decode_from_copy() as the original
 *                 location of those bytes
 * - instr_ldstex: passed to instr_create_ldstex() on success; not touched
 *                 on failure
 *
 * Returns NULL on failure; on success returns the address just past the
 * sub-block, i.e. pc_ advanced by ldstex_end instructions.
 *
 * NOTE(review): the description above and the size of ibuf both say 2 * N,
 * but the decode loop below is bounded by N, and there is no explicit
 * "grows beyond N" test (with the N bound the sub-block cannot exceed N
 * anyway).  Confirm which bound is intended.
 *
 * XXX: This function uses a lot of CPU time. It could be made faster in
 * several ways, for example by caching decoded instructions or using a
 * custom decoder to recognise the particular instructions that we care
 * about here.
 */
byte *
decode_ldstex(dcontext_t *dcontext, byte *pc_, byte *orig_pc_, instr_t *instr_ldstex)
{
    /* N: how many fixed-size instructions fit in MAX_INSTR_LENGTH bytes. */
# define N (MAX_INSTR_LENGTH / AARCH64_INSTR_SIZE)
    instr_t ibuf[2 * N];
    /* Viewed as uint pointers so that "pc + i" addresses the i-th instruction. */
    uint *pc = (uint *)pc_;
    uint *orig_pc = (uint *)orig_pc_;
    bool seen_ldex = false;
    bool seen_stex = false;
    bool seen_branch_to_start = false;
    bool failed = false;
    /* Sub-block is ibuf[ldstex_beg .. ldstex_end - 1]; -1 means not yet set. */
    int ldstex_beg = -1;
    int ldstex_end = -1;
    int i, len;

    /* Decode instructions one at a time until a stop condition is hit.
     * NOTE(review): this loop runs to N although the original comment here
     * read "Decode up to 2 * N instructions" -- see the header note.
     */
    for (i = 0; i < N; i++) {
        instr_t *instr = &ibuf[i];
        instr_init(dcontext, instr);
        decode_from_copy(dcontext, (byte *)(pc + i), (byte *)(orig_pc + i), instr);
        if (instr_is_mbr_arch(instr) || instr_is_syscall(instr) ||
            instr_get_opcode(instr) == OP_xx || instr_is_nonbranch_pcrel(instr))
            break;
        if (instr_is_ubr_arch(instr) || instr_is_cbr_arch(instr)) {
            ptr_uint_t target = (ptr_uint_t)instr_get_branch_target_pc(instr);
            /* A direct branch leaving the [pc, pc + 2 * N] window ends decoding. */
            if (target < (ptr_uint_t)pc || target > (ptr_uint_t)(pc + 2 * N))
                break;
            if (target == (ptr_uint_t)pc)
                seen_branch_to_start = true;
        }
        if (instr_is_exclusive_load(instr))
            seen_ldex = true;
        if (instr_is_exclusive_store(instr))
            seen_stex = true;
        /* Both ldex and stex must show up within the first N instructions. */
        if (i + 1 >= N && !(seen_ldex && seen_stex))
            break;
        /* beg: index of the first ldex/stex; end: one past the instruction
         * that completes the ldex + stex pair.
         */
        if (ldstex_beg == -1 && (seen_ldex || seen_stex))
            ldstex_beg = i;
        if (ldstex_end == -1 && (seen_ldex && seen_stex))
            ldstex_end = i + 1;
    }
    /* On an early break ibuf[i] was initialized but is not part of the
     * accepted prefix: release it now and keep only ibuf[0 .. len - 1].
     */
    if (i < N) {
        instr_reset(dcontext, &ibuf[i]);
        len = i;
    } else
        len = N;

    /* Quick check for hopeless situations. */
    if (len == 0 || !(seen_ldex && seen_stex) ||
        !(seen_branch_to_start || (instr_is_exclusive_load(&ibuf[0]) ||
                                   instr_is_exclusive_store(&ibuf[0])))) {
        for (i = 0; i < len; i++)
            instr_reset(dcontext, &ibuf[i]);
        return NULL;
    }

    /* There are several ways we could choose a sub-block containing both ldex
     * and stex from the first N instructions. Investigate further, perhaps.
     * We have already set ldstex_beg and ldstex_end.
     */
    ASSERT(ldstex_beg != -1 && ldstex_end != -1 && ldstex_beg < ldstex_end);

    /* Expand ldstex sub-block until it is a single-entry single-exit block. */
    for (;;) {
        int new_beg = ldstex_beg;
        int new_end = ldstex_end;
        for (i = ldstex_beg; i < ldstex_end; i++) {
            instr_t *instr = &ibuf[i];
            if (instr_is_ubr_arch(instr) || instr_is_cbr_arch(instr)) {
                /* Branch target expressed as an instruction index relative to pc. */
                int target = (uint *)instr_get_branch_target_pc(instr) - pc;
                /* Target lies beyond the decoded instructions: cannot expand. */
                if (target > len) {
                    failed = true;
                    break;
                }
                /* Grow the sub-block so that it covers the branch target. */
                if (target < new_beg)
                    new_beg = target;
                if (target > new_end)
                    new_end = target;
            }
        }
        /* Fixed point reached: no branch required further growth on this pass. */
        if (new_beg == ldstex_beg && new_end == ldstex_end)
            break;
        ldstex_beg = new_beg;
        ldstex_end = new_end;
    }

    /* The sub-block must start at the first decoded instruction. */
    if (ldstex_beg != 0)
        failed = true;

    if (!failed) {
        /* Check whether the sub-block uses the stolen register and all of X0-X5.
         * If it does, it would be impossible to mangle it so it is better not to
         * create an OP_ldstex.
         */
        reg_id_t regs[] = { dr_reg_stolen,
                            DR_REG_X0, DR_REG_X1, DR_REG_X2,
                            DR_REG_X3, DR_REG_X4, DR_REG_X5 };
        int r;
        for (r = 0; r < sizeof(regs) / sizeof(*regs); r++) {
            /* Look for any instruction in the sub-block that uses regs[r]. */
            for (i = ldstex_beg; i < ldstex_end; i++) {
                if (instr_uses_reg(&ibuf[i], regs[r]))
                    break;
            }
            /* regs[r] is unused by the sub-block: stop searching. */
            if (i >= ldstex_end)
                break;
        }
        /* Ran through the whole list, so every listed register is in use. */
        if (r >= sizeof(regs) / sizeof(*regs))
            failed = true;
    }

    if (!failed) {
        instr_create_ldstex(dcontext, ldstex_end - ldstex_beg,
                            pc + ldstex_beg, &ibuf[ldstex_beg], instr_ldstex);
    }

    /* Release every decoded instruction that is still held in ibuf. */
    for (i = 0; i < len; i++)
        instr_reset(dcontext, &ibuf[i]);
    return failed ? NULL : (byte *)(pc + ldstex_end);
}