Exemple #1
0
/*
 * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
 *   instruction as having them.  Also sets the instruction's default operand,
 *   address, and other relevant data sizes to report operands correctly.
 *
 *   Legacy prefixes (groups 1-4) are consumed in a loop.  The first byte that
 *   is not a legacy prefix is then examined as a possible VEX prefix (0xc4 or
 *   0xc5) or, in 64-bit mode, a REX prefix (0x4x).  If it is none of these it
 *   is handed back with unconsumeByte().  Redundant legacy prefixes are only
 *   logged; they do not make the function fail.
 *
 * @param insn  - The instruction whose prefixes are to be read.
 * @return      - 0 if the prefix bytes could be read to the end; -1 if a byte
 *                could not be read (including the payload bytes of a VEX
 *                prefix) or if a REX prefix is followed by another 0x4x byte.
 */
static int readPrefixes(struct InternalInstruction* insn) {
  BOOL isPrefix = TRUE;
  BOOL prefixGroups[4] = { FALSE };
  uint64_t prefixLocation;
  uint8_t byte = 0;
  
  BOOL hasAdSize = FALSE;
  BOOL hasOpSize = FALSE;
  
  dbgprintf(insn, "readPrefixes()");
    
  /* Keep consuming until a byte that is not a legacy prefix shows up. */
  while (isPrefix) {
    prefixLocation = insn->readerCursor;
    
    if (consumeByte(insn, &byte))
      return -1;
    
    switch (byte) {
    case 0xf0:  /* LOCK */
    case 0xf2:  /* REPNE/REPNZ */
    case 0xf3:  /* REP or REPE/REPZ */
      if (prefixGroups[0])
        dbgprintf(insn, "Redundant Group 1 prefix");
      prefixGroups[0] = TRUE;
      setPrefixPresent(insn, byte, prefixLocation);
      break;
    case 0x2e:  /* CS segment override -OR- Branch not taken */
    case 0x36:  /* SS segment override -OR- Branch taken */
    case 0x3e:  /* DS segment override */
    case 0x26:  /* ES segment override */
    case 0x64:  /* FS segment override */
    case 0x65:  /* GS segment override */
      switch (byte) {
      case 0x2e:
        insn->segmentOverride = SEG_OVERRIDE_CS;
        break;
      case 0x36:
        insn->segmentOverride = SEG_OVERRIDE_SS;
        break;
      case 0x3e:
        insn->segmentOverride = SEG_OVERRIDE_DS;
        break;
      case 0x26:
        insn->segmentOverride = SEG_OVERRIDE_ES;
        break;
      case 0x64:
        insn->segmentOverride = SEG_OVERRIDE_FS;
        break;
      case 0x65:
        insn->segmentOverride = SEG_OVERRIDE_GS;
        break;
      default:
        /* Not reachable: the outer case labels cover exactly these bytes. */
        debug("Unhandled override");
        return -1;
      }
      if (prefixGroups[1])
        dbgprintf(insn, "Redundant Group 2 prefix");
      prefixGroups[1] = TRUE;
      setPrefixPresent(insn, byte, prefixLocation);
      break;
    case 0x66:  /* Operand-size override */
      if (prefixGroups[2])
        dbgprintf(insn, "Redundant Group 3 prefix");
      prefixGroups[2] = TRUE;
      hasOpSize = TRUE;
      setPrefixPresent(insn, byte, prefixLocation);
      break;
    case 0x67:  /* Address-size override */
      if (prefixGroups[3])
        dbgprintf(insn, "Redundant Group 4 prefix");
      prefixGroups[3] = TRUE;
      hasAdSize = TRUE;
      setPrefixPresent(insn, byte, prefixLocation);
      break;
    default:    /* Not a prefix byte */
      isPrefix = FALSE;
      break;
    }
    
    if (isPrefix)
      dbgprintf(insn, "Found prefix 0x%hhx", byte);
  }
    
  insn->vexSize = 0;
  
  /* 'byte' now holds the first byte that is not a legacy prefix. */
  if (byte == 0xc4) {
    uint8_t byte1;
      
    if (lookAtByte(insn, &byte1)) {
      dbgprintf(insn, "Couldn't read second byte of VEX");
      return -1;
    }
    
    /*
     * Outside 64-bit mode 0xc4 is only taken as VEX when the top two bits of
     * the following byte are both set; otherwise the byte is handed back.
     */
    if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
      insn->vexSize = 3;
      insn->necessaryPrefixLocation = insn->readerCursor - 1;
    }
    else {
      unconsumeByte(insn);
      insn->necessaryPrefixLocation = insn->readerCursor - 1;
    }
    
    if (insn->vexSize == 3) {
      insn->vexPrefix[0] = byte;

      /* Fail instead of decoding with stale VEX bytes if the stream ends. */
      if (consumeByte(insn, &insn->vexPrefix[1]) ||
          consumeByte(insn, &insn->vexPrefix[2])) {
        dbgprintf(insn, "Couldn't read remaining bytes of VEX");
        return -1;
      }

      /* We simulate the REX prefix for simplicity's sake */
   
      if (insn->mode == MODE_64BIT) {
        insn->rexPrefix = 0x40 
                        | (wFromVEX3of3(insn->vexPrefix[2]) << 3)
                        | (rFromVEX2of3(insn->vexPrefix[1]) << 2)
                        | (xFromVEX2of3(insn->vexPrefix[1]) << 1)
                        | (bFromVEX2of3(insn->vexPrefix[1]) << 0);
      }
    
      /* A VEX.pp value of VEX_PREFIX_66 counts as an operand-size prefix. */
      switch (ppFromVEX3of3(insn->vexPrefix[2]))
      {
      default:
        break;
      case VEX_PREFIX_66:
        hasOpSize = TRUE;      
        break;
      }
    
      dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]);
    }
  }
  else if (byte == 0xc5) {
    uint8_t byte1;
    
    if (lookAtByte(insn, &byte1)) {
      dbgprintf(insn, "Couldn't read second byte of VEX");
      return -1;
    }
      
    if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
      insn->vexSize = 2;
    }
    else {
      unconsumeByte(insn);
      /*
       * Record the location of the byte preceding the handed-back byte, as
       * the 0xc4 fallback and the generic path below do, so that the field
       * is always written on the non-VEX path.
       */
      insn->necessaryPrefixLocation = insn->readerCursor - 1;
    }
    
    if (insn->vexSize == 2) {
      insn->vexPrefix[0] = byte;

      /* Fail instead of decoding with a stale VEX byte if the read fails. */
      if (consumeByte(insn, &insn->vexPrefix[1])) {
        dbgprintf(insn, "Couldn't read remaining bytes of VEX");
        return -1;
      }
        
      /* Only the R bit is taken from a two-byte VEX prefix here. */
      if (insn->mode == MODE_64BIT) {
        insn->rexPrefix = 0x40 
                        | (rFromVEX2of2(insn->vexPrefix[1]) << 2);
      }
        
      switch (ppFromVEX2of2(insn->vexPrefix[1]))
      {
      default:
        break;
      case VEX_PREFIX_66:
        hasOpSize = TRUE;      
        break;
      }
         
      dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]);
    }
  }
  else {
    if (insn->mode == MODE_64BIT) {
      if ((byte & 0xf0) == 0x40) {
        uint8_t opcodeByte;
          
        /* A REX byte followed by another 0x4x byte is rejected. */
        if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
          dbgprintf(insn, "Redundant REX prefix");
          return -1;
        }
          
        insn->rexPrefix = byte;
        /*
         * The REX byte stays consumed, so the location recorded is two back
         * from the cursor rather than one.
         */
        insn->necessaryPrefixLocation = insn->readerCursor - 2;
          
        dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
      } else {                
        unconsumeByte(insn);
        insn->necessaryPrefixLocation = insn->readerCursor - 1;
      }
    } else {
      unconsumeByte(insn);
      insn->necessaryPrefixLocation = insn->readerCursor - 1;
    }
  }

  /*
   * Default sizes, in bytes.  The 0x66 / 0x67 overrides flip between the
   * 2- and 4-byte forms in 16- and 32-bit mode.
   */
  if (insn->mode == MODE_16BIT) {
    insn->registerSize       = (hasOpSize ? 4 : 2);
    insn->addressSize        = (hasAdSize ? 4 : 2);
    insn->displacementSize   = (hasAdSize ? 4 : 2);
    insn->immediateSize      = (hasOpSize ? 4 : 2);
  } else if (insn->mode == MODE_32BIT) {
    insn->registerSize       = (hasOpSize ? 2 : 4);
    insn->addressSize        = (hasAdSize ? 2 : 4);
    insn->displacementSize   = (hasAdSize ? 2 : 4);
    insn->immediateSize      = (hasOpSize ? 2 : 4);
  } else if (insn->mode == MODE_64BIT) {
    if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
      /* REX.W: 8-byte registers; displacement and immediate stay 4 bytes. */
      insn->registerSize       = 8;
      insn->addressSize        = (hasAdSize ? 4 : 8);
      insn->displacementSize   = 4;
      insn->immediateSize      = 4;
    } else {
      /* Same sizes whether or not a REX prefix without W is present. */
      insn->registerSize       = (hasOpSize ? 2 : 4);
      insn->addressSize        = (hasAdSize ? 4 : 8);
      insn->displacementSize   = (hasOpSize ? 2 : 4);
      insn->immediateSize      = (hasOpSize ? 2 : 4);
    }
  }
  
  return 0;
}
Exemple #2
0
/*
 * getID - Determines the ID of an instruction, consuming the ModR/M byte as
 *   appropriate for extended and escape opcodes.  The attribute mask (mode,
 *   VEX or legacy prefix, REX.W) is assembled first and handed to
 *   getIDWithAttrMask(); a few follow-up lookups then compensate for cases
 *   the tables cannot express.
 *
 * @param insn    - The instruction whose ID is to be determined.
 * @param miiArg  - Opaque argument passed through to
 *                  x86DisassemblerGetInstrName() when instruction names have
 *                  to be compared.
 * @return        - 0 on success; -1 if insn->vexSize is neither 0, 2 nor 3, or
 *                  if the initial getIDWithAttrMask() lookup fails.
 */
static int getID(struct InternalInstruction* insn, void *miiArg) {
  /* Legacy prefix byte -> attribute, tried in this order; first hit wins. */
  static const struct {
    uint8_t prefix;
    uint8_t attr;
  } legacyPrefixAttrs[] = {
    { 0x66, ATTR_OPSIZE },
    { 0x67, ATTR_ADSIZE },
    { 0xf3, ATTR_XS },
    { 0xf2, ATTR_XD }
  };

  uint8_t attrs = ATTR_NONE;
  uint16_t baseID;
  unsigned i;

  dbgprintf(insn, "getID()");

  if (insn->mode == MODE_64BIT) {
    attrs |= ATTR_64BIT;
  }

  if (!insn->vexSize) {
    /* No VEX: look for a legacy prefix at the recorded location. */
    for (i = 0; i < sizeof(legacyPrefixAttrs) / sizeof(legacyPrefixAttrs[0]);
         ++i) {
      if (isPrefixAtLocation(insn,
                             legacyPrefixAttrs[i].prefix,
                             insn->necessaryPrefixLocation)) {
        attrs |= legacyPrefixAttrs[i].attr;
        break;
      }
    }
  } else if (insn->vexSize == 3) {
    attrs |= ATTR_VEX;

    switch (ppFromVEX3of3(insn->vexPrefix[2])) {
    case VEX_PREFIX_66:
      attrs |= ATTR_OPSIZE;
      break;
    case VEX_PREFIX_F3:
      attrs |= ATTR_XS;
      break;
    case VEX_PREFIX_F2:
      attrs |= ATTR_XD;
      break;
    default:
      break;
    }

    if (lFromVEX3of3(insn->vexPrefix[2])) {
      attrs |= ATTR_VEXL;
    }
  } else if (insn->vexSize == 2) {
    attrs |= ATTR_VEX;

    switch (ppFromVEX2of2(insn->vexPrefix[1])) {
    case VEX_PREFIX_66:
      attrs |= ATTR_OPSIZE;
      break;
    case VEX_PREFIX_F3:
      attrs |= ATTR_XS;
      break;
    case VEX_PREFIX_F2:
      attrs |= ATTR_XD;
      break;
    default:
      break;
    }

    if (lFromVEX2of2(insn->vexPrefix[1])) {
      attrs |= ATTR_VEXL;
    }
  } else {
    /* Any other VEX size is not understood. */
    return -1;
  }

  if (insn->rexPrefix & 0x08) {
    attrs |= ATTR_REXW;
  }

  if (getIDWithAttrMask(&baseID, insn, attrs) != 0) {
    return -1;
  }

  /* Everything below works around limitations of the tables. */

  if ((attrs & ATTR_VEXL) && (attrs & ATTR_REXW) && !(attrs & ATTR_OPSIZE)) {
    /*
     * Some VEX instructions ignore the L-bit but use the W-bit.  The L-bit
     * normally wins because the tables have no L-with-W entries, so retry
     * without L and prefer that result whenever it differs.  This is only
     * needed without OpSize, since a VEX_L_W_OPSIZE table exists.
     */
    const struct InstructionSpecifier *baseSpec = specifierForUID(baseID);
    const struct InstructionSpecifier *wBitSpec;
    uint16_t wBitID;
    int sameID;

    if (getIDWithAttrMask(&wBitID,
                          insn,
                          (attrs & (~ATTR_VEXL)) | ATTR_REXW) != 0) {
      insn->instructionID = baseID;
      insn->spec = baseSpec;
      return 0;
    }

    wBitSpec = specifierForUID(wBitID);
    sameID = (baseID == wBitID);

    insn->instructionID = sameID ? baseID : wBitID;
    insn->spec = sameID ? baseSpec : wBitSpec;
    return 0;
  }

  if (insn->prefixPresent[0x66] && !(attrs & ATTR_OPSIZE)) {
    /*
     * The tables do not distinguish instructions that accept OpSize anywhere
     * (i.e., 16-bit operations) from those that need it in a particular spot
     * (i.e., many MMX operations).  When OpSize is present but was not picked
     * up above, check whether a 16-bit variant of the instruction exists.
     */
    const struct InstructionSpecifier *baseSpec = specifierForUID(baseID);
    uint16_t opsizeID;
    const char *baseName;
    const char *opsizeName;

    if (getIDWithAttrMask(&opsizeID, insn, attrs | ATTR_OPSIZE) != 0) {
      /*
       * ModRM required with OpSize but not present; fall back to the version
       * decoded without OpSize.
       */
      insn->instructionID = baseID;
      insn->spec = baseSpec;
      return 0;
    }

    baseName = x86DisassemblerGetInstrName(baseID, miiArg);
    opsizeName = x86DisassemblerGetInstrName(opsizeID, miiArg);

    if (!is16BitEquvalent(baseName, opsizeName)) {
      insn->instructionID = baseID;
      insn->spec = baseSpec;
    } else {
      insn->instructionID = opsizeID;
      insn->spec = specifierForUID(opsizeID);
    }
    return 0;
  }

  if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
      (insn->rexPrefix & 0x01)) {
    /*
     * With REX.b set, 0x90 should not decode as NOOP but as
     * XCHG %r8, %eax.
     */
    const struct InstructionSpecifier *baseSpec = specifierForUID(baseID);
    uint16_t xchgID;
    int lookupFailed;

    /* Temporarily borrow the opcode of one of the other XCHGar forms. */
    insn->opcode = 0x91;
    lookupFailed = getIDWithAttrMask(&xchgID, insn, attrs);
    insn->opcode = 0x90;

    if (lookupFailed) {
      insn->instructionID = baseID;
      insn->spec = baseSpec;
      return 0;
    }

    insn->instructionID = xchgID;
    insn->spec = specifierForUID(xchgID);
    return 0;
  }

  insn->instructionID = baseID;
  insn->spec = specifierForUID(insn->instructionID);

  return 0;
}
Exemple #3
0
/*
 * getID - Determines the ID of an instruction, consuming the ModR/M byte as
 *   appropriate for extended and escape opcodes.  The attribute mask (mode,
 *   VEX or legacy prefix, REX.W) is assembled first and handed to
 *   getIDWithAttrMask(); two follow-up lookups then compensate for cases the
 *   tables cannot express.
 *
 * @param insn  - The instruction whose ID is to be determined.
 * @return      - 0 on success; -1 if insn->vexSize is neither 0, 2 nor 3, or
 *                if the initial getIDWithAttrMask() lookup fails.
 */
static int getID(struct InternalInstruction* insn) {
  /* Legacy prefix byte -> attribute, tried in this order; first hit wins. */
  static const struct {
    uint8_t prefix;
    uint8_t attr;
  } legacyPrefixAttrs[] = {
    { 0x66, ATTR_OPSIZE },
    { 0xf3, ATTR_XS },
    { 0xf2, ATTR_XD }
  };

  uint8_t attrs = ATTR_NONE;
  uint16_t baseID;
  unsigned i;

  dbgprintf(insn, "getID()");

  if (insn->mode == MODE_64BIT) {
    attrs |= ATTR_64BIT;
  }

  if (!insn->vexSize) {
    /* No VEX: REX.W comes from the REX prefix itself. */
    if (insn->rexPrefix & 0x08) {
      attrs |= ATTR_REXW;
    }

    for (i = 0; i < sizeof(legacyPrefixAttrs) / sizeof(legacyPrefixAttrs[0]);
         ++i) {
      if (isPrefixAtLocation(insn,
                             legacyPrefixAttrs[i].prefix,
                             insn->necessaryPrefixLocation)) {
        attrs |= legacyPrefixAttrs[i].attr;
        break;
      }
    }
  } else if (insn->vexSize == 3) {
    attrs |= ATTR_VEX;

    switch (ppFromVEX3of3(insn->vexPrefix[2])) {
    case VEX_PREFIX_66:
      attrs |= ATTR_OPSIZE;
      break;
    case VEX_PREFIX_F3:
      attrs |= ATTR_XS;
      break;
    case VEX_PREFIX_F2:
      attrs |= ATTR_XD;
      break;
    default:
      break;
    }

    /* The three-byte form carries both the W and the L bit. */
    if (wFromVEX3of3(insn->vexPrefix[2])) {
      attrs |= ATTR_REXW;
    }
    if (lFromVEX3of3(insn->vexPrefix[2])) {
      attrs |= ATTR_VEXL;
    }
  } else if (insn->vexSize == 2) {
    attrs |= ATTR_VEX;

    switch (ppFromVEX2of2(insn->vexPrefix[1])) {
    case VEX_PREFIX_66:
      attrs |= ATTR_OPSIZE;
      break;
    case VEX_PREFIX_F3:
      attrs |= ATTR_XS;
      break;
    case VEX_PREFIX_F2:
      attrs |= ATTR_XD;
      break;
    default:
      break;
    }

    if (lFromVEX2of2(insn->vexPrefix[1])) {
      attrs |= ATTR_VEXL;
    }
  } else {
    /* Any other VEX size is not understood. */
    return -1;
  }

  if (getIDWithAttrMask(&baseID, insn, attrs) != 0) {
    return -1;
  }

  /* Everything below works around limitations of the tables. */

  if ((attrs & ATTR_XD) && (attrs & ATTR_REXW)) {
    /*
     * For SSE instructions REX.W+F2 usually has to be decoded as F2 (there is
     * no 64BIT_REXW_XD category), but occasionally F2 is incidental and REX.W
     * is what matters.  If dropping F2 turns the decoded 32-bit instruction
     * into its 64-bit counterpart, adopt that encoding instead.
     */
    const struct InstructionSpecifier *baseSpec = specifierForUID(baseID);
    const struct InstructionSpecifier *rexwSpec;
    uint16_t rexwID;

    if (getIDWithAttrMask(&rexwID, insn, attrs & (~ATTR_XD)) != 0) {
      /* Decoding with REX.w alone yields nothing; keep the original. */
      insn->instructionID = baseID;
      insn->spec = baseSpec;
      return 0;
    }

    rexwSpec = specifierForUID(rexwID);

    if (!is64BitEquivalent(baseSpec->name, rexwSpec->name)) {
      insn->instructionID = baseID;
      insn->spec = baseSpec;
    } else {
      insn->instructionID = rexwID;
      insn->spec = rexwSpec;
    }
    return 0;
  }

  if (insn->prefixPresent[0x66] && !(attrs & ATTR_OPSIZE)) {
    /*
     * The tables do not distinguish instructions that accept OpSize anywhere
     * (i.e., 16-bit operations) from those that need it in a particular spot
     * (i.e., many MMX operations).  When OpSize is present but was not picked
     * up above, check whether a 16-bit variant of the instruction exists.
     */
    const struct InstructionSpecifier *baseSpec = specifierForUID(baseID);
    const struct InstructionSpecifier *opsizeSpec;
    uint16_t opsizeID;

    if (getIDWithAttrMask(&opsizeID, insn, attrs | ATTR_OPSIZE) != 0) {
      /*
       * ModRM required with OpSize but not present; fall back to the version
       * decoded without OpSize.
       */
      insn->instructionID = baseID;
      insn->spec = baseSpec;
      return 0;
    }

    opsizeSpec = specifierForUID(opsizeID);

    if (!is16BitEquvalent(baseSpec->name, opsizeSpec->name)) {
      insn->instructionID = baseID;
      insn->spec = baseSpec;
    } else {
      insn->instructionID = opsizeID;
      insn->spec = opsizeSpec;
    }
    return 0;
  }

  insn->instructionID = baseID;
  insn->spec = specifierForUID(insn->instructionID);

  return 0;
}