/**
 * W combinator:  Wxy --> xyy
 *
 * Pops the two topmost nodes from the spine, rewrites the second one in
 * place and pushes it back onto the spine.
 *
 * @param spine  array used as a stack of the nodes being reduced
 * @param nargs  number of arguments available; at least 2 are required
 * @param m      manager that wraps the newly created Apply
 * @return the rewritten node, or 0 when nargs < 2 (nothing is popped then)
 */
static Object *
apply_W( Array *spine, unsigned int nargs, Manager *m )
{
    Object *first, *second;

    if ( nargs < 2 )
    {
        return 0;
    }

    first = array__pop( spine );
    second = array__pop( spine );

    /* The function of the Apply becomes the new object @xy. */
    SET_FUNCTION( second,
        manager__object( m, apply_type,
            apply__new( OPERAND( first ), OPERAND( second ) ),
            NOFLAGS ) );

    /* The operand of the Apply is y: its current operand is stored back. */
    SET_OPERAND( second, OPERAND( second ) );

    array__push( spine, second );
    return second;
}
// regrepeat - count how many times the simple node `node` matches,
// starting at reginput.  reginput itself is not modified here.
//
// Returns the repeat count; 0 (after tracing an error) when the node's
// opcode is not one of ANY, EXACTLY, ANYOF or ANYBUT.
size_t CRegExp::regrepeat(TCHAR *node)
{
    switch (OP(node)) {
    case ANY:
        // The count is the length of the remaining input.
        return _tcslen(reginput);

    case EXACTLY: {
        // Only the first operand character is compared.
        const TCHAR wanted = *OPERAND(node);
        size_t matched = 0;
        while (reginput[matched] == wanted)
            matched++;
        return matched;
    }

    case ANYOF:
        return _tcsspn(reginput, OPERAND(node));

    case ANYBUT:
        return _tcscspn(reginput, OPERAND(node));

    default:
        // Oh dear. Called inappropriately.
        TRACE0("internal error: bad call of regrepeat\n");
        return 0;   // Best compromise.
    }
}
/**
 * R combinator:  Rxyz --> yzx
 *
 * Pops the three topmost nodes from the spine and rewrites the third one
 * in place.  The result is not pushed back onto the spine.
 *
 * @param spine  array used as a stack of the nodes being reduced
 * @param nargs  number of arguments available; at least 3 are required
 * @param m      manager that wraps the newly created Apply
 * @return the rewritten node, or 0 when nargs < 3 (nothing is popped then)
 */
static Object *
apply_R( Array *spine, unsigned int nargs, Manager *m )
{
    Object *first, *second, *third;

    if ( nargs < 3 )
    {
        return 0;
    }

    first = array__pop( spine );
    second = array__pop( spine );
    third = array__pop( spine );

    /* The function of the Apply becomes the new object @yz. */
    SET_FUNCTION( third,
        manager__object( m, apply_type,
            apply__new( OPERAND( second ), OPERAND( third ) ),
            NOFLAGS ) );

    /* The operand of the Apply becomes x. */
    SET_OPERAND( third, OPERAND( first ) );

    return third;
}
/* - regoptail - regtail on operand of first argument; nop if operandless */ static void regoptail (char * p, char * val) { /* "Operandless" and "op != BRANCH" are synonymous in practice. */ if (p == (char *) NULL || p == ®dummy || OP(p) != BRANCH) return; regtail(OPERAND(p), val); }
/*
 * regoptail - regtail on the operand of node p; does nothing when p is 0
 * or when p is not a BRANCH node.
 *
 * preg : compiled-expression state passed through to OP() and regtail()
 * p    : index of the node to extend
 * val  : value forwarded unchanged to regtail()
 */
static void regoptail(regex_t *preg, int p, int val )
{
    /* "Operandless" and "op != BRANCH" are synonymous in practice. */
    if (p == 0 || OP(preg, p) != BRANCH) {
        return;
    }
    regtail(preg, OPERAND(p), val);
}
/**
 * Y combinator:  Yf --> f(Yf)
 *
 * Pops the topmost node from the spine and rewrites it in place.  The
 * result is not pushed back onto the spine.
 *
 * @param spine  array used as a stack of the nodes being reduced
 * @param nargs  number of arguments available; at least 1 is required
 * @param m      manager that wraps the newly created Apply
 * @return the rewritten node, or 0 when nargs < 1 (nothing is popped then)
 */
static Object *
apply_Y( Array *spine, unsigned int nargs, Manager *m )
{
    Object *node, *f;

    if ( nargs < 1 )
    {
        return 0;
    }

    node = array__pop( spine );
    f = OPERAND( node );

    /* The operand becomes the new object @Yf.  This must happen first:
       the new Apply is built from the node's current function. */
    SET_OPERAND( node,
        manager__object( m, apply_type,
            apply__new( FUNCTION( node ), f ),
            NOFLAGS ) );

    /* The function of the Apply becomes f. */
    SET_FUNCTION( node, f );

    return node;
}
// regoptail - regtail on the operand of p.
// Does nothing unless code is being emitted (bEmitCode) and p is a
// BRANCH node; OP(p) is only evaluated when bEmitCode is set.
void CRegExp::regoptail(TCHAR *p, TCHAR *val)
{
    // "Operandless" and "op != BRANCH" are synonymous in practice.
    if (bEmitCode && OP(p) == BRANCH)
        regtail(OPERAND(p), val);
}
/*
 * regmatchsimplerepeat - match a repeat node whose body is a simple node.
 *
 * The repeat bounds are read from preg->program[scan + 2] (max) and
 * preg->program[scan + 3] (min); the repeated node starts at scan + 5.
 * regrepeat() reports how many repetitions are possible, then each
 * candidate count is tried against the rest of the pattern:
 *   matchmin != 0 : counts from min upwards to the number found
 *   matchmin == 0 : counts from the number found downwards to min
 *
 * Returns 1 as soon as regmatch() succeeds on the following node,
 * 0 if fewer than min repetitions are possible or no count works.
 */
static int regmatchsimplerepeat(regex_t *preg, int scan, int matchmin)
{
    int max = preg->program[scan + 2];
    int min = preg->program[scan + 3];
    int next = regnext(preg, scan);
    int nextch = '\0';
    const char *save;
    int no;
    int c;
    int step;

    /*
     * Lookahead to avoid useless match attempts
     * when we know what character comes next.
     */
    if (OP(preg, next) == EXACTLY) {
        nextch = preg->program[OPERAND(next)];
    }

    save = preg->reginput;
    no = regrepeat(preg, scan + 5, max);
    if (no < min) {
        return 0;
    }

    if (matchmin) {
        /* from min up to no */
        max = no;
        no = min;
        step = 1;
    }
    else {
        /* from no down to min */
        step = -1;
    }

    for (; matchmin ? (no <= max) : (no >= min); no += step) {
        preg->reginput = save + utf8_index(save, no);
        reg_utf8_tounicode_case(preg->reginput, &c, (preg->cflags & REG_ICASE));

        /* If it could work, try it. */
        if ((reg_iseol(preg, nextch) || c == nextch) && regmatch(preg, next)) {
            return 1;
        }
        /* Couldn't or didn't -- move on to the next candidate count. */
    }
    return 0;
}
/*
 * Set reg->name from operand number reg_num of the instruction.
 *
 * For a register operand the name of that register is used; for a memory
 * operand the name of its base register is used, unless the base is
 * MIPS_REG_INVALID.  Any other operand kind leaves reg->name untouched.
 *
 * Returns -1 when reg is NULL, 0 otherwise.
 */
static int parse_reg_name(RRegItem *reg, csh handle, cs_insn *insn, int reg_num) {
	if (!reg) {
		return -1;
	}
	if (OPERAND (reg_num).type == MIPS_OP_REG) {
		reg->name = (char *)cs_reg_name (handle, OPERAND (reg_num).reg);
	} else if (OPERAND (reg_num).type == MIPS_OP_MEM
			&& OPERAND (reg_num).mem.base != MIPS_REG_INVALID) {
		reg->name = (char *)cs_reg_name (handle, OPERAND (reg_num).mem.base);
	}
	return 0;
}
/*
 - regrepeat - repeatedly match something simple, report how many
 *
 * Matches node p against preg->reginput at most max times and leaves
 * preg->reginput pointing just past what was consumed.
 *
 * Returns the number of repetitions.  For a node that is not ANY,
 * EXACTLY, ANYOF or ANYBUT, preg->err is set to REG_ERR_INTERNAL and
 * 0 is returned.
 */
static int regrepeat(regex_t *preg, int p, int max)
{
    const char *scan = preg->reginput;
    int opnd = OPERAND(p);
    int count = 0;
    int ch;
    int n;

    switch (OP(preg, p)) {
    case ANY:
        /* No need to handle utf8 specially here */
        for (; !reg_iseol(preg, *scan) && count < max; scan++) {
            count++;
        }
        break;

    case EXACTLY:
        for (; count < max; count++) {
            n = reg_utf8_tounicode_case(scan, &ch, preg->cflags & REG_ICASE);
            if (preg->program[opnd] != ch) {
                break;
            }
            scan += n;
        }
        break;

    case ANYOF:
        for (; count < max; count++) {
            n = reg_utf8_tounicode_case(scan, &ch, preg->cflags & REG_ICASE);
            if (reg_iseol(preg, ch) || reg_range_find(preg->program + opnd, ch) == 0) {
                break;
            }
            scan += n;
        }
        break;

    case ANYBUT:
        for (; count < max; count++) {
            n = reg_utf8_tounicode_case(scan, &ch, preg->cflags & REG_ICASE);
            if (reg_iseol(preg, ch) || reg_range_find(preg->program + opnd, ch) != 0) {
                break;
            }
            scan += n;
        }
        break;

    default:
        /* Oh dear. Called inappropriately. */
        preg->err = REG_ERR_INTERNAL;
        count = 0;  /* Best compromise. */
        break;
    }

    preg->reginput = scan;
    return count;
}
/* - regrepeat - repeatedly match something simple, report how many */ static int regrepeat(char *p) { register int count = 0, len = 0; register char *scan; register char *opnd; scan = reginput; opnd = OPERAND(p); switch (OP(p)) { case ANY: while (( (len = CHARLEN(scan)) > 0)) { count++; scan += len; reglmlen = len; } break; case EXACTLY: { int len = 0; len = CHARLEN(opnd); while (len > 0 && (CHARLEN(scan) == len) && !strncmp(opnd, scan, len)) { count++; scan += len; reglmlen = len; } } break; case ANYOF: while (( (len = CHARLEN(scan)) > 0) && inclass(opnd, scan)) { count++; scan += len; reglmlen = len; } break; case ANYBUT: while ( ((len = CHARLEN(scan)) > 0) && !inclass(opnd, scan)) { count++; scan += len; reglmlen = len; } break; default: /* Oh dear. Called inappropriately. */ count = 0; /* Best compromise. */ break; } reginput = scan; return(count); }
/*
 * Fill the source/destination value descriptors of op from the decoded
 * instruction.
 *
 * LOAD / STORE with a memory operand 1: a new RAnalValue is created whose
 * reg points at the function-local static `reg` item (zeroed first, named
 * via parse_reg_name) and whose delta is the operand's displacement.
 * Arithmetic, logic, shift, move and divide types delegate to the
 * SET_SRC_DST_* macros.  SLTI/SLTIU additionally get SET_SRC_DST_3_IMM.
 *
 * Because `reg` is a single static object, every value filled in here
 * points at the same RRegItem storage.
 */
static void op_fillval(RAnal *anal, RAnalOp *op, csh *handle, cs_insn *insn) {
	static RRegItem reg;
	switch (op->type & R_ANAL_OP_TYPE_MASK) {
	case R_ANAL_OP_TYPE_LOAD:
		if (OPERAND(1).type == MIPS_OP_MEM) {
			ZERO_FILL (reg);
			op->src[0] = r_anal_value_new ();
			op->src[0]->reg = &reg;
			parse_reg_name (op->src[0]->reg, *handle, insn, 1);
			op->src[0]->delta = OPERAND(1).mem.disp;
		}
		break;
	case R_ANAL_OP_TYPE_STORE:
		if (OPERAND(1).type == MIPS_OP_MEM) {
			ZERO_FILL (reg);
			op->dst = r_anal_value_new ();
			op->dst->reg = &reg;
			parse_reg_name (op->dst->reg, *handle, insn, 1);
			op->dst->delta = OPERAND(1).mem.disp;
		}
		break;
	case R_ANAL_OP_TYPE_SHL:
	case R_ANAL_OP_TYPE_SHR:
	case R_ANAL_OP_TYPE_SAR:
	case R_ANAL_OP_TYPE_XOR:
	case R_ANAL_OP_TYPE_SUB:
	case R_ANAL_OP_TYPE_AND:
	case R_ANAL_OP_TYPE_ADD:
	case R_ANAL_OP_TYPE_OR:
		SET_SRC_DST_3_REG_OR_IMM (op);
		break;
	case R_ANAL_OP_TYPE_MOV:
		SET_SRC_DST_2_REGS (op);
		break;
	case R_ANAL_OP_TYPE_DIV:
		SET_SRC_DST_3_REGS (op);
		break;
	default:
		break;
	}
	if (insn && (insn->id == MIPS_INS_SLTI || insn->id == MIPS_INS_SLTIU)) {
		SET_SRC_DST_3_IMM (op);
	}
}
/*
 * Fill op->src[0] / op->dst for MOV and LEA operations that involve a
 * memory operand.
 *
 * The created RAnalValue's reg points at the function-local static `reg`
 * item, which is zeroed and then named through parse_reg_name().
 *
 * MOV: operand 1 in memory fills src[0]; otherwise operand 0 in memory
 *      fills dst.
 * LEA: operand 1 in memory fills dst.
 */
static void op_fillval(RAnalOp *op, csh handle, cs_insn *insn) {
	static RRegItem reg;
	switch (op->type & R_ANAL_OP_TYPE_MASK) {
	case R_ANAL_OP_TYPE_MOV:
		ZERO_FILL (reg);
		if (OPERAND(1).type == M68K_OP_MEM) {
			op->src[0] = r_anal_value_new ();
			op->src[0]->reg = &reg;
			parse_reg_name (op->src[0]->reg, handle, insn, 1);
			/* NOTE(review): the displacement is read from operand 0 although
			 * operand 1 is the one tested for M68K_OP_MEM -- confirm intent. */
			op->src[0]->delta = OPERAND(0).mem.disp;
		} else if (OPERAND(0).type == M68K_OP_MEM) {
			op->dst = r_anal_value_new ();
			op->dst->reg = &reg;
			parse_reg_name (op->dst->reg, handle, insn, 0);
			/* NOTE(review): the displacement is read from operand 1 although
			 * operand 0 is the one tested for M68K_OP_MEM -- confirm intent. */
			op->dst->delta = OPERAND(1).mem.disp;
		}
		break;
	case R_ANAL_OP_TYPE_LEA:
		ZERO_FILL (reg);
		if (OPERAND(1).type == M68K_OP_MEM) {
			op->dst = r_anal_value_new ();
			op->dst->reg = &reg;
			parse_reg_name (op->dst->reg, handle, insn, 1);
			op->dst->delta = OPERAND(1).mem.disp;
		}
		break;
	default:
		break;
	}
}
/**
 * T combinator:  Txy --> yx
 *
 * Pops the two topmost nodes from the spine and rewrites the second one
 * in place.  The result is not pushed back onto the spine.
 *
 * @param spine  array used as a stack of the nodes being reduced
 * @param nargs  number of arguments available; at least 2 are required
 * @return the rewritten node, or 0 when nargs < 2 (nothing is popped then)
 */
static Object *
apply_T( Array *spine, unsigned int nargs )
{
    Object *first, *second;

    if ( nargs < 2 )
    {
        return 0;
    }

    first = array__pop( spine );
    second = array__pop( spine );

    /* The function of the Apply becomes y.  This must precede the
       operand update, which overwrites y. */
    SET_FUNCTION( second, OPERAND( second ) );

    /* The operand of the Apply becomes x. */
    SET_OPERAND( second, OPERAND( first ) );

    return second;
}
/**
 * w combinator:  wx --> xx
 *
 * Pops the topmost node from the spine and makes its function equal to
 * its operand.  The result is not pushed back onto the spine.
 *
 * @param spine  array used as a stack of the nodes being reduced
 * @param nargs  number of arguments available; at least 1 is required
 * @return the rewritten node, or 0 when nargs < 1 (nothing is popped then)
 */
static Object *
apply_w( Array *spine, unsigned int nargs )
{
    Object *node;

    if ( nargs < 1 )
    {
        return 0;
    }

    node = array__pop( spine );

    /* The function of the Apply becomes x. */
    SET_FUNCTION( node, OPERAND( node ) );

    return node;
}
/**
 * I combinator:  Ix --> x
 *
 * Pops the topmost node from the spine and substitutes its operand for
 * it via substitute_boxed().
 *
 * @param spine  array used as a stack of the nodes being reduced
 * @param nargs  number of arguments available; at least 1 is required
 * @return the popped node, or 0 when nargs < 1 (nothing is popped then)
 */
static Object *
apply_I( Array *spine, unsigned int nargs )
{
    Object *node;

    if ( nargs < 1 )
    {
        return 0;
    }

    node = array__pop( spine );

    /* The Apply is replaced with an indirection node to x. */
    substitute_boxed( node, OPERAND( node ) );

    return node;
}
/**
 * K combinator:  Kxy --> x
 *
 * Pops the two topmost nodes from the spine and substitutes the operand
 * of the first for the second via substitute_boxed().
 *
 * @param spine  array used as a stack of the nodes being reduced
 * @param nargs  number of arguments available; at least 2 are required
 * @return the second popped node, or 0 when nargs < 2 (nothing is popped)
 */
static Object *
apply_K( Array *spine, unsigned int nargs )
{
    Object *first, *second;

    if ( nargs < 2 )
    {
        return 0;
    }

    first = array__pop( spine );
    second = array__pop( spine );

    /* The top-level Apply is replaced with an indirection node to x. */
    substitute_boxed( second, OPERAND( first ) );

    return second;
}
/*
 - regrepeat - repeatedly match something simple, report how many
 *
 * Matches node p as often as possible starting at reginput and leaves
 * reginput pointing past what was consumed.
 *
 * Returns the number of characters matched; for a node that is not ANY,
 * EXACTLY, ANYOF or ANYBUT, regerror() is called and 0 is returned.
 */
static int
regrepeat( char *p )
{
    const char *scan = reginput;
    char *opnd = OPERAND(p);
    int count = 0;

    switch (OP(p)) {
    case ANY:
        /* Everything up to the terminator is consumed. */
        count = strlen(scan);
        scan += count;
        break;

    case EXACTLY:
        /* Only the first operand character is compared. */
        for (; *opnd == *scan; scan++)
            count++;
        break;

    case ANYOF:
        for (; *scan != '\0' && strchr(opnd, *scan) != NULL; scan++)
            count++;
        break;

    case ANYBUT:
        for (; *scan != '\0' && strchr(opnd, *scan) == NULL; scan++)
            count++;
        break;

    default:
        /* Oh dear. Called inappropriately. */
        regerror("internal foulup");
        count = 0;  /* Best compromise. */
        break;
    }

    reginput = scan;
    return count;
}
/* * regrepeat - repeatedly match something simple, report how many */ static i4 regrepeat(char *p) { register i4 count = 0; register char *scan; register char *opnd; scan = reginput; opnd = OPERAND(p); switch (OP(p)) { case ANY: count = STlength(scan); scan += count; break; case EXACTLY: while (*opnd == *scan) { CMbyteinc( count, scan ); CMnext( scan ); } break; case ANYOF: while (*scan != '\0' && STchr(opnd, *scan) != NULL) { CMbyteinc( count, scan ); CMnext( scan ); } break; case ANYBUT: while (*scan != '\0' && STchr(opnd, *scan) == NULL) { CMbyteinc( count, scan ); CMnext( scan ); } break; default: /* Oh dear. Called inappropriately. */ _error("internal foulup"); count = 0; /* Best compromise. */ break; } reginput = scan; return(count); }
/*
 - regrepeat - repeatedly match something simple, report how many
 *
 * Matches node p as often as possible starting at reginput and leaves
 * reginput pointing past what was consumed.  For an unexpected opcode an
 * error is printed and 0 is returned without touching reginput.
 */
int ossimRegExp::regrepeat (const char* p) {
    const char* scan = reginput;
    const char* opnd = OPERAND(p);
    int count = 0;

    switch (OP(p)) {
    case ANY:
        count = (int)strlen(scan);
        scan += count;
        break;

    case EXACTLY:
        // Only the first operand character is compared.
        for (; *opnd == *scan; ++scan) {
            ++count;
        }
        break;

    case ANYOF:
        for (; *scan != '\0' && strchr(opnd, *scan) != NULL; ++scan) {
            ++count;
        }
        break;

    case ANYBUT:
        for (; *scan != '\0' && strchr(opnd, *scan) == NULL; ++scan) {
            ++count;
        }
        break;

    default:
        // Oh dear. Called inappropriately.
        //RAISE Error, SYM(ossimRegExp), SYM(Internal_Error),
        printf ("ossimRegExp::find(): Internal error.\n");
        return 0;
    }

    reginput = scan;
    return count;
}
/*
 * REcompile - compile a regular expression into internal code
 *
 * We can't allocate space until we know how big the compiled form will be,
 * but we can't compile it (and thus know how big it is) until we've got a
 * place to put the code.  So we cheat:  we compile it twice, once with code
 * generation turned off and size counting turned on, and once "for real".
 * This also means that we don't allocate space until we are sure that the
 * thing really will compile successfully, and we never have to move the
 * code and thus invalidate pointers into it.  (Note that it has to be in
 * one piece because free() must be able to free it all.)
 *
 * Beware that the optimization-preparation code in here knows about some
 * of the structure of the compiled RE_EXP.
 *
 * Inputs:
 *	exp	expression text to compile; NULL is rejected
 *	mem_tag	tag passed to MEreqmem() for the allocation
 * Outputs:
 *	re_exp	receives the compiled expression on success only
 * Returns:
 *	OK on success, FAIL otherwise.
 */
STATUS
REcompile( char *exp, RE_EXP **re_exp, i4 mem_tag )
{
	register RE_EXP *r;
	register char *scan;
	register char *longest;
	register i4 len;
	i4 flags;
	u_char magic = MAGIC;

	if (exp == NULL)
	{
		_error("NULL argument");
		return (FAIL);
	}

	/* First pass: determine size, legality. */
	regparse = exp;
	regnpar = 1;
	regsize = 0L;
	regcode = &regdummy;	/* placeholder target: nothing is emitted yet */
	regc( (char *) &magic );
	if (reg(0, &flags) == NULL)
		return( FAIL );

	/* Small enough for pointer-storage convention? */
	if (regsize >= 32767L)		/* Probably could be 65535L. */
	{
		_error("regular expression too big");
		return (FAIL);
	}

	/* Allocate space. */
	r = (RE_EXP *) MEreqmem( mem_tag, sizeof(RE_EXP) + (unsigned) regsize,
				 FALSE, NULL);
	if (r == NULL)
	{
		_error("out of space");
		return (FAIL);
	}

	/* Second pass: emit code. */
	regparse = exp;
	regnpar = 1;
	regcode = r->program;
	regc( (char *) &magic );
	if (reg(0, &flags) == NULL)
		return( FAIL );

	/* Dig out information for optimizations. */
	r->regstart = '\0';	/* Worst-case defaults. */
	r->reganch = 0;
	r->regmust = NULL;
	r->regmlen = 0;
	scan = r->program+1;			/* First BRANCH. */
	if (OP(regnext(scan)) == END)
	{
		/* Only one top-level choice. */
		scan = OPERAND(scan);

		/* Starting-point info. */
		if (OP(scan) == EXACTLY)
			r->regstart = *OPERAND(scan);
		else if (OP(scan) == BOL)
			r->reganch++;

		/*
		 * If there's something expensive in the r.e., find the
		 * longest literal string that must appear and make it the
		 * regmust.  Resolve ties in favor of later strings, since
		 * the regstart check works with the beginning of the r.e.
		 * and avoiding duplication strengthens checking.  Not a
		 * strong reason, but sufficient in the absence of others.
		 */
		if (flags&SPSTART)
		{
			longest = NULL;
			len = 0;
			for (; scan != NULL; scan = regnext(scan))
				if (OP(scan) == EXACTLY &&
				    STlength(OPERAND(scan)) >= len)
				{
					longest = OPERAND(scan);
					len = STlength(OPERAND(scan));
				}
			r->regmust = longest;
			r->regmlen = len;
		}
	}

	*re_exp = r;
	return( OK );
}
/* - RegComp - compile a regular expression into internal code * * We can't allocate space until we know how big the compiled form will be, * but we can't compile it (and thus know how big it is) until we've got a * place to put the code. So we cheat: we compile it twice, once with code * generation turned off and size counting turned on, and once "for real". * This also means that we don't allocate space until we are sure that the * thing really will compile successfully, and we never have to move the * code and thus invalidate pointers into it. (Note that it has to be in * one piece because free() must be able to free it all.) * * Beware that the optimization-preparation code in here knows about some * of the structure of the compiled regexp. */ regexp *RegComp( const char *instr ) { regexp *r; char *scan; char *longest; const char *exp; char buff[MAX_STR*2]; int flags, ignmag = FALSE; unsigned j; size_t i, k, len; #ifdef WANT_EXCLAMATION if( instr[0] == '!' ) { instr++; ignmag = TRUE; } #endif /* * flip roles of magic chars */ if( !ignmag && ( !MAGICFLAG && MAGICSTR != NULL ) ) { j = 0; k = strlen( instr ); for( i = 0; i < k; i++ ) { if( instr[i] == '\\' ) { if( strchr( MAGICSTR, instr[i + 1] ) == NULL ) { buff[j++] = '\\'; } i++; } else { if( strchr( MAGICSTR, instr[i] ) != NULL ) { buff[j++] = '\\'; } } buff[j++] = instr[i]; } buff[j] = 0; exp = buff; } else { exp = instr; } regError( ERR_NO_ERR ); if( exp == NULL ) { FAIL( ERR_RE_NULL_ARGUMENT ); } /* First pass: determine size, legality. */ regparse = exp; regnpar = 1; regsize = 0L; regcode = ®dummy; regc( MAGIC ); if( reg( 0, &flags ) == NULL ) { return( NULL ); } /* Allocate space. */ r = ALLOC( sizeof( regexp ) + ( unsigned ) regsize ); /* Second pass: emit code. */ regparse = exp; regnpar = 1; regcode = r->program; regc( MAGIC ); if( reg( 0, &flags ) == NULL ) { return( NULL ); } /* Dig out information for optimizations. */ r->regstart = '\0'; /* Worst-case defaults. 
*/ r->reganch = 0; r->regmust = NULL; r->regmlen = 0; scan = r->program + 1; /* First BRANCH. */ if( OP( regnext( scan ) ) == END ) { /* Only one top-level choice. */ scan = OPERAND( scan ); /* Starting-point info. */ if( OP( scan ) == EXACTLY ) { r->regstart = *OPERAND( scan ); } else if( OP( scan ) == BOL ) { r->reganch++; } /* * If there's something expensive in the r.e., find the * longest literal string that must appear and make it the * regmust. Resolve ties in favor of later strings, since * the regstart check works with the beginning of the r.e. * and avoiding duplication strengthens checking. Not a * strong reason, but sufficient in the absence of others. */ if( flags & SPSTART ) { longest = NULL; len = 0; for( ; scan != NULL; scan = regnext( scan ) ) { if( OP( scan ) == EXACTLY && strlen( OPERAND( scan ) ) >= len ) { longest = OPERAND( scan ); len = strlen( OPERAND( scan ) ); } } r->regmust = longest; r->regmlen = (short)len; } } return( r ); }
/* - regcomp - compile a regular expression into internal code * * We can't allocate space until we know how big the compiled form will be, * but we can't compile it (and thus know how big it is) until we've got a * place to put the code. So we cheat: we compile it twice, once with code * generation turned off and size counting turned on, and once "for real". * This also means that we don't allocate space until we are sure that the * thing really will compile successfully, and we never have to move the * code and thus invalidate pointers into it. (Note that it has to be in * one piece because free() must be able to free it all.) * * Beware that the optimization-preparation code in here knows about some * of the structure of the compiled regexp. */ regexp * regcomp( const char *exp ) { register regexp *r; register char *scan; register char *longest; register unsigned len; int flags; if (exp == NULL) FAIL("NULL argument"); /* First pass: determine size, legality. */ #ifdef notdef if (exp[0] == '.' && exp[1] == '*') exp += 2; /* aid grep */ #endif regparse = (char *)exp; regnpar = 1; regsize = 0L; regcode = ®dummy; regc(MAGIC); if (reg(0, &flags) == NULL) return(NULL); /* Small enough for pointer-storage convention? */ if (regsize >= 32767L) /* Probably could be 65535L. */ FAIL("regexp too big"); /* Allocate space. */ r = (regexp *)malloc(sizeof(regexp) + (unsigned)regsize); if (r == NULL) FAIL("out of space"); /* Second pass: emit code. */ regparse = (char *)exp; regnpar = 1; regcode = r->program; regc(MAGIC); if (reg(0, &flags) == NULL) return(NULL); /* Dig out information for optimizations. */ r->regstart = '\0'; /* Worst-case defaults. */ r->reganch = 0; r->regmust = NULL; r->regmlen = 0; scan = r->program+1; /* First BRANCH. */ if (OP(regnext(scan)) == END) { /* Only one top-level choice. */ scan = OPERAND(scan); /* Starting-point info. 
*/ if (OP(scan) == EXACTLY) r->regstart = *OPERAND(scan); else if (OP(scan) == BOL) r->reganch++; /* * If there's something expensive in the r.e., find the * longest literal string that must appear and make it the * regmust. Resolve ties in favor of later strings, since * the regstart check works with the beginning of the r.e. * and avoiding duplication strengthens checking. Not a * strong reason, but sufficient in the absence of others. */ if (flags&SPSTART) { longest = NULL; len = 0; for (; scan != NULL; scan = regnext(scan)) if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= len) { longest = OPERAND(scan); len = strlen(OPERAND(scan)); } r->regmust = longest; r->regmlen = len; } } return(r); }
// regmatch - main matching routine.
//
// Walks the node chain starting at prog, consuming input through
// reginput.  Nodes that do not need to know whether the rest of the match
// succeeded are handled by the loop; OPEN/CLOSE, multi-way BRANCH and
// STAR/PLUS recurse so that they can record positions or backtrack.
//
// Returns 1 on success, 0 on failure.
int CRegExp::regmatch(TCHAR *prog)
{
	TCHAR *scan;	// Current node.
	TCHAR *next;	// Next node.

	for (scan = prog; scan != NULL; scan = next) {
		next = regnext(scan);

		switch (OP(scan)) {
		case BOL:
			if (reginput != regbol)
				return(0);
			break;
		case EOL:
			if (*reginput != _T('\0'))
				return(0);
			break;
		case ANY:
			if (*reginput == _T('\0'))
				return(0);
			reginput++;
			break;
		case EXACTLY: {
			size_t len;
			TCHAR *const opnd = OPERAND(scan);

			// Inline the first character, for speed.
			if (*opnd != *reginput)
				return(0);
			len = _tcslen(opnd);
			if (len > 1 && _tcsncmp(opnd, reginput, len) != 0)
				return(0);
			reginput += len;
			break;
		}
		case ANYOF:
			if (*reginput == _T('\0') ||
					_tcschr(OPERAND(scan), *reginput) == NULL)
				return(0);
			reginput++;
			break;
		case ANYBUT:
			if (*reginput == _T('\0') ||
					_tcschr(OPERAND(scan), *reginput) != NULL)
				return(0);
			reginput++;
			break;
		case NOTHING:
			break;
		case BACK:
			break;
		case OPEN+1: case OPEN+2: case OPEN+3:
		case OPEN+4: case OPEN+5: case OPEN+6:
		case OPEN+7: case OPEN+8: case OPEN+9: {
			const int no = OP(scan) - OPEN;
			TCHAR *const input = reginput;

			if (regmatch(next)) {
				// Don't set startp if some later
				// invocation of the same parentheses
				// already has.
				if (startp[no] == NULL)
					startp[no] = input;
				return(1);
			} else
				return(0);
			break;
		}
		case CLOSE+1: case CLOSE+2: case CLOSE+3:
		case CLOSE+4: case CLOSE+5: case CLOSE+6:
		case CLOSE+7: case CLOSE+8: case CLOSE+9: {
			const int no = OP(scan) - CLOSE;
			TCHAR *const input = reginput;

			if (regmatch(next)) {
				// Don't set endp if some later
				// invocation of the same parentheses
				// already has.
				if (endp[no] == NULL)
					endp[no] = input;
				return(1);
			} else
				return(0);
			break;
		}
		case BRANCH: {
			TCHAR *const save = reginput;

			if (OP(next) != BRANCH)		// No choice.
				next = OPERAND(scan);	// Avoid recursion.
			else {
				// Try each alternative in turn.  regnext() may
				// yield NULL, so check before reading the opcode.
				while (scan != NULL && OP(scan) == BRANCH) {
					if (regmatch(OPERAND(scan)))
						return(1);
					reginput = save;
					scan = regnext(scan);
				}
				return(0);
				// NOTREACHED
			}
			break;
		}
		case STAR: case PLUS: {
			// Lookahead: when the next node is a literal, only
			// positions showing its first character are tried.
			const TCHAR nextch =
				(OP(next) == EXACTLY) ? *OPERAND(next) : _T('\0');
			size_t no;
			TCHAR *const save = reginput;
			const size_t min = (OP(scan) == STAR) ? 0 : 1;

			// `no` is kept one above the repeat count being tried so
			// the unsigned counter never has to go below zero.
			for (no = regrepeat(OPERAND(scan)) + 1; no > min; no--) {
				reginput = save + no - 1;
				// If it could work, try it.
				if (nextch == _T('\0') || *reginput == nextch)
					if (regmatch(next))
						return(1);
			}
			return(0);
			break;
		}
		case END:
			return(1);	// Success!
			break;
		default:
			TRACE0("regexp corruption\n");
			return(0);
			break;
		}
	}

	// We get here only if there's trouble -- normally "case END" is
	// the terminating point.
	TRACE0("corrupted pointers\n");
	return(0);
}
// RegComp - compile the regular expression exp into this object.
//
// The expression is parsed twice: first with bEmitCode off to determine
// regsize, then with bEmitCode on to emit code into a freshly allocated
// program buffer (any previous buffer is released first).  Afterwards
// regstart, reganch, regmust and regmlen are derived for the matcher.
//
// Returns this on success, NULL when exp is NULL, when parsing fails or
// when the program buffer is NULL after allocation.
CRegExp* CRegExp::RegComp(const TCHAR *exp)
{
	TCHAR *scan;
	int flags;

	if (exp == NULL)
		return NULL;

	bCompiled = TRUE;

	// First pass: determine size, legality.
	bEmitCode = FALSE;
	regparse = (TCHAR *)exp;
	regnpar = 1;
	regsize = 0L;
	regdummy[0] = NOTHING;
	regdummy[1] = regdummy[2] = 0;
	regcode = regdummy;
	if (reg(0, &flags) == NULL)
		return(NULL);

	// Allocate space.  The buffer comes from new[], so it must be
	// released with delete[].
	delete [] program;
	program = new TCHAR[regsize];

	// Check the allocation before the buffer is written to.
	if (program == NULL)
		return NULL;
	memset( program, 0, regsize * sizeof(TCHAR) );

	// Second pass: emit code.
	bEmitCode = TRUE;
	regparse = (TCHAR *)exp;
	regnpar = 1;
	regcode = program;
	if (reg(0, &flags) == NULL)
		return NULL;

	// Dig out information for optimizations.
	regstart = _T('\0');		// Worst-case defaults.
	reganch = 0;
	regmust = NULL;
	regmlen = 0;
	scan = program;		// First BRANCH.
	if (OP(regnext(scan)) == END) {
		// Only one top-level choice.
		scan = OPERAND(scan);

		// Starting-point info.
		if (OP(scan) == EXACTLY)
			regstart = *OPERAND(scan);
		else if (OP(scan) == BOL)
			reganch = 1;

		// If there's something expensive in the r.e., find the
		// longest literal string that must appear and make it the
		// regmust.  Resolve ties in favor of later strings, since
		// the regstart check works with the beginning of the r.e.
		// and avoiding duplication strengthens checking.  Not a
		// strong reason, but sufficient in the absence of others.
		if (flags&SPSTART) {
			// OPERAND() yields TCHAR *, so the candidate must be
			// TCHAR * as well.
			TCHAR *longest = NULL;
			size_t len = 0;

			for (; scan != NULL; scan = regnext(scan))
				if (OP(scan) == EXACTLY && _tcslen(OPERAND(scan)) >= len) {
					longest = OPERAND(scan);
					len = _tcslen(OPERAND(scan));
				}
			regmust = longest;
			regmlen = (int)len;
		}
	}

	return this;
}
/*
 * Decode one MIPS instruction from buf with capstone and describe it in op.
 *
 * The capstone handle is kept in a function-local static and is reopened
 * whenever the endianness/bits mode differs from the previous call.
 * op->type, op->delay, op->jump, op->fail, op->ptr and the value fields
 * are filled in according to the instruction id.  When anal->decode is
 * set, analop_esil() is invoked as well.
 *
 * Returns the instruction size, or -1 when len < 4, when capstone cannot
 * be opened, or when nothing could be disassembled.
 */
static int analop(RAnal *anal, RAnalOp *op, ut64 addr, const ut8 *buf, int len) {
	int n, ret, opsize = -1;
	static csh hndl = 0;
	static csh *handle = &hndl;
	static int omode = -1;
	static int obits = 32;
	/* Start from NULL so the code after `beach:` never sees an
	 * indeterminate pointer when disassembly fails. */
	cs_insn *insn = NULL;
	int mode = anal->big_endian? CS_MODE_BIG_ENDIAN: CS_MODE_LITTLE_ENDIAN;
	mode |= (anal->bits==64)? CS_MODE_64: CS_MODE_32;
	if (mode != omode || anal->bits != obits) {
		cs_close (&hndl);
		hndl = 0;
		omode = mode;
		obits = anal->bits;
	}
	// XXX no arch->cpu ?!?! CS_MODE_MICRO, N64
	op->delay = 0;
	op->type = R_ANAL_OP_TYPE_ILL;
	if (len<4)
		return -1;
	op->size = 4;
	if (hndl == 0) {
		ret = cs_open (CS_ARCH_MIPS, mode, &hndl);
		if (ret != CS_ERR_OK) goto fin;
		cs_option (hndl, CS_OPT_DETAIL, CS_OPT_ON);
	}
	n = cs_disasm (hndl, (ut8*)buf, len, addr, 1, &insn);
	if (n<1 || insn->size<1)
		goto beach;
	op->type = R_ANAL_OP_TYPE_NULL;
	op->delay = 0;
	op->jump = UT64_MAX;
	op->fail = UT64_MAX;
	opsize = op->size = insn->size;
	switch (insn->id) {
	case MIPS_INS_INVALID:
		op->type = R_ANAL_OP_TYPE_ILL;
		break;
	case MIPS_INS_LB:
	case MIPS_INS_LBU:
	case MIPS_INS_LBUX:
	case MIPS_INS_LW:
	case MIPS_INS_LWC1:
	case MIPS_INS_LWC2:
	case MIPS_INS_LWL:
	case MIPS_INS_LWR:
	case MIPS_INS_LWXC1:
	case MIPS_INS_LD:
	case MIPS_INS_LDC1:
	case MIPS_INS_LDC2:
	case MIPS_INS_LDL:
	case MIPS_INS_LDR:
	case MIPS_INS_LDXC1:
		op->type = R_ANAL_OP_TYPE_LOAD;
		op->refptr = 4;
		switch (OPERAND(1).type) {
		case MIPS_OP_MEM:
			/* gp-relative loads get an absolute pointer */
			if (OPERAND(1).mem.base == MIPS_REG_GP) {
				op->ptr = anal->gp + OPERAND(1).mem.disp;
				op->refptr = 4;
			}
			break;
		case MIPS_OP_IMM:
			op->ptr = OPERAND(1).imm;
			break;
		case MIPS_OP_REG:
			// wtf?
			break;
		default:
			break;
		}
		// TODO: fill
		break;
	case MIPS_INS_SW:
	case MIPS_INS_SWC1:
	case MIPS_INS_SWC2:
	case MIPS_INS_SWL:
	case MIPS_INS_SWR:
	case MIPS_INS_SWXC1:
		op->type = R_ANAL_OP_TYPE_STORE;
		break;
	case MIPS_INS_NOP:
		op->type = R_ANAL_OP_TYPE_NOP;
		break;
	case MIPS_INS_SYSCALL:
	case MIPS_INS_BREAK:
		op->type = R_ANAL_OP_TYPE_TRAP;
		break;
	case MIPS_INS_JALR:
		op->type = R_ANAL_OP_TYPE_UCALL;
		op->delay = 1;
		break;
	case MIPS_INS_JAL:
	case MIPS_INS_JALS:
	case MIPS_INS_JALX:
	case MIPS_INS_JRADDIUSP:
	case MIPS_INS_BAL:
	// (no blezal/bgtzal or blezall/bgtzall, only blezalc/bgtzalc)
	case MIPS_INS_BLTZAL: // Branch on <0 and link
	case MIPS_INS_BGEZAL: // Branch on >=0 and link
	case MIPS_INS_BLTZALL: // "likely" versions
	case MIPS_INS_BGEZALL:
	case MIPS_INS_BLTZALC: // compact versions
	case MIPS_INS_BLEZALC:
	case MIPS_INS_BGEZALC:
	case MIPS_INS_BGTZALC:
	case MIPS_INS_JIALC:
	case MIPS_INS_JIC:
		op->type = R_ANAL_OP_TYPE_CALL;
		op->jump = IMM(0);

		switch (insn->id) {
		case MIPS_INS_JIALC:
		case MIPS_INS_JIC:
		case MIPS_INS_BLTZALC:
		case MIPS_INS_BLEZALC:
		case MIPS_INS_BGEZALC:
		case MIPS_INS_BGTZALC:
			// compact vesions (no delay)
			op->delay = 0;
			op->fail = addr+4;
			break;
		default:
			op->delay = 1;
			op->fail = addr+8;
			break;
		}
		break;
	case MIPS_INS_LUI:
	case MIPS_INS_MOVE:
		op->type = R_ANAL_OP_TYPE_MOV;
		SET_SRC_DST_2_REGS (op);
		break;
	case MIPS_INS_ADD:
	case MIPS_INS_ADDI:
	case MIPS_INS_ADDU:
	case MIPS_INS_ADDIU:
	case MIPS_INS_DADD:
	case MIPS_INS_DADDI:
	case MIPS_INS_DADDIU:
		SET_VAL (op, 2);
		SET_SRC_DST_3_REG_OR_IMM (op);
		op->type = R_ANAL_OP_TYPE_ADD;
		break;
	case MIPS_INS_SUB:
	case MIPS_INS_SUBV:
	case MIPS_INS_SUBVI:
	case MIPS_INS_DSUBU:
	case MIPS_INS_FSUB:
	case MIPS_INS_FMSUB:
	case MIPS_INS_SUBU:
	case MIPS_INS_DSUB:
	case MIPS_INS_SUBS_S:
	case MIPS_INS_SUBS_U:
	case MIPS_INS_SUBUH:
	case MIPS_INS_SUBUH_R:
		SET_VAL (op,2);
		SET_SRC_DST_3_REG_OR_IMM (op);
		op->type = R_ANAL_OP_TYPE_SUB;
		break;
	case MIPS_INS_MULV:
	case MIPS_INS_MULT:
	case MIPS_INS_MULSA:
	case MIPS_INS_FMUL:
	case MIPS_INS_MUL:
	case MIPS_INS_DMULT:
	case MIPS_INS_DMULTU:
		op->type = R_ANAL_OP_TYPE_MUL;
		break;
	case MIPS_INS_XOR:
	case MIPS_INS_XORI:
		SET_VAL (op,2);
		SET_SRC_DST_3_REG_OR_IMM (op);
		op->type = R_ANAL_OP_TYPE_XOR;
		break;
	case MIPS_INS_AND:
	case MIPS_INS_ANDI:
		SET_VAL (op,2);
		SET_SRC_DST_3_REG_OR_IMM (op);
		op->type = R_ANAL_OP_TYPE_AND;
		break;
	case MIPS_INS_NOT:
		op->type = R_ANAL_OP_TYPE_NOT;
		break;
	case MIPS_INS_OR:
	case MIPS_INS_ORI:
		SET_VAL (op,2);
		SET_SRC_DST_3_REG_OR_IMM (op);
		op->type = R_ANAL_OP_TYPE_OR;
		break;
	case MIPS_INS_DIV:
	case MIPS_INS_DIVU:
	case MIPS_INS_DDIV:
	case MIPS_INS_DDIVU:
	case MIPS_INS_FDIV:
	case MIPS_INS_DIV_S:
	case MIPS_INS_DIV_U:
		SET_SRC_DST_3_REGS (op);
		op->type = R_ANAL_OP_TYPE_DIV;
		break;
	case MIPS_INS_CMPGDU:
	case MIPS_INS_CMPGU:
	case MIPS_INS_CMPU:
	case MIPS_INS_CMPI:
		op->type = R_ANAL_OP_TYPE_CMP;
		break;
	case MIPS_INS_J:
	case MIPS_INS_B:
	case MIPS_INS_BZ:
	case MIPS_INS_BEQ:
	case MIPS_INS_BNZ:
	case MIPS_INS_BNE:
	case MIPS_INS_BEQZ:
	case MIPS_INS_BNEG:
	case MIPS_INS_BNEGI:
	case MIPS_INS_BNEZ:
	case MIPS_INS_BTEQZ:
	case MIPS_INS_BTNEZ:
	case MIPS_INS_BLTZ:
	case MIPS_INS_BLTZL:
	case MIPS_INS_BLEZ:
	case MIPS_INS_BLEZL:
	case MIPS_INS_BGEZ:
	case MIPS_INS_BGEZL:
	case MIPS_INS_BGTZ:
	case MIPS_INS_BGTZL:
	case MIPS_INS_BLEZC:
	case MIPS_INS_BGEZC:
	case MIPS_INS_BLTZC:
	case MIPS_INS_BGTZC:
		if (insn->id == MIPS_INS_J || insn->id == MIPS_INS_B ) {
			op->type = R_ANAL_OP_TYPE_JMP;
		} else {
			op->type = R_ANAL_OP_TYPE_CJMP;
		}

		/* the target is the first immediate among operands 0..2 */
		if (OPERAND(0).type == MIPS_OP_IMM) {
			op->jump = IMM(0);
		} else if (OPERAND(1).type == MIPS_OP_IMM) {
			op->jump = IMM(1);
		} else if (OPERAND(2).type == MIPS_OP_IMM) {
			op->jump = IMM(2);
		}

		switch (insn->id) {
		case MIPS_INS_BLEZC:
		case MIPS_INS_BGEZC:
		case MIPS_INS_BLTZC:
		case MIPS_INS_BGTZC:
			// compact vesions (no delay)
			op->delay = 0;
			op->fail = addr+4;
			break;
		default:
			op->delay = 1;
			op->fail = addr+8;
			break;
		}
		break;
	case MIPS_INS_JR:
	case MIPS_INS_JRC:
		op->type = R_ANAL_OP_TYPE_JMP;
		op->delay = 1;
		// register is $ra, so jmp is a return
		if (insn->detail->mips.operands[0].reg == MIPS_REG_RA) {
			op->type = R_ANAL_OP_TYPE_RET;
		}
		break;
	case MIPS_INS_SLTI:
	case MIPS_INS_SLTIU:
		SET_SRC_DST_3_IMM (op);
		SET_VAL (op,2);
		break;
	case MIPS_INS_SHRAV:
	case MIPS_INS_SHRAV_R:
	case MIPS_INS_SHRA:
	case MIPS_INS_SHRA_R:
	case MIPS_INS_SRA:
		op->type = R_ANAL_OP_TYPE_SAR;
		SET_SRC_DST_3_REG_OR_IMM (op);
		SET_VAL (op,2);
		break;
	case MIPS_INS_SHRL:
	case MIPS_INS_SRLV:
	case MIPS_INS_SRL:
		op->type = R_ANAL_OP_TYPE_SHR;
		SET_SRC_DST_3_REG_OR_IMM (op);
		SET_VAL (op,2);
		break;
	case MIPS_INS_SLLV:
	case MIPS_INS_SLL:
		op->type = R_ANAL_OP_TYPE_SHL;
		SET_SRC_DST_3_REG_OR_IMM (op);
		SET_VAL (op,2);
		break;
	}
beach:
	if (anal->decode) {
		if (analop_esil (anal, op, addr, buf, len, &hndl, insn) != 0)
			r_strbuf_fini (&op->esil);
	}
	cs_free (insn, n);
fin:
	return opsize;
}
/* - regoptail - regtail on operand of first argument; nop if operandless */ void ossimRegExp::regoptail (char* p, const char* val) { // "Operandless" and "op != BRANCH" are synonymous in practice. if (p == NULL || p == ®dummy || OP(p) != BRANCH) return; regtail(OPERAND(p), val); }
void ossimRegExp::compile (const char* exp) { const char* scan; const char* longest; unsigned long len; int flags; if (exp == NULL) { //RAISE Error, SYM(ossimRegExp), SYM(No_Expr), printf ("ossimRegExp::compile(): No expression supplied.\n"); return; } // First pass: determine size, legality. regparse = exp; regnpar = 1; regsize = 0L; regcode = ®dummy; regc(MAGIC); if(!reg(0, &flags)) { printf ("ossimRegExp::compile(): Error in compile.\n"); return; } this->startp[0] = this->endp[0] = this->searchstring = NULL; // Small enough for pointer-storage convention? if (regsize >= 32767L) { // Probably could be 65535L. //RAISE Error, SYM(ossimRegExp), SYM(Expr_Too_Big), printf ("ossimRegExp::compile(): Expression too big.\n"); return; } // Allocate space. //#ifndef WIN32 if (this->program != NULL) delete [] this->program; //#endif this->program = new char[regsize]; this->progsize = (int) regsize; if (this->program == NULL) { //RAISE Error, SYM(ossimRegExp), SYM(Out_Of_Memory), printf ("ossimRegExp::compile(): Out of memory.\n"); return; } // Second pass: emit code. regparse = exp; regnpar = 1; regcode = this->program; regc(MAGIC); reg(0, &flags); // Dig out information for optimizations. this->regstart = '\0'; // Worst-case defaults. this->reganch = 0; this->regmust = NULL; this->regmlen = 0; scan = this->program + 1; // First BRANCH. if (OP(regnext(scan)) == END) { // Only one top-level choice. scan = OPERAND(scan); // Starting-point info. if (OP(scan) == EXACTLY) this->regstart = *OPERAND(scan); else if (OP(scan) == BOL) this->reganch++; // // If there's something expensive in the r.e., find the longest // literal string that must appear and make it the regmust. Resolve // ties in favor of later strings, since the regstart check works // with the beginning of the r.e. and avoiding duplication // strengthens checking. Not a strong reason, but sufficient in the // absence of others. 
// if (flags & SPSTART) { longest = NULL; len = 0; for (; scan != NULL; scan = regnext(scan)) if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= len) { longest = OPERAND(scan); len = (unsigned long)strlen(OPERAND(scan)); } this->regmust = longest; this->regmlen = len; } } }
/* - regmatch - main matching routine * * Conceptually the strategy is simple: check to see whether the current * node matches, call self recursively to see whether the rest matches, * and then act accordingly. In practice we make some effort to avoid * recursion, in particular by going through "ordinary" nodes (that don't * need to know whether the rest of the match failed) by a loop instead of * by recursion. */ static int /* 0 failure, 1 success */ regmatch( char *prog ) { register char *scan; /* Current node. */ char *next; /* Next node. */ scan = prog; #ifdef DEBUG if (scan != NULL && regnarrate) fprintf(stderr, "%s(\n", regprop(scan)); #endif while (scan != NULL) { #ifdef DEBUG if (regnarrate) fprintf(stderr, "%s...\n", regprop(scan)); #endif next = regnext(scan); switch (OP(scan)) { case BOL: if (reginput != regbol) return(0); break; case EOL: if (*reginput != '\0') return(0); break; case WORDA: /* Must be looking at a letter, digit, or _ */ if ((!isalnum(*reginput)) && *reginput != '_') return(0); /* Prev must be BOL or nonword */ if (reginput > regbol && (isalnum(reginput[-1]) || reginput[-1] == '_')) return(0); break; case WORDZ: /* Must be looking at non letter, digit, or _ */ if (isalnum(*reginput) || *reginput == '_') return(0); /* We don't care what the previous char was */ break; case ANY: if (*reginput == '\0') return(0); reginput++; break; case EXACTLY: { register int len; register char *opnd; opnd = OPERAND(scan); /* Inline the first character, for speed. 
*/ if (*opnd != *reginput) return(0); len = strlen(opnd); if (len > 1 && strncmp(opnd, reginput, len) != 0) return(0); reginput += len; } break; case ANYOF: if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) == NULL) return(0); reginput++; break; case ANYBUT: if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) != NULL) return(0); reginput++; break; case NOTHING: break; case BACK: break; case OPEN+1: case OPEN+2: case OPEN+3: case OPEN+4: case OPEN+5: case OPEN+6: case OPEN+7: case OPEN+8: case OPEN+9: { register int no; register const char *save; no = OP(scan) - OPEN; save = reginput; if (regmatch(next)) { /* * Don't set startp if some later * invocation of the same parentheses * already has. */ if (regstartp[no] == NULL) regstartp[no] = save; return(1); } else return(0); } break; case CLOSE+1: case CLOSE+2: case CLOSE+3: case CLOSE+4: case CLOSE+5: case CLOSE+6: case CLOSE+7: case CLOSE+8: case CLOSE+9: { register int no; register const char *save; no = OP(scan) - CLOSE; save = reginput; if (regmatch(next)) { /* * Don't set endp if some later * invocation of the same parentheses * already has. */ if (regendp[no] == NULL) regendp[no] = save; return(1); } else return(0); } break; case BRANCH: { register const char *save; if (OP(next) != BRANCH) /* No choice. */ next = OPERAND(scan); /* Avoid recursion. */ else { do { save = reginput; if (regmatch(OPERAND(scan))) return(1); reginput = save; scan = regnext(scan); } while (scan != NULL && OP(scan) == BRANCH); return(0); /* NOTREACHED */ } } break; case STAR: case PLUS: { register char nextch; register int no; register const char *save; register int min; /* * Lookahead to avoid useless match attempts * when we know what character comes next. */ nextch = '\0'; if (OP(next) == EXACTLY) nextch = *OPERAND(next); min = (OP(scan) == STAR) ? 0 : 1; save = reginput; no = regrepeat(OPERAND(scan)); while (no >= min) { /* If it could work, try it. 
*/ if (nextch == '\0' || *reginput == nextch) if (regmatch(next)) return(1); /* Couldn't or didn't -- back up. */ no--; reginput = save + no; } return(0); } break; case END: return(1); /* Success! */ break; default: regerror("memory corruption"); return(0); break; } scan = next; } /* * We get here only if there's trouble -- normally "case END" is * the terminating point. */ regerror("corrupted pointers"); return(0); }
/* - regmatch - main matching routine * * Conceptually the strategy is simple: check to see whether the current * node matches, call self recursively to see whether the rest matches, * and then act accordingly. In practice we make some effort to avoid * recursion, in particular by going through "ordinary" nodes (that don't * need to know whether the rest of the match failed) by a loop instead of * by recursion. * 0 failure, 1 success */ int ossimRegExp::regmatch (const char* prog) { const char* scan; // Current node. const char* next; // Next node. scan = prog; while (scan != NULL) { next = regnext(scan); switch (OP(scan)) { case BOL: if (reginput != regbol) return (0); break; case EOL: if (*reginput != '\0') return (0); break; case ANY: if (*reginput == '\0') return (0); reginput++; break; case EXACTLY: { int len; const char* opnd; opnd = OPERAND(scan); // Inline the first character, for speed. if (*opnd != *reginput) return (0); len = (int)strlen(opnd); if (len > 1 && strncmp(opnd, reginput, len) != 0) return (0); reginput += len; } break; case ANYOF: if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) == NULL) return (0); reginput++; break; case ANYBUT: if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) != NULL) return (0); reginput++; break; case NOTHING: break; case BACK: break; case OPEN + 1: case OPEN + 2: case OPEN + 3: case OPEN + 4: case OPEN + 5: case OPEN + 6: case OPEN + 7: case OPEN + 8: case OPEN + 9: { int no; const char* save; no = OP(scan) - OPEN; save = reginput; if (regmatch(next)) { // // Don't set startp if some later invocation of the // same parentheses already has. 
// if (regstartp[no] == NULL) regstartp[no] = save; return (1); } else return (0); } // break; case CLOSE + 1: case CLOSE + 2: case CLOSE + 3: case CLOSE + 4: case CLOSE + 5: case CLOSE + 6: case CLOSE + 7: case CLOSE + 8: case CLOSE + 9: { int no; const char* save; no = OP(scan) - CLOSE; save = reginput; if (regmatch(next)) { // // Don't set endp if some later invocation of the // same parentheses already has. // if (regendp[no] == NULL) regendp[no] = save; return (1); } else return (0); } // break; case BRANCH: { const char* save; if (OP(next) != BRANCH) // No choice. next = OPERAND(scan); // Avoid recursion. else { do { save = reginput; if (regmatch(OPERAND(scan))) return (1); reginput = save; scan = regnext(scan); } while (scan != NULL && OP(scan) == BRANCH); return (0); // NOTREACHED } } break; case STAR: case PLUS: { char nextch; int no; const char* save; int min_no; // // Lookahead to avoid useless match attempts when we know // what character comes next. // nextch = '\0'; if (OP(next) == EXACTLY) nextch = *OPERAND(next); min_no = (OP(scan) == STAR) ? 0 : 1; save = reginput; no = regrepeat(OPERAND(scan)); while (no >= min_no) { // If it could work, try it. if (nextch == '\0' || *reginput == nextch) if (regmatch(next)) return (1); // Couldn't or didn't -- back up. no--; reginput = save + no; } return (0); } // break; case END: return (1); // Success! default: //RAISE Error, SYM(ossimRegExp), SYM(Internal_Error), printf ("ossimRegExp::find(): Internal error -- memory corrupted.\n"); return 0; } scan = next; } // // We get here only if there's trouble -- normally "case END" is the // terminating point. // //RAISE Error, SYM(ossimRegExp), SYM(Internal_Error), printf ("ossimRegExp::find(): Internal error -- corrupted pointers.\n"); return (0); }