Example #1
0
int yr_parser_reduce_rule_declaration_phase_2(
    yyscan_t yyscanner,
    YR_RULE* rule)
{
  uint32_t max_strings_per_rule;
  uint32_t strings_in_rule = 0;

  YR_COMPILER* compiler = yyget_extra(yyscanner);

  // Check for unreferenced (unused) strings.

  YR_STRING* string = rule->strings;

  yr_get_configuration(
      YR_CONFIG_MAX_STRINGS_PER_RULE,
      (void*) &max_strings_per_rule);

  while (!STRING_IS_NULL(string))
  {
    // Only the heading fragment in a chain of strings (the one with
    // chained_to == NULL) must be referenced. All other fragments
    // are never marked as referenced.

    if (!STRING_IS_REFERENCED(string) &&
        string->chained_to == NULL)
    {
      yr_compiler_set_error_extra_info(compiler, string->identifier);
      compiler->last_result = ERROR_UNREFERENCED_STRING;
      return compiler->last_result;
    }

    strings_in_rule++;

    if (strings_in_rule > max_strings_per_rule) {
      yr_compiler_set_error_extra_info(compiler, rule->identifier);
      compiler->last_result = ERROR_TOO_MANY_STRINGS;
      return compiler->last_result;
    }

    string = (YR_STRING*) yr_arena_next_address(
        compiler->strings_arena,
        string,
        sizeof(YR_STRING));
  }

  compiler->last_result = yr_parser_emit_with_arg_reloc(
      yyscanner,
      OP_MATCH_RULE,
      rule,
      NULL,
      NULL);

  return compiler->last_result;
}
Example #2
0
File: exec.c Project: metthal/yara
int yr_execute_code(
    YR_SCAN_CONTEXT* context)
{
  int64_t mem[MEM_SIZE];
  int32_t sp = 0;

  const uint8_t* ip = context->rules->code_start;

  YR_VALUE args[YR_MAX_FUNCTION_ARGS];
  YR_VALUE *stack;
  YR_VALUE r1;
  YR_VALUE r2;
  YR_VALUE r3;

  uint64_t elapsed_time;

  #ifdef PROFILING_ENABLED
  uint64_t start_time;
  YR_RULE* current_rule = NULL;
  #endif

  YR_INIT_RULE_ARGS init_rule_args;

  YR_RULE* rule;
  YR_MATCH* match;
  YR_OBJECT_FUNCTION* function;
  YR_OBJECT** obj_ptr;
  YR_ARENA* obj_arena;

  char* identifier;
  char* args_fmt;

  int i;
  int found;
  int count;
  int result = ERROR_SUCCESS;
  int cycle = 0;
  int tidx = context->tidx;
  int stack_size;

  bool stop = false;

  uint8_t opcode;

  yr_get_configuration(YR_CONFIG_STACK_SIZE, (void*) &stack_size);

  stack = (YR_VALUE*) yr_malloc(stack_size * sizeof(YR_VALUE));

  if (stack == NULL)
    return ERROR_INSUFFICIENT_MEMORY;

  FAIL_ON_ERROR_WITH_CLEANUP(
      yr_arena_create(1024, 0, &obj_arena),
      yr_free(stack));

  #ifdef PROFILING_ENABLED
  start_time = yr_stopwatch_elapsed_us(&context->stopwatch);
  #endif

  #if PARANOID_EXEC
  memset(mem, 0, MEM_SIZE * sizeof(mem[0]));
  #endif

  while(!stop)
  {
    opcode = *ip;
    ip++;

    switch(opcode)
    {
      case OP_NOP:
        break;

      case OP_HALT:
        assert(sp == 0); // When HALT is reached the stack should be empty.
        stop = true;
        break;

      case OP_PUSH:
        r1.i = *(uint64_t*)(ip);
        ip += sizeof(uint64_t);
        push(r1);
        break;

      case OP_POP:
        pop(r1);
        break;

      case OP_CLEAR_M:
        r1.i = *(uint64_t*)(ip);
        ip += sizeof(uint64_t);
        #if PARANOID_EXEC
        ensure_within_mem(r1.i);
        #endif
        mem[r1.i] = 0;
        break;

      case OP_ADD_M:
        r1.i = *(uint64_t*)(ip);
        ip += sizeof(uint64_t);
        #if PARANOID_EXEC
        ensure_within_mem(r1.i);
        #endif
        pop(r2);
        if (!is_undef(r2))
          mem[r1.i] += r2.i;
        break;

      case OP_INCR_M:
        r1.i = *(uint64_t*)(ip);
        ip += sizeof(uint64_t);
        #if PARANOID_EXEC
        ensure_within_mem(r1.i);
        #endif
        mem[r1.i]++;
        break;

      case OP_PUSH_M:
        r1.i = *(uint64_t*)(ip);
        ip += sizeof(uint64_t);
        #if PARANOID_EXEC
        ensure_within_mem(r1.i);
        #endif
        r1.i = mem[r1.i];
        push(r1);
        break;

      case OP_POP_M:
        r1.i = *(uint64_t*)(ip);
        ip += sizeof(uint64_t);
        #if PARANOID_EXEC
        ensure_within_mem(r1.i);
        #endif
        pop(r2);
        mem[r1.i] = r2.i;
        break;

      case OP_SET_M:
        r1.i = *(uint64_t*)(ip);
        ip += sizeof(uint64_t);
        #if PARANOID_EXEC
        ensure_within_mem(r1.i);
        #endif
        pop(r2);
        push(r2);
        if (!is_undef(r2))
          mem[r1.i] = r2.i;
        break;

      case OP_SWAPUNDEF:
        r1.i = *(uint64_t*)(ip);
        ip += sizeof(uint64_t);
        #if PARANOID_EXEC
        ensure_within_mem(r1.i);
        #endif
        pop(r2);

        if (is_undef(r2))
        {
          r1.i = mem[r1.i];
          push(r1);
        }
        else
        {
          push(r2);
        }
        break;

      case OP_JNUNDEF:
        pop(r1);
        push(r1);
        ip = jmp_if(!is_undef(r1), ip);
        break;

      case OP_JLE_P:
        pop(r2);
        pop(r1);
        ip = jmp_if(r1.i <= r2.i, ip);
        break;

      case OP_JTRUE:
        pop(r1);
        push(r1);
        ip = jmp_if(!is_undef(r1) && r1.i, ip);
        break;

      case OP_JFALSE:
        pop(r1);
        push(r1);
        ip = jmp_if(is_undef(r1) || !r1.i, ip);
        break;

      case OP_JFALSE_P:
        pop(r1);
        ip = jmp_if(is_undef(r1) || !r1.i, ip);
        break;

      case OP_AND:
        pop(r2);
        pop(r1);

        if (is_undef(r1) || is_undef(r2))
          r1.i = 0;
        else
          r1.i = r1.i && r2.i;

        push(r1);
        break;

      case OP_OR:
        pop(r2);
        pop(r1);

        if (is_undef(r1))
        {
          push(r2);
        }
        else if (is_undef(r2))
        {
          push(r1);
        }
        else
        {
          r1.i = r1.i || r2.i;
          push(r1);
        }
        break;

      case OP_NOT:
        pop(r1);

        if (is_undef(r1))
          r1.i = UNDEFINED;
        else
          r1.i = !r1.i;

        push(r1);
        break;

      case OP_MOD:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        if (r2.i != 0)
          r1.i = r1.i % r2.i;
        else
          r1.i = UNDEFINED;
        push(r1);
        break;

      case OP_SHR:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        if (r2.i < 0)
          r1.i = UNDEFINED;
        else if (r2.i < 64)
          r1.i = r1.i >> r2.i;
        else
          r1.i = 0;
        push(r1);
        break;

      case OP_SHL:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        if (r2.i < 0)
          r1.i = UNDEFINED;
        else if (r2.i < 64)
          r1.i = r1.i << r2.i;
        else
          r1.i = 0;
        push(r1);
        break;

      case OP_BITWISE_NOT:
        pop(r1);
        ensure_defined(r1);
        r1.i = ~r1.i;
        push(r1);
        break;

      case OP_BITWISE_AND:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i & r2.i;
        push(r1);
        break;

      case OP_BITWISE_OR:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i | r2.i;
        push(r1);
        break;

      case OP_BITWISE_XOR:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i ^ r2.i;
        push(r1);
        break;

      case OP_PUSH_RULE:
        rule = *(YR_RULE**)(ip);
        ip += sizeof(uint64_t);
        if (RULE_IS_DISABLED(rule))
          r1.i = UNDEFINED;
        else
          r1.i = rule->t_flags[tidx] & RULE_TFLAGS_MATCH ? 1 : 0;
        push(r1);
        break;

      case OP_INIT_RULE:
        memcpy(&init_rule_args, ip, sizeof(init_rule_args));
        #ifdef PROFILING_ENABLED
        current_rule = init_rule_args.rule;
        #endif
        if (RULE_IS_DISABLED(init_rule_args.rule))
          ip = init_rule_args.jmp_addr;
        else
          ip += sizeof(init_rule_args);
        break;

      case OP_MATCH_RULE:
        pop(r1);
        rule = *(YR_RULE**)(ip);
        ip += sizeof(uint64_t);

        if (!is_undef(r1) && r1.i)
          rule->t_flags[tidx] |= RULE_TFLAGS_MATCH;
        else if (RULE_IS_GLOBAL(rule))
          rule->ns->t_flags[tidx] |= NAMESPACE_TFLAGS_UNSATISFIED_GLOBAL;

        #ifdef PROFILING_ENABLED
        elapsed_time = yr_stopwatch_elapsed_us(&context->stopwatch);
        rule->time_cost_per_thread[tidx] += (elapsed_time - start_time);
        start_time = elapsed_time;
        #endif

        assert(sp == 0); // at this point the stack should be empty.
        break;

      case OP_OBJ_LOAD:
        identifier = *(char**)(ip);
        ip += sizeof(uint64_t);

        r1.o = (YR_OBJECT*) yr_hash_table_lookup(
            context->objects_table,
            identifier,
            NULL);

        assert(r1.o != NULL);
        push(r1);
        break;

      case OP_OBJ_FIELD:
        identifier = *(char**)(ip);
        ip += sizeof(uint64_t);

        pop(r1);
        ensure_defined(r1);

        r1.o = yr_object_lookup_field(r1.o, identifier);

        assert(r1.o != NULL);
        push(r1);
        break;

      case OP_OBJ_VALUE:
        pop(r1);
        ensure_defined(r1);

        #if PARANOID_EXEC
        check_object_canary(r1.o);
        #endif

        switch(r1.o->type)
        {
          case OBJECT_TYPE_INTEGER:
            r1.i = r1.o->value.i;
            break;

          case OBJECT_TYPE_FLOAT:
            if (isnan(r1.o->value.d))
              r1.i = UNDEFINED;
            else
              r1.d = r1.o->value.d;
            break;

          case OBJECT_TYPE_STRING:
            if (r1.o->value.ss == NULL)
              r1.i = UNDEFINED;
            else
              r1.ss = r1.o->value.ss;
            break;

          default:
            assert(false);
        }

        push(r1);
        break;

      case OP_INDEX_ARRAY:
        pop(r1);  // index
        pop(r2);  // array

        ensure_defined(r1);
        ensure_defined(r2);

        assert(r2.o->type == OBJECT_TYPE_ARRAY);

        #if PARANOID_EXEC
        check_object_canary(r2.o);
        #endif

        r1.o = yr_object_array_get_item(r2.o, 0, (int) r1.i);

        if (r1.o == NULL)
          r1.i = UNDEFINED;

        push(r1);
        break;

      case OP_LOOKUP_DICT:
        pop(r1);  // key
        pop(r2);  // dictionary

        ensure_defined(r1);
        ensure_defined(r2);

        assert(r2.o->type == OBJECT_TYPE_DICTIONARY);

        #if PARANOID_EXEC
        check_object_canary(r2.o);
        #endif

        r1.o = yr_object_dict_get_item(
            r2.o, 0, r1.ss->c_string);

        if (r1.o == NULL)
          r1.i = UNDEFINED;

        push(r1);
        break;

      case OP_CALL:
        args_fmt = *(char**)(ip);
        ip += sizeof(uint64_t);

        i = (int) strlen(args_fmt);
        count = 0;

        #if PARANOID_EXEC
        if (i > YR_MAX_FUNCTION_ARGS)
        {
          stop = true;
          result = ERROR_INTERNAL_FATAL_ERROR;
          break;
        }
        #endif

        // pop arguments from stack and copy them to args array

        while (i > 0)
        {
          pop(r1);

          if (is_undef(r1))  // count the number of undefined args
            count++;

          args[i - 1] = r1;
          i--;
        }

        pop(r2);
        ensure_defined(r2);

        #if PARANOID_EXEC
        check_object_canary(r2.o);
        #endif

        if (count > 0)
        {
          // if there are undefined args, result for function call
          // is undefined as well.

          r1.i = UNDEFINED;
          push(r1);
          break;
        }

        function = object_as_function(r2.o);
        result = ERROR_INTERNAL_FATAL_ERROR;

        for (i = 0; i < YR_MAX_OVERLOADED_FUNCTIONS; i++)
        {
          if (function->prototypes[i].arguments_fmt == NULL)
            break;

          if (strcmp(function->prototypes[i].arguments_fmt, args_fmt) == 0)
          {
            result = function->prototypes[i].code(args, context, function);
            break;
          }
        }

        // if i == YR_MAX_OVERLOADED_FUNCTIONS at this point no matching
        // prototype was found, but this shouldn't happen.

        assert(i < YR_MAX_OVERLOADED_FUNCTIONS);

        // make a copy of the returned object and push the copy into the stack
        // function->return_obj can't be pushed because it can change in
        // subsequent calls to the same function.

        if (result == ERROR_SUCCESS)
          result = yr_object_copy(function->return_obj, &r1.o);

        // a pointer to the copied object is stored in a arena in order to
        // free the object before exiting yr_execute_code

        if (result == ERROR_SUCCESS)
          result = yr_arena_write_data(obj_arena, &r1.o, sizeof(r1.o), NULL);

        stop = (result != ERROR_SUCCESS);
        push(r1);
        break;

      case OP_FOUND:
        pop(r1);
        r1.i = r1.s->matches[tidx].tail != NULL ? 1 : 0;
        push(r1);
        break;

      case OP_FOUND_AT:
        pop(r2);
        pop(r1);

        if (is_undef(r1))
        {
          r1.i = 0;
          push(r1);
          break;
        }

        match = r2.s->matches[tidx].head;
        r3.i = false;

        while (match != NULL)
        {
          if (r1.i == match->base + match->offset)
          {
            r3.i = true;
            break;
          }

          if (r1.i < match->base + match->offset)
            break;

          match = match->next;
        }

        push(r3);
        break;

      case OP_FOUND_IN:
        pop(r3);
        pop(r2);
        pop(r1);

        ensure_defined(r1);
        ensure_defined(r2);

        match = r3.s->matches[tidx].head;
        r3.i = false;

        while (match != NULL && !r3.i)
        {
          if (match->base + match->offset >= r1.i &&
              match->base + match->offset <= r2.i)
          {
            r3.i = true;
          }

          if (match->base + match->offset > r2.i)
            break;

          match = match->next;
        }

        push(r3);
        break;

      case OP_COUNT:
        pop(r1);

        #if PARANOID_EXEC
        // Make sure that the string pointer is within the rules arena.
        if (yr_arena_page_for_address(context->rules->arena, r1.p) == NULL)
          return ERROR_INTERNAL_FATAL_ERROR;
        #endif

        r1.i = r1.s->matches[tidx].count;
        push(r1);
        break;

      case OP_OFFSET:
        pop(r2);
        pop(r1);

        ensure_defined(r1);

        match = r2.s->matches[tidx].head;
        i = 1;
        r3.i = UNDEFINED;

        while (match != NULL && r3.i == UNDEFINED)
        {
          if (r1.i == i)
            r3.i = match->base + match->offset;

          i++;
          match = match->next;
        }

        push(r3);
        break;

      case OP_LENGTH:
        pop(r2);
        pop(r1);

        ensure_defined(r1);

        match = r2.s->matches[tidx].head;
        i = 1;
        r3.i = UNDEFINED;

        while (match != NULL && r3.i == UNDEFINED)
        {
          if (r1.i == i)
            r3.i = match->match_length;

          i++;
          match = match->next;
        }

        push(r3);
        break;

      case OP_OF:
        found = 0;
        count = 0;
        pop(r1);

        while (!is_undef(r1))
        {
          if (r1.s->matches[tidx].tail != NULL)
            found++;
          count++;
          pop(r1);
        }

        pop(r2);

        if (is_undef(r2))
          r1.i = found >= count ? 1 : 0;
        else
          r1.i = found >= r2.i ? 1 : 0;

        push(r1);
        break;

      case OP_FILESIZE:
        r1.i = context->file_size;
        push(r1);
        break;

      case OP_ENTRYPOINT:
        r1.i = context->entry_point;
        push(r1);
        break;

      case OP_INT8:
        pop(r1);
        r1.i = read_int8_t_little_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_INT16:
        pop(r1);
        r1.i = read_int16_t_little_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_INT32:
        pop(r1);
        r1.i = read_int32_t_little_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_UINT8:
        pop(r1);
        r1.i = read_uint8_t_little_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_UINT16:
        pop(r1);
        r1.i = read_uint16_t_little_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_UINT32:
        pop(r1);
        r1.i = read_uint32_t_little_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_INT8BE:
        pop(r1);
        r1.i = read_int8_t_big_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_INT16BE:
        pop(r1);
        r1.i = read_int16_t_big_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_INT32BE:
        pop(r1);
        r1.i = read_int32_t_big_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_UINT8BE:
        pop(r1);
        r1.i = read_uint8_t_big_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_UINT16BE:
        pop(r1);
        r1.i = read_uint16_t_big_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_UINT32BE:
        pop(r1);
        r1.i = read_uint32_t_big_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_CONTAINS:
        pop(r2);
        pop(r1);

        ensure_defined(r1);
        ensure_defined(r2);

        r1.i = memmem(r1.ss->c_string, r1.ss->length,
                      r2.ss->c_string, r2.ss->length) != NULL;
        push(r1);
        break;

      case OP_IMPORT:
        r1.i = *(uint64_t*)(ip);
        ip += sizeof(uint64_t);

        result = yr_modules_load((char*) r1.p, context);

        if (result != ERROR_SUCCESS)
          stop = true;

        break;

      case OP_MATCHES:

        pop(r2);
        pop(r1);

        ensure_defined(r2);
        ensure_defined(r1);

        if (r1.ss->length == 0)
        {
          r1.i = false;
          push(r1);
          break;
        }

        result = yr_re_exec(
          context,
          (uint8_t*) r2.re->code,
          (uint8_t*) r1.ss->c_string,
          r1.ss->length,
          0,
          r2.re->flags | RE_FLAGS_SCAN,
          NULL,
          NULL,
          &found);

        if (result != ERROR_SUCCESS)
          stop = true;

        r1.i = found >= 0;
        push(r1);
        break;

      case OP_INT_TO_DBL:

        r1.i = *(uint64_t*)(ip);
        ip += sizeof(uint64_t);

        #if PARANOID_EXEC
        if (r1.i > sp || sp - r1.i >= stack_size)
        {
          stop = true;
          result = ERROR_INTERNAL_FATAL_ERROR;
          break;
        }
        #endif

        r2 = stack[sp - r1.i];

        if (is_undef(r2))
          stack[sp - r1.i].i = UNDEFINED;
        else
          stack[sp - r1.i].d = (double) r2.i;
        break;

      case OP_STR_TO_BOOL:
        pop(r1);
        ensure_defined(r1);
        r1.i = r1.ss->length > 0;
        push(r1);
        break;

      case OP_INT_EQ:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i == r2.i;
        push(r1);
        break;

      case OP_INT_NEQ:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i != r2.i;
        push(r1);
        break;

      case OP_INT_LT:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i < r2.i;
        push(r1);
        break;

      case OP_INT_GT:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i > r2.i;
        push(r1);
        break;

      case OP_INT_LE:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i <= r2.i;
        push(r1);
        break;

      case OP_INT_GE:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i >= r2.i;
        push(r1);
        break;

      case OP_INT_ADD:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i + r2.i;
        push(r1);
        break;

      case OP_INT_SUB:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i - r2.i;
        push(r1);
        break;

      case OP_INT_MUL:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i * r2.i;
        push(r1);
        break;

      case OP_INT_DIV:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        if (r2.i != 0)
          r1.i = r1.i / r2.i;
        else
          r1.i = UNDEFINED;
        push(r1);
        break;

      case OP_INT_MINUS:
        pop(r1);
        ensure_defined(r1);
        r1.i = -r1.i;
        push(r1);
        break;

      case OP_DBL_LT:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.d < r2.d;
        push(r1);
        break;

      case OP_DBL_GT:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.d > r2.d;
        push(r1);
        break;

      case OP_DBL_LE:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.d <= r2.d;
        push(r1);
        break;

      case OP_DBL_GE:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.d >= r2.d;
        push(r1);
        break;

      case OP_DBL_EQ:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = fabs(r1.d - r2.d) < DBL_EPSILON;
        push(r1);
        break;

      case OP_DBL_NEQ:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = fabs(r1.d - r2.d) >= DBL_EPSILON;
        push(r1);
        break;

      case OP_DBL_ADD:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.d = r1.d + r2.d;
        push(r1);
        break;

      case OP_DBL_SUB:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.d = r1.d - r2.d;
        push(r1);
        break;

      case OP_DBL_MUL:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.d = r1.d * r2.d;
        push(r1);
        break;

      case OP_DBL_DIV:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.d = r1.d / r2.d;
        push(r1);
        break;

      case OP_DBL_MINUS:
        pop(r1);
        ensure_defined(r1);
        r1.d = -r1.d;
        push(r1);
        break;

      case OP_STR_EQ:
      case OP_STR_NEQ:
      case OP_STR_LT:
      case OP_STR_LE:
      case OP_STR_GT:
      case OP_STR_GE:

        pop(r2);
        pop(r1);

        ensure_defined(r1);
        ensure_defined(r2);

        switch(opcode)
        {
          case OP_STR_EQ:
            r1.i = (sized_string_cmp(r1.ss, r2.ss) == 0);
            break;
          case OP_STR_NEQ:
            r1.i = (sized_string_cmp(r1.ss, r2.ss) != 0);
            break;
          case OP_STR_LT:
            r1.i = (sized_string_cmp(r1.ss, r2.ss) < 0);
            break;
          case OP_STR_LE:
            r1.i = (sized_string_cmp(r1.ss, r2.ss) <= 0);
            break;
          case OP_STR_GT:
            r1.i = (sized_string_cmp(r1.ss, r2.ss) > 0);
            break;
          case OP_STR_GE:
            r1.i = (sized_string_cmp(r1.ss, r2.ss) >= 0);
            break;
        }

        push(r1);
        break;

      default:
        // Unknown instruction, this shouldn't happen.
        assert(false);
    }

    // Check for timeout every 10 instruction cycles. If timeout == 0 it means
    // no timeout at all.

    if (context->timeout > 0L && ++cycle == 10)
    {
      elapsed_time = yr_stopwatch_elapsed_us(&context->stopwatch);

      if (elapsed_time > context->timeout)
      {
        #ifdef PROFILING_ENABLED
        assert(current_rule != NULL);
        current_rule->time_cost_per_thread[tidx] += elapsed_time - start_time;
        #endif
        result = ERROR_SCAN_TIMEOUT;
        stop = true;
      }

      cycle = 0;
    }
  }
Example #3
0
int yr_execute_code(
    YR_RULES* rules,
    YR_SCAN_CONTEXT* context,
    int timeout,
    time_t start_time)
{
  int64_t mem[MEM_SIZE];
  int32_t sp = 0;
  uint8_t* ip = rules->code_start;

  YR_VALUE args[MAX_FUNCTION_ARGS];
  YR_VALUE *stack;
  YR_VALUE r1;
  YR_VALUE r2;
  YR_VALUE r3;

  #ifdef PROFILING_ENABLED
  YR_RULE* current_rule = NULL;
  #endif

  YR_RULE* rule;
  YR_MATCH* match;
  YR_OBJECT_FUNCTION* function;

  char* identifier;
  char* args_fmt;

  int i;
  int found;
  int count;
  int result = ERROR_SUCCESS;
  int stop = FALSE;
  int cycle = 0;
  int tidx = context->tidx;
  int stack_size;

  #ifdef PROFILING_ENABLED
  clock_t start = clock();
  #endif

  yr_get_configuration(YR_CONFIG_STACK_SIZE, (void*) &stack_size);

  stack = (YR_VALUE*) yr_malloc(stack_size * sizeof(YR_VALUE));

  if (stack == NULL)
    return ERROR_INSUFFICIENT_MEMORY;

  while(!stop)
  {
    switch(*ip)
    {
      case OP_HALT:
        assert(sp == 0); // When HALT is reached the stack should be empty.
        stop = TRUE;
        break;

      case OP_PUSH:
        r1.i = *(uint64_t*)(ip + 1);
        ip += sizeof(uint64_t);
        push(r1);
        break;

      case OP_POP:
        pop(r1);
        break;

      case OP_CLEAR_M:
        r1.i = *(uint64_t*)(ip + 1);
        ip += sizeof(uint64_t);
        mem[r1.i] = 0;
        break;

      case OP_ADD_M:
        r1.i = *(uint64_t*)(ip + 1);
        ip += sizeof(uint64_t);
        pop(r2);
        if (!is_undef(r2))
          mem[r1.i] += r2.i;
        break;

      case OP_INCR_M:
        r1.i = *(uint64_t*)(ip + 1);
        ip += sizeof(uint64_t);
        mem[r1.i]++;
        break;

      case OP_PUSH_M:
        r1.i = *(uint64_t*)(ip + 1);
        ip += sizeof(uint64_t);
        r1.i = mem[r1.i];
        push(r1);
        break;

      case OP_POP_M:
        r1.i = *(uint64_t*)(ip + 1);
        ip += sizeof(uint64_t);
        pop(r2);
        mem[r1.i] = r2.i;
        break;

      case OP_SWAPUNDEF:
        r1.i = *(uint64_t*)(ip + 1);
        ip += sizeof(uint64_t);
        pop(r2);

        if (is_undef(r2))
        {
          r1.i = mem[r1.i];
          push(r1);
        }
        else
        {
          push(r2);
        }
        break;

      case OP_JNUNDEF:
        pop(r1);
        push(r1);

        ip = jmp_if(!is_undef(r1), ip);
        break;

      case OP_JLE:
        pop(r2);
        pop(r1);
        push(r1);
        push(r2);

        ip = jmp_if(r1.i <= r2.i, ip);
        break;

      case OP_JTRUE:
        pop(r1);
        push(r1);

        ip = jmp_if(!is_undef(r1) && r1.i, ip);
        break;

      case OP_JFALSE:
        pop(r1);
        push(r1);

        ip = jmp_if(is_undef(r1) || !r1.i, ip);
        break;

      case OP_AND:
        pop(r2);
        pop(r1);

        if (is_undef(r1) || is_undef(r2))
          r1.i = 0;
        else
          r1.i = r1.i && r2.i;

        push(r1);
        break;

      case OP_OR:
        pop(r2);
        pop(r1);

        if (is_undef(r1))
        {
          push(r2);
        }
        else if (is_undef(r2))
        {
          push(r1);
        }
        else
        {
          r1.i = r1.i || r2.i;
          push(r1);
        }
        break;

      case OP_NOT:
        pop(r1);

        if (is_undef(r1))
          r1.i = UNDEFINED;
        else
          r1.i= !r1.i;

        push(r1);
        break;

      case OP_MOD:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        if (r2.i != 0)
          r1.i = r1.i % r2.i;
        else
          r1.i = UNDEFINED;
        push(r1);
        break;

      case OP_SHR:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i >> r2.i;
        push(r1);
        break;

      case OP_SHL:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i << r2.i;
        push(r1);
        break;

      case OP_BITWISE_NOT:
        pop(r1);
        ensure_defined(r1);
        r1.i = ~r1.i;
        push(r1);
        break;

      case OP_BITWISE_AND:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i & r2.i;
        push(r1);
        break;

      case OP_BITWISE_OR:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i | r2.i;
        push(r1);
        break;

      case OP_BITWISE_XOR:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i ^ r2.i;
        push(r1);
        break;

      case OP_PUSH_RULE:
        rule = *(YR_RULE**)(ip + 1);
        ip += sizeof(uint64_t);
        r1.i = rule->t_flags[tidx] & RULE_TFLAGS_MATCH ? 1 : 0;
        push(r1);
        break;

      case OP_INIT_RULE:
        #ifdef PROFILING_ENABLED
        current_rule = *(YR_RULE**)(ip + 1);
        #endif
        ip += sizeof(uint64_t);
        break;

      case OP_MATCH_RULE:
        pop(r1);
        rule = *(YR_RULE**)(ip + 1);
        ip += sizeof(uint64_t);

        if (!is_undef(r1) && r1.i)
          rule->t_flags[tidx] |= RULE_TFLAGS_MATCH;
        else if (RULE_IS_GLOBAL(rule))
          rule->ns->t_flags[tidx] |= NAMESPACE_TFLAGS_UNSATISFIED_GLOBAL;

        #ifdef PROFILING_ENABLED
        rule->clock_ticks += clock() - start;
        start = clock();
        #endif
        break;

      case OP_OBJ_LOAD:
        identifier = *(char**)(ip + 1);
        ip += sizeof(uint64_t);

        r1.o = (YR_OBJECT*) yr_hash_table_lookup(
            context->objects_table,
            identifier,
            NULL);

        assert(r1.o != NULL);
        push(r1);
        break;

      case OP_OBJ_FIELD:
        identifier = *(char**)(ip + 1);
        ip += sizeof(uint64_t);

        pop(r1);
        ensure_defined(r1);

        r1.o = yr_object_lookup_field(r1.o, identifier);

        assert(r1.o != NULL);
        push(r1);
        break;

      case OP_OBJ_VALUE:
        pop(r1);
        ensure_defined(r1);

        switch(r1.o->type)
        {
          case OBJECT_TYPE_INTEGER:
            r1.i = ((YR_OBJECT_INTEGER*) r1.o)->value;
            break;

          case OBJECT_TYPE_FLOAT:
            if (isnan(((YR_OBJECT_DOUBLE*) r1.o)->value))
              r1.i = UNDEFINED;
            else
              r1.d = ((YR_OBJECT_DOUBLE*) r1.o)->value;
            break;

          case OBJECT_TYPE_STRING:
            if (((YR_OBJECT_STRING*) r1.o)->value == NULL)
              r1.i = UNDEFINED;
            else
              r1.p = ((YR_OBJECT_STRING*) r1.o)->value;
            break;

          default:
            assert(FALSE);
        }

        push(r1);
        break;

      case OP_INDEX_ARRAY:
        pop(r1);  // index
        pop(r2);  // array

        ensure_defined(r1);
        ensure_defined(r2);
        assert(r2.o->type == OBJECT_TYPE_ARRAY);

        r1.o = yr_object_array_get_item(r2.o, 0, (int) r1.i);

        if (r1.o == NULL)
          r1.i = UNDEFINED;

        push(r1);
        break;

      case OP_LOOKUP_DICT:
        pop(r1);  // key
        pop(r2);  // dictionary

        ensure_defined(r1);
        ensure_defined(r2);
        assert(r2.o->type == OBJECT_TYPE_DICTIONARY);

        r1.o = yr_object_dict_get_item(
            r2.o, 0, r1.ss->c_string);

        if (r1.o == NULL)
          r1.i = UNDEFINED;

        push(r1);
        break;

      case OP_CALL:
        args_fmt = *(char**)(ip + 1);
        ip += sizeof(uint64_t);

        i = (int) strlen(args_fmt);
        count = 0;

        // pop arguments from stack and copy them to args array

        while (i > 0)
        {
          pop(r1);

          if (is_undef(r1))  // count the number of undefined args
            count++;

          args[i - 1] = r1;
          i--;
        }

        pop(r2);
        ensure_defined(r2);

        if (count > 0)
        {
          // if there are undefined args, result for function call
          // is undefined as well.

          r1.i = UNDEFINED;
          push(r1);
          break;
        }

        function = (YR_OBJECT_FUNCTION*) r2.o;
        result = ERROR_INTERNAL_FATAL_ERROR;

        for (i = 0; i < MAX_OVERLOADED_FUNCTIONS; i++)
        {
          if (function->prototypes[i].arguments_fmt == NULL)
            break;

          if (strcmp(function->prototypes[i].arguments_fmt, args_fmt) == 0)
          {
            result = function->prototypes[i].code(args, context, function);
            break;
          }
        }

        assert(i < MAX_OVERLOADED_FUNCTIONS);

        if (result == ERROR_SUCCESS)
        {
          r1.o = function->return_obj;
          push(r1);
        }
        else
        {
          stop = TRUE;
        }

        break;

      case OP_FOUND:
        pop(r1);
        r1.i = r1.s->matches[tidx].tail != NULL ? 1 : 0;
        push(r1);
        break;

      case OP_FOUND_AT:
        pop(r2);
        pop(r1);

        if (is_undef(r1))
        {
          r1.i = 0;
          push(r1);
          break;
        }

        match = r2.s->matches[tidx].head;
        r3.i = FALSE;

        while (match != NULL)
        {
          if (r1.i == match->base + match->offset)
          {
            r3.i = TRUE;
            break;
          }

          if (r1.i < match->base + match->offset)
            break;

          match = match->next;
        }

        push(r3);
        break;

      case OP_FOUND_IN:
        pop(r3);
        pop(r2);
        pop(r1);

        ensure_defined(r1);
        ensure_defined(r2);

        match = r3.s->matches[tidx].head;
        r3.i = FALSE;

        while (match != NULL && !r3.i)
        {
          if (match->base + match->offset >= r1.i &&
              match->base + match->offset <= r2.i)
          {
            r3.i = TRUE;
          }

          if (match->base + match->offset > r2.i)
            break;

          match = match->next;
        }

        push(r3);
        break;

      case OP_COUNT:
        pop(r1);
        r1.i = r1.s->matches[tidx].count;
        push(r1);
        break;

      case OP_OFFSET:
        pop(r2);
        pop(r1);

        ensure_defined(r1);

        match = r2.s->matches[tidx].head;
        i = 1;
        r3.i = UNDEFINED;

        while (match != NULL && r3.i == UNDEFINED)
        {
          if (r1.i == i)
            r3.i = match->base + match->offset;

          i++;
          match = match->next;
        }

        push(r3);
        break;

      case OP_LENGTH:
        pop(r2);
        pop(r1);

        ensure_defined(r1);

        match = r2.s->matches[tidx].head;
        i = 1;
        r3.i = UNDEFINED;

        while (match != NULL && r3.i == UNDEFINED)
        {
          if (r1.i == i)
            r3.i = match->match_length;

          i++;
          match = match->next;
        }

        push(r3);
        break;

      case OP_OF:
        found = 0;
        count = 0;
        pop(r1);

        while (!is_undef(r1))
        {
          if (r1.s->matches[tidx].tail != NULL)
            found++;
          count++;
          pop(r1);
        }

        pop(r2);

        if (is_undef(r2))
          r1.i = found >= count ? 1 : 0;
        else
          r1.i = found >= r2.i ? 1 : 0;

        push(r1);
        break;

      case OP_FILESIZE:
        r1.i = context->file_size;
        push(r1);
        break;

      case OP_ENTRYPOINT:
        r1.i = context->entry_point;
        push(r1);
        break;

      case OP_INT8:
        pop(r1);
        r1.i = read_int8_t_little_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_INT16:
        pop(r1);
        r1.i = read_int16_t_little_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_INT32:
        pop(r1);
        r1.i = read_int32_t_little_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_UINT8:
        pop(r1);
        r1.i = read_uint8_t_little_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_UINT16:
        pop(r1);
        r1.i = read_uint16_t_little_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_UINT32:
        pop(r1);
        r1.i = read_uint32_t_little_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_INT8BE:
        pop(r1);
        r1.i = read_int8_t_big_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_INT16BE:
        pop(r1);
        r1.i = read_int16_t_big_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_INT32BE:
        pop(r1);
        r1.i = read_int32_t_big_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_UINT8BE:
        pop(r1);
        r1.i = read_uint8_t_big_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_UINT16BE:
        pop(r1);
        r1.i = read_uint16_t_big_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_UINT32BE:
        pop(r1);
        r1.i = read_uint32_t_big_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_CONTAINS:
        pop(r2);
        pop(r1);

        ensure_defined(r1);
        ensure_defined(r2);

        r1.i = memmem(r1.ss->c_string, r1.ss->length,
                      r2.ss->c_string, r2.ss->length) != NULL;
        push(r1);
        break;

      case OP_IMPORT:
        r1.i = *(uint64_t*)(ip + 1);
        ip += sizeof(uint64_t);

        result = yr_modules_load((char*) r1.p, context);

        if (result != ERROR_SUCCESS)
          stop = TRUE;

        break;

      case OP_MATCHES:
        pop(r2);
        pop(r1);

        ensure_defined(r2);
        ensure_defined(r1);

        if (r1.ss->length == 0)
        {
          r1.i = FALSE;
          push(r1);
          break;
        }

        r1.i = yr_re_exec(
          (uint8_t*) r2.p,
          (uint8_t*) r1.ss->c_string,
          r1.ss->length,
          RE_FLAGS_SCAN,
          NULL,
          NULL) >= 0;

        push(r1);
        break;

      case OP_INT_TO_DBL:
        r1.i = *(uint64_t*)(ip + 1);
        ip += sizeof(uint64_t);
        r2 = stack[sp - r1.i];
        if (is_undef(r2))
          stack[sp - r1.i].i = UNDEFINED;
        else
          stack[sp - r1.i].d = (double) r2.i;
        break;

      case OP_STR_TO_BOOL:
        pop(r1);
        ensure_defined(r1);
        r1.i = r1.ss->length > 0;
        push(r1);
        break;

      case OP_INT_EQ:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i == r2.i;
        push(r1);
        break;

      case OP_INT_NEQ:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i != r2.i;
        push(r1);
        break;

      case OP_INT_LT:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i < r2.i;
        push(r1);
        break;

      case OP_INT_GT:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i > r2.i;
        push(r1);
        break;

      case OP_INT_LE:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i <= r2.i;
        push(r1);
        break;

      case OP_INT_GE:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i >= r2.i;
        push(r1);
        break;

      case OP_INT_ADD:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i + r2.i;
        push(r1);
        break;

      case OP_INT_SUB:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i - r2.i;
        push(r1);
        break;

      case OP_INT_MUL:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i * r2.i;
        push(r1);
        break;

      case OP_INT_DIV:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        if (r2.i != 0)
          r1.i = r1.i / r2.i;
        else
          r1.i = UNDEFINED;
        push(r1);
        break;

      case OP_INT_MINUS:
        pop(r1);
        ensure_defined(r1);
        r1.i = -r1.i;
        push(r1);
        break;

      case OP_DBL_LT:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.d < r2.d;
        push(r1);
        break;

      case OP_DBL_GT:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.d > r2.d;
        push(r1);
        break;

      case OP_DBL_LE:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.d <= r2.d;
        push(r1);
        break;

      case OP_DBL_GE:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.d >= r2.d;
        push(r1);
        break;

      case OP_DBL_EQ:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.d == r2.d;
        push(r1);
        break;

      case OP_DBL_NEQ:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.d != r2.d;
        push(r1);
        break;

      case OP_DBL_ADD:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.d = r1.d + r2.d;
        push(r1);
        break;

      case OP_DBL_SUB:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.d = r1.d - r2.d;
        push(r1);
        break;

      case OP_DBL_MUL:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.d = r1.d * r2.d;
        push(r1);
        break;

      case OP_DBL_DIV:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.d = r1.d / r2.d;
        push(r1);
        break;

      case OP_DBL_MINUS:
        pop(r1);
        ensure_defined(r1);
        r1.d = -r1.d;
        push(r1);
        break;

      case OP_STR_EQ:
      case OP_STR_NEQ:
      case OP_STR_LT:
      case OP_STR_LE:
      case OP_STR_GT:
      case OP_STR_GE:

        pop(r2);
        pop(r1);

        ensure_defined(r1);
        ensure_defined(r2);

        switch(*ip)
        {
          case OP_STR_EQ:
            r1.i = (sized_string_cmp(r1.ss, r2.ss) == 0);
            break;
          case OP_STR_NEQ:
            r1.i = (sized_string_cmp(r1.ss, r2.ss) != 0);
            break;
          case OP_STR_LT:
            r1.i = (sized_string_cmp(r1.ss, r2.ss) < 0);
            break;
          case OP_STR_LE:
            r1.i = (sized_string_cmp(r1.ss, r2.ss) <= 0);
            break;
          case OP_STR_GT:
            r1.i = (sized_string_cmp(r1.ss, r2.ss) > 0);
            break;
          case OP_STR_GE:
            r1.i = (sized_string_cmp(r1.ss, r2.ss) >= 0);
            break;
        }

        push(r1);
        break;

      default:
        // Unknown instruction, this shouldn't happen.
        assert(FALSE);
    }

    if (timeout > 0)  // timeout == 0 means no timeout
    {
      // Check for timeout every 10 instruction cycles.

      if (++cycle == 10)
      {
        if (difftime(time(NULL), start_time) > timeout)
        {
          #ifdef PROFILING_ENABLED
          assert(current_rule != NULL);
          current_rule->clock_ticks += clock() - start;
          #endif
          result = ERROR_SCAN_TIMEOUT;
          stop = TRUE;
        }

        cycle = 0;
      }
    }

    ip++;
  }

  yr_modules_unload_all(context);
  yr_free(stack);

  return result;
}
Example #4
0
static int _yr_scan_match_callback(
    const uint8_t* match_data,
    int32_t match_length,
    int flags,
    void* args)
{
  CALLBACK_ARGS* callback_args = (CALLBACK_ARGS*) args;

  YR_STRING* string = callback_args->string;
  YR_MATCH* new_match;

  int result = ERROR_SUCCESS;
  int tidx = callback_args->context->tidx;

  size_t match_offset = match_data - callback_args->data;

  // total match length is the sum of backward and forward matches.
  match_length += callback_args->forward_matches;

  // make sure that match fits into the data.
  assert(match_offset + match_length <= callback_args->data_size);

  if (callback_args->full_word)
  {
    if (flags & RE_FLAGS_WIDE)
    {
      if (match_offset >= 2 &&
          *(match_data - 1) == 0 &&
          isalnum(*(match_data - 2)))
        return ERROR_SUCCESS;

      if (match_offset + match_length + 1 < callback_args->data_size &&
          *(match_data + match_length + 1) == 0 &&
          isalnum(*(match_data + match_length)))
        return ERROR_SUCCESS;
    }
    else
    {
      if (match_offset >= 1 &&
          isalnum(*(match_data - 1)))
        return ERROR_SUCCESS;

      if (match_offset + match_length < callback_args->data_size &&
          isalnum(*(match_data + match_length)))
        return ERROR_SUCCESS;
    }
  }

  if (STRING_IS_CHAIN_PART(string))
  {
    result = _yr_scan_verify_chained_string_match(
        string,
        callback_args->context,
        match_data,
        callback_args->data_base,
        match_offset,
        match_length);
  }
  else
  {
    uint32_t max_match_data;

    FAIL_ON_ERROR(yr_get_configuration(
        YR_CONFIG_MAX_MATCH_DATA,
        &max_match_data))

    if (string->matches[tidx].count == 0)
    {
      // If this is the first match for the string, put the string in the
      // list of strings whose flags needs to be cleared after the scan.

      FAIL_ON_ERROR(yr_arena_write_data(
          callback_args->context->matching_strings_arena,
          &string,
          sizeof(string),
          NULL));
    }

    FAIL_ON_ERROR(yr_arena_allocate_memory(
        callback_args->context->matches_arena,
        sizeof(YR_MATCH),
        (void**) &new_match));

    new_match->data_length = yr_min(match_length, max_match_data);

    if (new_match->data_length > 0)
    {
      FAIL_ON_ERROR(yr_arena_write_data(
          callback_args->context->matches_arena,
          match_data,
          new_match->data_length,
          (void**) &new_match->data));
    }
    else
    {
      new_match->data = NULL;
    }

    if (result == ERROR_SUCCESS)
    {
      new_match->base = callback_args->data_base;
      new_match->offset = match_offset;
      new_match->match_length = match_length;
      new_match->prev = NULL;
      new_match->next = NULL;

      FAIL_ON_ERROR(_yr_scan_add_match_to_list(
          new_match,
          &string->matches[tidx],
          STRING_IS_GREEDY_REGEXP(string)));
    }
  }

  return result;
}
Example #5
0
static int _yr_scan_verify_chained_string_match(
    YR_STRING* matching_string,
    YR_SCAN_CONTEXT* context,
    const uint8_t* match_data,
    uint64_t match_base,
    uint64_t match_offset,
    int32_t match_length)
{
  YR_STRING* string;
  YR_MATCH* match;
  YR_MATCH* next_match;
  YR_MATCH* new_match;

  uint64_t lower_offset;
  uint64_t ending_offset;
  int32_t full_chain_length;

  int tidx = context->tidx;
  bool add_match = false;

  if (matching_string->chained_to == NULL)
  {
    add_match = true;
  }
  else
  {
    if (matching_string->unconfirmed_matches[tidx].head != NULL)
      lower_offset = matching_string->unconfirmed_matches[tidx].head->offset;
    else
      lower_offset = match_offset;

    match = matching_string->chained_to->unconfirmed_matches[tidx].head;

    while (match != NULL)
    {
      next_match = match->next;
      ending_offset = match->offset + match->match_length;

      if (ending_offset + matching_string->chain_gap_max < lower_offset)
      {
        _yr_scan_remove_match_from_list(
            match, &matching_string->chained_to->unconfirmed_matches[tidx]);
      }
      else
      {
        if (ending_offset + matching_string->chain_gap_max >= match_offset &&
            ending_offset + matching_string->chain_gap_min <= match_offset)
        {
          add_match = true;
          break;
        }
      }

      match = next_match;
    }
  }

  if (add_match)
  {
    uint32_t max_match_data;

    FAIL_ON_ERROR(yr_get_configuration(
        YR_CONFIG_MAX_MATCH_DATA,
        &max_match_data))

    if (STRING_IS_CHAIN_TAIL(matching_string))
    {
      // Chain tails must be chained to some other string
      assert(matching_string->chained_to != NULL);

      match = matching_string->chained_to->unconfirmed_matches[tidx].head;

      while (match != NULL)
      {
        ending_offset = match->offset + match->match_length;

        if (ending_offset + matching_string->chain_gap_max >= match_offset &&
            ending_offset + matching_string->chain_gap_min <= match_offset)
        {
          _yr_scan_update_match_chain_length(
              tidx, matching_string->chained_to, match, 1);
        }

        match = match->next;
      }

      full_chain_length = 0;
      string = matching_string;

      while(string->chained_to != NULL)
      {
        full_chain_length++;
        string = string->chained_to;
      }

      // "string" points now to the head of the strings chain

      match = string->unconfirmed_matches[tidx].head;

      while (match != NULL)
      {
        next_match = match->next;

        if (match->chain_length == full_chain_length)
        {
          _yr_scan_remove_match_from_list(
              match, &string->unconfirmed_matches[tidx]);

          match->match_length = (int32_t) \
              (match_offset - match->offset + match_length);

          match->data_length = yr_min(match->match_length, max_match_data);

          FAIL_ON_ERROR(yr_arena_write_data(
              context->matches_arena,
              match_data - match_offset + match->offset,
              match->data_length,
              (void**) &match->data));

          FAIL_ON_ERROR(_yr_scan_add_match_to_list(
              match, &string->matches[tidx], false));
        }

        match = next_match;
      }
    }
    else
    {
      if (matching_string->matches[tidx].count == 0 &&
          matching_string->unconfirmed_matches[tidx].count == 0)
      {
        // If this is the first match for the string, put the string in the
        // list of strings whose flags needs to be cleared after the scan.

        FAIL_ON_ERROR(yr_arena_write_data(
            context->matching_strings_arena,
            &matching_string,
            sizeof(matching_string),
            NULL));
      }

      FAIL_ON_ERROR(yr_arena_allocate_memory(
          context->matches_arena,
          sizeof(YR_MATCH),
          (void**) &new_match));

      new_match->data_length = yr_min(match_length, max_match_data);

      if (new_match->data_length > 0)
      {
        FAIL_ON_ERROR(yr_arena_write_data(
            context->matches_arena,
            match_data,
            new_match->data_length,
            (void**) &new_match->data));
      }
      else
      {
        new_match->data = NULL;
      }

      new_match->base = match_base;
      new_match->offset = match_offset;
      new_match->match_length = match_length;
      new_match->chain_length = 0;
      new_match->prev = NULL;
      new_match->next = NULL;

      FAIL_ON_ERROR(_yr_scan_add_match_to_list(
          new_match,
          &matching_string->unconfirmed_matches[tidx],
          false));
    }
  }

  return ERROR_SUCCESS;
}
Example #6
0
int yr_parser_reduce_rule_declaration_phase_2(
    yyscan_t yyscanner,
    YR_RULE* rule)
{
  uint32_t max_strings_per_rule;
  uint32_t strings_in_rule = 0;
  uint8_t* nop_inst_addr = NULL;

  int result;

  YR_FIXUP *fixup;
  YR_STRING* string;
  YR_COMPILER* compiler = yyget_extra(yyscanner);

  yr_get_configuration(
      YR_CONFIG_MAX_STRINGS_PER_RULE,
      (void*) &max_strings_per_rule);

  // Show warning if the rule is generating too many atoms. The warning is
  // shown if the number of atoms is greater than 20 times the maximum number
  // of strings allowed for a rule, as 20 is minimum number of atoms generated
  // for a string using *nocase*, *ascii* and *wide* modifiers simultaneosly.

  if (rule->num_atoms > YR_ATOMS_PER_RULE_WARNING_THRESHOLD)
  {
    yywarning(
        yyscanner,
        "rule %s is slowing down scanning",
        rule->identifier);
  }

  // Check for unreferenced (unused) strings.
  string = rule->strings;

  while (!STRING_IS_NULL(string))
  {
    // Only the heading fragment in a chain of strings (the one with
    // chained_to == NULL) must be referenced. All other fragments
    // are never marked as referenced.

    if (!STRING_IS_REFERENCED(string) &&
        string->chained_to == NULL)
    {
      yr_compiler_set_error_extra_info(compiler, string->identifier);
      return ERROR_UNREFERENCED_STRING;
    }

    strings_in_rule++;

    if (strings_in_rule > max_strings_per_rule)
    {
      yr_compiler_set_error_extra_info(compiler, rule->identifier);
      return ERROR_TOO_MANY_STRINGS;
    }

    string = (YR_STRING*) yr_arena_next_address(
        compiler->strings_arena,
        string,
        sizeof(YR_STRING));
  }

  result = yr_parser_emit_with_arg_reloc(
      yyscanner,
      OP_MATCH_RULE,
      rule,
      NULL,
      NULL);

  // Generate a do-nothing instruction (NOP) in order to get its address
  // and use it as the destination for the OP_INIT_RULE skip jump. We can not
  // simply use the address of the OP_MATCH_RULE instruction +1 because we
  // can't be sure that the instruction following the OP_MATCH_RULE is going to
  // be in the same arena page. As we don't have a reliable way of getting the
  // address of the next instruction we generate the OP_NOP.

  if (result == ERROR_SUCCESS)
    result = yr_parser_emit(yyscanner, OP_NOP, &nop_inst_addr);

  fixup = compiler->fixup_stack_head;
  *(void**)(fixup->address) = (void*) nop_inst_addr;
  compiler->fixup_stack_head = fixup->next;
  yr_free(fixup);

  return result;
}