Esempio n. 1
0
int yr_parser_emit_with_arg_reloc(
    yyscan_t yyscanner,
    int8_t instruction,
    int64_t argument,
    int8_t** instruction_address)
{
  void* ptr;

  int result = yr_arena_write_data(
      yyget_extra(yyscanner)->code_arena,
      &instruction,
      sizeof(int8_t),
      (void**) instruction_address);

  if (result == ERROR_SUCCESS)
    result = yr_arena_write_data(
        yyget_extra(yyscanner)->code_arena,
        &argument,
        sizeof(int64_t),
        &ptr);

  if (result == ERROR_SUCCESS)
    result = yr_arena_make_relocatable(
        yyget_extra(yyscanner)->code_arena,
        ptr,
        0,
        EOL);

  return result;
}
Esempio n. 2
0
int yr_arena_append(
    YR_ARENA* target_arena,
    YR_ARENA* source_arena)
{
  uint8_t padding_data[15];
  size_t padding_size = 16 - target_arena->current_page->used % 16;

  if (padding_size < 16)
  {
    memset(&padding_data, 0xCC, padding_size);

    FAIL_ON_ERROR(yr_arena_write_data(
        target_arena,
        padding_data,
        padding_size,
        NULL));
  }

  target_arena->current_page->next = source_arena->page_list_head;
  source_arena->page_list_head->prev = target_arena->current_page;
  target_arena->current_page = source_arena->current_page;

  yr_free(source_arena);

  return ERROR_SUCCESS;
}
Esempio n. 3
0
int yr_arena_write_string(
    YR_ARENA* arena,
    const char* string,
    char** written_string)
{
  return yr_arena_write_data(
      arena,
      (void*) string,
      strlen(string) + 1,
      (void**) written_string);
}
Esempio n. 4
0
int yr_parser_emit(
    yyscan_t yyscanner,
    int8_t instruction,
    int8_t** instruction_address)
{
  return yr_arena_write_data(
      yyget_extra(yyscanner)->code_arena,
      &instruction,
      sizeof(int8_t),
      (void**) instruction_address);
}
Esempio n. 5
0
int yr_parser_emit_with_arg(
    yyscan_t yyscanner,
    int8_t instruction,
    int64_t argument,
    int8_t** instruction_address)
{
  int result = yr_arena_write_data(
      yyget_extra(yyscanner)->code_arena,
      &instruction,
      sizeof(int8_t),
      (void**) instruction_address);

  if (result == ERROR_SUCCESS)
    result = yr_arena_write_data(
        yyget_extra(yyscanner)->code_arena,
        &argument,
        sizeof(int64_t),
        NULL);

  return result;
}
Esempio n. 6
0
int yr_parser_emit_with_arg_reloc(
    yyscan_t yyscanner,
    uint8_t instruction,
    void* argument,
    uint8_t** instruction_address,
    void** argument_address)
{
  int64_t* ptr = NULL;
  int result;

  DECLARE_REFERENCE(void*, ptr) arg;

  memset(&arg, 0, sizeof(arg));
  arg.ptr = argument;

  result = yr_arena_write_data(
      yyget_extra(yyscanner)->code_arena,
      &instruction,
      sizeof(uint8_t),
      (void**) instruction_address);

  if (result == ERROR_SUCCESS)
    result = yr_arena_write_data(
        yyget_extra(yyscanner)->code_arena,
        &arg,
        sizeof(arg),
        (void**) &ptr);

  if (result == ERROR_SUCCESS)
    result = yr_arena_make_ptr_relocatable(
        yyget_extra(yyscanner)->code_arena,
        ptr,
        0,
        EOL);

  if (argument_address != NULL)
    *argument_address = (void*) ptr;

  return result;
}
Esempio n. 7
0
int yr_parser_emit_with_arg_double(
    yyscan_t yyscanner,
    uint8_t instruction,
    double argument,
    uint8_t** instruction_address,
    double** argument_address)
{
  int result = yr_arena_write_data(
      yyget_extra(yyscanner)->code_arena,
      &instruction,
      sizeof(uint8_t),
      (void**) instruction_address);

  if (result == ERROR_SUCCESS)
    result = yr_arena_write_data(
        yyget_extra(yyscanner)->code_arena,
        &argument,
        sizeof(double),
        (void**) argument_address);

  return result;
}
Esempio n. 8
0
File: re.c Progetto: ewil/yara
int _yr_emit_split(
    RE_EMIT_CONTEXT* emit_context,
    uint8_t opcode,
    int16_t argument,
    uint8_t** instruction_addr,
    int16_t** argument_addr,
    int* code_size)
{
  assert(opcode == RE_OPCODE_SPLIT_A || opcode == RE_OPCODE_SPLIT_B);

  if (emit_context->next_split_id == RE_MAX_SPLIT_ID)
    return ERROR_REGULAR_EXPRESSION_TOO_COMPLEX;

  FAIL_ON_ERROR(yr_arena_write_data(
      emit_context->arena,
      &opcode,
      sizeof(uint8_t),
      (void**) instruction_addr));

  FAIL_ON_ERROR(yr_arena_write_data(
      emit_context->arena,
      &emit_context->next_split_id,
      sizeof(RE_SPLIT_ID_TYPE),
      NULL));

  emit_context->next_split_id++;

  FAIL_ON_ERROR(yr_arena_write_data(
      emit_context->arena,
      &argument,
      sizeof(int16_t),
      (void**) argument_addr));

  *code_size = sizeof(uint8_t) + sizeof(RE_SPLIT_ID_TYPE) + sizeof(int16_t);

  return ERROR_SUCCESS;
}
Esempio n. 9
0
File: re.c Progetto: devilcoder/yara
int _yr_emit_inst_arg_int16(
    YR_ARENA* arena,
    uint8_t opcode,
    int16_t argument,
    uint8_t** instruction_addr,
    int16_t** argument_addr,
    int* code_size)
{
  FAIL_ON_ERROR(yr_arena_write_data(
      arena,
      &opcode,
      sizeof(uint8_t),
      (void**) instruction_addr));

  FAIL_ON_ERROR(yr_arena_write_data(
      arena,
      &argument,
      sizeof(int16_t),
      (void**) argument_addr));

  *code_size = sizeof(uint8_t) + sizeof(int16_t);

  return ERROR_SUCCESS;
}
Esempio n. 10
0
File: re.c Progetto: ewil/yara
int _yr_emit_inst_arg_int16(
    RE_EMIT_CONTEXT* emit_context,
    uint8_t opcode,
    int16_t argument,
    uint8_t** instruction_addr,
    int16_t** argument_addr,
    int* code_size)
{
  FAIL_ON_ERROR(yr_arena_write_data(
      emit_context->arena,
      &opcode,
      sizeof(uint8_t),
      (void**) instruction_addr));

  FAIL_ON_ERROR(yr_arena_write_data(
      emit_context->arena,
      &argument,
      sizeof(int16_t),
      (void**) argument_addr));

  *code_size = sizeof(uint8_t) + sizeof(int16_t);

  return ERROR_SUCCESS;
}
Esempio n. 11
0
static int _yr_parser_write_string(
    const char* identifier,
    int flags,
    YR_COMPILER* compiler,
    SIZED_STRING* str,
    RE_AST* re_ast,
    YR_STRING** string,
    int* min_atom_quality,
    int* num_atom)
{
  SIZED_STRING* literal_string;
  YR_ATOM_LIST_ITEM* atom;
  YR_ATOM_LIST_ITEM* atom_list = NULL;

  int c, result;
  int max_string_len;
  bool free_literal = false;

  *string = NULL;

  result = yr_arena_allocate_struct(
      compiler->strings_arena,
      sizeof(YR_STRING),
      (void**) string,
      offsetof(YR_STRING, identifier),
      offsetof(YR_STRING, string),
      offsetof(YR_STRING, chained_to),
      offsetof(YR_STRING, rule),
      EOL);

  if (result != ERROR_SUCCESS)
    return result;

  result = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &(*string)->identifier);

  if (result != ERROR_SUCCESS)
    return result;

  if (flags & STRING_GFLAGS_HEXADECIMAL ||
      flags & STRING_GFLAGS_REGEXP)
  {
    literal_string = yr_re_ast_extract_literal(re_ast);

    if (literal_string != NULL)
    {
      flags |= STRING_GFLAGS_LITERAL;
      free_literal = true;
    }
    else
    {
      // Non-literal strings can't be marked as fixed offset because once we
      // find a string atom in the scanned data we don't know the offset where
      // the string should start, as the non-literal strings can contain
      // variable-length portions.

      flags &= ~STRING_GFLAGS_FIXED_OFFSET;
    }
  }
  else
  {
    literal_string = str;
    flags |= STRING_GFLAGS_LITERAL;
  }

  (*string)->g_flags = flags;
  (*string)->chained_to = NULL;
  (*string)->fixed_offset = UNDEFINED;
  (*string)->rule = compiler->current_rule;

  memset((*string)->matches, 0,
         sizeof((*string)->matches));

  memset((*string)->unconfirmed_matches, 0,
         sizeof((*string)->unconfirmed_matches));

  if (flags & STRING_GFLAGS_LITERAL)
  {
    (*string)->length = (uint32_t) literal_string->length;

    result = yr_arena_write_data(
        compiler->sz_arena,
        literal_string->c_string,
        literal_string->length + 1,   // +1 to include terminating NULL
        (void**) &(*string)->string);

    if (result == ERROR_SUCCESS)
    {
      result = yr_atoms_extract_from_string(
          &compiler->atoms_config,
          (uint8_t*) literal_string->c_string,
          (int32_t) literal_string->length,
          flags,
          &atom_list,
          min_atom_quality);
    }
  }
  else
  {
    // Emit forwards code
    result = yr_re_ast_emit_code(re_ast, compiler->re_code_arena, false);

    // Emit backwards code
    if (result == ERROR_SUCCESS)
      result = yr_re_ast_emit_code(re_ast, compiler->re_code_arena, true);

    if (result == ERROR_SUCCESS)
      result = yr_atoms_extract_from_re(
          &compiler->atoms_config,
          re_ast,
          flags,
          &atom_list,
          min_atom_quality);
  }

  if (result == ERROR_SUCCESS)
  {
    // Add the string to Aho-Corasick automaton.
    result = yr_ac_add_string(
        compiler->automaton,
        *string,
        atom_list,
        compiler->matches_arena);
  }

  if (flags & STRING_GFLAGS_LITERAL)
  {
    if (flags & STRING_GFLAGS_WIDE)
      max_string_len = (*string)->length * 2;
    else
      max_string_len = (*string)->length;

    if (max_string_len <= YR_MAX_ATOM_LENGTH)
      (*string)->g_flags |= STRING_GFLAGS_FITS_IN_ATOM;
  }

  atom = atom_list;
  c = 0;

  while (atom != NULL)
  {
    atom = atom->next;
    c++;
  }

  (*num_atom) += c;

  if (free_literal)
    yr_free(literal_string);

  if (atom_list != NULL)
    yr_atoms_list_destroy(atom_list);

  return result;
}
Esempio n. 12
0
File: exec.c Progetto: metthal/yara
int yr_execute_code(
    YR_SCAN_CONTEXT* context)
{
  int64_t mem[MEM_SIZE];
  int32_t sp = 0;

  const uint8_t* ip = context->rules->code_start;

  YR_VALUE args[YR_MAX_FUNCTION_ARGS];
  YR_VALUE *stack;
  YR_VALUE r1;
  YR_VALUE r2;
  YR_VALUE r3;

  uint64_t elapsed_time;

  #ifdef PROFILING_ENABLED
  uint64_t start_time;
  YR_RULE* current_rule = NULL;
  #endif

  YR_INIT_RULE_ARGS init_rule_args;

  YR_RULE* rule;
  YR_MATCH* match;
  YR_OBJECT_FUNCTION* function;
  YR_OBJECT** obj_ptr;
  YR_ARENA* obj_arena;

  char* identifier;
  char* args_fmt;

  int i;
  int found;
  int count;
  int result = ERROR_SUCCESS;
  int cycle = 0;
  int tidx = context->tidx;
  int stack_size;

  bool stop = false;

  uint8_t opcode;

  yr_get_configuration(YR_CONFIG_STACK_SIZE, (void*) &stack_size);

  stack = (YR_VALUE*) yr_malloc(stack_size * sizeof(YR_VALUE));

  if (stack == NULL)
    return ERROR_INSUFFICIENT_MEMORY;

  FAIL_ON_ERROR_WITH_CLEANUP(
      yr_arena_create(1024, 0, &obj_arena),
      yr_free(stack));

  #ifdef PROFILING_ENABLED
  start_time = yr_stopwatch_elapsed_us(&context->stopwatch);
  #endif

  #if PARANOID_EXEC
  memset(mem, 0, MEM_SIZE * sizeof(mem[0]));
  #endif

  while(!stop)
  {
    opcode = *ip;
    ip++;

    switch(opcode)
    {
      case OP_NOP:
        break;

      case OP_HALT:
        assert(sp == 0); // When HALT is reached the stack should be empty.
        stop = true;
        break;

      case OP_PUSH:
        r1.i = *(uint64_t*)(ip);
        ip += sizeof(uint64_t);
        push(r1);
        break;

      case OP_POP:
        pop(r1);
        break;

      case OP_CLEAR_M:
        r1.i = *(uint64_t*)(ip);
        ip += sizeof(uint64_t);
        #if PARANOID_EXEC
        ensure_within_mem(r1.i);
        #endif
        mem[r1.i] = 0;
        break;

      case OP_ADD_M:
        r1.i = *(uint64_t*)(ip);
        ip += sizeof(uint64_t);
        #if PARANOID_EXEC
        ensure_within_mem(r1.i);
        #endif
        pop(r2);
        if (!is_undef(r2))
          mem[r1.i] += r2.i;
        break;

      case OP_INCR_M:
        r1.i = *(uint64_t*)(ip);
        ip += sizeof(uint64_t);
        #if PARANOID_EXEC
        ensure_within_mem(r1.i);
        #endif
        mem[r1.i]++;
        break;

      case OP_PUSH_M:
        r1.i = *(uint64_t*)(ip);
        ip += sizeof(uint64_t);
        #if PARANOID_EXEC
        ensure_within_mem(r1.i);
        #endif
        r1.i = mem[r1.i];
        push(r1);
        break;

      case OP_POP_M:
        r1.i = *(uint64_t*)(ip);
        ip += sizeof(uint64_t);
        #if PARANOID_EXEC
        ensure_within_mem(r1.i);
        #endif
        pop(r2);
        mem[r1.i] = r2.i;
        break;

      case OP_SET_M:
        r1.i = *(uint64_t*)(ip);
        ip += sizeof(uint64_t);
        #if PARANOID_EXEC
        ensure_within_mem(r1.i);
        #endif
        pop(r2);
        push(r2);
        if (!is_undef(r2))
          mem[r1.i] = r2.i;
        break;

      case OP_SWAPUNDEF:
        r1.i = *(uint64_t*)(ip);
        ip += sizeof(uint64_t);
        #if PARANOID_EXEC
        ensure_within_mem(r1.i);
        #endif
        pop(r2);

        if (is_undef(r2))
        {
          r1.i = mem[r1.i];
          push(r1);
        }
        else
        {
          push(r2);
        }
        break;

      case OP_JNUNDEF:
        pop(r1);
        push(r1);
        ip = jmp_if(!is_undef(r1), ip);
        break;

      case OP_JLE_P:
        pop(r2);
        pop(r1);
        ip = jmp_if(r1.i <= r2.i, ip);
        break;

      case OP_JTRUE:
        pop(r1);
        push(r1);
        ip = jmp_if(!is_undef(r1) && r1.i, ip);
        break;

      case OP_JFALSE:
        pop(r1);
        push(r1);
        ip = jmp_if(is_undef(r1) || !r1.i, ip);
        break;

      case OP_JFALSE_P:
        pop(r1);
        ip = jmp_if(is_undef(r1) || !r1.i, ip);
        break;

      case OP_AND:
        pop(r2);
        pop(r1);

        if (is_undef(r1) || is_undef(r2))
          r1.i = 0;
        else
          r1.i = r1.i && r2.i;

        push(r1);
        break;

      case OP_OR:
        pop(r2);
        pop(r1);

        if (is_undef(r1))
        {
          push(r2);
        }
        else if (is_undef(r2))
        {
          push(r1);
        }
        else
        {
          r1.i = r1.i || r2.i;
          push(r1);
        }
        break;

      case OP_NOT:
        pop(r1);

        if (is_undef(r1))
          r1.i = UNDEFINED;
        else
          r1.i = !r1.i;

        push(r1);
        break;

      case OP_MOD:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        if (r2.i != 0)
          r1.i = r1.i % r2.i;
        else
          r1.i = UNDEFINED;
        push(r1);
        break;

      case OP_SHR:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        if (r2.i < 0)
          r1.i = UNDEFINED;
        else if (r2.i < 64)
          r1.i = r1.i >> r2.i;
        else
          r1.i = 0;
        push(r1);
        break;

      case OP_SHL:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        if (r2.i < 0)
          r1.i = UNDEFINED;
        else if (r2.i < 64)
          r1.i = r1.i << r2.i;
        else
          r1.i = 0;
        push(r1);
        break;

      case OP_BITWISE_NOT:
        pop(r1);
        ensure_defined(r1);
        r1.i = ~r1.i;
        push(r1);
        break;

      case OP_BITWISE_AND:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i & r2.i;
        push(r1);
        break;

      case OP_BITWISE_OR:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i | r2.i;
        push(r1);
        break;

      case OP_BITWISE_XOR:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i ^ r2.i;
        push(r1);
        break;

      case OP_PUSH_RULE:
        rule = *(YR_RULE**)(ip);
        ip += sizeof(uint64_t);
        if (RULE_IS_DISABLED(rule))
          r1.i = UNDEFINED;
        else
          r1.i = rule->t_flags[tidx] & RULE_TFLAGS_MATCH ? 1 : 0;
        push(r1);
        break;

      case OP_INIT_RULE:
        memcpy(&init_rule_args, ip, sizeof(init_rule_args));
        #ifdef PROFILING_ENABLED
        current_rule = init_rule_args.rule;
        #endif
        if (RULE_IS_DISABLED(init_rule_args.rule))
          ip = init_rule_args.jmp_addr;
        else
          ip += sizeof(init_rule_args);
        break;

      case OP_MATCH_RULE:
        pop(r1);
        rule = *(YR_RULE**)(ip);
        ip += sizeof(uint64_t);

        if (!is_undef(r1) && r1.i)
          rule->t_flags[tidx] |= RULE_TFLAGS_MATCH;
        else if (RULE_IS_GLOBAL(rule))
          rule->ns->t_flags[tidx] |= NAMESPACE_TFLAGS_UNSATISFIED_GLOBAL;

        #ifdef PROFILING_ENABLED
        elapsed_time = yr_stopwatch_elapsed_us(&context->stopwatch);
        rule->time_cost_per_thread[tidx] += (elapsed_time - start_time);
        start_time = elapsed_time;
        #endif

        assert(sp == 0); // at this point the stack should be empty.
        break;

      case OP_OBJ_LOAD:
        identifier = *(char**)(ip);
        ip += sizeof(uint64_t);

        r1.o = (YR_OBJECT*) yr_hash_table_lookup(
            context->objects_table,
            identifier,
            NULL);

        assert(r1.o != NULL);
        push(r1);
        break;

      case OP_OBJ_FIELD:
        identifier = *(char**)(ip);
        ip += sizeof(uint64_t);

        pop(r1);
        ensure_defined(r1);

        r1.o = yr_object_lookup_field(r1.o, identifier);

        assert(r1.o != NULL);
        push(r1);
        break;

      case OP_OBJ_VALUE:
        pop(r1);
        ensure_defined(r1);

        #if PARANOID_EXEC
        check_object_canary(r1.o);
        #endif

        switch(r1.o->type)
        {
          case OBJECT_TYPE_INTEGER:
            r1.i = r1.o->value.i;
            break;

          case OBJECT_TYPE_FLOAT:
            if (isnan(r1.o->value.d))
              r1.i = UNDEFINED;
            else
              r1.d = r1.o->value.d;
            break;

          case OBJECT_TYPE_STRING:
            if (r1.o->value.ss == NULL)
              r1.i = UNDEFINED;
            else
              r1.ss = r1.o->value.ss;
            break;

          default:
            assert(false);
        }

        push(r1);
        break;

      case OP_INDEX_ARRAY:
        pop(r1);  // index
        pop(r2);  // array

        ensure_defined(r1);
        ensure_defined(r2);

        assert(r2.o->type == OBJECT_TYPE_ARRAY);

        #if PARANOID_EXEC
        check_object_canary(r2.o);
        #endif

        r1.o = yr_object_array_get_item(r2.o, 0, (int) r1.i);

        if (r1.o == NULL)
          r1.i = UNDEFINED;

        push(r1);
        break;

      case OP_LOOKUP_DICT:
        pop(r1);  // key
        pop(r2);  // dictionary

        ensure_defined(r1);
        ensure_defined(r2);

        assert(r2.o->type == OBJECT_TYPE_DICTIONARY);

        #if PARANOID_EXEC
        check_object_canary(r2.o);
        #endif

        r1.o = yr_object_dict_get_item(
            r2.o, 0, r1.ss->c_string);

        if (r1.o == NULL)
          r1.i = UNDEFINED;

        push(r1);
        break;

      case OP_CALL:
        args_fmt = *(char**)(ip);
        ip += sizeof(uint64_t);

        i = (int) strlen(args_fmt);
        count = 0;

        #if PARANOID_EXEC
        if (i > YR_MAX_FUNCTION_ARGS)
        {
          stop = true;
          result = ERROR_INTERNAL_FATAL_ERROR;
          break;
        }
        #endif

        // pop arguments from stack and copy them to args array

        while (i > 0)
        {
          pop(r1);

          if (is_undef(r1))  // count the number of undefined args
            count++;

          args[i - 1] = r1;
          i--;
        }

        pop(r2);
        ensure_defined(r2);

        #if PARANOID_EXEC
        check_object_canary(r2.o);
        #endif

        if (count > 0)
        {
          // if there are undefined args, result for function call
          // is undefined as well.

          r1.i = UNDEFINED;
          push(r1);
          break;
        }

        function = object_as_function(r2.o);
        result = ERROR_INTERNAL_FATAL_ERROR;

        for (i = 0; i < YR_MAX_OVERLOADED_FUNCTIONS; i++)
        {
          if (function->prototypes[i].arguments_fmt == NULL)
            break;

          if (strcmp(function->prototypes[i].arguments_fmt, args_fmt) == 0)
          {
            result = function->prototypes[i].code(args, context, function);
            break;
          }
        }

        // if i == YR_MAX_OVERLOADED_FUNCTIONS at this point no matching
        // prototype was found, but this shouldn't happen.

        assert(i < YR_MAX_OVERLOADED_FUNCTIONS);

        // make a copy of the returned object and push the copy into the stack
        // function->return_obj can't be pushed because it can change in
        // subsequent calls to the same function.

        if (result == ERROR_SUCCESS)
          result = yr_object_copy(function->return_obj, &r1.o);

        // a pointer to the copied object is stored in a arena in order to
        // free the object before exiting yr_execute_code

        if (result == ERROR_SUCCESS)
          result = yr_arena_write_data(obj_arena, &r1.o, sizeof(r1.o), NULL);

        stop = (result != ERROR_SUCCESS);
        push(r1);
        break;

      case OP_FOUND:
        pop(r1);
        r1.i = r1.s->matches[tidx].tail != NULL ? 1 : 0;
        push(r1);
        break;

      case OP_FOUND_AT:
        pop(r2);
        pop(r1);

        if (is_undef(r1))
        {
          r1.i = 0;
          push(r1);
          break;
        }

        match = r2.s->matches[tidx].head;
        r3.i = false;

        while (match != NULL)
        {
          if (r1.i == match->base + match->offset)
          {
            r3.i = true;
            break;
          }

          if (r1.i < match->base + match->offset)
            break;

          match = match->next;
        }

        push(r3);
        break;

      case OP_FOUND_IN:
        pop(r3);
        pop(r2);
        pop(r1);

        ensure_defined(r1);
        ensure_defined(r2);

        match = r3.s->matches[tidx].head;
        r3.i = false;

        while (match != NULL && !r3.i)
        {
          if (match->base + match->offset >= r1.i &&
              match->base + match->offset <= r2.i)
          {
            r3.i = true;
          }

          if (match->base + match->offset > r2.i)
            break;

          match = match->next;
        }

        push(r3);
        break;

      case OP_COUNT:
        pop(r1);

        #if PARANOID_EXEC
        // Make sure that the string pointer is within the rules arena.
        if (yr_arena_page_for_address(context->rules->arena, r1.p) == NULL)
          return ERROR_INTERNAL_FATAL_ERROR;
        #endif

        r1.i = r1.s->matches[tidx].count;
        push(r1);
        break;

      case OP_OFFSET:
        pop(r2);
        pop(r1);

        ensure_defined(r1);

        match = r2.s->matches[tidx].head;
        i = 1;
        r3.i = UNDEFINED;

        while (match != NULL && r3.i == UNDEFINED)
        {
          if (r1.i == i)
            r3.i = match->base + match->offset;

          i++;
          match = match->next;
        }

        push(r3);
        break;

      case OP_LENGTH:
        pop(r2);
        pop(r1);

        ensure_defined(r1);

        match = r2.s->matches[tidx].head;
        i = 1;
        r3.i = UNDEFINED;

        while (match != NULL && r3.i == UNDEFINED)
        {
          if (r1.i == i)
            r3.i = match->match_length;

          i++;
          match = match->next;
        }

        push(r3);
        break;

      case OP_OF:
        found = 0;
        count = 0;
        pop(r1);

        while (!is_undef(r1))
        {
          if (r1.s->matches[tidx].tail != NULL)
            found++;
          count++;
          pop(r1);
        }

        pop(r2);

        if (is_undef(r2))
          r1.i = found >= count ? 1 : 0;
        else
          r1.i = found >= r2.i ? 1 : 0;

        push(r1);
        break;

      case OP_FILESIZE:
        r1.i = context->file_size;
        push(r1);
        break;

      case OP_ENTRYPOINT:
        r1.i = context->entry_point;
        push(r1);
        break;

      case OP_INT8:
        pop(r1);
        r1.i = read_int8_t_little_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_INT16:
        pop(r1);
        r1.i = read_int16_t_little_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_INT32:
        pop(r1);
        r1.i = read_int32_t_little_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_UINT8:
        pop(r1);
        r1.i = read_uint8_t_little_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_UINT16:
        pop(r1);
        r1.i = read_uint16_t_little_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_UINT32:
        pop(r1);
        r1.i = read_uint32_t_little_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_INT8BE:
        pop(r1);
        r1.i = read_int8_t_big_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_INT16BE:
        pop(r1);
        r1.i = read_int16_t_big_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_INT32BE:
        pop(r1);
        r1.i = read_int32_t_big_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_UINT8BE:
        pop(r1);
        r1.i = read_uint8_t_big_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_UINT16BE:
        pop(r1);
        r1.i = read_uint16_t_big_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_UINT32BE:
        pop(r1);
        r1.i = read_uint32_t_big_endian(context->iterator, (size_t) r1.i);
        push(r1);
        break;

      case OP_CONTAINS:
        pop(r2);
        pop(r1);

        ensure_defined(r1);
        ensure_defined(r2);

        r1.i = memmem(r1.ss->c_string, r1.ss->length,
                      r2.ss->c_string, r2.ss->length) != NULL;
        push(r1);
        break;

      case OP_IMPORT:
        r1.i = *(uint64_t*)(ip);
        ip += sizeof(uint64_t);

        result = yr_modules_load((char*) r1.p, context);

        if (result != ERROR_SUCCESS)
          stop = true;

        break;

      case OP_MATCHES:

        pop(r2);
        pop(r1);

        ensure_defined(r2);
        ensure_defined(r1);

        if (r1.ss->length == 0)
        {
          r1.i = false;
          push(r1);
          break;
        }

        result = yr_re_exec(
          context,
          (uint8_t*) r2.re->code,
          (uint8_t*) r1.ss->c_string,
          r1.ss->length,
          0,
          r2.re->flags | RE_FLAGS_SCAN,
          NULL,
          NULL,
          &found);

        if (result != ERROR_SUCCESS)
          stop = true;

        r1.i = found >= 0;
        push(r1);
        break;

      case OP_INT_TO_DBL:

        r1.i = *(uint64_t*)(ip);
        ip += sizeof(uint64_t);

        #if PARANOID_EXEC
        if (r1.i > sp || sp - r1.i >= stack_size)
        {
          stop = true;
          result = ERROR_INTERNAL_FATAL_ERROR;
          break;
        }
        #endif

        r2 = stack[sp - r1.i];

        if (is_undef(r2))
          stack[sp - r1.i].i = UNDEFINED;
        else
          stack[sp - r1.i].d = (double) r2.i;
        break;

      case OP_STR_TO_BOOL:
        pop(r1);
        ensure_defined(r1);
        r1.i = r1.ss->length > 0;
        push(r1);
        break;

      case OP_INT_EQ:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i == r2.i;
        push(r1);
        break;

      case OP_INT_NEQ:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i != r2.i;
        push(r1);
        break;

      case OP_INT_LT:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i < r2.i;
        push(r1);
        break;

      case OP_INT_GT:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i > r2.i;
        push(r1);
        break;

      case OP_INT_LE:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i <= r2.i;
        push(r1);
        break;

      case OP_INT_GE:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i >= r2.i;
        push(r1);
        break;

      case OP_INT_ADD:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i + r2.i;
        push(r1);
        break;

      case OP_INT_SUB:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i - r2.i;
        push(r1);
        break;

      case OP_INT_MUL:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.i * r2.i;
        push(r1);
        break;

      case OP_INT_DIV:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        if (r2.i != 0)
          r1.i = r1.i / r2.i;
        else
          r1.i = UNDEFINED;
        push(r1);
        break;

      case OP_INT_MINUS:
        pop(r1);
        ensure_defined(r1);
        r1.i = -r1.i;
        push(r1);
        break;

      case OP_DBL_LT:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.d < r2.d;
        push(r1);
        break;

      case OP_DBL_GT:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.d > r2.d;
        push(r1);
        break;

      case OP_DBL_LE:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.d <= r2.d;
        push(r1);
        break;

      case OP_DBL_GE:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = r1.d >= r2.d;
        push(r1);
        break;

      case OP_DBL_EQ:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = fabs(r1.d - r2.d) < DBL_EPSILON;
        push(r1);
        break;

      case OP_DBL_NEQ:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.i = fabs(r1.d - r2.d) >= DBL_EPSILON;
        push(r1);
        break;

      case OP_DBL_ADD:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.d = r1.d + r2.d;
        push(r1);
        break;

      case OP_DBL_SUB:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.d = r1.d - r2.d;
        push(r1);
        break;

      case OP_DBL_MUL:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.d = r1.d * r2.d;
        push(r1);
        break;

      case OP_DBL_DIV:
        pop(r2);
        pop(r1);
        ensure_defined(r2);
        ensure_defined(r1);
        r1.d = r1.d / r2.d;
        push(r1);
        break;

      case OP_DBL_MINUS:
        pop(r1);
        ensure_defined(r1);
        r1.d = -r1.d;
        push(r1);
        break;

      case OP_STR_EQ:
      case OP_STR_NEQ:
      case OP_STR_LT:
      case OP_STR_LE:
      case OP_STR_GT:
      case OP_STR_GE:

        pop(r2);
        pop(r1);

        ensure_defined(r1);
        ensure_defined(r2);

        switch(opcode)
        {
          case OP_STR_EQ:
            r1.i = (sized_string_cmp(r1.ss, r2.ss) == 0);
            break;
          case OP_STR_NEQ:
            r1.i = (sized_string_cmp(r1.ss, r2.ss) != 0);
            break;
          case OP_STR_LT:
            r1.i = (sized_string_cmp(r1.ss, r2.ss) < 0);
            break;
          case OP_STR_LE:
            r1.i = (sized_string_cmp(r1.ss, r2.ss) <= 0);
            break;
          case OP_STR_GT:
            r1.i = (sized_string_cmp(r1.ss, r2.ss) > 0);
            break;
          case OP_STR_GE:
            r1.i = (sized_string_cmp(r1.ss, r2.ss) >= 0);
            break;
        }

        push(r1);
        break;

      default:
        // Unknown instruction, this shouldn't happen.
        assert(false);
    }

    // Check for timeout every 10 instruction cycles. If timeout == 0 it means
    // no timeout at all.

    if (context->timeout > 0L && ++cycle == 10)
    {
      elapsed_time = yr_stopwatch_elapsed_us(&context->stopwatch);

      if (elapsed_time > context->timeout)
      {
        #ifdef PROFILING_ENABLED
        assert(current_rule != NULL);
        current_rule->time_cost_per_thread[tidx] += elapsed_time - start_time;
        #endif
        result = ERROR_SCAN_TIMEOUT;
        stop = true;
      }

      cycle = 0;
    }
  }
Esempio n. 13
0
File: re.c Progetto: ewil/yara
int _yr_re_emit(
    RE_EMIT_CONTEXT* emit_context,
    RE_NODE* re_node,
    int flags,
    uint8_t** code_addr,
    int* code_size)
{
  int i;
  int branch_size;
  int split_size;
  int inst_size;
  int jmp_size;

  RE_NODE* left;
  RE_NODE* right;

  int16_t* split_offset_addr = NULL;
  int16_t* jmp_offset_addr = NULL;
  uint8_t* instruction_addr = NULL;

  *code_size = 0;

  switch(re_node->type)
  {
  case RE_NODE_LITERAL:

    FAIL_ON_ERROR(_yr_emit_inst_arg_uint8(
        emit_context,
        flags & EMIT_NO_CASE ?
          RE_OPCODE_LITERAL_NO_CASE :
          RE_OPCODE_LITERAL,
        re_node->value,
        &instruction_addr,
        NULL,
        code_size));
    break;

  case RE_NODE_MASKED_LITERAL:

    FAIL_ON_ERROR(_yr_emit_inst_arg_uint16(
        emit_context,
        RE_OPCODE_MASKED_LITERAL,
        re_node->mask << 8 | re_node->value,
        &instruction_addr,
        NULL,
        code_size));
    break;

  case RE_NODE_WORD_CHAR:

    FAIL_ON_ERROR(_yr_emit_inst(
        emit_context,
        RE_OPCODE_WORD_CHAR,
        &instruction_addr,
        code_size));
    break;

  case RE_NODE_NON_WORD_CHAR:

    FAIL_ON_ERROR(_yr_emit_inst(
        emit_context,
        RE_OPCODE_NON_WORD_CHAR,
        &instruction_addr,
        code_size));
    break;

  case RE_NODE_WORD_BOUNDARY:

    FAIL_ON_ERROR(_yr_emit_inst(
        emit_context,
        RE_OPCODE_WORD_BOUNDARY,
        &instruction_addr,
        code_size));
    break;

  case RE_NODE_NON_WORD_BOUNDARY:

    FAIL_ON_ERROR(_yr_emit_inst(
        emit_context,
        RE_OPCODE_NON_WORD_BOUNDARY,
        &instruction_addr,
        code_size));
    break;

  case RE_NODE_SPACE:

    FAIL_ON_ERROR(_yr_emit_inst(
        emit_context,
        RE_OPCODE_SPACE,
        &instruction_addr,
        code_size));
    break;

  case RE_NODE_NON_SPACE:

    FAIL_ON_ERROR(_yr_emit_inst(
        emit_context,
        RE_OPCODE_NON_SPACE,
        &instruction_addr,
        code_size));
    break;

  case RE_NODE_DIGIT:

    FAIL_ON_ERROR(_yr_emit_inst(
        emit_context,
        RE_OPCODE_DIGIT,
        &instruction_addr,
        code_size));
    break;

  case RE_NODE_NON_DIGIT:

    FAIL_ON_ERROR(_yr_emit_inst(
        emit_context,
        RE_OPCODE_NON_DIGIT,
        &instruction_addr,
        code_size));
    break;

  case RE_NODE_ANY:

    FAIL_ON_ERROR(_yr_emit_inst(
        emit_context,
        flags & EMIT_DOT_ALL ?
          RE_OPCODE_ANY :
          RE_OPCODE_ANY_EXCEPT_NEW_LINE,
        &instruction_addr,
        code_size));
    break;

  case RE_NODE_CLASS:

    FAIL_ON_ERROR(_yr_emit_inst(
        emit_context,
        (flags & EMIT_NO_CASE) ?
          RE_OPCODE_CLASS_NO_CASE :
          RE_OPCODE_CLASS,
        &instruction_addr,
        code_size));

    FAIL_ON_ERROR(yr_arena_write_data(
        emit_context->arena,
        re_node->class_vector,
        32,
        NULL));

    *code_size += 32;
    break;

  case RE_NODE_ANCHOR_START:

    FAIL_ON_ERROR(_yr_emit_inst(
        emit_context,
        RE_OPCODE_MATCH_AT_START,
        &instruction_addr,
        code_size));
    break;

  case RE_NODE_ANCHOR_END:

    FAIL_ON_ERROR(_yr_emit_inst(
        emit_context,
        RE_OPCODE_MATCH_AT_END,
        &instruction_addr,
        code_size));
    break;

  case RE_NODE_CONCAT:

    if (flags & EMIT_BACKWARDS)
    {
      left = re_node->right;
      right = re_node->left;
    }
    else
    {
      left = re_node->left;
      right = re_node->right;
    }

    FAIL_ON_ERROR(_yr_re_emit(
        emit_context,
        left,
        flags,
        &instruction_addr,
        &branch_size));

    *code_size += branch_size;

    FAIL_ON_ERROR(_yr_re_emit(
        emit_context,
        right,
        flags,
        NULL,
        &branch_size));

    *code_size += branch_size;

    break;

  case RE_NODE_PLUS:

    // Code for e+ looks like:
    //
    //          L1: code for e
    //              split L1, L2
    //          L2:

    FAIL_ON_ERROR(_yr_re_emit(
        emit_context,
        re_node->left,
        flags,
        &instruction_addr,
        &branch_size));

    *code_size += branch_size;

    FAIL_ON_ERROR(_yr_emit_split(
        emit_context,
        re_node->greedy ? RE_OPCODE_SPLIT_B : RE_OPCODE_SPLIT_A,
        -branch_size,
        NULL,
        &split_offset_addr,
        &split_size));

    *code_size += split_size;
    break;

  case RE_NODE_STAR:

    // Code for e* looks like:
    //
    //          L1: split L1, L2
    //              code for e
    //              jmp L1
    //          L2:

    FAIL_ON_ERROR(_yr_emit_split(
        emit_context,
        re_node->greedy ? RE_OPCODE_SPLIT_A : RE_OPCODE_SPLIT_B,
        0,
        &instruction_addr,
        &split_offset_addr,
        &split_size));

    *code_size += split_size;

    FAIL_ON_ERROR(_yr_re_emit(
        emit_context,
        re_node->left,
        flags,
        NULL,
        &branch_size));

    *code_size += branch_size;

    // Emit jump with offset set to 0.

    FAIL_ON_ERROR(_yr_emit_inst_arg_int16(
        emit_context,
        RE_OPCODE_JUMP,
        -(branch_size + split_size),
        NULL,
        &jmp_offset_addr,
        &jmp_size));

    *code_size += jmp_size;

    // Update split offset.
    *split_offset_addr = split_size + branch_size + jmp_size;
    break;

  case RE_NODE_ALT:

    // Code for e1|e2 looks like:
    //
    //              split L1, L2
    //          L1: code for e1
    //              jmp L3
    //          L2: code for e2
    //          L3:

    // Emit a split instruction with offset set to 0 temporarily. Offset
    // will be updated after we know the size of the code generated for
    // the left node (e1).

    FAIL_ON_ERROR(_yr_emit_split(
        emit_context,
        RE_OPCODE_SPLIT_A,
        0,
        &instruction_addr,
        &split_offset_addr,
        &split_size));

    *code_size += split_size;

    FAIL_ON_ERROR(_yr_re_emit(
        emit_context,
        re_node->left,
        flags,
        NULL,
        &branch_size));

    *code_size += branch_size;

    // Emit jump with offset set to 0.

    FAIL_ON_ERROR(_yr_emit_inst_arg_int16(
        emit_context,
        RE_OPCODE_JUMP,
        0,
        NULL,
        &jmp_offset_addr,
        &jmp_size));

    *code_size += jmp_size;

    // Update split offset.
    *split_offset_addr = split_size + branch_size + jmp_size;

    FAIL_ON_ERROR(_yr_re_emit(
        emit_context,
        re_node->right,
        flags,
        NULL,
        &branch_size));

    *code_size += branch_size;

    // Update offset for jmp instruction.
    *jmp_offset_addr = branch_size + jmp_size;
    break;


  case RE_NODE_RANGE:

    // Code for e{n,m} looks like:
    //
    //            code for e       (repeated n times)
    //            push m-n-1
    //        L0: split L1, L2
    //        L1: code for e
    //            jnz L0
    //        L2: pop
    //            split L3, L4
    //        L3: code for e
    //        L4:
    //
    // Instead of generating a loop with m-n iterations, we generate a loop
    // with m-n-1 iterations and the last one is unrolled outside the loop.
    // This is because re_node->backward_code pointers *must* point to code
    // past the loop. If they point to code before the loop then when some atom
    // contained inside "e" is found, the loop will be executed in both
    // forward and backward code. This causes an overlap in forward and backward
    // matches and the reported matching string will be longer than expected.

    if (re_node->start > 0)
    {
      FAIL_ON_ERROR(_yr_re_emit(
          emit_context,
          re_node->left,
          flags,
          &instruction_addr,
          &branch_size));

      *code_size += branch_size;

      for (i = 0; i < re_node->start - 1; i++)
      {
        // Don't want re_node->forward_code updated in this call
        // forward_code must remain pointing to the code generated by
        // by the  _yr_re_emit above. However we want re_node->backward_code
        // being updated.

        FAIL_ON_ERROR(_yr_re_emit(
            emit_context,
            re_node->left,
            flags | EMIT_DONT_SET_FORWARDS_CODE,
            NULL,
            &branch_size));

        *code_size += branch_size;
      }
    }

    if (re_node->end > re_node->start + 1)
    {
      FAIL_ON_ERROR(_yr_emit_inst_arg_uint16(
          emit_context,
          RE_OPCODE_PUSH,
          re_node->end - re_node->start - 1,
          re_node->start == 0 ? &instruction_addr : NULL,
          NULL,
          &inst_size));

      *code_size += inst_size;

      FAIL_ON_ERROR(_yr_emit_split(
          emit_context,
          re_node->greedy ? RE_OPCODE_SPLIT_A : RE_OPCODE_SPLIT_B,
          0,
          NULL,
          &split_offset_addr,
          &split_size));

      *code_size += split_size;

      FAIL_ON_ERROR(_yr_re_emit(
          emit_context,
          re_node->left,
          flags | EMIT_DONT_SET_FORWARDS_CODE | EMIT_DONT_SET_BACKWARDS_CODE,
          NULL,
          &branch_size));

      *code_size += branch_size;

      FAIL_ON_ERROR(_yr_emit_inst_arg_int16(
          emit_context,
          RE_OPCODE_JNZ,
          -(branch_size + split_size),
          NULL,
          &jmp_offset_addr,
          &jmp_size));

      *code_size += jmp_size;
      *split_offset_addr = split_size + branch_size + jmp_size;

      FAIL_ON_ERROR(_yr_emit_inst(
          emit_context,
          RE_OPCODE_POP,
          NULL,
          &inst_size));

      *code_size += inst_size;
    }

    if (re_node->end > re_node->start)
    {
      FAIL_ON_ERROR(_yr_emit_split(
          emit_context,
          re_node->greedy ? RE_OPCODE_SPLIT_A : RE_OPCODE_SPLIT_B,
          0,
          NULL,
          &split_offset_addr,
          &split_size));

      *code_size += split_size;

      FAIL_ON_ERROR(_yr_re_emit(
          emit_context,
          re_node->left,
          flags | EMIT_DONT_SET_FORWARDS_CODE,
          re_node->start == 0 && re_node->end == 1 ? &instruction_addr : NULL,
          &branch_size));

      *code_size += branch_size;
      *split_offset_addr = split_size + branch_size;
    }

    break;
  }

  if (flags & EMIT_BACKWARDS)
  {
    if (!(flags & EMIT_DONT_SET_BACKWARDS_CODE))
      re_node->backward_code = instruction_addr + *code_size;
  }
  else
  {
    if (!(flags & EMIT_DONT_SET_FORWARDS_CODE))
      re_node->forward_code = instruction_addr;
  }

  if (code_addr != NULL)
    *code_addr = instruction_addr;

  return ERROR_SUCCESS;
}
Esempio n. 14
0
int yr_ac_compile(
    YR_AC_AUTOMATON* automaton,
    YR_ARENA* arena,
    YR_AC_TABLES* tables)
{
  uint32_t i;

  FAIL_ON_ERROR(_yr_ac_create_failure_links(automaton));
  FAIL_ON_ERROR(_yr_ac_optimize_failure_links(automaton));
  FAIL_ON_ERROR(_yr_ac_build_transition_table(automaton));

  FAIL_ON_ERROR(yr_arena_reserve_memory(
      arena,
      automaton->tables_size * sizeof(tables->transitions[0]) +
      automaton->tables_size * sizeof(tables->matches[0])));

  FAIL_ON_ERROR(yr_arena_write_data(
      arena,
      automaton->t_table,
      sizeof(YR_AC_TRANSITION),
      (void**) &tables->transitions));

  for (i = 1; i < automaton->tables_size; i++)
  {
    FAIL_ON_ERROR(yr_arena_write_data(
        arena,
        automaton->t_table + i,
        sizeof(YR_AC_TRANSITION),
        NULL));
  }

  FAIL_ON_ERROR(yr_arena_write_data(
      arena,
      automaton->m_table,
      sizeof(YR_AC_MATCH_TABLE_ENTRY),
      (void**) &tables->matches));

  FAIL_ON_ERROR(yr_arena_make_relocatable(
      arena,
      tables->matches,
      offsetof(YR_AC_MATCH_TABLE_ENTRY, match),
      EOL));

  for (i = 1; i < automaton->tables_size; i++)
  {
    void* ptr;

    FAIL_ON_ERROR(yr_arena_write_data(
        arena,
        automaton->m_table + i,
        sizeof(YR_AC_MATCH_TABLE_ENTRY),
        (void**) &ptr));

    FAIL_ON_ERROR(yr_arena_make_relocatable(
        arena,
        ptr,
        offsetof(YR_AC_MATCH_TABLE_ENTRY, match),
        EOL));
  }

  return ERROR_SUCCESS;
}
Esempio n. 15
0
File: scan.c Progetto: Cbrdiv/yara
int _yr_scan_verify_chained_string_match(
    YR_STRING* matching_string,
    YR_SCAN_CONTEXT* context,
    uint8_t* match_data,
    uint64_t match_base,
    uint64_t match_offset,
    int32_t match_length)
{
  YR_STRING* string;
  YR_MATCH* match;
  YR_MATCH* next_match;
  YR_MATCH* new_match;

  uint64_t lower_offset;
  uint64_t ending_offset;
  int32_t full_chain_length;

  int tidx = context->tidx;
  int add_match = FALSE;

  if (matching_string->chained_to == NULL)
  {
    add_match = TRUE;
  }
  else
  {
    if (matching_string->unconfirmed_matches[tidx].head != NULL)
      lower_offset = matching_string->unconfirmed_matches[tidx].head->offset;
    else
      lower_offset = match_offset;

    match = matching_string->chained_to->unconfirmed_matches[tidx].head;

    while (match != NULL)
    {
      next_match = match->next;
      ending_offset = match->offset + match->length;

      if (ending_offset + matching_string->chain_gap_max < lower_offset)
      {
        _yr_scan_remove_match_from_list(
            match, &matching_string->chained_to->unconfirmed_matches[tidx]);
      }
      else
      {
        if (ending_offset + matching_string->chain_gap_max >= match_offset &&
            ending_offset + matching_string->chain_gap_min <= match_offset)
        {
          add_match = TRUE;
          break;
        }
      }

      match = next_match;
    }
  }

  if (add_match)
  {
    if (STRING_IS_CHAIN_TAIL(matching_string))
    {
      match = matching_string->chained_to->unconfirmed_matches[tidx].head;

      while (match != NULL)
      {
        ending_offset = match->offset + match->length;

        if (ending_offset + matching_string->chain_gap_max >= match_offset &&
            ending_offset + matching_string->chain_gap_min <= match_offset)
        {
          _yr_scan_update_match_chain_length(
              tidx, matching_string->chained_to, match, 1);
        }

        match = match->next;
      }

      full_chain_length = 0;
      string = matching_string;

      while(string->chained_to != NULL)
      {
        full_chain_length++;
        string = string->chained_to;
      }

      // "string" points now to the head of the strings chain

      match = string->unconfirmed_matches[tidx].head;

      while (match != NULL)
      {
        next_match = match->next;

        if (match->chain_length == full_chain_length)
        {
          _yr_scan_remove_match_from_list(
              match, &string->unconfirmed_matches[tidx]);

          match->length = (int32_t) \
              (match_offset - match->offset + match_length);

          match->data = match_data - match_offset + match->offset;
          match->prev = NULL;
          match->next = NULL;

          FAIL_ON_ERROR(_yr_scan_add_match_to_list(
              match, &string->matches[tidx], FALSE));
        }

        match = next_match;
      }
    }
    else
    {
      if (matching_string->matches[tidx].count == 0 &&
          matching_string->unconfirmed_matches[tidx].count == 0)
      {
        // If this is the first match for the string, put the string in the
        // list of strings whose flags needs to be cleared after the scan.

        FAIL_ON_ERROR(yr_arena_write_data(
            context->matching_strings_arena,
            &matching_string,
            sizeof(matching_string),
            NULL));
      }

      FAIL_ON_ERROR(yr_arena_allocate_memory(
          context->matches_arena,
          sizeof(YR_MATCH),
          (void**) &new_match));

      new_match->base = match_base;
      new_match->offset = match_offset;
      new_match->length = match_length;
      new_match->data = match_data;
      new_match->chain_length = 0;
      new_match->prev = NULL;
      new_match->next = NULL;

      FAIL_ON_ERROR(_yr_scan_add_match_to_list(
          new_match,
          &matching_string->unconfirmed_matches[tidx],
          FALSE));
    }
  }

  return ERROR_SUCCESS;
}
Esempio n. 16
0
File: re.c Progetto: devilcoder/yara
int _yr_re_emit(
    RE_NODE* re_node,
    YR_ARENA* arena,
    int flags,
    uint8_t** code_addr,
    int* code_size)
{
  int i;
  int branch_size;
  int split_size;
  int inst_size;
  int jmp_size;

  RE_NODE* left;
  RE_NODE* right;

  int16_t* split_offset_addr = NULL;
  int16_t* jmp_offset_addr = NULL;
  uint8_t* instruction_addr = NULL;

  *code_size = 0;

  switch(re_node->type)
  {
  case RE_NODE_LITERAL:

    FAIL_ON_ERROR(_yr_emit_inst_arg_uint8(
        arena,
        RE_OPCODE_LITERAL,
        re_node->value,
        &instruction_addr,
        NULL,
        code_size));
    break;

  case RE_NODE_MASKED_LITERAL:

    FAIL_ON_ERROR(_yr_emit_inst_arg_uint16(
        arena,
        RE_OPCODE_MASKED_LITERAL,
        re_node->mask << 8 | re_node->value,
        &instruction_addr,
        NULL,
        code_size));
    break;

  case RE_NODE_WORD_CHAR:

    FAIL_ON_ERROR(_yr_emit_inst(
        arena,
        RE_OPCODE_WORD_CHAR,
        &instruction_addr,
        code_size));
    break;

  case RE_NODE_NON_WORD_CHAR:

    FAIL_ON_ERROR(_yr_emit_inst(
        arena,
        RE_OPCODE_NON_WORD_CHAR,
        &instruction_addr,
        code_size));
    break;

  case RE_NODE_SPACE:

    FAIL_ON_ERROR(_yr_emit_inst(
        arena,
        RE_OPCODE_SPACE,
        &instruction_addr,
        code_size));
    break;

  case RE_NODE_NON_SPACE:

    FAIL_ON_ERROR(_yr_emit_inst(
        arena,
        RE_OPCODE_NON_SPACE,
        &instruction_addr,
        code_size));
    break;

  case RE_NODE_DIGIT:

    FAIL_ON_ERROR(_yr_emit_inst(
        arena,
        RE_OPCODE_DIGIT,
        &instruction_addr,
        code_size));
    break;

  case RE_NODE_NON_DIGIT:

    FAIL_ON_ERROR(_yr_emit_inst(
        arena,
        RE_OPCODE_NON_DIGIT,
        &instruction_addr,
        code_size));
    break;

  case RE_NODE_ANY:

    FAIL_ON_ERROR(_yr_emit_inst(
        arena,
        RE_OPCODE_ANY,
        &instruction_addr,
        code_size));
    break;

  case RE_NODE_CLASS:

    FAIL_ON_ERROR(_yr_emit_inst(
        arena,
        RE_OPCODE_CLASS,
        &instruction_addr,
        code_size));

    FAIL_ON_ERROR(yr_arena_write_data(
        arena,
        re_node->class_vector,
        32,
        NULL));

    *code_size += 32;
    break;

  case RE_NODE_ANCHOR_START:

    FAIL_ON_ERROR(_yr_emit_inst(
        arena,
        RE_OPCODE_MATCH_AT_START,
        &instruction_addr,
        code_size));
    break;

  case RE_NODE_ANCHOR_END:

    FAIL_ON_ERROR(_yr_emit_inst(
        arena,
        RE_OPCODE_MATCH_AT_END,
        &instruction_addr,
        code_size));
    break;

  case RE_NODE_CONCAT:

    if (flags & EMIT_FLAGS_BACKWARDS)
    {
      left = re_node->right;
      right = re_node->left;
    }
    else
    {
      left = re_node->left;
      right = re_node->right;
    }

    FAIL_ON_ERROR(_yr_re_emit(
        left,
        arena,
        flags,
        &instruction_addr,
        &branch_size));

    *code_size += branch_size;

    FAIL_ON_ERROR(_yr_re_emit(
        right,
        arena,
        flags,
        NULL,
        &branch_size));

    *code_size += branch_size;

    break;

  case RE_NODE_PLUS:

    // Code for e+ looks like:
    //
    //          L1: code for e
    //              split L1, L2
    //          L2:

    FAIL_ON_ERROR(_yr_re_emit(
        re_node->left,
        arena,
        flags,
        &instruction_addr,
        &branch_size));

    *code_size += branch_size;

    FAIL_ON_ERROR(_yr_emit_inst_arg_int16(
        arena,
        re_node->greedy ? RE_OPCODE_SPLIT_B : RE_OPCODE_SPLIT_A,
        -branch_size,
        NULL,
        &split_offset_addr,
        &split_size));

    *code_size += split_size;
    break;

  case RE_NODE_STAR:

    // Code for e* looks like:
    //
    //          L1: split L1, L2
    //              code for e
    //              jmp L1
    //          L2:

    FAIL_ON_ERROR(_yr_emit_inst_arg_int16(
        arena,
        re_node->greedy ? RE_OPCODE_SPLIT_A : RE_OPCODE_SPLIT_B,
        0,
        &instruction_addr,
        &split_offset_addr,
        &split_size));

    *code_size += split_size;

    FAIL_ON_ERROR(_yr_re_emit(
        re_node->left,
        arena,
        flags,
        NULL,
        &branch_size));

    *code_size += branch_size;

    // Emit jump with offset set to 0.

    FAIL_ON_ERROR(_yr_emit_inst_arg_int16(
        arena,
        RE_OPCODE_JUMP,
        -(branch_size + split_size),
        NULL,
        &jmp_offset_addr,
        &jmp_size));

    *code_size += jmp_size;

    // Update split offset.
    *split_offset_addr = split_size + branch_size + jmp_size;
    break;

  case RE_NODE_ALT:

    // Code for e1|e2 looks like:
    //
    //              split L1, L2
    //          L1: code for e1
    //              jmp L3
    //          L2: code for e2
    //          L3:

    // Emit a split instruction with offset set to 0 temporarily. Offset
    // will be updated after we know the size of the code generated for
    // the left node (e1).

    FAIL_ON_ERROR(_yr_emit_inst_arg_int16(
        arena,
        RE_OPCODE_SPLIT_A,
        0,
        &instruction_addr,
        &split_offset_addr,
        &split_size));

    *code_size += split_size;

    FAIL_ON_ERROR(_yr_re_emit(
        re_node->left,
        arena,
        flags,
        NULL,
        &branch_size));

    *code_size += branch_size;

    // Emit jump with offset set to 0.

    FAIL_ON_ERROR(_yr_emit_inst_arg_int16(
        arena,
        RE_OPCODE_JUMP,
        0,
        NULL,
        &jmp_offset_addr,
        &jmp_size));

    *code_size += jmp_size;

    // Update split offset.
    *split_offset_addr = split_size + branch_size + jmp_size;

    FAIL_ON_ERROR(_yr_re_emit(
        re_node->right,
        arena,
        flags,
        NULL,
        &branch_size));

    *code_size += branch_size;

    // Update offset for jmp instruction.
    *jmp_offset_addr = branch_size + jmp_size;
    break;


  case RE_NODE_RANGE:

    // Code for e1{n,m} looks like:
    //
    //            code for e1 (n times)
    //            push m-n
    //        L0: split L1, L2
    //        L1: code for e1
    //            jnztop L0
    //        L2: pop

    if (re_node->start > 0)
    {
      FAIL_ON_ERROR(_yr_re_emit(
          re_node->left,
          arena,
          flags,
          &instruction_addr,
          &branch_size));

      *code_size += branch_size;

      for (i = 0; i < re_node->start - 1; i++)
      {
        FAIL_ON_ERROR(_yr_re_emit(
            re_node->left,
            arena,
            flags | EMIT_FLAGS_DONT_ANNOTATE_RE,
            NULL,
            &branch_size));

        *code_size += branch_size;
      }
    }

    // m == n, no more code needed.
    if (re_node->end == re_node->start)
      break;

    FAIL_ON_ERROR(_yr_emit_inst_arg_uint16(
        arena,
        RE_OPCODE_PUSH,
        re_node->end - re_node->start,
        re_node->start == 0 ? &instruction_addr : NULL,
        NULL,
        &inst_size));

    *code_size += inst_size;

    FAIL_ON_ERROR(_yr_emit_inst_arg_int16(
        arena,
        re_node->greedy ? RE_OPCODE_SPLIT_A : RE_OPCODE_SPLIT_B,
        0,
        NULL,
        &split_offset_addr,
        &split_size));

    *code_size += split_size;

    FAIL_ON_ERROR(_yr_re_emit(
        re_node->left,
        arena,
        flags | EMIT_FLAGS_DONT_ANNOTATE_RE,
        NULL,
        &branch_size));

    *code_size += branch_size;

    FAIL_ON_ERROR(_yr_emit_inst_arg_int16(
        arena,
        RE_OPCODE_JNZ,
        -(branch_size + split_size),
        NULL,
        &jmp_offset_addr,
        &jmp_size));

    *code_size += jmp_size;
    *split_offset_addr = split_size + branch_size + jmp_size;

    FAIL_ON_ERROR(_yr_emit_inst(
        arena,
        RE_OPCODE_POP,
        NULL,
        &inst_size));

    *code_size += inst_size;
    break;
  }

  if (!(flags & EMIT_FLAGS_DONT_ANNOTATE_RE))
  {
    if (flags & EMIT_FLAGS_BACKWARDS)
      re_node->backward_code = instruction_addr;
    else
      re_node->forward_code = instruction_addr;
  }

  if (code_addr != NULL)
    *code_addr = instruction_addr;

  return ERROR_SUCCESS;
}
Esempio n. 17
0
int _yr_parser_write_string(
    const char* identifier,
    int flags,
    YR_COMPILER* compiler,
    SIZED_STRING* str,
    RE* re,
    YR_STRING** string,
    int* min_atom_quality)
{
  SIZED_STRING* literal_string;
  YR_AC_MATCH* new_match;
  YR_ATOM_LIST_ITEM* atom_list = NULL;

  int result;
  int max_string_len;
  int free_literal = FALSE;

  *string = NULL;

  result = yr_arena_allocate_struct(
      compiler->strings_arena,
      sizeof(YR_STRING),
      (void**) string,
      offsetof(YR_STRING, identifier),
      offsetof(YR_STRING, string),
      offsetof(YR_STRING, chained_to),
      EOL);

  if (result != ERROR_SUCCESS)
    return result;

  result = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &(*string)->identifier);

  if (result != ERROR_SUCCESS)
    return result;

  if (flags & STRING_GFLAGS_HEXADECIMAL ||
      flags & STRING_GFLAGS_REGEXP)
  {
    literal_string = yr_re_extract_literal(re);

    if (literal_string != NULL)
    {
      flags |= STRING_GFLAGS_LITERAL;
      free_literal = TRUE;
    }
    else
    {
      // Non-literal strings can't be marked as fixed offset because once we
      // find a string atom in the scanned data we don't know the offset where
      // the string should start, as the non-literal strings can contain
      // variable-length portions.

      flags &= ~STRING_GFLAGS_FIXED_OFFSET;
    }
  }
  else
  {
    literal_string = str;
    flags |= STRING_GFLAGS_LITERAL;
  }

  (*string)->g_flags = flags;
  (*string)->chained_to = NULL;
  (*string)->fixed_offset = UNDEFINED;

  #ifdef PROFILING_ENABLED
  (*string)->clock_ticks = 0;
  #endif

  memset((*string)->matches, 0,
         sizeof((*string)->matches));

  memset((*string)->unconfirmed_matches, 0,
         sizeof((*string)->unconfirmed_matches));

  if (flags & STRING_GFLAGS_LITERAL)
  {
    (*string)->length = (uint32_t) literal_string->length;

    result = yr_arena_write_data(
        compiler->sz_arena,
        literal_string->c_string,
        literal_string->length + 1,   // +1 to include terminating NULL
        (void**) &(*string)->string);

    if (result == ERROR_SUCCESS)
    {
      result = yr_atoms_extract_from_string(
          (uint8_t*) literal_string->c_string,
          (int32_t) literal_string->length,
          flags,
          &atom_list);
    }
  }
  else
  {
    result = yr_re_emit_code(re, compiler->re_code_arena);

    if (result == ERROR_SUCCESS)
      result = yr_atoms_extract_from_re(re, flags, &atom_list);
  }

  if (result == ERROR_SUCCESS)
  {
    // Add the string to Aho-Corasick automaton.

    if (atom_list != NULL)
    {
      result = yr_ac_add_string(
          compiler->automaton_arena,
          compiler->automaton,
          *string,
          atom_list);
    }
    else
    {
      result = yr_arena_allocate_struct(
          compiler->automaton_arena,
          sizeof(YR_AC_MATCH),
          (void**) &new_match,
          offsetof(YR_AC_MATCH, string),
          offsetof(YR_AC_MATCH, forward_code),
          offsetof(YR_AC_MATCH, backward_code),
          offsetof(YR_AC_MATCH, next),
          EOL);

      if (result == ERROR_SUCCESS)
      {
        new_match->backtrack = 0;
        new_match->string = *string;
        new_match->forward_code = re->root_node->forward_code;
        new_match->backward_code = NULL;
        new_match->next = compiler->automaton->root->matches;
        compiler->automaton->root->matches = new_match;
      }
    }
  }

  *min_atom_quality = yr_atoms_min_quality(atom_list);

  if (flags & STRING_GFLAGS_LITERAL)
  {
    if (flags & STRING_GFLAGS_WIDE)
      max_string_len = (*string)->length * 2;
    else
      max_string_len = (*string)->length;

    if (max_string_len <= MAX_ATOM_LENGTH)
      (*string)->g_flags |= STRING_GFLAGS_FITS_IN_ATOM;
  }

  if (free_literal)
    yr_free(literal_string);

  if (atom_list != NULL)
    yr_atoms_list_destroy(atom_list);

  return result;
}
Esempio n. 18
0
File: compiler.c Progetto: c4nc/yara
int _yr_compiler_compile_rules(
  YR_COMPILER* compiler)
{
  YARA_RULES_FILE_HEADER* rules_file_header = NULL;
  YR_ARENA* arena = NULL;
  YR_RULE null_rule;
  YR_EXTERNAL_VARIABLE null_external;
  YR_AC_TABLES tables;

  int8_t halt = OP_HALT;
  int result;

  // Write halt instruction at the end of code.
  yr_arena_write_data(
      compiler->code_arena,
      &halt,
      sizeof(int8_t),
      NULL);

  // Write a null rule indicating the end.
  memset(&null_rule, 0xFA, sizeof(YR_RULE));
  null_rule.g_flags = RULE_GFLAGS_NULL;

  yr_arena_write_data(
      compiler->rules_arena,
      &null_rule,
      sizeof(YR_RULE),
      NULL);

  // Write a null external the end.
  memset(&null_external, 0xFA, sizeof(YR_EXTERNAL_VARIABLE));
  null_external.type = EXTERNAL_VARIABLE_TYPE_NULL;

  yr_arena_write_data(
      compiler->externals_arena,
      &null_external,
      sizeof(YR_EXTERNAL_VARIABLE),
      NULL);

  // Write Aho-Corasick automaton to arena.
  result = yr_ac_compile(
      compiler->automaton,
      compiler->automaton_arena,
      &tables);

  if (result == ERROR_SUCCESS)
    result = yr_arena_create(1024, 0, &arena);

  if (result == ERROR_SUCCESS)
    result = yr_arena_allocate_struct(
        arena,
        sizeof(YARA_RULES_FILE_HEADER),
        (void**) &rules_file_header,
        offsetof(YARA_RULES_FILE_HEADER, rules_list_head),
        offsetof(YARA_RULES_FILE_HEADER, externals_list_head),
        offsetof(YARA_RULES_FILE_HEADER, code_start),
        offsetof(YARA_RULES_FILE_HEADER, match_table),
        offsetof(YARA_RULES_FILE_HEADER, transition_table),
        EOL);

  if (result == ERROR_SUCCESS)
  {
    rules_file_header->rules_list_head = (YR_RULE*) yr_arena_base_address(
        compiler->rules_arena);

    rules_file_header->externals_list_head = (YR_EXTERNAL_VARIABLE*)
		yr_arena_base_address(compiler->externals_arena);

    rules_file_header->code_start = (uint8_t*) yr_arena_base_address(
        compiler->code_arena);

    rules_file_header->match_table = tables.matches;
    rules_file_header->transition_table = tables.transitions;
  }

  if (result == ERROR_SUCCESS)
  {
    result = yr_arena_append(
        arena,
        compiler->code_arena);
  }

  if (result == ERROR_SUCCESS)
  {
    compiler->code_arena = NULL;
    result = yr_arena_append(
        arena,
        compiler->re_code_arena);
  }

  if (result == ERROR_SUCCESS)
  {
    compiler->re_code_arena = NULL;
    result = yr_arena_append(
        arena,
        compiler->rules_arena);
  }

  if (result == ERROR_SUCCESS)
  {
    compiler->rules_arena = NULL;
    result = yr_arena_append(
        arena,
        compiler->strings_arena);
  }

  if (result == ERROR_SUCCESS)
  {
    compiler->strings_arena = NULL;
    result = yr_arena_append(
        arena,
        compiler->externals_arena);
  }

  if (result == ERROR_SUCCESS)
  {
    compiler->externals_arena = NULL;
    result = yr_arena_append(
        arena,
        compiler->namespaces_arena);
  }

  if (result == ERROR_SUCCESS)
  {
    compiler->namespaces_arena = NULL;
    result = yr_arena_append(
        arena,
        compiler->metas_arena);
  }

  if (result == ERROR_SUCCESS)
  {
    compiler->metas_arena = NULL;
    result = yr_arena_append(
        arena,
        compiler->sz_arena);
  }

  if (result == ERROR_SUCCESS)
  {
    compiler->sz_arena = NULL;
    result = yr_arena_append(
        arena,
        compiler->automaton_arena);
  }

  if (result == ERROR_SUCCESS)
  {
    compiler->automaton_arena = NULL;
    result = yr_arena_append(
        arena,
        compiler->matches_arena);
  }

  if (result == ERROR_SUCCESS)
  {
    compiler->matches_arena = NULL;
    compiler->compiled_rules_arena = arena;
    result = yr_arena_coalesce(arena);
  }
  else
  {
    yr_arena_destroy(arena);
  }

  return result;
}
Esempio n. 19
0
int _yr_parser_write_string(
    const char* identifier,
    int flags,
    YR_COMPILER* compiler,
    SIZED_STRING* str,
    RE* re,
    YR_STRING** string,
    int* min_atom_length)
{
  SIZED_STRING* literal_string;
  YR_AC_MATCH* new_match;

  YR_ATOM_LIST_ITEM* atom;
  YR_ATOM_LIST_ITEM* atom_list = NULL;

  int result;
  int max_string_len;
  int free_literal = FALSE;

  *string = NULL;

  result = yr_arena_allocate_struct(
      compiler->strings_arena,
      sizeof(YR_STRING),
      (void**) string,
      offsetof(YR_STRING, identifier),
      offsetof(YR_STRING, string),
      offsetof(YR_STRING, chained_to),
      EOL);

  if (result != ERROR_SUCCESS)
    return result;

  result = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &(*string)->identifier);

  if (result != ERROR_SUCCESS)
    return result;

  if (flags & STRING_GFLAGS_HEXADECIMAL ||
      flags & STRING_GFLAGS_REGEXP)
  {
    literal_string = yr_re_extract_literal(re);

    if (literal_string != NULL)
    {
      flags |= STRING_GFLAGS_LITERAL;
      free_literal = TRUE;
    }
  }
  else
  {
    literal_string = str;
    flags |= STRING_GFLAGS_LITERAL;
  }

  (*string)->g_flags = flags;
  (*string)->chained_to = NULL;

  memset((*string)->matches, 0,
         sizeof((*string)->matches));

  memset((*string)->unconfirmed_matches, 0,
         sizeof((*string)->unconfirmed_matches));

  if (flags & STRING_GFLAGS_LITERAL)
  {
    (*string)->length = literal_string->length;

    result = yr_arena_write_data(
        compiler->sz_arena,
        literal_string->c_string,
        literal_string->length,
        (void*) &(*string)->string);

    if (result == ERROR_SUCCESS)
    {
      result = yr_atoms_extract_from_string(
          (uint8_t*) literal_string->c_string,
          literal_string->length,
          flags,
          &atom_list);
    }
  }
  else
  {
    result = yr_re_emit_code(re, compiler->re_code_arena);

    if (result == ERROR_SUCCESS)
      result = yr_atoms_extract_from_re(re, flags, &atom_list);
  }

  if (result == ERROR_SUCCESS)
  {
    // Add the string to Aho-Corasick automaton.

    if (atom_list != NULL)
    {
      result = yr_ac_add_string(
          compiler->automaton_arena,
          compiler->automaton,
          *string,
          atom_list);
    }
    else
    {
      result = yr_arena_allocate_struct(
          compiler->automaton_arena,
          sizeof(YR_AC_MATCH),
          (void**) &new_match,
          offsetof(YR_AC_MATCH, string),
          offsetof(YR_AC_MATCH, forward_code),
          offsetof(YR_AC_MATCH, backward_code),
          offsetof(YR_AC_MATCH, next),
          EOL);

      if (result == ERROR_SUCCESS)
      {
        new_match->backtrack = 0;
        new_match->string = *string;
        new_match->forward_code = re->root_node->forward_code;
        new_match->backward_code = NULL;
        new_match->next = compiler->automaton->root->matches;
        compiler->automaton->root->matches = new_match;
      }
    }
  }

  atom = atom_list;

  if (atom != NULL)
    *min_atom_length = MAX_ATOM_LENGTH;
  else
    *min_atom_length = 0;

  while (atom != NULL)
  {
    if (atom->atom_length < *min_atom_length)
      *min_atom_length = atom->atom_length;
    atom = atom->next;
  }

  if (flags & STRING_GFLAGS_LITERAL)
  {
    if (flags & STRING_GFLAGS_WIDE)
      max_string_len = (*string)->length * 2;
    else
      max_string_len = (*string)->length;

    if (max_string_len == *min_atom_length)
      (*string)->g_flags |= STRING_GFLAGS_FITS_IN_ATOM;
  }

  if (free_literal)
    yr_free(literal_string);

  if (atom_list != NULL)
    yr_atoms_list_destroy(atom_list);

  return result;
}
Esempio n. 20
0
File: scan.c Progetto: Cbrdiv/yara
int _yr_scan_match_callback(
    uint8_t* match_data,
    int32_t match_length,
    int flags,
    void* args)
{
  CALLBACK_ARGS* callback_args = (CALLBACK_ARGS*) args;

  YR_STRING* string = callback_args->string;
  YR_MATCH* new_match;

  int result = ERROR_SUCCESS;
  int tidx = callback_args->context->tidx;

  size_t match_offset = match_data - callback_args->data;

  // total match length is the sum of backward and forward matches.
  match_length += callback_args->forward_matches;

  if (callback_args->full_word)
  {
    if (flags & RE_FLAGS_WIDE)
    {
      if (match_offset >= 2 &&
          *(match_data - 1) == 0 &&
          isalnum(*(match_data - 2)))
        return ERROR_SUCCESS;

      if (match_offset + match_length + 1 < callback_args->data_size &&
          *(match_data + match_length + 1) == 0 &&
          isalnum(*(match_data + match_length)))
        return ERROR_SUCCESS;
    }
    else
    {
      if (match_offset >= 1 &&
          isalnum(*(match_data - 1)))
        return ERROR_SUCCESS;

      if (match_offset + match_length < callback_args->data_size &&
          isalnum(*(match_data + match_length)))
        return ERROR_SUCCESS;
    }
  }

  if (STRING_IS_CHAIN_PART(string))
  {
    result = _yr_scan_verify_chained_string_match(
        string,
        callback_args->context,
        match_data,
        callback_args->data_base,
        match_offset,
        match_length);
  }
  else
  {
    if (string->matches[tidx].count == 0)
    {
      // If this is the first match for the string, put the string in the
      // list of strings whose flags needs to be cleared after the scan.

      FAIL_ON_ERROR(yr_arena_write_data(
          callback_args->context->matching_strings_arena,
          &string,
          sizeof(string),
          NULL));
    }

    result = yr_arena_allocate_memory(
        callback_args->context->matches_arena,
        sizeof(YR_MATCH),
        (void**) &new_match);

    if (result == ERROR_SUCCESS)
    {
      new_match->base = callback_args->data_base;
      new_match->offset = match_offset;
      new_match->length = match_length;
      new_match->data = match_data;
      new_match->prev = NULL;
      new_match->next = NULL;

      FAIL_ON_ERROR(_yr_scan_add_match_to_list(
          new_match,
          &string->matches[tidx],
          STRING_IS_GREEDY_REGEXP(string)));
    }
  }

  return result;
}
Esempio n. 21
0
YR_STRING* yr_parser_reduce_string_declaration(
    yyscan_t yyscanner,
    int32_t flags,
    const char* identifier,
    SIZED_STRING* str)
{
  int i;
  int error_offset;
  int min_atom_length;
  char* file_name;
  char message[512];

  YR_STRING* string;
  YR_AC_MATCH* new_match;
  ATOM_TREE* atom_tree;
  YR_ATOM_LIST_ITEM* atom;
  YR_ATOM_LIST_ITEM* atom_list = NULL;
  RE* re = NULL;

  uint8_t* literal_string;

  int literal_string_len;
  int max_string_len;

  YR_COMPILER* compiler = yyget_extra(yyscanner);

  compiler->last_result = yr_arena_allocate_struct(
      compiler->strings_arena,
      sizeof(YR_STRING),
      (void**) &string,
      offsetof(YR_STRING, identifier),
      offsetof(YR_STRING, string),
      EOL);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  compiler->last_result = yr_arena_write_string(
      compiler->sz_arena,
      identifier,
      &string->identifier);

  if (compiler->last_result != ERROR_SUCCESS)
    return NULL;

  if (strcmp(identifier,"$") == 0)
    flags |= STRING_GFLAGS_ANONYMOUS;

  if (!(flags & STRING_GFLAGS_WIDE))
    flags |= STRING_GFLAGS_ASCII;

  // The STRING_GFLAGS_SINGLE_MATCH flag indicates that finding
  // a single match for the string is enough. This is true in
  // most cases, except when the string count (#) and string offset (@)
  // operators are used. All strings are marked STRING_FLAGS_SINGLE_MATCH
  // initially, and unmarked later if required.

  flags |= STRING_GFLAGS_SINGLE_MATCH;

  string->g_flags = flags;

  memset(string->matches, 0, sizeof(string->matches));

  if (flags & STRING_GFLAGS_HEXADECIMAL ||
      flags & STRING_GFLAGS_REGEXP)
  {
    if (flags & STRING_GFLAGS_HEXADECIMAL)
      compiler->last_result = yr_re_compile_hex(
          str->c_string, &re);
    else
      compiler->last_result = yr_re_compile(
          str->c_string, &re);

    if (compiler->last_result != ERROR_SUCCESS)
    {
      snprintf(
          message,
          sizeof(message),
          "invalid %s in string \"%s\": %s",
          (flags & STRING_GFLAGS_HEXADECIMAL) ?
              "hex string" : "regular expression",
          identifier,
          re->error_message);

      yr_compiler_set_error_extra_info(compiler, message);
      string = NULL;
      goto _exit;
    }

    if (re->flags & RE_FLAGS_START_ANCHORED)
      string->g_flags |= STRING_GFLAGS_START_ANCHORED;

    if (re->flags & RE_FLAGS_END_ANCHORED)
      string->g_flags |= STRING_GFLAGS_END_ANCHORED;

    if (re->flags & RE_FLAGS_FAST_HEX_REGEXP)
      string->g_flags |= STRING_GFLAGS_FAST_HEX_REGEXP;

    if (re->flags & RE_FLAGS_LITERAL_STRING)
    {
      string->g_flags |= STRING_GFLAGS_LITERAL;
      literal_string = re->literal_string;
      literal_string_len = re->literal_string_len;

      compiler->last_result = yr_atoms_extract_from_string(
          literal_string, literal_string_len, string->g_flags, &atom_list);
    }
    else
    {
      compiler->last_result = yr_re_emit_code(
          re, compiler->re_code_arena);

      if (compiler->last_result != ERROR_SUCCESS)
      {
        string = NULL;
        goto _exit;
      }

      compiler->last_result = yr_atoms_extract_from_re(
          re, string->g_flags, &atom_list);
    }
  }
  else
  {
    string->g_flags |= STRING_GFLAGS_LITERAL;
    literal_string = (uint8_t*) str->c_string;
    literal_string_len = str->length;

    compiler->last_result  = yr_atoms_extract_from_string(
        literal_string, literal_string_len, string->g_flags, &atom_list);
  }

  if (compiler->last_result != ERROR_SUCCESS)
  {
    string = NULL;
    goto _exit;
  }

  if (STRING_IS_LITERAL(string))
  {
    compiler->last_result = yr_arena_write_data(
        compiler->sz_arena,
        literal_string,
        literal_string_len,
        (void*) &string->string);

    if (compiler->last_result != ERROR_SUCCESS)
    {
      string = NULL;
      goto _exit;
    }

    string->length = literal_string_len;
  }

  // Add the string to Aho-Corasick automaton.

  if (atom_list != NULL)
  {
    compiler->last_result = yr_ac_add_string(
      compiler->automaton_arena,
      compiler->automaton,
      string,
      atom_list);
  }
  else
  {
    compiler->last_result = yr_arena_allocate_struct(
        compiler->automaton_arena,
        sizeof(YR_AC_MATCH),
        (void**) &new_match,
        offsetof(YR_AC_MATCH, string),
        offsetof(YR_AC_MATCH, forward_code),
        offsetof(YR_AC_MATCH, backward_code),
        offsetof(YR_AC_MATCH, next),
        EOL);

    if (compiler->last_result == ERROR_SUCCESS)
    {
      new_match->backtrack = 0;
      new_match->string = string;
      new_match->forward_code = re->root_node->forward_code;
      new_match->backward_code = NULL;
      new_match->next = compiler->automaton->root->matches;
      compiler->automaton->root->matches = new_match;
    }
  }

  atom = atom_list;

  if (atom != NULL)
    min_atom_length = MAX_ATOM_LENGTH;
  else
    min_atom_length = 0;

  while (atom != NULL)
  {
    if (atom->atom_length < min_atom_length)
      min_atom_length = atom->atom_length;
    atom = atom->next;
  }

  if (STRING_IS_LITERAL(string))
  {
    if (STRING_IS_WIDE(string))
      max_string_len = string->length * 2;
    else
      max_string_len = string->length;

    if (max_string_len == min_atom_length)
      string->g_flags |= STRING_GFLAGS_FITS_IN_ATOM;
  }

  if (compiler->file_name_stack_ptr > 0)
    file_name = compiler->file_name_stack[compiler->file_name_stack_ptr - 1];
  else
    file_name = NULL;

  if (min_atom_length < 2 && compiler->error_report_function != NULL)
  {
    snprintf(
        message,
        sizeof(message),
        "%s is slowing down scanning%s",
        string->identifier,
        min_atom_length == 0 ? " (critical!)" : "");

    compiler->error_report_function(
        YARA_ERROR_LEVEL_WARNING,
        file_name,
        yyget_lineno(yyscanner),
        message);
  }

  if (compiler->last_result != ERROR_SUCCESS)
    string = NULL;

_exit:

  if (atom_list != NULL)
    yr_atoms_list_destroy(atom_list);

  if (re != NULL)
    yr_re_destroy(re);

  return string;
}